mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-28 08:14:18 +02:00
Fix "Sort Lines as Integer" not considering negative number
The current implementation is a variant of Natural Sort, which takes in account of negative numbers. Fix #11023, fix #2025, close #11481
This commit is contained in:
parent
a16f08468e
commit
721f994df8
@ -161,18 +161,13 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Treat consecutive numerals as one number
|
class IntegerSorter : public ISorter
|
||||||
// Otherwise it is a lexicographic sort
|
|
||||||
class NaturalSorter : public ISorter
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override
|
std::vector<generic_string> sort(std::vector<generic_string> lines) override
|
||||||
{
|
{
|
||||||
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
|
||||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
|
||||||
// getSortKey() so many times.
|
|
||||||
if (isSortingSpecificColumns())
|
if (isSortingSpecificColumns())
|
||||||
{
|
{
|
||||||
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
||||||
@ -181,43 +176,152 @@ public:
|
|||||||
generic_string b = getSortKey(bIn);
|
generic_string b = getSortKey(bIn);
|
||||||
|
|
||||||
long long compareResult = 0;
|
long long compareResult = 0;
|
||||||
size_t i = 0;
|
size_t aNumIndex = 0;
|
||||||
|
size_t bNumIndex = 0;
|
||||||
while (compareResult == 0)
|
while (compareResult == 0)
|
||||||
{
|
{
|
||||||
if (i >= a.length() || i >= b.length())
|
if (aNumIndex >= a.length() || bNumIndex >= b.length())
|
||||||
{
|
{
|
||||||
compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos);
|
compareResult = a.compare(min(aNumIndex, a.length()), generic_string::npos, b, min(bNumIndex, b.length()), generic_string::npos);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
|
bool aChunkIsNum = a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9';
|
||||||
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
|
bool bChunkIsNum = b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9';
|
||||||
|
|
||||||
|
int aNumSign = 1;
|
||||||
|
// Could be start of negative number
|
||||||
|
if (!aChunkIsNum && (aNumIndex + 1) < a.length())
|
||||||
|
{
|
||||||
|
aChunkIsNum = (a[aNumIndex] == L'-' && (a[aNumIndex + 1] >= L'0' && a[aNumIndex + 1] <= L'9'));
|
||||||
|
aNumSign = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bNumSign = 1;
|
||||||
|
if (!bChunkIsNum && (bNumIndex + 1) < b.length())
|
||||||
|
{
|
||||||
|
bChunkIsNum = (b[bNumIndex] == L'-' && (b[bNumIndex + 1] >= L'0' && b[bNumIndex + 1] <= L'9'));
|
||||||
|
bNumSign = -1;
|
||||||
|
}
|
||||||
|
|
||||||
// One is number and one is string
|
// One is number and one is string
|
||||||
if (aChunkIsNum != bChunkIsNum)
|
if (aChunkIsNum != bChunkIsNum)
|
||||||
{
|
{
|
||||||
compareResult = a[i] - b[i];
|
compareResult = a[aNumIndex] - b[bNumIndex];
|
||||||
// No need to update i; compareResult != 0
|
|
||||||
|
// compareResult isn't necessarily 0
|
||||||
|
// consider this case: "0-0", "0-"
|
||||||
|
// "-0" is considered a number, but "-" isn't
|
||||||
|
// but we are comparing two "-', which is the same
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
}
|
}
|
||||||
// Both are numbers
|
// Both are numbers
|
||||||
else if (aChunkIsNum)
|
else if (aChunkIsNum)
|
||||||
{
|
{
|
||||||
size_t delta = 0;
|
// if the sign is differemt, just return the compareResult
|
||||||
|
if (aNumSign != bNumSign)
|
||||||
|
{
|
||||||
|
if (aNumSign == 1)
|
||||||
|
{
|
||||||
|
compareResult = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compareResult = -1;
|
||||||
|
}
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (aNumSign == -1)
|
||||||
|
{
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
// stoll crashes if number exceeds the limit for unsigned long long
|
size_t aNumEnd = a.find_first_not_of(L"1234567890", aNumIndex);
|
||||||
// Maximum value for a variable of type unsigned long long | 18446744073709551615
|
if (aNumEnd == generic_string::npos)
|
||||||
// So take the max length 18 to convert the number
|
{
|
||||||
const size_t maxLen = 18;
|
aNumEnd = a.length();
|
||||||
compareResult = std::stoll(a.substr(i, maxLen)) - std::stoll(b.substr(i, maxLen), &delta);
|
}
|
||||||
i += delta;
|
|
||||||
|
size_t bNumEnd = b.find_first_not_of(L"1234567890", bNumIndex);
|
||||||
|
if (bNumEnd == generic_string::npos)
|
||||||
|
{
|
||||||
|
bNumEnd = b.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
int aZeroNum = 0;
|
||||||
|
while (aNumIndex < a.length() && a[aNumIndex] == '0')
|
||||||
|
{
|
||||||
|
aZeroNum++;
|
||||||
|
aNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bZeroNum = 0;
|
||||||
|
while (bNumIndex < b.length() && b[bNumIndex] == '0')
|
||||||
|
{
|
||||||
|
bZeroNum++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t aNumLength = aNumEnd - aNumIndex;
|
||||||
|
size_t bNumLength = bNumEnd - bNumIndex;
|
||||||
|
|
||||||
|
// aNum is longer than bNum, must be larger (smaller if negative)
|
||||||
|
if (aNumLength > bNumLength)
|
||||||
|
{
|
||||||
|
compareResult = 1 * aNumSign;
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
// bNum is longer than aNum, must be larger (smaller if negative)
|
||||||
|
else if (aNumLength < bNumLength)
|
||||||
|
{
|
||||||
|
compareResult = -1 * aNumSign;
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// the lengths of the numbers are equal
|
||||||
|
// compare the two number. However, we can not use std::stoll
|
||||||
|
// because the number strings can be every large, well over the maximum long long value
|
||||||
|
// thus, we compare each digit one by one
|
||||||
|
while (compareResult == 0
|
||||||
|
&& aNumIndex < a.length()
|
||||||
|
&& (a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9')
|
||||||
|
&& bNumIndex < b.length()
|
||||||
|
&& (b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9'))
|
||||||
|
{
|
||||||
|
compareResult = (a[aNumIndex] - b[bNumIndex]) * aNumSign;
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compareResult == 0)
|
||||||
|
{
|
||||||
|
compareResult = bZeroNum - aZeroNum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Both are strings
|
// Both are strings
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
|
if (a[aNumIndex] == L'-')
|
||||||
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
|
{
|
||||||
compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i);
|
aNumIndex++;
|
||||||
i = aChunkEnd;
|
}
|
||||||
|
if (b[bNumIndex] == L'-')
|
||||||
|
{
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t aChunkEnd = a.find_first_of(L"1234567890-", aNumIndex);
|
||||||
|
size_t bChunkEnd = b.find_first_of(L"1234567890-", bNumIndex);
|
||||||
|
compareResult = a.compare(aNumIndex, aChunkEnd - aNumIndex, b, bNumIndex, bChunkEnd - bNumIndex);
|
||||||
|
aNumIndex = aChunkEnd;
|
||||||
|
bNumIndex = bChunkEnd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,46 +337,159 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b)
|
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
||||||
{
|
{
|
||||||
|
generic_string a = aIn;
|
||||||
|
generic_string b = bIn;
|
||||||
|
|
||||||
long long compareResult = 0;
|
long long compareResult = 0;
|
||||||
size_t i = 0;
|
size_t aNumIndex = 0;
|
||||||
|
size_t bNumIndex = 0;
|
||||||
while (compareResult == 0)
|
while (compareResult == 0)
|
||||||
{
|
{
|
||||||
if (i >= a.length() || i >= b.length())
|
if (aNumIndex >= a.length() || bNumIndex >= b.length())
|
||||||
{
|
{
|
||||||
compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos);
|
compareResult = a.compare(min(aNumIndex, a.length()), generic_string::npos, b, min(bNumIndex, b.length()), generic_string::npos);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
|
bool aChunkIsNum = a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9';
|
||||||
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
|
bool bChunkIsNum = b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9';
|
||||||
|
|
||||||
|
int aNumSign = 1;
|
||||||
|
// Could be start of negative number
|
||||||
|
if (!aChunkIsNum && (aNumIndex + 1) < a.length())
|
||||||
|
{
|
||||||
|
aChunkIsNum = (a[aNumIndex] == L'-' && (a[aNumIndex + 1] >= L'0' && a[aNumIndex + 1] <= L'9'));
|
||||||
|
aNumSign = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bNumSign = 1;
|
||||||
|
if (!bChunkIsNum && (bNumIndex + 1) < b.length())
|
||||||
|
{
|
||||||
|
bChunkIsNum = (b[bNumIndex] == L'-' && (b[bNumIndex + 1] >= L'0' && b[bNumIndex + 1] <= L'9'));
|
||||||
|
bNumSign = -1;
|
||||||
|
}
|
||||||
|
|
||||||
// One is number and one is string
|
// One is number and one is string
|
||||||
if (aChunkIsNum != bChunkIsNum)
|
if (aChunkIsNum != bChunkIsNum)
|
||||||
{
|
{
|
||||||
compareResult = a[i] - b[i];
|
compareResult = a[aNumIndex] - b[bNumIndex];
|
||||||
// No need to update i; compareResult != 0
|
|
||||||
|
// compareResult isn't necessarily 0
|
||||||
|
// consider this case: "0-0", "0-"
|
||||||
|
// "-0" is considered a number, but "-" isn't
|
||||||
|
// but we are comparing two "-', which is the same
|
||||||
|
// increment the indexes for this case and continue the loop
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
}
|
}
|
||||||
// Both are numbers
|
// Both are numbers
|
||||||
else if (aChunkIsNum)
|
else if (aChunkIsNum)
|
||||||
{
|
{
|
||||||
size_t delta = 0;
|
// if the sign is differemt, just return the compareResult
|
||||||
|
if (aNumSign != bNumSign)
|
||||||
|
{
|
||||||
|
if (aNumSign == 1)
|
||||||
|
{
|
||||||
|
compareResult = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compareResult = -1;
|
||||||
|
}
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (aNumSign == -1)
|
||||||
|
{
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
// stoll crashes if number exceeds the limit for unsigned long long
|
size_t aNumEnd = a.find_first_not_of(L"1234567890", aNumIndex);
|
||||||
// Maximum value for a variable of type unsigned long long | 18446744073709551615
|
if (aNumEnd == generic_string::npos)
|
||||||
// So take the max length 18 to convert the number
|
{
|
||||||
const size_t maxLen = 18;
|
aNumEnd = a.length();
|
||||||
compareResult = std::stoll(a.substr(i, maxLen)) - std::stoll(b.substr(i, maxLen), &delta);
|
}
|
||||||
i += delta;
|
|
||||||
|
size_t bNumEnd = b.find_first_not_of(L"1234567890", bNumIndex);
|
||||||
|
if (bNumEnd == generic_string::npos)
|
||||||
|
{
|
||||||
|
bNumEnd = b.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
int aZeroNum = 0;
|
||||||
|
while (aNumIndex < a.length() && a[aNumIndex] == '0')
|
||||||
|
{
|
||||||
|
aZeroNum++;
|
||||||
|
aNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bZeroNum = 0;
|
||||||
|
while (bNumIndex < b.length() && b[bNumIndex] == '0')
|
||||||
|
{
|
||||||
|
bZeroNum++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t aNumLength = aNumEnd - aNumIndex;
|
||||||
|
size_t bNumLength = bNumEnd - bNumIndex;
|
||||||
|
|
||||||
|
// aNum is longer than bNum, must be larger (smaller if negative)
|
||||||
|
if (aNumLength > bNumLength)
|
||||||
|
{
|
||||||
|
compareResult = 1 * aNumSign;
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
// bNum is longer than aNum, must be larger (smaller if negative)
|
||||||
|
else if (aNumLength < bNumLength)
|
||||||
|
{
|
||||||
|
compareResult = -1 * aNumSign;
|
||||||
|
// No need to update anything; compareResult != 0; will break out while loop
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// the lengths of the numbers are equal
|
||||||
|
// compare the two number. However, we can not use std::stoll
|
||||||
|
// because the number strings can be every large, well over the maximum long long value
|
||||||
|
// thus, we compare each digit one by one
|
||||||
|
while (compareResult == 0
|
||||||
|
&& aNumIndex < a.length()
|
||||||
|
&& (a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9')
|
||||||
|
&& bNumIndex < b.length()
|
||||||
|
&& (b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9'))
|
||||||
|
{
|
||||||
|
compareResult = (a[aNumIndex] - b[bNumIndex]) * aNumSign;
|
||||||
|
aNumIndex++;
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compareResult == 0)
|
||||||
|
{
|
||||||
|
compareResult = bZeroNum - aZeroNum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Both are strings
|
// Both are strings
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
|
if (a[aNumIndex] == L'-')
|
||||||
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
|
{
|
||||||
compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i);
|
aNumIndex++;
|
||||||
i = aChunkEnd;
|
}
|
||||||
|
if (b[bNumIndex] == L'-')
|
||||||
|
{
|
||||||
|
bNumIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t aChunkEnd = a.find_first_of(L"1234567890-", aNumIndex);
|
||||||
|
size_t bChunkEnd = b.find_first_of(L"1234567890-", bNumIndex);
|
||||||
|
compareResult = a.compare(aNumIndex, aChunkEnd - aNumIndex, b, bNumIndex, bChunkEnd - bNumIndex);
|
||||||
|
aNumIndex = aChunkEnd;
|
||||||
|
bNumIndex = bChunkEnd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -286,6 +503,8 @@ public:
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return lines;
|
return lines;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -708,7 +708,7 @@ void Notepad_plus::command(int id)
|
|||||||
}
|
}
|
||||||
else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING)
|
else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING)
|
||||||
{
|
{
|
||||||
pSorter = std::unique_ptr<ISorter>(new NaturalSorter(isDescending, fromColumn, toColumn));
|
pSorter = std::unique_ptr<ISorter>(new IntegerSorter(isDescending, fromColumn, toColumn));
|
||||||
}
|
}
|
||||||
else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING)
|
else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user