Fix "Sort Lines as Integer" not considering negative number

The current implementation is a variant of Natural Sort, which takes in account of negative numbers.

Fix #11023, fix #2025, close #11481
This commit is contained in:
wiseyestudio00 2022-04-06 02:07:47 -04:00 committed by Don Ho
parent a16f08468e
commit 721f994df8
2 changed files with 264 additions and 45 deletions

View File

@ -161,18 +161,13 @@ public:
}
};
// Treat consecutive numerals as one number
// Otherwise it is a lexicographic sort
class NaturalSorter : public ISorter
class IntegerSorter : public ISorter
{
public:
NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override
{
// Note that both branches here are equivalent in the sense that they always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times.
if (isSortingSpecificColumns())
{
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
@ -181,43 +176,152 @@ public:
generic_string b = getSortKey(bIn);
long long compareResult = 0;
size_t i = 0;
size_t aNumIndex = 0;
size_t bNumIndex = 0;
while (compareResult == 0)
{
if (i >= a.length() || i >= b.length())
if (aNumIndex >= a.length() || bNumIndex >= b.length())
{
compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos);
compareResult = a.compare(min(aNumIndex, a.length()), generic_string::npos, b, min(bNumIndex, b.length()), generic_string::npos);
break;
}
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
bool aChunkIsNum = a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9';
bool bChunkIsNum = b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9';
int aNumSign = 1;
// Could be start of negative number
if (!aChunkIsNum && (aNumIndex + 1) < a.length())
{
aChunkIsNum = (a[aNumIndex] == L'-' && (a[aNumIndex + 1] >= L'0' && a[aNumIndex + 1] <= L'9'));
aNumSign = -1;
}
int bNumSign = 1;
if (!bChunkIsNum && (bNumIndex + 1) < b.length())
{
bChunkIsNum = (b[bNumIndex] == L'-' && (b[bNumIndex + 1] >= L'0' && b[bNumIndex + 1] <= L'9'));
bNumSign = -1;
}
// One is number and one is string
if (aChunkIsNum != bChunkIsNum)
{
compareResult = a[i] - b[i];
// No need to update i; compareResult != 0
compareResult = a[aNumIndex] - b[bNumIndex];
// compareResult isn't necessarily 0
// consider this case: "0-0", "0-"
// "-0" is considered a number, but "-" isn't
// but we are comparing two "-', which is the same
aNumIndex++;
bNumIndex++;
}
// Both are numbers
else if (aChunkIsNum)
{
size_t delta = 0;
// if the sign is differemt, just return the compareResult
if (aNumSign != bNumSign)
{
if (aNumSign == 1)
{
compareResult = 1;
}
else
{
compareResult = -1;
}
// No need to update anything; compareResult != 0; will break out while loop
}
else
{
if (aNumSign == -1)
{
aNumIndex++;
bNumIndex++;
}
// stoll crashes if number exceeds the limit for unsigned long long
// Maximum value for a variable of type unsigned long long | 18446744073709551615
// So take the max length 18 to convert the number
const size_t maxLen = 18;
compareResult = std::stoll(a.substr(i, maxLen)) - std::stoll(b.substr(i, maxLen), &delta);
i += delta;
size_t aNumEnd = a.find_first_not_of(L"1234567890", aNumIndex);
if (aNumEnd == generic_string::npos)
{
aNumEnd = a.length();
}
size_t bNumEnd = b.find_first_not_of(L"1234567890", bNumIndex);
if (bNumEnd == generic_string::npos)
{
bNumEnd = b.length();
}
int aZeroNum = 0;
while (aNumIndex < a.length() && a[aNumIndex] == '0')
{
aZeroNum++;
aNumIndex++;
}
int bZeroNum = 0;
while (bNumIndex < b.length() && b[bNumIndex] == '0')
{
bZeroNum++;
bNumIndex++;
}
size_t aNumLength = aNumEnd - aNumIndex;
size_t bNumLength = bNumEnd - bNumIndex;
// aNum is longer than bNum, must be larger (smaller if negative)
if (aNumLength > bNumLength)
{
compareResult = 1 * aNumSign;
// No need to update anything; compareResult != 0; will break out while loop
}
// bNum is longer than aNum, must be larger (smaller if negative)
else if (aNumLength < bNumLength)
{
compareResult = -1 * aNumSign;
// No need to update anything; compareResult != 0; will break out while loop
}
else
{
// the lengths of the numbers are equal
// compare the two number. However, we can not use std::stoll
// because the number strings can be every large, well over the maximum long long value
// thus, we compare each digit one by one
while (compareResult == 0
&& aNumIndex < a.length()
&& (a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9')
&& bNumIndex < b.length()
&& (b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9'))
{
compareResult = (a[aNumIndex] - b[bNumIndex]) * aNumSign;
aNumIndex++;
bNumIndex++;
}
if (compareResult == 0)
{
compareResult = bZeroNum - aZeroNum;
}
}
}
}
// Both are strings
else
{
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i);
i = aChunkEnd;
if (a[aNumIndex] == L'-')
{
aNumIndex++;
}
if (b[bNumIndex] == L'-')
{
bNumIndex++;
}
size_t aChunkEnd = a.find_first_of(L"1234567890-", aNumIndex);
size_t bChunkEnd = b.find_first_of(L"1234567890-", bNumIndex);
compareResult = a.compare(aNumIndex, aChunkEnd - aNumIndex, b, bNumIndex, bChunkEnd - bNumIndex);
aNumIndex = aChunkEnd;
bNumIndex = bChunkEnd;
}
}
@ -233,46 +337,159 @@ public:
}
else
{
std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b)
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
{
generic_string a = aIn;
generic_string b = bIn;
long long compareResult = 0;
size_t i = 0;
size_t aNumIndex = 0;
size_t bNumIndex = 0;
while (compareResult == 0)
{
if (i >= a.length() || i >= b.length())
if (aNumIndex >= a.length() || bNumIndex >= b.length())
{
compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos);
compareResult = a.compare(min(aNumIndex, a.length()), generic_string::npos, b, min(bNumIndex, b.length()), generic_string::npos);
break;
}
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
bool aChunkIsNum = a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9';
bool bChunkIsNum = b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9';
int aNumSign = 1;
// Could be start of negative number
if (!aChunkIsNum && (aNumIndex + 1) < a.length())
{
aChunkIsNum = (a[aNumIndex] == L'-' && (a[aNumIndex + 1] >= L'0' && a[aNumIndex + 1] <= L'9'));
aNumSign = -1;
}
int bNumSign = 1;
if (!bChunkIsNum && (bNumIndex + 1) < b.length())
{
bChunkIsNum = (b[bNumIndex] == L'-' && (b[bNumIndex + 1] >= L'0' && b[bNumIndex + 1] <= L'9'));
bNumSign = -1;
}
// One is number and one is string
if (aChunkIsNum != bChunkIsNum)
{
compareResult = a[i] - b[i];
// No need to update i; compareResult != 0
compareResult = a[aNumIndex] - b[bNumIndex];
// compareResult isn't necessarily 0
// consider this case: "0-0", "0-"
// "-0" is considered a number, but "-" isn't
// but we are comparing two "-', which is the same
// increment the indexes for this case and continue the loop
aNumIndex++;
bNumIndex++;
}
// Both are numbers
else if (aChunkIsNum)
{
size_t delta = 0;
// if the sign is differemt, just return the compareResult
if (aNumSign != bNumSign)
{
if (aNumSign == 1)
{
compareResult = 1;
}
else
{
compareResult = -1;
}
// No need to update anything; compareResult != 0; will break out while loop
}
else
{
if (aNumSign == -1)
{
aNumIndex++;
bNumIndex++;
}
// stoll crashes if number exceeds the limit for unsigned long long
// Maximum value for a variable of type unsigned long long | 18446744073709551615
// So take the max length 18 to convert the number
const size_t maxLen = 18;
compareResult = std::stoll(a.substr(i, maxLen)) - std::stoll(b.substr(i, maxLen), &delta);
i += delta;
size_t aNumEnd = a.find_first_not_of(L"1234567890", aNumIndex);
if (aNumEnd == generic_string::npos)
{
aNumEnd = a.length();
}
size_t bNumEnd = b.find_first_not_of(L"1234567890", bNumIndex);
if (bNumEnd == generic_string::npos)
{
bNumEnd = b.length();
}
int aZeroNum = 0;
while (aNumIndex < a.length() && a[aNumIndex] == '0')
{
aZeroNum++;
aNumIndex++;
}
int bZeroNum = 0;
while (bNumIndex < b.length() && b[bNumIndex] == '0')
{
bZeroNum++;
bNumIndex++;
}
size_t aNumLength = aNumEnd - aNumIndex;
size_t bNumLength = bNumEnd - bNumIndex;
// aNum is longer than bNum, must be larger (smaller if negative)
if (aNumLength > bNumLength)
{
compareResult = 1 * aNumSign;
// No need to update anything; compareResult != 0; will break out while loop
}
// bNum is longer than aNum, must be larger (smaller if negative)
else if (aNumLength < bNumLength)
{
compareResult = -1 * aNumSign;
// No need to update anything; compareResult != 0; will break out while loop
}
else
{
// the lengths of the numbers are equal
// compare the two number. However, we can not use std::stoll
// because the number strings can be every large, well over the maximum long long value
// thus, we compare each digit one by one
while (compareResult == 0
&& aNumIndex < a.length()
&& (a[aNumIndex] >= L'0' && a[aNumIndex] <= L'9')
&& bNumIndex < b.length()
&& (b[bNumIndex] >= L'0' && b[bNumIndex] <= L'9'))
{
compareResult = (a[aNumIndex] - b[bNumIndex]) * aNumSign;
aNumIndex++;
bNumIndex++;
}
if (compareResult == 0)
{
compareResult = bZeroNum - aZeroNum;
}
}
}
}
// Both are strings
else
{
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i);
i = aChunkEnd;
if (a[aNumIndex] == L'-')
{
aNumIndex++;
}
if (b[bNumIndex] == L'-')
{
bNumIndex++;
}
size_t aChunkEnd = a.find_first_of(L"1234567890-", aNumIndex);
size_t bChunkEnd = b.find_first_of(L"1234567890-", bNumIndex);
compareResult = a.compare(aNumIndex, aChunkEnd - aNumIndex, b, bNumIndex, bChunkEnd - bNumIndex);
aNumIndex = aChunkEnd;
bNumIndex = bChunkEnd;
}
}
@ -286,6 +503,8 @@ public:
}
});
}
return lines;
}
};

View File

@ -708,7 +708,7 @@ void Notepad_plus::command(int id)
}
else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING)
{
pSorter = std::unique_ptr<ISorter>(new NaturalSorter(isDescending, fromColumn, toColumn));
pSorter = std::unique_ptr<ISorter>(new IntegerSorter(isDescending, fromColumn, toColumn));
}
else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING)
{