Reduce memory consumption of line sorting

Pass the lines to sort by reference instead of by value, and store the sorted result back into the same vector instead of returning a copy.
This also improves line-sorting performance slightly: sorting a 200 MB text file now takes 13.71 seconds instead of 14.63 seconds.

Fix #10435, close #13852
This commit is contained in:
Don Ho 2023-07-01 18:10:01 +02:00
parent 02dd1d36fc
commit 9e24ec55db
5 changed files with 29 additions and 31 deletions

View File

@ -746,11 +746,10 @@ generic_string stringReplace(generic_string subject, const generic_string& searc
}
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter)
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output)
{
size_t start = 0U;
size_t end = input.find(delimiter);
std::vector<generic_string> output;
const size_t delimiterLength = delimiter.length();
while (end != std::string::npos)
{
@ -759,7 +758,6 @@ std::vector<generic_string> stringSplit(const generic_string& input, const gener
end = input.find(delimiter, start);
}
output.push_back(input.substr(start, end));
return output;
}
@ -784,7 +782,8 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
}
}
std::vector<generic_string> v = stringSplit(str2convert, TEXT(" "));
std::vector<generic_string> v;
stringSplit(str2convert, TEXT(" "), v);
for (const auto& i : v)
{
// Don't treat empty string and the number greater than 9999
@ -796,19 +795,17 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
return true;
}
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator)
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString)
{
generic_string joined;
size_t length = strings.size();
for (size_t i = 0; i < length; ++i)
{
joined += strings.at(i);
joinedString += strings.at(i);
if (i != length - 1)
{
joined += separator;
joinedString += separator;
}
}
return joined;
}

View File

@ -157,9 +157,9 @@ COLORREF getCtrlBgColor(HWND hWnd);
generic_string stringToUpper(generic_string strToConvert);
generic_string stringToLower(generic_string strToConvert);
generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace);
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter);
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output);
bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect);
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator);
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString);
generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable);
double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL);

View File

@ -67,7 +67,7 @@ public:
assert(_fromColumn <= _toColumn);
};
virtual ~ISorter() { };
virtual std::vector<generic_string> sort(std::vector<generic_string> lines) = 0;
virtual void sort(std::vector<generic_string>& lines) = 0;
};
// Implementation of lexicographic sorting of lines.
@ -76,7 +76,7 @@ class LexicographicSorter : public ISorter
public:
LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
// Note that both branches here are equivalent in the sense that they always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times.
@ -109,7 +109,6 @@ public:
}
});
}
return lines;
};
};
@ -119,7 +118,7 @@ class LexicographicCaseInsensitiveSorter : public ISorter
public:
LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
// Note that both branches here are equivalent in the sense that they always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times.
@ -151,7 +150,6 @@ public:
}
});
}
return lines;
};
};
@ -160,7 +158,7 @@ class IntegerSorter : public ISorter
public:
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
if (isSortingSpecificColumns())
{
std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
@ -496,8 +494,6 @@ public:
}
});
}
return lines;
};
};
@ -523,7 +519,7 @@ public:
#endif
}
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
// Note that empty lines are filtered out and added back manually to the output at the end.
std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers;
std::vector<generic_string> empties;
@ -581,7 +577,7 @@ public:
}
assert(output.size() == lines.size());
return output;
lines = output;
};
protected:
@ -640,9 +636,8 @@ class ReverseSorter : public ISorter
public:
ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
std::reverse(lines.begin(), lines.end());
return lines;
};
};
@ -655,9 +650,8 @@ public:
seed = static_cast<unsigned>(time(NULL));
};
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
void sort(std::vector<generic_string>& lines) override {
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed));
return lines;
};
};

View File

@ -4821,7 +4821,9 @@ void Finder::copy()
}
}
}
const generic_string toClipboard = stringJoin(lines, TEXT("\r\n")) + TEXT("\r\n");
generic_string toClipboard;
stringJoin(lines, TEXT("\r\n"), toClipboard);
toClipboard += TEXT("\r\n");
if (!toClipboard.empty())
{
if (!str2Clipboard(toClipboard, _hSelf))

View File

@ -3943,7 +3943,8 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
const generic_string text = getGenericTextAsString(startPos, endPos);
std::vector<generic_string> splitText = stringSplit(text, getEOLString());
std::vector<generic_string> splitText;
stringSplit(text, getEOLString(), splitText);
const size_t lineCount = execute(SCI_GETLINECOUNT);
const bool sortEntireDocument = toLine == lineCount - 1;
if (!sortEntireDocument)
@ -3954,8 +3955,10 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
}
}
assert(toLine - fromLine + 1 == splitText.size());
const std::vector<generic_string> sortedText = pSort->sort(splitText);
generic_string joined = stringJoin(sortedText, getEOLString());
pSort->sort(splitText);
generic_string joined;
stringJoin(splitText, getEOLString(), joined);
if (sortEntireDocument)
{
assert(joined.length() == text.length());
@ -4272,7 +4275,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
const generic_string text = getGenericTextAsString(startPos, endPos);
std::vector<generic_string> linesVect = stringSplit(text, getEOLString());
std::vector<generic_string> linesVect;
stringSplit(text, getEOLString(), linesVect);
const size_t lineCount = execute(SCI_GETLINECOUNT);
const bool doingEntireDocument = toLine == lineCount - 1;
@ -4288,7 +4292,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
size_t newSize = vecRemoveDuplicates(linesVect);
if (origSize != newSize)
{
generic_string joined = stringJoin(linesVect, getEOLString());
generic_string joined;
stringJoin(linesVect, getEOLString(), joined);
if (!doingEntireDocument)
{
joined += getEOLString();