Improve lines sorting memory consumption
Use reference instead of copy for the sorting result. Also improve lines sorting performance slightly: Sorting a 200 MB text file takes 13.71 seconds instead of 14.63 seconds. Fix #10435, close #13852
This commit is contained in:
parent
02dd1d36fc
commit
9e24ec55db
|
@ -746,11 +746,10 @@ generic_string stringReplace(generic_string subject, const generic_string& searc
|
|||
}
|
||||
|
||||
|
||||
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter)
|
||||
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output)
|
||||
{
|
||||
size_t start = 0U;
|
||||
size_t end = input.find(delimiter);
|
||||
std::vector<generic_string> output;
|
||||
const size_t delimiterLength = delimiter.length();
|
||||
while (end != std::string::npos)
|
||||
{
|
||||
|
@ -759,7 +758,6 @@ std::vector<generic_string> stringSplit(const generic_string& input, const gener
|
|||
end = input.find(delimiter, start);
|
||||
}
|
||||
output.push_back(input.substr(start, end));
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
|
@ -784,7 +782,8 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<generic_string> v = stringSplit(str2convert, TEXT(" "));
|
||||
std::vector<generic_string> v;
|
||||
stringSplit(str2convert, TEXT(" "), v);
|
||||
for (const auto& i : v)
|
||||
{
|
||||
// Don't treat empty string and the number greater than 9999
|
||||
|
@ -796,19 +795,17 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
|
|||
return true;
|
||||
}
|
||||
|
||||
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator)
|
||||
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString)
|
||||
{
|
||||
generic_string joined;
|
||||
size_t length = strings.size();
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
{
|
||||
joined += strings.at(i);
|
||||
joinedString += strings.at(i);
|
||||
if (i != length - 1)
|
||||
{
|
||||
joined += separator;
|
||||
joinedString += separator;
|
||||
}
|
||||
}
|
||||
return joined;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -157,9 +157,9 @@ COLORREF getCtrlBgColor(HWND hWnd);
|
|||
generic_string stringToUpper(generic_string strToConvert);
|
||||
generic_string stringToLower(generic_string strToConvert);
|
||||
generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace);
|
||||
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter);
|
||||
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output);
|
||||
bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect);
|
||||
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator);
|
||||
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString);
|
||||
generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable);
|
||||
double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL);
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ public:
|
|||
assert(_fromColumn <= _toColumn);
|
||||
};
|
||||
virtual ~ISorter() { };
|
||||
virtual std::vector<generic_string> sort(std::vector<generic_string> lines) = 0;
|
||||
virtual void sort(std::vector<generic_string>& lines) = 0;
|
||||
};
|
||||
|
||||
// Implementation of lexicographic sorting of lines.
|
||||
|
@ -76,7 +76,7 @@ class LexicographicSorter : public ISorter
|
|||
public:
|
||||
LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
||||
// getSortKey() so many times.
|
||||
|
@ -109,7 +109,6 @@ public:
|
|||
}
|
||||
});
|
||||
}
|
||||
return lines;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -119,7 +118,7 @@ class LexicographicCaseInsensitiveSorter : public ISorter
|
|||
public:
|
||||
LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
||||
// getSortKey() so many times.
|
||||
|
@ -151,7 +150,6 @@ public:
|
|||
}
|
||||
});
|
||||
}
|
||||
return lines;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -160,7 +158,7 @@ class IntegerSorter : public ISorter
|
|||
public:
|
||||
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
if (isSortingSpecificColumns())
|
||||
{
|
||||
std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
||||
|
@ -496,8 +494,6 @@ public:
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
return lines;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -523,7 +519,7 @@ public:
|
|||
#endif
|
||||
}
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
// Note that empty lines are filtered out and added back manually to the output at the end.
|
||||
std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers;
|
||||
std::vector<generic_string> empties;
|
||||
|
@ -581,7 +577,7 @@ public:
|
|||
}
|
||||
|
||||
assert(output.size() == lines.size());
|
||||
return output;
|
||||
lines = output;
|
||||
};
|
||||
|
||||
protected:
|
||||
|
@ -640,9 +636,8 @@ class ReverseSorter : public ISorter
|
|||
public:
|
||||
ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
std::reverse(lines.begin(), lines.end());
|
||||
return lines;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -655,9 +650,8 @@ public:
|
|||
seed = static_cast<unsigned>(time(NULL));
|
||||
};
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
||||
void sort(std::vector<generic_string>& lines) override {
|
||||
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed));
|
||||
return lines;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -4821,7 +4821,9 @@ void Finder::copy()
|
|||
}
|
||||
}
|
||||
}
|
||||
const generic_string toClipboard = stringJoin(lines, TEXT("\r\n")) + TEXT("\r\n");
|
||||
generic_string toClipboard;
|
||||
stringJoin(lines, TEXT("\r\n"), toClipboard);
|
||||
toClipboard += TEXT("\r\n");
|
||||
if (!toClipboard.empty())
|
||||
{
|
||||
if (!str2Clipboard(toClipboard, _hSelf))
|
||||
|
|
|
@ -3943,7 +3943,8 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
|
|||
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
||||
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
||||
const generic_string text = getGenericTextAsString(startPos, endPos);
|
||||
std::vector<generic_string> splitText = stringSplit(text, getEOLString());
|
||||
std::vector<generic_string> splitText;
|
||||
stringSplit(text, getEOLString(), splitText);
|
||||
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
||||
const bool sortEntireDocument = toLine == lineCount - 1;
|
||||
if (!sortEntireDocument)
|
||||
|
@ -3954,8 +3955,10 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
|
|||
}
|
||||
}
|
||||
assert(toLine - fromLine + 1 == splitText.size());
|
||||
const std::vector<generic_string> sortedText = pSort->sort(splitText);
|
||||
generic_string joined = stringJoin(sortedText, getEOLString());
|
||||
pSort->sort(splitText);
|
||||
generic_string joined;
|
||||
stringJoin(splitText, getEOLString(), joined);
|
||||
|
||||
if (sortEntireDocument)
|
||||
{
|
||||
assert(joined.length() == text.length());
|
||||
|
@ -4272,7 +4275,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
|
|||
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
||||
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
||||
const generic_string text = getGenericTextAsString(startPos, endPos);
|
||||
std::vector<generic_string> linesVect = stringSplit(text, getEOLString());
|
||||
std::vector<generic_string> linesVect;
|
||||
stringSplit(text, getEOLString(), linesVect);
|
||||
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
||||
|
||||
const bool doingEntireDocument = toLine == lineCount - 1;
|
||||
|
@ -4288,7 +4292,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
|
|||
size_t newSize = vecRemoveDuplicates(linesVect);
|
||||
if (origSize != newSize)
|
||||
{
|
||||
generic_string joined = stringJoin(linesVect, getEOLString());
|
||||
generic_string joined;
|
||||
stringJoin(linesVect, getEOLString(), joined);
|
||||
if (!doingEntireDocument)
|
||||
{
|
||||
joined += getEOLString();
|
||||
|
|
Loading…
Reference in New Issue