Fix search results being cut off when the found line contains NUL characters ('\0')

Fix #6281, fix #15216, fix #16466, close #16469
This commit is contained in:
Don Ho 2025-04-25 23:53:42 +02:00
parent 8a4aa48ed7
commit 51449b09de
6 changed files with 141 additions and 61 deletions

View File

@ -333,7 +333,7 @@ wstring purgeMenuItemString(const wchar_t * menuItemStr, bool keepAmpersand)
}
const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t codepage, int lenMbcs, int* pLenWc, int* pBytesNotProcessed)
const wchar_t* WcharMbcsConvertor::char2wchar(const char* mbcs2Convert, size_t codepage, int lenMbcs, int* pLenWc, int* pBytesNotProcessed)
{
// Do not process NULL pointer
if (!mbcs2Convert)
@ -358,12 +358,12 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t
// Otherwise, test if we are cutting a multi-byte character at end of buffer
else if (lenMbcs != -1 && cp == CP_UTF8) // For UTF-8, we know how to test it
{
int indexOfLastChar = Utf8::characterStart(mbcs2Convert, lenMbcs-1); // get index of last character
if (indexOfLastChar != 0 && !Utf8::isValid(mbcs2Convert+indexOfLastChar, lenMbcs-indexOfLastChar)) // if it is not valid we do not process it right now (unless its the only character in string, to ensure that we always progress, e.g. that bytesNotProcessed < lenMbcs)
int indexOfLastChar = Utf8::characterStart(mbcs2Convert, lenMbcs - 1); // get index of last character
if (indexOfLastChar != 0 && !Utf8::isValid(mbcs2Convert + indexOfLastChar, lenMbcs - indexOfLastChar)) // if it is not valid we do not process it right now (unless its the only character in string, to ensure that we always progress, e.g. that bytesNotProcessed < lenMbcs)
{
bytesNotProcessed = lenMbcs-indexOfLastChar;
bytesNotProcessed = lenMbcs - indexOfLastChar;
}
lenWc = MultiByteToWideChar(cp, 0, mbcs2Convert, lenMbcs-bytesNotProcessed, NULL, 0);
lenWc = MultiByteToWideChar(cp, 0, mbcs2Convert, lenMbcs - bytesNotProcessed, NULL, 0);
}
else // For other encodings, ask system if there are any invalid characters; note that it will not correctly know if last character is cut when there are invalid characters inside the text
{
@ -371,7 +371,7 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t
if (lenWc == 0 && GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
{
// Test without last byte
if (lenMbcs > 1) lenWc = MultiByteToWideChar(cp, MB_ERR_INVALID_CHARS, mbcs2Convert, lenMbcs-1, NULL, 0);
if (lenMbcs > 1) lenWc = MultiByteToWideChar(cp, MB_ERR_INVALID_CHARS, mbcs2Convert, lenMbcs - 1, NULL, 0);
if (lenWc == 0) // don't have to check that the error is still ERROR_NO_UNICODE_TRANSLATION, since only the length parameter changed
{
// TODO: should warn user about incorrect loading due to invalid characters
@ -384,12 +384,20 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t
bytesNotProcessed = 1;
}
}
else if (lenWc == 0)
{
lenWc = MultiByteToWideChar(cp, 0, mbcs2Convert, lenMbcs, NULL, 0);
}
}
if (lenWc > 0)
{
_wideCharStr.sizeTo(lenWc);
MultiByteToWideChar(cp, 0, mbcs2Convert, lenMbcs-bytesNotProcessed, _wideCharStr, lenWc);
MultiByteToWideChar(cp, 0, mbcs2Convert, lenMbcs - bytesNotProcessed, _wideCharStr, lenWc);
if (lenMbcs == -1)
_wideCharStr[lenWc - 1] = '\0';
else
_wideCharStr[lenWc] = '\0';
}
else
_wideCharStr.empty();
@ -405,22 +413,37 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t
// "mstart" and "mend" are pointers to indexes in mbcs2Convert,
// which are converted to the corresponding indexes in the returned wchar_t string.
const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend)
const wchar_t* WcharMbcsConvertor::char2wchar(const char* mbcs2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend, int mbcsLen)
{
// Do not process NULL pointer
if (!mbcs2Convert) return NULL;
if (mbcsLen == 0 || (mbcsLen == -1 && mbcs2Convert[0] == 0))
{
_wideCharStr.empty();
*mstart = 0;
*mend = 0;
return _wideCharStr;
}
UINT cp = static_cast<UINT>(codepage);
int len = MultiByteToWideChar(cp, 0, mbcs2Convert, -1, NULL, 0);
int len = MultiByteToWideChar(cp, 0, mbcs2Convert, mbcsLen ? mbcsLen : -1, NULL, 0);
if (len > 0)
{
_wideCharStr.sizeTo(len);
len = MultiByteToWideChar(cp, 0, mbcs2Convert, -1, _wideCharStr, len);
len = MultiByteToWideChar(cp, 0, mbcs2Convert, mbcsLen ? mbcsLen : -1, _wideCharStr, len);
if (mbcsLen == -1) // added
_wideCharStr[len - 1] = '\0';
else
_wideCharStr[len] = '\0';
if ((size_t)*mstart < strlen(mbcs2Convert) && (size_t)*mend <= strlen(mbcs2Convert))
intptr_t mbcsLen2 = mbcsLen ? mbcsLen : (intptr_t)strlen(mbcs2Convert);
if (*mstart < mbcsLen2 && *mend <= mbcsLen2)
{
*mstart = MultiByteToWideChar(cp, 0, mbcs2Convert, static_cast<int>(*mstart), _wideCharStr, 0);
*mend = MultiByteToWideChar(cp, 0, mbcs2Convert, static_cast<int>(*mend), _wideCharStr, 0);
if (*mstart >= len || *mend >= len)
*mend = MultiByteToWideChar(cp, 0, mbcs2Convert, static_cast<int>(*mend), _wideCharStr, 0);
if (*mstart >= len || *mend > len)
{
*mstart = 0;
*mend = 0;
@ -437,17 +460,27 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, size_t
}
const char* WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, size_t codepage, int lenWc, int* pLenMbcs)
const char* WcharMbcsConvertor::wchar2char(const wchar_t* wcharStr2Convert, size_t codepage, int lenWc, int* pLenMbcs)
{
if (!wcharStr2Convert)
return nullptr;
if (lenWc == 0 || (lenWc == -1 && wcharStr2Convert[0] == 0))
{
_multiByteStr.empty();
return _multiByteStr;
}
UINT cp = static_cast<UINT>(codepage);
int lenMbcs = WideCharToMultiByte(cp, 0, wcharStr2Convert, lenWc, NULL, 0, NULL, NULL);
if (lenMbcs > 0)
{
_multiByteStr.sizeTo(lenMbcs);
WideCharToMultiByte(cp, 0, wcharStr2Convert, lenWc, _multiByteStr, lenMbcs, NULL, NULL);
if (lenWc == -1)
_multiByteStr[lenMbcs - 1] = '\0';
else
_multiByteStr[lenMbcs] = '\0';
}
else
_multiByteStr.empty();
@ -458,23 +491,38 @@ const char* WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, siz
}
const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend)
const char* WcharMbcsConvertor::wchar2char(const wchar_t* wcharStr2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend, int wcharLenIn, int* lenOut)
{
if (!wcharStr2Convert)
return nullptr;
if (wcharLenIn == 0 || (wcharLenIn == -1 && wcharStr2Convert[0] == 0))
{
_multiByteStr.empty();
*mstart = 0;
*mend = 0;
return _multiByteStr;
}
UINT cp = static_cast<UINT>(codepage);
int len = WideCharToMultiByte(cp, 0, wcharStr2Convert, -1, NULL, 0, NULL, NULL);
int len = WideCharToMultiByte(cp, 0, wcharStr2Convert, wcharLenIn ? wcharLenIn : -1, NULL, 0, NULL, NULL);
if (len > 0)
{
_multiByteStr.sizeTo(len);
len = WideCharToMultiByte(cp, 0, wcharStr2Convert, -1, _multiByteStr, len, NULL, NULL); // not needed?
len = WideCharToMultiByte(cp, 0, wcharStr2Convert, wcharLenIn ? wcharLenIn : -1, _multiByteStr, len, NULL, NULL);
if (wcharLenIn == -1)
_multiByteStr[len - 1] = '\0';
else
_multiByteStr[len] = '\0';
if (*mstart < lstrlenW(wcharStr2Convert) && *mend < lstrlenW(wcharStr2Convert))
{
intptr_t wcharLenIn2 = wcharLenIn ? wcharLenIn : (intptr_t)wcslen(wcharStr2Convert);
if (*mstart < wcharLenIn2 && *mend < wcharLenIn2)
{
*mstart = WideCharToMultiByte(cp, 0, wcharStr2Convert, (int)*mstart, NULL, 0, NULL, NULL);
*mend = WideCharToMultiByte(cp, 0, wcharStr2Convert, (int)*mend, NULL, 0, NULL, NULL);
if (*mstart >= len || *mend >= len)
if (*mstart >= len || *mend > len)
{
*mstart = 0;
*mend = 0;
@ -482,8 +530,14 @@ const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, si
}
}
else
{
_multiByteStr.empty();
*mstart = 0;
*mend = 0;
}
if (lenOut)
*lenOut = len;
return _multiByteStr;
}

View File

@ -87,17 +87,18 @@ public:
return instance;
}
const wchar_t * char2wchar(const char *mbStr, size_t codepage, int lenMbcs =-1, int* pLenOut=NULL, int* pBytesNotProcessed=NULL);
const wchar_t * char2wchar(const char *mbcs2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend);
const char * wchar2char(const wchar_t *wcStr, size_t codepage, int lenIn = -1, int* pLenOut = NULL);
const char * wchar2char(const wchar_t *wcStr, size_t codepage, intptr_t* mstart, intptr_t* mend);
const wchar_t* char2wchar(const char* mbStr, size_t codepage, int lenMbcs = -1, int* pLenOut = NULL, int* pBytesNotProcessed = NULL);
const wchar_t* char2wchar(const char* mbcs2Convert, size_t codepage, intptr_t* mstart, intptr_t* mend, int len = 0);
size_t getSizeW() { return _wideCharStr.size(); };
const char* wchar2char(const wchar_t* wcStr, size_t codepage, int lenIn = -1, int* pLenOut = NULL);
const char* wchar2char(const wchar_t* wcStr, size_t codepage, intptr_t* mstart, intptr_t* mend, int lenIn = 0, int* lenOut = nullptr);
size_t getSizeA() { return _multiByteStr.size(); };
const char * encode(UINT fromCodepage, UINT toCodepage, const char *txt2Encode, int lenIn = -1, int* pLenOut=NULL, int* pBytesNotProcessed=NULL)
{
const char* encode(UINT fromCodepage, UINT toCodepage, const char* txt2Encode, int lenIn = -1, int* pLenOut = NULL, int* pBytesNotProcessed = NULL) {
int lenWc = 0;
const wchar_t * strW = char2wchar(txt2Encode, fromCodepage, lenIn, &lenWc, pBytesNotProcessed);
return wchar2char(strW, toCodepage, lenWc, pLenOut);
}
const wchar_t* strW = char2wchar(txt2Encode, fromCodepage, lenIn, &lenWc, pBytesNotProcessed);
return wchar2char(strW, toCodepage, lenWc, pLenOut);
}
protected:
WcharMbcsConvertor() = default;
@ -112,38 +113,39 @@ protected:
WcharMbcsConvertor(WcharMbcsConvertor&&) = delete;
WcharMbcsConvertor& operator= (WcharMbcsConvertor&&) = delete;
template <class T>
class StringBuffer final
template <class T> class StringBuffer final
{
public:
~StringBuffer() { if (_allocLen) delete[] _str; }
void sizeTo(size_t size)
{
if (_allocLen < size)
void sizeTo(size_t size) {
if (_allocLen < size + 1)
{
if (_allocLen)
delete[] _str;
_allocLen = std::max<size_t>(size, initSize);
_str = new T[_allocLen];
_allocLen = std::max<size_t>(size + 1, initSize);
_str = new T[_allocLen]{};
}
_dataLen = size;
}
void empty()
{
void empty() {
static T nullStr = 0; // routines may return an empty string, with null terminator, without allocating memory; a pointer to this null character will be returned in that case
if (_allocLen == 0)
_str = &nullStr;
else
_str[0] = 0;
_dataLen = 0;
}
size_t size() const { return _dataLen; }
operator T* () { return _str; }
operator const T* () const { return _str; }
protected:
static const int initSize = 1024;
size_t _allocLen = 0;
size_t _dataLen = 0;
T* _str = nullptr;
};
@ -151,7 +153,6 @@ protected:
StringBuffer<wchar_t> _wideCharStr;
};
#define REBARBAND_SIZE sizeof(REBARBANDINFO)
std::wstring pathRemoveFileSpec(std::wstring & path);

View File

@ -3399,19 +3399,22 @@ int FindReplaceDlg::processRange(ProcessOperation op, FindReplaceInfo & findRepl
wchar_t lineBuf[SC_SEARCHRESULT_LINEBUFFERMAXLENGTH]{};
if (nbChar > SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 3)
{
lend = lstart + SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 4;
nbChar = SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 4;
}
intptr_t start_mark = targetStart - lstart;
intptr_t end_mark = targetEnd - lstart;
pEditView->getGenericText(lineBuf, SC_SEARCHRESULT_LINEBUFFERMAXLENGTH, lstart, lend, &start_mark, &end_mark);
pEditView->getGenericText(lineBuf, SC_SEARCHRESULT_LINEBUFFERMAXLENGTH, lstart, lend, &start_mark, &end_mark, &nbChar);
wstring line = lineBuf;
line += L"\r\n";
lineBuf[nbChar++] = '\r';
lineBuf[nbChar++] = '\n';
SearchResultMarkingLine srml;
srml._segmentPostions.push_back(std::pair<intptr_t, intptr_t>(start_mark, end_mark));
text2AddUtf8->append(_pFinder->foundLine(FoundInfo(targetStart, targetEnd, lineNumber + 1, pFileName), srml, line.c_str(), totalLineNumber));
text2AddUtf8->append(_pFinder->foundLine(FoundInfo(targetStart, targetEnd, lineNumber + 1, pFileName), srml, lineBuf, nbChar, totalLineNumber));
if (text2AddUtf8->length() > FINDTEMPSTRING_MAXSIZE)
{
@ -3440,15 +3443,19 @@ int FindReplaceDlg::processRange(ProcessOperation op, FindReplaceInfo & findRepl
wchar_t lineBuf[SC_SEARCHRESULT_LINEBUFFERMAXLENGTH]{};
if (nbChar > SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 3)
{
lend = lstart + SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 4;
nbChar = SC_SEARCHRESULT_LINEBUFFERMAXLENGTH - 4;
}
intptr_t start_mark = targetStart - lstart;
intptr_t end_mark = targetEnd - lstart;
pEditView->getGenericText(lineBuf, SC_SEARCHRESULT_LINEBUFFERMAXLENGTH, lstart, lend, &start_mark, &end_mark);
pEditView->getGenericText(lineBuf, SC_SEARCHRESULT_LINEBUFFERMAXLENGTH, lstart, lend, &start_mark, &end_mark, &nbChar);
lineBuf[nbChar++] = '\r';
lineBuf[nbChar++] = '\n';
wstring line = lineBuf;
line += L"\r\n";
SearchResultMarkingLine srml;
srml._segmentPostions.push_back(std::pair<intptr_t, intptr_t>(start_mark, end_mark));
@ -3460,7 +3467,7 @@ int FindReplaceDlg::processRange(ProcessOperation op, FindReplaceInfo & findRepl
pFindersInfo->_pDestFinder->addFileNameTitle(pFileName);
findAllFileNameAdded = true;
}
text2AddUtf8->append(pFindersInfo->_pDestFinder->foundLine(FoundInfo(targetStart, targetEnd, lineNumber + 1, pFileName), srml, line.c_str(), totalLineNumber));
text2AddUtf8->append(pFindersInfo->_pDestFinder->foundLine(FoundInfo(targetStart, targetEnd, lineNumber + 1, pFileName), srml, lineBuf, nbChar, totalLineNumber));
if (text2AddUtf8->length() > FINDTEMPSTRING_MAXSIZE)
{
@ -5372,7 +5379,7 @@ void Finder::addSearchResultInfo(int count, int countSearched, bool searchedEnti
setFinderReadOnly(true);
}
const char* Finder::foundLine(FoundInfo fi, SearchResultMarkingLine miLine, const wchar_t* foundline, size_t totalLineNumber)
string Finder::foundLine(FoundInfo fi, SearchResultMarkingLine miLine, const wchar_t* foundline, size_t foundLineLen, size_t totalLineNumber)
{
bool isRepeatedLine = false;
@ -5410,10 +5417,13 @@ const char* Finder::foundLine(FoundInfo fi, SearchResultMarkingLine miLine, cons
miLine._segmentPostions[0].first += headerStr.length();
miLine._segmentPostions[0].second += headerStr.length();
headerStr += foundline;
headerStr += wstring(foundline, foundLineLen);
WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance();
const char* text2AddUtf8 = wmc.wchar2char(headerStr.c_str(), SC_CP_UTF8, &miLine._segmentPostions[0].first, &miLine._segmentPostions[0].second); // certainly utf8 here
size_t text2AddUtf8Len = strlen(text2AddUtf8);
int text2AddUtf8Len = 0;
const char* text2AddUtf8 = wmc.wchar2char(headerStr.c_str(), SC_CP_UTF8, &miLine._segmentPostions[0].first, &miLine._segmentPostions[0].second, static_cast<int>(headerStr.length()), &text2AddUtf8Len); // certainly utf8 here
// if the current line repeats the previous one, and the settings make each found line show ONCE in the result even if there are several found occurrences in the same line, for:
if ((isRepeatedLine &&
@ -5441,12 +5451,12 @@ const char* Finder::foundLine(FoundInfo fi, SearchResultMarkingLine miLine, cons
cut--;
memcpy((void*)&text2AddUtf8[cut], endOfLongLine, lenEndOfLongLine + 1);
text2AddUtf8Len = cut + lenEndOfLongLine;
text2AddUtf8Len = static_cast<int>(cut + lenEndOfLongLine);
}
_pMainMarkings->push_back(miLine);
return text2AddUtf8;
return string(text2AddUtf8, text2AddUtf8Len);
}
}

View File

@ -128,7 +128,7 @@ public:
void addFileNameTitle(const wchar_t * fileName);
void addFileHitCount(int count);
void addSearchResultInfo(int count, int countSearched, bool searchedEntireNotSelection, const FindOption *pFindOpt);
const char* foundLine(FoundInfo fi, SearchResultMarkingLine mi, const wchar_t* foundline, size_t totalLineNumber);
std::string foundLine(FoundInfo fi, SearchResultMarkingLine mi, const wchar_t* foundline, size_t foundLineLen, size_t totalLineNumber);
void setFinderStyle();
void setFinderStyleForNpc(bool onlyColor = false);
void removeAll();

View File

@ -2662,15 +2662,30 @@ void ScintillaEditView::getGenericText(wchar_t *dest, size_t destlen, size_t sta
// "mstart" and "mend" are pointers to indexes in the read string,
// which are converted to the corresponding indexes in the returned wchar_t string.
void ScintillaEditView::getGenericText(wchar_t *dest, size_t destlen, size_t start, size_t end, intptr_t* mstart, intptr_t* mend) const
void ScintillaEditView::getGenericText(wchar_t* dest, size_t destlen, size_t start, size_t end, intptr_t* mstart, intptr_t* mend, intptr_t* outLen/* = nullptr*/) const
{
size_t nbChar = end - start;
if (nbChar == 0)
{
dest[0] = L'\0';
return;
}
WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance();
char *destA = new char[end - start + 1];
char* destA = new char[nbChar + 1];
getText(destA, start, end);
size_t cp = execute(SCI_GETCODEPAGE) ;
const wchar_t *destW = wmc.char2wchar(destA, cp, mstart, mend);
wcsncpy_s(dest, destlen, destW, _TRUNCATE);
delete [] destA;
size_t cp = execute(SCI_GETCODEPAGE);
const wchar_t* destW = wmc.char2wchar(destA, cp, mstart, mend, static_cast<int>(nbChar));
size_t lenW = wmc.getSizeW();
if (lenW >= destlen)
lenW = destlen - 1;
memcpy_s(dest, destlen * sizeof(wchar_t), destW, lenW * sizeof(wchar_t));
dest[lenW] = L'\0';
if (outLen)
*outLen = lenW;
delete[] destA;
}
void ScintillaEditView::insertGenericTextFrom(size_t position, const wchar_t *text2insert) const

View File

@ -455,7 +455,7 @@ public:
void getText(char *dest, size_t start, size_t end) const;
void getGenericText(wchar_t *dest, size_t destlen, size_t start, size_t end) const;
void getGenericText(wchar_t *dest, size_t deslen, size_t start, size_t end, intptr_t* mstart, intptr_t* mend) const;
void getGenericText(wchar_t* dest, size_t destlen, size_t start, size_t end, intptr_t* mstart, intptr_t* mend, intptr_t* outLen = nullptr) const;
std::wstring getGenericTextAsString(size_t start, size_t end) const;
void insertGenericTextFrom(size_t position, const wchar_t *text2insert) const;
void replaceSelWith(const char * replaceText);