mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-28 16:24:27 +02:00
[BUG_FIXED] (Author: François-R Boyer) Fix Unicode file corrupted on saving bug (the buffer alignment issue).
git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@648 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
parent
49e4230a4a
commit
29143b3d6c
@ -692,9 +692,25 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
|
|||||||
size_t lenFile = 0;
|
size_t lenFile = 0;
|
||||||
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
|
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
|
||||||
bool isFirstTime = true;
|
bool isFirstTime = true;
|
||||||
|
int incompleteMultibyteChar = 0; //we do not want to call SCI_APPENDTEXT with an incomplete character if the buffer ends in the middle of one
|
||||||
|
char incompleteMultibyteChar_first = 0;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
lenFile = fread(data, 1, blockSize, fp);
|
lenFile = fread(data+incompleteMultibyteChar, 1, blockSize-incompleteMultibyteChar, fp) + incompleteMultibyteChar;
|
||||||
|
|
||||||
|
// we might not know yet the encoding; we ensure that valid UTF-8 characters will not be cut in the middle, without causing problems if it's not UTF-8
|
||||||
|
// TODO: all expressions for testing UTF chars should be put in inline functions, not directly in the code
|
||||||
|
if(lenFile == blockSize && (data[blockSize-1]&0x80) != 0) // possible multi-byte character that could be cut due to blockSize
|
||||||
|
{
|
||||||
|
incompleteMultibyteChar = 1;
|
||||||
|
while(incompleteMultibyteChar < 6 // longest "defined" UTF-8 code (including restricted codes not yet defined by Unicode)
|
||||||
|
&& (data[blockSize-incompleteMultibyteChar]&0xC0) == 0x80) // is possibly a continuation byte in a multi-byte character
|
||||||
|
++incompleteMultibyteChar;
|
||||||
|
// leave for the next buffer all bytes that could potentially be multi-byte UTF-8 at the end of current buffer
|
||||||
|
lenFile -= incompleteMultibyteChar;
|
||||||
|
incompleteMultibyteChar_first = data[lenFile]; // this byte can be erased by following code to put a null terminator
|
||||||
|
}
|
||||||
|
else incompleteMultibyteChar = 0;
|
||||||
|
|
||||||
// check if file contain any BOM
|
// check if file contain any BOM
|
||||||
if (isFirstTime)
|
if (isFirstTime)
|
||||||
@ -723,6 +739,13 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
|
|||||||
_pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, (LPARAM)(UnicodeConvertor->getNewBuf()));
|
_pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, (LPARAM)(UnicodeConvertor->getNewBuf()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(incompleteMultibyteChar != 0)
|
||||||
|
{
|
||||||
|
// copy bytes to next buffer
|
||||||
|
memcpy(data, data+blockSize-incompleteMultibyteChar, incompleteMultibyteChar);
|
||||||
|
data[0] = incompleteMultibyteChar_first;
|
||||||
|
}
|
||||||
|
|
||||||
} while (lenFile > 0);
|
} while (lenFile > 0);
|
||||||
} __except(filter(GetExceptionCode(), GetExceptionInformation())) {
|
} __except(filter(GetExceptionCode(), GetExceptionInformation())) {
|
||||||
printStr(TEXT("File is too big to be opened by Notepad++"));
|
printStr(TEXT("File is too big to be opened by Notepad++"));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user