[BUG_FIXED] (Author: François-R Boyer) Fix the bug where Unicode files were corrupted on saving (the buffer alignment issue).

git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@648 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
Don Ho 2010-08-14 09:21:59 +00:00
parent 49e4230a4a
commit 29143b3d6c
1 changed files with 25 additions and 2 deletions

View File

@ -692,10 +692,26 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
size_t lenFile = 0;
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
bool isFirstTime = true;
int incompleteMultibyteChar = 0; //we do not want to call SCI_APPENDTEXT with an incomplete character if the buffer ends in the middle of one
char incompleteMultibyteChar_first = 0;
do {
lenFile = fread(data, 1, blockSize, fp);
lenFile = fread(data+incompleteMultibyteChar, 1, blockSize-incompleteMultibyteChar, fp) + incompleteMultibyteChar;
// we might not yet know the encoding; we ensure that valid UTF-8 characters will not be cut in the middle, without causing problems if the data is not UTF-8
// TODO: all expressions for testing UTF chars should be put in inline functions, not directly in the code
if(lenFile == blockSize && (data[blockSize-1]&0x80) != 0) // possible multi-byte character that could be cut due to blockSize
{
incompleteMultibyteChar = 1;
while(incompleteMultibyteChar < 6 // longest "defined" UTF-8 code (including restricted codes not yet defined by Unicode)
&& (data[blockSize-incompleteMultibyteChar]&0xC0) == 0x80) // is possibly a continuation byte in a multi-byte character
++incompleteMultibyteChar;
// leave for the next buffer all bytes that could potentially be multi-byte UTF-8 at the end of current buffer
lenFile -= incompleteMultibyteChar;
incompleteMultibyteChar_first = data[lenFile]; // this byte can be erased by following code to put a null terminator
}
else incompleteMultibyteChar = 0;
// check if the file contains any BOM
if (isFirstTime)
{
@ -722,6 +738,13 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
lenConvert = UnicodeConvertor->convert(data, lenFile);
_pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, (LPARAM)(UnicodeConvertor->getNewBuf()));
}
if(incompleteMultibyteChar != 0)
{
// copy bytes to next buffer
memcpy(data, data+blockSize-incompleteMultibyteChar, incompleteMultibyteChar);
data[0] = incompleteMultibyteChar_first;
}
} while (lenFile > 0);
} __except(filter(GetExceptionCode(), GetExceptionInformation())) {