[BUG_FIXED] (Author: François-R Boyer) Fix bug where Unicode files were corrupted on saving (the buffer alignment issue).
git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@648 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
parent
49e4230a4a
commit
29143b3d6c
|
@ -692,10 +692,26 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
|
|||
size_t lenFile = 0;
|
||||
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
|
||||
bool isFirstTime = true;
|
||||
int incompleteMultibyteChar = 0; //we do not want to call SCI_APPENDTEXT with an incomplete character if the buffer ends in the middle of one
|
||||
char incompleteMultibyteChar_first = 0;
|
||||
|
||||
do {
|
||||
lenFile = fread(data, 1, blockSize, fp);
|
||||
|
||||
lenFile = fread(data+incompleteMultibyteChar, 1, blockSize-incompleteMultibyteChar, fp) + incompleteMultibyteChar;
|
||||
|
||||
// we might not know yet the encoding; we ensure that valid UTF-8 characters will not be cut in the middle, without causing problems if it's not UTF-8
|
||||
// TODO: all expressions for testing UTF chars should be put in inline functions, not directly in the code
|
||||
if(lenFile == blockSize && (data[blockSize-1]&0x80) != 0) // possible multi-byte character that could be cut due to blockSize
|
||||
{
|
||||
incompleteMultibyteChar = 1;
|
||||
while(incompleteMultibyteChar < 6 // longest "defined" UTF-8 code (including restricted codes not yet defined by Unicode)
|
||||
&& (data[blockSize-incompleteMultibyteChar]&0xC0) == 0x80) // is possibly a continuation byte in a multi-byte character
|
||||
++incompleteMultibyteChar;
|
||||
// leave for the next buffer all bytes that could potentially be multi-byte UTF-8 at the end of current buffer
|
||||
lenFile -= incompleteMultibyteChar;
|
||||
incompleteMultibyteChar_first = data[lenFile]; // this byte can be erased by following code to put a null terminator
|
||||
}
|
||||
else incompleteMultibyteChar = 0;
|
||||
|
||||
// check if file contain any BOM
|
||||
if (isFirstTime)
|
||||
{
|
||||
|
@ -722,6 +738,13 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
|
|||
lenConvert = UnicodeConvertor->convert(data, lenFile);
|
||||
_pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, (LPARAM)(UnicodeConvertor->getNewBuf()));
|
||||
}
|
||||
|
||||
if(incompleteMultibyteChar != 0)
|
||||
{
|
||||
// copy bytes to next buffer
|
||||
memcpy(data, data+blockSize-incompleteMultibyteChar, incompleteMultibyteChar);
|
||||
data[0] = incompleteMultibyteChar_first;
|
||||
}
|
||||
|
||||
} while (lenFile > 0);
|
||||
} __except(filter(GetExceptionCode(), GetExceptionInformation())) {
|
||||
|
|
Loading…
Reference in New Issue