Fix UTF-8 being wrongly detected as TIS-620

uchardet often wrongly detects UTF-8 as TIS-620, so TIS-620 detection is disabled in this commit.
More info:
https://github.com/notepad-plus-plus/notepad-plus-plus/issues/10916#issuecomment-1001671957

Fix #10916, fix #940, fix #8755, fix #3588, fix #3188, fix #4932, fix #3172, fix #10492, close #10958
This commit is contained in:
Don Ho 2021-12-27 17:02:03 +01:00
parent 33a0587a65
commit ae09024e66
1 changed file with 3 additions and 1 deletion

View File

@@ -1229,11 +1229,13 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d
int FileManager::detectCodepage(char* buf, size_t len) int FileManager::detectCodepage(char* buf, size_t len)
{ {
int codepage = -1;
uchardet_t ud = uchardet_new(); uchardet_t ud = uchardet_new();
uchardet_handle_data(ud, buf, len); uchardet_handle_data(ud, buf, len);
uchardet_data_end(ud); uchardet_data_end(ud);
const char* cs = uchardet_get_charset(ud); const char* cs = uchardet_get_charset(ud);
int codepage = EncodingMapper::getInstance().getEncodingFromString(cs); if (stricmp(cs, "TIS-620") != 0) // TIS-620 detection is disabled here because uchardet detects usually wrongly UTF-8 as TIS-620
codepage = EncodingMapper::getInstance().getEncodingFromString(cs);
uchardet_delete(ud); uchardet_delete(ud);
return codepage; return codepage;
} }