Fix UTF-8 being wrongly detected as TIS-620
uchardet often wrongly detects UTF-8 content as TIS-620, so TIS-620 detection is disabled in this commit. More info: https://github.com/notepad-plus-plus/notepad-plus-plus/issues/10916#issuecomment-1001671957 Fix #10916, fix #940, fix #8755, fix #3588, fix #3188, fix #4932, fix #3172, fix #10492, close #10958
This commit is contained in:
parent
33a0587a65
commit
ae09024e66
|
@ -1229,11 +1229,13 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d
|
||||||
|
|
||||||
int FileManager::detectCodepage(char* buf, size_t len)
|
int FileManager::detectCodepage(char* buf, size_t len)
|
||||||
{
|
{
|
||||||
|
int codepage = -1;
|
||||||
uchardet_t ud = uchardet_new();
|
uchardet_t ud = uchardet_new();
|
||||||
uchardet_handle_data(ud, buf, len);
|
uchardet_handle_data(ud, buf, len);
|
||||||
uchardet_data_end(ud);
|
uchardet_data_end(ud);
|
||||||
const char* cs = uchardet_get_charset(ud);
|
const char* cs = uchardet_get_charset(ud);
|
||||||
int codepage = EncodingMapper::getInstance().getEncodingFromString(cs);
|
if (stricmp(cs, "TIS-620") != 0) // TIS-620 detection is disabled here because uchardet detects usually wrongly UTF-8 as TIS-620
|
||||||
|
codepage = EncodingMapper::getInstance().getEncodingFromString(cs);
|
||||||
uchardet_delete(ud);
|
uchardet_delete(ud);
|
||||||
return codepage;
|
return codepage;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue