Fix UTF-8 being wrongly detected as TIS-620

uchardet often misdetects UTF-8 as TIS-620, so TIS-620 detection is disabled in this commit. More info: https://github.com/notepad-plus-plus/notepad-plus-plus/issues/10916#issuecomment-1001671957 Fix #10916, fix #940, fix #8755, fix #3588, fix #3188, fix #4932, fix #3172, fix #10492, close #10958
2025-07-25 14:54:39 +02:00 · 2021-12-27 17:02:03 +01:00 · 2021-12-27 17:02:03 +01:00 · ae09024e66
commit ae09024e66
parent 33a0587a65
1 changed files with 3 additions and 1 deletions
--- a/PowerEditor/src/ScintillaComponent/Buffer.cpp
+++ b/PowerEditor/src/ScintillaComponent/Buffer.cpp
@@ -1229,11 +1229,13 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d
 int FileManager::detectCodepage(char* buf, size_t len)
 {
+	int codepage = -1;
 	uchardet_t ud = uchardet_new();
 	uchardet_handle_data(ud, buf, len);
 	uchardet_data_end(ud);
 	const char* cs = uchardet_get_charset(ud);
-	int codepage = EncodingMapper::getInstance().getEncodingFromString(cs);
+	if (stricmp(cs, "TIS-620") != 0) // TIS-620 detection is disabled here because uchardet detects usually wrongly UTF-8 as TIS-620
+		codepage = EncodingMapper::getInstance().getEncodingFromString(cs);
 	uchardet_delete(ud);
 	return codepage;
 }