From 8149f72717fb523623f7f47bfaf1f662fb833622 Mon Sep 17 00:00:00 2001 From: Don HO Date: Sun, 5 Oct 2025 19:53:55 +0200 Subject: [PATCH] Disable ANSI under full Unicode system & fix switching encoding not working While **Use Unicode UTF-8 for worldwide language support** is checked: 1. ANSI commands are disabled. 2. Encoding "ANSI" & "Apply to opened ANSI files" options are disabled in "New Document" section of Preferences. 3. Open the "single byte" files with the old code page, if the encoding detection finds nothing. Fix #17057, fix #17033, close #17080 --- PowerEditor/src/Notepad_plus.cpp | 63 ++++++----- PowerEditor/src/NppCommands.cpp | 42 +++++-- PowerEditor/src/Parameters.cpp | 17 ++- PowerEditor/src/Parameters.h | 27 ++++- PowerEditor/src/ScintillaComponent/Buffer.cpp | 107 +++++++++++++----- PowerEditor/src/ScintillaComponent/Buffer.h | 15 ++- .../ScintillaComponent/ScintillaEditView.cpp | 6 +- .../ScintillaComponent/ScintillaEditView.h | 1 + PowerEditor/src/Utf8_16.cpp | 11 +- PowerEditor/src/Utf8_16.h | 4 +- .../src/WinControls/AboutDlg/AboutDlg.cpp | 2 +- .../src/WinControls/Preference/preference.rc | 2 +- .../WinControls/Preference/preferenceDlg.cpp | 27 ++++- .../WinControls/Preference/preferenceDlg.h | 6 +- .../WinControls/Preference/preference_rc.h | 2 +- 15 files changed, 230 insertions(+), 102 deletions(-) diff --git a/PowerEditor/src/Notepad_plus.cpp b/PowerEditor/src/Notepad_plus.cpp index 16c09ddae..70e102089 100644 --- a/PowerEditor/src/Notepad_plus.cpp +++ b/PowerEditor/src/Notepad_plus.cpp @@ -638,9 +638,17 @@ LRESULT Notepad_plus::init(HWND hwnd) ::DrawMenuBar(hwnd); - //Windows menu + // Windows menu _windowsMenu.init(_mainMenuHandle); + // if user set system codepage to UTF8, ANSI encoding capacity should be disable once for all + if (NppParameters::getInstance().isCurrentSystemCodepageUTF8()) + { + enableCommand(IDM_FORMAT_ANSI, false, MENU); + enableCommand(IDM_FORMAT_CONV2_ANSI, false, MENU); + } + + // Update Scintilla context menu 
strings (translated) vector & tmp = nppParam.getContextMenuItems(); size_t len = tmp.size(); @@ -2510,7 +2518,7 @@ int Notepad_plus::doDeleteOrNot(const wchar_t *fn) void Notepad_plus::enableMenu(int cmdID, bool doEnable) const { - int flag = doEnable?MF_ENABLED | MF_BYCOMMAND:MF_DISABLED | MF_GRAYED | MF_BYCOMMAND; + int flag = doEnable ? MF_ENABLED | MF_BYCOMMAND : MF_GRAYED | MF_DISABLED | MF_BYCOMMAND; ::EnableMenuItem(_mainMenuHandle, cmdID, flag); } @@ -2724,6 +2732,9 @@ void Notepad_plus::setupColorSampleBitmapsOnMainMenuItems() } } +// doCheck searches for the menu item matching the provided id across the main menu and all of its submenus, +// once the target id is found, it ensures that item is checked, and all other menu items at that same level are automatically unchecked. +// If id is -1, then all the menu items are unchecked. bool doCheck(HMENU mainHandle, int id) { MENUITEMINFO mii{}; @@ -2735,7 +2746,7 @@ bool doCheck(HMENU mainHandle, int id) for (int i = 0; i < count; i++) { ::GetMenuItemInfo(mainHandle, i, MF_BYPOSITION, &mii); - if (mii.fType == MFT_RADIOCHECK || mii.fType == MFT_STRING) + if (!(mii.fState & MFS_GRAYED) && (mii.fType == MFT_RADIOCHECK || mii.fType == MFT_STRING)) { bool checked = mii.hSubMenu ? 
doCheck(mii.hSubMenu, id) : (mii.wID == (unsigned int)id); if (checked) @@ -3072,7 +3083,7 @@ void Notepad_plus::setUniModeText() uniModeTextString = L"UTF-16 Big Endian"; break; case uni16LE_NoBOM: uniModeTextString = L"UTF-16 Little Endian"; break; - case uniCookie: + case uniUTF8_NoBOM: uniModeTextString = L"UTF-8"; break; default : uniModeTextString = L"ANSI"; @@ -4191,7 +4202,7 @@ size_t Notepad_plus::getSelectedCharNumber(UniMode u) { size_t result = 0; size_t numSel = _pEditView->execute(SCI_GETSELECTIONS); - if (u == uniUTF8 || u == uniCookie) + if (u == uniUTF8 || u == uniUTF8_NoBOM) { for (size_t i = 0; i < numSel; ++i) { @@ -4254,7 +4265,7 @@ static inline size_t countUtf8Characters(const unsigned char *buf, size_t pos, s size_t Notepad_plus::getCurrentDocCharCount(UniMode u) { - if (u != uniUTF8 && u != uniCookie) + if (u != uniUTF8 && u != uniUTF8_NoBOM) { size_t numLines = _pEditView->execute(SCI_GETLINECOUNT); auto result = _pEditView->execute(SCI_GETLENGTH); @@ -4299,7 +4310,7 @@ size_t Notepad_plus::getCurrentDocCharCount(UniMode u) bool Notepad_plus::isFormatUnicode(UniMode u) { - return (u != uni8Bit && u != uni7Bit && u != uniUTF8 && u != uniCookie); + return (u != uni8Bit && u != uni7Bit && u != uniUTF8 && u != uniUTF8_NoBOM); } int Notepad_plus::getBOMSize(UniMode u) @@ -5160,11 +5171,11 @@ void Notepad_plus::checkUnicodeMenuItems() const int id = -1; switch (um) { - case uniUTF8 : id = IDM_FORMAT_UTF_8; break; - case uni16BE : id = IDM_FORMAT_UTF_16BE; break; - case uni16LE : id = IDM_FORMAT_UTF_16LE; break; - case uniCookie : id = IDM_FORMAT_AS_UTF_8; break; - case uni8Bit : id = IDM_FORMAT_ANSI; break; + case uniUTF8 : id = IDM_FORMAT_UTF_8; break; + case uni16BE : id = IDM_FORMAT_UTF_16BE; break; + case uni16LE : id = IDM_FORMAT_UTF_16LE; break; + case uniUTF8_NoBOM : id = IDM_FORMAT_AS_UTF_8; break; + case uni8Bit : id = IDM_FORMAT_ANSI; break; case uni7Bit: case uni16BE_NoBOM: @@ -5174,24 +5185,21 @@ void 
Notepad_plus::checkUnicodeMenuItems() const break; } - if (encoding == -1) + HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + + if (encoding == -1) // encoding is not used, so use uniMode to check menu item { - // Uncheck all in the sub encoding menu - HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + // Uncheck all in the main & sub encoding menu doCheck(_formatMenuHandle, -1); - if (id == -1) //um == uni16BE_NoBOM || um == uni16LE_NoBOM + if (id != -1) { - // Uncheck all in the main encoding menu - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, IDM_FORMAT_ANSI, MF_BYCOMMAND); - ::CheckMenuItem(_mainMenuHandle, IDM_FORMAT_ANSI, MF_UNCHECKED | MF_BYCOMMAND); - } - else - { - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, id, MF_BYCOMMAND); + DWORD state = GetMenuState(_formatMenuHandle, IDM_FORMAT_ANSI, MF_BYCOMMAND); + ::CheckMenuRadioItem(_mainMenuHandle, (state & MFS_GRAYED) ? IDM_FORMAT_UTF_8 : IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, id, MF_BYCOMMAND); } + // else if (id == -1) => um == uni16BE_NoBOM || um == uni16LE_NoBOM, let all items unchecked. 
} - else + else // encoding is used { const EncodingMapper& em = EncodingMapper::getInstance(); int cmdID = em.getIndexFromEncoding(encoding); @@ -5202,12 +5210,7 @@ void Notepad_plus::checkUnicodeMenuItems() const } cmdID += IDM_FORMAT_ENCODE; - // Uncheck all in the main encoding menu - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, IDM_FORMAT_ANSI, MF_BYCOMMAND); - ::CheckMenuItem(_mainMenuHandle, IDM_FORMAT_ANSI, MF_UNCHECKED | MF_BYCOMMAND); - - // Check the encoding item - HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + // Check the encoding item doCheck(_formatMenuHandle, cmdID); } } diff --git a/PowerEditor/src/NppCommands.cpp b/PowerEditor/src/NppCommands.cpp index 6e7cd243b..ef7e19b1a 100644 --- a/PowerEditor/src/NppCommands.cpp +++ b/PowerEditor/src/NppCommands.cpp @@ -2757,15 +2757,25 @@ void Notepad_plus::command(int id) case IDM_FORMAT_UTF_16LE : case IDM_FORMAT_AS_UTF_8 : { + bool isUTF8System = NppParameters::getInstance().isCurrentSystemCodepageUTF8(); + + if (isUTF8System && id == IDM_FORMAT_ANSI) + { + return; + } + Buffer * buf = _pEditView->getCurrentBuffer(); + UniMode originalUm = buf->getUnicodeMode(); + int originalEncoding = buf->getEncoding(); + UniMode um; bool shouldBeDirty = true; switch (id) { case IDM_FORMAT_AS_UTF_8: - shouldBeDirty = buf->getUnicodeMode() != uni8Bit; - um = uniCookie; + shouldBeDirty = originalUm != uni8Bit; + um = uniUTF8_NoBOM; break; case IDM_FORMAT_UTF_8: @@ -2781,11 +2791,11 @@ void Notepad_plus::command(int id) break; default : // IDM_FORMAT_ANSI - shouldBeDirty = buf->getUnicodeMode() != uniCookie; + shouldBeDirty = originalUm != uniUTF8_NoBOM; um = uni8Bit; } - if (buf->getEncoding() != -1) + if (originalEncoding != -1) { if (buf->isDirty()) { @@ -2801,7 +2811,9 @@ void Notepad_plus::command(int id) _pEditView->execute(SCI_EMPTYUNDOBUFFER); } else + { return; + } } if (_pEditView->execute(SCI_CANUNDO) == TRUE) @@ -2816,26 +2828,35 @@ void 
Notepad_plus::command(int id) // Do nothing } else + { return; + } } buf->setEncoding(-1); if (um == uni8Bit) - _pEditView->execute(SCI_SETCODEPAGE, CP_ACP); + { + NppParameters& nppParams = NppParameters::getInstance(); + _pEditView->execute(SCI_SETCODEPAGE, !nppParams.isCurrentSystemCodepageUTF8() ? CP_ACP : SC_CP_UTF8); + } else buf->setUnicodeMode(um); + + MainFileManager.disableAutoDetectEncoding4Loading(); fileReload(); + MainFileManager.enableAutoDetectEncoding4Loading(); } else { - if (buf->getUnicodeMode() != um) + if (originalUm != um) { buf->setUnicodeMode(um); if (shouldBeDirty) buf->setDirty(true); } } + break; } @@ -2929,8 +2950,11 @@ void Notepad_plus::command(int id) if (!buf->isDirty()) { buf->setEncoding(encoding); - buf->setUnicodeMode(uniCookie); + buf->setUnicodeMode(uniUTF8_NoBOM); + + MainFileManager.disableAutoDetectEncoding4Loading(); fileReload(); + MainFileManager.enableAutoDetectEncoding4Loading(); } break; } @@ -2971,13 +2995,13 @@ void Notepad_plus::command(int id) if (encoding != -1) { buf->setDirty(true); - buf->setUnicodeMode(uniCookie); + buf->setUnicodeMode(uniUTF8_NoBOM); buf->setEncoding(-1); return; } idEncoding = IDM_FORMAT_AS_UTF_8; - if (um == uniCookie) + if (um == uniUTF8_NoBOM) return; if (um != uni8Bit) diff --git a/PowerEditor/src/Parameters.cpp b/PowerEditor/src/Parameters.cpp index b09215ffb..291c26570 100644 --- a/PowerEditor/src/Parameters.cpp +++ b/PowerEditor/src/Parameters.cpp @@ -1003,9 +1003,12 @@ winVer NppParameters::getWindowsVersion() NppParameters::NppParameters() { - //Get windows version + // Get windows version _winVersion = getWindowsVersion(); + // Get current system code page + _currentSystemCodepage = GetACP(); + // Prepare for default path wchar_t nppPath[MAX_PATH]; ::GetModuleFileName(NULL, nppPath, MAX_PATH); @@ -5550,7 +5553,15 @@ void NppParameters::feedGUIParameters(TiXmlNode *node) } if (element->Attribute(L"encoding", &i)) - _nppGUI._newDocDefaultSettings._unicodeMode = (UniMode)i; + { + if 
(isCurrentSystemCodepageUTF8() // "Beta: Use Unicode UTF-8 for worldwide language support" option is checked in Windows + && static_cast(i) == uni8Bit) // Notepad++ default new document setting is ANSI + { + // Force Notepad++ default new document setting from ANSI to UTF-8 + i = static_cast(uniUTF8); + } + _nppGUI._newDocDefaultSettings._unicodeMode = static_cast(i); + } if (element->Attribute(L"lang", &i)) _nppGUI._newDocDefaultSettings._lang = (LangType)i; @@ -5560,7 +5571,7 @@ void NppParameters::feedGUIParameters(TiXmlNode *node) const wchar_t* val = element->Attribute(L"openAnsiAsUTF8"); if (val) - _nppGUI._newDocDefaultSettings._openAnsiAsUtf8 = (lstrcmp(val, L"yes") == 0); + _nppGUI._newDocDefaultSettings._openAnsiAsUtf8 = isCurrentSystemCodepageUTF8() ? false : (lstrcmp(val, L"yes") == 0); val = element->Attribute(L"addNewDocumentOnStartup"); if (val) diff --git a/PowerEditor/src/Parameters.h b/PowerEditor/src/Parameters.h index 53205dea3..14d74870f 100644 --- a/PowerEditor/src/Parameters.h +++ b/PowerEditor/src/Parameters.h @@ -100,18 +100,17 @@ enum class EolType: std::uint8_t EolType convertIntToFormatType(int value, EolType defvalue = EolType::osdefault); - - enum UniMode { uni8Bit = 0, // ANSI uniUTF8 = 1, // UTF-8 with BOM uni16BE = 2, // UTF-16 Big Endian with BOM uni16LE = 3, // UTF-16 Little Endian with BOM - uniCookie = 4, // UTF-8 without BOM - uni7Bit = 5, // + uniUTF8_NoBOM = 4, // UTF-8 without BOM + uni7Bit = 5, // 0 - 127 ASCII uni16BE_NoBOM = 6, // UTF-16 Big Endian without BOM uni16LE_NoBOM = 7, // UTF-16 Little Endian without BOM - uniEnd}; + uniEnd +}; enum ChangeDetect { cdDisabled = 0x0, cdEnabledOld = 0x01, cdEnabledNew = 0x02, cdAutoUpdate = 0x04, cdGo2end = 0x08 }; enum BackupFeature {bak_none = 0, bak_simple = 1, bak_verbose = 2}; @@ -612,7 +611,7 @@ private : struct NewDocDefaultSettings final { EolType _format = EolType::osdefault; - UniMode _unicodeMode = uniCookie; + UniMode _unicodeMode = uniUTF8_NoBOM; bool _openAnsiAsUtf8 = 
true; LangType _lang = L_TEXT; int _codepage = -1; // -1 when not using @@ -1821,6 +1820,20 @@ public: winVer getWinVersion() const {return _winVersion;}; std::wstring getWinVersionStr() const; std::wstring getWinVerBitStr() const; + + int currentSystemCodepage() const { return _currentSystemCodepage; }; + bool isCurrentSystemCodepageUTF8() const { return _currentSystemCodepage == 65001; }; + int defaultCodepage() const { // return all the codepage except UTF8 (65001) + if (isCurrentSystemCodepageUTF8()) + { + int localCodepage = 0; + // Get the system default codepage, which is the codepage before "Beta: Use Unicode UTF-8 for worldwide language support" option being checked + GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, reinterpret_cast(&localCodepage), 2); + return localCodepage; + } + return _currentSystemCodepage; + }; + FindHistory & getFindHistory() {return _findHistory;}; bool _isFindReplacing = false; // an on the fly variable for find/replace functions #ifndef _WIN64 @@ -2085,6 +2098,8 @@ private: bool _isPlaceHolderEnabled = false; bool _theWarningHasBeenGiven = false; + int _currentSystemCodepage = -1; + public: std::wstring getWingupFullPath() const { return _wingupFullPath; }; std::wstring getWingupParams() const { return _wingupParams; }; diff --git a/PowerEditor/src/ScintillaComponent/Buffer.cpp b/PowerEditor/src/ScintillaComponent/Buffer.cpp index 2d88e2454..09f56a382 100644 --- a/PowerEditor/src/ScintillaComponent/Buffer.cpp +++ b/PowerEditor/src/ScintillaComponent/Buffer.cpp @@ -89,7 +89,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus _unicodeMode = ndds._unicodeMode; _encoding = ndds._codepage; if (_encoding != -1) - _unicodeMode = uniCookie; + _unicodeMode = uniUTF8_NoBOM; _currentStatus = type; @@ -1005,7 +1005,7 @@ bool FileManager::reloadBuffer(BufferID id) { Buffer* buf = getBufferByID(id); Document doc = buf->getDocument(); - Utf8_16_Read 
UnicodeConvertor; + Utf8_16_Read unicodeConvertor; LoadedFileFormat loadedFileFormat; loadedFileFormat._encoding = buf->getEncoding(); @@ -1037,7 +1037,7 @@ bool FileManager::reloadBuffer(BufferID id) char* data = new char[blockSize + 8]; // +8 for incomplete multibyte char buf->_canNotify = false; //disable notify during file load, we don't want dirty status to be triggered - bool res = loadFileData(doc, fileSize, buf->getFullPathName(), data, &UnicodeConvertor, loadedFileFormat); + bool res = loadFileData(doc, fileSize, buf->getFullPathName(), data, &unicodeConvertor, loadedFileFormat); buf->_canNotify = true; delete[] data; @@ -1050,7 +1050,7 @@ bool FileManager::reloadBuffer(BufferID id) buf->setSavePointDirty(false); - setLoadedBufferEncodingAndEol(buf, UnicodeConvertor, loadedFileFormat._encoding, loadedFileFormat._eolFormat); + setLoadedBufferEncodingAndEol(buf, unicodeConvertor, loadedFileFormat._encoding, loadedFileFormat._eolFormat); } return res; @@ -1059,24 +1059,28 @@ bool FileManager::reloadBuffer(BufferID id) void FileManager::setLoadedBufferEncodingAndEol(Buffer* buf, const Utf8_16_Read& UnicodeConvertor, int encoding, EolType bkformat) { - if (encoding == -1) + int encoding2Set = encoding; + UniMode unimode2Set = UnicodeConvertor.getEncoding(); + + if (encoding2Set == -1) { NppParameters& nppParamInst = NppParameters::getInstance(); const NewDocDefaultSettings & ndds = (nppParamInst.getNppGUI()).getNewDocDefaultSettings(); - - UniMode um = UnicodeConvertor.getEncoding(); - if (um == uni7Bit) - um = (ndds._openAnsiAsUtf8) ? uniCookie : uni8Bit; - - buf->setUnicodeMode(um); + + if (unimode2Set == uni7Bit) + unimode2Set = (ndds._openAnsiAsUtf8) ? uniUTF8_NoBOM : uni8Bit; } else { // Test if encoding is set to UTF8 w/o BOM (usually for utf8 indicator of xml or html) - buf->setEncoding((encoding == SC_CP_UTF8)?-1:encoding); - buf->setUnicodeMode(uniCookie); + encoding2Set = ((encoding2Set == SC_CP_UTF8) ? 
-1 : encoding2Set); + unimode2Set = uniUTF8_NoBOM; } + buf->setEncoding(encoding2Set); + buf->setUnicodeMode(unimode2Set); + + // Since the buffer will be reloaded from the disk, EOL might have been changed if (bkformat != EolType::unknown) buf->setEolFormat(bkformat); @@ -1208,7 +1212,7 @@ bool FileManager::backupCurrentBuffer() if (buffer->isModified()) // buffer dirty and modified, write the backup file { UniMode mode = buffer->getUnicodeMode(); - if (mode == uniCookie) + if (mode == uniUTF8_NoBOM) mode = uni8Bit; //set the mode to ANSI to prevent converter from adding BOM and performing conversions, Scintilla's data can be copied directly Utf8_16_Write UnicodeConvertor; @@ -1421,7 +1425,7 @@ SavingStatus FileManager::saveBuffer(BufferID id, const wchar_t* filename, bool } UniMode mode = buffer->getUnicodeMode(); - if (mode == uniCookie) + if (mode == uniUTF8_NoBOM) mode = uni8Bit; //set the mode to ANSI to prevent converter from adding BOM and performing conversions, Scintilla's data can be copied directly Utf8_16_Write UnicodeConvertor; @@ -1942,23 +1946,34 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f if (lenFile == 0) break; + bool hasBOM = false; if (isFirstTime) { - const NppGUI& nppGui = NppParameters::getInstance().getNppGUI(); + NppParameters& nppParamInst = NppParameters::getInstance(); + const NppGUI& nppGui = nppParamInst.getNppGUI(); + + // + // Detect encoding + // // check if file contain any BOM - if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit) - { - // if file contains any BOM, then encoding will be erased, - // and the document will be interpreted as UTF + if (Utf8_16_Read::determineEncodingFromBOM((unsigned char*)data, lenFile) != uni8Bit) + { + // if file contains any BOM, then encoding will be erased, + // and the document will be interpreted as UTF fileFormat._encoding = -1; + hasBOM = true; } else if (fileFormat._encoding == -1) { - if (nppGui._detectEncoding) + if 
(nppGui._detectEncoding && !isAutoDetectEncodingDisabled4Loading) fileFormat._encoding = detectCodepage(data, lenFile); - } - + } + + // + // Detect programming language + // + bool isLargeFile = fileSize >= nppGui._largeFileRestriction._largeFileSizeDefInByte; if (!isLargeFile && fileFormat._language == L_TEXT) { @@ -1969,6 +1984,7 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f isFirstTime = false; } + if (fileFormat._encoding != -1) { if (fileFormat._encoding == SC_CP_UTF8) @@ -1980,19 +1996,52 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f { WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance(); int newDataLen = 0; - const char *newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); + const char* newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); _pscratchTilla->execute(SCI_APPENDTEXT, newDataLen, reinterpret_cast(newData)); } if (format == EolType::unknown) format = getEOLFormatForm(data, lenFile, EolType::unknown); } - else + else // (fileFormat._encoding == -1) => encoding not found yet or BOM found { + NppParameters& nppParamInst = NppParameters::getInstance(); lenConvert = unicodeConvertor->convert(data, lenFile); - _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); - if (format == EolType::unknown) - format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + + if (!nppParamInst.isCurrentSystemCodepageUTF8()) // Default mode: all other encodings + { + _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); + if (format == EolType::unknown) + format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + } + else // "Use Unicode UTF-8 for worldwide 
language support" option is enabled + { + UniMode uniMode = unicodeConvertor->getEncoding(); + + if (hasBOM || // uniUTF8, uni16BE, uni16LE + uniMode == uni16BE_NoBOM || uniMode == uni16LE_NoBOM || uniMode == uniUTF8_NoBOM || uniMode == uni7Bit) + { + if (uniMode == uni7Bit) + fileFormat._encoding = nppParamInst.currentSystemCodepage(); + + _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); + + if (format == EolType::unknown) + format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + } + else // if (uniMode == uni8Bit) + { + WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance(); + int newDataLen = 0; + fileFormat._encoding = nppParamInst.defaultCodepage(); + + const char* newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); + _pscratchTilla->execute(SCI_APPENDTEXT, newDataLen, reinterpret_cast(newData)); + + if (format == EolType::unknown) + format = getEOLFormatForm(data, lenFile, EolType::unknown); + } + } } sciStatus = static_cast(_pscratchTilla->execute(SCI_GETSTATUS)); @@ -2060,7 +2109,7 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f //for empty files, if the default for new files is UTF8, and "Apply to opened ANSI files" is set, apply it if ((fileSize == 0) && (fileFormat._encoding < 1)) { - if (ndds._unicodeMode == uniCookie && ndds._openAnsiAsUtf8) + if (ndds._unicodeMode == uniUTF8_NoBOM && ndds._openAnsiAsUtf8) fileFormat._encoding = SC_CP_UTF8; } } diff --git a/PowerEditor/src/ScintillaComponent/Buffer.h b/PowerEditor/src/ScintillaComponent/Buffer.h index 1a838f817..8fe35c54b 100644 --- a/PowerEditor/src/ScintillaComponent/Buffer.h +++ b/PowerEditor/src/ScintillaComponent/Buffer.h @@ -117,11 +117,16 @@ public: void removeHotSpot(Buffer * buffer) const; size_t nextUntitledNewNumber() const; + void enableAutoDetectEncoding4Loading() { 
isAutoDetectEncodingDisabled4Loading = false; }; + void disableAutoDetectEncoding4Loading() { isAutoDetectEncodingDisabled4Loading = true; }; // Disable the encoding auto-detection on loading file while switching among the different encoding. + // The value of isAutoDetectEncodingDisabled4Loading will be restored to false after each file loading + // to restore the encoding auto-detection ability for other file loading operations. + private: struct LoadedFileFormat { LoadedFileFormat() = default; LangType _language = L_TEXT; - int _encoding = 0; + int _encoding = uni8Bit; EolType _eolFormat = EolType::osdefault; }; @@ -137,6 +142,8 @@ private: FileManager& operator=(FileManager&&) = delete; int detectCodepage(char* buf, size_t len); + bool isAutoDetectEncodingDisabled4Loading = false; + bool loadFileData(Document doc, int64_t fileSize, const wchar_t* filename, char* buffer, Utf8_16_Read* UnicodeConvertor, LoadedFileFormat& fileFormat); LangType detectLanguageFromTextBeginning(const unsigned char *data, size_t dataLen); @@ -409,8 +416,12 @@ private: std::wstring _userLangExt; // it's useful if only (_lang == L_USER) bool _isDirty = false; EolType _eolFormat = EolType::osdefault; - UniMode _unicodeMode = uniUTF8; + + // if _encoding == -1, then _unicodeMode is used. + // otherwise _encoding is used. 
int _encoding = -1; + UniMode _unicodeMode = uniUTF8; + bool _isUserReadOnly = false; bool _isFromNetwork = false; bool _needLexer = false; // new buffers do not need lexing, Scintilla takes care of that diff --git a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp index 8d3e84778..113a12936 100644 --- a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp +++ b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp @@ -343,7 +343,7 @@ void ScintillaEditView::init(HINSTANCE hInst, HWND hPere) // so that existing plugins using SCI_SETTECHNOLOGY behave like before } - _codepage = ::GetACP(); + _codepage = nppParams.currentSystemCodepage(); ::SetWindowLongPtr(_hSelf, GWLP_USERDATA, reinterpret_cast(this)); _callWindowProc = CallWindowProc; @@ -1012,7 +1012,7 @@ void ScintillaEditView::setUserLexer(const wchar_t *userLangName) int encoding = _currentBuffer->getEncoding(); if (encoding == -1) { - if (unicodeMode == uniUTF8 || unicodeMode == uniCookie) + if (unicodeMode == uniUTF8 || unicodeMode == uniUTF8_NoBOM) codepage = CP_UTF8; } else @@ -2461,10 +2461,12 @@ void ScintillaEditView::bufferUpdated(Buffer * buffer, int mask) { execute(SCI_SETEOLMODE, static_cast(_currentBuffer->getEolFormat())); } + if (mask & BufferChangeReadonly) { execute(SCI_SETREADONLY, _currentBuffer->isReadOnly()); } + if (mask & BufferChangeUnicode) { int enc = CP_ACP; diff --git a/PowerEditor/src/ScintillaComponent/ScintillaEditView.h b/PowerEditor/src/ScintillaComponent/ScintillaEditView.h index dedf486ea..c8a351185 100644 --- a/PowerEditor/src/ScintillaComponent/ScintillaEditView.h +++ b/PowerEditor/src/ScintillaComponent/ScintillaEditView.h @@ -874,6 +874,7 @@ protected: Buffer* _currentBuffer = nullptr; int _codepage = CP_ACP; + bool _wrapRestoreNeeded = false; bool _positionRestoreNeeded = false; uint32_t _restorePositionRetryCount = 0; diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index 
861e42089..eaf1a10d1 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -140,7 +140,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: { // Do nothing, pass through m_nAllocatedBufSize = 0; @@ -249,8 +249,9 @@ void Utf8_16_Read::determineEncoding() else { u78 detectedEncoding = utf8_7bits_8bits(); + if (detectedEncoding == utf8NoBOM) - m_eEncoding = uniCookie; + m_eEncoding = uniUTF8_NoBOM; else if (detectedEncoding == ascii7bits) m_eEncoding = uni7Bit; else //(detectedEncoding == ascii8bits) @@ -259,7 +260,7 @@ void Utf8_16_Read::determineEncoding() } } -UniMode Utf8_16_Read::determineEncoding(const unsigned char *buf, size_t bufLen) +UniMode Utf8_16_Read::determineEncodingFromBOM(const unsigned char *buf, size_t bufLen) { // detect UTF-16 big-endian with BOM if (bufLen > 1 && buf[0] == k_Boms[uni16BE][0] && buf[1] == k_Boms[uni16BE][1]) @@ -369,7 +370,7 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: case uniUTF8: { // Normal write @@ -436,7 +437,7 @@ size_t Utf8_16_Write::convert(char* p, size_t _size) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: { // Normal write m_pNewBuf = new ubyte[_size]; diff --git a/PowerEditor/src/Utf8_16.h b/PowerEditor/src/Utf8_16.h index 75766dc72..5eafe5b5c 100644 --- a/PowerEditor/src/Utf8_16.h +++ b/PowerEditor/src/Utf8_16.h @@ -113,14 +113,14 @@ public: size_t getNewSize() const { return m_nNewBufSize; } UniMode getEncoding() const { return m_eEncoding; } - static UniMode determineEncoding(const unsigned char *buf, size_t bufLen); + static UniMode determineEncodingFromBOM(const unsigned char *buf, size_t bufLen); protected: void determineEncoding(); u78 utf8_7bits_8bits(); private: - UniMode m_eEncoding = uni8Bit; + UniMode m_eEncoding = uni8Bit; ubyte* m_pBuf = nullptr; ubyte* m_pNewBuf = nullptr; // size of the new 
buffer diff --git a/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp b/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp index dc23882f4..d3311e23e 100644 --- a/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp +++ b/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp @@ -516,7 +516,7 @@ intptr_t CALLBACK DebugInfoDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM { constexpr size_t bufSizeACP = 32; wchar_t szACP[bufSizeACP] = { '\0' }; - swprintf(szACP, bufSizeACP, L"%u", ::GetACP()); + swprintf(szACP, bufSizeACP, L"%u", nppParam.currentSystemCodepage()); _debugInfoStr += L"Current ANSI codepage: "; _debugInfoStr += szACP; _debugInfoStr += L"\r\n"; diff --git a/PowerEditor/src/WinControls/Preference/preference.rc b/PowerEditor/src/WinControls/Preference/preference.rc index 31ae5e6da..c13efd9db 100644 --- a/PowerEditor/src/WinControls/Preference/preference.rc +++ b/PowerEditor/src/WinControls/Preference/preference.rc @@ -255,7 +255,7 @@ BEGIN CONTROL "Macintosh (CR)",IDC_RADIO_F_MAC,"Button",BS_AUTORADIOBUTTON,65,80,70,10 GROUPBOX "Encoding",IDC_ENCODING_STATIC,232,28,175,122,BS_CENTER CONTROL "ANSI",IDC_RADIO_ANSI,"Button",BS_AUTORADIOBUTTON | WS_GROUP,242,39,150,10 - CONTROL "UTF-8",IDC_RADIO_UTF8SANSBOM,"Button",BS_AUTORADIOBUTTON,242,53,150,10 + CONTROL "UTF-8",IDC_RADIO_UTF8_NO_BOM,"Button",BS_AUTORADIOBUTTON,242,53,150,10 CONTROL "Apply to opened ANSI files",IDC_CHECK_OPENANSIASUTF8, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,252,65,150,10 CONTROL "UTF-8 with BOM",IDC_RADIO_UTF8,"Button",BS_AUTORADIOBUTTON,242,79,150,10 CONTROL "UTF-16 Big Endian with BOM",IDC_RADIO_UTF16BIG,"Button",BS_AUTORADIOBUTTON,242,95,150,10 diff --git a/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp b/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp index 12990a707..a431f4c91 100644 --- a/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp +++ b/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp @@ -3360,6 +3360,14 @@ intptr_t CALLBACK 
MiscSubDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM) return FALSE; } + +void NewDocumentSubDlg::makeOpenAnsiAsUtf8(bool doIt) const +{ + if (!doIt) + ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, BST_UNCHECKED, 0); + ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), doIt && !NppParameters::getInstance().isCurrentSystemCodepageUTF8()); +} + intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM) { NppParameters& nppParam = NppParameters::getInstance(); @@ -3399,8 +3407,8 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP case uniUTF8 : ID2Check = IDC_RADIO_UTF8; break; - case uniCookie : - ID2Check = IDC_RADIO_UTF8SANSBOM; + case uniUTF8_NoBOM : + ID2Check = IDC_RADIO_UTF8_NO_BOM; break; default : //uni8Bit @@ -3435,8 +3443,15 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP } ::SendDlgItemMessage(_hSelf, ID2Check, BM_SETCHECK, BST_CHECKED, 0); - ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, (ID2Check == IDC_RADIO_UTF8SANSBOM && ndds._openAnsiAsUtf8)?BST_CHECKED:BST_UNCHECKED, 0); - ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), ID2Check == IDC_RADIO_UTF8SANSBOM); + ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, (ID2Check == IDC_RADIO_UTF8_NO_BOM && ndds._openAnsiAsUtf8)?BST_CHECKED:BST_UNCHECKED, 0); + + bool isEnableAnsiAsUTF = ID2Check == IDC_RADIO_UTF8_NO_BOM; + if (nppParam.isCurrentSystemCodepageUTF8()) + { + isEnableAnsiAsUTF = false; + ::EnableWindow(::GetDlgItem(_hSelf, IDC_RADIO_ANSI), false); + } + ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), isEnableAnsiAsUTF); for (int i = L_TEXT + 1 ; i < nppParam.L_END ; ++i) // Skip L_TEXT { @@ -3525,8 +3540,8 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP ndds._codepage = -1; ::EnableWindow(::GetDlgItem(_hSelf, IDC_COMBO_OTHERCP), false); return TRUE; - case 
IDC_RADIO_UTF8SANSBOM: - ndds._unicodeMode = uniCookie; + case IDC_RADIO_UTF8_NO_BOM: + ndds._unicodeMode = uniUTF8_NoBOM; makeOpenAnsiAsUtf8(true); ndds._codepage = -1; ::EnableWindow(::GetDlgItem(_hSelf, IDC_COMBO_OTHERCP), false); diff --git a/PowerEditor/src/WinControls/Preference/preferenceDlg.h b/PowerEditor/src/WinControls/Preference/preferenceDlg.h index 1624e9716..5f0506a9b 100644 --- a/PowerEditor/src/WinControls/Preference/preferenceDlg.h +++ b/PowerEditor/src/WinControls/Preference/preferenceDlg.h @@ -186,11 +186,7 @@ public : NewDocumentSubDlg() = default; private : - void makeOpenAnsiAsUtf8(bool doIt){ - if (!doIt) - ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, BST_UNCHECKED, 0); - ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), doIt); - }; + void makeOpenAnsiAsUtf8(bool doIt) const; intptr_t CALLBACK run_dlgProc(UINT message, WPARAM wParam, LPARAM lParam) override; }; diff --git a/PowerEditor/src/WinControls/Preference/preference_rc.h b/PowerEditor/src/WinControls/Preference/preference_rc.h index 608b80447..15b616862 100644 --- a/PowerEditor/src/WinControls/Preference/preference_rc.h +++ b/PowerEditor/src/WinControls/Preference/preference_rc.h @@ -295,7 +295,7 @@ #define IDC_RADIO_F_MAC 6404 #define IDC_ENCODING_STATIC 6405 #define IDC_RADIO_ANSI 6406 -#define IDC_RADIO_UTF8SANSBOM 6407 +#define IDC_RADIO_UTF8_NO_BOM 6407 #define IDC_RADIO_UTF8 6408 #define IDC_RADIO_UTF16BIG 6409 #define IDC_RADIO_UTF16SMALL 6410