diff --git a/PowerEditor/src/Notepad_plus.cpp b/PowerEditor/src/Notepad_plus.cpp index 16c09ddae..70e102089 100644 --- a/PowerEditor/src/Notepad_plus.cpp +++ b/PowerEditor/src/Notepad_plus.cpp @@ -638,9 +638,17 @@ LRESULT Notepad_plus::init(HWND hwnd) ::DrawMenuBar(hwnd); - //Windows menu + // Windows menu _windowsMenu.init(_mainMenuHandle); + // if user set system codepage to UTF8, ANSI encoding capacity should be disable once for all + if (NppParameters::getInstance().isCurrentSystemCodepageUTF8()) + { + enableCommand(IDM_FORMAT_ANSI, false, MENU); + enableCommand(IDM_FORMAT_CONV2_ANSI, false, MENU); + } + + // Update Scintilla context menu strings (translated) vector & tmp = nppParam.getContextMenuItems(); size_t len = tmp.size(); @@ -2510,7 +2518,7 @@ int Notepad_plus::doDeleteOrNot(const wchar_t *fn) void Notepad_plus::enableMenu(int cmdID, bool doEnable) const { - int flag = doEnable?MF_ENABLED | MF_BYCOMMAND:MF_DISABLED | MF_GRAYED | MF_BYCOMMAND; + int flag = doEnable ? MF_ENABLED | MF_BYCOMMAND : MF_GRAYED | MF_DISABLED | MF_BYCOMMAND; ::EnableMenuItem(_mainMenuHandle, cmdID, flag); } @@ -2724,6 +2732,9 @@ void Notepad_plus::setupColorSampleBitmapsOnMainMenuItems() } } +// doCheck searches for the menu item matching the provided id across the main menu and all of its submenus, +// once the target id is found, it ensures that item is checked, and all other menu items at that same level are automatically unchecked. +// If id is -1, then all the menu items are unchecked. bool doCheck(HMENU mainHandle, int id) { MENUITEMINFO mii{}; @@ -2735,7 +2746,7 @@ bool doCheck(HMENU mainHandle, int id) for (int i = 0; i < count; i++) { ::GetMenuItemInfo(mainHandle, i, MF_BYPOSITION, &mii); - if (mii.fType == MFT_RADIOCHECK || mii.fType == MFT_STRING) + if (!(mii.fState & MFS_GRAYED) && (mii.fType == MFT_RADIOCHECK || mii.fType == MFT_STRING)) { bool checked = mii.hSubMenu ? doCheck(mii.hSubMenu, id) : (mii.wID == (unsigned int)id); if (checked) @@ -3072,7 +3083,7 @@ void Notepad_plus::setUniModeText() uniModeTextString = L"UTF-16 Big Endian"; break; case uni16LE_NoBOM: uniModeTextString = L"UTF-16 Little Endian"; break; - case uniCookie: + case uniUTF8_NoBOM: uniModeTextString = L"UTF-8"; break; default : uniModeTextString = L"ANSI"; @@ -4191,7 +4202,7 @@ size_t Notepad_plus::getSelectedCharNumber(UniMode u) { size_t result = 0; size_t numSel = _pEditView->execute(SCI_GETSELECTIONS); - if (u == uniUTF8 || u == uniCookie) + if (u == uniUTF8 || u == uniUTF8_NoBOM) { for (size_t i = 0; i < numSel; ++i) { @@ -4254,7 +4265,7 @@ static inline size_t countUtf8Characters(const unsigned char *buf, size_t pos, s size_t Notepad_plus::getCurrentDocCharCount(UniMode u) { - if (u != uniUTF8 && u != uniCookie) + if (u != uniUTF8 && u != uniUTF8_NoBOM) { size_t numLines = _pEditView->execute(SCI_GETLINECOUNT); auto result = _pEditView->execute(SCI_GETLENGTH); @@ -4299,7 +4310,7 @@ size_t Notepad_plus::getCurrentDocCharCount(UniMode u) bool Notepad_plus::isFormatUnicode(UniMode u) { - return (u != uni8Bit && u != uni7Bit && u != uniUTF8 && u != uniCookie); + return (u != uni8Bit && u != uni7Bit && u != uniUTF8 && u != uniUTF8_NoBOM); } int Notepad_plus::getBOMSize(UniMode u) @@ -5160,11 +5171,11 @@ void Notepad_plus::checkUnicodeMenuItems() const int id = -1; switch (um) { - case uniUTF8 : id = IDM_FORMAT_UTF_8; break; - case uni16BE : id = IDM_FORMAT_UTF_16BE; break; - case uni16LE : id = IDM_FORMAT_UTF_16LE; break; - case uniCookie : id = IDM_FORMAT_AS_UTF_8; break; - case uni8Bit : id = IDM_FORMAT_ANSI; break; + case uniUTF8 : id = IDM_FORMAT_UTF_8; break; + case uni16BE : id = IDM_FORMAT_UTF_16BE; break; + case uni16LE : id = IDM_FORMAT_UTF_16LE; break; + case uniUTF8_NoBOM : id = IDM_FORMAT_AS_UTF_8; break; + case uni8Bit : id = IDM_FORMAT_ANSI; break; case uni7Bit: case uni16BE_NoBOM: @@ -5174,24 +5185,21 @@ void Notepad_plus::checkUnicodeMenuItems() const break; } - if (encoding == -1) + HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + + if (encoding == -1) // encoding is not used, so use uniMode to check menu item { - // Uncheck all in the sub encoding menu - HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + // Uncheck all in the main & sub encoding menu doCheck(_formatMenuHandle, -1); - if (id == -1) //um == uni16BE_NoBOM || um == uni16LE_NoBOM + if (id != -1) { - // Uncheck all in the main encoding menu - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, IDM_FORMAT_ANSI, MF_BYCOMMAND); - ::CheckMenuItem(_mainMenuHandle, IDM_FORMAT_ANSI, MF_UNCHECKED | MF_BYCOMMAND); - } - else - { - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, id, MF_BYCOMMAND); + DWORD state = GetMenuState(_formatMenuHandle, IDM_FORMAT_ANSI, MF_BYCOMMAND); + ::CheckMenuRadioItem(_mainMenuHandle, (state & MFS_GRAYED) ? IDM_FORMAT_UTF_8 : IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, id, MF_BYCOMMAND); } + // else if (id == -1) => um == uni16BE_NoBOM || um == uni16LE_NoBOM, let all items unchecked. } - else + else // encoding is used { const EncodingMapper& em = EncodingMapper::getInstance(); int cmdID = em.getIndexFromEncoding(encoding); @@ -5202,12 +5210,7 @@ void Notepad_plus::checkUnicodeMenuItems() const } cmdID += IDM_FORMAT_ENCODE; - // Uncheck all in the main encoding menu - ::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, IDM_FORMAT_ANSI, MF_BYCOMMAND); - ::CheckMenuItem(_mainMenuHandle, IDM_FORMAT_ANSI, MF_UNCHECKED | MF_BYCOMMAND); - - // Check the encoding item - HMENU _formatMenuHandle = ::GetSubMenu(_mainMenuHandle, MENUINDEX_FORMAT); + // Check the encoding item doCheck(_formatMenuHandle, cmdID); } } diff --git a/PowerEditor/src/NppCommands.cpp b/PowerEditor/src/NppCommands.cpp index 6e7cd243b..ef7e19b1a 100644 --- a/PowerEditor/src/NppCommands.cpp +++ b/PowerEditor/src/NppCommands.cpp @@ -2757,15 +2757,25 @@ void Notepad_plus::command(int id) case IDM_FORMAT_UTF_16LE : case IDM_FORMAT_AS_UTF_8 : { + bool isUTF8System = NppParameters::getInstance().isCurrentSystemCodepageUTF8(); + + if (isUTF8System && id == IDM_FORMAT_ANSI) + { + return; + } + Buffer * buf = _pEditView->getCurrentBuffer(); + UniMode originalUm = buf->getUnicodeMode(); + int originalEncoding = buf->getEncoding(); + UniMode um; bool shouldBeDirty = true; switch (id) { case IDM_FORMAT_AS_UTF_8: - shouldBeDirty = buf->getUnicodeMode() != uni8Bit; - um = uniCookie; + shouldBeDirty = originalUm != uni8Bit; + um = uniUTF8_NoBOM; break; case IDM_FORMAT_UTF_8: @@ -2781,11 +2791,11 @@ void Notepad_plus::command(int id) break; default : // IDM_FORMAT_ANSI - shouldBeDirty = buf->getUnicodeMode() != uniCookie; + shouldBeDirty = originalUm != uniUTF8_NoBOM; um = uni8Bit; } - if (buf->getEncoding() != -1) + if (originalEncoding != -1) { if (buf->isDirty()) { @@ -2801,7 +2811,9 @@ void Notepad_plus::command(int id) _pEditView->execute(SCI_EMPTYUNDOBUFFER); } else + { return; + } } if (_pEditView->execute(SCI_CANUNDO) == TRUE) @@ -2816,26 +2828,35 @@ void Notepad_plus::command(int id) // Do nothing } else + { return; + } } buf->setEncoding(-1); if (um == uni8Bit) - _pEditView->execute(SCI_SETCODEPAGE, CP_ACP); + { + NppParameters& nppParams = NppParameters::getInstance(); + _pEditView->execute(SCI_SETCODEPAGE, !nppParams.isCurrentSystemCodepageUTF8() ? CP_ACP : SC_CP_UTF8); + } else buf->setUnicodeMode(um); + + MainFileManager.disableAutoDetectEncoding4Loading(); fileReload(); + MainFileManager.enableAutoDetectEncoding4Loading(); } else { - if (buf->getUnicodeMode() != um) + if (originalUm != um) { buf->setUnicodeMode(um); if (shouldBeDirty) buf->setDirty(true); } } + break; } @@ -2929,8 +2950,11 @@ void Notepad_plus::command(int id) if (!buf->isDirty()) { buf->setEncoding(encoding); - buf->setUnicodeMode(uniCookie); + buf->setUnicodeMode(uniUTF8_NoBOM); + + MainFileManager.disableAutoDetectEncoding4Loading(); fileReload(); + MainFileManager.enableAutoDetectEncoding4Loading(); } break; } @@ -2971,13 +2995,13 @@ void Notepad_plus::command(int id) if (encoding != -1) { buf->setDirty(true); - buf->setUnicodeMode(uniCookie); + buf->setUnicodeMode(uniUTF8_NoBOM); buf->setEncoding(-1); return; } idEncoding = IDM_FORMAT_AS_UTF_8; - if (um == uniCookie) + if (um == uniUTF8_NoBOM) return; if (um != uni8Bit) diff --git a/PowerEditor/src/Parameters.cpp b/PowerEditor/src/Parameters.cpp index b09215ffb..291c26570 100644 --- a/PowerEditor/src/Parameters.cpp +++ b/PowerEditor/src/Parameters.cpp @@ -1003,9 +1003,12 @@ winVer NppParameters::getWindowsVersion() NppParameters::NppParameters() { - //Get windows version + // Get windows version _winVersion = getWindowsVersion(); + // Get current system code page + _currentSystemCodepage = GetACP(); + // Prepare for default path wchar_t nppPath[MAX_PATH]; ::GetModuleFileName(NULL, nppPath, MAX_PATH); @@ -5550,7 +5553,15 @@ void NppParameters::feedGUIParameters(TiXmlNode *node) } if (element->Attribute(L"encoding", &i)) - _nppGUI._newDocDefaultSettings._unicodeMode = (UniMode)i; + { + if (isCurrentSystemCodepageUTF8() // "Beta: Use Unicode UTF-8 for worldwide language support" option is checked in Windows + && static_cast(i) == uni8Bit) // Notepad++ default new document setting is ANSI + { + // Force Notepad++ default new document setting from ANSI to UTF-8 + i = static_cast(uniUTF8); + } + _nppGUI._newDocDefaultSettings._unicodeMode = static_cast(i); + } if (element->Attribute(L"lang", &i)) _nppGUI._newDocDefaultSettings._lang = (LangType)i; @@ -5560,7 +5571,7 @@ void NppParameters::feedGUIParameters(TiXmlNode *node) const wchar_t* val = element->Attribute(L"openAnsiAsUTF8"); if (val) - _nppGUI._newDocDefaultSettings._openAnsiAsUtf8 = (lstrcmp(val, L"yes") == 0); + _nppGUI._newDocDefaultSettings._openAnsiAsUtf8 = isCurrentSystemCodepageUTF8() ? false : (lstrcmp(val, L"yes") == 0); val = element->Attribute(L"addNewDocumentOnStartup"); if (val) diff --git a/PowerEditor/src/Parameters.h b/PowerEditor/src/Parameters.h index 53205dea3..14d74870f 100644 --- a/PowerEditor/src/Parameters.h +++ b/PowerEditor/src/Parameters.h @@ -100,18 +100,17 @@ enum class EolType: std::uint8_t EolType convertIntToFormatType(int value, EolType defvalue = EolType::osdefault); - - enum UniMode { uni8Bit = 0, // ANSI uniUTF8 = 1, // UTF-8 with BOM uni16BE = 2, // UTF-16 Big Endian with BOM uni16LE = 3, // UTF-16 Little Endian with BOM - uniCookie = 4, // UTF-8 without BOM - uni7Bit = 5, // + uniUTF8_NoBOM = 4, // UTF-8 without BOM + uni7Bit = 5, // 0 - 127 ASCII uni16BE_NoBOM = 6, // UTF-16 Big Endian without BOM uni16LE_NoBOM = 7, // UTF-16 Little Endian without BOM - uniEnd}; + uniEnd +}; enum ChangeDetect { cdDisabled = 0x0, cdEnabledOld = 0x01, cdEnabledNew = 0x02, cdAutoUpdate = 0x04, cdGo2end = 0x08 }; enum BackupFeature {bak_none = 0, bak_simple = 1, bak_verbose = 2}; @@ -612,7 +611,7 @@ private : struct NewDocDefaultSettings final { EolType _format = EolType::osdefault; - UniMode _unicodeMode = uniCookie; + UniMode _unicodeMode = uniUTF8_NoBOM; bool _openAnsiAsUtf8 = true; LangType _lang = L_TEXT; int _codepage = -1; // -1 when not using @@ -1821,6 +1820,20 @@ public: winVer getWinVersion() const {return _winVersion;}; std::wstring getWinVersionStr() const; std::wstring getWinVerBitStr() const; + + int currentSystemCodepage() const { return _currentSystemCodepage; }; + bool isCurrentSystemCodepageUTF8() const { return _currentSystemCodepage == 65001; }; + int defaultCodepage() const { // return all the codepage except UTF8 (65001) + if (isCurrentSystemCodepageUTF8()) + { + int localCodepage = 0; + // Get the system default codepage, which is the codepage before "Beta: Use Unicode UTF-8 for worldwide language support" option being checked + GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, reinterpret_cast(&localCodepage), 2); + return localCodepage; + } + return _currentSystemCodepage; + }; + FindHistory & getFindHistory() {return _findHistory;}; bool _isFindReplacing = false; // an on the fly variable for find/replace functions #ifndef _WIN64 @@ -2085,6 +2098,8 @@ private: bool _isPlaceHolderEnabled = false; bool _theWarningHasBeenGiven = false; + int _currentSystemCodepage = -1; + public: std::wstring getWingupFullPath() const { return _wingupFullPath; }; std::wstring getWingupParams() const { return _wingupParams; }; diff --git a/PowerEditor/src/ScintillaComponent/Buffer.cpp b/PowerEditor/src/ScintillaComponent/Buffer.cpp index 2d88e2454..09f56a382 100644 --- a/PowerEditor/src/ScintillaComponent/Buffer.cpp +++ b/PowerEditor/src/ScintillaComponent/Buffer.cpp @@ -89,7 +89,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus _unicodeMode = ndds._unicodeMode; _encoding = ndds._codepage; if (_encoding != -1) - _unicodeMode = uniCookie; + _unicodeMode = uniUTF8_NoBOM; _currentStatus = type; @@ -1005,7 +1005,7 @@ bool FileManager::reloadBuffer(BufferID id) { Buffer* buf = getBufferByID(id); Document doc = buf->getDocument(); - Utf8_16_Read UnicodeConvertor; + Utf8_16_Read unicodeConvertor; LoadedFileFormat loadedFileFormat; loadedFileFormat._encoding = buf->getEncoding(); @@ -1037,7 +1037,7 @@ bool FileManager::reloadBuffer(BufferID id) char* data = new char[blockSize + 8]; // +8 for incomplete multibyte char buf->_canNotify = false; //disable notify during file load, we don't want dirty status to be triggered - bool res = loadFileData(doc, fileSize, buf->getFullPathName(), data, &UnicodeConvertor, loadedFileFormat); + bool res = loadFileData(doc, fileSize, buf->getFullPathName(), data, &unicodeConvertor, loadedFileFormat); buf->_canNotify = true; delete[] data; @@ -1050,7 +1050,7 @@ bool FileManager::reloadBuffer(BufferID id) buf->setSavePointDirty(false); - setLoadedBufferEncodingAndEol(buf, UnicodeConvertor, loadedFileFormat._encoding, loadedFileFormat._eolFormat); + setLoadedBufferEncodingAndEol(buf, unicodeConvertor, loadedFileFormat._encoding, loadedFileFormat._eolFormat); } return res; @@ -1059,24 +1059,28 @@ bool FileManager::reloadBuffer(BufferID id) void FileManager::setLoadedBufferEncodingAndEol(Buffer* buf, const Utf8_16_Read& UnicodeConvertor, int encoding, EolType bkformat) { - if (encoding == -1) + int encoding2Set = encoding; + UniMode unimode2Set = UnicodeConvertor.getEncoding(); + + if (encoding2Set == -1) { NppParameters& nppParamInst = NppParameters::getInstance(); const NewDocDefaultSettings & ndds = (nppParamInst.getNppGUI()).getNewDocDefaultSettings(); - - UniMode um = UnicodeConvertor.getEncoding(); - if (um == uni7Bit) - um = (ndds._openAnsiAsUtf8) ? uniCookie : uni8Bit; - - buf->setUnicodeMode(um); + + if (unimode2Set == uni7Bit) + unimode2Set = (ndds._openAnsiAsUtf8) ? uniUTF8_NoBOM : uni8Bit; } else { // Test if encoding is set to UTF8 w/o BOM (usually for utf8 indicator of xml or html) - buf->setEncoding((encoding == SC_CP_UTF8)?-1:encoding); - buf->setUnicodeMode(uniCookie); + encoding2Set = ((encoding2Set == SC_CP_UTF8) ? -1 : encoding2Set); + unimode2Set = uniUTF8_NoBOM; } + buf->setEncoding(encoding2Set); + buf->setUnicodeMode(unimode2Set); + + // Since the buffer will be reloaded from the disk, EOL might have been changed if (bkformat != EolType::unknown) buf->setEolFormat(bkformat); @@ -1208,7 +1212,7 @@ bool FileManager::backupCurrentBuffer() if (buffer->isModified()) // buffer dirty and modified, write the backup file { UniMode mode = buffer->getUnicodeMode(); - if (mode == uniCookie) + if (mode == uniUTF8_NoBOM) mode = uni8Bit; //set the mode to ANSI to prevent converter from adding BOM and performing conversions, Scintilla's data can be copied directly Utf8_16_Write UnicodeConvertor; @@ -1421,7 +1425,7 @@ SavingStatus FileManager::saveBuffer(BufferID id, const wchar_t* filename, bool } UniMode mode = buffer->getUnicodeMode(); - if (mode == uniCookie) + if (mode == uniUTF8_NoBOM) mode = uni8Bit; //set the mode to ANSI to prevent converter from adding BOM and performing conversions, Scintilla's data can be copied directly Utf8_16_Write UnicodeConvertor; @@ -1942,23 +1946,34 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f if (lenFile == 0) break; + bool hasBOM = false; if (isFirstTime) { - const NppGUI& nppGui = NppParameters::getInstance().getNppGUI(); + NppParameters& nppParamInst = NppParameters::getInstance(); + const NppGUI& nppGui = nppParamInst.getNppGUI(); + + // + // Detect encoding + // // check if file contain any BOM - if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit) - { - // if file contains any BOM, then encoding will be erased, - // and the document will be interpreted as UTF + if (Utf8_16_Read::determineEncodingFromBOM((unsigned char*)data, lenFile) != uni8Bit) + { + // if file contains any BOM, then encoding will be erased, + // and the document will be interpreted as UTF fileFormat._encoding = -1; + hasBOM = true; } else if (fileFormat._encoding == -1) { - if (nppGui._detectEncoding) + if (nppGui._detectEncoding && !isAutoDetectEncodingDisabled4Loading) fileFormat._encoding = detectCodepage(data, lenFile); - } - + } + + // + // Detect programming language + // + bool isLargeFile = fileSize >= nppGui._largeFileRestriction._largeFileSizeDefInByte; if (!isLargeFile && fileFormat._language == L_TEXT) { @@ -1969,6 +1984,7 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f isFirstTime = false; } + if (fileFormat._encoding != -1) { if (fileFormat._encoding == SC_CP_UTF8) @@ -1980,19 +1996,52 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f { WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance(); int newDataLen = 0; - const char *newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); + const char* newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); _pscratchTilla->execute(SCI_APPENDTEXT, newDataLen, reinterpret_cast(newData)); } if (format == EolType::unknown) format = getEOLFormatForm(data, lenFile, EolType::unknown); } - else + else // (fileFormat._encoding == -1) => encoding not found yet or BOM found { + NppParameters& nppParamInst = NppParameters::getInstance(); lenConvert = unicodeConvertor->convert(data, lenFile); - _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); - if (format == EolType::unknown) - format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + + if (!nppParamInst.isCurrentSystemCodepageUTF8()) // Default mode: all other encodings + { + _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); + if (format == EolType::unknown) + format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + } + else // "Use Unicode UTF-8 for worldwide language support" option is enabled + { + UniMode uniMode = unicodeConvertor->getEncoding(); + + if (hasBOM || // uniUTF8, uni16BE, uni16LE + uniMode == uni16BE_NoBOM || uniMode == uni16LE_NoBOM || uniMode == uniUTF8_NoBOM || uniMode == uni7Bit) + { + if (uniMode == uni7Bit) + fileFormat._encoding = nppParamInst.currentSystemCodepage(); + + _pscratchTilla->execute(SCI_APPENDTEXT, lenConvert, reinterpret_cast(unicodeConvertor->getNewBuf())); + + if (format == EolType::unknown) + format = getEOLFormatForm(unicodeConvertor->getNewBuf(), unicodeConvertor->getNewSize(), EolType::unknown); + } + else // if (uniMode == uni8Bit) + { + WcharMbcsConvertor& wmc = WcharMbcsConvertor::getInstance(); + int newDataLen = 0; + fileFormat._encoding = nppParamInst.defaultCodepage(); + + const char* newData = wmc.encode(fileFormat._encoding, SC_CP_UTF8, data, static_cast(lenFile), &newDataLen, &incompleteMultibyteChar); + _pscratchTilla->execute(SCI_APPENDTEXT, newDataLen, reinterpret_cast(newData)); + + if (format == EolType::unknown) + format = getEOLFormatForm(data, lenFile, EolType::unknown); + } + } } sciStatus = static_cast(_pscratchTilla->execute(SCI_GETSTATUS)); @@ -2060,7 +2109,7 @@ bool FileManager::loadFileData(Document doc, int64_t fileSize, const wchar_t * f //for empty files, if the default for new files is UTF8, and "Apply to opened ANSI files" is set, apply it if ((fileSize == 0) && (fileFormat._encoding < 1)) { - if (ndds._unicodeMode == uniCookie && ndds._openAnsiAsUtf8) + if (ndds._unicodeMode == uniUTF8_NoBOM && ndds._openAnsiAsUtf8) fileFormat._encoding = SC_CP_UTF8; } } diff --git a/PowerEditor/src/ScintillaComponent/Buffer.h b/PowerEditor/src/ScintillaComponent/Buffer.h index 1a838f817..8fe35c54b 100644 --- a/PowerEditor/src/ScintillaComponent/Buffer.h +++ b/PowerEditor/src/ScintillaComponent/Buffer.h @@ -117,11 +117,16 @@ public: void removeHotSpot(Buffer * buffer) const; size_t nextUntitledNewNumber() const; + void enableAutoDetectEncoding4Loading() { isAutoDetectEncodingDisabled4Loading = false; }; + void disableAutoDetectEncoding4Loading() { isAutoDetectEncodingDisabled4Loading = true; }; // Disable the encoding auto-detection on loading file while switching among the different encoding. + // The value of isAutoDetectEncodingDisabled4Loading will be restored to false after each file loading + // to restore the encoding auto-detection ability for other file loading operations. + private: struct LoadedFileFormat { LoadedFileFormat() = default; LangType _language = L_TEXT; - int _encoding = 0; + int _encoding = uni8Bit; EolType _eolFormat = EolType::osdefault; }; @@ -137,6 +142,8 @@ private: FileManager& operator=(FileManager&&) = delete; int detectCodepage(char* buf, size_t len); + bool isAutoDetectEncodingDisabled4Loading = false; + bool loadFileData(Document doc, int64_t fileSize, const wchar_t* filename, char* buffer, Utf8_16_Read* UnicodeConvertor, LoadedFileFormat& fileFormat); LangType detectLanguageFromTextBeginning(const unsigned char *data, size_t dataLen); @@ -409,8 +416,12 @@ private: std::wstring _userLangExt; // it's useful if only (_lang == L_USER) bool _isDirty = false; EolType _eolFormat = EolType::osdefault; - UniMode _unicodeMode = uniUTF8; + + // if _encoding == -1, then _unicodeMode is used. + // otherwise _encoding is used. int _encoding = -1; + UniMode _unicodeMode = uniUTF8; + bool _isUserReadOnly = false; bool _isFromNetwork = false; bool _needLexer = false; // new buffers do not need lexing, Scintilla takes care of that diff --git a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp index 8d3e84778..113a12936 100644 --- a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp +++ b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp @@ -343,7 +343,7 @@ void ScintillaEditView::init(HINSTANCE hInst, HWND hPere) // so that existing plugins using SCI_SETTECHNOLOGY behave like before } - _codepage = ::GetACP(); + _codepage = nppParams.currentSystemCodepage(); ::SetWindowLongPtr(_hSelf, GWLP_USERDATA, reinterpret_cast(this)); _callWindowProc = CallWindowProc; @@ -1012,7 +1012,7 @@ void ScintillaEditView::setUserLexer(const wchar_t *userLangName) int encoding = _currentBuffer->getEncoding(); if (encoding == -1) { - if (unicodeMode == uniUTF8 || unicodeMode == uniCookie) + if (unicodeMode == uniUTF8 || unicodeMode == uniUTF8_NoBOM) codepage = CP_UTF8; } else @@ -2461,10 +2461,12 @@ void ScintillaEditView::bufferUpdated(Buffer * buffer, int mask) { execute(SCI_SETEOLMODE, static_cast(_currentBuffer->getEolFormat())); } + if (mask & BufferChangeReadonly) { execute(SCI_SETREADONLY, _currentBuffer->isReadOnly()); } + if (mask & BufferChangeUnicode) { int enc = CP_ACP; diff --git a/PowerEditor/src/ScintillaComponent/ScintillaEditView.h b/PowerEditor/src/ScintillaComponent/ScintillaEditView.h index dedf486ea..c8a351185 100644 --- a/PowerEditor/src/ScintillaComponent/ScintillaEditView.h +++ b/PowerEditor/src/ScintillaComponent/ScintillaEditView.h @@ -874,6 +874,7 @@ protected: Buffer* _currentBuffer = nullptr; int _codepage = CP_ACP; + bool _wrapRestoreNeeded = false; bool _positionRestoreNeeded = false; uint32_t _restorePositionRetryCount = 0; diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index 861e42089..eaf1a10d1 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -140,7 +140,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: { // Do nothing, pass through m_nAllocatedBufSize = 0; @@ -249,8 +249,9 @@ void Utf8_16_Read::determineEncoding() else { u78 detectedEncoding = utf8_7bits_8bits(); + if (detectedEncoding == utf8NoBOM) - m_eEncoding = uniCookie; + m_eEncoding = uniUTF8_NoBOM; else if (detectedEncoding == ascii7bits) m_eEncoding = uni7Bit; else //(detectedEncoding == ascii8bits) @@ -259,7 +260,7 @@ void Utf8_16_Read::determineEncoding() } } -UniMode Utf8_16_Read::determineEncoding(const unsigned char *buf, size_t bufLen) +UniMode Utf8_16_Read::determineEncodingFromBOM(const unsigned char *buf, size_t bufLen) { // detect UTF-16 big-endian with BOM if (bufLen > 1 && buf[0] == k_Boms[uni16BE][0] && buf[1] == k_Boms[uni16BE][1]) @@ -369,7 +370,7 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: case uniUTF8: { // Normal write @@ -436,7 +437,7 @@ size_t Utf8_16_Write::convert(char* p, size_t _size) { case uni7Bit: case uni8Bit: - case uniCookie: + case uniUTF8_NoBOM: { // Normal write m_pNewBuf = new ubyte[_size]; diff --git a/PowerEditor/src/Utf8_16.h b/PowerEditor/src/Utf8_16.h index 75766dc72..5eafe5b5c 100644 --- a/PowerEditor/src/Utf8_16.h +++ b/PowerEditor/src/Utf8_16.h @@ -113,14 +113,14 @@ public: size_t getNewSize() const { return m_nNewBufSize; } UniMode getEncoding() const { return m_eEncoding; } - static UniMode determineEncoding(const unsigned char *buf, size_t bufLen); + static UniMode determineEncodingFromBOM(const unsigned char *buf, size_t bufLen); protected: void determineEncoding(); u78 utf8_7bits_8bits(); private: - UniMode m_eEncoding = uni8Bit; + UniMode m_eEncoding = uni8Bit; ubyte* m_pBuf = nullptr; ubyte* m_pNewBuf = nullptr; // size of the new buffer diff --git a/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp b/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp index dc23882f4..d3311e23e 100644 --- a/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp +++ b/PowerEditor/src/WinControls/AboutDlg/AboutDlg.cpp @@ -516,7 +516,7 @@ intptr_t CALLBACK DebugInfoDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM { constexpr size_t bufSizeACP = 32; wchar_t szACP[bufSizeACP] = { '\0' }; - swprintf(szACP, bufSizeACP, L"%u", ::GetACP()); + swprintf(szACP, bufSizeACP, L"%u", nppParam.currentSystemCodepage()); _debugInfoStr += L"Current ANSI codepage: "; _debugInfoStr += szACP; _debugInfoStr += L"\r\n"; diff --git a/PowerEditor/src/WinControls/Preference/preference.rc b/PowerEditor/src/WinControls/Preference/preference.rc index 31ae5e6da..c13efd9db 100644 --- a/PowerEditor/src/WinControls/Preference/preference.rc +++ b/PowerEditor/src/WinControls/Preference/preference.rc @@ -255,7 +255,7 @@ BEGIN CONTROL "Macintosh (CR)",IDC_RADIO_F_MAC,"Button",BS_AUTORADIOBUTTON,65,80,70,10 GROUPBOX "Encoding",IDC_ENCODING_STATIC,232,28,175,122,BS_CENTER CONTROL "ANSI",IDC_RADIO_ANSI,"Button",BS_AUTORADIOBUTTON | WS_GROUP,242,39,150,10 - CONTROL "UTF-8",IDC_RADIO_UTF8SANSBOM,"Button",BS_AUTORADIOBUTTON,242,53,150,10 + CONTROL "UTF-8",IDC_RADIO_UTF8_NO_BOM,"Button",BS_AUTORADIOBUTTON,242,53,150,10 CONTROL "Apply to opened ANSI files",IDC_CHECK_OPENANSIASUTF8, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,252,65,150,10 CONTROL "UTF-8 with BOM",IDC_RADIO_UTF8,"Button",BS_AUTORADIOBUTTON,242,79,150,10 CONTROL "UTF-16 Big Endian with BOM",IDC_RADIO_UTF16BIG,"Button",BS_AUTORADIOBUTTON,242,95,150,10 diff --git a/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp b/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp index 12990a707..a431f4c91 100644 --- a/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp +++ b/PowerEditor/src/WinControls/Preference/preferenceDlg.cpp @@ -3360,6 +3360,14 @@ intptr_t CALLBACK MiscSubDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM) return FALSE; } + +void NewDocumentSubDlg::makeOpenAnsiAsUtf8(bool doIt) const +{ + if (!doIt) + ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, BST_UNCHECKED, 0); + ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), doIt && !NppParameters::getInstance().isCurrentSystemCodepageUTF8()); +} + intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM) { NppParameters& nppParam = NppParameters::getInstance(); @@ -3399,8 +3407,8 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP case uniUTF8 : ID2Check = IDC_RADIO_UTF8; break; - case uniCookie : - ID2Check = IDC_RADIO_UTF8SANSBOM; + case uniUTF8_NoBOM : + ID2Check = IDC_RADIO_UTF8_NO_BOM; break; default : //uni8Bit @@ -3435,8 +3443,15 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP } ::SendDlgItemMessage(_hSelf, ID2Check, BM_SETCHECK, BST_CHECKED, 0); - ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, (ID2Check == IDC_RADIO_UTF8SANSBOM && ndds._openAnsiAsUtf8)?BST_CHECKED:BST_UNCHECKED, 0); - ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), ID2Check == IDC_RADIO_UTF8SANSBOM); + ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, (ID2Check == IDC_RADIO_UTF8_NO_BOM && ndds._openAnsiAsUtf8)?BST_CHECKED:BST_UNCHECKED, 0); + + bool isEnableAnsiAsUTF = ID2Check == IDC_RADIO_UTF8_NO_BOM; + if (nppParam.isCurrentSystemCodepageUTF8()) + { + isEnableAnsiAsUTF = false; + ::EnableWindow(::GetDlgItem(_hSelf, IDC_RADIO_ANSI), false); + } + ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), isEnableAnsiAsUTF); for (int i = L_TEXT + 1 ; i < nppParam.L_END ; ++i) // Skip L_TEXT { @@ -3525,8 +3540,8 @@ intptr_t CALLBACK NewDocumentSubDlg::run_dlgProc(UINT message, WPARAM wParam, LP ndds._codepage = -1; ::EnableWindow(::GetDlgItem(_hSelf, IDC_COMBO_OTHERCP), false); return TRUE; - case IDC_RADIO_UTF8SANSBOM: - ndds._unicodeMode = uniCookie; + case IDC_RADIO_UTF8_NO_BOM: + ndds._unicodeMode = uniUTF8_NoBOM; makeOpenAnsiAsUtf8(true); ndds._codepage = -1; ::EnableWindow(::GetDlgItem(_hSelf, IDC_COMBO_OTHERCP), false); diff --git a/PowerEditor/src/WinControls/Preference/preferenceDlg.h b/PowerEditor/src/WinControls/Preference/preferenceDlg.h index 1624e9716..5f0506a9b 100644 --- a/PowerEditor/src/WinControls/Preference/preferenceDlg.h +++ b/PowerEditor/src/WinControls/Preference/preferenceDlg.h @@ -186,11 +186,7 @@ public : NewDocumentSubDlg() = default; private : - void makeOpenAnsiAsUtf8(bool doIt){ - if (!doIt) - ::SendDlgItemMessage(_hSelf, IDC_CHECK_OPENANSIASUTF8, BM_SETCHECK, BST_UNCHECKED, 0); - ::EnableWindow(::GetDlgItem(_hSelf, IDC_CHECK_OPENANSIASUTF8), doIt); - }; + void makeOpenAnsiAsUtf8(bool doIt) const; intptr_t CALLBACK run_dlgProc(UINT message, WPARAM wParam, LPARAM lParam) override; }; diff --git a/PowerEditor/src/WinControls/Preference/preference_rc.h b/PowerEditor/src/WinControls/Preference/preference_rc.h index 608b80447..15b616862 100644 --- a/PowerEditor/src/WinControls/Preference/preference_rc.h +++ b/PowerEditor/src/WinControls/Preference/preference_rc.h @@ -295,7 +295,7 @@ #define IDC_RADIO_F_MAC 6404 #define IDC_ENCODING_STATIC 6405 #define IDC_RADIO_ANSI 6406 -#define IDC_RADIO_UTF8SANSBOM 6407 +#define IDC_RADIO_UTF8_NO_BOM 6407 #define IDC_RADIO_UTF8 6408 #define IDC_RADIO_UTF16BIG 6409 #define IDC_RADIO_UTF16SMALL 6410