From 6d161a269efe4b96f71eb5dcbd8a47bb8457d792 Mon Sep 17 00:00:00 2001 From: Don Ho Date: Sun, 17 Jan 2010 23:13:06 +0000 Subject: [PATCH] [BUG_FIXED] Fix xml/html encoding overrides BOM if present. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@597 f5eea248-9336-0410-98b8-ebc06183d4e3 --- PowerEditor/installer/nppSetup.nsi | 32 ++++++-------------- PowerEditor/src/ScitillaComponent/Buffer.cpp | 16 ++++++++-- PowerEditor/src/ScitillaComponent/Buffer.h | 2 +- PowerEditor/src/Utf8_16.cpp | 24 +++++++++++++++ PowerEditor/src/Utf8_16.h | 3 ++ 5 files changed, 51 insertions(+), 26 deletions(-) diff --git a/PowerEditor/installer/nppSetup.nsi b/PowerEditor/installer/nppSetup.nsi index e60d35b75..8db6c3e6a 100644 --- a/PowerEditor/installer/nppSetup.nsi +++ b/PowerEditor/installer/nppSetup.nsi @@ -694,28 +694,7 @@ SubSection "Plugins" Plugins SetOutPath "$INSTDIR\plugins" File "..\bin\plugins\ComparePlugin.dll" SectionEnd - - Section "NppAutoIndent" NppAutoIndent - Delete "$INSTDIR\plugins\NppAutoIndent.dll" - SetOutPath "$INSTDIR\plugins" - File "..\bin\plugins\NppAutoIndent.dll" - - StrCmp $IS_LOCAL "1" 0 NOT_LOCAL - SetOutPath "$INSTDIR\plugins\Config\" - goto LOCAL - NOT_LOCAL: - SetOutPath "$APPDATA\Notepad++\plugins\Config\" - LOCAL: - File "..\bin\plugins\Config\NppAutoIndent.ini" - - SectionEnd - - Section "Document Monitor" DocMonitor - Delete "$INSTDIR\plugins\docMonitor.dll" - SetOutPath "$INSTDIR\plugins" - File "..\bin\plugins\docMonitor.dll" - SectionEnd Section "Change Markers" ChangeMarkers Delete "$INSTDIR\plugins\NppPlugin_ChangeMarker.dll" @@ -735,7 +714,14 @@ SubSection "Plugins" Plugins File "..\bin\plugins\PluginManager.dll" SetOutPath "$INSTDIR\updater" File "..\bin\updater\gpup.exe" - SectionEnd + SectionEnd + + Section "Light Explorer" LightExplorer + Delete "$INSTDIR\plugins\LightExplorer.dll" + SetOutPath "$INSTDIR\plugins" + File "..\bin\plugins\LightExplorer.dll" + SectionEnd + SubSectionEnd SubSection "Themes" Themes @@ -1023,7 +1009,7 @@ SubSection un.Plugins SectionEnd - Section un.FileBrowserLite + Section un.LightExplorer Delete "$INSTDIR\plugins\LightExplorer.dll" Delete "$INSTDIR\lightExplorer.ini" RMDir "$INSTDIR\plugins\" diff --git a/PowerEditor/src/ScitillaComponent/Buffer.cpp b/PowerEditor/src/ScitillaComponent/Buffer.cpp index 4f079a3a4..d02733d3b 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.cpp +++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp @@ -637,7 +637,7 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d return id; } -bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding, formatType *pFormat) +bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat) { const int blockSize = 128 * 1024; //128 kB char data[blockSize+1]; @@ -682,9 +682,21 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea __try { size_t lenFile = 0; size_t lenConvert = 0; //just in case conversion results in 0, but file not empty - + bool isFirstTime = true; + do { lenFile = fread(data, 1, blockSize, fp); + + if (isFirstTime) + { + if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit) + { + //printStr(TEXT("hola")); + encoding = -1; + } + isFirstTime = false; + } + if (encoding != -1) { data[lenFile] = '\0'; diff --git a/PowerEditor/src/ScitillaComponent/Buffer.h b/PowerEditor/src/ScitillaComponent/Buffer.h index f9d11e4d9..cfbcdcf91 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.h +++ b/PowerEditor/src/ScitillaComponent/Buffer.h @@ -122,7 +122,7 @@ private: BufferID _nextBufferID; size_t _nrBufs; - bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding = -1, formatType *pFormat = NULL); + bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat = NULL); }; #define MainFileManager FileManager::getInstance() diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index 52247ef95..6bee0631e 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -231,6 +231,30 @@ void Utf8_16_Read::determineEncoding() } } +UniMode Utf8_16_Read::determineEncoding(const unsigned char *buf, int bufLen) +{ + // detect UTF-16 big-endian with BOM + if (bufLen > 1 && buf[0] == k_Boms[uni16BE][0] && buf[1] == k_Boms[uni16BE][1]) + { + return uni16BE; + } + + // detect UTF-16 little-endian with BOM + if (bufLen > 1 && buf[0] == k_Boms[uni16LE][0] && buf[1] == k_Boms[uni16LE][1]) + { + return uni16LE; + } + + // detect UTF-8 with BOM + if (bufLen > 2 && buf[0] == k_Boms[uniUTF8][0] && + buf[1] == k_Boms[uniUTF8][1] && buf[2] == k_Boms[uniUTF8][2]) + { + return uniUTF8; + } + + return uni8Bit; +} + // ================================================================== diff --git a/PowerEditor/src/Utf8_16.h b/PowerEditor/src/Utf8_16.h index d88cbb739..aa8600568 100644 --- a/PowerEditor/src/Utf8_16.h +++ b/PowerEditor/src/Utf8_16.h @@ -116,8 +116,11 @@ public: UniMode getEncoding() const { return m_eEncoding; } size_t calcCurPos(size_t pos); + static UniMode determineEncoding(const unsigned char *buf, int bufLen); + protected: void determineEncoding(); + u78 utf8_7bits_8bits(); private: UniMode m_eEncoding;