[BUG_FIXED] Fix the XML/HTML encoding declaration overriding the BOM when one is present.

git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@597 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
Don Ho 2010-01-17 23:13:06 +00:00
parent 5d4d1d0ee9
commit 6d161a269e
5 changed files with 51 additions and 26 deletions

View File

@ -694,28 +694,7 @@ SubSection "Plugins" Plugins
SetOutPath "$INSTDIR\plugins" SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\ComparePlugin.dll" File "..\bin\plugins\ComparePlugin.dll"
SectionEnd SectionEnd
; Installer section "NppAutoIndent": installs the NppAutoIndent plugin DLL and its .ini configuration file.
Section "NppAutoIndent" NppAutoIndent
; Remove any existing copy of the DLL before extracting the bundled one.
Delete "$INSTDIR\plugins\NppAutoIndent.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\NppAutoIndent.dll"
; Pick the directory for the config file: when $IS_LOCAL equals "1" execution falls through (jump offset 0)
; and the install directory is used; otherwise jump to NOT_LOCAL and use the directory under $APPDATA.
StrCmp $IS_LOCAL "1" 0 NOT_LOCAL
SetOutPath "$INSTDIR\plugins\Config\"
; Skip over the NOT_LOCAL branch.
goto LOCAL
NOT_LOCAL:
SetOutPath "$APPDATA\Notepad++\plugins\Config\"
LOCAL:
; Both branches join here; the .ini is extracted into whichever output path was selected above.
File "..\bin\plugins\Config\NppAutoIndent.ini"
SectionEnd
; Installer section "Document Monitor": installs docMonitor.dll into the plugins directory.
Section "Document Monitor" DocMonitor
; Remove any existing copy of the DLL before extracting the bundled one.
Delete "$INSTDIR\plugins\docMonitor.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\docMonitor.dll"
SectionEnd
Section "Change Markers" ChangeMarkers Section "Change Markers" ChangeMarkers
Delete "$INSTDIR\plugins\NppPlugin_ChangeMarker.dll" Delete "$INSTDIR\plugins\NppPlugin_ChangeMarker.dll"
@ -735,7 +714,14 @@ SubSection "Plugins" Plugins
File "..\bin\plugins\PluginManager.dll" File "..\bin\plugins\PluginManager.dll"
SetOutPath "$INSTDIR\updater" SetOutPath "$INSTDIR\updater"
File "..\bin\updater\gpup.exe" File "..\bin\updater\gpup.exe"
SectionEnd SectionEnd
; Installer section "Light Explorer": installs LightExplorer.dll into the plugins directory.
Section "Light Explorer" LightExplorer
; Remove any existing copy of the DLL before extracting the bundled one.
Delete "$INSTDIR\plugins\LightExplorer.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\LightExplorer.dll"
SectionEnd
SubSectionEnd SubSectionEnd
SubSection "Themes" Themes SubSection "Themes" Themes
@ -1023,7 +1009,7 @@ SubSection un.Plugins
SectionEnd SectionEnd
Section un.FileBrowserLite Section un.LightExplorer
Delete "$INSTDIR\plugins\LightExplorer.dll" Delete "$INSTDIR\plugins\LightExplorer.dll"
Delete "$INSTDIR\lightExplorer.ini" Delete "$INSTDIR\lightExplorer.ini"
RMDir "$INSTDIR\plugins\" RMDir "$INSTDIR\plugins\"

View File

@ -637,7 +637,7 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d
return id; return id;
} }
bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding, formatType *pFormat) bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat)
{ {
const int blockSize = 128 * 1024; //128 kB const int blockSize = 128 * 1024; //128 kB
char data[blockSize+1]; char data[blockSize+1];
@ -682,9 +682,21 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
__try { __try {
size_t lenFile = 0; size_t lenFile = 0;
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
bool isFirstTime = true;
do { do {
lenFile = fread(data, 1, blockSize, fp); lenFile = fread(data, 1, blockSize, fp);
if (isFirstTime)
{
if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit)
{
//printStr(TEXT("hola"));
encoding = -1;
}
isFirstTime = false;
}
if (encoding != -1) if (encoding != -1)
{ {
data[lenFile] = '\0'; data[lenFile] = '\0';

View File

@ -122,7 +122,7 @@ private:
BufferID _nextBufferID; BufferID _nextBufferID;
size_t _nrBufs; size_t _nrBufs;
bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding = -1, formatType *pFormat = NULL); bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat = NULL);
}; };
#define MainFileManager FileManager::getInstance() #define MainFileManager FileManager::getInstance()

View File

@ -231,6 +231,30 @@ void Utf8_16_Read::determineEncoding()
} }
} }
// Inspects the leading bytes of a buffer for a byte-order mark (BOM).
//   buf    - start of the data to examine
//   bufLen - number of valid bytes available in buf
// Returns uni16BE, uni16LE or uniUTF8 when the buffer begins with the matching
// entry of the k_Boms table, and uni8Bit when none of those marks is found.
UniMode Utf8_16_Read::determineEncoding(const unsigned char *buf, int bufLen)
{
	// Both UTF-16 marks are compared over two bytes, so they share one length guard.
	// Big-endian is tested before little-endian.
	if (bufLen >= 2)
	{
		const bool startsWithBE = (buf[0] == k_Boms[uni16BE][0]) && (buf[1] == k_Boms[uni16BE][1]);
		if (startsWithBE)
			return uni16BE;

		const bool startsWithLE = (buf[0] == k_Boms[uni16LE][0]) && (buf[1] == k_Boms[uni16LE][1]);
		if (startsWithLE)
			return uni16LE;
	}

	// The UTF-8 mark is compared over three bytes and is only tried after both UTF-16 checks fail.
	if (bufLen >= 3)
	{
		bool startsWithUtf8 = true;
		for (int i = 0; i < 3 && startsWithUtf8; ++i)
		{
			if (buf[i] != k_Boms[uniUTF8][i])
				startsWithUtf8 = false;
		}
		if (startsWithUtf8)
			return uniUTF8;
	}

	// No recognised BOM at the start of the buffer.
	return uni8Bit;
}
// ================================================================== // ==================================================================

View File

@ -116,8 +116,11 @@ public:
UniMode getEncoding() const { return m_eEncoding; } UniMode getEncoding() const { return m_eEncoding; }
size_t calcCurPos(size_t pos); size_t calcCurPos(size_t pos);
static UniMode determineEncoding(const unsigned char *buf, int bufLen);
protected: protected:
void determineEncoding(); void determineEncoding();
u78 utf8_7bits_8bits(); u78 utf8_7bits_8bits();
private: private:
UniMode m_eEncoding; UniMode m_eEncoding;