[BUG_FIXED] Fix the XML/HTML encoding declaration overriding the BOM when one is present.

git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@597 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
Don Ho 2010-01-17 23:13:06 +00:00
parent 5d4d1d0ee9
commit 6d161a269e
5 changed files with 51 additions and 26 deletions

View File

@ -696,27 +696,6 @@ SubSection "Plugins" Plugins
SectionEnd
; Installer section for the NppAutoIndent plugin: extracts the plugin DLL and
; its .ini configuration file.
Section "NppAutoIndent" NppAutoIndent
; Remove any existing copy of the DLL before extracting the packaged one.
Delete "$INSTDIR\plugins\NppAutoIndent.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\NppAutoIndent.dll"
; Choose where the .ini goes: when $IS_LOCAL equals "1" execution continues
; with the $INSTDIR path below; otherwise it jumps to NOT_LOCAL.
StrCmp $IS_LOCAL "1" 0 NOT_LOCAL
SetOutPath "$INSTDIR\plugins\Config\"
; Skip over the $APPDATA branch.
goto LOCAL
NOT_LOCAL:
SetOutPath "$APPDATA\Notepad++\plugins\Config\"
LOCAL:
; Extracted into whichever output path was selected above.
File "..\bin\plugins\Config\NppAutoIndent.ini"
SectionEnd
; Installer section for the Document Monitor plugin: deletes any existing
; docMonitor.dll in the plugins directory, then extracts the packaged DLL there.
Section "Document Monitor" DocMonitor
Delete "$INSTDIR\plugins\docMonitor.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\docMonitor.dll"
SectionEnd
Section "Change Markers" ChangeMarkers
Delete "$INSTDIR\plugins\NppPlugin_ChangeMarker.dll"
SetOutPath "$INSTDIR\plugins"
@ -736,6 +715,13 @@ SubSection "Plugins" Plugins
SetOutPath "$INSTDIR\updater"
File "..\bin\updater\gpup.exe"
SectionEnd
; Installer section for the Light Explorer plugin: deletes any existing
; LightExplorer.dll in the plugins directory, then extracts the packaged DLL there.
Section "Light Explorer" LightExplorer
Delete "$INSTDIR\plugins\LightExplorer.dll"
SetOutPath "$INSTDIR\plugins"
File "..\bin\plugins\LightExplorer.dll"
SectionEnd
SubSectionEnd
SubSection "Themes" Themes
@ -1023,7 +1009,7 @@ SubSection un.Plugins
SectionEnd
Section un.FileBrowserLite
Section un.LightExplorer
Delete "$INSTDIR\plugins\LightExplorer.dll"
Delete "$INSTDIR\lightExplorer.ini"
RMDir "$INSTDIR\plugins\"

View File

@ -637,7 +637,7 @@ BufferID FileManager::bufferFromDocument(Document doc, bool dontIncrease, bool d
return id;
}
bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding, formatType *pFormat)
bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat)
{
const int blockSize = 128 * 1024; //128 kB
char data[blockSize+1];
@ -682,9 +682,21 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
__try {
size_t lenFile = 0;
size_t lenConvert = 0; //just in case conversion results in 0, but file not empty
bool isFirstTime = true;
do {
lenFile = fread(data, 1, blockSize, fp);
if (isFirstTime)
{
if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit)
{
//printStr(TEXT("hola"));
encoding = -1;
}
isFirstTime = false;
}
if (encoding != -1)
{
data[lenFile] = '\0';

View File

@ -122,7 +122,7 @@ private:
BufferID _nextBufferID;
size_t _nrBufs;
bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int encoding = -1, formatType *pFormat = NULL);
bool loadFileData(Document doc, const TCHAR * filename, Utf8_16_Read * UnicodeConvertor, LangType language, int & encoding, formatType *pFormat = NULL);
};
#define MainFileManager FileManager::getInstance()

View File

@ -231,6 +231,30 @@ void Utf8_16_Read::determineEncoding()
}
}
// Classify a buffer by the byte-order mark (BOM) at its start.
//   buf    : bytes to inspect; never dereferenced when bufLen is below 2
//   bufLen : number of valid bytes in buf
// Returns uni16BE, uni16LE or uniUTF8 when the leading bytes equal the
// matching k_Boms entry (two bytes for the UTF-16 marks, three for UTF-8),
// and uni8Bit when nothing matches or the buffer is too short.
UniMode Utf8_16_Read::determineEncoding(const unsigned char *buf, int bufLen)
{
	// Every mark is at least two bytes long, so shorter input cannot carry one.
	if (bufLen < 2)
		return uni8Bit;

	const unsigned char lead0 = buf[0];
	const unsigned char lead1 = buf[1];

	// UTF-16 big-endian mark
	if (lead0 == k_Boms[uni16BE][0] && lead1 == k_Boms[uni16BE][1])
		return uni16BE;

	// UTF-16 little-endian mark
	if (lead0 == k_Boms[uni16LE][0] && lead1 == k_Boms[uni16LE][1])
		return uni16LE;

	// UTF-8 mark: needs a third byte, so re-check the length before reading it
	const bool utf8Mark = bufLen > 2
		&& lead0 == k_Boms[uniUTF8][0]
		&& lead1 == k_Boms[uniUTF8][1]
		&& buf[2] == k_Boms[uniUTF8][2];

	return utf8Mark ? uniUTF8 : uni8Bit;
}
// ==================================================================

View File

@ -116,8 +116,11 @@ public:
UniMode getEncoding() const { return m_eEncoding; }
size_t calcCurPos(size_t pos);
static UniMode determineEncoding(const unsigned char *buf, int bufLen);
protected:
void determineEncoding();
u78 utf8_7bits_8bits();
private:
UniMode m_eEncoding;