diff --git a/PowerEditor/bin/change.log b/PowerEditor/bin/change.log
index 699f0bf91..5729cd02d 100644
--- a/PowerEditor/bin/change.log
+++ b/PowerEditor/bin/change.log
@@ -1,18 +1,19 @@
Notepad++ v5.6 new features and fixed bugs (from v5.5.1) :
1. Add languages encoding - Chinese traditional (BIG5), Chinese Simplified (GB2312), Japanese (Shift JIS), Korean (EUC), Thai (TIS-620), Hebrew (iso-8859-8), Hebrew (1255), Central European (1250), Cyrillic (1251), Cyrillic (KOI8-U), Cyrillic (KOI8-R), Cyrillic (Mac), Western European(1252), Greek (1253), Turkish(1254), Arabic (1256), Baltic (1257) and Vietnamese (1258).
-2. Add COBOL, D, Gui4Cli, PowerShell and R language support.
-3. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
-4. Add indent guide line highlighting for html/xml tags.
-5. Add system tray context menu and new command argument "-systemtray".
-6. Add new command argument "--help".
-7. Fix Calltip hint bug and add a new capacity in it.
-8. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
-9. Fix the wrap symbol display problem.
-10. Add SQL ESC symbol '\'.
-11. Fix column editor insert number bug in virtual space mode.
-12. Fix status bar displaying "-2 char" issue for a empty document.
-13. Fix installation of NppShell64 failed issue in installer.
+2. Add auto-detection of HTML and XML files encodings.
+3. Add COBOL, D, Gui4Cli, PowerShell and R language support.
+4. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
+5. Add indent guide line highlighting for html/xml tags.
+6. Add system tray context menu and new command argument "-systemtray".
+7. Add new command argument "--help".
+8. Fix Calltip hint bug and add a new capability to it.
+9. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
+10. Fix the wrap symbol display problem.
+11. Add SQL ESC symbol '\'.
+12. Fix column editor insert number bug in virtual space mode.
+13. Fix status bar displaying "-2 char" issue for an empty document.
+14. Fix NppShell64 installation failure in installer.
Included plugins (Unicode):
diff --git a/PowerEditor/src/Notepad_plus.cpp b/PowerEditor/src/Notepad_plus.cpp
index 6eb24fbde..aaa0f3c08 100644
--- a/PowerEditor/src/Notepad_plus.cpp
+++ b/PowerEditor/src/Notepad_plus.cpp
@@ -783,6 +783,11 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
scnN.nmhdr.idFrom = NULL;
_pluginsManager.notify(&scnN);
+ if (encoding == -1)
+ {
+ encoding = getHtmlXmlEncoding(longFileName);
+ }
+
BufferID buffer = MainFileManager->loadFile(longFileName, NULL, encoding);
if (buffer != BUFFER_INVALID)
{
@@ -858,6 +863,118 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
}
}
+// Detects the character encoding declared at the beginning of an XML, HTML or PHP file.
+//
+// fileName: path of the file to inspect; its extension is mapped to a language
+//           through NppParameters::getLangFromExt().
+// Returns the value produced by getCpFromStringValue() for the declared encoding
+// name, or -1 when the file has no extension, is not XML/HTML/PHP, cannot be
+// opened, is empty, declares no encoding the patterns recognise, or when the
+// name maps to CP_ACP.
+// Side effect: _invisibleEditView is emptied and refilled with (at most) the
+// first 1024 bytes of the file, and its search flags and search target are changed.
+int Notepad_plus::getHtmlXmlEncoding(const TCHAR *fileName) const
+{
+    // Get Language type
+    const TCHAR *ext = PathFindExtension(fileName);
+    if (*ext != '.') // no extension found
+        return -1;
+    ext += 1;
+
+    NppParameters *pNppParamInst = NppParameters::getInstance();
+    LangType langT = pNppParamInst->getLangFromExt(ext);
+    // Only XML, HTML and PHP files are inspected; PHP is scanned with the HTML patterns.
+    if (langT != L_XML && langT != L_HTML && langT != L_PHP)
+        return -1;
+
+    // Get the beginning of file data
+    FILE *f = generic_fopen(fileName, TEXT("rb"));
+    if (!f)
+        return -1;
+    const int blockSize = 1024; // To ensure that length is long enough to capture the encoding in html
+    char data[blockSize];
+    const int lenFile = int(fread(data, 1, blockSize, f));
+    fclose(f);
+    if (lenFile <= 0) // nothing was read, so there is nothing to scan
+        return -1;
+
+    // Put data in _invisibleEditView
+    _invisibleEditView.execute(SCI_CLEARALL);
+    _invisibleEditView.execute(SCI_APPENDTEXT, lenFile, (LPARAM)data);
+
+    const char *encodingAliasRegExpr = "[a-zA-Z0-9_-]+";
+
+    // NOTE(review): the three header patterns below are empty strings in the text
+    // under review (they look like they were lost in extraction) - confirm them
+    // against the repository before merging.
+    const char *xmlHeaderRegExpr = "";
+    const char *htmlHeaderRegExpr = "";
+    const char *htmlHeaderRegExpr2 = "";
+
+    // Each successful SCI_SEARCHINTARGET shrinks the target to its match, so the
+    // steps below narrow the header down to the bare encoding name.
+    const char *xmlSteps[] = { "encoding[ \\t]*=[ \\t]*\"[^\".]+\"", "\".+\"", encodingAliasRegExpr };
+    const char *htmlSteps[] = { "charset[ \\t]*=[ \\t]*.+\"", "=[ \\t]*[^\"]+", encodingAliasRegExpr };
+    const int nbSteps = 3;
+
+    const bool isXml = (langT == L_XML);
+    const char *headerRegExpr = isXml ? xmlHeaderRegExpr : htmlHeaderRegExpr;
+    const char **steps = isXml ? xmlSteps : htmlSteps;
+
+    // find encoding by RegExpr
+    _invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
+    _invisibleEditView.execute(SCI_SETTARGETSTART, 0);
+    // The target end is a position, not an index: lenFile covers the last byte too.
+    _invisibleEditView.execute(SCI_SETTARGETEND, lenFile);
+
+    int posFound = int(_invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(headerRegExpr), (LPARAM)headerRegExpr));
+    if (posFound == -1)
+    {
+        if (!isXml)
+        {
+            //TODO: extract the encoding from the second HTML header form; it is only looked up for now
+            _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr2), (LPARAM)htmlHeaderRegExpr2);
+        }
+        return -1;
+    }
+
+    for (int i = 0 ; i < nbSteps ; i++)
+    {
+        posFound = int(_invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(steps[i]), (LPARAM)steps[i]));
+        if (posFound == -1) // a failed step leaves the previous target in place: do not read it as a name
+            return -1;
+    }
+
+    const int startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
+    const int endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
+
+    char encodingStr[128];
+    // Refuse names that would not fit (terminator included) instead of overrunning the buffer.
+    if (endPos <= startPos || endPos - startPos >= int(sizeof(encodingStr)))
+        return -1;
+    _invisibleEditView.getText(encodingStr, startPos, endPos);
+
+    int enc = getCpFromStringValue(encodingStr);
+    return (enc==CP_ACP?-1:enc);
+}
bool Notepad_plus::doReload(BufferID id, bool alert)
{
diff --git a/PowerEditor/src/Notepad_plus.h b/PowerEditor/src/Notepad_plus.h
index df72bf7a4..c7e7355be 100644
--- a/PowerEditor/src/Notepad_plus.h
+++ b/PowerEditor/src/Notepad_plus.h
@@ -265,6 +265,7 @@ public:
bool replaceInFiles();
void setFindReplaceFolderFilter(const TCHAR *dir, const TCHAR *filters);
vector addNppComponents(const TCHAR *destDir, const TCHAR *extFilterName, const TCHAR *extFilter);
+ // Reads the start of fileName and returns the encoding it declares, or -1 when none is detected.
+ int getHtmlXmlEncoding(const TCHAR *fileName) const;
static HWND gNppHWND; //static handle to Notepad++ window, NULL if non-existant
private:
diff --git a/PowerEditor/src/Parameters.cpp b/PowerEditor/src/Parameters.cpp
index d73bb02bd..a6fcf5d93 100644
--- a/PowerEditor/src/Parameters.cpp
+++ b/PowerEditor/src/Parameters.cpp
@@ -377,6 +377,34 @@ ScintillaKeyDefinition scintKeyDefs[] = { //array of accelerator keys for all po
//
};
+// Tells whether token is one of the space-separated words of list.
+//
+// token: word to look for; list: words separated by single or repeated spaces.
+// Words are compared with generic_stricmp(). Returns false when either pointer
+// is NULL or when no word matches.
+// A list entry too long for the local buffer (64 characters or more) is skipped
+// rather than written past the end of the buffer.
+static bool isInList(const TCHAR *token, const TCHAR *list) {
+    if ((!token) || (!list))
+        return false;
+
+    const int wordCapacity = 64;
+    TCHAR word[wordCapacity];
+    const int listLen = int(lstrlen(list)); // measured once instead of on every iteration
+    int j = 0;
+    bool wordTooLong = false;
+
+    // i runs up to and including listLen so that the terminating '\0' closes the last word
+    for (int i = 0 ; i <= listLen ; i++)
+    {
+        if ((list[i] == ' ')||(list[i] == '\0'))
+        {
+            if (j != 0 && !wordTooLong)
+            {
+                word[j] = '\0';
+                if (!generic_stricmp(token, word))
+                    return true;
+            }
+            j = 0;
+            wordTooLong = false;
+        }
+        else if (j < wordCapacity - 1) // keep one slot for the terminator
+        {
+            word[j] = list[i];
+            j++;
+        }
+        else
+        {
+            wordTooLong = true;
+        }
+    }
+    return false;
+}
+
static int strVal(const TCHAR *str, int base) {
if (!str) return -1;
if (!str[0]) return 0;
@@ -1988,6 +2016,38 @@ void NppParameters::feedUserLang(TiXmlNode *node)
}
}
+// Maps a file extension to one of the known languages.
+//
+// ext: extension to look up (getHtmlXmlEncoding passes it without the leading dot).
+// Languages are examined from the highest index down to 0. For each one the
+// default extension list is combined with the user extension list of the lexer
+// styler carrying the same name (when there is one) and searched with isInList().
+// Returns the ID of the first language whose lists contain ext, or L_TXT when
+// no language matches.
+LangType NppParameters::getLangFromExt(const TCHAR *ext)
+{
+    for (int idx = getNbLang() - 1 ; idx >= 0 ; --idx)
+    {
+        Lang *lang = getLangFromIndex(idx);
+        const TCHAR *defaultExts = lang->getDefaultExtList();
+
+        // The user may have attached extra extensions to the matching lexer styler
+        LexerStylerArray &stylers = getLStylerArray();
+        LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
+        const TCHAR *userExts = styler ? styler->getLexerUserExt() : NULL;
+
+        // Build "<default exts> <user exts>", leaving out whichever part is missing
+        generic_string allExts;
+        if (defaultExts)
+            allExts += defaultExts;
+        if (userExts)
+        {
+            allExts += TEXT(" ");
+            allExts += userExts;
+        }
+
+        if (isInList(ext, allExts.c_str()))
+            return lang->getLangID();
+    }
+    return L_TXT;
+}
+
void NppParameters::writeUserDefinedLang()
{
if (!_pXmlUserLangDoc)
diff --git a/PowerEditor/src/Parameters.h b/PowerEditor/src/Parameters.h
index 5f0491e57..c3187e199 100644
--- a/PowerEditor/src/Parameters.h
+++ b/PowerEditor/src/Parameters.h
@@ -1113,6 +1113,8 @@ public:
};
int getNbLang() const {return _nbLang;};
+
+ // Returns the ID of the language whose extension lists contain ext, or L_TXT when none matches.
+ LangType getLangFromExt(const TCHAR *ext);
const TCHAR * getLangExtFromName(const TCHAR *langName) const {
for (int i = 0 ; i < _nbLang ; i++)
diff --git a/PowerEditor/src/ScitillaComponent/Buffer.cpp b/PowerEditor/src/ScitillaComponent/Buffer.cpp
index 6b44609b7..76a41e059 100644
--- a/PowerEditor/src/ScitillaComponent/Buffer.cpp
+++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp
@@ -34,34 +34,6 @@ const int blockSize = 128 * 1024 + 4;
const int CR = 0x0D;
const int LF = 0x0A;
-static bool isInList(const TCHAR *token, const TCHAR *list) {
- if ((!token) || (!list))
- return false;
- TCHAR word[64];
- int i = 0;
- int j = 0;
- for (; i <= int(lstrlen(list)) ; i++)
- {
- if ((list[i] == ' ')||(list[i] == '\0'))
- {
- if (j != 0)
- {
- word[j] = '\0';
- j = 0;
-
- if (!generic_stricmp(token, word))
- return true;
- }
- }
- else
- {
- word[j] = list[i];
- j++;
- }
- }
- return false;
-};
-
Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName) //type must be either DOC_REGULAR or DOC_UNNAMED
: _pManager(pManager), _id(id), _isDirty(false), _doc(doc), _isFileReadOnly(false), _isUserReadOnly(false), _recentTag(-1), _references(0),
_canNotify(false), _timeStamp(0), _needReloading(false), _encoding(-1)
@@ -84,6 +56,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus
_canNotify = true;
}
+
void Buffer::determinateFormat(const char *data) {
_format = WIN_FORMAT;
size_t len = strlen(data);
@@ -169,7 +142,7 @@ void Buffer::setFileName(const TCHAR *fn, LangType defaultLang)
else // if it's not user lang, then check if it's supported lang
{
_userLangExt[0] = '\0';
- newLang = getLangFromExt(ext);
+ newLang = pNppParamInst->getLangFromExt(ext);
}
}
@@ -278,39 +251,6 @@ std::vector & Buffer::getHeaderLineState(ScintillaEditView * id
return _foldStates.at(index);
}
-LangType Buffer::getLangFromExt(const TCHAR *ext)
-{
- NppParameters *pNppParam = NppParameters::getInstance();
- int i = pNppParam->getNbLang();
- i--;
- while (i >= 0)
- {
- Lang *l = pNppParam->getLangFromIndex(i--);
-
- const TCHAR *defList = l->getDefaultExtList();
- const TCHAR *userList = NULL;
-
- LexerStylerArray &lsa = pNppParam->getLStylerArray();
- const TCHAR *lName = l->getLangName();
- LexerStyler *pLS = lsa.getLexerStylerByName(lName);
-
- if (pLS)
- userList = pLS->getLexerUserExt();
-
- generic_string list(TEXT(""));
- if (defList)
- list += defList;
- if (userList)
- {
- list += TEXT(" ");
- list += userList;
- }
- if (isInList(ext, list.c_str()))
- return l->getLangID();
- }
- return L_TXT;
-}
-
Lang * Buffer::getCurrentLang() const {
NppParameters *pNppParam = NppParameters::getInstance();
int i = 0;
diff --git a/PowerEditor/src/ScitillaComponent/Buffer.h b/PowerEditor/src/ScitillaComponent/Buffer.h
index fa37d799e..69c8b86d3 100644
--- a/PowerEditor/src/ScitillaComponent/Buffer.h
+++ b/PowerEditor/src/ScitillaComponent/Buffer.h
@@ -139,8 +139,6 @@ public :
//Destructor makes sure its purged
Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName);
- LangType getLangFromExt(const TCHAR *ext);
-
// this method 1. copies the file name
// 2. determinates the language from the ext of file name
// 3. gets the last modified time