mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-25 14:54:39 +02:00
[NEW_FEATURE] Add auto-detection of HTML/XML file encoding.
git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@573 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
parent
5b23ddeefb
commit
9ebb4b39f5
@ -1,18 +1,19 @@
|
|||||||
Notepad++ v5.6 new features and fixed bugs (from v5.5.1) :
|
Notepad++ v5.6 new features and fixed bugs (from v5.5.1) :
|
||||||
|
|
||||||
1. Add languages encoding - Chinese traditional (BIG5), Chinese Simplified (GB2312), Japanese (Shift JIS), Korean (EUC), Thai (TIS-620), Hebrew (iso-8859-8), Hebrew (1255), Central European (1250), Cyrillic (1251), Cyrillic (KOI8-U), Cyrillic (KOI8-R), Cyrillic (Mac), Western European(1252), Greek (1253), Turkish(1254), Arabic (1256), Baltic (1257) and Vietnamese (1258).
|
1. Add languages encoding - Chinese traditional (BIG5), Chinese Simplified (GB2312), Japanese (Shift JIS), Korean (EUC), Thai (TIS-620), Hebrew (iso-8859-8), Hebrew (1255), Central European (1250), Cyrillic (1251), Cyrillic (KOI8-U), Cyrillic (KOI8-R), Cyrillic (Mac), Western European(1252), Greek (1253), Turkish(1254), Arabic (1256), Baltic (1257) and Vietnamese (1258).
|
||||||
2. Add COBOL, D, Gui4Cli, PowerShell and R language support.
|
2. Add auto-detection of HTML and XML files encodings.
|
||||||
3. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
|
3. Add COBOL, D, Gui4Cli, PowerShell and R language support.
|
||||||
4. Add indent guide line highlighting for html/xml tags.
|
4. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
|
||||||
5. Add system tray context menu and new command argument "-systemtray".
|
5. Add indent guide line highlighting for html/xml tags.
|
||||||
6. Add new command argument "--help".
|
6. Add system tray context menu and new command argument "-systemtray".
|
||||||
7. Fix Calltip hint bug and add a new capacity in it.
|
7. Add new command argument "--help".
|
||||||
8. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
|
8. Fix Calltip hint bug and add a new capacity in it.
|
||||||
9. Fix the wrap symbol display problem.
|
9. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
|
||||||
10. Add SQL ESC symbol '\'.
|
10. Fix the wrap symbol display problem.
|
||||||
11. Fix column editor insert number bug in virtual space mode.
|
11. Add SQL ESC symbol '\'.
|
||||||
12. Fix status bar displaying "-2 char" issue for an empty document.
|
12. Fix column editor insert number bug in virtual space mode.
|
||||||
13. Fix installation of NppShell64 failed issue in installer.
|
13. Fix status bar displaying "-2 char" issue for an empty document.
|
||||||
|
14. Fix installation of NppShell64 failed issue in installer.
|
||||||
|
|
||||||
|
|
||||||
Included plugins (Unicode):
|
Included plugins (Unicode):
|
||||||
|
@ -783,6 +783,11 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
|
|||||||
scnN.nmhdr.idFrom = NULL;
|
scnN.nmhdr.idFrom = NULL;
|
||||||
_pluginsManager.notify(&scnN);
|
_pluginsManager.notify(&scnN);
|
||||||
|
|
||||||
|
if (encoding == -1)
|
||||||
|
{
|
||||||
|
encoding = getHtmlXmlEncoding(longFileName);
|
||||||
|
}
|
||||||
|
|
||||||
BufferID buffer = MainFileManager->loadFile(longFileName, NULL, encoding);
|
BufferID buffer = MainFileManager->loadFile(longFileName, NULL, encoding);
|
||||||
if (buffer != BUFFER_INVALID)
|
if (buffer != BUFFER_INVALID)
|
||||||
{
|
{
|
||||||
@ -858,6 +863,118 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Notepad_plus::getHtmlXmlEncoding(const TCHAR *fileName) const
|
||||||
|
{
|
||||||
|
// Get Language type
|
||||||
|
TCHAR *ext = PathFindExtension(fileName);
|
||||||
|
if (*ext == '.') //extension found
|
||||||
|
{
|
||||||
|
ext += 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
NppParameters *pNppParamInst = NppParameters::getInstance();
|
||||||
|
LangType langT = pNppParamInst->getLangFromExt(ext);
|
||||||
|
if (langT != L_XML && langT != L_HTML && langT == L_PHP)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Get the begining of file data
|
||||||
|
FILE *f = generic_fopen(fileName, TEXT("rb"));
|
||||||
|
if (!f)
|
||||||
|
return -1;
|
||||||
|
const int blockSize = 1024; // To ensure that length is long enough to capture the encoding in html
|
||||||
|
char data[blockSize];
|
||||||
|
int lenFile = fread(data, 1, blockSize, f);
|
||||||
|
fclose(f);
|
||||||
|
|
||||||
|
// Put data in _invisibleEditView
|
||||||
|
_invisibleEditView.execute(SCI_CLEARALL);
|
||||||
|
_invisibleEditView.execute(SCI_APPENDTEXT, lenFile, (LPARAM)data);
|
||||||
|
|
||||||
|
const char *encodingAliasRegExpr = "[a-zA-Z0-9_-]+";
|
||||||
|
|
||||||
|
if (langT == L_XML)
|
||||||
|
{
|
||||||
|
// find encoding by RegExpr
|
||||||
|
|
||||||
|
const char *xmlHeaderRegExpr = "<?xml[ \\t]+version[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]+encoding[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]*.*?>";
|
||||||
|
|
||||||
|
int startPos = 0;
|
||||||
|
int endPos = lenFile-1;
|
||||||
|
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
|
||||||
|
|
||||||
|
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
|
||||||
|
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
|
||||||
|
|
||||||
|
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(xmlHeaderRegExpr), (LPARAM)xmlHeaderRegExpr);
|
||||||
|
if (posFound != -1)
|
||||||
|
{
|
||||||
|
const char *encodingBlockRegExpr = "encoding[ \\t]*=[ \\t]*\"[^\".]+\"";
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingBlockRegExpr), (LPARAM)encodingBlockRegExpr);
|
||||||
|
|
||||||
|
const char *encodingRegExpr = "\".+\"";
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingRegExpr), (LPARAM)encodingRegExpr);
|
||||||
|
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
|
||||||
|
|
||||||
|
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
|
||||||
|
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
|
||||||
|
|
||||||
|
char encodingStr[128];
|
||||||
|
_invisibleEditView.getText(encodingStr, startPos, endPos);
|
||||||
|
|
||||||
|
int enc = getCpFromStringValue(encodingStr);
|
||||||
|
return (enc==CP_ACP?-1:enc);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else // if (langT == L_HTML)
|
||||||
|
{
|
||||||
|
// find encoding by RegExpr
|
||||||
|
const char *htmlHeaderRegExpr = "<meta[ \\t]+http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*/*>";
|
||||||
|
const char *htmlHeaderRegExpr2 = "<meta[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+/*>";
|
||||||
|
|
||||||
|
int startPos = 0;
|
||||||
|
int endPos = lenFile-1;
|
||||||
|
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
|
||||||
|
|
||||||
|
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
|
||||||
|
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
|
||||||
|
|
||||||
|
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr), (LPARAM)htmlHeaderRegExpr);
|
||||||
|
|
||||||
|
if (posFound != -1)
|
||||||
|
{
|
||||||
|
const char *charsetBlockRegExpr = "charset[ \\t]*=[ \\t]*.+\"";
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetBlockRegExpr), (LPARAM)charsetBlockRegExpr);
|
||||||
|
|
||||||
|
const char *charsetRegExpr = "=[ \\t]*[^\"]+";
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetRegExpr), (LPARAM)charsetRegExpr);
|
||||||
|
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
|
||||||
|
|
||||||
|
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
|
||||||
|
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
|
||||||
|
|
||||||
|
char encodingStr[128];
|
||||||
|
_invisibleEditView.getText(encodingStr, startPos, endPos);
|
||||||
|
|
||||||
|
int enc = getCpFromStringValue(encodingStr);
|
||||||
|
return (enc==CP_ACP?-1:enc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr2), (LPARAM)htmlHeaderRegExpr2);
|
||||||
|
if (posFound == -1)
|
||||||
|
return -1;
|
||||||
|
//TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool Notepad_plus::doReload(BufferID id, bool alert)
|
bool Notepad_plus::doReload(BufferID id, bool alert)
|
||||||
{
|
{
|
||||||
|
@ -265,6 +265,7 @@ public:
|
|||||||
bool replaceInFiles();
|
bool replaceInFiles();
|
||||||
void setFindReplaceFolderFilter(const TCHAR *dir, const TCHAR *filters);
|
void setFindReplaceFolderFilter(const TCHAR *dir, const TCHAR *filters);
|
||||||
vector<generic_string> addNppComponents(const TCHAR *destDir, const TCHAR *extFilterName, const TCHAR *extFilter);
|
vector<generic_string> addNppComponents(const TCHAR *destDir, const TCHAR *extFilterName, const TCHAR *extFilter);
|
||||||
|
int getHtmlXmlEncoding(const TCHAR *fileName) const;
|
||||||
|
|
||||||
static HWND gNppHWND; //static handle to Notepad++ window, NULL if non-existant
|
static HWND gNppHWND; //static handle to Notepad++ window, NULL if non-existant
|
||||||
private:
|
private:
|
||||||
|
@ -377,6 +377,34 @@ ScintillaKeyDefinition scintKeyDefs[] = { //array of accelerator keys for all po
|
|||||||
//
|
//
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool isInList(const TCHAR *token, const TCHAR *list) {
|
||||||
|
if ((!token) || (!list))
|
||||||
|
return false;
|
||||||
|
TCHAR word[64];
|
||||||
|
int i = 0;
|
||||||
|
int j = 0;
|
||||||
|
for (; i <= int(lstrlen(list)) ; i++)
|
||||||
|
{
|
||||||
|
if ((list[i] == ' ')||(list[i] == '\0'))
|
||||||
|
{
|
||||||
|
if (j != 0)
|
||||||
|
{
|
||||||
|
word[j] = '\0';
|
||||||
|
j = 0;
|
||||||
|
|
||||||
|
if (!generic_stricmp(token, word))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
word[j] = list[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
static int strVal(const TCHAR *str, int base) {
|
static int strVal(const TCHAR *str, int base) {
|
||||||
if (!str) return -1;
|
if (!str) return -1;
|
||||||
if (!str[0]) return 0;
|
if (!str[0]) return 0;
|
||||||
@ -1988,6 +2016,38 @@ void NppParameters::feedUserLang(TiXmlNode *node)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps a file extension (without the leading dot) to a language type.
//
// Languages are visited from the last index down to the first. For each one
// the default extension list and, when a lexer styler with the same name
// exists, its user extension list are joined with a space and searched
// with isInList().
//
// ext: extension to look up.
// Returns the ID of the first language whose lists contain `ext`, or L_TXT
// when none does.
LangType NppParameters::getLangFromExt(const TCHAR *ext)
{
	for (int langIndex = getNbLang() - 1 ; langIndex >= 0 ; langIndex--)
	{
		Lang *lang = getLangFromIndex(langIndex);

		const TCHAR *defaultExts = lang->getDefaultExtList();

		// User-defined extensions live in the lexer styler of the same name, if any.
		LexerStylerArray &stylers = getLStylerArray();
		LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
		const TCHAR *userExts = styler ? styler->getLexerUserExt() : NULL;

		generic_string allExts(TEXT(""));
		if (defaultExts)
			allExts += defaultExts;
		if (userExts)
		{
			allExts += TEXT(" ");
			allExts += userExts;
		}

		if (isInList(ext, allExts.c_str()))
			return lang->getLangID();
	}
	return L_TXT;
}
|
||||||
|
|
||||||
void NppParameters::writeUserDefinedLang()
|
void NppParameters::writeUserDefinedLang()
|
||||||
{
|
{
|
||||||
if (!_pXmlUserLangDoc)
|
if (!_pXmlUserLangDoc)
|
||||||
|
@ -1113,6 +1113,8 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
int getNbLang() const {return _nbLang;};
|
int getNbLang() const {return _nbLang;};
|
||||||
|
|
||||||
|
LangType getLangFromExt(const TCHAR *ext);
|
||||||
|
|
||||||
const TCHAR * getLangExtFromName(const TCHAR *langName) const {
|
const TCHAR * getLangExtFromName(const TCHAR *langName) const {
|
||||||
for (int i = 0 ; i < _nbLang ; i++)
|
for (int i = 0 ; i < _nbLang ; i++)
|
||||||
|
@ -34,34 +34,6 @@ const int blockSize = 128 * 1024 + 4;
|
|||||||
const int CR = 0x0D;
|
const int CR = 0x0D;
|
||||||
const int LF = 0x0A;
|
const int LF = 0x0A;
|
||||||
|
|
||||||
static bool isInList(const TCHAR *token, const TCHAR *list) {
|
|
||||||
if ((!token) || (!list))
|
|
||||||
return false;
|
|
||||||
TCHAR word[64];
|
|
||||||
int i = 0;
|
|
||||||
int j = 0;
|
|
||||||
for (; i <= int(lstrlen(list)) ; i++)
|
|
||||||
{
|
|
||||||
if ((list[i] == ' ')||(list[i] == '\0'))
|
|
||||||
{
|
|
||||||
if (j != 0)
|
|
||||||
{
|
|
||||||
word[j] = '\0';
|
|
||||||
j = 0;
|
|
||||||
|
|
||||||
if (!generic_stricmp(token, word))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
word[j] = list[i];
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
|
|
||||||
Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName) //type must be either DOC_REGULAR or DOC_UNNAMED
|
Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName) //type must be either DOC_REGULAR or DOC_UNNAMED
|
||||||
: _pManager(pManager), _id(id), _isDirty(false), _doc(doc), _isFileReadOnly(false), _isUserReadOnly(false), _recentTag(-1), _references(0),
|
: _pManager(pManager), _id(id), _isDirty(false), _doc(doc), _isFileReadOnly(false), _isUserReadOnly(false), _recentTag(-1), _references(0),
|
||||||
_canNotify(false), _timeStamp(0), _needReloading(false), _encoding(-1)
|
_canNotify(false), _timeStamp(0), _needReloading(false), _encoding(-1)
|
||||||
@ -84,6 +56,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus
|
|||||||
_canNotify = true;
|
_canNotify = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Buffer::determinateFormat(const char *data) {
|
void Buffer::determinateFormat(const char *data) {
|
||||||
_format = WIN_FORMAT;
|
_format = WIN_FORMAT;
|
||||||
size_t len = strlen(data);
|
size_t len = strlen(data);
|
||||||
@ -169,7 +142,7 @@ void Buffer::setFileName(const TCHAR *fn, LangType defaultLang)
|
|||||||
else // if it's not user lang, then check if it's supported lang
|
else // if it's not user lang, then check if it's supported lang
|
||||||
{
|
{
|
||||||
_userLangExt[0] = '\0';
|
_userLangExt[0] = '\0';
|
||||||
newLang = getLangFromExt(ext);
|
newLang = pNppParamInst->getLangFromExt(ext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -278,39 +251,6 @@ std::vector<HeaderLineState> & Buffer::getHeaderLineState(ScintillaEditView * id
|
|||||||
return _foldStates.at(index);
|
return _foldStates.at(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
LangType Buffer::getLangFromExt(const TCHAR *ext)
|
|
||||||
{
|
|
||||||
NppParameters *pNppParam = NppParameters::getInstance();
|
|
||||||
int i = pNppParam->getNbLang();
|
|
||||||
i--;
|
|
||||||
while (i >= 0)
|
|
||||||
{
|
|
||||||
Lang *l = pNppParam->getLangFromIndex(i--);
|
|
||||||
|
|
||||||
const TCHAR *defList = l->getDefaultExtList();
|
|
||||||
const TCHAR *userList = NULL;
|
|
||||||
|
|
||||||
LexerStylerArray &lsa = pNppParam->getLStylerArray();
|
|
||||||
const TCHAR *lName = l->getLangName();
|
|
||||||
LexerStyler *pLS = lsa.getLexerStylerByName(lName);
|
|
||||||
|
|
||||||
if (pLS)
|
|
||||||
userList = pLS->getLexerUserExt();
|
|
||||||
|
|
||||||
generic_string list(TEXT(""));
|
|
||||||
if (defList)
|
|
||||||
list += defList;
|
|
||||||
if (userList)
|
|
||||||
{
|
|
||||||
list += TEXT(" ");
|
|
||||||
list += userList;
|
|
||||||
}
|
|
||||||
if (isInList(ext, list.c_str()))
|
|
||||||
return l->getLangID();
|
|
||||||
}
|
|
||||||
return L_TXT;
|
|
||||||
}
|
|
||||||
|
|
||||||
Lang * Buffer::getCurrentLang() const {
|
Lang * Buffer::getCurrentLang() const {
|
||||||
NppParameters *pNppParam = NppParameters::getInstance();
|
NppParameters *pNppParam = NppParameters::getInstance();
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
@ -139,8 +139,6 @@ public :
|
|||||||
//Destructor makes sure its purged
|
//Destructor makes sure its purged
|
||||||
Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName);
|
Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName);
|
||||||
|
|
||||||
LangType getLangFromExt(const TCHAR *ext);
|
|
||||||
|
|
||||||
// this method 1. copies the file name
|
// this method 1. copies the file name
|
||||||
// 2. determinates the language from the ext of file name
|
// 2. determinates the language from the ext of file name
|
||||||
// 3. gets the last modified time
|
// 3. gets the last modified time
|
||||||
|
Loading…
x
Reference in New Issue
Block a user