mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-24 06:14:47 +02:00
[NEW_FEATURE] Add auto-detection of HTML/XML file encoding.
git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@573 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
parent
5b23ddeefb
commit
9ebb4b39f5
@ -1,18 +1,19 @@
|
||||
Notepad++ v5.6 new features and fixed bugs (from v5.5.1) :
|
||||
|
||||
1. Add languages encoding - Chinese traditional (BIG5), Chinese Simplified (GB2312), Japanese (Shift JIS), Korean (EUC), Thai (TIS-620), Hebrew (iso-8859-8), Hebrew (1255), Central European (1250), Cyrillic (1251), Cyrillic (KOI8-U), Cyrillic (KOI8-R), Cyrillic (Mac), Western European (1252), Greek (1253), Turkish (1254), Arabic (1256), Baltic (1257) and Vietnamese (1258).
|
||||
2. Add COBOL, D, Gui4Cli, PowerShell and R language support.
|
||||
3. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
|
||||
4. Add indent guide line highlighting for html/xml tags.
|
||||
5. Add system tray context menu and new command argument "-systemtray".
|
||||
6. Add new command argument "--help".
|
||||
7. Fix Calltip hint bug and add a new capacity in it.
|
||||
8. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
|
||||
9. Fix the wrap symbol display problem.
|
||||
10. Add SQL ESC symbol '\'.
|
||||
11. Fix column editor insert number bug in virtual space mode.
|
||||
12. Fix status bar displaying "-2 char" issue for an empty document.
|
||||
13. Fix NppShell64 installation failure in installer.
|
||||
2. Add auto-detection of HTML and XML files encodings.
|
||||
3. Add COBOL, D, Gui4Cli, PowerShell and R language support.
|
||||
4. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
|
||||
5. Add indent guide line highlighting for html/xml tags.
|
||||
6. Add system tray context menu and new command argument "-systemtray".
|
||||
7. Add new command argument "--help".
|
||||
8. Fix Calltip hint bug and add a new capacity in it.
|
||||
9. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
|
||||
10. Fix the wrap symbol display problem.
|
||||
11. Add SQL ESC symbol '\'.
|
||||
12. Fix column editor insert number bug in virtual space mode.
|
||||
13. Fix status bar displaying "-2 char" issue for an empty document.
|
||||
14. Fix NppShell64 installation failure in installer.
|
||||
|
||||
|
||||
Included plugins (Unicode):
|
||||
|
@ -783,6 +783,11 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
|
||||
scnN.nmhdr.idFrom = NULL;
|
||||
_pluginsManager.notify(&scnN);
|
||||
|
||||
if (encoding == -1)
|
||||
{
|
||||
encoding = getHtmlXmlEncoding(longFileName);
|
||||
}
|
||||
|
||||
BufferID buffer = MainFileManager->loadFile(longFileName, NULL, encoding);
|
||||
if (buffer != BUFFER_INVALID)
|
||||
{
|
||||
@ -858,6 +863,118 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
|
||||
}
|
||||
}
|
||||
|
||||
int Notepad_plus::getHtmlXmlEncoding(const TCHAR *fileName) const
|
||||
{
|
||||
// Get Language type
|
||||
TCHAR *ext = PathFindExtension(fileName);
|
||||
if (*ext == '.') //extension found
|
||||
{
|
||||
ext += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
NppParameters *pNppParamInst = NppParameters::getInstance();
|
||||
LangType langT = pNppParamInst->getLangFromExt(ext);
|
||||
if (langT != L_XML && langT != L_HTML && langT == L_PHP)
|
||||
return -1;
|
||||
|
||||
// Get the begining of file data
|
||||
FILE *f = generic_fopen(fileName, TEXT("rb"));
|
||||
if (!f)
|
||||
return -1;
|
||||
const int blockSize = 1024; // To ensure that length is long enough to capture the encoding in html
|
||||
char data[blockSize];
|
||||
int lenFile = fread(data, 1, blockSize, f);
|
||||
fclose(f);
|
||||
|
||||
// Put data in _invisibleEditView
|
||||
_invisibleEditView.execute(SCI_CLEARALL);
|
||||
_invisibleEditView.execute(SCI_APPENDTEXT, lenFile, (LPARAM)data);
|
||||
|
||||
const char *encodingAliasRegExpr = "[a-zA-Z0-9_-]+";
|
||||
|
||||
if (langT == L_XML)
|
||||
{
|
||||
// find encoding by RegExpr
|
||||
|
||||
const char *xmlHeaderRegExpr = "<?xml[ \\t]+version[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]+encoding[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]*.*?>";
|
||||
|
||||
int startPos = 0;
|
||||
int endPos = lenFile-1;
|
||||
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
|
||||
|
||||
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
|
||||
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
|
||||
|
||||
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(xmlHeaderRegExpr), (LPARAM)xmlHeaderRegExpr);
|
||||
if (posFound != -1)
|
||||
{
|
||||
const char *encodingBlockRegExpr = "encoding[ \\t]*=[ \\t]*\"[^\".]+\"";
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingBlockRegExpr), (LPARAM)encodingBlockRegExpr);
|
||||
|
||||
const char *encodingRegExpr = "\".+\"";
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingRegExpr), (LPARAM)encodingRegExpr);
|
||||
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
|
||||
|
||||
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
|
||||
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
|
||||
|
||||
char encodingStr[128];
|
||||
_invisibleEditView.getText(encodingStr, startPos, endPos);
|
||||
|
||||
int enc = getCpFromStringValue(encodingStr);
|
||||
return (enc==CP_ACP?-1:enc);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
else // if (langT == L_HTML)
|
||||
{
|
||||
// find encoding by RegExpr
|
||||
const char *htmlHeaderRegExpr = "<meta[ \\t]+http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*/*>";
|
||||
const char *htmlHeaderRegExpr2 = "<meta[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+/*>";
|
||||
|
||||
int startPos = 0;
|
||||
int endPos = lenFile-1;
|
||||
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
|
||||
|
||||
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
|
||||
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
|
||||
|
||||
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr), (LPARAM)htmlHeaderRegExpr);
|
||||
|
||||
if (posFound != -1)
|
||||
{
|
||||
const char *charsetBlockRegExpr = "charset[ \\t]*=[ \\t]*.+\"";
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetBlockRegExpr), (LPARAM)charsetBlockRegExpr);
|
||||
|
||||
const char *charsetRegExpr = "=[ \\t]*[^\"]+";
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetRegExpr), (LPARAM)charsetRegExpr);
|
||||
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
|
||||
|
||||
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
|
||||
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
|
||||
|
||||
char encodingStr[128];
|
||||
_invisibleEditView.getText(encodingStr, startPos, endPos);
|
||||
|
||||
int enc = getCpFromStringValue(encodingStr);
|
||||
return (enc==CP_ACP?-1:enc);
|
||||
}
|
||||
else
|
||||
{
|
||||
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr2), (LPARAM)htmlHeaderRegExpr2);
|
||||
if (posFound == -1)
|
||||
return -1;
|
||||
//TODO
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
bool Notepad_plus::doReload(BufferID id, bool alert)
|
||||
{
|
||||
|
@ -265,6 +265,7 @@ public:
|
||||
bool replaceInFiles();
|
||||
void setFindReplaceFolderFilter(const TCHAR *dir, const TCHAR *filters);
|
||||
vector<generic_string> addNppComponents(const TCHAR *destDir, const TCHAR *extFilterName, const TCHAR *extFilter);
|
||||
int getHtmlXmlEncoding(const TCHAR *fileName) const;
|
||||
|
||||
static HWND gNppHWND; //static handle to Notepad++ window, NULL if non-existant
|
||||
private:
|
||||
|
@ -377,6 +377,34 @@ ScintillaKeyDefinition scintKeyDefs[] = { //array of accelerator keys for all po
|
||||
//
|
||||
};
|
||||
|
||||
static bool isInList(const TCHAR *token, const TCHAR *list) {
|
||||
if ((!token) || (!list))
|
||||
return false;
|
||||
TCHAR word[64];
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
for (; i <= int(lstrlen(list)) ; i++)
|
||||
{
|
||||
if ((list[i] == ' ')||(list[i] == '\0'))
|
||||
{
|
||||
if (j != 0)
|
||||
{
|
||||
word[j] = '\0';
|
||||
j = 0;
|
||||
|
||||
if (!generic_stricmp(token, word))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
word[j] = list[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
static int strVal(const TCHAR *str, int base) {
|
||||
if (!str) return -1;
|
||||
if (!str[0]) return 0;
|
||||
@ -1988,6 +2016,38 @@ void NppParameters::feedUserLang(TiXmlNode *node)
|
||||
}
|
||||
}
|
||||
|
||||
// Finds the language associated with a file extension.
//
// Languages are visited from the last index down to 0. For each one the default
// extension list and, when a lexer styler with the same name exists, the user
// extension list are joined with a space and searched with isInList().
//
// ext : extension to look up, without the leading dot.
// Returns the ID of the first language whose lists contain ext, L_TXT if none does.
LangType NppParameters::getLangFromExt(const TCHAR *ext)
{
	for (int langIndex = getNbLang() - 1 ; langIndex >= 0 ; langIndex--)
	{
		Lang *lang = getLangFromIndex(langIndex);

		const TCHAR *defaultExts = lang->getDefaultExtList();

		// User extensions are stored in the lexer styler named after the language
		const TCHAR *userExts = NULL;
		LexerStylerArray &stylers = getLStylerArray();
		LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
		if (styler)
			userExts = styler->getLexerUserExt();

		generic_string allExts(TEXT(""));
		if (defaultExts)
			allExts += defaultExts;
		if (userExts)
		{
			allExts += TEXT(" ");
			allExts += userExts;
		}

		if (isInList(ext, allExts.c_str()))
			return lang->getLangID();
	}
	return L_TXT;
}
|
||||
|
||||
void NppParameters::writeUserDefinedLang()
|
||||
{
|
||||
if (!_pXmlUserLangDoc)
|
||||
|
@ -1113,6 +1113,8 @@ public:
|
||||
};
|
||||
|
||||
// Returns the value of _nbLang (presumably the number of languages currently loaded - TODO confirm).
int getNbLang() const {return _nbLang;};
|
||||
|
||||
LangType getLangFromExt(const TCHAR *ext);
|
||||
|
||||
const TCHAR * getLangExtFromName(const TCHAR *langName) const {
|
||||
for (int i = 0 ; i < _nbLang ; i++)
|
||||
|
@ -34,34 +34,6 @@ const int blockSize = 128 * 1024 + 4;
|
||||
const int CR = 0x0D;
|
||||
const int LF = 0x0A;
|
||||
|
||||
static bool isInList(const TCHAR *token, const TCHAR *list) {
|
||||
if ((!token) || (!list))
|
||||
return false;
|
||||
TCHAR word[64];
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
for (; i <= int(lstrlen(list)) ; i++)
|
||||
{
|
||||
if ((list[i] == ' ')||(list[i] == '\0'))
|
||||
{
|
||||
if (j != 0)
|
||||
{
|
||||
word[j] = '\0';
|
||||
j = 0;
|
||||
|
||||
if (!generic_stricmp(token, word))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
word[j] = list[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName) //type must be either DOC_REGULAR or DOC_UNNAMED
|
||||
: _pManager(pManager), _id(id), _isDirty(false), _doc(doc), _isFileReadOnly(false), _isUserReadOnly(false), _recentTag(-1), _references(0),
|
||||
_canNotify(false), _timeStamp(0), _needReloading(false), _encoding(-1)
|
||||
@ -84,6 +56,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus
|
||||
_canNotify = true;
|
||||
}
|
||||
|
||||
|
||||
void Buffer::determinateFormat(const char *data) {
|
||||
_format = WIN_FORMAT;
|
||||
size_t len = strlen(data);
|
||||
@ -169,7 +142,7 @@ void Buffer::setFileName(const TCHAR *fn, LangType defaultLang)
|
||||
else // if it's not user lang, then check if it's supported lang
|
||||
{
|
||||
_userLangExt[0] = '\0';
|
||||
newLang = getLangFromExt(ext);
|
||||
newLang = pNppParamInst->getLangFromExt(ext);
|
||||
}
|
||||
}
|
||||
|
||||
@ -278,39 +251,6 @@ std::vector<HeaderLineState> & Buffer::getHeaderLineState(ScintillaEditView * id
|
||||
return _foldStates.at(index);
|
||||
}
|
||||
|
||||
// Finds the language associated with a file extension, using the languages
// registered in NppParameters.
//
// Languages are visited from the last index down to 0. For each one the default
// extension list and, when a lexer styler with the same name exists, the user
// extension list are joined with a space and searched with isInList().
//
// ext : extension to look up, without the leading dot.
// Returns the ID of the first language whose lists contain ext, L_TXT if none does.
LangType Buffer::getLangFromExt(const TCHAR *ext)
{
	NppParameters *params = NppParameters::getInstance();
	for (int langIndex = params->getNbLang() - 1 ; langIndex >= 0 ; langIndex--)
	{
		Lang *lang = params->getLangFromIndex(langIndex);

		const TCHAR *defaultExts = lang->getDefaultExtList();

		// User extensions are stored in the lexer styler named after the language
		const TCHAR *userExts = NULL;
		LexerStylerArray &stylers = params->getLStylerArray();
		LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
		if (styler)
			userExts = styler->getLexerUserExt();

		generic_string allExts(TEXT(""));
		if (defaultExts)
			allExts += defaultExts;
		if (userExts)
		{
			allExts += TEXT(" ");
			allExts += userExts;
		}

		if (isInList(ext, allExts.c_str()))
			return lang->getLangID();
	}
	return L_TXT;
}
|
||||
|
||||
Lang * Buffer::getCurrentLang() const {
|
||||
NppParameters *pNppParam = NppParameters::getInstance();
|
||||
int i = 0;
|
||||
|
@ -139,8 +139,6 @@ public :
|
||||
//Destructor makes sure its purged
|
||||
Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName);
|
||||
|
||||
LangType getLangFromExt(const TCHAR *ext);
|
||||
|
||||
// this method 1. copies the file name
|
||||
// 2. determinates the language from the ext of file name
|
||||
// 3. gets the last modified time
|
||||
|
Loading…
x
Reference in New Issue
Block a user