[NEW_FEATURE] Add auto-detection of HTML/XML file encoding.

git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@573 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
Don Ho 2009-11-26 01:34:25 +00:00
parent 5b23ddeefb
commit 9ebb4b39f5
7 changed files with 195 additions and 76 deletions

View File

@ -1,18 +1,19 @@
Notepad++ v5.6 new features and fixed bugs (from v5.5.1) :
1. Add languages encoding - Chinese traditional (BIG5), Chinese Simplified (GB2312), Japanese (Shift JIS), Korean (EUC), Thai (TIS-620), Hebrew (iso-8859-8), Hebrew (1255), Central European (1250), Cyrillic (1251), Cyrillic (KOI8-U), Cyrillic (KOI8-R), Cyrillic (Mac), Western European(1252), Greek (1253), Turkish(1254), Arabic (1256), Baltic (1257) and Vietnamese (1258).
2. Add COBOL, D, Gui4Cli, PowerShell and R language support.
3. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
4. Add indent guide line highlighting for html/xml tags.
5. Add system tray context menu and new command argument "-systemtray".
6. Add new command argument "--help".
7. Fix Calltip hint bug and add a new capacity in it.
8. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
9. Fix the wrap symbol display problem.
10. Add SQL ESC symbol '\'.
11. Fix column editor insert number bug in virtual space mode.
12. Fix status bar displaying "-2 char" issue for an empty document.
13. Fix installation of NppShell64 failed issue in installer.
2. Add auto-detection of HTML and XML files encodings.
3. Add COBOL, D, Gui4Cli, PowerShell and R language support.
4. Add Marker Jumper feature (Jump down/up : Ctrl+Num/Ctrl+Shift+Num).
5. Add indent guide line highlighting for html/xml tags.
6. Add system tray context menu and new command argument "-systemtray".
7. Add new command argument "--help".
8. Fix Calltip hint bug and add a new capacity in it.
9. Add the ability to add the second keyword group for user in both LISP and Scheme languages.
10. Fix the wrap symbol display problem.
11. Add SQL ESC symbol '\'.
12. Fix column editor insert number bug in virtual space mode.
13. Fix status bar displaying "-2 char" issue for an empty document.
14. Fix installation of NppShell64 failed issue in installer.
Included plugins (Unicode):

View File

@ -783,6 +783,11 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
scnN.nmhdr.idFrom = NULL;
_pluginsManager.notify(&scnN);
if (encoding == -1)
{
encoding = getHtmlXmlEncoding(longFileName);
}
BufferID buffer = MainFileManager->loadFile(longFileName, NULL, encoding);
if (buffer != BUFFER_INVALID)
{
@ -858,6 +863,118 @@ BufferID Notepad_plus::doOpen(const TCHAR *fileName, bool isReadOnly, int encodi
}
}
int Notepad_plus::getHtmlXmlEncoding(const TCHAR *fileName) const
{
// Get Language type
TCHAR *ext = PathFindExtension(fileName);
if (*ext == '.') //extension found
{
ext += 1;
}
else
{
return -1;
}
NppParameters *pNppParamInst = NppParameters::getInstance();
LangType langT = pNppParamInst->getLangFromExt(ext);
if (langT != L_XML && langT != L_HTML && langT == L_PHP)
return -1;
// Get the begining of file data
FILE *f = generic_fopen(fileName, TEXT("rb"));
if (!f)
return -1;
const int blockSize = 1024; // To ensure that length is long enough to capture the encoding in html
char data[blockSize];
int lenFile = fread(data, 1, blockSize, f);
fclose(f);
// Put data in _invisibleEditView
_invisibleEditView.execute(SCI_CLEARALL);
_invisibleEditView.execute(SCI_APPENDTEXT, lenFile, (LPARAM)data);
const char *encodingAliasRegExpr = "[a-zA-Z0-9_-]+";
if (langT == L_XML)
{
// find encoding by RegExpr
const char *xmlHeaderRegExpr = "<?xml[ \\t]+version[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]+encoding[ \\t]*=[ \\t]*\"[^\"]+\"[ \\t]*.*?>";
int startPos = 0;
int endPos = lenFile-1;
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(xmlHeaderRegExpr), (LPARAM)xmlHeaderRegExpr);
if (posFound != -1)
{
const char *encodingBlockRegExpr = "encoding[ \\t]*=[ \\t]*\"[^\".]+\"";
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingBlockRegExpr), (LPARAM)encodingBlockRegExpr);
const char *encodingRegExpr = "\".+\"";
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingRegExpr), (LPARAM)encodingRegExpr);
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
char encodingStr[128];
_invisibleEditView.getText(encodingStr, startPos, endPos);
int enc = getCpFromStringValue(encodingStr);
return (enc==CP_ACP?-1:enc);
}
return -1;
}
else // if (langT == L_HTML)
{
// find encoding by RegExpr
const char *htmlHeaderRegExpr = "<meta[ \\t]+http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*/*>";
const char *htmlHeaderRegExpr2 = "<meta[ \\t]+content[ \\t]*=[ \\t]*\"text/html;[ \\t]+charset[ \\t]*=[ \\t]*.+\"[ \\t]*http-equiv[ \\t]*=[ \\t]*\"Content-Type\"[ \\t]+/*>";
int startPos = 0;
int endPos = lenFile-1;
_invisibleEditView.execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
_invisibleEditView.execute(SCI_SETTARGETSTART, startPos);
_invisibleEditView.execute(SCI_SETTARGETEND, endPos);
int posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr), (LPARAM)htmlHeaderRegExpr);
if (posFound != -1)
{
const char *charsetBlockRegExpr = "charset[ \\t]*=[ \\t]*.+\"";
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetBlockRegExpr), (LPARAM)charsetBlockRegExpr);
const char *charsetRegExpr = "=[ \\t]*[^\"]+";
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(charsetRegExpr), (LPARAM)charsetRegExpr);
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(encodingAliasRegExpr), (LPARAM)encodingAliasRegExpr);
startPos = int(_invisibleEditView.execute(SCI_GETTARGETSTART));
endPos = int(_invisibleEditView.execute(SCI_GETTARGETEND));
char encodingStr[128];
_invisibleEditView.getText(encodingStr, startPos, endPos);
int enc = getCpFromStringValue(encodingStr);
return (enc==CP_ACP?-1:enc);
}
else
{
posFound = _invisibleEditView.execute(SCI_SEARCHINTARGET, strlen(htmlHeaderRegExpr2), (LPARAM)htmlHeaderRegExpr2);
if (posFound == -1)
return -1;
//TODO
}
return -1;
}
}
bool Notepad_plus::doReload(BufferID id, bool alert)
{

View File

@ -265,6 +265,7 @@ public:
bool replaceInFiles();
void setFindReplaceFolderFilter(const TCHAR *dir, const TCHAR *filters);
vector<generic_string> addNppComponents(const TCHAR *destDir, const TCHAR *extFilterName, const TCHAR *extFilter);
// Reads the beginning of fileName and looks for an encoding declaration
// (XML header "encoding" attribute or HTML meta tag "charset" value).
// Returns the value obtained from getCpFromStringValue() for the declared
// name, or -1 when no encoding could be detected.
int getHtmlXmlEncoding(const TCHAR *fileName) const;
static HWND gNppHWND; //static handle to Notepad++ window, NULL if non-existant
private:

View File

@ -377,6 +377,34 @@ ScintillaKeyDefinition scintKeyDefs[] = { //array of accelerator keys for all po
//
};
static bool isInList(const TCHAR *token, const TCHAR *list) {
if ((!token) || (!list))
return false;
TCHAR word[64];
int i = 0;
int j = 0;
for (; i <= int(lstrlen(list)) ; i++)
{
if ((list[i] == ' ')||(list[i] == '\0'))
{
if (j != 0)
{
word[j] = '\0';
j = 0;
if (!generic_stricmp(token, word))
return true;
}
}
else
{
word[j] = list[i];
j++;
}
}
return false;
};
static int strVal(const TCHAR *str, int base) {
if (!str) return -1;
if (!str[0]) return 0;
@ -1988,6 +2016,38 @@ void NppParameters::feedUserLang(TiXmlNode *node)
}
}
// Finds the language associated with a file extension.
// Languages are scanned from the last index down to 0. For each one, the
// extensions from getDefaultExtList() are joined (space-separated) with the
// ones from getLexerUserExt() of the lexer styler bearing the language name,
// when such a styler exists, and ext is searched in that list with isInList().
//
// ext : the extension to look for.
// Returns the ID of the first matching language, or L_TXT if none matches.
LangType NppParameters::getLangFromExt(const TCHAR *ext)
{
	for (int idx = getNbLang() - 1; idx >= 0; --idx)
	{
		Lang *lang = getLangFromIndex(idx);
		const TCHAR *defaultExts = lang->getDefaultExtList();

		LexerStylerArray &stylers = getLStylerArray();
		LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
		const TCHAR *userExts = styler ? styler->getLexerUserExt() : NULL;

		generic_string allExts(defaultExts ? defaultExts : TEXT(""));
		if (userExts)
		{
			allExts += TEXT(" ");
			allExts += userExts;
		}

		if (isInList(ext, allExts.c_str()))
			return lang->getLangID();
	}
	return L_TXT;
}
void NppParameters::writeUserDefinedLang()
{
if (!_pXmlUserLangDoc)

View File

@ -1113,6 +1113,8 @@ public:
};
// Returns the value of _nbLang.
int getNbLang() const
{
	return _nbLang;
}
// Returns the ID of the language whose extension list contains ext,
// or L_TXT when no language lists it.
LangType getLangFromExt(const TCHAR *ext);
const TCHAR * getLangExtFromName(const TCHAR *langName) const {
for (int i = 0 ; i < _nbLang ; i++)

View File

@ -34,34 +34,6 @@ const int blockSize = 128 * 1024 + 4;
const int CR = 0x0D;
const int LF = 0x0A;
static bool isInList(const TCHAR *token, const TCHAR *list) {
if ((!token) || (!list))
return false;
TCHAR word[64];
int i = 0;
int j = 0;
for (; i <= int(lstrlen(list)) ; i++)
{
if ((list[i] == ' ')||(list[i] == '\0'))
{
if (j != 0)
{
word[j] = '\0';
j = 0;
if (!generic_stricmp(token, word))
return true;
}
}
else
{
word[j] = list[i];
j++;
}
}
return false;
};
Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName) //type must be either DOC_REGULAR or DOC_UNNAMED
: _pManager(pManager), _id(id), _isDirty(false), _doc(doc), _isFileReadOnly(false), _isUserReadOnly(false), _recentTag(-1), _references(0),
_canNotify(false), _timeStamp(0), _needReloading(false), _encoding(-1)
@ -84,6 +56,7 @@ Buffer::Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus
_canNotify = true;
}
void Buffer::determinateFormat(const char *data) {
_format = WIN_FORMAT;
size_t len = strlen(data);
@ -169,7 +142,7 @@ void Buffer::setFileName(const TCHAR *fn, LangType defaultLang)
else // if it's not user lang, then check if it's supported lang
{
_userLangExt[0] = '\0';
newLang = getLangFromExt(ext);
newLang = pNppParamInst->getLangFromExt(ext);
}
}
@ -278,39 +251,6 @@ std::vector<HeaderLineState> & Buffer::getHeaderLineState(ScintillaEditView * id
return _foldStates.at(index);
}
// Finds the language associated with a file extension, using the data held
// by the NppParameters instance.
// Languages are scanned from the last index down to 0. For each one, the
// extensions from getDefaultExtList() are joined (space-separated) with the
// ones from getLexerUserExt() of the lexer styler bearing the language name,
// when such a styler exists, and ext is searched in that list with isInList().
//
// ext : the extension to look for.
// Returns the ID of the first matching language, or L_TXT if none matches.
LangType Buffer::getLangFromExt(const TCHAR *ext)
{
	NppParameters *params = NppParameters::getInstance();
	for (int idx = params->getNbLang() - 1; idx >= 0; --idx)
	{
		Lang *lang = params->getLangFromIndex(idx);
		const TCHAR *defaultExts = lang->getDefaultExtList();

		LexerStylerArray &stylers = params->getLStylerArray();
		LexerStyler *styler = stylers.getLexerStylerByName(lang->getLangName());
		const TCHAR *userExts = styler ? styler->getLexerUserExt() : NULL;

		generic_string allExts(defaultExts ? defaultExts : TEXT(""));
		if (userExts)
		{
			allExts += TEXT(" ");
			allExts += userExts;
		}

		if (isInList(ext, allExts.c_str()))
			return lang->getLangID();
	}
	return L_TXT;
}
Lang * Buffer::getCurrentLang() const {
NppParameters *pNppParam = NppParameters::getInstance();
int i = 0;

View File

@ -139,8 +139,6 @@ public :
//Destructor makes sure its purged
Buffer(FileManager * pManager, BufferID id, Document doc, DocFileStatus type, const TCHAR *fileName);
// Returns the ID of the language whose extension list contains ext,
// or L_TXT when no language lists it.
LangType getLangFromExt(const TCHAR *ext);
// this method 1. copies the file name
// 2. determinates the language from the ext of file name
// 3. gets the last modified time