diff --git a/PowerEditor/bin/Nppold.exe b/PowerEditor/bin/Nppold.exe new file mode 100644 index 000000000..122ec558e Binary files /dev/null and b/PowerEditor/bin/Nppold.exe differ diff --git a/PowerEditor/src/ScitillaComponent/Buffer.cpp b/PowerEditor/src/ScitillaComponent/Buffer.cpp index 2ef011ed1..fa42d90ba 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.cpp +++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp @@ -26,6 +26,7 @@ // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #include +#include #include #include #include "Buffer.h" @@ -592,8 +593,8 @@ BufferID FileManager::loadFile(const TCHAR * filename, Document doc, int encodin char data[blockSize + 8]; // +8 for incomplete multibyte char FormatType bkformat = FormatType::unknown; - - bool res = loadFileData(doc, backupFileName?backupFileName:fullpath, data, &UnicodeConvertor, L_TEXT, encoding, &bkformat); + LangType detectedLang = L_TEXT; + bool res = loadFileData(doc, backupFileName ? backupFileName : fullpath, data, &UnicodeConvertor, detectedLang, encoding, &bkformat); if (res) { Buffer* newBuf = new Buffer(this, _nextBufferID, doc, DOC_REGULAR, fullpath); @@ -620,6 +621,11 @@ BufferID FileManager::loadFile(const TCHAR * filename, Document doc, int encodin buf->setUnicodeMode(ndds._unicodeMode); buf->setEncoding(-1); + // if a language has been detected, and the detected value is different from the file extension, + // we use the detected value + if (detectedLang != L_TEXT && detectedLang != buf->getLangType()) + buf->setLangType(detectedLang); + if (encoding == -1) { // 3 formats : WIN_FORMAT, UNIX_FORMAT and MAC_FORMAT @@ -667,8 +673,9 @@ bool FileManager::reloadBuffer(BufferID id) int encoding = buf->getEncoding(); char data[blockSize + 8]; // +8 for incomplete multibyte char FormatType bkformat; + LangType lang = buf->getLangType(); - bool res = loadFileData(doc, buf->getFullPathName(), data, &UnicodeConvertor, buf->getLangType(), encoding, &bkformat); + bool res = loadFileData(doc, buf->getFullPathName(), data, &UnicodeConvertor, lang, encoding, &bkformat); buf->_canNotify = true; if (res) @@ -1245,8 +1252,60 @@ int FileManager::detectCodepage(char* buf, size_t len) return codepage; } +LangType FileManager::detectLanguageFromTextBegining(const unsigned char *data, unsigned int dataLen) +{ + // it detectes xml, php and bash script file + std::string xmlHeader = ""; // length : 6 + std::string htmlHeader1 = ""; // length : 15 + + const size_t longestLength = htmlHeader1.length(); // longest length - html header Length + size_t i = 0; + + for (; i < dataLen; ++i) + { + if (data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r') + break; + } + + std::string buf2Test = std::string((const char *)data + i, longestLength); + + auto res = std::mismatch(bashHeader.begin(), bashHeader.end(), buf2Test.begin()); + if (res.first == bashHeader.end()) + { + return L_BASH; + } + + res = std::mismatch(phpHeader.begin(), phpHeader.end(), buf2Test.begin()); + if (res.first == phpHeader.end()) + { + return L_PHP; + } + + res = std::mismatch(xmlHeader.begin(), xmlHeader.end(), buf2Test.begin()); + if (res.first == xmlHeader.end()) + { + return L_XML; + } + + res = std::mismatch(htmlHeader1.begin(), htmlHeader1.end(), buf2Test.begin()); + if (res.first == htmlHeader1.end()) + { + return L_HTML; + } + res = std::mismatch(htmlHeader2.begin(), htmlHeader2.end(), buf2Test.begin()); + if (res.first == htmlHeader2.end()) + { + return L_HTML; + } + + return L_TEXT; +} + inline bool FileManager::loadFileData(Document doc, const TCHAR * filename, char* data, Utf8_16_Read * UnicodeConvertor, - LangType language, int & encoding, FormatType* pFormat) + LangType & language, int & encoding, FormatType* pFormat) { FILE *fp = generic_fopen(filename, TEXT("rb")); if (!fp) @@ -1319,9 +1378,9 @@ inline bool FileManager::loadFileData(Document doc, const TCHAR * filename, char lenFile = fread(data+incompleteMultibyteChar, 1, blockSize-incompleteMultibyteChar, fp) + incompleteMultibyteChar; if (lenFile == 0) break; - // check if file contain any BOM if (isFirstTime) { + // check if file contain any BOM if (Utf8_16_Read::determineEncoding((unsigned char *)data, lenFile) != uni8Bit) { // if file contains any BOM, then encoding will be erased, @@ -1333,6 +1392,13 @@ inline bool FileManager::loadFileData(Document doc, const TCHAR * filename, char if (NppParameters::getInstance()->getNppGUI()._detectEncoding) encoding = detectCodepage(data, lenFile); } + + if (language == L_TEXT) + { + // check the language du fichier + language = detectLanguageFromTextBegining((unsigned char *)data, lenFile); + } + isFirstTime = false; } diff --git a/PowerEditor/src/ScitillaComponent/Buffer.h b/PowerEditor/src/ScitillaComponent/Buffer.h index cc0e91926..5baa1ede0 100644 --- a/PowerEditor/src/ScitillaComponent/Buffer.h +++ b/PowerEditor/src/ScitillaComponent/Buffer.h @@ -111,7 +111,8 @@ public: private: ~FileManager(); int detectCodepage(char* buf, size_t len); - bool loadFileData(Document doc, const TCHAR* filename, char* buffer, Utf8_16_Read* UnicodeConvertor, LangType language, int& encoding, FormatType* pFormat = nullptr); + bool loadFileData(Document doc, const TCHAR* filename, char* buffer, Utf8_16_Read* UnicodeConvertor, LangType & language, int& encoding, FormatType* pFormat = nullptr); + LangType detectLanguageFromTextBegining(const unsigned char *data, unsigned int dataLen); private: