Add support for UTF-16 (UCS-2) files that carry no byte-order mark.

  * Parameters.h     - new UniMode values uni16BE_NoBOM and uni16LE_NoBOM.
  * Utf8_16.cpp      - detect the BOM-less variants in determineEncoding() and
                       route them through the existing UTF-16 read, write,
                       convert and iterator code paths.
  * Notepad_plus.h   - status text for the two new modes.
  * Notepad_plus.cpp - clear the encoding radio group when the current mode
                       has no entry in the Format menu.

NOTE(review): line breaks and indentation of this patch were rebuilt from the
hunk line counts; confirm context whitespace against the target tree before
applying (or apply with whitespace-insensitive matching).

diff --git a/PowerEditor/bin/npp.pdb b/PowerEditor/bin/npp.pdb
index c1bcafa99..ed624b389 100644
Binary files a/PowerEditor/bin/npp.pdb and b/PowerEditor/bin/npp.pdb differ
diff --git a/PowerEditor/src/Notepad_plus.cpp b/PowerEditor/src/Notepad_plus.cpp
index bc4792846..314e1bc68 100644
--- a/PowerEditor/src/Notepad_plus.cpp
+++ b/PowerEditor/src/Notepad_plus.cpp
@@ -5492,9 +5492,14 @@ void Notepad_plus::checkUnicodeMenuItems(UniMode um) const
 		case uni16BE : id = IDM_FORMAT_UCS_2BE; break;
 		case uni16LE : id = IDM_FORMAT_UCS_2LE; break;
 		case uniCookie : id = IDM_FORMAT_AS_UTF_8; break;
-		default :
-			id = IDM_FORMAT_ANSI;
+		case uni7Bit : case uni8Bit : id = IDM_FORMAT_ANSI; break;
 	}
+	if (id == -1) // no menu entry (uni16BE_NoBOM / uni16LE_NoBOM): select ANSI to clear the radio group, then uncheck ANSI as well
+	{
+		::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, IDM_FORMAT_ANSI, MF_BYCOMMAND);
+		::CheckMenuItem(_mainMenuHandle, IDM_FORMAT_ANSI, MF_UNCHECKED | MF_BYCOMMAND);
+	}
+	else
 	::CheckMenuRadioItem(_mainMenuHandle, IDM_FORMAT_ANSI, IDM_FORMAT_AS_UTF_8, id, MF_BYCOMMAND);
 }
 
diff --git a/PowerEditor/src/Notepad_plus.h b/PowerEditor/src/Notepad_plus.h
index 4a20d1167..8cdd1f690 100644
--- a/PowerEditor/src/Notepad_plus.h
+++ b/PowerEditor/src/Notepad_plus.h
@@ -522,7 +522,11 @@ private:
 			case uni16BE:
 				uniModeText = TEXT("UCS-2 Big Endian"); break;
 			case uni16LE:
-				uniModeText = TEXT("UCS-2 little Endian"); break;
+				uniModeText = TEXT("UCS-2 Little Endian"); break;
+			case uni16BE_NoBOM:
+				uniModeText = TEXT("UCS-2 BE w/o BOM"); break;
+			case uni16LE_NoBOM:
+				uniModeText = TEXT("UCS-2 LE w/o BOM"); break;
 			case uniCookie:
 				uniModeText = TEXT("ANSI as UTF-8"); break;
 			default :
diff --git a/PowerEditor/src/Parameters.h b/PowerEditor/src/Parameters.h
index c71f629e3..bcccd8c8e 100644
--- a/PowerEditor/src/Parameters.h
+++ b/PowerEditor/src/Parameters.h
@@ -56,7 +56,7 @@ const int TAB_MULTILINE = 128; // 1000 0000
 const int TAB_HIDE = 256; //1 0000 0000
 
 enum formatType {WIN_FORMAT, MAC_FORMAT, UNIX_FORMAT};
-enum UniMode {uni8Bit=0, uniUTF8=1, uni16BE=2, uni16LE=3, uniCookie=4, uni7Bit=5, uniEnd};
+enum UniMode {uni8Bit=0, uniUTF8=1, uni16BE=2, uni16LE=3, uniCookie=4, uni7Bit=5, uni16BE_NoBOM=6, uni16LE_NoBOM=7, uniEnd};
 enum ChangeDetect {cdDisabled=0, cdEnabled=1, cdAutoUpdate=2, cdGo2end=3, cdAutoUpdateGo2end=4};
 enum BackupFeature {bak_none = 0, bak_simple = 1, bak_verbose = 2};
 enum OpenSaveDirSetting {dir_followCurrent = 0, dir_last = 1, dir_userDef = 2};
diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp
index ad2357a30..948264704 100644
--- a/PowerEditor/src/Utf8_16.cpp
+++ b/PowerEditor/src/Utf8_16.cpp
@@ -41,7 +41,7 @@ Utf8_16_Read::Utf8_16_Read() {
 
 Utf8_16_Read::~Utf8_16_Read()
 {
-	if ((m_eEncoding == uni16BE) || (m_eEncoding == uni16LE))
+	if ((m_eEncoding == uni16BE) || (m_eEncoding == uni16LE) || (m_eEncoding == uni16BE_NoBOM) || (m_eEncoding == uni16LE_NoBOM))
 	{
 		delete [] m_pNewBuf;
 		m_pNewBuf = NULL;
@@ -146,6 +146,8 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
 			ret = len - nSkip;
 			break;
 		}
+		case uni16BE_NoBOM:
+		case uni16LE_NoBOM:
 		case uni16BE:
 		case uni16LE: {
 			size_t newSize = len + len / 2 + 1;
@@ -186,22 +188,37 @@ void Utf8_16_Read::determineEncoding()
 	m_eEncoding = uni8Bit;
 	m_nSkip = 0;
 
+	// detect UTF-16 big-endian with BOM
 	if (m_nLen > 1 && m_pBuf[0] == k_Boms[uni16BE][0] && m_pBuf[1] == k_Boms[uni16BE][1])
 	{
 		m_eEncoding = uni16BE;
 		m_nSkip = 2;
 	}
+	// detect UTF-16 little-endian with BOM
 	else if (m_nLen > 1 && m_pBuf[0] == k_Boms[uni16LE][0] && m_pBuf[1] == k_Boms[uni16LE][1])
 	{
 		m_eEncoding = uni16LE;
 		m_nSkip = 2;
 	}
+	// detect UTF-8 with BOM
 	else if (m_nLen > 2 && m_pBuf[0] == k_Boms[uniUTF8][0] &&
 		m_pBuf[1] == k_Boms[uniUTF8][1] && m_pBuf[2] == k_Boms[uniUTF8][2])
 	{
 		m_eEncoding = uniUTF8;
 		m_nSkip = 3;
 	}
+	// try to detect UTF-16 little-endian without BOM
+	else if (m_nLen > 1 && m_pBuf[0] != 0 && m_pBuf[1] == 0 && IsTextUnicode(m_pBuf, static_cast<int>(m_nLen), NULL))
+	{
+		m_eEncoding = uni16LE_NoBOM;
+		m_nSkip = 0;
+	}
+	// try to detect UTF-16 big-endian without BOM
+	else if (m_nLen > 1 && m_pBuf[0] == 0 && m_pBuf[1] != 0)
+	{
+		m_eEncoding = uni16BE_NoBOM;
+		m_nSkip = 0;
+	}
 	else
 	{
 		u78 detectedEncoding = utf8_7bits_8bits();
@@ -281,6 +298,8 @@ size_t Utf8_16_Write::fwrite(const void* p, size_t _size)
 			ret = ::fwrite(p, _size, 1, m_pFile);
 			break;
 		}
+		case uni16BE_NoBOM:
+		case uni16LE_NoBOM:
 		case uni16BE:
 		case uni16LE: {
 			if (_size > m_nBufSize)
@@ -338,6 +357,8 @@ size_t Utf8_16_Write::convert(char* p, size_t _size)
 			memcpy(&m_pNewBuf[3], p, _size);
 			break;
 		}
+		case uni16BE_NoBOM:
+		case uni16LE_NoBOM:
 		case uni16BE:
 		case uni16LE: {
 			m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * (_size + 1)];
@@ -442,7 +463,7 @@ void Utf8_Iter::operator++()
 void Utf8_Iter::toStart()
 {
 	m_eState = eStart;
-	if (m_eEncoding == uni16BE)
+	if (m_eEncoding == uni16BE || m_eEncoding == uni16BE_NoBOM)
 	{
 		swap();
 	}
@@ -492,10 +513,13 @@ void Utf16_Iter::operator++()
 	switch (m_eState)
 	{
 		case eStart:
-			if (m_eEncoding == uni16LE) {
+			if (m_eEncoding == uni16LE || m_eEncoding == uni16LE_NoBOM)
+			{
 				m_nCur16 = *m_pRead++;
 				m_nCur16 |= static_cast<utf16>(*m_pRead << 8);
-			} else {
+			}
+			else //(m_eEncoding == uni16BE || m_eEncoding == uni16BE_NoBOM)
+			{
 				m_nCur16 = static_cast<utf16>(*m_pRead++ << 8);
 				m_nCur16 |= *m_pRead;
 			}