From 9734d81f3243684f4dd9f937ef0b624c50a7d462 Mon Sep 17 00:00:00 2001 From: Udo Hoffmann Date: Sat, 24 Apr 2021 11:02:50 +0200 Subject: [PATCH] Fix crash on a special character in UTF-16 files (regression) Fix UTF iterators ending too early. This regression (https://github.com/notepad-plus-plus/notepad-plus-plus/pull/9599#issuecomment-825654605) was introduced by https://github.com/notepad-plus-plus/notepad-plus-plus/commit/38bf76e84312db8e60d51126acf4853201470663 Close #9797 --- PowerEditor/src/Utf8_16.cpp | 22 ++++++++-------------- PowerEditor/src/Utf8_16.h | 7 ++----- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index 2be7ca3de..d12b1c080 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -629,24 +629,18 @@ void Utf16_Iter::operator++() m_highSurrogate = m_nCur16; } else if (m_nCur16 < 0x80) { - pushout(static_cast(m_nCur16 & 0xFF)); + pushout(static_cast(m_nCur16)); m_eState = eStart; } else if (m_nCur16 < 0x800) { pushout(static_cast(0xC0 | m_nCur16 >> 6)); - m_eState = e2Bytes2; + pushout(0x80 | m_nCur16 & 0x3f); + m_eState = eStart; } else { - pushout(static_cast(0xE0 | m_nCur16 >> 12)); - m_eState = e3Bytes2; + pushout(0xE0 | (m_nCur16 >> 12)); + pushout(0x80 | (m_nCur16 >> 6) & 0x3f); + pushout(0x80 | m_nCur16 & 0x3f); + m_eState = eStart; } - break; - case e2Bytes2: - case e3Bytes3: - pushout(static_cast(0x80 | m_nCur16 & 0x3F)); - m_eState = eStart; - break; - case e3Bytes2: - pushout(static_cast(0x80 | ((m_nCur16 >> 6) & 0x3F))); - m_eState = e3Bytes3; break; case eSurrogate: read(); @@ -657,8 +651,8 @@ void Utf16_Iter::operator++() pushout(0x80 | (code >> 12) & 0x3f); pushout(0x80 | (code >> 6) & 0x3f); pushout(0x80 | code & 0x3f); - m_eState = eStart; } + m_eState = eStart; break; } } diff --git a/PowerEditor/src/Utf8_16.h b/PowerEditor/src/Utf8_16.h index a703ca683..938aaae8e 100644 --- a/PowerEditor/src/Utf8_16.h +++ b/PowerEditor/src/Utf8_16.h @@ 
-40,9 +40,6 @@ class Utf16_Iter : public Utf8_16 { public: enum eState { eStart, - e2Bytes2, - e3Bytes2, - e3Bytes3, eSurrogate }; @@ -52,7 +49,7 @@ public: bool get(utf8 *c); void operator++(); eState getState() { return m_eState; }; - operator bool() { return m_pRead < m_pEnd; }; + operator bool() { return (m_pRead < m_pEnd) || (m_out1st != m_outLst); }; protected: void read(); @@ -81,7 +78,7 @@ public: bool canGet() const { return m_out1st != m_outLst; } void toStart(); void operator++(); - operator bool() { return m_pRead < m_pEnd; } + operator bool() { return (m_pRead < m_pEnd) || (m_out1st != m_outLst); } protected: enum eState {eStart, eFollow};