From 02cc028cd807dd3e0daef7b947ba1fe131c16882 Mon Sep 17 00:00:00 2001 From: Don Ho Date: Wed, 6 Sep 2023 02:45:58 +0200 Subject: [PATCH] Fix potential memory leak in Utf8_16.cpp Fix #14099, close #14104 --- PowerEditor/src/Utf8_16.cpp | 200 +++++++++++++++++++++++------------- 1 file changed, 131 insertions(+), 69 deletions(-) diff --git a/PowerEditor/src/Utf8_16.cpp b/PowerEditor/src/Utf8_16.cpp index acf8c3878..91160ec09 100644 --- a/PowerEditor/src/Utf8_16.cpp +++ b/PowerEditor/src/Utf8_16.cpp @@ -70,10 +70,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits() else if ((*sx & (0x80+0x40+0x20)) == (0x80+0x40)) { // 110xxxvv 10nnnnnn, 11 bit character ASCII7only=0; - if (std::distance(sx, endx) < 2) { + if (std::distance(sx, endx) < 2) + { rv=0; break; } - if ( (sx[1]&(0x80+0x40)) != 0x80) { + if ( (sx[1]&(0x80+0x40)) != 0x80) + { rv=0; break; } sx+=2; @@ -81,10 +83,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits() else if ((*sx & (0x80+0x40+0x20+0x10)) == (0x80+0x40+0x20)) { // 1110qqqq 10xxxxvv 10nnnnnn, 16 bit character ASCII7only=0; - if (std::distance(sx, endx) < 3) { + if (std::distance(sx, endx) < 3) + { rv=0; break; } - if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80) { + if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80) + { rv=0; break; } sx+=3; @@ -92,10 +96,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits() else if ((*sx & (0x80+0x40+0x20+0x10+0x8)) == (0x80+0x40+0x20+0x10)) { // 11110qqq 10xxxxvv 10nnnnnn 10mmmmmm, 21 bit character ASCII7only=0; - if (std::distance(sx, endx) < 4) { + if (std::distance(sx, endx) < 4) + { rv=0; break; } - if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80 || (sx[3]&(0x80+0x40)) != 0x80) { + if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80 || (sx[3]&(0x80+0x40)) != 0x80) + { rv=0; break; } sx+=4; @@ -134,24 +140,29 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) { case uni7Bit: case uni8Bit: - case uniCookie: { + case uniCookie: + { // Do nothing, pass through m_nAllocatedBufSize = 0; 
m_pNewBuf = m_pBuf; m_nNewBufSize = len; - break; } - case uniUTF8: { + break; + + case uniUTF8: + { // Pass through after BOM m_nAllocatedBufSize = 0; m_pNewBuf = m_pBuf + nSkip; m_nNewBufSize = len - nSkip; - break; - } + } + break; + case uni16BE_NoBOM: case uni16LE_NoBOM: case uni16BE: - case uni16LE: { + case uni16LE: + { size_t newSize = (len + len % 2) + (len + len % 2) / 2; if (m_nAllocatedBufSize != newSize) @@ -175,9 +186,9 @@ size_t Utf8_16_Read::convert(char* buf, size_t len) *pCur++ = c; } m_nNewBufSize = pCur - m_pNewBuf; - - break; } + break; + default: break; } @@ -311,19 +322,26 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size) { switch (m_eEncoding) { - case uniUTF8: { + case uniUTF8: + { if (!m_pFile->write(k_Boms[m_eEncoding], 3)) return false; - break; - } + } + break; + case uni16BE: case uni16LE: - if (!m_pFile->write(k_Boms[m_eEncoding], 2)) + { + if (!m_pFile->write(k_Boms[m_eEncoding], 2)) return false; - break; + } + break; + default: - // nothing to do - break; + { + // nothing to do + } + break; } m_bFirstWrite = false; } @@ -335,16 +353,20 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size) case uni7Bit: case uni8Bit: case uniCookie: - case uniUTF8: { + case uniUTF8: + { // Normal write if (m_pFile->write(p, _size)) isOK = true; - break; + } + break; + case uni16BE_NoBOM: case uni16LE_NoBOM: case uni16BE: - case uni16LE: { + case uni16LE: + { static const unsigned int bufSize = 64*1024; utf16* buf = new utf16[bufSize]; @@ -352,21 +374,29 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size) iter8.set(static_cast(p), _size, m_eEncoding); unsigned int bufIndex = 0; - while (iter8) { + while (iter8) + { ++iter8; while ((bufIndex < bufSize) && iter8.canGet()) iter8.get(&buf [bufIndex++]); - if (bufIndex == bufSize || !iter8) { - if (!m_pFile->write(buf, bufIndex*sizeof(utf16))) return 0; + if (bufIndex == bufSize || !iter8) + { + if (!m_pFile->write(buf, bufIndex * sizeof(utf16))) + { + delete[] buf; + return 
0; + } bufIndex = 0; } } isOK = true; delete[] buf; - break; - } - default: + + } + break; + + default: break; } @@ -386,20 +416,24 @@ size_t Utf8_16_Write::convert(char* p, size_t _size) { case uni7Bit: case uni8Bit: - case uniCookie: { + case uniCookie: + { // Normal write m_nBufSize = _size; m_pNewBuf = (ubyte*)new ubyte[m_nBufSize]; memcpy(m_pNewBuf, p, _size); - break; } - case uniUTF8: { + break; + + case uniUTF8: + { m_nBufSize = _size + 3; m_pNewBuf = (ubyte*)new ubyte[m_nBufSize]; memcpy(m_pNewBuf, k_Boms[m_eEncoding], 3); memcpy(&m_pNewBuf[3], p, _size); - break; } + break; + case uni16BE_NoBOM: case uni16LE_NoBOM: case uni16BE: @@ -407,12 +441,15 @@ size_t Utf8_16_Write::convert(char* p, size_t _size) { utf16* pCur = NULL; - if (m_eEncoding == uni16BE || m_eEncoding == uni16LE) { + if (m_eEncoding == uni16BE || m_eEncoding == uni16LE) + { // Write the BOM m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * (_size + 1)]; memcpy(m_pNewBuf, k_Boms[m_eEncoding], 2); pCur = (utf16*)&m_pNewBuf[2]; - } else { + } + else + { m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * _size]; pCur = (utf16*)m_pNewBuf; } @@ -420,14 +457,17 @@ size_t Utf8_16_Write::convert(char* p, size_t _size) Utf8_Iter iter8; iter8.set(reinterpret_cast(p), _size, m_eEncoding); - for (; iter8; ++iter8) { - if (iter8.canGet()) { + for (; iter8; ++iter8) + { + if (iter8.canGet()) + { iter8.get(pCur++); } } m_nBufSize = (const char*)pCur - (const char*)m_pNewBuf; - break; } + break; + default: break; } @@ -499,30 +539,41 @@ void Utf8_Iter::operator++() switch (m_eState) { case eStart: - if (*m_pRead < 0x80) { - m_code = *m_pRead; + { + if (*m_pRead < 0x80) + { + m_code = *m_pRead; toStart(); - } else if (*m_pRead < 0xE0) { - m_code = static_cast(0x1f & *m_pRead); - m_eState = eFollow; + } + else if (*m_pRead < 0xE0) + { + m_code = static_cast(0x1f & *m_pRead); + m_eState = eFollow; m_count = 1; - } else if (*m_pRead < 0xF0) { + } + else if (*m_pRead < 0xF0) + { m_code = static_cast(0x0f & *m_pRead); 
m_eState = eFollow; m_count = 2; - } else { - m_code = static_cast(0x07 & *m_pRead); - m_eState = eFollow; + } + else + { + m_code = static_cast(0x07 & *m_pRead); + m_eState = eFollow; m_count = 3; - } - break; + } + } + break; case eFollow: - m_code = (m_code << 6) | static_cast(0x3F & *m_pRead); + { + m_code = (m_code << 6) | static_cast(0x3F & *m_pRead); m_count--; if (m_count == 0) toStart(); - break; + } + break; } ++m_pRead; } @@ -625,36 +676,47 @@ void Utf16_Iter::operator++() switch (m_eState) { case eStart: + { read(); - if ((m_nCur16 >= 0xd800) && (m_nCur16 < 0xdc00)) { + if ((m_nCur16 >= 0xd800) && (m_nCur16 < 0xdc00)) + { m_eState = eSurrogate; m_highSurrogate = m_nCur16; } - else if (m_nCur16 < 0x80) { - pushout(static_cast(m_nCur16)); - m_eState = eStart; - } else if (m_nCur16 < 0x800) { - pushout(static_cast(0xC0 | m_nCur16 >> 6)); - pushout(static_cast(0x80 | (m_nCur16 & 0x3f))); - m_eState = eStart; - } else { - pushout(static_cast(0xE0 | (m_nCur16 >> 12))); - pushout(static_cast(0x80 | ((m_nCur16 >> 6) & 0x3f))); - pushout(static_cast(0x80 | (m_nCur16 & 0x3f))); - m_eState = eStart; - } - break; + else if (m_nCur16 < 0x80) + { + pushout(static_cast(m_nCur16)); + m_eState = eStart; + } + else if (m_nCur16 < 0x800) + { + pushout(static_cast(0xC0 | m_nCur16 >> 6)); + pushout(static_cast(0x80 | (m_nCur16 & 0x3f))); + m_eState = eStart; + } + else + { + pushout(static_cast(0xE0 | (m_nCur16 >> 12))); + pushout(static_cast(0x80 | ((m_nCur16 >> 6) & 0x3f))); + pushout(static_cast(0x80 | (m_nCur16 & 0x3f))); + m_eState = eStart; + } + } + break; + case eSurrogate: + { read(); if ((m_nCur16 >= 0xDC00) && (m_nCur16 < 0xE000)) { // valid surrogate pair UINT code = 0x10000 + ((m_highSurrogate & 0x3ff) << 10) + (m_nCur16 & 0x3ff); pushout(0xf0 | ((code >> 18) & 0x07)); pushout(0x80 | ((code >> 12) & 0x3f)); - pushout(0x80 | ((code >> 6) & 0x3f)); + pushout(0x80 | ((code >> 6) & 0x3f)); pushout(0x80 | (code & 0x3f)); } m_eState = eStart; - break; + } + break; } }