Fix potential memory leak in Utf8_16.cpp

Fix #14099, close #14104
This commit is contained in:
Don Ho 2023-09-06 02:45:58 +02:00
parent 6eb74a9653
commit 02cc028cd8
1 changed file with 131 additions and 69 deletions

View File

@ -70,10 +70,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits()
else if ((*sx & (0x80+0x40+0x20)) == (0x80+0x40)) else if ((*sx & (0x80+0x40+0x20)) == (0x80+0x40))
{ // 110xxxvv 10nnnnnn, 11 bit character { // 110xxxvv 10nnnnnn, 11 bit character
ASCII7only=0; ASCII7only=0;
if (std::distance(sx, endx) < 2) { if (std::distance(sx, endx) < 2)
{
rv=0; break; rv=0; break;
} }
if ( (sx[1]&(0x80+0x40)) != 0x80) { if ( (sx[1]&(0x80+0x40)) != 0x80)
{
rv=0; break; rv=0; break;
} }
sx+=2; sx+=2;
@ -81,10 +83,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits()
else if ((*sx & (0x80+0x40+0x20+0x10)) == (0x80+0x40+0x20)) else if ((*sx & (0x80+0x40+0x20+0x10)) == (0x80+0x40+0x20))
{ // 1110qqqq 10xxxxvv 10nnnnnn, 16 bit character { // 1110qqqq 10xxxxvv 10nnnnnn, 16 bit character
ASCII7only=0; ASCII7only=0;
if (std::distance(sx, endx) < 3) { if (std::distance(sx, endx) < 3)
{
rv=0; break; rv=0; break;
} }
if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80) { if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80)
{
rv=0; break; rv=0; break;
} }
sx+=3; sx+=3;
@ -92,10 +96,12 @@ u78 Utf8_16_Read::utf8_7bits_8bits()
else if ((*sx & (0x80+0x40+0x20+0x10+0x8)) == (0x80+0x40+0x20+0x10)) else if ((*sx & (0x80+0x40+0x20+0x10+0x8)) == (0x80+0x40+0x20+0x10))
{ // 11110qqq 10xxxxvv 10nnnnnn 10mmmmmm, 21 bit character { // 11110qqq 10xxxxvv 10nnnnnn 10mmmmmm, 21 bit character
ASCII7only=0; ASCII7only=0;
if (std::distance(sx, endx) < 4) { if (std::distance(sx, endx) < 4)
{
rv=0; break; rv=0; break;
} }
if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80 || (sx[3]&(0x80+0x40)) != 0x80) { if ((sx[1]&(0x80+0x40)) != 0x80 || (sx[2]&(0x80+0x40)) != 0x80 || (sx[3]&(0x80+0x40)) != 0x80)
{
rv=0; break; rv=0; break;
} }
sx+=4; sx+=4;
@ -134,24 +140,29 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
{ {
case uni7Bit: case uni7Bit:
case uni8Bit: case uni8Bit:
case uniCookie: { case uniCookie:
{
// Do nothing, pass through // Do nothing, pass through
m_nAllocatedBufSize = 0; m_nAllocatedBufSize = 0;
m_pNewBuf = m_pBuf; m_pNewBuf = m_pBuf;
m_nNewBufSize = len; m_nNewBufSize = len;
break;
} }
case uniUTF8: { break;
case uniUTF8:
{
// Pass through after BOM // Pass through after BOM
m_nAllocatedBufSize = 0; m_nAllocatedBufSize = 0;
m_pNewBuf = m_pBuf + nSkip; m_pNewBuf = m_pBuf + nSkip;
m_nNewBufSize = len - nSkip; m_nNewBufSize = len - nSkip;
break; }
} break;
case uni16BE_NoBOM: case uni16BE_NoBOM:
case uni16LE_NoBOM: case uni16LE_NoBOM:
case uni16BE: case uni16BE:
case uni16LE: { case uni16LE:
{
size_t newSize = (len + len % 2) + (len + len % 2) / 2; size_t newSize = (len + len % 2) + (len + len % 2) / 2;
if (m_nAllocatedBufSize != newSize) if (m_nAllocatedBufSize != newSize)
@ -175,9 +186,9 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
*pCur++ = c; *pCur++ = c;
} }
m_nNewBufSize = pCur - m_pNewBuf; m_nNewBufSize = pCur - m_pNewBuf;
break;
} }
break;
default: default:
break; break;
} }
@ -311,19 +322,26 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size)
{ {
switch (m_eEncoding) switch (m_eEncoding)
{ {
case uniUTF8: { case uniUTF8:
{
if (!m_pFile->write(k_Boms[m_eEncoding], 3)) if (!m_pFile->write(k_Boms[m_eEncoding], 3))
return false; return false;
break; }
} break;
case uni16BE: case uni16BE:
case uni16LE: case uni16LE:
if (!m_pFile->write(k_Boms[m_eEncoding], 2)) {
if (!m_pFile->write(k_Boms[m_eEncoding], 2))
return false; return false;
break; }
break;
default: default:
// nothing to do {
break; // nothing to do
}
break;
} }
m_bFirstWrite = false; m_bFirstWrite = false;
} }
@ -335,16 +353,20 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size)
case uni7Bit: case uni7Bit:
case uni8Bit: case uni8Bit:
case uniCookie: case uniCookie:
case uniUTF8: { case uniUTF8:
{
// Normal write // Normal write
if (m_pFile->write(p, _size)) if (m_pFile->write(p, _size))
isOK = true; isOK = true;
break;
} }
break;
case uni16BE_NoBOM: case uni16BE_NoBOM:
case uni16LE_NoBOM: case uni16LE_NoBOM:
case uni16BE: case uni16BE:
case uni16LE: { case uni16LE:
{
static const unsigned int bufSize = 64*1024; static const unsigned int bufSize = 64*1024;
utf16* buf = new utf16[bufSize]; utf16* buf = new utf16[bufSize];
@ -352,21 +374,29 @@ bool Utf8_16_Write::writeFile(const void* p, size_t _size)
iter8.set(static_cast<const ubyte*>(p), _size, m_eEncoding); iter8.set(static_cast<const ubyte*>(p), _size, m_eEncoding);
unsigned int bufIndex = 0; unsigned int bufIndex = 0;
while (iter8) { while (iter8)
{
++iter8; ++iter8;
while ((bufIndex < bufSize) && iter8.canGet()) while ((bufIndex < bufSize) && iter8.canGet())
iter8.get(&buf [bufIndex++]); iter8.get(&buf [bufIndex++]);
if (bufIndex == bufSize || !iter8) { if (bufIndex == bufSize || !iter8)
if (!m_pFile->write(buf, bufIndex*sizeof(utf16))) return 0; {
if (!m_pFile->write(buf, bufIndex * sizeof(utf16)))
{
delete[] buf;
return 0;
}
bufIndex = 0; bufIndex = 0;
} }
} }
isOK = true; isOK = true;
delete[] buf; delete[] buf;
break;
} }
default: break;
default:
break; break;
} }
@ -386,20 +416,24 @@ size_t Utf8_16_Write::convert(char* p, size_t _size)
{ {
case uni7Bit: case uni7Bit:
case uni8Bit: case uni8Bit:
case uniCookie: { case uniCookie:
{
// Normal write // Normal write
m_nBufSize = _size; m_nBufSize = _size;
m_pNewBuf = (ubyte*)new ubyte[m_nBufSize]; m_pNewBuf = (ubyte*)new ubyte[m_nBufSize];
memcpy(m_pNewBuf, p, _size); memcpy(m_pNewBuf, p, _size);
break;
} }
case uniUTF8: { break;
case uniUTF8:
{
m_nBufSize = _size + 3; m_nBufSize = _size + 3;
m_pNewBuf = (ubyte*)new ubyte[m_nBufSize]; m_pNewBuf = (ubyte*)new ubyte[m_nBufSize];
memcpy(m_pNewBuf, k_Boms[m_eEncoding], 3); memcpy(m_pNewBuf, k_Boms[m_eEncoding], 3);
memcpy(&m_pNewBuf[3], p, _size); memcpy(&m_pNewBuf[3], p, _size);
break;
} }
break;
case uni16BE_NoBOM: case uni16BE_NoBOM:
case uni16LE_NoBOM: case uni16LE_NoBOM:
case uni16BE: case uni16BE:
@ -407,12 +441,15 @@ size_t Utf8_16_Write::convert(char* p, size_t _size)
{ {
utf16* pCur = NULL; utf16* pCur = NULL;
if (m_eEncoding == uni16BE || m_eEncoding == uni16LE) { if (m_eEncoding == uni16BE || m_eEncoding == uni16LE)
{
// Write the BOM // Write the BOM
m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * (_size + 1)]; m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * (_size + 1)];
memcpy(m_pNewBuf, k_Boms[m_eEncoding], 2); memcpy(m_pNewBuf, k_Boms[m_eEncoding], 2);
pCur = (utf16*)&m_pNewBuf[2]; pCur = (utf16*)&m_pNewBuf[2];
} else { }
else
{
m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * _size]; m_pNewBuf = (ubyte*)new ubyte[sizeof(utf16) * _size];
pCur = (utf16*)m_pNewBuf; pCur = (utf16*)m_pNewBuf;
} }
@ -420,14 +457,17 @@ size_t Utf8_16_Write::convert(char* p, size_t _size)
Utf8_Iter iter8; Utf8_Iter iter8;
iter8.set(reinterpret_cast<const ubyte*>(p), _size, m_eEncoding); iter8.set(reinterpret_cast<const ubyte*>(p), _size, m_eEncoding);
for (; iter8; ++iter8) { for (; iter8; ++iter8)
if (iter8.canGet()) { {
if (iter8.canGet())
{
iter8.get(pCur++); iter8.get(pCur++);
} }
} }
m_nBufSize = (const char*)pCur - (const char*)m_pNewBuf; m_nBufSize = (const char*)pCur - (const char*)m_pNewBuf;
break;
} }
break;
default: default:
break; break;
} }
@ -499,30 +539,41 @@ void Utf8_Iter::operator++()
switch (m_eState) switch (m_eState)
{ {
case eStart: case eStart:
if (*m_pRead < 0x80) { {
m_code = *m_pRead; if (*m_pRead < 0x80)
{
m_code = *m_pRead;
toStart(); toStart();
} else if (*m_pRead < 0xE0) { }
m_code = static_cast<utf16>(0x1f & *m_pRead); else if (*m_pRead < 0xE0)
m_eState = eFollow; {
m_code = static_cast<utf16>(0x1f & *m_pRead);
m_eState = eFollow;
m_count = 1; m_count = 1;
} else if (*m_pRead < 0xF0) { }
else if (*m_pRead < 0xF0)
{
m_code = static_cast<utf16>(0x0f & *m_pRead); m_code = static_cast<utf16>(0x0f & *m_pRead);
m_eState = eFollow; m_eState = eFollow;
m_count = 2; m_count = 2;
} else { }
m_code = static_cast<utf16>(0x07 & *m_pRead); else
m_eState = eFollow; {
m_code = static_cast<utf16>(0x07 & *m_pRead);
m_eState = eFollow;
m_count = 3; m_count = 3;
} }
break; }
break;
case eFollow: case eFollow:
m_code = (m_code << 6) | static_cast<utf8>(0x3F & *m_pRead); {
m_code = (m_code << 6) | static_cast<utf8>(0x3F & *m_pRead);
m_count--; m_count--;
if (m_count == 0) if (m_count == 0)
toStart(); toStart();
break; }
break;
} }
++m_pRead; ++m_pRead;
} }
@ -625,36 +676,47 @@ void Utf16_Iter::operator++()
switch (m_eState) switch (m_eState)
{ {
case eStart: case eStart:
{
read(); read();
if ((m_nCur16 >= 0xd800) && (m_nCur16 < 0xdc00)) { if ((m_nCur16 >= 0xd800) && (m_nCur16 < 0xdc00))
{
m_eState = eSurrogate; m_eState = eSurrogate;
m_highSurrogate = m_nCur16; m_highSurrogate = m_nCur16;
} }
else if (m_nCur16 < 0x80) { else if (m_nCur16 < 0x80)
pushout(static_cast<ubyte>(m_nCur16)); {
m_eState = eStart; pushout(static_cast<ubyte>(m_nCur16));
} else if (m_nCur16 < 0x800) { m_eState = eStart;
pushout(static_cast<ubyte>(0xC0 | m_nCur16 >> 6)); }
pushout(static_cast<ubyte>(0x80 | (m_nCur16 & 0x3f))); else if (m_nCur16 < 0x800)
m_eState = eStart; {
} else { pushout(static_cast<ubyte>(0xC0 | m_nCur16 >> 6));
pushout(static_cast<ubyte>(0xE0 | (m_nCur16 >> 12))); pushout(static_cast<ubyte>(0x80 | (m_nCur16 & 0x3f)));
pushout(static_cast<ubyte>(0x80 | ((m_nCur16 >> 6) & 0x3f))); m_eState = eStart;
pushout(static_cast<ubyte>(0x80 | (m_nCur16 & 0x3f))); }
m_eState = eStart; else
} {
break; pushout(static_cast<ubyte>(0xE0 | (m_nCur16 >> 12)));
pushout(static_cast<ubyte>(0x80 | ((m_nCur16 >> 6) & 0x3f)));
pushout(static_cast<ubyte>(0x80 | (m_nCur16 & 0x3f)));
m_eState = eStart;
}
}
break;
case eSurrogate: case eSurrogate:
{
read(); read();
if ((m_nCur16 >= 0xDC00) && (m_nCur16 < 0xE000)) if ((m_nCur16 >= 0xDC00) && (m_nCur16 < 0xE000))
{ // valid surrogate pair { // valid surrogate pair
UINT code = 0x10000 + ((m_highSurrogate & 0x3ff) << 10) + (m_nCur16 & 0x3ff); UINT code = 0x10000 + ((m_highSurrogate & 0x3ff) << 10) + (m_nCur16 & 0x3ff);
pushout(0xf0 | ((code >> 18) & 0x07)); pushout(0xf0 | ((code >> 18) & 0x07));
pushout(0x80 | ((code >> 12) & 0x3f)); pushout(0x80 | ((code >> 12) & 0x3f));
pushout(0x80 | ((code >> 6) & 0x3f)); pushout(0x80 | ((code >> 6) & 0x3f));
pushout(0x80 | (code & 0x3f)); pushout(0x80 | (code & 0x3f));
} }
m_eState = eStart; m_eState = eStart;
break; }
break;
} }
} }