mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-05-29 19:10:32 +02:00
Release 5.5.5 (https://www.scintilla.org/scintilla555.zip) Released 25 February 2025. Remember selection with undo and redo. Controlled with SCI_SETUNDOSELECTIONHISTORY. Feature #1273, Bug #1479, Bug #1224. Serialize selection type and ranges with SCI_GETSELECTIONSERIALIZED and SCI_SETSELECTIONSERIALIZED. For Win32, update Direct2D and DirectWrite interfaces used to 1.1 and add a lower-level approach to calling DirectWrite 1.1 by specifying SC_TECHNOLOGY_DIRECT_WRITE_1. Since Windows Vista does not support these API versions, Scintilla no longer supports DirectWrite on Windows Vista and will fall back to using GDI. Fix segmentation of long lexemes to avoid breaking before modifiers like accents that must be drawn with their base letters. For wrapping, try to break lines without separating letters from modifiers. For GTK on Windows, replace reverse arrow cursor with hand as reverse arrow was small in scaled modes. Bug #2460. Fix bug on Qt where double-click stopped working when Scintilla instance had been running for weeks. Release 5.4.3 (https://www.scintilla.org/lexilla543.zip) Released 25 February 2025. C++: Fix evaluation of != in preprocessor condition. Issue #299. Modula-3: Allow digits in uppercase identifiers. Issue #297. Pascal: Fix asm style extending past end. Issue #295. Python: Fix detection of attributes and decorators. Issue #294, Pull request #302. Ruby: Implement substyles for identifiers SCE_RB_IDENTIFIER. Ruby: Recognize name as SCE_RB_DEFNAME in def when `::` used as well as `.`. Issue #300. Close #16235
179 lines
4.3 KiB
C++
179 lines
4.3 KiB
C++
// This file is part of Notepad++ project
|
|
// Copyright (C) 2021 Notepad++ authors.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// at your option any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#include "UTF8DocumentIterator.h"
|
|
#include <string_view>
|
|
#include <stdexcept>
|
|
#include <optional>
|
|
#include <map>
|
|
#include <algorithm>
|
|
|
|
#include "ILoader.h"
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "ScintillaTypes.h"
|
|
#include "ScintillaMessages.h"
|
|
#include "Debugging.h"
|
|
#include "Geometry.h"
|
|
#include "Platform.h"
|
|
|
|
#include "CharacterCategoryMap.h"
|
|
#include "Position.h"
|
|
#include "SplitVector.h"
|
|
#include "Partitioning.h"
|
|
#include "RunStyles.h"
|
|
#include "CellBuffer.h"
|
|
#include "CharClassify.h"
|
|
#include "Decoration.h"
|
|
#include "CaseFolder.h"
|
|
#include "Document.h"
|
|
|
|
using namespace Scintilla::Internal;
|
|
|
|
// Builds an iterator over the bytes [pos, end) of doc and decodes the character
// found at the starting position.
UTF8DocumentIterator::UTF8DocumentIterator(Document* doc, Sci::Position pos, Sci::Position end) :
	// Release builds silently pull a start that lies beyond the end back onto the end.
	m_pos(pos <= end ? pos : end),
	m_end(end),
	m_characterIndex(0),
	m_doc(doc)
{
	// Debug builds report the inverted range instead of hiding it.
	PLATFORM_ASSERT(pos <= end);

	readCharacter();
}
|
|
|
|
// Copy constructor: duplicates the position, the range end, the document pointer
// and the already decoded character, so nothing is re-read from the document.
UTF8DocumentIterator::UTF8DocumentIterator(const UTF8DocumentIterator& copy) :
	m_pos(copy.m_pos),
	m_end(copy.m_end),
	m_characterIndex(copy.m_characterIndex),
	m_utf8Length(copy.m_utf8Length),
	m_utf16Length(copy.m_utf16Length),
	m_doc(copy.m_doc)
{
	// Carry over both UTF-16 code units of the cached character.
	for (int unit = 0; unit < 2; ++unit)
	{
		m_character[unit] = copy.m_character[unit];
	}

	// Debug builds: the source iterator must not sit beyond its end.
	PLATFORM_ASSERT(m_pos <= m_end);

	// Release builds: clamp instead of asserting.
	if (m_end < m_pos)
	{
		m_pos = m_end;
	}
}
|
|
|
|
// Pre-increment: advances by one UTF-16 code unit. A character that decodes to a
// surrogate pair is stepped through in two increments before the byte position moves.
UTF8DocumentIterator& UTF8DocumentIterator::operator ++ ()
{
	PLATFORM_ASSERT(m_pos < m_end);

	const bool onFirstUnitOfPair = (m_characterIndex == 0) && (m_utf16Length == 2);
	if (onFirstUnitOfPair)
	{
		// Stay on the same bytes, just expose the second code unit.
		m_characterIndex = 1;
		return *this;
	}

	// Jump over the bytes of the current character, never going beyond the end.
	m_pos += m_utf8Length;
	if (m_end < m_pos)
	{
		m_pos = m_end;
	}

	m_characterIndex = 0;
	readCharacter();
	return *this;
}
|
|
|
|
// Pre-decrement: steps back by one UTF-16 code unit. When the iterator sits on the
// second unit of a surrogate pair only the unit index changes; otherwise the byte
// position moves back to the start of the preceding character.
UTF8DocumentIterator& UTF8DocumentIterator::operator -- ()
{
	const bool onSecondUnitOfPair = (m_characterIndex == 1) && (m_utf16Length == 2);
	if (onSecondUnitOfPair)
	{
		m_characterIndex = 0;
		return *this;
	}

	--m_pos;
	// Walk back over UTF-8 continuation bytes (10xxxxxx) to reach the lead byte,
	// stopping at the start of the document.
	while ((m_doc->CharAt(m_pos) & 0xC0) == 0x80 && m_pos > 0)
	{
		--m_pos;
	}

	readCharacter();

	// Arriving from the right on a surrogate pair lands on its second code unit.
	if (m_utf16Length == 2)
	{
		m_characterIndex = 1;
	}
	return *this;
}
|
|
|
|
void UTF8DocumentIterator::readCharacter()
|
|
{
|
|
unsigned char currentChar = m_doc->CharAt(m_pos);
|
|
if (currentChar & 0x80)
|
|
{
|
|
int mask = 0x40;
|
|
int nBytes = 1;
|
|
|
|
do
|
|
{
|
|
mask >>= 1;
|
|
++nBytes;
|
|
} while (currentChar & mask);
|
|
|
|
int result = currentChar & m_firstByteMask[nBytes];
|
|
Sci::Position pos = m_pos;
|
|
m_utf8Length = 1;
|
|
// work out the unicode point, and count the actual bytes.
|
|
// If a byte does not start with 10xxxxxx then it's not part of the
|
|
// the code. Therefore invalid UTF-8 encodings are dealt with, simply by stopping when
|
|
// the UTF8 extension bytes are no longer valid.
|
|
while ((--nBytes) && (pos < m_end) && (0x80 == ((currentChar = m_doc->CharAt(++pos)) & 0xC0)))
|
|
{
|
|
result = (result << 6) | (currentChar & 0x3F);
|
|
++m_utf8Length;
|
|
}
|
|
|
|
if (result >= 0x10000)
|
|
{
|
|
result -= 0x10000;
|
|
m_utf16Length = 2;
|
|
// UTF-16 Pair
|
|
m_character[0] = static_cast<wchar_t>(0xD800 + (result >> 10));
|
|
m_character[1] = static_cast<wchar_t>(0xDC00 + (result & 0x3FF));
|
|
|
|
}
|
|
else
|
|
{
|
|
m_utf16Length = 1;
|
|
m_character[0] = static_cast<wchar_t>(result);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_utf8Length = 1;
|
|
m_utf16Length = 1;
|
|
m_characterIndex = 0;
|
|
m_character[0] = static_cast<wchar_t>(currentChar);
|
|
}
|
|
}
|
|
|
|
|
|
// Payload-bit masks for the first byte of a UTF-8 sequence, indexed by the
// sequence length in bytes that readCharacter() derives from that byte.
// Entry n keeps the low (7 - n) bits; valid indices are 0..6.
const unsigned char UTF8DocumentIterator::m_firstByteMask[7] = { 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
|