mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-04-08 17:15:37 +02:00
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip ) Released 21 August 2024. Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530. Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth. Add SCI_STYLESETSTRETCH to support condensed and expanded text styles. Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524. Fix bug on Cocoa where double-click stopped working when system had been running for a long time. On Cocoa implement more values of font weight and stretch. Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip ) Released 21 August 2024. Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers. Lexer added for TOML "toml". Bash: Handle backslash in heredoc delimiter. Issue #257. Progress: Fix lexing of nested comments. Pull request #258. Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259. Close #15564
401 lines
12 KiB
C++
401 lines
12 KiB
C++
// Scintilla Lexer for X12
|
|
// @file LexX12.cxx
|
|
// Written by Iain Clarke, IMCSoft & Inobiz AB.
|
|
// X12 official documentation is behind a paywall, but there's a description of the syntax here:
|
|
// http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html
|
|
// This code is subject to the same license terms as the rest of the scintilla project:
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
//
|
|
|
|
// Header order must match order in scripts/HeaderOrder.txt
|
|
#include <cstdlib>
|
|
#include <cassert>
|
|
#include <cstring>
|
|
#include <cctype>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
#include <vector>
|
|
#include <algorithm>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
#include "LexerModule.h"
|
|
#include "DefaultLexer.h"
|
|
|
|
using namespace Scintilla;
|
|
using namespace Lexilla;
|
|
|
|
class LexerX12 : public DefaultLexer
|
|
{
|
|
public:
|
|
LexerX12();
|
|
virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer
|
|
|
|
static ILexer5 *Factory() {
|
|
return new LexerX12;
|
|
}
|
|
|
|
int SCI_METHOD Version() const override
|
|
{
|
|
return lvRelease5;
|
|
}
|
|
void SCI_METHOD Release() override
|
|
{
|
|
delete this;
|
|
}
|
|
|
|
const char * SCI_METHOD PropertyNames() override
|
|
{
|
|
return "fold";
|
|
}
|
|
int SCI_METHOD PropertyType(const char *) override
|
|
{
|
|
return SC_TYPE_BOOLEAN; // Only one property!
|
|
}
|
|
const char * SCI_METHOD DescribeProperty(const char *name) override
|
|
{
|
|
if (!strcmp(name, "fold"))
|
|
return "Whether to apply folding to document or not";
|
|
return "";
|
|
}
|
|
|
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
|
|
{
|
|
if (!strcmp(key, "fold"))
|
|
{
|
|
m_bFold = strcmp(val, "0") ? true : false;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
const char * SCI_METHOD PropertyGet(const char *) override {
|
|
return "";
|
|
}
|
|
const char * SCI_METHOD DescribeWordListSets() override
|
|
{
|
|
return "";
|
|
}
|
|
Sci_Position SCI_METHOD WordListSet(int, const char *) override
|
|
{
|
|
return -1;
|
|
}
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
void * SCI_METHOD PrivateCall(int, void *) override
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
protected:
|
|
struct Terminator
|
|
{
|
|
int Style = SCE_X12_BAD;
|
|
Sci_PositionU pos = 0;
|
|
Sci_PositionU length = 0;
|
|
int FoldChange = 0;
|
|
};
|
|
Terminator InitialiseFromISA(IDocument *pAccess);
|
|
Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const;
|
|
Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const;
|
|
Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const;
|
|
|
|
bool m_bFold = false;
|
|
char m_SeparatorSubElement = 0;
|
|
char m_SeparatorElement = 0;
|
|
std::string m_SeparatorSegment; // might be multiple characters
|
|
std::string m_LineFeed;
|
|
};
|
|
|
|
extern const LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12");
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
LexerX12::LexerX12() : DefaultLexer("x12", SCLEX_X12)
|
|
{
|
|
}
|
|
|
|
void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
|
|
{
|
|
Sci_PositionU posFinish = startPos + length;
|
|
|
|
Terminator T = InitialiseFromISA(pAccess);
|
|
|
|
if (T.Style == SCE_X12_BAD)
|
|
{
|
|
if (T.pos < startPos)
|
|
T.pos = startPos; // we may be colouring in batches.
|
|
pAccess->StartStyling(startPos);
|
|
pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE);
|
|
pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD);
|
|
return;
|
|
}
|
|
|
|
// Look backwards for a segment start or a document beginning
|
|
Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos);
|
|
|
|
// Style buffer, so we're not issuing loads of notifications
|
|
pAccess->StartStyling(posCurrent);
|
|
|
|
while (posCurrent < posFinish)
|
|
{
|
|
// Look for first element marker, so we can denote segment
|
|
T = DetectSegmentHeader(pAccess, posCurrent);
|
|
if (T.Style == SCE_X12_BAD)
|
|
break;
|
|
|
|
pAccess->SetStyleFor(T.pos - posCurrent, T.Style);
|
|
pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT);
|
|
posCurrent = T.pos + T.length;
|
|
|
|
while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending
|
|
{
|
|
T = FindNextTerminator(pAccess, posCurrent, false);
|
|
if (T.Style == SCE_X12_BAD)
|
|
break;
|
|
|
|
int Style = T.Style;
|
|
|
|
pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT);
|
|
pAccess->SetStyleFor(T.length, Style);
|
|
posCurrent = T.pos + T.length;
|
|
}
|
|
if (T.Style == SCE_X12_BAD)
|
|
break;
|
|
}
|
|
|
|
pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD);
|
|
}
|
|
|
|
void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
|
|
{
|
|
if (!m_bFold)
|
|
return;
|
|
|
|
// Are we even foldable?
|
|
// check for cr,lf,cr+lf.
|
|
if (m_LineFeed.empty())
|
|
return;
|
|
|
|
Sci_PositionU posFinish = startPos + length;
|
|
|
|
// Look backwards for a segment start or a document beginning
|
|
startPos = FindPreviousSegmentStart(pAccess, startPos);
|
|
Terminator T;
|
|
|
|
Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
|
|
int levelCurrentStyle = SC_FOLDLEVELBASE;
|
|
int indentCurrent = 0;
|
|
if (currLine > 0)
|
|
{
|
|
levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
|
|
indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1); // indent from previous line
|
|
Sci_PositionU posLine = pAccess->LineStart(currLine - 1);
|
|
T = DetectSegmentHeader(pAccess, posLine);
|
|
indentCurrent += T.FoldChange;
|
|
}
|
|
|
|
while (startPos < posFinish)
|
|
{
|
|
T = DetectSegmentHeader(pAccess, startPos);
|
|
int indentNext = indentCurrent + T.FoldChange;
|
|
if (indentNext < 0)
|
|
indentNext = 0;
|
|
|
|
levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE;
|
|
|
|
currLine = pAccess->LineFromPosition(startPos);
|
|
pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
|
|
|
|
T = FindNextTerminator(pAccess, startPos, true);
|
|
|
|
if (T.Style == SCE_X12_BAD)
|
|
break;
|
|
|
|
startPos = T.pos + T.length;
|
|
indentCurrent = indentNext;
|
|
}
|
|
}
|
|
|
|
LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess)
|
|
{
|
|
Sci_Position length = pAccess->Length();
|
|
if (length <= 108)
|
|
return { SCE_X12_BAD, 0 };
|
|
|
|
pAccess->GetCharRange(&m_SeparatorElement, 3, 1);
|
|
pAccess->GetCharRange(&m_SeparatorSubElement, 104, 1);
|
|
|
|
// Look for GS, as that's the next segment. Anything between 105 and GS/IEA is our segment separator.
|
|
Sci_Position posNextSegment;
|
|
char bufSegment[3] = { 0 };
|
|
for (posNextSegment = 105; posNextSegment < length - 3; posNextSegment++)
|
|
{
|
|
pAccess->GetCharRange(bufSegment, posNextSegment, 3);
|
|
if (!memcmp (bufSegment, "GS", 2) || !memcmp(bufSegment, "IEA", 3))
|
|
{
|
|
m_SeparatorSegment.resize(posNextSegment - 105);
|
|
pAccess->GetCharRange(&m_SeparatorSegment.at(0), 105, posNextSegment - 105);
|
|
|
|
// Is some of that CR+LF?
|
|
size_t nPos = m_SeparatorSegment.find_last_not_of("\r\n");
|
|
m_LineFeed = m_SeparatorSegment.substr(nPos + 1);
|
|
m_SeparatorSegment = m_SeparatorSegment.substr(0, nPos + 1);
|
|
break;
|
|
}
|
|
}
|
|
if (m_SeparatorSegment.empty() && m_LineFeed.empty())
|
|
{
|
|
return { SCE_X12_BAD, 105 };
|
|
}
|
|
|
|
// Validate we have an element separator, and it's not silly!
|
|
if (m_SeparatorElement == '\0' || m_SeparatorElement == '\n' || m_SeparatorElement == '\r')
|
|
return { SCE_X12_BAD, 3 };
|
|
|
|
// Validate we have an element separator, and it's not silly!
|
|
if (m_SeparatorSubElement == '\0' || m_SeparatorSubElement == '\n' || m_SeparatorSubElement == '\r')
|
|
return { SCE_X12_BAD, 103 };
|
|
if (m_SeparatorElement == m_SeparatorSubElement)
|
|
return { SCE_X12_BAD, 104 };
|
|
for (auto& c : m_SeparatorSegment)
|
|
{
|
|
if (m_SeparatorElement == c)
|
|
return { SCE_X12_BAD, 105 };
|
|
if (m_SeparatorSubElement == c)
|
|
return { SCE_X12_BAD, 105 };
|
|
}
|
|
|
|
// Check we have element markers at all the right places! ISA element has fixed entries.
|
|
std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103 };
|
|
for (auto i : ElementMarkers)
|
|
{
|
|
char c;
|
|
pAccess->GetCharRange(&c, i, 1);
|
|
if (c != m_SeparatorElement)
|
|
return { SCE_X12_BAD, i };
|
|
}
|
|
// Check we have no element markers anywhere else!
|
|
for (Sci_PositionU i = 0; i < 105; i++)
|
|
{
|
|
if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end())
|
|
continue;
|
|
|
|
char c;
|
|
pAccess->GetCharRange(&c, i, 1);
|
|
if (c == m_SeparatorElement)
|
|
return { SCE_X12_BAD, i };
|
|
}
|
|
|
|
return { SCE_X12_ENVELOPE };
|
|
}
|
|
|
|
Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const
|
|
{
|
|
Sci_PositionU length = pAccess->Length();
|
|
std::string bufTest = m_SeparatorSegment + m_LineFeed; // quick way of making the lengths the same
|
|
std::string bufCompare = bufTest;
|
|
|
|
for (; startPos > 0; startPos--)
|
|
{
|
|
if (startPos + bufTest.size() > length)
|
|
continue;
|
|
|
|
pAccess->GetCharRange(&bufTest.at(0), startPos, bufTest.size());
|
|
if (bufTest == bufCompare)
|
|
{
|
|
return startPos + bufTest.size();
|
|
}
|
|
}
|
|
// We didn't find a ', so just go with the beginning
|
|
return 0;
|
|
}
|
|
|
|
LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const
|
|
{
|
|
Sci_PositionU Length = pAccess->Length();
|
|
Length -= pos;
|
|
char c, Buf[4] = { 0 }; // max 3 + separator
|
|
for (Sci_PositionU posOffset = 0; posOffset < std::size(Buf) && posOffset < Length; posOffset++)
|
|
{
|
|
pAccess->GetCharRange(&c, pos + posOffset, 1);
|
|
if (c != m_SeparatorElement)
|
|
{
|
|
Buf[posOffset] = c;
|
|
continue;
|
|
}
|
|
|
|
// check for special segments, involved in folding start/stop.
|
|
if (memcmp(Buf, "ISA", 3) == 0)
|
|
return { SCE_X12_ENVELOPE, pos + posOffset, 1, +1 };
|
|
if (memcmp(Buf, "IEA", 3) == 0)
|
|
return { SCE_X12_ENVELOPE, pos + posOffset, 1, -1 };
|
|
if (memcmp(Buf, "GS", 2) == 0)
|
|
return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, +1 };
|
|
if (memcmp(Buf, "GE", 2) == 0)
|
|
return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, -1 };
|
|
if (memcmp(Buf, "ST", 2) == 0)
|
|
return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, +1 };
|
|
if (memcmp(Buf, "SE", 2) == 0)
|
|
return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, -1 };
|
|
return { SCE_X12_SEGMENTHEADER, pos + posOffset, 1, 0 };
|
|
}
|
|
return { SCE_X12_BAD, pos, 0, 0 };
|
|
}
|
|
|
|
LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const
|
|
{
|
|
char c;
|
|
Sci_PositionU length = pAccess->Length();
|
|
std::string bufTestSegment = m_SeparatorSegment; // quick way of making the lengths the same
|
|
std::string bufTestLineFeed = m_LineFeed; // quick way of making the lengths the same
|
|
|
|
|
|
while (pos < (Sci_PositionU)length)
|
|
{
|
|
pAccess->GetCharRange(&c, pos, 1);
|
|
if (pos + m_SeparatorSegment.size() > length)
|
|
bufTestSegment.clear(); // going up - so once we can't get this, we're done with the buffer.
|
|
else if (!bufTestSegment.empty())
|
|
pAccess->GetCharRange(&bufTestSegment.at(0), pos, bufTestSegment.size());
|
|
if (pos + m_LineFeed.size() > length)
|
|
bufTestLineFeed.clear(); // going up - so once we can't get this, we're done with the buffer.
|
|
else if (!bufTestLineFeed.empty())
|
|
pAccess->GetCharRange(&bufTestLineFeed.at(0), pos, bufTestLineFeed.size());
|
|
|
|
if (!bJustSegmentTerminator && c == m_SeparatorElement)
|
|
return { SCE_X12_SEP_ELEMENT, pos, 1 };
|
|
else if (!bJustSegmentTerminator && c == m_SeparatorSubElement)
|
|
return { SCE_X12_SEP_SUBELEMENT, pos, 1 };
|
|
else if (!m_SeparatorSegment.empty() && bufTestSegment == m_SeparatorSegment)
|
|
{
|
|
if (m_LineFeed.empty())
|
|
return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
|
|
// is this the end?
|
|
if (pos + m_SeparatorSegment.size() == length)
|
|
return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
|
|
// Check if we're followed by a linefeed.
|
|
if (pos + m_SeparatorSegment.size() + m_LineFeed.size() > length)
|
|
return { SCE_X12_BAD, pos };
|
|
bufTestSegment = m_LineFeed;
|
|
pAccess->GetCharRange(&bufTestSegment.at(0), pos + m_SeparatorSegment.size(), bufTestSegment.size());
|
|
if (bufTestSegment == m_LineFeed)
|
|
return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() + m_LineFeed.size() };
|
|
break;
|
|
}
|
|
else if (m_SeparatorSegment.empty() && bufTestLineFeed == m_LineFeed)
|
|
{
|
|
return { SCE_X12_SEGMENTEND, pos, m_LineFeed.size() };
|
|
}
|
|
pos++;
|
|
}
|
|
|
|
return { SCE_X12_BAD, pos };
|
|
}
|