mirror of
				https://github.com/notepad-plus-plus/notepad-plus-plus.git
				synced 2025-10-31 03:24:04 +01:00 
			
		
		
		
	Update with https://www.scintilla.org/scintilla521.zip https://www.scintilla.org/lexilla515.zip - fix setting to bring Scintilla::PositionCR from ScintillaStructures.h inline with Sci_Position.h Sci_PositionCR - add workaround to enable lexer for searchResult commented out SCI_SETILEXER call on searchResult to get one result which is correctly handled by the lexer, added comment about the current problem with property @MarkingsStruct which seems to disappear after call to SCI_SETILEXER or CreateLexer - corrected usage of ObjC lexer - removed unnecessary filter stuff - use own sections for scintilla and lexilla build targets and allow parallel builds - as libscilex is no longer existing, changed to libscintilla - adapt makefiles and cmake - use VS2019 - started simple changes for createlexer adaptations, nullpointercheck missing on return of lexer name from deprecated LexerNameFromID -> undefined behaviour - movement from id -> lexer name, mostly done via LexerNameFromID + switching off corresponding compiler warning - changed to SCI_SETILEXER from SCI_SETLEXER, SCI_SETLEXERLANGUAGE needs to be corrected, see Scintilla5Migration.html - just commented out: SCI_LOADLEXERLIBRARY Fix #10504, close #11419
		
			
				
	
	
		
			406 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			406 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Scintilla Lexer for EDIFACT
 | |
| // @file LexEDIFACT.cxx
 | |
| // Written by Iain Clarke, IMCSoft & Inobiz AB.
 | |
| // EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
 | |
| // and more readably here: https://en.wikipedia.org/wiki/EDIFACT
 | |
| // This code is subject to the same license terms as the rest of the scintilla project:
 | |
| // The License.txt file describes the conditions under which this software may be distributed.
 | |
| //
 | |
| 
 | |
| // Header order must match order in scripts/HeaderOrder.txt
 | |
| #include <cstdlib>
 | |
| #include <cassert>
 | |
| #include <cstring>
 | |
| #include <cctype>
 | |
| 
 | |
| #include <string>
 | |
| #include <string_view>
 | |
| 
 | |
| #include "ILexer.h"
 | |
| #include "Scintilla.h"
 | |
| #include "SciLexer.h"
 | |
| 
 | |
| #include "LexAccessor.h"
 | |
| #include "LexerModule.h"
 | |
| #include "DefaultLexer.h"
 | |
| 
 | |
| using namespace Scintilla;
 | |
| using namespace Lexilla;
 | |
| 
 | |
| class LexerEDIFACT : public DefaultLexer
 | |
| {
 | |
| public:
 | |
| 	LexerEDIFACT();
 | |
| 	virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
 | |
| 
 | |
| 	static ILexer5 *Factory() {
 | |
| 		return new LexerEDIFACT;
 | |
| 	}
 | |
| 
 | |
| 	int SCI_METHOD Version() const override
 | |
| 	{
 | |
| 		return lvRelease5;
 | |
| 	}
 | |
| 	void SCI_METHOD Release() override
 | |
| 	{
 | |
| 		delete this;
 | |
| 	}
 | |
| 
 | |
| 	const char * SCI_METHOD PropertyNames() override
 | |
| 	{
 | |
| 		return "fold\nlexer.edifact.highlight.un.all";
 | |
| 	}
 | |
| 	int SCI_METHOD PropertyType(const char *) override
 | |
| 	{
 | |
| 		return SC_TYPE_BOOLEAN; // Only one property!
 | |
| 	}
 | |
| 	const char * SCI_METHOD DescribeProperty(const char *name) override
 | |
| 	{
 | |
| 		if (!strcmp(name, "fold"))
 | |
| 			return "Whether to apply folding to document or not";
 | |
| 		if (!strcmp(name, "lexer.edifact.highlight.un.all"))
 | |
| 			return "Whether to apply UN* highlighting to all UN segments, or just to UNH";
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
 | |
| 	{
 | |
| 		if (!strcmp(key, "fold"))
 | |
| 		{
 | |
| 			m_bFold = strcmp(val, "0") ? true : false;
 | |
| 			return 0;
 | |
| 		}
 | |
| 		if (!strcmp(key, "lexer.edifact.highlight.un.all"))	// GetProperty
 | |
| 		{
 | |
| 			m_bHighlightAllUN = strcmp(val, "0") ? true : false;
 | |
| 			return 0;
 | |
| 		}
 | |
| 		return -1;
 | |
| 	}
 | |
| 
 | |
| 	const char * SCI_METHOD PropertyGet(const char *key) override
 | |
| 	{
 | |
| 		m_lastPropertyValue = "";
 | |
| 		if (!strcmp(key, "fold"))
 | |
| 		{
 | |
| 			m_lastPropertyValue = m_bFold ? "1" : "0";
 | |
| 		}
 | |
| 		if (!strcmp(key, "lexer.edifact.highlight.un.all"))	// GetProperty
 | |
| 		{
 | |
| 			m_lastPropertyValue = m_bHighlightAllUN ? "1" : "0";
 | |
| 		}
 | |
| 		return m_lastPropertyValue.c_str();
 | |
| 	}
 | |
| 
 | |
| 	const char * SCI_METHOD DescribeWordListSets() override
 | |
| 	{
 | |
| 		return NULL;
 | |
| 	}
 | |
| 	Sci_Position SCI_METHOD WordListSet(int, const char *) override
 | |
| 	{
 | |
| 		return -1;
 | |
| 	}
 | |
| 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 | |
| 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 | |
| 	void * SCI_METHOD PrivateCall(int, void *) override
 | |
| 	{
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| protected:
 | |
| 	Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
 | |
| 	Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
 | |
| 	Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
 | |
| 	int DetectSegmentHeader(char SegmentHeader[3]) const;
 | |
| 
 | |
| 	bool m_bFold;
 | |
| 
 | |
| 	// property lexer.edifact.highlight.un.all
 | |
| 	//	Set to 0 to highlight only UNA segments, or 1 to highlight all UNx segments.
 | |
| 	bool m_bHighlightAllUN;
 | |
| 
 | |
| 	char m_chComponent;
 | |
| 	char m_chData;
 | |
| 	char m_chDecimal;
 | |
| 	char m_chRelease;
 | |
| 	char m_chSegment;
 | |
| 
 | |
| 	std::string m_lastPropertyValue;
 | |
| };
 | |
| 
 | |
| LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
 | |
| 
 | |
| ///////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 
 | |
| ///////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| LexerEDIFACT::LexerEDIFACT() : DefaultLexer("edifact", SCLEX_EDIFACT)
 | |
| {
 | |
| 	m_bFold = false;
 | |
| 	m_bHighlightAllUN = false;
 | |
| 	m_chComponent = ':';
 | |
| 	m_chData = '+';
 | |
| 	m_chDecimal = '.';
 | |
| 	m_chRelease = '?';
 | |
| 	m_chSegment = '\'';
 | |
| }
 | |
| 
 | |
| void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
 | |
| {
 | |
| 	Sci_PositionU posFinish = startPos + length;
 | |
| 	InitialiseFromUNA(pAccess, posFinish);
 | |
| 
 | |
| 	// Look backwards for a ' or a document beginning
 | |
| 	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
 | |
| 	// And jump past the ' if this was not the beginning of the document
 | |
| 	if (posCurrent != 0)
 | |
| 		posCurrent++;
 | |
| 
 | |
| 	// Style buffer, so we're not issuing loads of notifications
 | |
| 	LexAccessor styler (pAccess);
 | |
| 	pAccess->StartStyling(posCurrent);
 | |
| 	styler.StartSegment(posCurrent);
 | |
| 	Sci_Position posSegmentStart = -1;
 | |
| 
 | |
| 	while ((posCurrent < posFinish) && (posSegmentStart == -1))
 | |
| 	{
 | |
| 		posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
 | |
| 		// Mark whitespace as default
 | |
| 		styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
 | |
| 		if (posCurrent >= posFinish)
 | |
| 			break;
 | |
| 
 | |
| 		// Does is start with 3 charaters? ie, UNH
 | |
| 		char SegmentHeader[4] = { 0 };
 | |
| 		pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
 | |
| 
 | |
| 		int SegmentStyle = DetectSegmentHeader(SegmentHeader);
 | |
| 		if (SegmentStyle == SCE_EDI_BADSEGMENT)
 | |
| 			break;
 | |
| 		if (SegmentStyle == SCE_EDI_UNA)
 | |
| 		{
 | |
| 			posCurrent += 9;
 | |
| 			styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
 | |
| 			continue;
 | |
| 		}
 | |
| 		posSegmentStart = posCurrent;
 | |
| 		posCurrent += 3;
 | |
| 
 | |
| 		styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
 | |
| 
 | |
| 		// Colour in the rest of the segment
 | |
| 		for (char c; posCurrent < posFinish; posCurrent++)
 | |
| 		{
 | |
| 			pAccess->GetCharRange(&c, posCurrent, 1);
 | |
| 
 | |
| 			if (c == m_chRelease) // ? escape character, check first, in case of ?'
 | |
| 				posCurrent++;
 | |
| 			else if (c == m_chSegment) // '
 | |
| 			{
 | |
| 				// Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
 | |
| 				Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
 | |
| 				Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
 | |
| 				if (lineSegmentStart == lineSegmentEnd)
 | |
| 					styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
 | |
| 				else
 | |
| 					styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
 | |
| 				posSegmentStart = -1;
 | |
| 				posCurrent++;
 | |
| 				break;
 | |
| 			}
 | |
| 			else if (c == m_chComponent) // :
 | |
| 				styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
 | |
| 			else if (c == m_chData) // +
 | |
| 				styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
 | |
| 			else
 | |
| 				styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
 | |
| 		}
 | |
| 	}
 | |
| 	styler.Flush();
 | |
| 
 | |
| 	if (posSegmentStart == -1)
 | |
| 		return;
 | |
| 
 | |
| 	pAccess->StartStyling(posSegmentStart);
 | |
| 	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
 | |
| }
 | |
| 
 | |
| void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
 | |
| {
 | |
| 	if (!m_bFold)
 | |
| 		return;
 | |
| 
 | |
| 	Sci_PositionU endPos = startPos + length;
 | |
| 	startPos = FindPreviousEnd(pAccess, startPos);
 | |
| 	char c;
 | |
| 	char SegmentHeader[4] = { 0 };
 | |
| 
 | |
| 	bool AwaitingSegment = true;
 | |
| 	Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
 | |
| 	int levelCurrentStyle = SC_FOLDLEVELBASE;
 | |
| 	if (currLine > 0)
 | |
| 		levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
 | |
| 	int indentCurrent = levelCurrentStyle & SC_FOLDLEVELNUMBERMASK;
 | |
| 	int indentNext = indentCurrent;
 | |
| 
 | |
| 	while (startPos < endPos)
 | |
| 	{
 | |
| 		pAccess->GetCharRange(&c, startPos, 1);
 | |
| 		switch (c)
 | |
| 		{
 | |
| 		case '\t':
 | |
| 		case '\r':
 | |
| 		case ' ':
 | |
| 			startPos++;
 | |
| 			continue;
 | |
| 		case '\n':
 | |
| 			currLine = pAccess->LineFromPosition(startPos);
 | |
| 			pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
 | |
| 			startPos++;
 | |
| 			levelCurrentStyle = SC_FOLDLEVELBASE;
 | |
| 			indentCurrent = indentNext;
 | |
| 			continue;
 | |
| 		}
 | |
| 		if (c == m_chRelease)
 | |
| 		{
 | |
| 			startPos += 2;
 | |
| 			continue;
 | |
| 		}
 | |
| 		if (c == m_chSegment)
 | |
| 		{
 | |
| 			AwaitingSegment = true;
 | |
| 			startPos++;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		if (!AwaitingSegment)
 | |
| 		{
 | |
| 			startPos++;
 | |
| 			continue;
 | |
| 		}
 | |
| 		
 | |
| 		// Segment!
 | |
| 		pAccess->GetCharRange(SegmentHeader, startPos, 3);
 | |
| 		if (SegmentHeader[0] != 'U' || SegmentHeader[1] != 'N')
 | |
| 		{
 | |
| 			startPos++;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		AwaitingSegment = false;
 | |
| 		switch (SegmentHeader[2])
 | |
| 		{
 | |
| 		case 'H':
 | |
| 		case 'G':
 | |
| 			indentNext++;
 | |
| 			levelCurrentStyle = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
 | |
| 			break;
 | |
| 
 | |
| 		case 'T':
 | |
| 		case 'E':
 | |
| 			if (indentNext > 0)
 | |
| 				indentNext--;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		startPos += 3;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
 | |
| {
 | |
| 	MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? '
 | |
| 
 | |
| 	Sci_PositionU startPos = 0;
 | |
| 	startPos += ForwardPastWhitespace(pAccess, 0, MaxLength);
 | |
| 	if (startPos < MaxLength)
 | |
| 	{
 | |
| 		char bufUNA[9];
 | |
| 		pAccess->GetCharRange(bufUNA, startPos, 9);
 | |
| 
 | |
| 		// Check it's UNA segment
 | |
| 		if (!memcmp(bufUNA, "UNA", 3))
 | |
| 		{
 | |
| 			m_chComponent = bufUNA[3];
 | |
| 			m_chData = bufUNA[4];
 | |
| 			m_chDecimal = bufUNA[5];
 | |
| 			m_chRelease = bufUNA[6];
 | |
| 			// bufUNA [7] should be space - reserved.
 | |
| 			m_chSegment = bufUNA[8];
 | |
| 
 | |
| 			return 0; // success!
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// We failed to find a UNA, so drop to defaults
 | |
| 	m_chComponent = ':';
 | |
| 	m_chData = '+';
 | |
| 	m_chDecimal = '.';
 | |
| 	m_chRelease = '?';
 | |
| 	m_chSegment = '\'';
 | |
| 
 | |
| 	return -1;
 | |
| }
 | |
| 
 | |
| Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
 | |
| {
 | |
| 	char c;
 | |
| 
 | |
| 	while (startPos < MaxLength)
 | |
| 	{
 | |
| 		pAccess->GetCharRange(&c, startPos, 1);
 | |
| 		switch (c)
 | |
| 		{
 | |
| 		case '\t':
 | |
| 		case '\r':
 | |
| 		case '\n':
 | |
| 		case ' ':
 | |
| 			break;
 | |
| 		default:
 | |
| 			return startPos;
 | |
| 		}
 | |
| 
 | |
| 		startPos++;
 | |
| 	}
 | |
| 
 | |
| 	return MaxLength;
 | |
| }
 | |
| 
 | |
| int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
 | |
| {
 | |
| 	if (
 | |
| 		SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' ||
 | |
| 		SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' ||
 | |
| 		SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z')
 | |
| 		return SCE_EDI_BADSEGMENT;
 | |
| 
 | |
| 	if (!memcmp(SegmentHeader, "UNA", 3))
 | |
| 		return SCE_EDI_UNA;
 | |
| 
 | |
| 	if (m_bHighlightAllUN && !memcmp(SegmentHeader, "UN", 2))
 | |
| 		return SCE_EDI_UNH;
 | |
| 	else if (!memcmp(SegmentHeader, "UNH", 3))
 | |
| 		return SCE_EDI_UNH;
 | |
| 	else if (!memcmp(SegmentHeader, "UNG", 3))
 | |
| 		return SCE_EDI_UNH;
 | |
| 
 | |
| 	return SCE_EDI_SEGMENTSTART;
 | |
| }
 | |
| 
 | |
| // Look backwards for a ' or a document beginning
 | |
| Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
 | |
| {
 | |
| 	for (char c; startPos > 0; startPos--)
 | |
| 	{
 | |
| 		pAccess->GetCharRange(&c, startPos, 1);
 | |
| 		if (c == m_chSegment)
 | |
| 			return startPos;
 | |
| 	}
 | |
| 	// We didn't find a ', so just go with the beginning
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| 
 |