notepad-plus-plus/lexilla/lexers/LexHollywood.cxx

// Scintilla source code edit control
/** @file LexHollywood.cxx
 ** Lexer for Hollywood
 ** Written by Andreas Falkenhahn, based on the BlitzBasic/PureBasic/Lua lexers
 ** Thanks to Nicholai Benalal
 ** For more information on Hollywood, see http://www.hollywood-mal.com/
 ** Mail me (andreas <at> airsoftsoftwair <dot> de) for any bugs.
 ** This code is subject to the same license terms as the rest of the Scintilla project:
 ** The License.txt file describes the conditions under which this software may be distributed.
 **/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include <string>
#include <string_view>
#include <map>
#include <functional>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"

using namespace Scintilla;
using namespace Lexilla;

/* Bits:
 * 1  - whitespace
 * 2  - operator
 * 4  - identifier
 * 8  - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 * 64 - letter
 */
static int character_classification[128] =
{
	0, // NUL ($0)
	0, // SOH ($1)
	0, // STX ($2)
	0, // ETX ($3)
	0, // EOT ($4)
	0, // ENQ ($5)
	0, // ACK ($6)
	0, // BEL ($7)
	0, // BS ($8)
	1, // HT ($9)
	1, // LF ($A)
	0, // VT ($B)
	0, // FF ($C)
	1, // CR ($D)
	0, // SO ($E)
	0, // SI ($F)
	0, // DLE ($10)
	0, // DC1 ($11)
	0, // DC2 ($12)
	0, // DC3 ($13)
	0, // DC4 ($14)
	0, // NAK ($15)
	0, // SYN ($16)
	0, // ETB ($17)
	0, // CAN ($18)
	0, // EM ($19)
	0, // SUB ($1A)
	0, // ESC ($1B)
	0, // FS ($1C)
	0, // GS ($1D)
	0, // RS ($1E)
	0, // US ($1F)
	1, // space ($20)
	4, // ! ($21)
	0, // " ($22)
	0, // # ($23)
	4, // $ ($24)
	2, // % ($25)
	2, // & ($26)
	2, // ' ($27)
	2, // ( ($28)
	2, // ) ($29)
	2, // * ($2A)
	2, // + ($2B)
	2, // , ($2C)
	2, // - ($2D)
	// NB: we treat "." as an identifier although it is also an operator and a decimal digit
	// the reason why we treat it as an identifier is to support syntax highlighting for
	// plugin commands which always use a "." in their names, e.g. pdf.OpenDocument();
	// we handle the decimal digit case manually below so that 3.1415 and .123 is styled correctly
	// the collateral damage of treating "." as an identifier is that "." is never styled
	// SCE_HOLLYWOOD_OPERATOR
	4, // . ($2E)
	2, // / ($2F)
	28, // 0 ($30)
	28, // 1 ($31)
	28, // 2 ($32)
	28, // 3 ($33)
	28, // 4 ($34)
	28, // 5 ($35)
	28, // 6 ($36)
	28, // 7 ($37)
	28, // 8 ($38)
	28, // 9 ($39)
	2, // : ($3A)
	2, // ; ($3B)
	2, // < ($3C)
	2, // = ($3D)
	2, // > ($3E)
	2, // ? ($3F)
	0, // @ ($40)
	84, // A ($41)
	84, // B ($42)
	84, // C ($43)
	84, // D ($44)
	84, // E ($45)
	84, // F ($46)
	68, // G ($47)
	68, // H ($48)
	68, // I ($49)
	68, // J ($4A)
	68, // K ($4B)
	68, // L ($4C)
	68, // M ($4D)
	68, // N ($4E)
	68, // O ($4F)
	68, // P ($50)
	68, // Q ($51)
	68, // R ($52)
	68, // S ($53)
	68, // T ($54)
	68, // U ($55)
	68, // V ($56)
	68, // W ($57)
	68, // X ($58)
	68, // Y ($59)
	68, // Z ($5A)
	2, // [ ($5B)
	2, // \ ($5C)
	2, // ] ($5D)
	2, // ^ ($5E)
	68, // _ ($5F)
	2, // ` ($60)
	84, // a ($61)
	84, // b ($62)
	84, // c ($63)
	84, // d ($64)
	84, // e ($65)
	84, // f ($66)
	68, // g ($67)
	68, // h ($68)
	68, // i ($69)
	68, // j ($6A)
	68, // k ($6B)
	68, // l ($6C)
	68, // m ($6D)
	68, // n ($6E)
	68, // o ($6F)
	68, // p ($70)
	68, // q ($71)
	68, // r ($72)
	68, // s ($73)
	68, // t ($74)
	68, // u ($75)
	68, // v ($76)
	68, // w ($77)
	68, // x ($78)
	68, // y ($79)
	68, // z ($7A)
	2, // { ($7B)
	2, // | ($7C)
	2, // } ($7D)
	2, // ~ ($7E)
	0, // &#127; ($7F)
};

static bool IsSpace(int c) {
	return c < 128 && (character_classification[c] & 1);
}

static bool IsOperator(int c) {
	return c < 128 && (character_classification[c] & 2);
}

static bool IsIdentifier(int c) {
	return c < 128 && (character_classification[c] & 4);
}

static bool IsDigit(int c) {
	return c < 128 && (character_classification[c] & 8);
}

static bool IsHexDigit(int c) {
	return c < 128 && (character_classification[c] & 16);
}

static int LowerCase(int c)
{
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}

static int CheckHollywoodFoldPoint(char const *token) {
	if (!strcmp(token, "function")) {
		return 1;
	}
	if (!strcmp(token, "endfunction")) {
		return -1;
	}
	return 0;
}

// An individual named option for use in an OptionSet

// Options used for LexerHollywood
struct OptionsHollywood {
	bool fold;
	bool foldCompact;
	OptionsHollywood() {
		fold = false;
		foldCompact = false;
	}
};

static const char * const hollywoodWordListDesc[] = {
	"Hollywood keywords",
	"Hollywood standard API functions",
	"Hollywood plugin API functions",
	"Hollywood plugin methods",
	0
};

struct OptionSetHollywood : public OptionSet<OptionsHollywood> {
	OptionSetHollywood(const char * const wordListDescriptions[]) {
		DefineProperty("fold", &OptionsHollywood::fold);
		DefineProperty("fold.compact", &OptionsHollywood::foldCompact);
		DefineWordListSets(wordListDescriptions);
	}
};

class LexerHollywood : public DefaultLexer {
	int (*CheckFoldPoint)(char const *);
	WordList keywordlists[4];
	OptionsHollywood options;
	OptionSetHollywood osHollywood;
public:
	LexerHollywood(int (*CheckFoldPoint_)(char const *), const char * const wordListDescriptions[]) :
						 DefaultLexer("hollywood", SCLEX_HOLLYWOOD),
						 CheckFoldPoint(CheckFoldPoint_),
						 osHollywood(wordListDescriptions) {
	}
	virtual ~LexerHollywood() {
	}
	void SCI_METHOD Release() override {
		delete this;
	}
	int SCI_METHOD Version() const override {
		return lvRelease5;
	}
	const char * SCI_METHOD PropertyNames() override {
		return osHollywood.PropertyNames();
	}
	int SCI_METHOD PropertyType(const char *name) override {
		return osHollywood.PropertyType(name);
	}
	const char * SCI_METHOD DescribeProperty(const char *name) override {
		return osHollywood.DescribeProperty(name);
	}
	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
	const char * SCI_METHOD PropertyGet(const char* key) override {
		return osHollywood.PropertyGet(key);
	}
	const char * SCI_METHOD DescribeWordListSets() override {
		return osHollywood.DescribeWordListSets();
	}
	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;

	void * SCI_METHOD PrivateCall(int, void *) override {
		return 0;
	}
	static ILexer5 *LexerFactoryHollywood() {
		return new LexerHollywood(CheckHollywoodFoldPoint, hollywoodWordListDesc);
	}
};

Sci_Position SCI_METHOD LexerHollywood::PropertySet(const char *key, const char *val) {
	if (osHollywood.PropertySet(&options, key, val)) {
		return 0;
	}
	return -1;
}

Sci_Position SCI_METHOD LexerHollywood::WordListSet(int n, const char *wl) {
	WordList *wordListN = 0;
	switch (n) {
	case 0:
		wordListN = &keywordlists[0];
		break;
	case 1:
		wordListN = &keywordlists[1];
		break;
	case 2:
		wordListN = &keywordlists[2];
		break;
	case 3:
		wordListN = &keywordlists[3];
		break;
	}
	Sci_Position firstModification = -1;
	if (wordListN) {
		WordList wlNew;
		wlNew.Set(wl);
		if (*wordListN != wlNew) {
			wordListN->Set(wl);
			firstModification = 0;
		}
	}
	return firstModification;
}

void SCI_METHOD LexerHollywood::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);

	styler.StartAt(startPos);
	bool inString = false;

	StyleContext sc(startPos, length, initStyle, styler);

	// Can't use sc.More() here else we miss the last character
	for (; ; sc.Forward())
	 {
	 	if (sc.atLineStart) inString = false;

	 	if (sc.ch == '\"' && sc.chPrev != '\\') inString = !inString;

		if (sc.state == SCE_HOLLYWOOD_IDENTIFIER) {
			if (!IsIdentifier(sc.ch)) {
				char s[100];
				int kstates[4] = {
					SCE_HOLLYWOOD_KEYWORD,
					SCE_HOLLYWOOD_STDAPI,
					SCE_HOLLYWOOD_PLUGINAPI,
					SCE_HOLLYWOOD_PLUGINMETHOD,
				};
				sc.GetCurrentLowered(s, sizeof(s));
				for (int i = 0; i < 4; i++) {
					if (keywordlists[i].InList(s)) {
						sc.ChangeState(kstates[i]);
					}
				}
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_OPERATOR) {

			// always reset to default on operators because otherwise
			// comments won't be recognized in sequences like "+/* Hello*/"
			// --> "+/*" would be recognized as a sequence of operators

			// if (!IsOperator(sc.ch)) sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			sc.SetState(SCE_HOLLYWOOD_DEFAULT);

		} else if (sc.state == SCE_HOLLYWOOD_PREPROCESSOR) {
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_CONSTANT) {
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_NUMBER) {
			if (!IsDigit(sc.ch) && sc.ch != '.')
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_HEXNUMBER) {
			if (!IsHexDigit(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_STRING) {
			if (sc.ch == '"') {
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENT) {
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENTBLOCK) {
			if (sc.Match("*/") && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_STRINGBLOCK) {
			if (sc.Match("]]") && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		}

		if (sc.state == SCE_HOLLYWOOD_DEFAULT) {
			if (sc.Match(';')) {
				sc.SetState(SCE_HOLLYWOOD_COMMENT);
			} else if (sc.Match("/*")) {
				sc.SetState(SCE_HOLLYWOOD_COMMENTBLOCK);
				sc.Forward();
			} else if (sc.Match("[[")) {
				sc.SetState(SCE_HOLLYWOOD_STRINGBLOCK);
				sc.Forward();
			} else if (sc.Match('"')) {
				sc.SetState(SCE_HOLLYWOOD_STRING);
			} else if (sc.Match('$')) {
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
			} else if (sc.Match("0x") || sc.Match("0X")) {  // must be before IsDigit() because of 0x
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
				sc.Forward();
			} else if (sc.ch == '.' && (sc.chNext >= '0' && sc.chNext <= '9')) {  // ".1234" style numbers
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
				sc.Forward();
			} else if (IsDigit(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
			} else if (sc.Match('#')) {
				sc.SetState(SCE_HOLLYWOOD_CONSTANT);
			} else if (sc.Match('@')) {
				sc.SetState(SCE_HOLLYWOOD_PREPROCESSOR);
			} else if (IsOperator(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_OPERATOR);
			} else if (IsIdentifier(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_IDENTIFIER);
			}
		}

		if (!sc.More())
			break;
	}
	sc.Complete();
}

void SCI_METHOD LexerHollywood::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {

	if (!options.fold)
		return;

	LexAccessor styler(pAccess);

	Sci_PositionU lengthDoc = startPos + length;
	int visibleChars = 0;
	Sci_Position lineCurrent = styler.GetLine(startPos);
	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
	int levelCurrent = levelPrev;
	char chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int done = 0;
	char word[256];
	int wordlen = 0;

	for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		int style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
		if (!done) {
			if (wordlen) { // are we scanning a token already?
				word[wordlen] = static_cast<char>(LowerCase(ch));
				if (!IsIdentifier(ch)) { // done with token
					word[wordlen] = '\0';
					levelCurrent += CheckFoldPoint(word);
					done = 1;
				} else if (wordlen < 255) {
					wordlen++;
				}
			} else { // start scanning at first non-whitespace character
				if (!IsSpace(ch)) {
					if (style != SCE_HOLLYWOOD_COMMENTBLOCK && IsIdentifier(ch)) {
						word[0] = static_cast<char>(LowerCase(ch));
						wordlen = 1;
					} else // done with this line
						done = 1;
				}
			}
		}

		if (atEOL) {
			int lev = levelPrev;
			if (visibleChars == 0 && options.foldCompact) {
				lev |= SC_FOLDLEVELWHITEFLAG;
			}
			if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
				lev |= SC_FOLDLEVELHEADERFLAG;
			}
			if (lev != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, lev);
			}
			lineCurrent++;
			levelPrev = levelCurrent;
			visibleChars = 0;
			done = 0;
			wordlen = 0;
		}
		if (!IsSpace(ch)) {
			visibleChars++;
		}
	}
	// Fill in the real level of the next line, keeping the current flags as they will be filled in later

	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}

extern const LexerModule lmHollywood(SCLEX_HOLLYWOOD, LexerHollywood::LexerFactoryHollywood, "hollywood", hollywoodWordListDesc);