Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

1635 lines
50 KiB
C++

/** @file LexRaku.cxx
** Lexer for Raku
**
** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
**/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
/*
* Raku (Perl6) Lexer for Scintilla
* ---------------------------------
* ---------------------------------
* 06-Dec-2019: More Unicode support:
* - Added a full scope of allowed numbers and letters
* 29-Nov-2019: More highlighting / implemented basic folding:
* - Operators (blanket cover, no sequence checking)
* - Class / Grammar name highlighting
* - Folding:
* - Comments: line / multi-line
* - POD sections
* - Code blocks {}
* 26-Nov-2019: Basic syntax highlighting covering the following:
* - Comments, both line and embedded (multi-line)
* - POD, no inline highlighting as yet...
* - Heredoc block string, with variable highlighting (with qq)
* - Strings, with variable highlighting (with ")
* - Q Language, including adverbs (also basic q and qq)
* - Regex, including adverbs
* - Numbers
* - Bareword / identifiers
* - Types
* - Variables: mu, positional, associative, callable
* TODO:
* - POD inline
* - Better operator sequence coverage
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace { // anonymous namespace to isolate any name clashes
/*----------------------------------------------------------------------------*
* --- DEFINITIONS: OPTIONS / CONSTANTS ---
*----------------------------------------------------------------------------*/
// Number types
// The numeric order of these codes is significant (see the per-entry notes).
#define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
#define RAKUNUM_OCTAL 2
#define RAKUNUM_FLOAT_EXP 3 // exponent part only
#define RAKUNUM_HEX 4 // may be a hex float
#define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
#define RAKUNUM_VECTOR 6
#define RAKUNUM_V_VECTOR 7
#define RAKUNUM_VERSION 8 // can contain multiple '.'s
#define RAKUNUM_BAD 9
// Regex / Q string types
// The numeric order is significant: every value below RAKUTYPE_QLANG is a
// regex type; the trailing notes give the length of the introducing keyword.
#define RAKUTYPE_REGEX_NORM 0 // 0 char ident
#define RAKUTYPE_REGEX_S 1 // order is significant:
#define RAKUTYPE_REGEX_M 2 // 1 char ident
#define RAKUTYPE_REGEX_Y 3 // 1 char ident
#define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers
#define RAKUTYPE_REGEX_RX 5 // 2 char ident
#define RAKUTYPE_REGEX_TR 6 // 2 char ident
#define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
#define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote >
#define RAKUTYPE_STR_Q 9 // 1 char ident
#define RAKUTYPE_STR_QX 10 // 2 char ident
#define RAKUTYPE_STR_QW 11 // 2 char ident
#define RAKUTYPE_STR_QQ 12 // 2 char ident
#define RAKUTYPE_STR_QQX 13 // 3 char ident
#define RAKUTYPE_STR_QQW 14 // 3 char ident
#define RAKUTYPE_STR_QQWW 15 // 4 char ident
// Delimiter types
// Selects which opener table IsValidQuoteOpener consults.
#define RAKUDELIM_BRACKET 0 // bracket: regex, Q language
#define RAKUDELIM_QUOTE 1 // quote: normal string
// rakuWordLists: keywords as defined in config
// Descriptions of the word list sets, handed to DefineWordListSets().
// The position of each entry matches the index 'n' handled by
// LexerRaku::WordListSet (0 = keywords ... 6 = adverbs).
// The list is terminated by a nullptr entry.
const char *const rakuWordLists[] = {
"Keywords and identifiers",
"Functions",
"Types basic",
"Types composite",
"Types domain-specific",
"Types exception",
"Adverbs",
nullptr,
};
// Lexer options together with their default values.
// Every folding feature is switched on by default except compact folding.
struct OptionsRaku {
	bool fold;                  // master folding switch
	bool foldCompact;           // "fold.compact"
	bool foldComment;           // "fold.comment"
	bool foldCommentMultiline;  // "fold.raku.comment.multiline"
	bool foldCommentPOD;        // "fold.raku.comment.pod"
	OptionsRaku() :
		fold(true),
		foldCompact(false),
		foldComment(true),
		foldCommentMultiline(true),
		foldCommentPOD(true) {
	}
};
// init options and words
// Binds each property name to the OptionsRaku member it controls and
// registers the word list descriptions. The registration calls are kept in
// their original sequence.
struct OptionSetRaku : public OptionSet<OptionsRaku> {
OptionSetRaku() {
// generic folding switches (no description text supplied)
DefineProperty("fold", &OptionsRaku::fold);
DefineProperty("fold.comment", &OptionsRaku::foldComment);
DefineProperty("fold.compact", &OptionsRaku::foldCompact);
// Raku specific refinements of fold.comment
DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline,
"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD,
"Set this property to 0 to disable folding POD comments when fold.comment=1.");
// init word lists
DefineWordListSets(rakuWordLists);
}
};
// Delimiter pair: describes the opener of a quoted section, the character(s)
// that may close it, and how it is to be processed.
struct DelimPair {
	int opener;     // opening delimiter character
	int closer[2];  // up to two accepted closing characters (0 = unused slot)
	bool interpol;  // may variables be interpolated inside?
	short count;    // how many times the delimiter char is repeated
	DelimPair() :
		opener(0),
		closer{0, 0},
		interpol(false),
		count(0) {
	}
	// True when 'ch' equals either closer slot (an unused slot holds 0).
	bool isCloser(int ch) const {
		for (const int candidate : closer) {
			if (candidate == ch)
				return true;
		}
		return false;
	}
};
/*----------------------------------------------------------------------------*
* --- FUNCTIONS ---
*----------------------------------------------------------------------------*/
/*
 * IsANewLine
 * - true for a carriage return or a line feed, false for anything else
 */
constexpr bool IsANewLine(int ch) noexcept {
	switch (ch) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
/*
 * IsAWhitespace
 * - true when ch is a space / tab (IsASpaceOrTab) or a line break (IsANewLine)
 */
bool IsAWhitespace(int ch) noexcept {
	if (IsASpaceOrTab(ch))
		return true;
	return IsANewLine(ch);
}
/*
 * IsAlphabet
 * - true only for the ASCII letters a-z and A-Z
 */
constexpr bool IsAlphabet(int ch) noexcept {
	const bool isLower = ('a' <= ch) && (ch <= 'z');
	const bool isUpper = ('A' <= ch) && (ch <= 'Z');
	return isLower || isUpper;
}
/*
 * IsCommentLine
 * - decides whether 'line' is a comment line of the requested 'type'
 * - type == SCE_RAKU_COMMENTEMBED: true when the last inspected character of
 *   the line carries that style
 * - any other type (default SCE_RAKU_COMMENTLINE): true when the first
 *   character that is not a space / tab is a '#' in that style which is not
 *   followed by a backtick (a backtick would start an embedded comment)
 * - the scan stops one position before the start of the next line
 *   modified from: LexPerl.cxx
 */
bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
	const Sci_Position lineBegin = styler.LineStart(line);
	const Sci_Position lineEnd = styler.LineStart(line + 1) - 1;
	const bool wantEmbedded = (type == SCE_RAKU_COMMENTEMBED);
	for (Sci_Position at = lineBegin; at < lineEnd; at++) {
		const char chAt = styler[at];
		const int styleAt = styler.StyleAt(at);
		if (wantEmbedded) {
			// only the final inspected position decides
			if (at == (lineEnd - 1) && styleAt == type)
				return true;
			continue;
		}
		// line comment: must NOT be the opener of an embedded comment
		if (chAt == '#' && styleAt == type && styler[at + 1] != '`')
			return true;
		if (!IsASpaceOrTab(chAt))
			return false;
	}
	return false;
}
/*
 * ContainsQTo
 * - gathers every character in [start, end) that is styled SCE_RAKU_ADVERB
 *   and reports whether the gathered text contains ":to", which indicates the
 *   start of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ.
 */
bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
	std::string adverbText;
	Sci_Position pos = start;
	while (pos < end) {
		if (styler.StyleAt(pos) == SCE_RAKU_ADVERB)
			adverbText += styler[pos];
		pos++;
	}
	const std::string::size_type hit = adverbText.find(":to");
	return hit != std::string::npos;
}
/*
 * GetBracketCloseChar
 * - returns the closing bracket char that pairs with the opener 'ch'
 * - returns 0 when the category of 'ch' is not one of ccSm, ccPs or ccPi
 * - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
 * - Categories are general matches for valid BiDi types
 * - Most closer chars are opener + 1; the pairs that do not follow that rule
 *   are listed explicitly in the tables below
 * NOTE(review): a ccSm / ccPs / ccPi character that is not listed also falls
 * through to 'ch + 1' -- confirm this is intended for every such character.
 */
int GetBracketCloseChar(const int ch) noexcept {
const CharacterCategory cc = CategoriseCharacter(ch);
switch (cc) {
// math symbols: explicit exceptions, otherwise opener + 1
case ccSm:
switch (ch) {
case 0x3C: return 0x3E; // LESS-THAN SIGN
case 0x2208: return 0x220B; // ELEMENT OF
case 0x2209: return 0x220C; // NOT AN ELEMENT OF
case 0x220A: return 0x220D; // SMALL ELEMENT OF
case 0x2215: return 0x29F5; // DIVISION SLASH
case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO
case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH
case 0x22A6: return 0x2ADE; // ASSERTION
case 0x22A8: return 0x2AE4; // TRUE
case 0x22A9: return 0x2AE3; // FORCES
case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE
case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR
case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR
case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN
}
break;
// open punctuation: explicit exceptions, otherwise opener + 1
case ccPs:
switch (ch) {
case 0x5B: return 0x5D; // LEFT SQUARE BRACKET
case 0x7B: return 0x7D; // LEFT CURLY BRACKET
case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET
case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET
}
break;
// initial quote punctuation: always opener + 1
case ccPi:
break;
// every other category: not a bracket opener
default: return 0;
}
// general rule for the accepted categories
return ch + 1;
}
/*
 * IsValidQuoteOpener
 * - checks whether 'ch' can open a quoted section of the given delimiter type
 *   and fills 'dp' accordingly
 * - dp.closer[] and dp.interpol are always reset first
 * - RAKUDELIM_QUOTE: only the quote characters listed below are accepted; an
 *   unknown character returns false before dp.opener / dp.count are written
 * - RAKUDELIM_BRACKET: the closer comes from GetBracketCloseChar
 * - returns true when a closer was found (dp.closer[0] > 0)
 */
bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
	// clean slate: no closers, interpolation allowed
	dp.closer[0] = 0;
	dp.closer[1] = 0;
	dp.interpol = true;
	if (type == RAKUDELIM_BRACKET) {
		dp.closer[0] = GetBracketCloseChar(ch);
	} else if (type == RAKUDELIM_QUOTE) {
		if (ch == '\'') {               // APOSTROPHE
			dp.closer[0] = '\'';
			dp.interpol = false;
		} else if (ch == '"') {         // QUOTATION MARK
			dp.closer[0] = '"';
		} else if (ch == 0x2018) {      // LEFT SINGLE QUOTATION MARK
			dp.closer[0] = 0x2019;
			dp.interpol = false;
		} else if (ch == 0x201C) {      // LEFT DOUBLE QUOTATION MARK
			dp.closer[0] = 0x201D;
		} else if (ch == 0x201D) {      // RIGHT DOUBLE QUOTATION MARK
			dp.closer[0] = 0x201C;
		} else if (ch == 0x201E) {      // DOUBLE LOW-9 QUOTATION MARK
			dp.closer[0] = 0x201C;      // either closing form is accepted
			dp.closer[1] = 0x201D;
		} else if (ch == 0xFF62) {      // HALFWIDTH LEFT CORNER BRACKET
			dp.closer[0] = 0xFF63;
			dp.interpol = false;
		} else {
			return false;               // not a quote opener
		}
	}
	dp.opener = ch;
	dp.count = 1;
	return dp.closer[0] > 0;
}
/*
 * IsBracketOpenChar
 * - true when GetBracketCloseChar knows a closing character for 'ch'
 */
bool IsBracketOpenChar(int ch) noexcept {
	const int closer = GetBracketCloseChar(ch);
	return closer > 0;
}
/*
 * IsValidRegOrQAdjacent
 * - returns true if ch may directly follow Q / q
 * - rejected: alphanumerics, '_', '(', ')' and the apostrophe
 * * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQAdjacent(int ch) noexcept {
	if (IsAlphaNumeric(ch))
		return false;
	switch (ch) {
	case '_':
	case '(':
	case ')':
	case '\'':
		return false;
	default:
		return true;
	}
}
/*
 * IsValidRegOrQPrecede
 * - returns true if ch may directly precede Q / q
 * - rejected: alphanumerics and '_'
 * * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQPrecede(int ch) noexcept {
	if (IsAlphaNumeric(ch))
		return false;
	return ch != '_';
}
/*
 * MatchCharInRange
 * - returns true if the match character is found at a relative offset in
 *   [1, length)
 * - ignoreDelim (default false): when false, nothing matches if the character
 *   before the current position (sc.chPrev) is a backslash
 * NOTE(review): the "previous" character is sampled once from sc.chPrev and
 * is not refreshed while scanning, so it does not track the character in
 * front of each candidate -- confirm whether that is intended.
 */
bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
		const int match, bool ignoreDelim = false) {
	const int chBefore = sc.chPrev;
	const bool escapeAllows = ignoreDelim || chBefore != '\\';
	for (Sci_Position rel = 1; rel < length; rel++) {
		const int chAt = sc.GetRelativeCharacter(rel);
		if (chAt == match && escapeAllows)
			return true;
	}
	return false;
}
/*
 * PrevNonWhitespaceChar
 * - walks backwards from the current position and returns the first
 *   character that is not whitespace (see IsAWhitespace)
 * - the walk covers up to sc.currentPos steps back, so the character at
 *   document position 0 is inspected as well
 * - returns 0 when no such character exists
 */
int PrevNonWhitespaceChar(StyleContext &sc) {
	// furthest relative offset that can still be inside the document
	const Sci_Position max_back = -static_cast<Sci_Position>(sc.currentPos);
	for (Sci_Position rel = -1; rel >= max_back; rel--) {
		const int ch = sc.GetRelativeCharacter(rel);
		if (!IsAWhitespace(ch))
			return ch;
	}
	return 0; // no matching char
}
/*
 * IsQLangStartAtScPos
 * - returns true if a Q Language construct starts at the sc position and
 *   stores the detected RAKUTYPE_* code in 'type' (-1 when nothing matched)
 * - the preceding character must pass IsValidRegOrQPrecede
 * - ref: https://docs.raku.org/language/quoting
 * - Q :adverb :adverb //;
 * - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
 * - '<' counts as a word quote only when MatchCharInRange finds a '>'
 */
bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	const int chAhead2 = sc.GetRelativeCharacter(2);
	const int chAhead3 = sc.GetRelativeCharacter(3);
	type = -1;
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;
	if (sc.ch == 'Q') {
		if (adjacentOk)
			type = RAKUTYPE_QLANG;
	} else if (sc.ch == 'q') {
		if (sc.chNext == 'x') {
			type = RAKUTYPE_STR_QX;
		} else if (sc.chNext == 'w') {
			type = RAKUTYPE_STR_QW;
		} else if (sc.chNext == 'q') {
			// qq, qqx, qqw or qqww
			if (chAhead2 == 'x')
				type = RAKUTYPE_STR_QQX;
			else if (chAhead2 != 'w')
				type = RAKUTYPE_STR_QQ;
			else
				type = (chAhead3 == 'w') ? RAKUTYPE_STR_QQWW : RAKUTYPE_STR_QQW;
		} else {
			type = RAKUTYPE_STR_Q;
		}
	} else if (sc.ch == '<') {
		if (MatchCharInRange(sc, length, '>'))
			type = RAKUTYPE_STR_WQ; // < word quote >
	}
	return type >= 0;
}
/*
 * IsRegexStartAtScPos
 * - returns true if a Regex starts at the sc position and stores the detected
 *   RAKUTYPE_REGEX_* code in 'type' (-1 when nothing matched)
 * - the preceding character must pass IsValidRegOrQPrecede
 * - a bare '/' is accepted only when the previous non-whitespace character is
 *   a member of 'set'
 * - ref: https://docs.raku.org/language/regexes
 * - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
 * - regex R :adverb //;
 * - /:adverb /;
 */
bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	type = -1;
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;
	const int lead = sc.ch;
	if (lead == 'r') {
		if (sc.chNext == 'x')
			type = RAKUTYPE_REGEX_RX;
	} else if (lead == 't' || lead == 'T') {
		if (sc.chNext == 'r' || sc.chNext == 'R')
			type = RAKUTYPE_REGEX_TR;
	} else if (lead == 'm') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_M;
	} else if (lead == 's' || lead == 'S') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_S;
	} else if (lead == 'y') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_Y;
	} else if (lead == '/') {
		if (set.Contains(PrevNonWhitespaceChar(sc)))
			type = RAKUTYPE_REGEX_NORM;
	}
	return type >= 0;
}
/*
 * IsValidIdentPrecede
 * - returns true if ch may directly precede an identifier
 * - rejected: alphanumerics, '_', '@', '$' and '%'
 */
bool IsValidIdentPrecede(int ch) noexcept {
	if (IsAlphaNumeric(ch))
		return false;
	switch (ch) {
	case '_':
	case '@':
	case '$':
	case '%':
		return false;
	default:
		return true;
	}
}
/*
 * IsValidDelimiter
 * - returns true if ch can be used as a delimiter (most chars can)
 * - rejected: alphanumerics and ':'
 * * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidDelimiter(int ch) noexcept {
	if (IsAlphaNumeric(ch))
		return false;
	return ch != ':';
}
/*
* GetDelimiterCloseChar
* - returns the corresponding close char for a given delimiter (could be the same char)
*/
int GetDelimiterCloseChar(int ch) noexcept {
int ch_end = GetBracketCloseChar(ch);
if (ch_end == 0 && IsValidDelimiter(ch)) {
ch_end = ch;
}
return ch_end;
}
/*
 * GetRepeatCharCount
 * - counts how many consecutive characters, starting at the current position,
 *   equal chMatch; the count never exceeds 'length'
 */
Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
	Sci_Position run = 0;
	for (; run < length; run++) {
		if (sc.GetRelativeCharacter(run) != chMatch)
			break;
	}
	return run;
}
/*
 * LengthToDelimiter
 * - returns the length until the end of a delimited string section, i.e. the
 *   relative offset of the first character after the closing delimiter run
 * - returns -1 when no end was detected within 'length' characters
 * - Ignores nested delimiters (if opener != closer)
 * - a character following a backslash is skipped (never treated as delimiter)
 * - dp.count closers in a row are required to end the section
 * - noTrailing (default false): when true the last closer only counts if it
 *   is followed by whitespace
 * - all counters are reset at every line break
 * NOTE(review): the end condition is tested at the top of the next iteration,
 * so a closer sitting on the very last position of the range yields -1.
 */
Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
Sci_Position length, bool noTrailing = false) {
short cnt_open = 0; // count open bracket
short cnt_close = 0; // count close bracket
bool is_escape = false; // has been escaped using '\'?
Sci_Position len = 0; // count characters
int chOpener = dp.opener; // look for nested opener / closer
if (dp.opener == dp.closer[0])
chOpener = 0; // no opening delimiter (no nesting possible)
while (len < length) {
const int chPrev = sc.GetRelativeCharacter(len - 1);
const int ch = sc.GetRelativeCharacter(len);
const int chNext = sc.GetRelativeCharacter(len+1);
// all closers seen and no nested bracket left open: 'len' is the answer
if (cnt_open == 0 && cnt_close == dp.count) {
return len; // end condition has been met
} else if (is_escape) {
// this character was escaped by the preceding backslash: ignore it
is_escape = false;
} else if (ch == '\\') {
is_escape = true;
} else {
if (ch == chOpener) {
cnt_open++; // open nested bracket
} else if (dp.isCloser(ch)) {
if ( cnt_open > 0 ) {
cnt_open--; // close nested bracket
} else if (dp.count > 1 && cnt_close < (dp.count - 1)) {
// multi-char delimiter: this is not yet the final closer
if (cnt_close > 1) {
// closers only accumulate when they are adjacent
if (dp.isCloser(chPrev)) {
cnt_close++;
} else { // reset if previous char was not close
cnt_close = 0;
}
} else {
cnt_close++;
}
} else if (!noTrailing || (IsAWhitespace(chNext))) {
cnt_close++; // found last close
if (cnt_close > 1 && !dp.isCloser(chPrev)) {
cnt_close = 0; // reset if previous char was not close
}
} else {
cnt_close = 0; // non handled close: reset
}
} else if (IsANewLine(ch)) {
cnt_open = 0; // reset after each line
cnt_close = 0;
}
}
len++;
}
return -1; // end condition has NOT been met
}
/*
 * LengthToEndHeredoc
 * - returns the length until the end of a heredoc section: the relative
 *   offset of the first character after the terminating delimiter string
 * - returns -1 when the delimiter was not found within 'length' characters
 * - delimiter string MUST begin on a new line
 * - characters are read straight from the document (styler), starting at
 *   sc.currentPos
 * - the scan offset uses Sci_Position so it cannot be truncated when the
 *   range is larger than an int can hold
 */
Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
		const Sci_Position length, const char *delim) {
	bool on_new_ln = false;     // are we matching from the start of a line?
	size_t i = 0;               // index into delim
	for (Sci_Position n = 0; n < length; n++) {
		const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0);
		if (on_new_ln) {
			if (delim[i] == '\0')
				return n;       // at end of str, match found!
			if (ch != delim[i++])
				i = 0;          // no char match, reset index
		}
		if (i == 0)             // not inside a partial match: detect new line
			on_new_ln = IsANewLine(ch);
	}
	return -1;                  // no match found
}
/*
 * LengthToNextChar
 * - returns the relative offset (starting the search at 1) of the next
 *   character that is neither a space / tab nor a line break
 * - when every inspected character is whitespace the search stops at 'length'
 * - the result is never smaller than 1
 */
Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
	Sci_Position rel = 1;
	for (; rel < length; rel++) {
		const int chAt = sc.GetRelativeCharacter(rel);
		const bool blank = IsASpaceOrTab(chAt) || IsANewLine(chAt);
		if (!blank)
			break;
	}
	return rel;
}
/*
 * GetRelativeString
 * - clears 'str', then appends the characters found at the relative offsets
 *   offset, offset + 1, ... up to (but excluding) 'length'
 * - note: 'length' acts as the exclusive end offset, not as a character count
 * - each character value is narrowed to a single char when stored
 */
void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
		std::string &str) {
	str.clear();
	for (Sci_Position rel = offset; rel < length; rel++) {
		str.push_back(static_cast<char>(sc.GetRelativeCharacter(rel)));
	}
}
} // end anonymous namespace
/*----------------------------------------------------------------------------*
* --- class: LexerRaku ---
*----------------------------------------------------------------------------*/
//class LexerRaku : public ILexerWithMetaData {
// Lexer object for Raku: holds the character sets, options and word lists and
// implements the lexing / folding entry points on top of DefaultLexer.
class LexerRaku : public DefaultLexer {
// Character classes; the constructor initialises them in this order.
CharacterSet setWord;
CharacterSet setSigil;
CharacterSet setTwigil;
CharacterSet setOperator;
CharacterSet setSpecialVar;
WordList regexIdent; // identifiers that specify a regex
OptionsRaku options; // Options from config
OptionSetRaku osRaku;
WordList keywords; // Word Lists from config
WordList functions;
WordList typesBasic;
WordList typesComposite;
WordList typesDomainSpecific;
WordList typesExceptions;
WordList adverbs;
public:
// Default constructor: registers the lexer as "raku" / SCLEX_RAKU and builds
// the character sets. (Copying and moving are disabled by the deleted
// members below, not by 'explicit'.)
explicit LexerRaku() :
DefaultLexer("raku", SCLEX_RAKU),
setWord(CharacterSet::setAlphaNum, "-_", 0x80),
setSigil(CharacterSet::setNone, "$&%@"),
setTwigil(CharacterSet::setNone, "!*.:<=?^~"),
setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"),
setSpecialVar(CharacterSet::setNone, "_/!") {
// words that introduce a named regex
regexIdent.Set("regex rule token");
}
// Deleted so LexerRaku objects can not be copied.
LexerRaku(const LexerRaku &) = delete;
LexerRaku(LexerRaku &&) = delete;
void operator=(const LexerRaku &) = delete;
void operator=(LexerRaku &&) = delete;
virtual ~LexerRaku() {
}
// The lexer destroys itself when released.
void SCI_METHOD Release() noexcept override {
delete this;
}
int SCI_METHOD Version() const noexcept override {
return lvRelease5;
}
// Property queries are answered by the option set.
const char *SCI_METHOD PropertyNames() override {
return osRaku.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return osRaku.PropertyType(name);
}
const char *SCI_METHOD DescribeProperty(const char *name) override {
return osRaku.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char *SCI_METHOD PropertyGet(const char *key) override {
return osRaku.PropertyGet(key);
}
const char *SCI_METHOD DescribeWordListSets() override {
return osRaku.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
// Factory: returns a newly allocated lexer (freed through Release()).
static ILexer5 *LexerFactoryRaku() {
return new LexerRaku();
}
protected:
// Character classification helpers (ASCII and Unicode)
bool IsOperatorChar(const int ch);
bool IsWordChar(const int ch, bool allowNumber = true);
bool IsWordStartChar(const int ch);
bool IsNumberChar(const int ch, int base = 10);
// Helpers used while processing regex / Q language / string sections
bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
int &type, const DelimPair &dp);
void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
WordList &wordsAdverbs, DelimPair &dp);
Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
char *s, const int size, Sci_Position offset = 0);
};
/*----------------------------------------------------------------------------*
* --- METHODS: LexerRaku ---
*----------------------------------------------------------------------------*/
/*
 * LexerRaku::IsOperatorChar
 * - Test for both ASCII and Unicode operators
 * - the listed Unicode operators are accepted directly; every other
 *   character is looked up in setOperator
 * see: https://docs.raku.org/language/unicode_entry
 */
bool LexerRaku::IsOperatorChar(const int ch) {
	if (ch <= 0x7F)
		return setOperator.Contains(ch);
	switch (ch) {
	// Unicode    ASCII Equiv.
	case 0x2208: // (elem)
	case 0x2209: // !(elem)
	case 0x220B: // (cont)
	case 0x220C: // !(cont)
	case 0x2216: // (-)
	case 0x2229: // (&)
	case 0x222A: // (|)
	case 0x2282: // (<)
	case 0x2283: // (>)
	case 0x2284: // !(<)
	case 0x2285: // !(>)
	case 0x2286: // (<=)
	case 0x2287: // (>=)
	case 0x2288: // !(<=)
	case 0x2289: // !(>=)
	case 0x228D: // (.)
	case 0x228E: // (+)
	case 0x2296: // (^)
		return true;
	default:
		break;
	}
	return setOperator.Contains(ch);
}
/*
 * LexerRaku::IsWordChar
 * - Test for both ASCII and Unicode identifier characters
 * - above 0x7F only the letter categories (Lu, Ll, Lt, Lm, Lo) qualify;
 *   Unicode numbers should not appear in word identifiers
 * - ASCII: digits qualify when allowNumber is set, everything else is looked
 *   up in setWord
 * see: https://docs.raku.org/language/unicode_ascii
 * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 * FIXME: *still* may not contain all valid characters
 * NOTE(review): setWord is constructed from CharacterSet::setAlphaNum, so
 * whether allowNumber == false really rejects ASCII digits depends on that
 * set -- confirm.
 */
bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
	if (ch > 0x7F) {
		const CharacterCategory cc = CategoriseCharacter(ch);
		const bool isLetter = cc == ccLu || cc == ccLl || cc == ccLt
			|| cc == ccLm || cc == ccLo;
		return isLetter;
	}
	if (allowNumber && IsADigit(ch))
		return true; // an ASCII number type
	return setWord.Contains(ch);
}
/*
 * LexerRaku::IsWordStartChar
 * - Test for both ASCII and Unicode identifier "start / first" characters
 * - a hyphen can never start a word; otherwise IsWordChar decides, called
 *   with allowNumber = false
 */
bool LexerRaku::IsWordStartChar(const int ch) {
	if (ch == '-')
		return false;
	return IsWordChar(ch, false); // no numbers allowed
}
/*
 * LexerRaku::IsNumberChar
 * - Test for both ASCII and Unicode number characters
 * - above 0x7F the number categories Nd, Nl and No qualify
 * - ASCII characters are checked as digits of the requested base
 * see: https://docs.raku.org/language/unicode_ascii
 * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 * FILTERED by Unicode letters that are NUMBER
 * and NOT PARENTHESIZED or CIRCLED
 * FIXME: *still* may not contain all valid number characters
 */
bool LexerRaku::IsNumberChar(const int ch, int base) {
	if (ch <= 0x7F)
		return IsADigit(ch, base);
	const CharacterCategory cc = CategoriseCharacter(ch);
	return cc == ccNd || cc == ccNl || cc == ccNo;
}
/*
 * LexerRaku::PropertySet
 * - forwards the key / value pair to the option set
 * - returns 0 when the option set reports success, -1 otherwise
 */
Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) {
	const bool accepted = osRaku.PropertySet(&options, key, val);
	return accepted ? 0 : -1;
}
/*
 * LexerRaku::WordListSet
 * - replaces the contents of word list number 'n' (0 = keywords,
 *   1 = functions, 2 = basic types, 3 = composite types,
 *   4 = domain-specific types, 5 = exception types, 6 = adverbs)
 * - returns 0 when the list's Set() call reports true, otherwise -1
 *   (also -1 for an unknown list number)
 */
Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
	WordList *const lists[] = {
		&keywords,
		&functions,
		&typesBasic,
		&typesComposite,
		&typesDomainSpecific,
		&typesExceptions,
		&adverbs,
	};
	const int listCount = static_cast<int>(sizeof(lists) / sizeof(lists[0]));
	if (n < 0 || n >= listCount)
		return -1;
	return lists[n]->Set(wl) ? 0 : -1;
}
/*
 * LexerRaku::ProcessRegexTwinCapture
 * - processes the transition between a regex pair (two sets of delimiters)
 * - only acts for the types S, TR and Y; 'type' is cleared (-1) in that case
 * - steps over the closer when it was the previous char
 * - non-bracket delimiter (opener is also a closer): nothing more to do,
 *   returns true
 * - bracket delimiter: moves to the next opener when it is the next
 *   non-whitespace char and returns true
 * - returns false in every other case
 */
bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
		int &type, const DelimPair &dp) {
	const bool hasTwin = type == RAKUTYPE_REGEX_S
		|| type == RAKUTYPE_REGEX_TR
		|| type == RAKUTYPE_REGEX_Y;
	if (!hasTwin)
		return false;
	type = -1; // clear type
	// move past the closer if it was the previous char
	if (dp.isCloser(sc.chPrev))
		sc.Forward();
	// same char opens and closes: no second opener to look for
	if (dp.isCloser(dp.opener))
		return true;
	// bracket pair: the second opener must be the next non-blank char
	const Sci_Position gap = LengthToNextChar(sc, length);
	if (sc.GetRelativeCharacter(gap) != dp.opener)
		return false;
	sc.Forward(gap);
	return true;
}
/*
 * LexerRaku::ProcessStringVars
 * - advances 'length' characters through a string and highlights variables
 * - a variable starts at a '$' or '@' that is not preceded by a backslash
 *   and is followed by a word start character; it gets the style 'varState'
 * - the variable ends at the first non word character, where the state that
 *   was active on entry is restored
 */
void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
	const int baseState = sc.state;
	for (Sci_Position left = length; left > 0; left--) {
		const bool insideVar = (sc.state == varState);
		if (insideVar && !IsWordChar(sc.ch)) {
			sc.SetState(baseState);
		} else {
			const bool isSigil = (sc.ch == '$' || sc.ch == '@');
			if (sc.chPrev != '\\' && isSigil && IsWordStartChar(sc.chNext))
				sc.SetState(varState);
		}
		sc.Forward(); // Next character
	}
}
/*
 * LexerRaku::ProcessValidRegQlangStart
 * - processes a section of the document range from after a Regex / Q delimiter
 * - returns true on success (a closing delimiter could be determined)
 * - sets: wordsAdverbs (the adverbs that were seen), dp.opener, dp.closer[]
 *   and dp.count
 * - the state held by 'sc' on entry is re-applied once the opening delimiter
 *   is found; everything before it is styled as default / identifier / adverb
 * ref: https://docs.raku.org/language/regexes
 */
bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
WordList &wordsAdverbs, DelimPair &dp) {
// remember where we started so the remaining length can be recomputed
Sci_Position startPos = sc.currentPos;
Sci_Position startLen = length;
const int target_state = sc.state;
int state = SCE_RAKU_DEFAULT;
std::string str;
// find our opening delimiter (and occurrences) / save any adverbs
dp.opener = 0; // adverbs can be after the first delimiter
bool got_all_adverbs = false; // in Regex statements
bool got_ident = false; // regex can have an identifier: 'regex R'
sc.SetState(state); // set state default to avoid pre-highlights
while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) {
// move to the next non-space character
const bool was_space = IsAWhitespace(sc.ch);
if (!got_all_adverbs && was_space) {
sc.Forward(LengthToNextChar(sc, length));
}
length = startLen - (sc.currentPos - startPos); // update length remaining
// parse / eat an identifier (if type == RAKUTYPE_REGEX)
if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {
// eat identifier / account for special adverb :sym<name>
bool got_sym = false;
while (sc.More()) {
sc.SetState(SCE_RAKU_IDENTIFIER);
// consume alphanumerics, '_' and '-'
while (sc.More() && (IsAlphaNumeric(sc.chNext)
|| sc.chNext == '_' || sc.chNext == '-')) {
sc.Forward();
}
sc.Forward();
if (got_sym && sc.ch == '>') {
sc.SetState(SCE_RAKU_OPERATOR); // '>'
sc.Forward();
break;
} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
sc.SetState(SCE_RAKU_ADVERB); // ':sym'
sc.Forward(4);
sc.SetState(SCE_RAKU_OPERATOR); // '<'
sc.Forward();
got_sym = true; // loop again to eat the name inside <...>
} else {
break;
}
}
sc.SetState(state);
got_ident = true;
}
// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
// >= RAKUTYPE_QLANG only has adverbs before delim
else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident)
&& !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) {
sc.SetState(SCE_RAKU_ADVERB);
// collect the adverb name (without the ':')
while (IsAlphaNumeric(sc.chNext) && sc.More()) {
sc.Forward();
str += sc.ch;
}
str += ' '; // adverbs are stored space separated
sc.Forward();
sc.SetState(state);
}
// find starting delimiter
else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch))
&& IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are)
sc.SetState((state = target_state));// start state here...
dp.opener = sc.ch; // this is our delimiter, get count
if (type < RAKUTYPE_QLANG) // type is Regex
dp.count = 1; // has only one delimiter
else
dp.count = GetRepeatCharCount(sc, dp.opener, length);
sc.Forward(dp.count);
}
// we must have all the adverbs by now...
else {
if (got_all_adverbs)
break; // prevent infinite loop: occurs on missing open char
got_all_adverbs = true;
}
}
// set word list / find a valid closing delimiter (or bomb!)
wordsAdverbs.Set(str.c_str());
dp.closer[0] = GetDelimiterCloseChar(dp.opener);
dp.closer[1] = 0; // no other closer char
return dp.closer[0] > 0;
}
/*
 * LexerRaku::LengthToNonWordChar
 * - returns the length until the next non "word" character (see IsWordChar),
 *   starting at relative offset 'offset'
 * - copies the word characters into 's' and always NUL terminates it
 * - at most size - 1 characters are stored, so the terminator always fits in
 *   a buffer of 'size' chars; the scan is also limited to 'length' characters
 * - characters are narrowed to a single char when stored
 * - returns 0 without touching 's' when no usable buffer is supplied
 */
Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
		char *s, const int size, Sci_Position offset) {
	if (s == nullptr || size <= 0)
		return 0;
	// keep one slot free for the terminating NUL
	const Sci_Position capacity = static_cast<Sci_Position>(size) - 1;
	const Sci_Position max_length = capacity < length ? capacity : length;
	Sci_Position len = 0;
	while (len < max_length) {
		const int ch = sc.GetRelativeCharacter(len + offset);
		if (!IsWordChar(ch))
			break;
		s[len] = static_cast<char>(ch);
		len++;
	}
	s[len] = '\0';
	return len;
}
/*
* LexerRaku::Lex
* - Main lexer method
*/
void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
DelimPair dpEmbeded; // delimiter pair: embedded comments
DelimPair dpString; // delimiter pair: string
DelimPair dpRegQ; // delimiter pair: Regex / Q Lang
std::string hereDelim; // heredoc delimiter (if in heredoc)
int hereState = 0; // heredoc state to use (Q / QQ)
int numState = 0; // number state / type
short cntDecimal = 0; // number decimal count
std::string wordLast; // last word seen
std::string identLast; // last identifier seen
std::string adverbLast; // last (single) adverb seen
WordList lastAdverbs; // last adverbs seen
Sci_Position len; // temp length value
char s[100]; // temp char string
int typeDetect = -1; // temp type detected (for regex and Q lang)
Sci_Position lengthToEnd; // length until the end of range
// Backtrack to safe start position before complex quoted elements
Sci_PositionU newStartPos = startPos;
if (initStyle != SCE_RAKU_DEFAULT) {
// Backtrack to last SCE_RAKU_DEFAULT or 0
while (newStartPos > 0) {
newStartPos--;
if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
break;
}
// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) {
if (newStartPos > 0) {
newStartPos = styler.LineStart(styler.GetLine(newStartPos));
}
}
} else {
const Sci_Position line = styler.GetLine(newStartPos);
if (line > 0) {
// If the previous line is a start of a q or qq heredoc, backtrack to start of line
const Sci_Position startPreviousLine = styler.LineStart(line-1);
if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
newStartPos = startPreviousLine;
}
}
}
// Re-calculate (any) changed startPos, length and initStyle state
if (newStartPos < startPos) {
initStyle = SCE_RAKU_DEFAULT;
length += startPos - newStartPos;
startPos = newStartPos;
}
// init StyleContext
StyleContext sc(startPos, length, initStyle, styler);
// StyleContext Loop
for (; sc.More(); sc.Forward()) {
lengthToEnd = (length - (sc.currentPos - startPos)); // end of range
/* *** Determine if the current state should terminate ************** *
* Everything within the 'switch' statement processes characters up
* until the end of a syntax highlight section / state.
* ****************************************************************** */
switch (sc.state) {
case SCE_RAKU_OPERATOR:
sc.SetState(SCE_RAKU_DEFAULT);
break; // FIXME: better valid operator sequences needed?
case SCE_RAKU_COMMENTLINE:
if (IsANewLine(sc.ch)) {
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_COMMENTEMBED:
if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) {
sc.Forward(len); // Move to end delimiter
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_POD:
if (sc.atLineStart && sc.Match("=end pod")) {
sc.Forward(8);
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_STRING:
// Process the string for variables: move to end delimiter
if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) {
if (dpString.interpol) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_STRING_Q:
case SCE_RAKU_STRING_QQ:
case SCE_RAKU_STRING_Q_LANG:
// No string: previous char was the delimiter
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
sc.SetState(SCE_RAKU_DEFAULT);
}
// Process the string for variables: move to end delimiter
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
// set (any) heredoc delimiter string
if (lastAdverbs.InList("to")) {
GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim);
hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
}
// select variable identifiers
if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_HEREDOC_Q:
case SCE_RAKU_HEREDOC_QQ:
if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) {
// select variable identifiers
if (sc.state == SCE_RAKU_HEREDOC_QQ) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
hereDelim.clear(); // clear heredoc delimiter
break;
case SCE_RAKU_REGEX:
// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
while (sc.state == SCE_RAKU_REGEX) {
// No string: previous char was the delimiter
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
continue;
sc.SetState(SCE_RAKU_DEFAULT);
break;
}
// Process the string for variables: move to end delimiter
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
continue;
sc.SetState(SCE_RAKU_DEFAULT);
break;
} else {
sc.Forward(lengthToEnd); // no end delimiter found
break;
}
}
break;
case SCE_RAKU_NUMBER:
if (sc.ch == '.') {
if (sc.chNext == '.') { // '..' is an operator
sc.SetState(SCE_RAKU_OPERATOR);
sc.Forward();
if (sc.chNext == '.') // '...' is also an operator
sc.Forward();
break;
} else if (numState > RAKUNUM_FLOAT_EXP
&& (cntDecimal < 1 || numState == RAKUNUM_VERSION)) {
cntDecimal++;
sc.Forward();
} else {
sc.SetState(SCE_RAKU_DEFAULT);
break; // too many decimal places
}
}
switch (numState) {
case RAKUNUM_BINARY:
if (!IsNumberChar(sc.ch, 2))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_OCTAL:
if (!IsNumberChar(sc.ch, 8))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_HEX:
if (!IsNumberChar(sc.ch, 16))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_DECIMAL:
case RAKUNUM_VERSION:
if (!IsNumberChar(sc.ch))
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_WORD:
case SCE_RAKU_FUNCTION:
case SCE_RAKU_TYPEDEF:
case SCE_RAKU_ADVERB:
sc.SetState(SCE_RAKU_DEFAULT);
break;
case SCE_RAKU_MU:
case SCE_RAKU_POSITIONAL:
case SCE_RAKU_ASSOCIATIVE:
case SCE_RAKU_CALLABLE:
case SCE_RAKU_IDENTIFIER:
case SCE_RAKU_GRAMMAR:
case SCE_RAKU_CLASS:
sc.SetState(SCE_RAKU_DEFAULT);
break;
}
/* *** Determine if a new state should be entered ******************* *
* Everything below here identifies the beginning of a state, all or part
* of the characters within this state are processed here, the rest are
* completed above in the terminate state section.
* ****************************************************************** */
if (sc.state == SCE_RAKU_DEFAULT) {
// --- Single line comment
if (sc.ch == '#') {
sc.SetState(SCE_RAKU_COMMENTLINE);
}
// --- POD block
else if (sc.atLineStart && sc.Match("=begin pod")) {
sc.SetState(SCE_RAKU_POD);
sc.Forward(10);
}
// --- String (normal)
else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
sc.SetState(SCE_RAKU_STRING);
}
// --- String (Q Language) ----------------------------------------
// - https://docs.raku.org/language/quoting
// - Q :adverb :adverb //;
// - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
int state = SCE_RAKU_STRING_Q_LANG;
Sci_Position forward = 1; // single char ident (default)
if (typeDetect > RAKUTYPE_QLANG) {
state = SCE_RAKU_STRING_Q;
if (typeDetect == RAKUTYPE_STR_WQ)
forward = 0; // no char ident
}
if (typeDetect > RAKUTYPE_STR_Q) {
if (typeDetect == RAKUTYPE_STR_QQ)
state = SCE_RAKU_STRING_QQ;
forward++; // two char ident
}
if (typeDetect > RAKUTYPE_STR_QQ)
forward++; // three char ident
if (typeDetect == RAKUTYPE_STR_QQWW)
forward++; // four char ident
// Proceed: check for a valid character after statement
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) {
sc.SetState(state);
sc.Forward(forward);
lastAdverbs.Clear();
// Process: adverbs / opening delimiter / adverbs after delim
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
lastAdverbs, dpRegQ))
sc.SetState(state);
}
}
// --- Regex (rx/s/m/tr/y) ----------------------------------------
// - https://docs.raku.org/language/regexes
else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) {
if (typeDetect == -1) { // must be a regex identifier word
wordLast.clear();
typeDetect = RAKUTYPE_REGEX;
}
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX)
forward++; // single char ident
if (typeDetect > RAKUTYPE_REGEX)
forward++; // two char ident
// Proceed: check for a valid character after statement
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) {
sc.SetState(SCE_RAKU_REGEX);
sc.Forward(forward);
lastAdverbs.Clear();
// Process: adverbs / opening delimiter / adverbs after delim
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
lastAdverbs, dpRegQ))
sc.SetState(SCE_RAKU_REGEX);
}
}
// --- Numbers ----------------------------------------------------
else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
|| (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) {
numState = RAKUNUM_DECIMAL; // default: decimal (base 10)
cntDecimal = 0;
sc.SetState(SCE_RAKU_NUMBER);
if (sc.ch == 'v') // forward past 'v'
sc.Forward();
if (wordLast == "use") { // package version number
numState = RAKUNUM_VERSION;
} else if (sc.ch == '0') { // other type of number
switch (sc.chNext) {
case 'b': // binary (base 2)
numState = RAKUNUM_BINARY;
break;
case 'o': // octal (base 8)
numState = RAKUNUM_OCTAL;
break;
case 'x': // hexadecimal (base 16)
numState = RAKUNUM_HEX;
}
if (numState != RAKUNUM_DECIMAL)
sc.Forward(); // forward to number type char
}
}
// --- Keywords / functions / types / barewords -------------------
else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev))
&& IsWordStartChar(sc.ch)) {
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
if (keywords.InList(s)) {
sc.SetState(SCE_RAKU_WORD); // Keywords
} else if(functions.InList(s)) {
sc.SetState(SCE_RAKU_FUNCTION); // Functions
} else if(typesBasic.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic)
} else if(typesComposite.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite)
} else if(typesDomainSpecific.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific)
} else if(typesExceptions.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions)
} else {
if (wordLast == "class")
sc.SetState(SCE_RAKU_CLASS); // a Class ident
else if (wordLast == "grammar")
sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident
else
sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword
identLast = s; // save identifier
}
if (adverbLast == "sym") { // special adverb ":sym"
sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier
identLast = s; // save identifier
}
if (sc.state != SCE_RAKU_IDENTIFIER)
wordLast = s; // save word
sc.Forward(len - 1); // ...forward past word
}
// --- Adverbs ----------------------------------------------------
else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) {
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1);
if (adverbs.InList(s)) {
sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':')
adverbLast = s; // save word
sc.Forward(len); // ...forward past word (less offset: 1)
}
}
// --- Identifiers: $mu / @positional / %associative / &callable --
// see: https://docs.raku.org/language/variables
else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
|| setSpecialVar.Contains(sc.chNext)
|| IsWordStartChar(sc.chNext))) {
// State based on sigil
switch (sc.ch) {
case '$': sc.SetState(SCE_RAKU_MU);
break;
case '@': sc.SetState(SCE_RAKU_POSITIONAL);
break;
case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE);
break;
case '&': sc.SetState(SCE_RAKU_CALLABLE);
}
const int state = sc.state;
sc.Forward();
char ch_delim = 0;
if (setSpecialVar.Contains(sc.ch)
&& !setWord.Contains(sc.chNext)) { // Process Special Var
ch_delim = -1;
} else if (setTwigil.Contains(sc.ch)) { // Process Twigil
sc.SetState(SCE_RAKU_OPERATOR);
if (sc.ch == '<' && setWord.Contains(sc.chNext))
ch_delim = '>';
sc.Forward();
sc.SetState(state);
}
// Process (any) identifier
if (ch_delim >= 0) {
sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1);
if (ch_delim > 0 && sc.chNext == ch_delim) {
sc.Forward();
sc.SetState(SCE_RAKU_OPERATOR);
}
identLast = s; // save identifier
}
}
// --- Operators --------------------------------------------------
else if (IsOperatorChar(sc.ch)) {
// FIXME: better valid operator sequences needed?
sc.SetState(SCE_RAKU_OPERATOR);
}
// --- Heredoc: begin ---------------------------------------------
else if (!hereDelim.empty() && sc.atLineEnd) {
if (IsANewLine(sc.ch))
sc.Forward(); // skip a possible CRLF situation
sc.SetState(hereState);
}
// Reset words: on operator semi-colon OR '}' (end of statement)
if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) {
wordLast.clear();
identLast.clear();
adverbLast.clear();
}
}
/* *** Determine if an "embedded comment" is to be entered ********** *
* This type of embedded comment section, or multi-line comment comes
* after a normal comment has begun... e.g: #`[ ... ]
* ****************************************************************** */
else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') {
if (IsBracketOpenChar(sc.chNext)) {
sc.Forward(); // Condition met for "embedded comment"
dpEmbeded.opener = sc.ch;
// Find the opposite (termination) closing bracket (if any)
dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener);
if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment"
// Find multiple opening character occurrence
dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
sc.SetState(SCE_RAKU_COMMENTEMBED);
sc.Forward(dpEmbeded.count - 1); // incremented in the next loop
}
}
}
}
// And we're done...
sc.Complete();
}
/*
* LexerRaku::Fold
* - Main fold method
* NOTE: although Raku uses and supports UNICODE characters, we're only looking
* at normal chars here, using 'SafeGetCharAt' - for folding purposes
* that is all we need.
*/
// Shift / mask pair: 0xF0 selects bits 4-7, i.e. a 4-bit value shifted left by 4.
// NOTE(review): neither macro is referenced by LexerRaku::Fold below; presumably
// they were meant for header fold levels - confirm whether they are still needed.
#define RAKU_HEADFOLD_SHIFT 4
#define RAKU_HEADFOLD_MASK 0xF0
// Computes fold levels for the lines covered by [startPos, startPos + length).
//
//  startPos   first position to fold; it may be moved back to the start of an
//             earlier line so that folding restarts on a line whose first
//             character is styled SCE_RAKU_DEFAULT (or on line 0)
//  length     number of positions to fold, counted from the original startPos
//  initStyle  unused
//  pAccess    document to read styles from and write fold levels to
//
// Does nothing when options.fold is off. Fold points are:
//  - '{' / '}' characters styled SCE_RAKU_OPERATOR
//  - when options.foldComment is set: runs of line comments, plus
//    embedded (multi-line) comments if options.foldCommentMultiline is set and
//    POD "=begin" / "=end" lines if options.foldCommentPOD is set
//
// Each line's stored level packs the level at the start of the line in the low
// bits and the level at the end of the line in the bits above 16, together
// with SC_FOLDLEVELWHITEFLAG / SC_FOLDLEVELHEADERFLAG where applicable.
void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {

	// init LexAccessor / return if fold option is off
	if (!options.fold) return;
	LexAccessor styler(pAccess);

	// init char and line positions
	// endPos is fixed before startPos is (possibly) moved back below, so the
	// backtrack only extends the range towards the start of the document
	const Sci_PositionU endPos = startPos + length;
	Sci_Position lineCurrent = styler.GetLine(startPos);

	// Backtrack to last SCE_RAKU_DEFAULT line
	if (startPos > 0 && lineCurrent > 0) {
		while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
			lineCurrent--;
			startPos = styler.LineStart(lineCurrent);
		}
		lineCurrent = styler.GetLine(startPos);
	}
	Sci_PositionU lineStart = startPos;
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);

	// init line folding level
	// the level at the end of the previous line is kept in its upper 16 bits
	int levelPrev = SC_FOLDLEVELBASE;
	if (lineCurrent > 0)
		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
	int levelCurrent = levelPrev;

	// init char and style variables
	char chNext = styler[startPos];
	// startPos is unsigned: only look one position back when there is one,
	// otherwise 'startPos - 1' wraps around to an out-of-range position
	int stylePrev = SCE_RAKU_DEFAULT;
	if (startPos > 0)
		stylePrev = styler.StyleAt(startPos - 1);
	int styleNext = styler.StyleAt(startPos);
	int styleNextStartLine = styler.StyleAt(lineStartNext);
	int visibleChars = 0;
	bool wasCommentMulti = false;

	// main loop
	for (Sci_PositionU i = startPos; i < endPos; i++) {

		// next char, style and flags
		const char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		const int style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		const bool atEOL = i == (lineStartNext - 1);
		const bool atLineStart = i == lineStart;

		// --- Comments / Multi-line / POD ------------------------------------
		if (options.foldComment) {

			// Multi-line
			// opens on a line starting with "#`" when the next line starts
			// inside the embedded comment; closes on the last line that
			// starts inside the embedded comment
			if (options.foldCommentMultiline) {
				if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`'
					&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
					levelCurrent++;
					wasCommentMulti = true; // don't confuse line comments
				} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
					&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
					levelCurrent--;
				}
			}

			// Line comments
			// first line of a run of comment lines opens a fold, last line closes it
			if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
				&& IsCommentLine(lineCurrent, styler)) {
				if (!IsCommentLine(lineCurrent - 1, styler)
					&& IsCommentLine(lineCurrent + 1, styler))
					levelCurrent++;
				else if (IsCommentLine(lineCurrent - 1, styler)
					&& !IsCommentLine(lineCurrent + 1, styler))
					levelCurrent--;
			}

			// POD
			if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
				if (styler.Match(i, "=begin"))
					levelCurrent++;
				else if (styler.Match(i, "=end"))
					levelCurrent--;
			}
		}

		// --- Code block -----------------------------------------------------
		if (style == SCE_RAKU_OPERATOR) {
			if (ch == '{') {
				// a '{' following a closing '}' on the same line (e.g. "} else {"):
				// lower the line's own level so it can still become a header
				if (levelCurrent < levelPrev) levelPrev--;
				levelCurrent++;
			} else if (ch == '}') {
				levelCurrent--;
			}
		}

		// --- at end of line / range / apply fold ----------------------------
		if (atEOL) {
			int level = levelPrev;

			// set level flags
			level |= levelCurrent << 16;
			if (visibleChars == 0 && options.foldCompact)
				level |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				level |= SC_FOLDLEVELHEADERFLAG;
			if (level != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, level);
			}

			// move on to the next line and reset the per-line state
			lineCurrent++;
			lineStart = lineStartNext;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			styleNextStartLine = styler.StyleAt(lineStartNext);
			levelPrev = levelCurrent;
			visibleChars = 0;
			wasCommentMulti = false;
		}

		// increment visibleChars / set previous char
		if (!isspacechar(ch))
			visibleChars++;
		stylePrev = style;
	}

	// Done: set real level of the next line
	// (keep whatever flag bits that line already has, replace only its level number)
	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}
/*----------------------------------------------------------------------------*
* --- Scintilla: LexerModule ---
*----------------------------------------------------------------------------*/
// Lexer module object for Raku, constructed with SCLEX_RAKU, the
// LexerRaku::LexerFactoryRaku function, the name string "raku" and rakuWordLists.
// 'extern' gives this const object external linkage (a namespace-scope const
// object would otherwise have internal linkage in C++).
extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);