mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-04-08 17:15:37 +02:00
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip ) Released 21 August 2024. Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530. Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth. Add SCI_STYLESETSTRETCH to support condensed and expanded text styles. Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524. Fix bug on Cocoa where double-click stopped working when system had been running for a long time. On Cocoa implement more values of font weight and stretch. Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip ) Released 21 August 2024. Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers. Lexer added for TOML "toml". Bash: Handle backslash in heredoc delimiter. Issue #257. Progress: Fix lexing of nested comments. Pull request #258. Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259. Close #15564
1635 lines
50 KiB
C++
1635 lines
50 KiB
C++
/** @file LexRaku.cxx
|
|
** Lexer for Raku
|
|
**
|
|
** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
|
|
**/
|
|
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
/*
|
|
* Raku (Perl6) Lexer for Scintilla
|
|
* ---------------------------------
|
|
* ---------------------------------
|
|
* 06-Dec-2019: More Unicode support:
|
|
* - Added a full scope of allowed numbers and letters
|
|
* 29-Nov-2019: More highlighting / implemented basic folding:
|
|
* - Operators (blanket cover, no sequence checking)
|
|
* - Class / Grammar name highlighting
|
|
* - Folding:
|
|
* - Comments: line / multi-line
|
|
* - POD sections
|
|
* - Code blocks {}
|
|
* 26-Nov-2019: Basic syntax highlighting covering the following:
|
|
* - Comments, both line and embedded (multi-line)
|
|
* - POD, no inline highlighting as yet...
|
|
* - Heredoc block string, with variable highlighting (with qq)
|
|
* - Strings, with variable highlighting (with ")
|
|
* - Q Language, including adverbs (also basic q and qq)
|
|
* - Regex, including adverbs
|
|
* - Numbers
|
|
* - Bareword / identifiers
|
|
* - Types
|
|
* - Variables: mu, positional, associative, callable
|
|
* TODO:
|
|
* - POD inline
|
|
* - Better operator sequence coverage
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <functional>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "CharacterCategory.h"
|
|
#include "LexerModule.h"
|
|
#include "OptionSet.h"
|
|
#include "DefaultLexer.h"
|
|
|
|
using namespace Scintilla;
|
|
using namespace Lexilla;
|
|
|
|
namespace { // anonymous namespace to isolate any name clashes
|
|
/*----------------------------------------------------------------------------*
|
|
* --- DEFINITIONS: OPTIONS / CONSTANTS ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
// Number types
// - the numeric order is significant (see the notes on the individual values)
#define RAKUNUM_BINARY		1	// order is significant: 1-3 cannot have a dot
#define RAKUNUM_OCTAL		2
#define RAKUNUM_FLOAT_EXP	3	// exponent part only
#define RAKUNUM_HEX			4	// may be a hex float
#define RAKUNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
#define RAKUNUM_VECTOR		6
#define RAKUNUM_V_VECTOR	7
#define RAKUNUM_VERSION		8	// can contain multiple '.'s
#define RAKUNUM_BAD			9
|
|
|
|
// Regex / Q string types
// - the numeric order is significant: code compares against RAKUTYPE_REGEX
//   and RAKUTYPE_QLANG to tell the groups apart
// - "N char ident" is the length of the keyword that introduces the construct
#define RAKUTYPE_REGEX_NORM	0	// 0 char ident
#define RAKUTYPE_REGEX_S	1	// order is significant:
#define RAKUTYPE_REGEX_M	2	// 1 char ident
#define RAKUTYPE_REGEX_Y	3	// 1 char ident
#define RAKUTYPE_REGEX		4	// > RAKUTYPE_REGEX == 2 char identifiers
#define RAKUTYPE_REGEX_RX	5	// 2 char ident
#define RAKUTYPE_REGEX_TR	6	// 2 char ident
#define RAKUTYPE_QLANG		7	// < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
#define RAKUTYPE_STR_WQ		8	// 0 char ident < word quote >
#define RAKUTYPE_STR_Q		9	// 1 char ident
#define RAKUTYPE_STR_QX		10	// 2 char ident
#define RAKUTYPE_STR_QW		11	// 2 char ident
#define RAKUTYPE_STR_QQ		12	// 2 char ident
#define RAKUTYPE_STR_QQX	13	// 3 char ident
#define RAKUTYPE_STR_QQW	14	// 3 char ident
#define RAKUTYPE_STR_QQWW	15	// 4 char ident
|
|
|
|
// Delimiter types
#define RAKUDELIM_BRACKET	0	// bracket: regex, Q language
#define RAKUDELIM_QUOTE		1	// quote: normal string
|
|
|
|
// rakuWordLists: descriptions of the keyword lists defined in config
// - nullptr terminated
const char *const rakuWordLists[] = {
	"Keywords and identifiers",
	"Functions",
	"Types basic",
	"Types composite",
	"Types domain-specific",
	"Types exception",
	"Adverbs",
	nullptr,
};
|
|
|
|
// Options and defaults
// - plain holder for the lexer's folding switches; the constructor
//   establishes the default value of each one
struct OptionsRaku {
	bool fold;
	bool foldCompact;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentPOD;
	OptionsRaku() :
		fold(true),
		foldCompact(false),
		foldComment(true),
		foldCommentMultiline(true),
		foldCommentPOD(true) {
	}
};
|
|
|
|
// init options and words
|
|
struct OptionSetRaku : public OptionSet<OptionsRaku> {
|
|
OptionSetRaku() {
|
|
DefineProperty("fold", &OptionsRaku::fold);
|
|
DefineProperty("fold.comment", &OptionsRaku::foldComment);
|
|
DefineProperty("fold.compact", &OptionsRaku::foldCompact);
|
|
|
|
DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline,
|
|
"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
|
|
DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD,
|
|
"Set this property to 0 to disable folding POD comments when fold.comment=1.");
|
|
|
|
// init word lists
|
|
DefineWordListSets(rakuWordLists);
|
|
}
|
|
};
|
|
|
|
// Delimiter pair
// - describes an opening delimiter and up to two accepted closing delimiters
// - everything starts out zeroed / false
struct DelimPair {
	int opener;		// opener char
	int closer[2];	// closer chars
	bool interpol;	// can variables be interpolated?
	short count;	// delimiter char count
	DelimPair() :
		opener(0),
		closer{0, 0},
		interpol(false),
		count(0) {
	}
	// true when ch equals either of the closer chars
	// (an unused closer slot holds 0, so ch == 0 also matches in that case)
	bool isCloser(int ch) const {
		if (ch == closer[0])
			return true;
		return ch == closer[1];
	}
};
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- FUNCTIONS ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * IsANewLine
 *  - returns true if ch is a carriage return or a line feed
 */
constexpr bool IsANewLine(int ch) noexcept {
	return ch == '\n' || ch == '\r';
}
|
|
|
|
/*
|
|
* IsAWhitespace
|
|
* - returns true if this is a whitespace (or newline) char
|
|
*/
|
|
bool IsAWhitespace(int ch) noexcept {
|
|
return IsASpaceOrTab(ch) || IsANewLine(ch);
|
|
}
|
|
|
|
/*
 * IsAlphabet
 *  - returns true if ch is an ASCII letter (a-z or A-Z)
 */
constexpr bool IsAlphabet(int ch) noexcept {
	if (ch >= 'a' && ch <= 'z')
		return true;
	return ch >= 'A' && ch <= 'Z';
}
|
|
|
|
/*
|
|
* IsCommentLine
|
|
* - returns true if this is a comment line
|
|
* - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED
|
|
* modified from: LexPerl.cxx
|
|
*/
|
|
bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
|
|
Sci_Position pos = styler.LineStart(line);
|
|
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
|
|
for (Sci_Position i = pos; i < eol_pos; i++) {
|
|
char ch = styler[i];
|
|
int style = styler.StyleAt(i);
|
|
if (type == SCE_RAKU_COMMENTEMBED) {
|
|
if (i == (eol_pos - 1) && style == type)
|
|
return true;
|
|
} else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED
|
|
if (ch == '#' && style == type && styler[i+1] != '`' )
|
|
return true;
|
|
else if (!IsASpaceOrTab(ch))
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* ContainsQTo
|
|
* - returns true if this range contains ":to" in style SCE_RAKU_ADVERB indicating the start
|
|
* of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ.
|
|
*/
|
|
bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
|
|
std::string adverb;
|
|
for (Sci_Position i = start; i < end; i++) {
|
|
if (styler.StyleAt(i) == SCE_RAKU_ADVERB) {
|
|
adverb.push_back(styler[i]);
|
|
}
|
|
}
|
|
return adverb.find(":to") != std::string::npos;
|
|
}
|
|
|
|
/*
|
|
* GetBracketCloseChar
|
|
* - returns the end bracket char: opposite of start
|
|
* - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
|
|
* - Categories are general matches for valid BiDi types
|
|
* - Most closer chars are opener + 1
|
|
*/
|
|
int GetBracketCloseChar(const int ch) noexcept {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
case ccSm:
|
|
switch (ch) {
|
|
case 0x3C: return 0x3E; // LESS-THAN SIGN
|
|
case 0x2208: return 0x220B; // ELEMENT OF
|
|
case 0x2209: return 0x220C; // NOT AN ELEMENT OF
|
|
case 0x220A: return 0x220D; // SMALL ELEMENT OF
|
|
case 0x2215: return 0x29F5; // DIVISION SLASH
|
|
case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO
|
|
case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH
|
|
case 0x22A6: return 0x2ADE; // ASSERTION
|
|
case 0x22A8: return 0x2AE4; // TRUE
|
|
case 0x22A9: return 0x2AE3; // FORCES
|
|
case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
|
case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE
|
|
case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
|
|
case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
|
|
case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR
|
|
case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR
|
|
case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN
|
|
}
|
|
break;
|
|
case ccPs:
|
|
switch (ch) {
|
|
case 0x5B: return 0x5D; // LEFT SQUARE BRACKET
|
|
case 0x7B: return 0x7D; // LEFT CURLY BRACKET
|
|
case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
|
|
case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
|
|
case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET
|
|
case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET
|
|
}
|
|
break;
|
|
case ccPi:
|
|
break;
|
|
default: return 0;
|
|
}
|
|
return ch + 1;
|
|
}
|
|
|
|
/*
|
|
* IsValidQuoteOpener
|
|
* -
|
|
*/
|
|
bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
|
|
dp.closer[0] = 0;
|
|
dp.closer[1] = 0;
|
|
dp.interpol = true;
|
|
if (type == RAKUDELIM_QUOTE) {
|
|
switch (ch) {
|
|
// Opener Closer Description
|
|
case '\'': dp.closer[0] = '\''; // APOSTROPHE
|
|
dp.interpol = false;
|
|
break;
|
|
case '"': dp.closer[0] = '"'; // QUOTATION MARK
|
|
break;
|
|
case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK
|
|
dp.interpol = false;
|
|
break;
|
|
case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK
|
|
break;
|
|
case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK
|
|
break;
|
|
case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK
|
|
dp.closer[1] = 0x201D;
|
|
break;
|
|
case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET
|
|
dp.interpol = false;
|
|
break;
|
|
default: return false;
|
|
}
|
|
} else if (type == RAKUDELIM_BRACKET) {
|
|
dp.closer[0] = GetBracketCloseChar(ch);
|
|
}
|
|
dp.opener = ch;
|
|
dp.count = 1;
|
|
return dp.closer[0] > 0;
|
|
}
|
|
|
|
/*
|
|
* IsBracketOpenChar
|
|
* - true if this is a valid start bracket character
|
|
*/
|
|
bool IsBracketOpenChar(int ch) noexcept {
|
|
return GetBracketCloseChar(ch) > 0;
|
|
}
|
|
|
|
/*
|
|
* IsValidRegOrQAdjacent
|
|
* - returns true if ch is a valid character to put directly after Q / q
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidRegOrQAdjacent(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' );
|
|
}
|
|
|
|
/*
|
|
* IsValidRegOrQPrecede
|
|
* - returns true if ch is a valid preceding character to put directly before Q / q
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidRegOrQPrecede(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_');
|
|
}
|
|
|
|
/*
|
|
* MatchCharInRange
|
|
* - returns true if the mach character is found in range (of length)
|
|
* - ignoreDelim (default false)
|
|
*/
|
|
bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
|
|
const int match, bool ignoreDelim = false) {
|
|
Sci_Position len = 0;
|
|
int chPrev = sc.chPrev;
|
|
while (++len < length) {
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
if (ch == match && (ignoreDelim || chPrev != '\\'))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* PrevNonWhitespaceChar
|
|
* - returns the last non-whitespace char
|
|
*/
|
|
int PrevNonWhitespaceChar(StyleContext &sc) {
|
|
Sci_Position rel = 0;
|
|
Sci_Position max_back = 0 - sc.currentPos;
|
|
while (--rel > max_back) {
|
|
const int ch = sc.GetRelativeCharacter(rel);
|
|
if (!IsAWhitespace(ch))
|
|
return ch;
|
|
}
|
|
return 0; // no matching char
|
|
}
|
|
|
|
/*
|
|
* IsQLangStartAtScPos
|
|
* - returns true if this is a valid Q Language sc position
|
|
* - ref: https://docs.raku.org/language/quoting
|
|
* - Q :adverb :adverb //;
|
|
* - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
|
|
*/
|
|
bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
|
|
const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
|
|
const int chFw2 = sc.GetRelativeCharacter(2);
|
|
const int chFw3 = sc.GetRelativeCharacter(3);
|
|
type = -1;
|
|
if (IsValidRegOrQPrecede(sc.chPrev)) {
|
|
if (sc.ch == 'Q' && valid_adj) {
|
|
type = RAKUTYPE_QLANG;
|
|
} else if (sc.ch == 'q') {
|
|
switch (sc.chNext) {
|
|
case 'x':
|
|
type = RAKUTYPE_STR_QX;
|
|
break;
|
|
case 'w':
|
|
type = RAKUTYPE_STR_QW;
|
|
break;
|
|
case 'q':
|
|
if (chFw2 == 'x') {
|
|
type = RAKUTYPE_STR_QQX;
|
|
} else if (chFw2 == 'w') {
|
|
if (chFw3 == 'w') {
|
|
type = RAKUTYPE_STR_QQWW;
|
|
} else {
|
|
type = RAKUTYPE_STR_QQW;
|
|
}
|
|
} else {
|
|
type = RAKUTYPE_STR_QQ;
|
|
}
|
|
break;
|
|
default:
|
|
type = RAKUTYPE_STR_Q;
|
|
}
|
|
} else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) {
|
|
type = RAKUTYPE_STR_WQ; // < word quote >
|
|
}
|
|
}
|
|
return type >= 0;
|
|
}
|
|
|
|
/*
|
|
* IsRegexStartAtScPos
|
|
* - returns true if this is a valid Regex sc position
|
|
* - ref: https://docs.raku.org/language/regexes
|
|
* - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
|
|
* - regex R :adverb //;
|
|
* - /:adverb /;
|
|
*/
|
|
bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
|
|
const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
|
|
type = -1;
|
|
if (IsValidRegOrQPrecede(sc.chPrev)) {
|
|
switch (sc.ch) {
|
|
case 'r':
|
|
if (sc.chNext == 'x')
|
|
type = RAKUTYPE_REGEX_RX;
|
|
break;
|
|
case 't':
|
|
case 'T':
|
|
if (sc.chNext == 'r' || sc.chNext == 'R')
|
|
type = RAKUTYPE_REGEX_TR;
|
|
break;
|
|
case 'm':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_M;
|
|
break;
|
|
case 's':
|
|
case 'S':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_S;
|
|
break;
|
|
case 'y':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_Y;
|
|
break;
|
|
case '/':
|
|
if (set.Contains(PrevNonWhitespaceChar(sc)))
|
|
type = RAKUTYPE_REGEX_NORM;
|
|
}
|
|
}
|
|
return type >= 0;
|
|
}
|
|
|
|
/*
|
|
* IsValidIdentPrecede
|
|
* - returns if ch is a valid preceding char to put directly before an identifier
|
|
*/
|
|
bool IsValidIdentPrecede(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%');
|
|
}
|
|
|
|
/*
|
|
* IsValidDelimiter
|
|
* - returns if ch is a valid delimiter (most chars are valid)
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidDelimiter(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == ':');
|
|
}
|
|
|
|
/*
|
|
* GetDelimiterCloseChar
|
|
* - returns the corresponding close char for a given delimiter (could be the same char)
|
|
*/
|
|
int GetDelimiterCloseChar(int ch) noexcept {
|
|
int ch_end = GetBracketCloseChar(ch);
|
|
if (ch_end == 0 && IsValidDelimiter(ch)) {
|
|
ch_end = ch;
|
|
}
|
|
return ch_end;
|
|
}
|
|
|
|
/*
|
|
* GetRepeatCharCount
|
|
* - returns the occurrence count of match
|
|
*/
|
|
Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
|
|
Sci_Position cnt = 0;
|
|
while (cnt < length) {
|
|
if (sc.GetRelativeCharacter(cnt) != chMatch) {
|
|
break;
|
|
}
|
|
cnt++;
|
|
}
|
|
return cnt;
|
|
}
|
|
|
|
/*
|
|
* LengthToDelimiter
|
|
* - returns the length until the end of a delimited string section
|
|
* - Ignores nested delimiters (if opener != closer)
|
|
* - no trailing char after last closer (default false)
|
|
*/
|
|
Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
|
|
Sci_Position length, bool noTrailing = false) {
|
|
short cnt_open = 0; // count open bracket
|
|
short cnt_close = 0; // count close bracket
|
|
bool is_escape = false; // has been escaped using '\'?
|
|
Sci_Position len = 0; // count characters
|
|
int chOpener = dp.opener; // look for nested opener / closer
|
|
if (dp.opener == dp.closer[0])
|
|
chOpener = 0; // no opening delimiter (no nesting possible)
|
|
|
|
while (len < length) {
|
|
const int chPrev = sc.GetRelativeCharacter(len - 1);
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
const int chNext = sc.GetRelativeCharacter(len+1);
|
|
|
|
if (cnt_open == 0 && cnt_close == dp.count) {
|
|
return len; // end condition has been met
|
|
} else if (is_escape) {
|
|
is_escape = false;
|
|
} else if (ch == '\\') {
|
|
is_escape = true;
|
|
} else {
|
|
if (ch == chOpener) {
|
|
cnt_open++; // open nested bracket
|
|
} else if (dp.isCloser(ch)) {
|
|
if ( cnt_open > 0 ) {
|
|
cnt_open--; // close nested bracket
|
|
} else if (dp.count > 1 && cnt_close < (dp.count - 1)) {
|
|
if (cnt_close > 1) {
|
|
if (dp.isCloser(chPrev)) {
|
|
cnt_close++;
|
|
} else { // reset if previous char was not close
|
|
cnt_close = 0;
|
|
}
|
|
} else {
|
|
cnt_close++;
|
|
}
|
|
} else if (!noTrailing || (IsAWhitespace(chNext))) {
|
|
cnt_close++; // found last close
|
|
if (cnt_close > 1 && !dp.isCloser(chPrev)) {
|
|
cnt_close = 0; // reset if previous char was not close
|
|
}
|
|
} else {
|
|
cnt_close = 0; // non handled close: reset
|
|
}
|
|
} else if (IsANewLine(ch)) {
|
|
cnt_open = 0; // reset after each line
|
|
cnt_close = 0;
|
|
}
|
|
}
|
|
len++;
|
|
}
|
|
return -1; // end condition has NOT been met
|
|
}
|
|
|
|
/*
|
|
* LengthToEndHeredoc
|
|
* - returns the length until the end of a heredoc section
|
|
* - delimiter string MUST begin on a new line
|
|
*/
|
|
Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
|
|
const Sci_Position length, const char *delim) {
|
|
bool on_new_ln = false;
|
|
int i = 0; // str index
|
|
for (int n = 0; n < length; n++) {
|
|
const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0);
|
|
if (on_new_ln) {
|
|
if (delim[i] == '\0')
|
|
return n; // at end of str, match found!
|
|
if (ch != delim[i++])
|
|
i = 0; // no char match, reset 'i'ndex
|
|
}
|
|
if (i == 0) // detect new line
|
|
on_new_ln = IsANewLine(ch);
|
|
}
|
|
return -1; // no match found
|
|
}
|
|
|
|
/*
|
|
* LengthToNextChar
|
|
* - returns the length until the next character
|
|
*/
|
|
Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
|
|
Sci_Position len = 0;
|
|
while (++len < length) {
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) {
|
|
break;
|
|
}
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* GetRelativeString
|
|
* - gets a relative string and sets it in &str
|
|
* - resets string before setting
|
|
*/
|
|
void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
|
|
std::string &str) {
|
|
Sci_Position pos = offset;
|
|
str.clear();
|
|
while (pos < length) {
|
|
str += sc.GetRelativeCharacter(pos++);
|
|
}
|
|
}
|
|
|
|
} // end anonymous namespace
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- class: LexerRaku ---
|
|
*----------------------------------------------------------------------------*/
|
|
//class LexerRaku : public ILexerWithMetaData {
|
|
class LexerRaku : public DefaultLexer {
|
|
CharacterSet setWord;
|
|
CharacterSet setSigil;
|
|
CharacterSet setTwigil;
|
|
CharacterSet setOperator;
|
|
CharacterSet setSpecialVar;
|
|
WordList regexIdent; // identifiers that specify a regex
|
|
OptionsRaku options; // Options from config
|
|
OptionSetRaku osRaku;
|
|
WordList keywords; // Word Lists from config
|
|
WordList functions;
|
|
WordList typesBasic;
|
|
WordList typesComposite;
|
|
WordList typesDomainSpecific;
|
|
WordList typesExceptions;
|
|
WordList adverbs;
|
|
|
|
public:
|
|
// Defined as explicit, so that constructor can not be copied
|
|
explicit LexerRaku() :
|
|
DefaultLexer("raku", SCLEX_RAKU),
|
|
setWord(CharacterSet::setAlphaNum, "-_", 0x80),
|
|
setSigil(CharacterSet::setNone, "$&%@"),
|
|
setTwigil(CharacterSet::setNone, "!*.:<=?^~"),
|
|
setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"),
|
|
setSpecialVar(CharacterSet::setNone, "_/!") {
|
|
regexIdent.Set("regex rule token");
|
|
}
|
|
// Deleted so LexerRaku objects can not be copied.
|
|
LexerRaku(const LexerRaku &) = delete;
|
|
LexerRaku(LexerRaku &&) = delete;
|
|
void operator=(const LexerRaku &) = delete;
|
|
void operator=(LexerRaku &&) = delete;
|
|
virtual ~LexerRaku() {
|
|
}
|
|
void SCI_METHOD Release() noexcept override {
|
|
delete this;
|
|
}
|
|
int SCI_METHOD Version() const noexcept override {
|
|
return lvRelease5;
|
|
}
|
|
const char *SCI_METHOD PropertyNames() override {
|
|
return osRaku.PropertyNames();
|
|
}
|
|
int SCI_METHOD PropertyType(const char *name) override {
|
|
return osRaku.PropertyType(name);
|
|
}
|
|
const char *SCI_METHOD DescribeProperty(const char *name) override {
|
|
return osRaku.DescribeProperty(name);
|
|
}
|
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
|
|
const char *SCI_METHOD PropertyGet(const char *key) override {
|
|
return osRaku.PropertyGet(key);
|
|
}
|
|
const char *SCI_METHOD DescribeWordListSets() override {
|
|
return osRaku.DescribeWordListSets();
|
|
}
|
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
|
|
static ILexer5 *LexerFactoryRaku() {
|
|
return new LexerRaku();
|
|
}
|
|
|
|
protected:
|
|
bool IsOperatorChar(const int ch);
|
|
bool IsWordChar(const int ch, bool allowNumber = true);
|
|
bool IsWordStartChar(const int ch);
|
|
bool IsNumberChar(const int ch, int base = 10);
|
|
bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
|
|
int &type, const DelimPair &dp);
|
|
void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
|
|
bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
|
|
WordList &wordsAdverbs, DelimPair &dp);
|
|
Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
|
|
char *s, const int size, Sci_Position offset = 0);
|
|
};
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- METHODS: LexerRaku ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
|
|
* LexerRaku::IsOperatorChar
|
|
* - Test for both ASCII and Unicode operators
|
|
* see: https://docs.raku.org/language/unicode_entry
|
|
*/
|
|
bool LexerRaku::IsOperatorChar(const int ch) {
|
|
if (ch > 0x7F) {
|
|
switch (ch) {
|
|
// Unicode ASCII Equiv.
|
|
case 0x2208: // (elem)
|
|
case 0x2209: // !(elem)
|
|
case 0x220B: // (cont)
|
|
case 0x220C: // !(cont)
|
|
case 0x2216: // (-)
|
|
case 0x2229: // (&)
|
|
case 0x222A: // (|)
|
|
case 0x2282: // (<)
|
|
case 0x2283: // (>)
|
|
case 0x2284: // !(<)
|
|
case 0x2285: // !(>)
|
|
case 0x2286: // (<=)
|
|
case 0x2287: // (>=)
|
|
case 0x2288: // !(<=)
|
|
case 0x2289: // !(>=)
|
|
case 0x228D: // (.)
|
|
case 0x228E: // (+)
|
|
case 0x2296: // (^)
|
|
return true;
|
|
}
|
|
}
|
|
return setOperator.Contains(ch);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsWordChar
|
|
* - Test for both ASCII and Unicode identifier characters
|
|
* see: https://docs.raku.org/language/unicode_ascii
|
|
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
|
* FIXME: *still* may not contain all valid characters
|
|
*/
|
|
bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
|
|
// Unicode numbers should not appear in word identifiers
|
|
if (ch > 0x7F) {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
// Letters
|
|
case ccLu:
|
|
case ccLl:
|
|
case ccLt:
|
|
case ccLm:
|
|
case ccLo:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
} else if (allowNumber && IsADigit(ch)) {
|
|
return true; // an ASCII number type
|
|
}
|
|
return setWord.Contains(ch);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsWordStartChar
|
|
* - Test for both ASCII and Unicode identifier "start / first" characters
|
|
*/
|
|
bool LexerRaku::IsWordStartChar(const int ch) {
|
|
return ch != '-' && IsWordChar(ch, false); // no numbers allowed
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsNumberChar
|
|
* - Test for both ASCII and Unicode identifier number characters
|
|
* see: https://docs.raku.org/language/unicode_ascii
|
|
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
|
* FILTERED by Unicode letters that are NUMBER
|
|
* and NOT PARENTHESIZED or CIRCLED
|
|
* FIXME: *still* may not contain all valid number characters
|
|
*/
|
|
bool LexerRaku::IsNumberChar(const int ch, int base) {
|
|
if (ch > 0x7F) {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
// Numbers
|
|
case ccNd:
|
|
case ccNl:
|
|
case ccNo:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
return IsADigit(ch, base);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::PropertySet
|
|
* -
|
|
*/
|
|
Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) {
|
|
if (osRaku.PropertySet(&options, key, val))
|
|
return 0;
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::WordListSet
|
|
* -
|
|
*/
|
|
Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
|
|
WordList *wordListN = nullptr;
|
|
switch (n) {
|
|
case 0:
|
|
wordListN = &keywords;
|
|
break;
|
|
case 1:
|
|
wordListN = &functions;
|
|
break;
|
|
case 2:
|
|
wordListN = &typesBasic;
|
|
break;
|
|
case 3:
|
|
wordListN = &typesComposite;
|
|
break;
|
|
case 4:
|
|
wordListN = &typesDomainSpecific;
|
|
break;
|
|
case 5:
|
|
wordListN = &typesExceptions;
|
|
break;
|
|
case 6:
|
|
wordListN = &adverbs;
|
|
break;
|
|
}
|
|
Sci_Position firstModification = -1;
|
|
if (wordListN) {
|
|
if (wordListN->Set(wl)) {
|
|
firstModification = 0;
|
|
}
|
|
}
|
|
return firstModification;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::ProcessRegexTwinCapture
|
|
* - processes the transition between a regex pair (two sets of delimiters)
|
|
* - moves to first new delimiter, if a bracket
|
|
* - returns true when valid delimiter start found (if bracket)
|
|
*/
|
|
bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
|
|
int &type, const DelimPair &dp) {
|
|
|
|
if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) {
|
|
type = -1; // clear type
|
|
|
|
// move past chRegQClose if it was the previous char
|
|
if (dp.isCloser(sc.chPrev))
|
|
sc.Forward();
|
|
|
|
// no processing needed for non-bracket
|
|
if (dp.isCloser(dp.opener))
|
|
return true;
|
|
|
|
// move to next opening bracket
|
|
const Sci_Position len = LengthToNextChar(sc, length);
|
|
if (sc.GetRelativeCharacter(len) == dp.opener) {
|
|
sc.Forward(len);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::ProcessStringVars
|
|
* - processes a string and highlights any valid variables
|
|
*/
|
|
void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
|
|
const int state = sc.state;
|
|
for (Sci_Position pos = 0; pos < length; pos++) {
|
|
if (sc.state == varState && !IsWordChar(sc.ch)) {
|
|
sc.SetState(state);
|
|
} else if (sc.chPrev != '\\'
|
|
&& (sc.ch == '$' || sc.ch == '@')
|
|
&& IsWordStartChar(sc.chNext)) {
|
|
sc.SetState(varState);
|
|
}
|
|
sc.Forward(); // Next character
|
|
}
|
|
}
|
|
/*
|
|
* LexerRaku::ProcessValidRegQlangStart
|
|
* - processes a section of the document range from after a Regex / Q delimiter
|
|
* - returns true on success
|
|
* - sets: adverbs, chOpen, chClose, chCount
|
|
* ref: https://docs.raku.org/language/regexes
|
|
*/
|
|
bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
|
|
WordList &wordsAdverbs, DelimPair &dp) {
|
|
Sci_Position startPos = sc.currentPos;
|
|
Sci_Position startLen = length;
|
|
const int target_state = sc.state;
|
|
int state = SCE_RAKU_DEFAULT;
|
|
std::string str;
|
|
|
|
// find our opening delimiter (and occurrences) / save any adverbs
|
|
dp.opener = 0; // adverbs can be after the first delimiter
|
|
bool got_all_adverbs = false; // in Regex statements
|
|
bool got_ident = false; // regex can have an identifier: 'regex R'
|
|
sc.SetState(state); // set state default to avoid pre-highlights
|
|
while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) {
|
|
|
|
// move to the next non-space character
|
|
const bool was_space = IsAWhitespace(sc.ch);
|
|
if (!got_all_adverbs && was_space) {
|
|
sc.Forward(LengthToNextChar(sc, length));
|
|
}
|
|
length = startLen - (sc.currentPos - startPos); // update length remaining
|
|
|
|
// parse / eat an identifier (if type == RAKUTYPE_REGEX)
|
|
if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {
|
|
|
|
// eat identifier / account for special adverb :sym<name>
|
|
bool got_sym = false;
|
|
while (sc.More()) {
|
|
sc.SetState(SCE_RAKU_IDENTIFIER);
|
|
while (sc.More() && (IsAlphaNumeric(sc.chNext)
|
|
|| sc.chNext == '_' || sc.chNext == '-')) {
|
|
sc.Forward();
|
|
}
|
|
sc.Forward();
|
|
if (got_sym && sc.ch == '>') {
|
|
sc.SetState(SCE_RAKU_OPERATOR); // '>'
|
|
sc.Forward();
|
|
break;
|
|
} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
|
|
sc.SetState(SCE_RAKU_ADVERB); // ':sym'
|
|
sc.Forward(4);
|
|
sc.SetState(SCE_RAKU_OPERATOR); // '<'
|
|
sc.Forward();
|
|
got_sym = true;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
sc.SetState(state);
|
|
got_ident = true;
|
|
}
|
|
|
|
// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
|
|
// >= RAKUTYPE_QLANG only has adverbs before delim
|
|
else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident)
|
|
&& !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) {
|
|
sc.SetState(SCE_RAKU_ADVERB);
|
|
while (IsAlphaNumeric(sc.chNext) && sc.More()) {
|
|
sc.Forward();
|
|
str += sc.ch;
|
|
}
|
|
str += ' ';
|
|
sc.Forward();
|
|
sc.SetState(state);
|
|
}
|
|
|
|
// find starting delimiter
|
|
else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch))
|
|
&& IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are)
|
|
sc.SetState((state = target_state));// start state here...
|
|
dp.opener = sc.ch; // this is our delimiter, get count
|
|
if (type < RAKUTYPE_QLANG) // type is Regex
|
|
dp.count = 1; // has only one delimiter
|
|
else
|
|
dp.count = GetRepeatCharCount(sc, dp.opener, length);
|
|
sc.Forward(dp.count);
|
|
}
|
|
|
|
// we must have all the adverbs by now...
|
|
else {
|
|
if (got_all_adverbs)
|
|
break; // prevent infinite loop: occurs on missing open char
|
|
got_all_adverbs = true;
|
|
}
|
|
}
|
|
|
|
// set word list / find a valid closing delimiter (or bomb!)
|
|
wordsAdverbs.Set(str.c_str());
|
|
dp.closer[0] = GetDelimiterCloseChar(dp.opener);
|
|
dp.closer[1] = 0; // no other closer char
|
|
return dp.closer[0] > 0;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::LengthToNonWordChar
|
|
* - returns the length until the next non "word" character: AlphaNum + '_'
|
|
* - also sets all the parsed chars in 's'
|
|
*/
|
|
Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
|
|
char *s, const int size, Sci_Position offset) {
|
|
Sci_Position len = 0;
|
|
Sci_Position max_length = size < length ? size : length;
|
|
while (len <= max_length) {
|
|
const int ch = sc.GetRelativeCharacter(len + offset);
|
|
if (!IsWordChar(ch)) {
|
|
s[len] = '\0';
|
|
break;
|
|
}
|
|
s[len] = ch;
|
|
len++;
|
|
}
|
|
s[len + 1] = '\0';
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::Lex
|
|
* - Main lexer method
|
|
*/
|
|
void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
|
|
LexAccessor styler(pAccess);
|
|
DelimPair dpEmbeded; // delimiter pair: embedded comments
|
|
DelimPair dpString; // delimiter pair: string
|
|
DelimPair dpRegQ; // delimiter pair: Regex / Q Lang
|
|
std::string hereDelim; // heredoc delimiter (if in heredoc)
|
|
int hereState = 0; // heredoc state to use (Q / QQ)
|
|
int numState = 0; // number state / type
|
|
short cntDecimal = 0; // number decimal count
|
|
std::string wordLast; // last word seen
|
|
std::string identLast; // last identifier seen
|
|
std::string adverbLast; // last (single) adverb seen
|
|
WordList lastAdverbs; // last adverbs seen
|
|
Sci_Position len; // temp length value
|
|
char s[100]; // temp char string
|
|
int typeDetect = -1; // temp type detected (for regex and Q lang)
|
|
Sci_Position lengthToEnd; // length until the end of range
|
|
|
|
// Backtrack to safe start position before complex quoted elements
|
|
|
|
Sci_PositionU newStartPos = startPos;
|
|
if (initStyle != SCE_RAKU_DEFAULT) {
|
|
// Backtrack to last SCE_RAKU_DEFAULT or 0
|
|
while (newStartPos > 0) {
|
|
newStartPos--;
|
|
if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
|
|
break;
|
|
}
|
|
// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
|
|
if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) {
|
|
if (newStartPos > 0) {
|
|
newStartPos = styler.LineStart(styler.GetLine(newStartPos));
|
|
}
|
|
}
|
|
} else {
|
|
const Sci_Position line = styler.GetLine(newStartPos);
|
|
if (line > 0) {
|
|
// If the previous line is a start of a q or qq heredoc, backtrack to start of line
|
|
const Sci_Position startPreviousLine = styler.LineStart(line-1);
|
|
if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
|
|
newStartPos = startPreviousLine;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Re-calculate (any) changed startPos, length and initStyle state
|
|
if (newStartPos < startPos) {
|
|
initStyle = SCE_RAKU_DEFAULT;
|
|
length += startPos - newStartPos;
|
|
startPos = newStartPos;
|
|
}
|
|
|
|
// init StyleContext
|
|
StyleContext sc(startPos, length, initStyle, styler);
|
|
|
|
// StyleContext Loop
|
|
for (; sc.More(); sc.Forward()) {
|
|
lengthToEnd = (length - (sc.currentPos - startPos)); // end of range
|
|
|
|
/* *** Determine if the current state should terminate ************** *
|
|
* Everything within the 'switch' statement processes characters up
|
|
* until the end of a syntax highlight section / state.
|
|
* ****************************************************************** */
|
|
switch (sc.state) {
|
|
case SCE_RAKU_OPERATOR:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break; // FIXME: better valid operator sequences needed?
|
|
case SCE_RAKU_COMMENTLINE:
|
|
if (IsANewLine(sc.ch)) {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_COMMENTEMBED:
|
|
if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) {
|
|
sc.Forward(len); // Move to end delimiter
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_POD:
|
|
if (sc.atLineStart && sc.Match("=end pod")) {
|
|
sc.Forward(8);
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_STRING:
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) {
|
|
if (dpString.interpol) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_STRING_Q:
|
|
case SCE_RAKU_STRING_QQ:
|
|
case SCE_RAKU_STRING_Q_LANG:
|
|
|
|
// No string: previous char was the delimiter
|
|
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
|
|
|
|
// set (any) heredoc delimiter string
|
|
if (lastAdverbs.InList("to")) {
|
|
GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim);
|
|
hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
|
|
}
|
|
|
|
// select variable identifiers
|
|
if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_HEREDOC_Q:
|
|
case SCE_RAKU_HEREDOC_QQ:
|
|
if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) {
|
|
// select variable identifiers
|
|
if (sc.state == SCE_RAKU_HEREDOC_QQ) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
hereDelim.clear(); // clear heredoc delimiter
|
|
break;
|
|
case SCE_RAKU_REGEX:
|
|
// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
|
|
while (sc.state == SCE_RAKU_REGEX) {
|
|
|
|
// No string: previous char was the delimiter
|
|
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
|
|
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
|
|
continue;
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
}
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
|
|
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
|
|
continue;
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case SCE_RAKU_NUMBER:
|
|
if (sc.ch == '.') {
|
|
if (sc.chNext == '.') { // '..' is an operator
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
sc.Forward();
|
|
if (sc.chNext == '.') // '...' is also an operator
|
|
sc.Forward();
|
|
break;
|
|
} else if (numState > RAKUNUM_FLOAT_EXP
|
|
&& (cntDecimal < 1 || numState == RAKUNUM_VERSION)) {
|
|
cntDecimal++;
|
|
sc.Forward();
|
|
} else {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break; // too many decimal places
|
|
}
|
|
}
|
|
switch (numState) {
|
|
case RAKUNUM_BINARY:
|
|
if (!IsNumberChar(sc.ch, 2))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_OCTAL:
|
|
if (!IsNumberChar(sc.ch, 8))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_HEX:
|
|
if (!IsNumberChar(sc.ch, 16))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_DECIMAL:
|
|
case RAKUNUM_VERSION:
|
|
if (!IsNumberChar(sc.ch))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_WORD:
|
|
case SCE_RAKU_FUNCTION:
|
|
case SCE_RAKU_TYPEDEF:
|
|
case SCE_RAKU_ADVERB:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case SCE_RAKU_MU:
|
|
case SCE_RAKU_POSITIONAL:
|
|
case SCE_RAKU_ASSOCIATIVE:
|
|
case SCE_RAKU_CALLABLE:
|
|
case SCE_RAKU_IDENTIFIER:
|
|
case SCE_RAKU_GRAMMAR:
|
|
case SCE_RAKU_CLASS:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
}
|
|
|
|
/* *** Determine if a new state should be entered ******************* *
|
|
* Everything below here identifies the beginning of a state, all or part
|
|
* of the characters within this state are processed here, the rest are
|
|
* completed above in the terminate state section.
|
|
* ****************************************************************** */
|
|
if (sc.state == SCE_RAKU_DEFAULT) {
|
|
|
|
// --- Single line comment
|
|
if (sc.ch == '#') {
|
|
sc.SetState(SCE_RAKU_COMMENTLINE);
|
|
}
|
|
|
|
// --- POD block
|
|
else if (sc.atLineStart && sc.Match("=begin pod")) {
|
|
sc.SetState(SCE_RAKU_POD);
|
|
sc.Forward(10);
|
|
}
|
|
|
|
// --- String (normal)
|
|
else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
|
|
sc.SetState(SCE_RAKU_STRING);
|
|
}
|
|
|
|
// --- String (Q Language) ----------------------------------------
|
|
// - https://docs.raku.org/language/quoting
|
|
// - Q :adverb :adverb //;
|
|
// - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
|
|
else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
|
|
int state = SCE_RAKU_STRING_Q_LANG;
|
|
Sci_Position forward = 1; // single char ident (default)
|
|
if (typeDetect > RAKUTYPE_QLANG) {
|
|
state = SCE_RAKU_STRING_Q;
|
|
if (typeDetect == RAKUTYPE_STR_WQ)
|
|
forward = 0; // no char ident
|
|
}
|
|
if (typeDetect > RAKUTYPE_STR_Q) {
|
|
if (typeDetect == RAKUTYPE_STR_QQ)
|
|
state = SCE_RAKU_STRING_QQ;
|
|
forward++; // two char ident
|
|
}
|
|
if (typeDetect > RAKUTYPE_STR_QQ)
|
|
forward++; // three char ident
|
|
if (typeDetect == RAKUTYPE_STR_QQWW)
|
|
forward++; // four char ident
|
|
|
|
// Proceed: check for a valid character after statement
|
|
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) {
|
|
sc.SetState(state);
|
|
sc.Forward(forward);
|
|
lastAdverbs.Clear();
|
|
|
|
// Process: adverbs / opening delimiter / adverbs after delim
|
|
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
|
|
lastAdverbs, dpRegQ))
|
|
sc.SetState(state);
|
|
}
|
|
}
|
|
|
|
// --- Regex (rx/s/m/tr/y) ----------------------------------------
|
|
// - https://docs.raku.org/language/regexes
|
|
else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) {
|
|
if (typeDetect == -1) { // must be a regex identifier word
|
|
wordLast.clear();
|
|
typeDetect = RAKUTYPE_REGEX;
|
|
}
|
|
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
|
|
if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX)
|
|
forward++; // single char ident
|
|
if (typeDetect > RAKUTYPE_REGEX)
|
|
forward++; // two char ident
|
|
|
|
// Proceed: check for a valid character after statement
|
|
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) {
|
|
sc.SetState(SCE_RAKU_REGEX);
|
|
sc.Forward(forward);
|
|
lastAdverbs.Clear();
|
|
|
|
// Process: adverbs / opening delimiter / adverbs after delim
|
|
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
|
|
lastAdverbs, dpRegQ))
|
|
sc.SetState(SCE_RAKU_REGEX);
|
|
}
|
|
}
|
|
|
|
// --- Numbers ----------------------------------------------------
|
|
else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
|
|
|| (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) {
|
|
numState = RAKUNUM_DECIMAL; // default: decimal (base 10)
|
|
cntDecimal = 0;
|
|
sc.SetState(SCE_RAKU_NUMBER);
|
|
if (sc.ch == 'v') // forward past 'v'
|
|
sc.Forward();
|
|
if (wordLast == "use") { // package version number
|
|
numState = RAKUNUM_VERSION;
|
|
} else if (sc.ch == '0') { // other type of number
|
|
switch (sc.chNext) {
|
|
case 'b': // binary (base 2)
|
|
numState = RAKUNUM_BINARY;
|
|
break;
|
|
case 'o': // octal (base 8)
|
|
numState = RAKUNUM_OCTAL;
|
|
break;
|
|
case 'x': // hexadecimal (base 16)
|
|
numState = RAKUNUM_HEX;
|
|
}
|
|
if (numState != RAKUNUM_DECIMAL)
|
|
sc.Forward(); // forward to number type char
|
|
}
|
|
}
|
|
|
|
// --- Keywords / functions / types / barewords -------------------
|
|
else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev))
|
|
&& IsWordStartChar(sc.ch)) {
|
|
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
|
|
if (keywords.InList(s)) {
|
|
sc.SetState(SCE_RAKU_WORD); // Keywords
|
|
} else if(functions.InList(s)) {
|
|
sc.SetState(SCE_RAKU_FUNCTION); // Functions
|
|
} else if(typesBasic.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic)
|
|
} else if(typesComposite.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite)
|
|
} else if(typesDomainSpecific.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific)
|
|
} else if(typesExceptions.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions)
|
|
} else {
|
|
if (wordLast == "class")
|
|
sc.SetState(SCE_RAKU_CLASS); // a Class ident
|
|
else if (wordLast == "grammar")
|
|
sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident
|
|
else
|
|
sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword
|
|
identLast = s; // save identifier
|
|
}
|
|
if (adverbLast == "sym") { // special adverb ":sym"
|
|
sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier
|
|
identLast = s; // save identifier
|
|
}
|
|
if (sc.state != SCE_RAKU_IDENTIFIER)
|
|
wordLast = s; // save word
|
|
sc.Forward(len - 1); // ...forward past word
|
|
}
|
|
|
|
// --- Adverbs ----------------------------------------------------
|
|
else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) {
|
|
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1);
|
|
if (adverbs.InList(s)) {
|
|
sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':')
|
|
adverbLast = s; // save word
|
|
sc.Forward(len); // ...forward past word (less offset: 1)
|
|
}
|
|
}
|
|
|
|
// --- Identifiers: $mu / @positional / %associative / &callable --
|
|
// see: https://docs.raku.org/language/variables
|
|
else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
|
|
|| setSpecialVar.Contains(sc.chNext)
|
|
|| IsWordStartChar(sc.chNext))) {
|
|
|
|
// State based on sigil
|
|
switch (sc.ch) {
|
|
case '$': sc.SetState(SCE_RAKU_MU);
|
|
break;
|
|
case '@': sc.SetState(SCE_RAKU_POSITIONAL);
|
|
break;
|
|
case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE);
|
|
break;
|
|
case '&': sc.SetState(SCE_RAKU_CALLABLE);
|
|
}
|
|
const int state = sc.state;
|
|
sc.Forward();
|
|
char ch_delim = 0;
|
|
if (setSpecialVar.Contains(sc.ch)
|
|
&& !setWord.Contains(sc.chNext)) { // Process Special Var
|
|
ch_delim = -1;
|
|
} else if (setTwigil.Contains(sc.ch)) { // Process Twigil
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
if (sc.ch == '<' && setWord.Contains(sc.chNext))
|
|
ch_delim = '>';
|
|
sc.Forward();
|
|
sc.SetState(state);
|
|
}
|
|
|
|
// Process (any) identifier
|
|
if (ch_delim >= 0) {
|
|
sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1);
|
|
if (ch_delim > 0 && sc.chNext == ch_delim) {
|
|
sc.Forward();
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
}
|
|
identLast = s; // save identifier
|
|
}
|
|
}
|
|
|
|
// --- Operators --------------------------------------------------
|
|
else if (IsOperatorChar(sc.ch)) {
|
|
// FIXME: better valid operator sequences needed?
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
}
|
|
|
|
// --- Heredoc: begin ---------------------------------------------
|
|
else if (!hereDelim.empty() && sc.atLineEnd) {
|
|
if (IsANewLine(sc.ch))
|
|
sc.Forward(); // skip a possible CRLF situation
|
|
sc.SetState(hereState);
|
|
}
|
|
|
|
// Reset words: on operator semi-colon OR '}' (end of statement)
|
|
if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) {
|
|
wordLast.clear();
|
|
identLast.clear();
|
|
adverbLast.clear();
|
|
}
|
|
}
|
|
|
|
/* *** Determine if an "embedded comment" is to be entered ********** *
|
|
* This type of embedded comment section, or multi-line comment comes
|
|
* after a normal comment has begun... e.g: #`[ ... ]
|
|
* ****************************************************************** */
|
|
else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') {
|
|
if (IsBracketOpenChar(sc.chNext)) {
|
|
sc.Forward(); // Condition met for "embedded comment"
|
|
dpEmbeded.opener = sc.ch;
|
|
|
|
// Find the opposite (termination) closing bracket (if any)
|
|
dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener);
|
|
if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment"
|
|
|
|
// Find multiple opening character occurrence
|
|
dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
|
|
sc.SetState(SCE_RAKU_COMMENTEMBED);
|
|
sc.Forward(dpEmbeded.count - 1); // incremented in the next loop
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// And we're done...
|
|
sc.Complete();
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::Lex
|
|
* - Main fold method
|
|
* NOTE: although Raku uses and supports UNICODE characters, we're only looking
|
|
* at normal chars here, using 'SafeGetCharAt' - for folding purposes
|
|
* that is all we need.
|
|
*/
|
|
#define RAKU_HEADFOLD_SHIFT 4
|
|
#define RAKU_HEADFOLD_MASK 0xF0
|
|
void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
|
|
|
|
// init LexAccessor / return if fold option is off
|
|
if (!options.fold) return;
|
|
LexAccessor styler(pAccess);
|
|
|
|
// init char and line positions
|
|
const Sci_PositionU endPos = startPos + length;
|
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
|
|
|
// Backtrack to last SCE_RAKU_DEFAULT line
|
|
if (startPos > 0 && lineCurrent > 0) {
|
|
while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
|
|
lineCurrent--;
|
|
startPos = styler.LineStart(lineCurrent);
|
|
}
|
|
lineCurrent = styler.GetLine(startPos);
|
|
}
|
|
Sci_PositionU lineStart = startPos;
|
|
Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
|
|
// init line folding level
|
|
int levelPrev = SC_FOLDLEVELBASE;
|
|
if (lineCurrent > 0)
|
|
levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
|
|
int levelCurrent = levelPrev;
|
|
|
|
// init char and style variables
|
|
char chNext = styler[startPos];
|
|
int stylePrev = styler.StyleAt(startPos - 1);
|
|
int styleNext = styler.StyleAt(startPos);
|
|
int styleNextStartLine = styler.StyleAt(lineStartNext);
|
|
int visibleChars = 0;
|
|
bool wasCommentMulti = false;
|
|
|
|
// main loop
|
|
for (Sci_PositionU i = startPos; i < endPos; i++) {
|
|
|
|
// next char, style and flags
|
|
const char ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
const int style = styleNext;
|
|
styleNext = styler.StyleAt(i + 1);
|
|
const bool atEOL = i == (lineStartNext - 1);
|
|
const bool atLineStart = i == lineStart;
|
|
|
|
// --- Comments / Multi-line / POD ------------------------------------
|
|
if (options.foldComment) {
|
|
|
|
// Multi-line
|
|
if (options.foldCommentMultiline) {
|
|
if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`'
|
|
&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
|
|
levelCurrent++;
|
|
wasCommentMulti = true; // don't confuse line comments
|
|
} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
|
|
&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// Line comments
|
|
if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
|
|
&& IsCommentLine(lineCurrent, styler)) {
|
|
if (!IsCommentLine(lineCurrent - 1, styler)
|
|
&& IsCommentLine(lineCurrent + 1, styler))
|
|
levelCurrent++;
|
|
else if (IsCommentLine(lineCurrent - 1, styler)
|
|
&& !IsCommentLine(lineCurrent + 1, styler))
|
|
levelCurrent--;
|
|
}
|
|
|
|
// POD
|
|
if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
|
|
if (styler.Match(i, "=begin"))
|
|
levelCurrent++;
|
|
else if (styler.Match(i, "=end"))
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// --- Code block -----------------------------------------------------
|
|
if (style == SCE_RAKU_OPERATOR) {
|
|
if (ch == '{') {
|
|
if (levelCurrent < levelPrev) levelPrev--;
|
|
levelCurrent++;
|
|
} else if (ch == '}') {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// --- at end of line / range / apply fold ----------------------------
|
|
if (atEOL) {
|
|
int level = levelPrev;
|
|
|
|
// set level flags
|
|
level |= levelCurrent << 16;
|
|
if (visibleChars == 0 && options.foldCompact)
|
|
level |= SC_FOLDLEVELWHITEFLAG;
|
|
if ((levelCurrent > levelPrev) && (visibleChars > 0))
|
|
level |= SC_FOLDLEVELHEADERFLAG;
|
|
if (level != styler.LevelAt(lineCurrent)) {
|
|
styler.SetLevel(lineCurrent, level);
|
|
}
|
|
lineCurrent++;
|
|
lineStart = lineStartNext;
|
|
lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
styleNextStartLine = styler.StyleAt(lineStartNext);
|
|
levelPrev = levelCurrent;
|
|
visibleChars = 0;
|
|
wasCommentMulti = false;
|
|
}
|
|
|
|
// increment visibleChars / set previous char
|
|
if (!isspacechar(ch))
|
|
visibleChars++;
|
|
stylePrev = style;
|
|
}
|
|
|
|
// Done: set real level of the next line
|
|
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
|
|
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- Scintilla: LexerModule ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
// Lexer module object for Raku: constructed with the SCLEX_RAKU id, the
// LexerRaku::LexerFactoryRaku factory, the language name "raku" and the
// rakuWordLists table.
extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);
|