Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

351 lines
9.4 KiB
C++

// Scintilla source code edit control
/** @file LexR.cxx
** Lexer for R, S, SPlus Statistics Program (Heavily derived from CPP Lexer).
**
**/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cassert>
#include <cctype>
#include <string>
#include <string_view>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
using namespace Lexilla;
namespace {
inline bool IsAWordChar(int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
inline bool IsAWordStart(int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '_');
}
constexpr bool IsAnOperator(int ch) noexcept {
// '.' left out as it is used to make up numbers
if (ch == '-' || ch == '+' || ch == '!' || ch == '~' ||
ch == '?' || ch == ':' || ch == '*' || ch == '/' ||
ch == '^' || ch == '<' || ch == '>' || ch == '=' ||
ch == '&' || ch == '|' || ch == '$' || ch == '(' ||
ch == ')' || ch == '}' || ch == '{' || ch == '[' ||
ch == ']')
return true;
return false;
}
constexpr bool IsOctalOrHex(int ch, bool hex) noexcept {
return IsAnOctalDigit(ch) || (hex && IsAHeXDigit(ch));
}
// https://search.r-project.org/R/refmans/base/html/Quotes.html
struct EscapeSequence {
int outerState = SCE_R_DEFAULT;
int digitsLeft = 0;
bool hex = false;
bool brace = false;
// highlight any character as escape sequence, unrecognized escape sequence is syntax error.
void resetEscapeState(int state, int chNext) noexcept {
outerState = state;
digitsLeft = 1;
hex = true;
brace = false;
if (chNext == 'x') {
digitsLeft = 3;
} else if (chNext == 'u') {
digitsLeft = 5;
} else if (chNext == 'U') {
digitsLeft = 9;
} else if (IsAnOctalDigit(chNext)) {
digitsLeft = 3;
hex = false;
}
}
bool atEscapeEnd(int ch) noexcept {
--digitsLeft;
return digitsLeft <= 0 || !IsOctalOrHex(ch, hex);
}
};
int CheckRawString(LexAccessor &styler, Sci_Position pos, int &dashCount) {
dashCount = 0;
while (true) {
const char ch = styler.SafeGetCharAt(pos++);
switch (ch) {
case '-':
++dashCount;
break;
case '(':
return ')';
case '[':
return ']';
case '{':
return '}';
default:
dashCount = 0;
return 0;
}
}
}
void ColouriseRDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
Accessor &styler) {
const WordList &keywords = *keywordlists[0];
const WordList &keywords2 = *keywordlists[1];
const WordList &keywords3 = *keywordlists[2];
// state for raw string
int matchingDelimiter = 0;
int dashCount = 0;
// property lexer.r.escape.sequence
// Set to 1 to enable highlighting of escape sequences in strings.
const bool escapeSequence = styler.GetPropertyInt("lexer.r.escape.sequence", 0) != 0;
EscapeSequence escapeSeq;
StyleContext sc(startPos, length, initStyle, styler);
if (sc.currentLine > 0) {
const int lineState = styler.GetLineState(sc.currentLine - 1);
matchingDelimiter = lineState & 0xff;
dashCount = lineState >> 8;
}
while (sc.More()) {
// Determine if the current state should terminate.
switch (sc.state) {
case SCE_R_OPERATOR:
sc.SetState(SCE_R_DEFAULT);
break;
case SCE_R_NUMBER:
// https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Literal-constants
if (AnyOf(sc.ch, 'e', 'E', 'p', 'P') && (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
sc.Forward(); // exponent part
} else if (!(IsAHeXDigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext)))) {
if (AnyOf(sc.ch, 'L', 'i')) {
sc.Forward(); // integer and complex qualifier
}
sc.SetState(SCE_R_DEFAULT);
}
break;
case SCE_R_IDENTIFIER:
if (!IsAWordChar(sc.ch)) {
char s[100];
sc.GetCurrent(s, sizeof(s));
if (keywords.InList(s)) {
sc.ChangeState(SCE_R_KWORD);
} else if (keywords2.InList(s)) {
sc.ChangeState(SCE_R_BASEKWORD);
} else if (keywords3.InList(s)) {
sc.ChangeState(SCE_R_OTHERKWORD);
}
sc.SetState(SCE_R_DEFAULT);
}
break;
case SCE_R_COMMENT:
if (sc.MatchLineEnd()) {
sc.SetState(SCE_R_DEFAULT);
}
break;
case SCE_R_STRING:
case SCE_R_STRING2:
case SCE_R_BACKTICKS:
if (sc.ch == '\\') {
if (escapeSequence) {
escapeSeq.resetEscapeState(sc.state, sc.chNext);
sc.SetState(SCE_R_ESCAPESEQUENCE);
sc.Forward();
if (sc.chNext == '{' && AnyOf(sc.ch, 'u', 'U')) {
escapeSeq.brace = true;
sc.Forward();
} else if (sc.MatchLineEnd()) {
// don't highlight line ending as escape sequence:
// escapeSeq.outerState is lost when editing on next line.
sc.SetState(escapeSeq.outerState);
}
} else {
sc.Forward(); // Skip all characters after the backslash
}
} else if ((sc.state == SCE_R_STRING && sc.ch == '\"')
|| (sc.state == SCE_R_STRING2 && sc.ch == '\'')
|| (sc.state == SCE_R_BACKTICKS && sc.ch == '`')) {
sc.ForwardSetState(SCE_R_DEFAULT);
}
break;
case SCE_R_ESCAPESEQUENCE:
if (escapeSeq.atEscapeEnd(sc.ch)) {
if (escapeSeq.brace && sc.ch == '}') {
sc.Forward();
}
sc.SetState(escapeSeq.outerState);
continue;
}
break;
case SCE_R_RAWSTRING:
case SCE_R_RAWSTRING2:
while (sc.ch == matchingDelimiter) {
sc.Forward();
int count = dashCount;
while (count != 0 && sc.ch == '-') {
--count;
sc.Forward();
}
if (count == 0 && sc.ch == ((sc.state == SCE_R_RAWSTRING) ? '\"' : '\'')) {
matchingDelimiter = 0;
dashCount = 0;
sc.ForwardSetState(SCE_R_DEFAULT);
break;
}
}
break;
case SCE_R_INFIX:
if (sc.ch == '%') {
sc.ForwardSetState(SCE_R_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_R_INFIXEOL);
}
break;
case SCE_R_INFIXEOL:
if (sc.atLineStart) {
sc.SetState(SCE_R_DEFAULT);
}
break;
}
// Determine if a new state should be entered.
if (sc.state == SCE_R_DEFAULT) {
if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
sc.SetState(SCE_R_NUMBER);
if (sc.ch == '0' && AnyOf(sc.chNext, 'x', 'X')) {
sc.Forward();
}
} else if (AnyOf(sc.ch, 'r', 'R') && AnyOf(sc.chNext, '\"', '\'')) {
const int chNext = sc.chNext;
matchingDelimiter = CheckRawString(styler, sc.currentPos + 2, dashCount);
if (matchingDelimiter) {
sc.SetState((chNext == '\"') ? SCE_R_RAWSTRING : SCE_R_RAWSTRING2);
sc.Forward(dashCount + 2);
} else {
// syntax error
sc.SetState(SCE_R_IDENTIFIER);
sc.ForwardSetState((chNext == '\"') ? SCE_R_STRING : SCE_R_STRING2);
}
} else if (IsAWordStart(sc.ch) ) {
sc.SetState(SCE_R_IDENTIFIER);
} else if (sc.Match('#')) {
sc.SetState(SCE_R_COMMENT);
} else if (sc.ch == '\"') {
sc.SetState(SCE_R_STRING);
} else if (sc.ch == '%') {
sc.SetState(SCE_R_INFIX);
} else if (sc.ch == '\'') {
sc.SetState(SCE_R_STRING2);
} else if (sc.ch == '`') {
sc.SetState(SCE_R_BACKTICKS);
} else if (IsAnOperator(sc.ch)) {
sc.SetState(SCE_R_OPERATOR);
}
}
if (sc.atLineEnd) {
const int lineState = matchingDelimiter | (dashCount << 8);
styler.SetLineState(sc.currentLine, lineState);
}
sc.Forward();
}
sc.Complete();
}
// Store both the current line's fold level and the next lines in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
void FoldRDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
Accessor &styler) {
const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
const bool foldAtElse = styler.GetPropertyInt("fold.at.else", 0) != 0;
const Sci_PositionU endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelCurrent = SC_FOLDLEVELBASE;
if (lineCurrent > 0)
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
int levelMinCurrent = levelCurrent;
int levelNext = levelCurrent;
char chNext = styler[startPos];
int styleNext = styler.StyleAt(startPos);
for (Sci_PositionU i = startPos; i < endPos; i++) {
const char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
const int style = styleNext;
styleNext = styler.StyleAt(i + 1);
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
if (style == SCE_R_OPERATOR) {
if (ch == '{') {
// Measure the minimum before a '{' to allow
// folding on "} else {"
if (levelMinCurrent > levelNext) {
levelMinCurrent = levelNext;
}
levelNext++;
} else if (ch == '}') {
levelNext--;
}
}
if (atEOL) {
int levelUse = levelCurrent;
if (foldAtElse) {
levelUse = levelMinCurrent;
}
int lev = levelUse | levelNext << 16;
if (visibleChars == 0 && foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if (levelUse < levelNext)
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
levelCurrent = levelNext;
levelMinCurrent = levelCurrent;
visibleChars = 0;
}
if (!isspacechar(ch))
visibleChars++;
}
}
const char * const RWordLists[] = {
"Language Keywords",
"Base / Default package function",
"Other Package Functions",
"Unused",
"Unused",
nullptr,
};
}
extern const LexerModule lmR(SCLEX_R, ColouriseRDoc, "r", FoldRDoc, RWordLists);