Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

508 lines
14 KiB
C++

// Scintilla source code edit control
/**
* @file LexJSON.cxx
* @date February 19, 2016
* @brief Lexer for JSON and JSON-LD formats
* @author nkmathew
*
* The License.txt file describes the conditions under which this software may
* be distributed.
*
*/
#include <cstdlib>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace {
const char *const JSONWordListDesc[] = {
"JSON Keywords",
"JSON-LD Keywords",
0
};
/**
* Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
* colon separating the prefix and suffix
*
* https://www.w3.org/TR/json-ld/#dfn-compact-iri
*/
struct CompactIRI {
int colonCount;
bool foundInvalidChar;
CharacterSet setCompactIRI;
CompactIRI() {
colonCount = 0;
foundInvalidChar = false;
setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-");
}
void resetState() {
colonCount = 0;
foundInvalidChar = false;
}
void checkChar(int ch) {
if (ch == ':') {
colonCount++;
} else {
foundInvalidChar |= !setCompactIRI.Contains(ch);
}
}
bool shouldHighlight() const {
return !foundInvalidChar && colonCount == 1;
}
};
/**
* Keeps track of escaped characters in strings as per:
*
* https://tools.ietf.org/html/rfc7159#section-7
*/
struct EscapeSequence {
int digitsLeft;
CharacterSet setHexDigits;
CharacterSet setEscapeChars;
EscapeSequence() {
digitsLeft = 0;
setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/");
}
// Returns true if the following character is a valid escaped character
bool newSequence(int nextChar) {
digitsLeft = 0;
if (nextChar == 'u') {
digitsLeft = 5;
} else if (!setEscapeChars.Contains(nextChar)) {
return false;
}
return true;
}
bool atEscapeEnd() const {
return digitsLeft <= 0;
}
bool isInvalidChar(int currChar) const {
return !setHexDigits.Contains(currChar);
}
};
struct OptionsJSON {
bool foldCompact;
bool fold;
bool allowComments;
bool escapeSequence;
OptionsJSON() {
foldCompact = false;
fold = false;
allowComments = false;
escapeSequence = false;
}
};
struct OptionSetJSON : public OptionSet<OptionsJSON> {
OptionSetJSON() {
DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
"Set to 1 to enable highlighting of escape sequences in strings");
DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
"Set to 1 to enable highlighting of line/block comments in JSON");
DefineProperty("fold.compact", &OptionsJSON::foldCompact);
DefineProperty("fold", &OptionsJSON::fold);
DefineWordListSets(JSONWordListDesc);
}
};
class LexerJSON : public DefaultLexer {
OptionsJSON options;
OptionSetJSON optSetJSON;
EscapeSequence escapeSeq;
WordList keywordsJSON;
WordList keywordsJSONLD;
CharacterSet setOperators;
CharacterSet setURL;
CharacterSet setKeywordJSONLD;
CharacterSet setKeywordJSON;
CompactIRI compactIRI;
static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
Sci_Position i = 0;
while (i < 50) {
i++;
char curr = styler.SafeGetCharAt(start+i, '\0');
char next = styler.SafeGetCharAt(start+i+1, '\0');
bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
if (curr == ch) {
return true;
} else if (!isspacechar(curr) || atEOL) {
return false;
}
}
return false;
}
/**
* Looks for the colon following the end quote
*
* Assumes property names of lengths no longer than a 100 characters.
* The colon is also expected to be less than 50 spaces after the end
* quote for the string to be considered a property name
*/
static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
Sci_Position i = 0;
bool escaped = false;
while (i < 100) {
i++;
char curr = styler.SafeGetCharAt(start+i, '\0');
if (escaped) {
escaped = false;
continue;
}
escaped = curr == '\\';
if (curr == '"') {
return IsNextNonWhitespace(styler, start+i, ':');
} else if (!curr) {
return false;
}
}
return false;
}
static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
StyleContext &context, LexAccessor &styler) {
char word[51];
Sci_Position currPos = (Sci_Position) context.currentPos;
int i = 0;
while (i < 50) {
char ch = styler.SafeGetCharAt(currPos + i);
if (!wordSet.Contains(ch)) {
break;
}
word[i] = ch;
i++;
}
word[i] = '\0';
return keywordList.InList(word);
}
public:
LexerJSON() :
DefaultLexer("json", SCLEX_JSON),
setOperators(CharacterSet::setNone, "[{}]:,"),
setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
setKeywordJSON(CharacterSet::setAlpha, "$_") {
}
virtual ~LexerJSON() {}
int SCI_METHOD Version() const override {
return lvRelease5;
}
void SCI_METHOD Release() override {
delete this;
}
const char *SCI_METHOD PropertyNames() override {
return optSetJSON.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return optSetJSON.PropertyType(name);
}
const char *SCI_METHOD DescribeProperty(const char *name) override {
return optSetJSON.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override {
if (optSetJSON.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
const char * SCI_METHOD PropertyGet(const char *key) override {
return optSetJSON.PropertyGet(key);
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override {
WordList *wordListN = 0;
switch (n) {
case 0:
wordListN = &keywordsJSON;
break;
case 1:
wordListN = &keywordsJSONLD;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
if (wordListN->Set(wl)) {
firstModification = 0;
}
}
return firstModification;
}
void *SCI_METHOD PrivateCall(int, void *) override {
return 0;
}
static ILexer5 *LexerFactoryJSON() {
return new LexerJSON;
}
const char *SCI_METHOD DescribeWordListSets() override {
return optSetJSON.DescribeWordListSets();
}
void SCI_METHOD Lex(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) override;
};
void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) {
LexAccessor styler(pAccess);
StyleContext context(startPos, length, initStyle, styler);
int stringStyleBefore = SCE_JSON_STRING;
while (context.More()) {
switch (context.state) {
case SCE_JSON_BLOCKCOMMENT:
if (context.Match("*/")) {
context.Forward();
context.ForwardSetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_LINECOMMENT:
if (context.MatchLineEnd()) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_STRINGEOL:
if (context.atLineStart) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_ESCAPESEQUENCE:
escapeSeq.digitsLeft--;
if (!escapeSeq.atEscapeEnd()) {
if (escapeSeq.isInvalidChar(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
break;
}
if (context.ch == '"') {
context.SetState(stringStyleBefore);
context.ForwardSetState(SCE_JSON_DEFAULT);
} else if (context.ch == '\\') {
if (!escapeSeq.newSequence(context.chNext)) {
context.SetState(SCE_JSON_ERROR);
}
context.Forward();
} else {
context.SetState(stringStyleBefore);
if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
}
}
break;
case SCE_JSON_PROPERTYNAME:
case SCE_JSON_STRING:
if (context.ch == '"') {
if (compactIRI.shouldHighlight()) {
context.ChangeState(SCE_JSON_COMPACTIRI);
context.ForwardSetState(SCE_JSON_DEFAULT);
compactIRI.resetState();
} else {
context.ForwardSetState(SCE_JSON_DEFAULT);
}
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
} else if (context.ch == '\\') {
stringStyleBefore = context.state;
if (options.escapeSequence) {
context.SetState(SCE_JSON_ESCAPESEQUENCE);
if (!escapeSeq.newSequence(context.chNext)) {
context.SetState(SCE_JSON_ERROR);
}
}
context.Forward();
} else if (context.Match("https://") ||
context.Match("http://") ||
context.Match("ssh://") ||
context.Match("git://") ||
context.Match("svn://") ||
context.Match("ftp://") ||
context.Match("mailto:")) {
// Handle most common URI schemes only
stringStyleBefore = context.state;
context.SetState(SCE_JSON_URI);
} else if (context.ch == '@') {
// https://www.w3.org/TR/json-ld/#dfn-keyword
if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
stringStyleBefore = context.state;
context.SetState(SCE_JSON_LDKEYWORD);
}
} else {
compactIRI.checkChar(context.ch);
}
break;
case SCE_JSON_LDKEYWORD:
case SCE_JSON_URI:
if ((!setKeywordJSONLD.Contains(context.ch) &&
(context.state == SCE_JSON_LDKEYWORD)) ||
(!setURL.Contains(context.ch))) {
context.SetState(stringStyleBefore);
}
if (context.ch == '"') {
context.ForwardSetState(SCE_JSON_DEFAULT);
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
}
break;
case SCE_JSON_OPERATOR:
case SCE_JSON_NUMBER:
context.SetState(SCE_JSON_DEFAULT);
break;
case SCE_JSON_ERROR:
if (context.MatchLineEnd()) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_KEYWORD:
if (!setKeywordJSON.Contains(context.ch)) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
}
if (context.state == SCE_JSON_DEFAULT) {
if (context.ch == '"') {
compactIRI.resetState();
context.SetState(SCE_JSON_STRING);
Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
if (AtPropertyName(styler, currPos)) {
context.SetState(SCE_JSON_PROPERTYNAME);
}
} else if (setOperators.Contains(context.ch)) {
context.SetState(SCE_JSON_OPERATOR);
} else if (options.allowComments && context.Match("/*")) {
context.SetState(SCE_JSON_BLOCKCOMMENT);
context.Forward();
} else if (options.allowComments && context.Match("//")) {
context.SetState(SCE_JSON_LINECOMMENT);
} else if (setKeywordJSON.Contains(context.ch)) {
if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
context.SetState(SCE_JSON_KEYWORD);
}
}
bool numberStart =
IsADigit(context.ch) && (context.chPrev == '+'||
context.chPrev == '-' ||
context.atLineStart ||
IsASpace(context.chPrev) ||
setOperators.Contains(context.chPrev));
bool exponentPart =
tolower(context.ch) == 'e' &&
IsADigit(context.chPrev) &&
(IsADigit(context.chNext) ||
context.chNext == '+' ||
context.chNext == '-');
bool signPart =
(context.ch == '-' || context.ch == '+') &&
((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
&& IsADigit(context.chNext)));
bool adjacentDigit =
IsADigit(context.ch) && IsADigit(context.chPrev);
bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
bool dotPart = context.ch == '.' &&
IsADigit(context.chPrev) &&
IsADigit(context.chNext);
bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
if (numberStart ||
exponentPart ||
signPart ||
adjacentDigit ||
dotPart ||
afterExponent ||
afterDot) {
context.SetState(SCE_JSON_NUMBER);
} else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
}
context.Forward();
}
context.Complete();
}
void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
Sci_Position length,
int,
IDocument *pAccess) {
if (!options.fold) {
return;
}
LexAccessor styler(pAccess);
Sci_PositionU currLine = styler.GetLine(startPos);
Sci_PositionU endPos = startPos + length;
int currLevel = SC_FOLDLEVELBASE;
if (currLine > 0)
currLevel = styler.LevelAt(currLine - 1) >> 16;
int nextLevel = currLevel;
int visibleChars = 0;
for (Sci_PositionU i = startPos; i < endPos; i++) {
char curr = styler.SafeGetCharAt(i);
char next = styler.SafeGetCharAt(i+1);
bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
if (curr == '{' || curr == '[') {
nextLevel++;
} else if (curr == '}' || curr == ']') {
nextLevel--;
}
}
if (atEOL || i == (endPos-1)) {
int level = currLevel | nextLevel << 16;
if (!visibleChars && options.foldCompact) {
level |= SC_FOLDLEVELWHITEFLAG;
} else if (nextLevel > currLevel) {
level |= SC_FOLDLEVELHEADERFLAG;
}
if (level != styler.LevelAt(currLine)) {
styler.SetLevel(currLine, level);
}
currLine++;
currLevel = nextLevel;
visibleChars = 0;
}
if (!isspacechar(curr)) {
visibleChars++;
}
}
}
}
extern const LexerModule lmJSON(SCLEX_JSON,
LexerJSON::LexerFactoryJSON,
"json",
JSONWordListDesc);