Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

480 lines
14 KiB
C++

// Scintilla source code edit control
/** @file LexAsm.cxx
** Lexer for Assembler, just for the MASM syntax
** Written by The Black Horus
** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
**/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cctype>
#include <cstdio>
#include <cstdarg>
#include <string>
#include <string_view>
#include <map>
#include <set>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace {
bool IsAWordChar(const int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '.' ||
ch == '_' || ch == '?');
}
bool IsAWordStart(const int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
ch == '%' || ch == '@' || ch == '$' || ch == '?');
}
bool IsAsmOperator(const int ch) noexcept {
if ((ch < 0x80) && (isalnum(ch)))
return false;
// '.' left out as it is used to make up numbers
if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
ch == '%' || ch == ':')
return true;
return false;
}
constexpr bool IsStreamCommentStyle(int style) noexcept {
return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
}
// An individual named option for use in an OptionSet
// Options used for LexerAsm
struct OptionsAsm {
std::string delimiter;
bool fold;
bool foldSyntaxBased;
bool foldCommentMultiline;
bool foldCommentExplicit;
std::string foldExplicitStart;
std::string foldExplicitEnd;
bool foldExplicitAnywhere;
bool foldCompact;
std::string commentChar;
OptionsAsm() {
delimiter = "";
fold = false;
foldSyntaxBased = true;
foldCommentMultiline = false;
foldCommentExplicit = false;
foldExplicitStart = "";
foldExplicitEnd = "";
foldExplicitAnywhere = false;
foldCompact = true;
commentChar = "";
}
};
const char *const asmWordListDesc[] = {
"CPU instructions",
"FPU instructions",
"Registers",
"Directives",
"Directive operands",
"Extended instructions",
"Directives4Foldstart",
"Directives4Foldend",
nullptr
};
struct OptionSetAsm : public OptionSet<OptionsAsm> {
OptionSetAsm() {
DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
DefineProperty("fold", &OptionsAsm::fold);
DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
"Set this property to 0 to disable syntax based folding.");
DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
"Set this property to 1 to enable folding multi-line comments.");
DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
"This option enables folding explicit fold points when using the Asm lexer. "
"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
"at the end of a section that should fold.");
DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
"The string to use for explicit fold start points, replacing the standard ;{.");
DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
"The string to use for explicit fold end points, replacing the standard ;}.");
DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
DefineProperty("fold.compact", &OptionsAsm::foldCompact);
DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar,
"Overrides the default comment character (which is ';' for asm and '#' for as).");
DefineWordListSets(asmWordListDesc);
}
};
class LexerAsm : public DefaultLexer {
WordList cpuInstruction;
WordList mathInstruction;
WordList registers;
WordList directive;
WordList directiveOperand;
WordList extInstruction;
WordList directives4foldstart;
WordList directives4foldend;
OptionsAsm options;
OptionSetAsm osAsm;
int commentChar;
public:
LexerAsm(const char *languageName_, int language_, int commentChar_) : DefaultLexer(languageName_, language_) {
commentChar = commentChar_;
}
virtual ~LexerAsm() {
}
void SCI_METHOD Release() override {
delete this;
}
int SCI_METHOD Version() const override {
return lvRelease5;
}
const char * SCI_METHOD PropertyNames() override {
return osAsm.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return osAsm.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) override {
return osAsm.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char * SCI_METHOD PropertyGet(const char *key) override {
return osAsm.PropertyGet(key);
}
const char * SCI_METHOD DescribeWordListSets() override {
return osAsm.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void * SCI_METHOD PrivateCall(int, void *) override {
return nullptr;
}
static ILexer5 *LexerFactoryAsm() {
return new LexerAsm("asm", SCLEX_ASM, ';');
}
static ILexer5 *LexerFactoryAs() {
return new LexerAsm("as", SCLEX_AS, '#');
}
};
Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
if (osAsm.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
WordList *wordListN = nullptr;
switch (n) {
case 0:
wordListN = &cpuInstruction;
break;
case 1:
wordListN = &mathInstruction;
break;
case 2:
wordListN = &registers;
break;
case 3:
wordListN = &directive;
break;
case 4:
wordListN = &directiveOperand;
break;
case 5:
wordListN = &extInstruction;
break;
case 6:
wordListN = &directives4foldstart;
break;
case 7:
wordListN = &directives4foldend;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
if (wordListN->Set(wl, true)) {
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
const char commentCharacter = options.commentChar.empty() ?
commentChar : options.commentChar.front();
// Do not leak onto next line
if (initStyle == SCE_ASM_STRINGEOL)
initStyle = SCE_ASM_DEFAULT;
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
if (sc.atLineStart) {
switch (sc.state) {
case SCE_ASM_STRING:
case SCE_ASM_CHARACTER:
// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
sc.SetState(sc.state);
break;
case SCE_ASM_COMMENT:
sc.SetState(SCE_ASM_DEFAULT);
break;
default:
break;
}
}
// Handle line continuation generically.
if (sc.ch == '\\') {
if (sc.chNext == '\n' || sc.chNext == '\r') {
sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
continue;
}
}
// Determine if the current state should terminate.
if (sc.state == SCE_ASM_OPERATOR) {
if (!IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_NUMBER) {
if (!IsAWordChar(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_IDENTIFIER) {
if (!IsAWordChar(sc.ch) ) {
char s[100];
sc.GetCurrentLowered(s, sizeof(s));
bool IsDirective = false;
if (cpuInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
} else if (mathInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
} else if (registers.InList(s)) {
sc.ChangeState(SCE_ASM_REGISTER);
} else if (directive.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVE);
IsDirective = true;
} else if (directiveOperand.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
} else if (extInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
}
sc.SetState(SCE_ASM_DEFAULT);
if (IsDirective && !strcmp(s, "comment")) {
const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
if (sc.ch == delimiter) {
sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
}
}
}
} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
if (sc.ch == delimiter) {
while (!sc.MatchLineEnd()) {
sc.Forward();
}
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_STRING) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\"') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_CHARACTER) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\'') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
}
// Determine if a new state should be entered.
if (sc.state == SCE_ASM_DEFAULT) {
if (sc.ch == commentCharacter) {
sc.SetState(SCE_ASM_COMMENT);
} else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
sc.SetState(SCE_ASM_NUMBER);
} else if (IsAWordStart(sc.ch)) {
sc.SetState(SCE_ASM_IDENTIFIER);
} else if (sc.ch == '\"') {
sc.SetState(SCE_ASM_STRING);
} else if (sc.ch == '\'') {
sc.SetState(SCE_ASM_CHARACTER);
} else if (IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_OPERATOR);
}
}
}
sc.Complete();
}
// Store both the current line's fold level and the next lines in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "else".
void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
if (!options.fold)
return;
LexAccessor styler(pAccess);
const Sci_PositionU endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelCurrent = SC_FOLDLEVELBASE;
if (lineCurrent > 0)
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
int levelNext = levelCurrent;
char chNext = styler[startPos];
int styleNext = styler.StyleAt(startPos);
int style = initStyle;
char word[100]{};
int wordlen = 0;
const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
for (Sci_PositionU i = startPos; i < endPos; i++) {
const char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
const int stylePrev = style;
style = styleNext;
styleNext = styler.StyleAt(i + 1);
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
if (!IsStreamCommentStyle(stylePrev)) {
levelNext++;
} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
// Comments don't end at end of line and the next character may be unstyled.
levelNext--;
}
}
if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
if (userDefinedFoldMarkers) {
if (styler.Match(i, options.foldExplicitStart.c_str())) {
levelNext++;
} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
levelNext--;
}
} else {
if (ch == ';') {
if (chNext == '{') {
levelNext++;
} else if (chNext == '}') {
levelNext--;
}
}
}
}
if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
word[wordlen++] = MakeLowerCase(ch);
if (wordlen == 100) { // prevent overflow
word[0] = '\0';
wordlen = 1;
}
if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
word[wordlen] = '\0';
wordlen = 0;
if (directives4foldstart.InList(word)) {
levelNext++;
} else if (directives4foldend.InList(word)){
levelNext--;
}
}
}
if (!IsASpace(ch))
visibleChars++;
if (atEOL || (i == endPos-1)) {
const int levelUse = levelCurrent;
int lev = levelUse | levelNext << 16;
if (visibleChars == 0 && options.foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if (levelUse < levelNext)
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
levelCurrent = levelNext;
if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
// There is an empty line at end of file so give it same level and empty
styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
}
visibleChars = 0;
}
}
}
}
extern const LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
extern const LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);