Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

388 lines
13 KiB
C++

// Scintilla source code edit control
/** @file LexCOBOL.cxx
** Lexer for COBOL
** Based on LexPascal.cxx
** Written by Laurent le Tynevez
** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
** Updated by Rod Falck, Aug 2006 Converted to COBOL
**/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
using namespace Lexilla;
#define IN_DIVISION 0x01
#define IN_DECLARATIVES 0x02
#define IN_SECTION 0x04
#define IN_PARAGRAPH 0x08
#define IN_FLAGS 0xF
#define NOT_HEADER 0x10
inline bool isCOBOLoperator(char ch)
{
return isoperator(ch);
}
inline bool isCOBOLwordchar(char ch)
{
return IsASCII(ch) && (isalnum(ch) || ch == '-');
}
inline bool isCOBOLwordstart(char ch)
{
return IsASCII(ch) && isalnum(ch);
}
static int CountBits(int nBits)
{
int count = 0;
for (int i = 0; i < 32; ++i)
{
count += nBits & 1;
nBits >>= 1;
}
return count;
}
static void getRange(Sci_PositionU start,
Sci_PositionU end,
Accessor &styler,
char *s,
Sci_PositionU len) {
Sci_PositionU i = 0;
while ((i < end - start + 1) && (i < len-1)) {
s[i] = static_cast<char>(tolower(styler[start + i]));
i++;
}
s[i] = '\0';
}
static void ColourTo(Accessor &styler, Sci_PositionU end, unsigned int attr) {
styler.ColourTo(end, attr);
}
static int classifyWordCOBOL(Sci_PositionU start, Sci_PositionU end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
int ret = 0;
char s[100];
s[0] = '\0';
s[1] = '\0';
getRange(start, end, styler, s, sizeof(s));
int chAttr = SCE_C_IDENTIFIER;
if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
chAttr = SCE_C_NUMBER;
char *p = s + 1;
while (*p) {
if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
chAttr = SCE_C_IDENTIFIER;
break;
}
++p;
}
}
if (chAttr == SCE_C_IDENTIFIER) {
WordList& a_keywords = *keywordlists[0];
WordList& b_keywords = *keywordlists[1];
WordList& c_keywords = *keywordlists[2];
if (a_keywords.InList(s)) {
chAttr = SCE_C_WORD;
}
else if (b_keywords.InList(s)) {
chAttr = SCE_C_WORD2;
}
else if (c_keywords.InList(s)) {
chAttr = SCE_C_UUID;
}
}
if (*bAarea) {
if (strcmp(s, "division") == 0) {
ret = IN_DIVISION;
// we've determined the containment, anything else is just ignored for those purposes
*bAarea = false;
} else if (strcmp(s, "declaratives") == 0) {
ret = IN_DIVISION | IN_DECLARATIVES;
if (nContainment & IN_DECLARATIVES)
ret |= NOT_HEADER | IN_SECTION;
// we've determined the containment, anything else is just ignored for those purposes
*bAarea = false;
} else if (strcmp(s, "section") == 0) {
ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION;
// we've determined the containment, anything else is just ignored for those purposes
*bAarea = false;
} else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
} else {
ret = nContainment | IN_PARAGRAPH;
}
}
ColourTo(styler, end, chAttr);
return ret;
}
static void ColouriseCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
Accessor &styler) {
styler.StartAt(startPos);
int state = initStyle;
if (state == SCE_C_CHARACTER) // Does not leak onto next line
state = SCE_C_DEFAULT;
char chPrev = ' ';
char chNext = styler[startPos];
Sci_PositionU lengthDoc = startPos + length;
int nContainment;
Sci_Position currentLine = styler.GetLine(startPos);
if (currentLine > 0) {
styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
nContainment = styler.GetLineState(currentLine);
nContainment &= ~NOT_HEADER;
} else {
styler.SetLineState(currentLine, 0);
nContainment = 0;
}
styler.StartSegment(startPos);
bool bNewLine = true;
bool bAarea = !isspacechar(chNext);
int column = 0;
for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
++column;
if (bNewLine) {
column = 0;
}
if (column <= 1 && !bAarea) {
bAarea = !isspacechar(ch);
}
bool bSetNewLine = false;
if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
// Avoid triggering two times on Dos/Win
// End of line
if (state == SCE_C_CHARACTER) {
ColourTo(styler, i, state);
state = SCE_C_DEFAULT;
}
styler.SetLineState(currentLine, nContainment);
currentLine++;
bSetNewLine = true;
if (nContainment & NOT_HEADER)
nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
}
if (styler.IsLeadByte(ch)) {
chNext = styler.SafeGetCharAt(i + 2);
chPrev = ' ';
i += 1;
continue;
}
if (state == SCE_C_DEFAULT) {
if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) {
ColourTo(styler, i-1, state);
state = SCE_C_IDENTIFIER;
} else if (column == 6 && (ch == '*' || ch == '/')) {
// Cobol comment line: asterisk in column 7.
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTLINE;
} else if (ch == '*' && chNext == '>') {
// Cobol inline comment: asterisk, followed by greater than.
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTLINE;
} else if (column == 0 && ch == '*' && chNext != '*') {
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTLINE;
} else if (column == 0 && ch == '/' && chNext != '*') {
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTLINE;
} else if (column == 0 && ch == '*' && chNext == '*') {
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTDOC;
} else if (column == 0 && ch == '/' && chNext == '*') {
ColourTo(styler, i-1, state);
state = SCE_C_COMMENTDOC;
} else if (ch == '"') {
ColourTo(styler, i-1, state);
state = SCE_C_STRING;
} else if (ch == '\'') {
ColourTo(styler, i-1, state);
state = SCE_C_CHARACTER;
} else if (ch == '?' && column == 0) {
ColourTo(styler, i-1, state);
state = SCE_C_PREPROCESSOR;
} else if (isCOBOLoperator(ch)) {
ColourTo(styler, i-1, state);
ColourTo(styler, i, SCE_C_OPERATOR);
}
} else if (state == SCE_C_IDENTIFIER) {
if (!isCOBOLwordchar(ch)) {
int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
if(lStateChange != 0) {
styler.SetLineState(currentLine, lStateChange);
nContainment = lStateChange;
}
state = SCE_C_DEFAULT;
chNext = styler.SafeGetCharAt(i + 1);
if (column == 6 && (ch == '*' || ch == '/')) {
state = SCE_C_COMMENTLINE;
} else if (ch == '"') {
state = SCE_C_STRING;
} else if (ch == '\'') {
state = SCE_C_CHARACTER;
} else if (isCOBOLoperator(ch)) {
ColourTo(styler, i, SCE_C_OPERATOR);
}
}
} else {
if (state == SCE_C_PREPROCESSOR) {
if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) {
ColourTo(styler, i-1, state);
state = SCE_C_DEFAULT;
}
} else if (state == SCE_C_COMMENT) {
if (ch == '\r' || ch == '\n') {
ColourTo(styler, i-1, state);
state = SCE_C_DEFAULT;
}
} else if (state == SCE_C_COMMENTDOC) {
if (ch == '\r' || ch == '\n') {
if (((i > styler.GetStartSegment() + 2) || (
(initStyle == SCE_C_COMMENTDOC) &&
(styler.GetStartSegment() == static_cast<Sci_PositionU>(startPos))))) {
ColourTo(styler, i-1, state);
state = SCE_C_DEFAULT;
}
}
} else if (state == SCE_C_COMMENTLINE) {
if (ch == '\r' || ch == '\n') {
ColourTo(styler, i-1, state);
state = SCE_C_DEFAULT;
}
} else if (state == SCE_C_STRING) {
if (ch == '"') {
ColourTo(styler, i, state);
state = SCE_C_DEFAULT;
} else if (ch == '\r' || ch == '\n') {
ColourTo(styler, i-1, state);
state = SCE_C_DEFAULT;
}
} else if (state == SCE_C_CHARACTER) {
if (ch == '\'') {
ColourTo(styler, i, state);
state = SCE_C_DEFAULT;
}
}
}
chPrev = ch;
bNewLine = bSetNewLine;
if (bNewLine)
{
bAarea = false;
}
}
ColourTo(styler, lengthDoc - 1, state);
}
static void FoldCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
Accessor &styler) {
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
Sci_PositionU endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
char chNext = styler[startPos];
bool bNewLine = true;
bool bAarea = !isspacechar(chNext);
int column = 0;
bool bComment = false;
for (Sci_PositionU i = startPos; i < endPos; i++) {
char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
++column;
if (bNewLine) {
column = 0;
bComment = (ch == '*' || ch == '/' || ch == '?');
}
if (column <= 1 && !bAarea) {
bAarea = !isspacechar(ch);
}
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
if (atEOL) {
int nContainment = styler.GetLineState(lineCurrent);
int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
if (bAarea && !bComment)
--lev;
if (visibleChars == 0 && foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
// this level is at the same level or less than the previous line
// therefore these is nothing for the previous header to collapse, so remove the header
styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
}
levelPrev = lev;
visibleChars = 0;
bAarea = false;
bNewLine = true;
lineCurrent++;
} else {
bNewLine = false;
}
if (!isspacechar(ch))
visibleChars++;
}
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}
static const char * const COBOLWordListDesc[] = {
"A Keywords",
"B Keywords",
"Extended Keywords",
0
};
extern const LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);