mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-31 01:34:58 +02:00
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip ) Released 21 August 2024. Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530. Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth. Add SCI_STYLESETSTRETCH to support condensed and expanded text styles. Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524. Fix bug on Cocoa where double-click stopped working when system had been running for a long time. On Cocoa implement more values of font weight and stretch. Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip ) Released 21 August 2024. Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers. Lexer added for TOML "toml". Bash: Handle backslash in heredoc delimiter. Issue #257. Progress: Fix lexing of nested comments. Pull request #258. Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259. Close #15564
207 lines
7.2 KiB
C++
207 lines
7.2 KiB
C++
// Scintilla source code edit control
|
|
/** @file LexStata.cxx
|
|
** Lexer for Stata
|
|
**/
|
|
// Author: Luke Rasmussen (luke.rasmussen@gmail.com)
|
|
//
|
|
// The License.txt file describes the conditions under which this software may
|
|
// be distributed.
|
|
//
|
|
// Developed as part of the StatTag project at Northwestern University Feinberg
|
|
// School of Medicine with funding from Northwestern University Clinical and
|
|
// Translational Sciences Institute through CTSA grant UL1TR001422. This work
|
|
// has not been reviewed or endorsed by NCATS or the NIH.
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "Accessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "LexerModule.h"
|
|
|
|
using namespace Lexilla;
|
|
|
|
// Assign Stata lexical styles to the characters in [startPos, startPos + length).
//   startPos, length  range of the document to style
//   initStyle         style state in effect when styling starts at startPos
//   keywordlists      [0] is looked up for SCE_STATA_WORD, [1] for SCE_STATA_TYPE
//   styler            accessor handed to the StyleContext that records the styles
static void ColouriseStataDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
	Accessor &styler) {

	WordList &keywords = *keywordlists[0];
	WordList &types = *keywordlists[1];

	// setWordStart: characters that may begin an identifier (letters and '_').
	// setWord: characters that may continue an identifier or number (alphanumerics, '.' and '_').
	// NOTE(review): the (0x80, true) arguments presumably also admit non-ASCII bytes - confirm in CharacterSet.h.
	const CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
	const CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true);

	StyleContext sc(startPos, length, initStyle, styler);
	// Becomes true once the current line has started a number, identifier, string or
	// operator. A '*' only opens a comment while this is still false.
	bool lineHasNonCommentChar = false;
	for (; sc.More(); sc.Forward()) {
		if (sc.atLineStart) {
			lineHasNonCommentChar = false;
		}

		// Determine if the current state should terminate.
		switch (sc.state) {
		case SCE_STATA_OPERATOR:
			// Operators are styled one character at a time.
			sc.SetState(SCE_STATA_DEFAULT);
			break;
		case SCE_STATA_NUMBER:
			// We accept almost anything because of hex. and number suffixes
			if (!setWord.Contains(sc.ch)) {
				sc.SetState(SCE_STATA_DEFAULT);
			}
			break;
		case SCE_STATA_IDENTIFIER:
			// A '.' ends the identifier even though it is a member of setWord.
			if (!setWord.Contains(sc.ch) || (sc.ch == '.')) {
				char s[1000];
				sc.GetCurrent(s, sizeof(s));
				// Re-style the finished word if it is a keyword or a type name.
				if (keywords.InList(s)) {
					sc.ChangeState(SCE_STATA_WORD);
				}
				else if (types.InList(s)) {
					sc.ChangeState(SCE_STATA_TYPE);
				}
				sc.SetState(SCE_STATA_DEFAULT);
			}
			break;
		case SCE_STATA_COMMENTBLOCK:
			if (sc.Match('*', '/')) {
				// Step over both characters of the terminator before leaving the comment.
				sc.Forward();
				sc.ForwardSetState(SCE_STATA_DEFAULT);
			}
			break;
		case SCE_STATA_COMMENT:
		case SCE_STATA_COMMENTLINE:
			// Both single-line comment styles run until the start of the next line.
			if (sc.atLineStart) {
				sc.SetState(SCE_STATA_DEFAULT);
			}
			break;
		case SCE_STATA_STRING:
			if (sc.ch == '\\') {
				// Per Stata documentation, the following characters are the only ones that can
				// be escaped (not our typical set of quotes, etc.):
				// https://www.stata.com/support/faqs/programming/backslashes-and-macros/
				if (sc.chNext == '$' || sc.chNext == '`' || sc.chNext == '\\') {
					sc.Forward();
				}
			}
			else if (sc.ch == '\"') {
				sc.ForwardSetState(SCE_STATA_DEFAULT);
			}
			break;
		}

		// Determine if a new state should be entered.
		if (sc.state == SCE_STATA_DEFAULT) {
			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
				lineHasNonCommentChar = true;
				sc.SetState(SCE_STATA_NUMBER);
			}
			else if (setWordStart.Contains(sc.ch)) {
				lineHasNonCommentChar = true;
				sc.SetState(SCE_STATA_IDENTIFIER);
			}
			else if (sc.Match('*') && !lineHasNonCommentChar) {
				// '*' before any other token on the line starts a comment.
				sc.SetState(SCE_STATA_COMMENT);
			}
			else if (sc.Match('/', '*')) {
				sc.SetState(SCE_STATA_COMMENTBLOCK);
				sc.Forward();	// Eat the * so it isn't used for the end of the comment
			}
			else if (sc.Match('/', '/')) {
				sc.SetState(SCE_STATA_COMMENTLINE);
			}
			else if (sc.ch == '\"') {
				lineHasNonCommentChar = true;
				sc.SetState(SCE_STATA_STRING);
			}
			else if (isoperator(sc.ch)) {
				lineHasNonCommentChar = true;
				sc.SetState(SCE_STATA_OPERATOR);
			}
		}
	}

	sc.Complete();
}
|
|
|
|
// Store both the current line's fold level and the next lines in the
|
|
// level store to make it easy to pick up with each increment
|
|
// and to make it possible to fiddle the current level for "} else {".
|
|
static void FoldStataDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
|
|
Accessor &styler) {
|
|
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
|
|
bool foldAtElse = styler.GetPropertyInt("fold.at.else", 0) != 0;
|
|
Sci_PositionU endPos = startPos + length;
|
|
int visibleChars = 0;
|
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
|
int levelCurrent = SC_FOLDLEVELBASE;
|
|
if (lineCurrent > 0)
|
|
levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
|
|
int levelMinCurrent = levelCurrent;
|
|
int levelNext = levelCurrent;
|
|
char chNext = styler[startPos];
|
|
int styleNext = styler.StyleAt(startPos);
|
|
for (Sci_PositionU i = startPos; i < endPos; i++) {
|
|
char ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
int style = styleNext;
|
|
styleNext = styler.StyleAt(i + 1);
|
|
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
|
|
if (style == SCE_R_OPERATOR) {
|
|
if (ch == '{') {
|
|
// Measure the minimum before a '{' to allow
|
|
// folding on "} else {"
|
|
if (levelMinCurrent > levelNext) {
|
|
levelMinCurrent = levelNext;
|
|
}
|
|
levelNext++;
|
|
}
|
|
else if (ch == '}') {
|
|
levelNext--;
|
|
}
|
|
}
|
|
if (atEOL) {
|
|
int levelUse = levelCurrent;
|
|
if (foldAtElse) {
|
|
levelUse = levelMinCurrent;
|
|
}
|
|
int lev = levelUse | levelNext << 16;
|
|
if (visibleChars == 0 && foldCompact)
|
|
lev |= SC_FOLDLEVELWHITEFLAG;
|
|
if (levelUse < levelNext)
|
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
|
if (lev != styler.LevelAt(lineCurrent)) {
|
|
styler.SetLevel(lineCurrent, lev);
|
|
}
|
|
lineCurrent++;
|
|
levelCurrent = levelNext;
|
|
levelMinCurrent = levelCurrent;
|
|
visibleChars = 0;
|
|
}
|
|
if (!isspacechar(ch))
|
|
visibleChars++;
|
|
}
|
|
}
|
|
|
|
|
|
// Descriptions of the keyword lists used by the Stata lexer, in index order;
// the array ends with a null pointer.
static const char * const StataWordLists[] = {
	"Language Keywords",
	"Types",
	nullptr,
};
|
|
|
|
// Lexer module definition for Stata: language id, colouriser, lexer name,
// folder and the keyword list descriptions.
extern const LexerModule lmStata(
	SCLEX_STATA,
	ColouriseStataDoc,
	"stata",
	FoldStataDoc,
	StataWordLists);
|