Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

1120 lines
36 KiB
C++

/******************************************************************
* LexHaskell.cxx
*
* A haskell lexer for the scintilla code control.
* Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
* External lexer stuff inspired from the caml external lexer.
* Folder copied from Python's.
*
* Written by Tobias Engvall - tumm at dtek dot chalmers dot se
*
* Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
*
* Improved by kudah <kudahkukarek@gmail.com>
*
* TODO:
* * A proper lexical folder to fold group declarations, comments, pragmas,
* #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
* etc.
*
*****************************************************************/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "PropSetSimple.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
// See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
// Note, letter modifiers are prohibited.
static int u_iswupper (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLu || c == ccLt;
}
static int u_iswalpha (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
}
static int u_iswalnum (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLl || c == ccLu || c == ccLt || c == ccLo
|| c == ccNd || c == ccNo;
}
static int u_IsHaskellSymbol(int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccPc || c == ccPd || c == ccPo
|| c == ccSm || c == ccSc || c == ccSk || c == ccSo;
}
static inline bool IsHaskellLetter(const int ch) {
if (IsASCII(ch)) {
return (ch >= 'a' && ch <= 'z')
|| (ch >= 'A' && ch <= 'Z');
} else {
return u_iswalpha(ch) != 0;
}
}
static inline bool IsHaskellAlphaNumeric(const int ch) {
if (IsASCII(ch)) {
return IsAlphaNumeric(ch);
} else {
return u_iswalnum(ch) != 0;
}
}
static inline bool IsHaskellUpperCase(const int ch) {
if (IsASCII(ch)) {
return ch >= 'A' && ch <= 'Z';
} else {
return u_iswupper(ch) != 0;
}
}
static inline bool IsAnHaskellOperatorChar(const int ch) {
if (IsASCII(ch)) {
return
( ch == '!' || ch == '#' || ch == '$' || ch == '%'
|| ch == '&' || ch == '*' || ch == '+' || ch == '-'
|| ch == '.' || ch == '/' || ch == ':' || ch == '<'
|| ch == '=' || ch == '>' || ch == '?' || ch == '@'
|| ch == '^' || ch == '|' || ch == '~' || ch == '\\');
} else {
return u_IsHaskellSymbol(ch) != 0;
}
}
static inline bool IsAHaskellWordStart(const int ch) {
return IsHaskellLetter(ch) || ch == '_';
}
static inline bool IsAHaskellWordChar(const int ch) {
return ( IsHaskellAlphaNumeric(ch)
|| ch == '_'
|| ch == '\'');
}
static inline bool IsCommentBlockStyle(int style) {
return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
}
static inline bool IsCommentStyle(int style) {
return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
|| ( style == SCE_HA_LITERATE_COMMENT
|| style == SCE_HA_LITERATE_CODEDELIM);
}
// styles which do not belong to Haskell, but to external tools
static inline bool IsExternalStyle(int style) {
return ( style == SCE_HA_PREPROCESSOR
|| style == SCE_HA_LITERATE_COMMENT
|| style == SCE_HA_LITERATE_CODEDELIM);
}
static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
}
// Mangled version of lexlib/Accessor.cxx IndentAmount.
// Modified to treat comment blocks as whitespace
// plus special case for commentline/preprocessor.
static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
// Determines the indentation level of the current line
// Comment blocks are treated as whitespace
Sci_Position pos = styler.LineStart(line);
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
char ch = styler[pos];
int style = styler.StyleAt(pos);
int indent = 0;
bool inPrevPrefix = line > 0;
Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
while (( ch == ' ' || ch == '\t'
|| IsCommentBlockStyle(style)
|| style == SCE_HA_LITERATE_CODEDELIM)
&& (pos < eol_pos)) {
if (inPrevPrefix) {
char chPrev = styler[posPrev++];
if (chPrev != ' ' && chPrev != '\t') {
inPrevPrefix = false;
}
}
if (ch == '\t') {
indent = (indent / 8 + 1) * 8;
} else { // Space or comment block
indent++;
}
pos++;
ch = styler[pos];
style = styler.StyleAt(pos);
}
indent += SC_FOLDLEVELBASE;
// if completely empty line or the start of a comment or preprocessor...
if ( styler.LineStart(line) == styler.Length()
|| ch == ' '
|| ch == '\t'
|| ch == '\n'
|| ch == '\r'
|| IsCommentStyle(style)
|| style == SCE_HA_PREPROCESSOR)
return indent | SC_FOLDLEVELWHITEFLAG;
else
return indent;
}
struct OptionsHaskell {
bool magicHash;
bool allowQuotes;
bool implicitParams;
bool highlightSafe;
bool cpp;
bool stylingWithinPreprocessor;
bool fold;
bool foldComment;
bool foldCompact;
bool foldImports;
OptionsHaskell() {
magicHash = true; // Widespread use, enabled by default.
allowQuotes = true; // Widespread use, enabled by default.
implicitParams = false; // Fell out of favor, seldom used, disabled.
highlightSafe = true; // Moderately used, doesn't hurt to enable.
cpp = true; // Widespread use, enabled by default;
stylingWithinPreprocessor = false;
fold = false;
foldComment = false;
foldCompact = false;
foldImports = false;
}
};
static const char * const haskellWordListDesc[] = {
"Keywords",
"FFI",
"Reserved operators",
0
};
struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskell() {
DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
"Set to 0 to disallow the '#' character at the end of identifiers and "
"literals with the haskell lexer "
"(GHC -XMagicHash extension)");
DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
"Set to 0 to disable highlighting of Template Haskell name quotations "
"and promoted constructors "
"(GHC -XTemplateHaskell and -XDataKinds extensions)");
DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
"Set to 1 to allow the '?' character at the start of identifiers "
"with the haskell lexer "
"(GHC & Hugs -XImplicitParams extension)");
DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
"Set to 0 to disallow \"safe\" keyword in imports "
"(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
"Set to 0 to disable C-preprocessor highlighting "
"(-XCPP extension)");
DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
"For Haskell code, determines whether all preprocessor code is styled in the "
"preprocessor style (0, the default) or only from the initial # to the end "
"of the command word(1)."
);
DefineProperty("fold", &OptionsHaskell::fold);
DefineProperty("fold.comment", &OptionsHaskell::foldComment);
DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
"Set to 1 to enable folding of import declarations");
DefineWordListSets(haskellWordListDesc);
}
};
class LexerHaskell : public DefaultLexer {
bool literate;
Sci_Position firstImportLine;
int firstImportIndent;
WordList keywords;
WordList ffi;
WordList reserved_operators;
OptionsHaskell options;
OptionSetHaskell osHaskell;
enum HashCount {
oneHash
,twoHashes
,unlimitedHashes
};
enum KeywordMode {
HA_MODE_DEFAULT = 0
,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
,HA_MODE_MODULE = 4 // after "module", before module name.
,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
};
enum LiterateMode {
LITERATE_BIRD = 0 // if '>' is the first character on the line,
// color '>' as a codedelim and the rest of
// the line as code.
// else if "\begin{code}" is the only word on the
// line except whitespace, switch to LITERATE_BLOCK
// otherwise color the line as a literate comment.
,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
// 0 ignoring all later characters, color the line
// as a codedelim and switch to LITERATE_BIRD
// otherwise color the line as code.
};
struct HaskellLineInfo {
unsigned int nestLevel; // 22 bits ought to be enough for anybody
unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
// beyond 31.
bool pragma;
LiterateMode lmode;
KeywordMode mode;
HaskellLineInfo(int state) :
nestLevel (state >> 10)
, nonexternalStyle ((state >> 5) & 0x1F)
, pragma ((state >> 4) & 0x1)
, lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
, mode (static_cast<KeywordMode>(state & 0x7))
{}
int ToLineState() {
return
(nestLevel << 10)
| (nonexternalStyle << 5)
| (pragma << 4)
| (lmode << 3)
| mode;
}
};
inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
if (options.magicHash && sc.ch == '#') {
sc.Forward();
if (hashes == twoHashes && sc.ch == '#') {
sc.Forward();
} else if (hashes == unlimitedHashes) {
while (sc.ch == '#') {
sc.Forward();
}
}
}
}
bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
if (options.foldImports) {
Sci_Position currentPos = styler.LineStart(line);
int style = styler.StyleAt(currentPos);
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
while (currentPos < eol_pos) {
int ch = styler[currentPos];
style = styler.StyleAt(currentPos);
if (ch == ' ' || ch == '\t'
|| IsCommentBlockStyle(style)
|| style == SCE_HA_LITERATE_CODEDELIM) {
currentPos++;
} else {
break;
}
}
return (style == SCE_HA_KEYWORD
&& styler.Match(currentPos, "import"));
} else {
return false;
}
}
inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
const int indent = HaskellIndentAmount(styler, line);
const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
? indent
: (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
}
inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
? indentLevel
: indentLevel - firstImportIndent;
}
public:
LexerHaskell(bool literate_)
: DefaultLexer(literate_ ? "literatehaskell" : "haskell", literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL)
, literate(literate_)
, firstImportLine(-1)
, firstImportIndent(0)
{}
virtual ~LexerHaskell() {}
void SCI_METHOD Release() override {
delete this;
}
int SCI_METHOD Version() const override {
return lvRelease5;
}
const char * SCI_METHOD PropertyNames() override {
return osHaskell.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return osHaskell.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) override {
return osHaskell.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char * SCI_METHOD PropertyGet(const char *key) override {
return osHaskell.PropertyGet(key);
}
const char * SCI_METHOD DescribeWordListSets() override {
return osHaskell.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void * SCI_METHOD PrivateCall(int, void *) override {
return 0;
}
static ILexer5 *LexerFactoryHaskell() {
return new LexerHaskell(false);
}
static ILexer5 *LexerFactoryLiterateHaskell() {
return new LexerHaskell(true);
}
};
Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
if (osHaskell.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
WordList *wordListN = 0;
switch (n) {
case 0:
wordListN = &keywords;
break;
case 1:
wordListN = &ffi;
break;
case 2:
wordListN = &reserved_operators;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
WordList wlNew;
wlNew.Set(wl);
if (*wordListN != wlNew) {
wordListN->Set(wl);
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
,IDocument *pAccess) {
LexAccessor styler(pAccess);
Sci_Position lineCurrent = styler.GetLine(startPos);
HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
// Do not leak onto next line
if (initStyle == SCE_HA_STRINGEOL)
initStyle = SCE_HA_DEFAULT;
else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
initStyle = hs.nonexternalStyle;
StyleContext sc(startPos, length, initStyle, styler);
int base = 10;
bool dot = false;
bool inDashes = false;
bool alreadyInTheMiddleOfOperator = false;
assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
while (sc.More()) {
// Check for state end
if (!IsExternalStyle(sc.state)) {
hs.nonexternalStyle = sc.state;
}
// For lexer to work, states should unconditionally forward at least one
// character.
// If they don't, they should still check if they are at line end and
// forward if so.
// If a state forwards more than one character, it should check every time
// that it is not a line end and cease forwarding otherwise.
if (sc.atLineEnd) {
// Remember the line state for future incremental lexing
styler.SetLineState(lineCurrent, hs.ToLineState());
lineCurrent++;
}
// Handle line continuation generically.
if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
&& ( sc.state == SCE_HA_STRING
|| sc.state == SCE_HA_PREPROCESSOR)) {
// Remember the line state for future incremental lexing
styler.SetLineState(lineCurrent, hs.ToLineState());
lineCurrent++;
sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
sc.Forward();
continue;
}
if (sc.atLineStart) {
if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
// Prevent SCE_HA_STRINGEOL from leaking back to previous line
sc.SetState(sc.state);
}
if (literate && hs.lmode == LITERATE_BIRD) {
if (!IsExternalStyle(sc.state)) {
sc.SetState(SCE_HA_LITERATE_COMMENT);
}
}
}
// External
// Literate
if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
&& sc.ch == '>') {
sc.SetState(SCE_HA_LITERATE_CODEDELIM);
sc.ForwardSetState(hs.nonexternalStyle);
}
else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
&& ( sc.ch == ' ' || sc.ch == '\t'
|| sc.Match("\\begin{code}"))) {
sc.SetState(sc.state);
while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
sc.Forward();
if (sc.Match("\\begin{code}")) {
sc.Forward(static_cast<int>(strlen("\\begin{code}")));
bool correct = true;
while (!sc.atLineEnd && sc.More()) {
if (sc.ch != ' ' && sc.ch != '\t') {
correct = false;
}
sc.Forward();
}
if (correct) {
sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
hs.lmode = LITERATE_BLOCK;
}
}
}
else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
&& sc.Match("\\end{code}")) {
sc.SetState(SCE_HA_LITERATE_CODEDELIM);
sc.Forward(static_cast<int>(strlen("\\end{code}")));
while (!sc.atLineEnd && sc.More()) {
sc.Forward();
}
sc.SetState(SCE_HA_LITERATE_COMMENT);
hs.lmode = LITERATE_BIRD;
}
// Preprocessor
else if (sc.atLineStart && sc.ch == '#' && options.cpp
&& (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
sc.SetState(SCE_HA_PREPROCESSOR);
sc.Forward();
}
// Literate
else if (sc.state == SCE_HA_LITERATE_COMMENT) {
sc.Forward();
}
else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
sc.ForwardSetState(hs.nonexternalStyle);
}
// Preprocessor
else if (sc.state == SCE_HA_PREPROCESSOR) {
if (sc.atLineEnd) {
sc.SetState(options.stylingWithinPreprocessor
? SCE_HA_DEFAULT
: hs.nonexternalStyle);
sc.Forward(); // prevent double counting a line
} else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
sc.SetState(SCE_HA_DEFAULT);
} else {
sc.Forward();
}
}
// Haskell
// Operator
else if (sc.state == SCE_HA_OPERATOR) {
int style = SCE_HA_OPERATOR;
if ( sc.ch == ':'
&& !alreadyInTheMiddleOfOperator
// except "::"
&& !( sc.chNext == ':'
&& !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
style = SCE_HA_CAPITAL;
}
alreadyInTheMiddleOfOperator = false;
while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
char s[100];
sc.GetCurrent(s, sizeof(s));
if (reserved_operators.InList(s))
style = SCE_HA_RESERVED_OPERATOR;
sc.ChangeState(style);
sc.SetState(SCE_HA_DEFAULT);
}
// String
else if (sc.state == SCE_HA_STRING) {
if (sc.atLineEnd) {
sc.ChangeState(SCE_HA_STRINGEOL);
sc.ForwardSetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\"') {
sc.Forward();
skipMagicHash(sc, oneHash);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
} else {
sc.Forward();
}
}
// Char
else if (sc.state == SCE_HA_CHARACTER) {
if (sc.atLineEnd) {
sc.ChangeState(SCE_HA_STRINGEOL);
sc.ForwardSetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\'') {
sc.Forward();
skipMagicHash(sc, oneHash);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
} else {
sc.Forward();
}
}
// Number
else if (sc.state == SCE_HA_NUMBER) {
if (sc.atLineEnd) {
sc.SetState(SCE_HA_DEFAULT);
sc.Forward(); // prevent double counting a line
} else if (IsADigit(sc.ch, base)) {
sc.Forward();
} else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
sc.Forward(2);
dot = false;
} else if ((base == 10) &&
(sc.ch == 'e' || sc.ch == 'E') &&
(IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
sc.Forward();
if (sc.ch == '+' || sc.ch == '-')
sc.Forward();
} else {
skipMagicHash(sc, twoHashes);
sc.SetState(SCE_HA_DEFAULT);
}
}
// Keyword or Identifier
else if (sc.state == SCE_HA_IDENTIFIER) {
int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
assert(IsAHaskellWordStart(sc.ch));
sc.Forward();
while (sc.More()) {
if (IsAHaskellWordChar(sc.ch)) {
sc.Forward();
} else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
if (IsHaskellUpperCase(sc.chNext)) {
sc.Forward();
style = SCE_HA_CAPITAL;
} else if (IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
style = SCE_HA_IDENTIFIER;
} else if (IsAnHaskellOperatorChar(sc.chNext)) {
sc.Forward();
style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
break;
} else {
break;
}
} else {
break;
}
}
skipMagicHash(sc, unlimitedHashes);
char s[100];
sc.GetCurrent(s, sizeof(s));
KeywordMode new_mode = HA_MODE_DEFAULT;
if (keywords.InList(s)) {
style = SCE_HA_KEYWORD;
} else if (style == SCE_HA_CAPITAL) {
if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
style = SCE_HA_MODULE;
new_mode = HA_MODE_IMPORT2;
} else if (hs.mode == HA_MODE_MODULE) {
style = SCE_HA_MODULE;
}
} else if (hs.mode == HA_MODE_IMPORT1 &&
strcmp(s,"qualified") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT1;
} else if (options.highlightSafe &&
hs.mode == HA_MODE_IMPORT1 &&
strcmp(s,"safe") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT1;
} else if (hs.mode == HA_MODE_IMPORT2) {
if (strcmp(s,"as") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT3;
} else if (strcmp(s,"hiding") == 0) {
style = SCE_HA_KEYWORD;
}
} else if (hs.mode == HA_MODE_TYPE) {
if (strcmp(s,"family") == 0)
style = SCE_HA_KEYWORD;
}
if (hs.mode == HA_MODE_FFI) {
if (ffi.InList(s)) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_FFI;
}
}
sc.ChangeState(style);
sc.SetState(SCE_HA_DEFAULT);
if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
new_mode = HA_MODE_IMPORT1;
else if (strcmp(s,"module") == 0)
new_mode = HA_MODE_MODULE;
else if (strcmp(s,"foreign") == 0)
new_mode = HA_MODE_FFI;
else if (strcmp(s,"type") == 0
|| strcmp(s,"data") == 0)
new_mode = HA_MODE_TYPE;
hs.mode = new_mode;
}
// Comments
// Oneliner
else if (sc.state == SCE_HA_COMMENTLINE) {
if (sc.atLineEnd) {
sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
sc.Forward(); // prevent double counting a line
} else if (inDashes && sc.ch != '-' && !hs.pragma) {
inDashes = false;
if (IsAnHaskellOperatorChar(sc.ch)) {
alreadyInTheMiddleOfOperator = true;
sc.ChangeState(SCE_HA_OPERATOR);
}
} else {
sc.Forward();
}
}
// Nested
else if (IsCommentBlockStyle(sc.state)) {
if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel++;
} else if (sc.Match('-','}')) {
sc.Forward(2);
assert(hs.nestLevel > 0);
if (hs.nestLevel > 0)
hs.nestLevel--;
sc.SetState(
hs.nestLevel == 0
? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
: CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
} else {
sc.Forward();
}
}
// Pragma
else if (sc.state == SCE_HA_PRAGMA) {
if (sc.Match("#-}")) {
hs.pragma = false;
sc.Forward(3);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.Match('-','-')) {
sc.SetState(SCE_HA_COMMENTLINE);
sc.Forward(2);
inDashes = false;
} else if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel = 1;
} else {
sc.Forward();
}
}
// New state?
else if (sc.state == SCE_HA_DEFAULT) {
// Digit
if (IsADigit(sc.ch)) {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_NUMBER);
if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
// Match anything starting with "0x" or "0X", too
sc.Forward(2);
base = 16;
dot = false;
} else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
// Match anything starting with "0o" or "0O", too
sc.Forward(2);
base = 8;
dot = false;
} else {
sc.Forward();
base = 10;
dot = true;
}
}
// Pragma
else if (sc.Match("{-#")) {
hs.pragma = true;
sc.SetState(SCE_HA_PRAGMA);
sc.Forward(3);
}
// Comment line
else if (sc.Match('-','-')) {
sc.SetState(SCE_HA_COMMENTLINE);
sc.Forward(2);
inDashes = true;
}
// Comment block
else if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel = 1;
}
// String
else if (sc.ch == '\"') {
sc.SetState(SCE_HA_STRING);
sc.Forward();
}
// Character or quoted name or promoted term
else if (sc.ch == '\'') {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_CHARACTER);
sc.Forward();
if (options.allowQuotes) {
// Quoted type ''T
if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
sc.ChangeState(SCE_HA_IDENTIFIER);
} else if (sc.chNext != '\'') {
// Quoted name 'n or promoted constructor 'N
if (IsAHaskellWordStart(sc.ch)) {
sc.ChangeState(SCE_HA_IDENTIFIER);
// Promoted constructor operator ':~>
} else if (sc.ch == ':') {
alreadyInTheMiddleOfOperator = false;
sc.ChangeState(SCE_HA_OPERATOR);
// Promoted list or tuple '[T]
} else if (sc.ch == '[' || sc.ch== '(') {
sc.ChangeState(SCE_HA_OPERATOR);
sc.ForwardSetState(SCE_HA_DEFAULT);
}
}
}
}
// Operator starting with '?' or an implicit parameter
else if (sc.ch == '?') {
hs.mode = HA_MODE_DEFAULT;
alreadyInTheMiddleOfOperator = false;
sc.SetState(SCE_HA_OPERATOR);
if ( options.implicitParams
&& IsAHaskellWordStart(sc.chNext)
&& !IsHaskellUpperCase(sc.chNext)) {
sc.Forward();
sc.ChangeState(SCE_HA_IDENTIFIER);
}
}
// Operator
else if (IsAnHaskellOperatorChar(sc.ch)) {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_OPERATOR);
}
// Braces and punctuation
else if (sc.ch == ',' || sc.ch == ';'
|| sc.ch == '(' || sc.ch == ')'
|| sc.ch == '[' || sc.ch == ']'
|| sc.ch == '{' || sc.ch == '}') {
sc.SetState(SCE_HA_OPERATOR);
sc.ForwardSetState(SCE_HA_DEFAULT);
}
// Keyword or Identifier
else if (IsAHaskellWordStart(sc.ch)) {
sc.SetState(SCE_HA_IDENTIFIER);
// Something we don't care about
} else {
sc.Forward();
}
}
// This branch should never be reached.
else {
assert(false);
sc.Forward();
}
}
sc.Complete();
}
void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
,IDocument *pAccess) {
if (!options.fold)
return;
Accessor styler(pAccess, NULL);
Sci_Position lineCurrent = styler.GetLine(startPos);
if (lineCurrent <= firstImportLine) {
firstImportLine = -1; // readjust first import position
firstImportIndent = 0;
}
const Sci_Position maxPos = startPos + length;
const Sci_Position maxLines =
maxPos == styler.Length()
? styler.GetLine(maxPos)
: styler.GetLine(maxPos - 1); // Requested last line
const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
// Backtrack to previous non-blank line so we can determine indent level
// for any white space lines
// and so we can fix any preceding fold level (which is why we go back
// at least one line in all cases)
bool importHere = LineContainsImport(lineCurrent, styler);
int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
while (lineCurrent > 0) {
lineCurrent--;
importHere = LineContainsImport(lineCurrent, styler);
indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
break;
}
int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
if (importHere) {
indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
if (firstImportLine == -1) {
firstImportLine = lineCurrent;
firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
}
if (firstImportLine != lineCurrent) {
indentCurrentLevel++;
}
}
indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
// Process all characters to end of requested range
//that hangs over the end of the range. Cap processing in all cases
// to end of document.
while (lineCurrent <= docLines && lineCurrent <= maxLines) {
// Gather info
Sci_Position lineNext = lineCurrent + 1;
importHere = false;
int indentNext = indentCurrent;
if (lineNext <= docLines) {
// Information about next line is only available if not at end of document
importHere = LineContainsImport(lineNext, styler);
indentNext = IndentAmountWithOffset(styler, lineNext);
}
if (indentNext & SC_FOLDLEVELWHITEFLAG)
indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
// Skip past any blank lines for next indent level info; we skip also
// comments (all comments, not just those starting in column 0)
// which effectively folds them into surrounding code rather
// than screwing up folding.
while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
lineNext++;
importHere = LineContainsImport(lineNext, styler);
indentNext = IndentAmountWithOffset(styler, lineNext);
}
int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
if (importHere) {
indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
if (firstImportLine == -1) {
firstImportLine = lineNext;
firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
}
if (firstImportLine != lineNext) {
indentNextLevel++;
}
}
indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
// Now set all the indent levels on the lines we skipped
// Do this from end to start. Once we encounter one line
// which is indented more than the line after the end of
// the comment-block, use the level of the block before
Sci_Position skipLine = lineNext;
int skipLevel = indentNextLevel;
while (--skipLine > lineCurrent) {
int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
if (options.foldCompact) {
if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
skipLevel = levelBeforeComments;
}
int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
styler.SetLevel(skipLine, skipLevel | whiteFlag);
} else {
if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
&& !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
skipLevel = levelBeforeComments;
}
styler.SetLevel(skipLine, skipLevel);
}
}
int lev = indentCurrent;
if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
lev |= SC_FOLDLEVELHEADERFLAG;
}
// Set fold level for this line and move to next line
styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
indentCurrent = indentNext;
indentCurrentLevel = indentNextLevel;
lineCurrent = lineNext;
}
// NOTE: Cannot set level of last line here because indentCurrent doesn't have
// header flag set; the loop above is crafted to take care of this case!
//styler.SetLevel(lineCurrent, indentCurrent);
}
extern const LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
extern const LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);