Christian Grasser ad79718fc8 Update to scintilla 5.5.2 & Lexilla 5.4.0
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip )

    Released 21 August 2024.
    Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530.
    Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth.
    Add SCI_STYLESETSTRETCH to support condensed and expanded text styles.
    Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524.
    Fix bug on Cocoa where double-click stopped working when system had been running for a long time.
    On Cocoa implement more values of font weight and stretch.

Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip )

    Released 21 August 2024.
    Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers.
    Lexer added for TOML "toml".
    Bash: Handle backslash in heredoc delimiter. Issue #257.
    Progress: Fix lexing of nested comments. Pull request #258.
    Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259.

Close #15564
2024-08-23 02:59:58 +02:00

475 lines
16 KiB
C++

// Scintilla source code edit control
/** @file LexKVIrc.cxx
** Lexer for KVIrc script.
**/
// Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in
// part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
using namespace Lexilla;
/* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */
/* Utility functions */
/* Reports whether ch may appear inside a word.
 * Accepts ASCII letters and digits, '_', '.' and ':'; everything outside
 * the range 0..0x7F is rejected. */
static inline bool IsAWordChar(int ch) {
    /* isalnum is only defined for values representable as unsigned char
     * (or EOF), so both bounds are checked before it is called */
    if (ch < 0 || ch >= 0x80)
        return false;
    /* Keyword list includes modules, i.e. words including '.', and
     * alias namespaces include ':' */
    return isalnum(ch) || ch == '_' || ch == '.' || ch == ':';
}
/* Reports whether ch may begin a word.
 * Accepts ASCII letters and digits and '_'; everything outside the range
 * 0..0x7F is rejected. */
static inline bool IsAWordStart(int ch) {
    /* isalnum is only defined for values representable as unsigned char
     * (or EOF), so both bounds are checked before it is called */
    if (ch < 0 || ch >= 0x80)
        return false;
    /* Functions (start with '$') are treated separately to keywords */
    return isalnum(ch) || ch == '_';
}
/* Interface function called by Scintilla to request some text to be
 * syntax highlighted.
 *
 * startPos, length and initStyle are handed to the StyleContext that
 * drives the state machine below.
 * keywordlists[0] holds the words styled as SCE_KVIRC_KEYWORD and
 * keywordlists[1] the words styled as SCE_KVIRC_FUNCTION_KEYWORD.
 * styler is used to read the document text and receives the styles. */
static void ColouriseKVIrcDoc(Sci_PositionU startPos, Sci_Position length,
                              int initStyle, WordList *keywordlists[],
                              Accessor &styler)
{
    /* Fetching style context */
    StyleContext sc(startPos, length, initStyle, styler);

    /* Accessing keywords and function-marking keywords */
    WordList &keywords = *keywordlists[0];
    WordList &functionKeywords = *keywordlists[1];

    /* Looping for all characters - only automatically moving forward
     * when asked for (transitions leaving strings and keywords do this
     * already) */
    bool next = true;
    for( ; sc.More(); next ? sc.Forward() : (void)0 )
    {
        /* Resetting next */
        next = true;

        /* Dealing with different states */
        switch (sc.state)
        {
            case SCE_KVIRC_DEFAULT:

                /* Detecting single-line comments
                 * Unfortunately KVIrc script allows raw '#<channel
                 * name>' to be used, and appending # to an array returns
                 * its length...
                 * Going for a compromise where single line comments not
                 * starting on a newline are allowed in all cases except
                 * when they are preceded with an opening bracket or comma
                 * (this will probably be the most common style a valid
                 * string-less channel name will be used with), with the
                 * array length case included
                 */
                if (sc.ch == '#' && (
                        sc.atLineStart ||
                        (sc.chPrev != '(' && sc.chPrev != ',' &&
                         sc.chPrev != ']')))
                {
                    sc.SetState(SCE_KVIRC_COMMENT);
                    break;
                }

                /* Detecting multi-line comments */
                if (sc.Match('/', '*'))
                {
                    sc.SetState(SCE_KVIRC_COMMENTBLOCK);
                    // Consuming the '*' of the opener (the loop then steps
                    // past it) so that it cannot be paired with a directly
                    // following '/' - otherwise the three characters
                    // slash, star, slash would be styled as a complete
                    // comment
                    sc.Forward();
                    break;
                }

                /* Detecting strings */
                if (sc.ch == '"')
                {
                    sc.SetState(SCE_KVIRC_STRING);
                    break;
                }

                /* Detecting functions */
                if (sc.ch == '$')
                {
                    sc.SetState(SCE_KVIRC_FUNCTION);
                    break;
                }

                /* Detecting variables */
                if (sc.ch == '%')
                {
                    sc.SetState(SCE_KVIRC_VARIABLE);
                    break;
                }

                /* Detecting numbers - isdigit is unsafe as it does not
                 * validate, use CharacterSet.h functions */
                if (IsADigit(sc.ch))
                {
                    sc.SetState(SCE_KVIRC_NUMBER);
                    break;
                }

                /* Detecting words - a word needs at least two characters,
                 * the second of which is consumed here */
                if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext))
                {
                    sc.SetState(SCE_KVIRC_WORD);
                    sc.Forward();
                    break;
                }

                /* Detecting operators */
                if (isoperator(sc.ch))
                {
                    sc.SetState(SCE_KVIRC_OPERATOR);
                    break;
                }

                break;

            case SCE_KVIRC_COMMENT:

                /* Breaking out of single line comment when a newline
                 * is introduced */
                if (sc.ch == '\r' || sc.ch == '\n')
                {
                    sc.SetState(SCE_KVIRC_DEFAULT);
                    break;
                }

                break;

            case SCE_KVIRC_COMMENTBLOCK:

                /* Detecting end of multi-line comment */
                if (sc.Match('*', '/'))
                {
                    // Moving the current position forward two characters
                    // so that '*/' is included in the comment
                    sc.Forward(2);
                    sc.SetState(SCE_KVIRC_DEFAULT);

                    /* Comment has been exited and the current position
                     * moved forward, yet the new current character
                     * has yet to be defined - loop without moving
                     * forward again */
                    next = false;
                    break;
                }

                break;

            case SCE_KVIRC_STRING:

                /* Detecting end of string - closing speechmarks */
                if (sc.ch == '"')
                {
                    /* Allowing escaped speechmarks to pass */
                    if (sc.chPrev == '\\')
                        break;

                    /* Moving the current position forward to capture the
                     * terminating speechmarks, and ending string */
                    sc.ForwardSetState(SCE_KVIRC_DEFAULT);

                    /* String has been exited and the current position
                     * moved forward, yet the new current character
                     * has yet to be defined - loop without moving
                     * forward again */
                    next = false;
                    break;
                }

                /* Functions and variables are now highlighted in strings
                 * Detecting functions */
                if (sc.ch == '$')
                {
                    /* Allowing escaped functions to pass */
                    if (sc.chPrev == '\\')
                        break;

                    sc.SetState(SCE_KVIRC_STRING_FUNCTION);
                    break;
                }

                /* Detecting variables */
                if (sc.ch == '%')
                {
                    /* Allowing escaped variables to pass */
                    if (sc.chPrev == '\\')
                        break;

                    sc.SetState(SCE_KVIRC_STRING_VARIABLE);
                    break;
                }

                /* Breaking out of a string when a newline is introduced */
                if (sc.ch == '\r' || sc.ch == '\n')
                {
                    /* Allowing escaped newlines */
                    if (sc.chPrev == '\\')
                        break;

                    sc.SetState(SCE_KVIRC_DEFAULT);
                    break;
                }

                break;

            case SCE_KVIRC_FUNCTION:
            case SCE_KVIRC_VARIABLE:

                /* Detecting the end of a function/variable (word) */
                if (!IsAWordChar(sc.ch))
                {
                    sc.SetState(SCE_KVIRC_DEFAULT);

                    /* Word has been exited yet the current character
                     * has yet to be defined - loop without moving
                     * forward again */
                    next = false;
                    break;
                }

                break;

            case SCE_KVIRC_STRING_FUNCTION:
            case SCE_KVIRC_STRING_VARIABLE:

                /* A function or variable in a string
                 * Detecting the end of a function/variable (word) */
                if (!IsAWordChar(sc.ch))
                {
                    sc.SetState(SCE_KVIRC_STRING);

                    /* Word has been exited yet the current character
                     * has yet to be defined - loop without moving
                     * forward again */
                    next = false;
                    break;
                }

                break;

            case SCE_KVIRC_NUMBER:

                /* Detecting the end of a number */
                if (!IsADigit(sc.ch))
                {
                    sc.SetState(SCE_KVIRC_DEFAULT);

                    /* Number has been exited yet the current character
                     * has yet to be defined - loop without moving
                     * forward */
                    next = false;
                    break;
                }

                break;

            case SCE_KVIRC_OPERATOR:

                /* Because '%' is an operator but is also the marker for
                 * a variable, I need to always treat operators as single
                 * character strings and therefore redo their detection
                 * after every character */
                sc.SetState(SCE_KVIRC_DEFAULT);

                /* Operator has been exited yet the current character
                 * has yet to be defined - loop without moving
                 * forward */
                next = false;
                break;

            case SCE_KVIRC_WORD:

                /* Detecting the end of a word */
                if (!IsAWordChar(sc.ch))
                {
                    /* Checking if the word was actually a keyword -
                     * fetching the current word, NULL-terminated like
                     * the keyword list */
                    char s[100];
                    Sci_Position wordLen = sc.currentPos - styler.GetStartSegment();
                    if (wordLen > 99)
                        wordLen = 99;  /* Include '\0' in buffer */
                    Sci_Position i;
                    for( i = 0; i < wordLen; ++i )
                    {
                        s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i );
                    }
                    s[wordLen] = '\0';

                    /* Actually detecting keywords and fixing the state */
                    if (keywords.InList(s))
                    {
                        /* The SetState call actually commits the
                         * previous keyword state */
                        sc.ChangeState(SCE_KVIRC_KEYWORD);
                    }
                    else if (functionKeywords.InList(s))
                    {
                        // Detecting function keywords and fixing the state
                        sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD);
                    }

                    /* Transitioning to default and committing the previous
                     * word state */
                    sc.SetState(SCE_KVIRC_DEFAULT);

                    /* Word has been exited yet the current character
                     * has yet to be defined - loop without moving
                     * forward again */
                    next = false;
                    break;
                }

                break;
        }
    }

    /* Indicating processing is complete */
    sc.Complete();
}
/* Folding function (based on CMake's folder).
 *
 * Walks the characters from the start of the line containing startPos up to
 * startPos + length, raising the fold level on '{' and lowering it on '}'
 * unless the brace is styled as a comment, and stores one fold level per
 * line. Does nothing when the "fold" property is 0. */
static void FoldKVIrcDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
                         WordList *[], Accessor &styler)
{
    /* Nothing to do unless folding has been switched on */
    if (styler.GetPropertyInt("fold") == 0)
        return;

    /* Fold information is kept per line, so work starts at the beginning
     * of the line that contains startPos */
    Sci_Position lineNo = styler.GetLine(startPos);
    const Sci_PositionU firstPos = styler.LineStart(lineNo);
    const Sci_PositionU endPos = startPos + length;

    /* Each stored value carries the level at the start of the line in its
     * low bits and the level for the following line shifted left by 16.
     * The upper half of the previous line's value is therefore the level
     * this line starts at. */
    int levelNow = (lineNo > 0) ? (styler.LevelAt(lineNo - 1) >> 16)
                                : SC_FOLDLEVELBASE;
    int levelAfter = levelNow;

    /* Packs both levels for the line being finished, flags the line as a
     * fold header when the level rises, and writes the value only if it
     * differs from what is already stored */
    const auto storeLevel = [&]()
    {
        int packed = levelNow | levelAfter << 16;
        if (levelAfter > levelNow)
            packed |= SC_FOLDLEVELHEADERFLAG;
        if (packed != styler.LevelAt(lineNo))
            styler.SetLevel(lineNo, packed);
    };

    for (Sci_PositionU pos = firstPos; pos < endPos; ++pos)
    {
        /* Styling has already run, so the style tells us whether this
         * character sits inside a comment */
        const int style = styler.StyleAt(pos) & 31;
        const bool inComment = (style == SCE_KVIRC_COMMENT ||
                                style == SCE_KVIRC_COMMENTBLOCK);
        const char ch = styler.SafeGetCharAt(pos);

        if (ch == '{')
        {
            /* Opening brace outside a comment deepens the fold */
            if (!inComment)
                ++levelAfter;
        }
        else if (ch == '}')
        {
            /* Closing brace outside a comment reduces the fold */
            if (!inComment)
                --levelAfter;
        }
        else if (ch == '\n' || ch == '\r')
        {
            /* Line finished - record its level and move to the next one */
            storeLevel();
            ++lineNo;
            levelNow = levelAfter;

            /* A Windows "\r\n" pair is one line ending - skip the '\n' so
             * it is not counted as a second line */
            if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n')
                ++pos;
        }
    }

    /* The range has ended - record the level of the line reached so far */
    storeLevel();
}
/* Descriptions of the word lists used by this lexer, in index order,
 * terminated by a null pointer */
static const char *const kvircWordListDesc[] = { "primary", "function_keywords", nullptr };
/* Registering functions and wordlists: the lexer module for SCLEX_KVIRC,
 * named "kvirc", is built from the colourising function, the folding
 * function and the word list descriptions defined in this file */
extern const LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc", FoldKVIrcDoc,
kvircWordListDesc);