mirror of
https://github.com/notepad-plus-plus/notepad-plus-plus.git
synced 2025-07-31 01:34:58 +02:00
Release 5.5.2 ( https://www.scintilla.org/scintilla552.zip ) Released 21 August 2024. Add SCI_SETCOPYSEPARATOR for separator between parts of a multiple selection when copied to the clipboard. Feature #1530. Add SCI_GETUNDOSEQUENCE to determine whether an undo sequence is active and its nesting depth. Add SCI_STYLESETSTRETCH to support condensed and expanded text styles. Add SCI_LINEINDENT and SCI_LINEDEDENT. Feature #1524. Fix bug on Cocoa where double-click stopped working when system had been running for a long time. On Cocoa implement more values of font weight and stretch. Release 5.4.0 ( https://www.scintilla.org/lexilla540.zip ) Released 21 August 2024. Inside Lexilla, LexerModule instances are now const. This will require changes to applications that modify Lexilla.cxx, which may be done to add custom lexers. Lexer added for TOML "toml". Bash: Handle backslash in heredoc delimiter. Issue #257. Progress: Fix lexing of nested comments. Pull request #258. Force lower-casing of case-insensitive keyword lists so keywords match in some lexers. Issue #259. Close #15564
487 lines
18 KiB
C++
487 lines
18 KiB
C++
/******************************************************************
|
|
* LexMarkdown.cxx
|
|
*
|
|
* A simple Markdown lexer for scintilla.
|
|
*
|
|
* Includes highlighting for some extra features from the
|
|
* Pandoc implementation; strikeout, using '#.' as a default
|
|
* ordered list item marker, and delimited code blocks.
|
|
*
|
|
* Limitations:
|
|
*
|
|
* Standard indented code blocks are not highlighted at all,
|
|
* as it would conflict with other indentation schemes. Use
|
|
* delimited code blocks for blanket highlighting of an
|
|
* entire code block. Embedded HTML is not highlighted either.
|
|
* Blanket HTML highlighting has issues, because some Markdown
|
|
* implementations allow Markdown markup inside of the HTML. Also,
|
|
* there is a following blank line issue that can't be ignored,
|
|
* explained in the next paragraph. Embedded HTML and code
|
|
* blocks would be better supported with language specific
|
|
* highlighting.
|
|
*
|
|
* The highlighting aims to accurately reflect correct syntax,
|
|
* but a few restrictions are relaxed. Delimited code blocks are
|
|
* highlighted, even if the line following the code block is not blank.
|
|
* Requiring a blank line after a block, breaks the highlighting
|
|
* in certain cases, because of the way Scintilla ends up calling
|
|
* the lexer.
|
|
*
|
|
* Written by Jon Strait - jstrait@moonloop.net
|
|
*
|
|
* The License.txt file describes the conditions under which this
|
|
* software may be distributed.
|
|
*
|
|
*****************************************************************/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "Accessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "LexerModule.h"
|
|
|
|
using namespace Lexilla;
|
|
|
|
namespace {

// Reports whether ch terminates a line for the purposes of this lexer.
// NUL is included because sc.GetRelative(i) returns '\0' when i is out of
// range, so running off the document is handled like reaching a line end.
constexpr bool IsNewline(const int ch) {
	switch (ch) {
	case '\n':
	case '\r':
	case '\0':
		return true;
	default:
		return false;
	}
}

}
|
|
|
|
// True if can follow ch down to the end with possibly trailing whitespace
|
|
// Does not set the state SCE_MARKDOWN_LINE_BEGIN as to allow further processing
|
|
static bool FollowToLineEnd(const int ch, const int state, const Sci_PositionU endPos, StyleContext &sc) {
|
|
Sci_Position i = 0;
|
|
while (sc.GetRelative(++i) == ch)
|
|
;
|
|
// Skip over whitespace
|
|
while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
|
|
++i;
|
|
if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
|
|
sc.SetState(state);
|
|
sc.Forward(i);
|
|
return true;
|
|
}
|
|
else return false;
|
|
}
|
|
|
|
// Set the state on text section from current to length characters,
|
|
// then set the rest until the newline to default, except for any characters matching token
|
|
static void SetStateAndZoom(const int state, const Sci_Position length, const int token, StyleContext &sc) {
|
|
sc.SetState(state);
|
|
sc.Forward(length);
|
|
sc.SetState(SCE_MARKDOWN_DEFAULT);
|
|
sc.Forward();
|
|
bool started = false;
|
|
while (sc.More() && !IsNewline(sc.ch)) {
|
|
if (sc.ch == token && !started) {
|
|
sc.SetState(state);
|
|
started = true;
|
|
}
|
|
else if (sc.ch != token) {
|
|
sc.SetState(SCE_MARKDOWN_DEFAULT);
|
|
started = false;
|
|
}
|
|
sc.Forward();
|
|
}
|
|
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
|
|
}
|
|
|
|
// Does the previous line have more than spaces and tabs?
|
|
static bool HasPrevLineContent(StyleContext &sc) {
|
|
Sci_Position i = 0;
|
|
// Go back to the previous newline
|
|
while ((--i + (Sci_Position)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i)))
|
|
;
|
|
while ((--i + (Sci_Position)sc.currentPos) >= 0) {
|
|
const int ch = sc.GetRelative(i);
|
|
if (ch == '\n')
|
|
break;
|
|
if (!((ch == '\r' || IsASpaceOrTab(ch))))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// True when the current position can begin a term: it is the very first
// position, there is no previous character, or the previous character is
// white space as decided by isspacechar().
static bool AtTermStart(StyleContext &sc) {
	if (sc.currentPos == 0 || sc.chPrev == 0)
		return true;
	return isspacechar(sc.chPrev);
}
|
|
|
|
// Decides whether the opening 'token' at the current position starts a region
// that is also closed before the end of the line. The current position must
// be at the start of a term (see AtTermStart) for the result to be true.
static bool IsCompleteStyleRegion(StyleContext &sc, const char *token) {
	const size_t tokenLength = strlen(token);
	const char marker = *token;
	bool closed = false;
	for (Sci_Position pos = static_cast<Sci_Position>(tokenLength); !IsNewline(sc.GetRelative(pos)); pos++) {
		// Only the first marker of a run is a candidate, so that an empty pair
		// of single-char tokens is not taken for a longer token: {*}{*} != {**}
		if (sc.GetRelative(pos) != marker || sc.GetRelative(pos - 1) == marker)
			continue;
		// The first candidate decides; a two-character token also needs its
		// second character directly after it
		closed = (tokenLength <= 1U) || (sc.GetRelative(pos + 1) == token[1]);
		break;
	}
	return AtTermStart(sc) && closed;
}
|
|
|
|
static bool IsValidHrule(const Sci_PositionU endPos, StyleContext &sc) {
|
|
int count = 1;
|
|
Sci_Position i = 0;
|
|
for (;;) {
|
|
++i;
|
|
int c = sc.GetRelative(i);
|
|
if (c == sc.ch)
|
|
++count;
|
|
// hit a terminating character
|
|
else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
|
|
// Are we a valid HRULE
|
|
if ((IsNewline(c) || sc.currentPos + i == endPos) &&
|
|
count >= 3 && !HasPrevLineContent(sc)) {
|
|
sc.SetState(SCE_MARKDOWN_HRULE);
|
|
sc.Forward(i);
|
|
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
|
|
return true;
|
|
}
|
|
else {
|
|
sc.SetState(SCE_MARKDOWN_DEFAULT);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Lexes length characters of a Markdown document starting at startPos and
// assigns SCE_MARKDOWN_* styles through styler. initStyle is the state to
// start in; the keyword lists are not used.
static void ColorizeMarkdownDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
		WordList **, Accessor &styler) {
	// ATX header markers of two or more '#', tried longest first so that the
	// deepest matching level is chosen. A single '#' is handled separately
	// because it may also introduce a '#.' list item.
	struct AtxMarker {
		const char *text;
		Sci_Position width;
		int style;
	};
	static constexpr AtxMarker atxMarkers[] = {
		{"######", 6, SCE_MARKDOWN_HEADER6},
		{"#####", 5, SCE_MARKDOWN_HEADER5},
		{"####", 4, SCE_MARKDOWN_HEADER4},
		{"###", 3, SCE_MARKDOWN_HEADER3},
		{"##", 2, SCE_MARKDOWN_HEADER2},
	};

	const Sci_PositionU endPos = startPos + length;
	// Number of leading spaces seen so far on the current line
	int precharCount = 0;
	// True between the "](" and the ")" of an inline link
	bool isLinkNameDetecting = false;
	// When set, the next loop iteration retries at the same position instead
	// of advancing. Needed for the corner case of starting at the beginning
	// of the file in the default state.
	bool freezeCursor = false;

	// property lexer.markdown.header.eolfill
	//	Set to 1 to highlight all ATX header text.
	const bool headerEOLFill = styler.GetPropertyInt("lexer.markdown.header.eolfill", 0) == 1;

	StyleContext sc(startPos, static_cast<Sci_PositionU>(length), initStyle, styler);

	// Move past 'count' characters of the current span and return to default
	const auto closeSpan = [&sc](const Sci_Position count) {
		sc.Forward(count);
		sc.SetState(SCE_MARKDOWN_DEFAULT);
	};
	// Start examining the leading white space of a line
	const auto beginPrechar = [&sc, &precharCount]() {
		precharCount = 0;
		sc.SetState(SCE_MARKDOWN_PRECHAR);
	};

	while (sc.More()) {
		// Skip past escaped characters
		if (sc.ch == '\\') {
			sc.Forward();
			continue;
		}

		// A blockquote resets the line semantics
		if (sc.state == SCE_MARKDOWN_BLOCKQUOTE) {
			sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
		}

		// Actions that depend on the state entered with
		switch (sc.state) {
		case SCE_MARKDOWN_CODE2:
			if (sc.Match("``")) {
				closeSpan((sc.GetRelative(2) == '`') ? 3 : 2);
			}
			break;

		case SCE_MARKDOWN_CODE:
			if (sc.ch == '`' && sc.chPrev != ' ') {
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
			}
			break;

		// Note: highlighting of standard indented code blocks is deliberately
		// not done here. It gets in the way of other valid indentation
		// schemes, for example multiple paragraphs inside a list item.

		// Strong
		case SCE_MARKDOWN_STRONG1:
			if ((sc.Match("**") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
				closeSpan(2);
			}
			break;

		case SCE_MARKDOWN_STRONG2:
			if ((sc.Match("__") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
				closeSpan(2);
			}
			break;

		// Emphasis
		case SCE_MARKDOWN_EM1:
			if ((sc.ch == '*' && sc.chPrev != ' ') || IsNewline(sc.chNext)) {
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
			}
			break;

		case SCE_MARKDOWN_EM2:
			if ((sc.ch == '_' && sc.chPrev != ' ') || IsNewline(sc.chNext)) {
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
			}
			break;

		// Delimited code block: ends with a line starting with "~~~"
		case SCE_MARKDOWN_CODEBK:
			if (sc.atLineStart && sc.Match("~~~")) {
				Sci_Position toLineEnd = 1;
				while (!IsNewline(sc.GetRelative(toLineEnd)) && sc.currentPos + toLineEnd < endPos) {
					toLineEnd++;
				}
				closeSpan(toLineEnd);
			}
			break;

		case SCE_MARKDOWN_STRIKEOUT:
			if ((sc.Match("~~") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
				closeSpan(2);
			}
			break;

		case SCE_MARKDOWN_LINE_BEGIN: {
			// Header with two or more '#'
			const AtxMarker *atx = nullptr;
			for (const AtxMarker &candidate : atxMarkers) {
				if (sc.Match(candidate.text)) {
					atx = &candidate;
					break;
				}
			}
			if (atx) {
				if (headerEOLFill) {
					sc.SetState(atx->style);
				} else {
					SetStateAndZoom(atx->style, atx->width, '#', sc);
				}
			}
			else if (sc.Match("#")) {
				// Catch the special case of a '#.' list item marker, which is
				// resolved later in the PRECHAR handling
				if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
					beginPrechar();
				}
				else if (headerEOLFill) {
					sc.SetState(SCE_MARKDOWN_HEADER1);
				}
				else {
					SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
				}
			}
			// Code block
			else if (sc.Match("~~~")) {
				sc.SetState(HasPrevLineContent(sc) ? SCE_MARKDOWN_DEFAULT : SCE_MARKDOWN_CODEBK);
			}
			// Underline of '=' below a text line
			else if (sc.ch == '=') {
				if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc)) {
					if (!headerEOLFill) {
						sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
					}
				}
				else {
					sc.SetState(SCE_MARKDOWN_DEFAULT);
				}
			}
			// Underline of '-' below a text line; otherwise the '-' may still
			// start a rule or a list item
			else if (sc.ch == '-') {
				if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc)) {
					if (!headerEOLFill) {
						sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
					}
				}
				else {
					beginPrechar();
				}
			}
			else if (IsNewline(sc.ch)) {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
			}
			else {
				beginPrechar();
			}
			break;
		}

		// The header lasts until the newline
		case SCE_MARKDOWN_HEADER1:
		case SCE_MARKDOWN_HEADER2:
		case SCE_MARKDOWN_HEADER3:
		case SCE_MARKDOWN_HEADER4:
		case SCE_MARKDOWN_HEADER5:
		case SCE_MARKDOWN_HEADER6:
			if (headerEOLFill) {
				// Keep the header style over the line end and re-examine the
				// first character of the next line as a line beginning
				if (sc.atLineStart) {
					sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
					freezeCursor = true;
				}
			}
			else if (IsNewline(sc.ch)) {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
			}
			break;

		default:
			break;
		}

		// New state only within the initial whitespace
		if (sc.state == SCE_MARKDOWN_PRECHAR) {
			// Blockquote
			if (sc.ch == '>' && precharCount < 5) {
				sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
			}
			// (Detection of the beginning of an indented code block is
			// deliberately not done here, see the note above.)
			// HRule - Total of three or more hyphens, asterisks, or underscores
			// on a line by themselves. IsValidHrule does the styling itself.
			else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc)) {
				// nothing more to do
			}
			// Unordered list
			else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
				sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
			}
			// Ordered list
			else if (IsADigit(sc.ch)) {
				int digitCount = 1;
				while (IsADigit(sc.GetRelative(digitCount))) {
					++digitCount;
				}
				const bool isListMarker = sc.GetRelative(digitCount) == '.' &&
					IsASpaceOrTab(sc.GetRelative(digitCount + 1));
				if (isListMarker) {
					sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
					closeSpan(digitCount + 1);
				} else {
					// a textual number at the margin should be plain text
					sc.SetState(SCE_MARKDOWN_DEFAULT);
				}
			}
			// Alternate Ordered list
			else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
				sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
				closeSpan(2);
			}
			else if (sc.ch != ' ' || precharCount > 2) {
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
			else {
				++precharCount;
			}
		}

		// Any link
		if (sc.state == SCE_MARKDOWN_LINK) {
			const bool unescaped = sc.GetRelative(-1) != '\\';
			if (sc.Match("](") && unescaped) {
				sc.Forward(2);
				isLinkNameDetecting = true;
			}
			else if (sc.Match("]:") && unescaped) {
				closeSpan(2);
			}
			else if (!isLinkNameDetecting && sc.ch == ']' && unescaped) {
				closeSpan(1);
			}
			else if (isLinkNameDetecting && sc.ch == ')' && unescaped) {
				closeSpan(1);
				isLinkNameDetecting = false;
			}
		}

		// New state anywhere in doc
		if (sc.state == SCE_MARKDOWN_DEFAULT) {
			if (sc.atLineStart && sc.ch == '#') {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
				freezeCursor = true;
			}
			// Links and Images
			if (sc.Match("![")) {
				sc.SetState(SCE_MARKDOWN_LINK);
				sc.Forward(1);
			}
			else if (sc.ch == '[' && sc.GetRelative(-1) != '\\') {
				sc.SetState(SCE_MARKDOWN_LINK);
			}
			// Code - also a special case for alternate inside spacing
			else if (sc.Match("``") && sc.GetRelative(3) != ' ' && AtTermStart(sc)) {
				const int openingSpan = (sc.GetRelative(2) == '`') ? 2 : 1;
				sc.SetState(SCE_MARKDOWN_CODE2);
				sc.Forward(openingSpan);
			}
			else if (sc.ch == '`' && sc.chNext != ' ' && IsCompleteStyleRegion(sc, "`")) {
				sc.SetState(SCE_MARKDOWN_CODE);
			}
			// Strong
			else if (sc.Match("**") && sc.GetRelative(2) != ' ' && IsCompleteStyleRegion(sc, "**")) {
				sc.SetState(SCE_MARKDOWN_STRONG1);
				sc.Forward();
			}
			else if (sc.Match("__") && sc.GetRelative(2) != ' ' && IsCompleteStyleRegion(sc, "__")) {
				sc.SetState(SCE_MARKDOWN_STRONG2);
				sc.Forward();
			}
			// Emphasis
			else if (sc.ch == '*' && sc.chNext != ' ' && IsCompleteStyleRegion(sc, "*")) {
				sc.SetState(SCE_MARKDOWN_EM1);
			}
			else if (sc.ch == '_' && sc.chNext != ' ' && IsCompleteStyleRegion(sc, "_")) {
				sc.SetState(SCE_MARKDOWN_EM2);
			}
			// Strikeout
			else if (sc.Match("~~") && !(sc.GetRelative(2) == '~' || sc.GetRelative(2) == ' ') &&
				IsCompleteStyleRegion(sc, "~~")) {
				sc.SetState(SCE_MARKDOWN_STRIKEOUT);
				sc.Forward();
			}
			// Beginning of line
			else if (IsNewline(sc.ch)) {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
			}
		}

		// Advance if not holding back the cursor for this iteration.
		if (freezeCursor) {
			freezeCursor = false;
		} else {
			sc.Forward();
		}
	}
	sc.Complete();
}
|
|
|
|
// Lexer module object for Markdown, constructed from the SCLEX_MARKDOWN
// identifier, the ColorizeMarkdownDoc function and the name "markdown".
extern const LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");
|