mirror of
				https://github.com/notepad-plus-plus/notepad-plus-plus.git
				synced 2025-10-31 11:34:05 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			382 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			382 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Scintilla source code edit control
 | |
| /** @file LexCOBOL.cxx
 | |
|  ** Lexer for COBOL
 | |
|  ** Based on LexPascal.cxx
 | |
|  ** Written by Laurent le Tynevez
 | |
|  ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
 | |
|  ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
 | |
|  ** Updated by Rod Falck, Aug 2006 Converted to COBOL
 | |
|  **/
 | |
| 
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| #include <stdio.h>
 | |
| #include <stdarg.h>
 | |
| #include <assert.h>
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include "ILexer.h"
 | |
| #include "Scintilla.h"
 | |
| #include "SciLexer.h"
 | |
| 
 | |
| #include "WordList.h"
 | |
| #include "LexAccessor.h"
 | |
| #include "Accessor.h"
 | |
| #include "StyleContext.h"
 | |
| #include "CharacterSet.h"
 | |
| #include "LexerModule.h"
 | |
| 
 | |
| #ifdef SCI_NAMESPACE
 | |
| using namespace Scintilla;
 | |
| #endif
 | |
| 
 | |
// Containment bits kept in the per-line state. The colouriser stores them
// with SetLineState() and the folder derives the fold level from how many
// of the IN_* bits are set.
#define IN_DIVISION     0x01
#define IN_DECLARATIVES 0x02
#define IN_SECTION      0x04
#define IN_PARAGRAPH    0x08
// Mask covering the four IN_* containment bits above.
#define IN_FLAGS        0xF
// Marks a line that must not receive a fold header flag; the colouriser
// clears it (together with IN_DECLARATIVES and IN_SECTION) after the line ends.
#define NOT_HEADER      0x10
 | |
| 
 | |
| inline bool isCOBOLoperator(char ch)
 | |
|     {
 | |
|     return isoperator(ch);
 | |
|     }
 | |
| 
 | |
| inline bool isCOBOLwordchar(char ch)
 | |
|     {
 | |
|     return IsASCII(ch) && (isalnum(ch) || ch == '-');
 | |
| 
 | |
|     }
 | |
| 
 | |
| inline bool isCOBOLwordstart(char ch)
 | |
|     {
 | |
|     return IsASCII(ch) && isalnum(ch);
 | |
|     }
 | |
| 
 | |
/**
 * Count the bits that are set in @a nBits.
 *
 * The count is taken on the unsigned bit pattern of the argument, so the
 * result does not depend on how the platform right-shifts negative values
 * and is not tied to a 32-bit int.
 *
 * @param nBits value whose set bits are counted.
 * @return number of 1 bits in @a nBits.
 */
static int CountBits(int nBits) {
    int count = 0;
    // Each pass clears the lowest set bit, so the loop runs once per set bit.
    for (unsigned int remaining = nBits; remaining != 0; remaining &= remaining - 1) {
        ++count;
    }
    return count;
}
 | |
| 
 | |
| static void getRange(unsigned int start,
 | |
|         unsigned int end,
 | |
|         Accessor &styler,
 | |
|         char *s,
 | |
|         unsigned int len) {
 | |
|     unsigned int i = 0;
 | |
|     while ((i < end - start + 1) && (i < len-1)) {
 | |
|         s[i] = static_cast<char>(tolower(styler[start + i]));
 | |
|         i++;
 | |
|     }
 | |
|     s[i] = '\0';
 | |
| }
 | |
| 
 | |
| static void ColourTo(Accessor &styler, unsigned int end, unsigned int attr) {
 | |
|     styler.ColourTo(end, attr);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int classifyWordCOBOL(unsigned int start, unsigned int end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
 | |
|     int ret = 0;
 | |
| 
 | |
|     WordList& a_keywords = *keywordlists[0];
 | |
|     WordList& b_keywords = *keywordlists[1];
 | |
|     WordList& c_keywords = *keywordlists[2];
 | |
| 
 | |
|     char s[100];
 | |
|     s[0] = '\0';
 | |
|     s[1] = '\0';
 | |
|     getRange(start, end, styler, s, sizeof(s));
 | |
| 
 | |
|     char chAttr = SCE_C_IDENTIFIER;
 | |
|     if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
 | |
|         chAttr = SCE_C_NUMBER;
 | |
| 		char *p = s + 1;
 | |
| 		while (*p) {
 | |
| 			if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
 | |
| 				chAttr = SCE_C_IDENTIFIER;
 | |
| 			    break;
 | |
| 			}
 | |
| 			++p;
 | |
| 		}
 | |
|     }
 | |
|     else {
 | |
|         if (a_keywords.InList(s)) {
 | |
|             chAttr = SCE_C_WORD;
 | |
|         }
 | |
|         else if (b_keywords.InList(s)) {
 | |
|             chAttr = SCE_C_WORD2;
 | |
|         }
 | |
|         else if (c_keywords.InList(s)) {
 | |
|             chAttr = SCE_C_UUID;
 | |
|         }
 | |
|     }
 | |
|     if (*bAarea) {
 | |
|         if (strcmp(s, "division") == 0) {
 | |
|             ret = IN_DIVISION;
 | |
| 			// we've determined the containment, anything else is just ignored for those purposes
 | |
| 			*bAarea = false;
 | |
| 		} else if (strcmp(s, "declaratives") == 0) {
 | |
|             ret = IN_DIVISION | IN_DECLARATIVES;
 | |
| 			if (nContainment & IN_DECLARATIVES)
 | |
| 				ret |= NOT_HEADER | IN_SECTION;
 | |
| 			// we've determined the containment, anything else is just ignored for those purposes
 | |
| 			*bAarea = false;
 | |
| 		} else if (strcmp(s, "section") == 0) {
 | |
|             ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION;
 | |
| 			// we've determined the containment, anything else is just ignored for those purposes
 | |
| 			*bAarea = false;
 | |
| 		} else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
 | |
|             ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
 | |
| 		} else {
 | |
| 			ret = nContainment | IN_PARAGRAPH;
 | |
|         }
 | |
|     }
 | |
|     ColourTo(styler, end, chAttr);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static void ColouriseCOBOLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 | |
|     Accessor &styler) {
 | |
| 
 | |
|     styler.StartAt(startPos);
 | |
| 
 | |
|     int state = initStyle;
 | |
|     if (state == SCE_C_CHARACTER)   // Does not leak onto next line
 | |
|         state = SCE_C_DEFAULT;
 | |
|     char chPrev = ' ';
 | |
|     char chNext = styler[startPos];
 | |
|     unsigned int lengthDoc = startPos + length;
 | |
| 
 | |
|     int nContainment;
 | |
| 
 | |
|     int currentLine = styler.GetLine(startPos);
 | |
|     if (currentLine > 0) {
 | |
|         styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
 | |
|         nContainment = styler.GetLineState(currentLine);
 | |
| 		nContainment &= ~NOT_HEADER;
 | |
|     } else {
 | |
|         styler.SetLineState(currentLine, 0);
 | |
|         nContainment = 0;
 | |
|     }
 | |
| 
 | |
|     styler.StartSegment(startPos);
 | |
|     bool bNewLine = true;
 | |
|     bool bAarea = !isspacechar(chNext);
 | |
| 	int column = 0;
 | |
|     for (unsigned int i = startPos; i < lengthDoc; i++) {
 | |
|         char ch = chNext;
 | |
| 
 | |
|         chNext = styler.SafeGetCharAt(i + 1);
 | |
| 
 | |
| 		++column;
 | |
| 
 | |
|         if (bNewLine) {
 | |
| 			column = 0;
 | |
|         }
 | |
| 		if (column <= 1 && !bAarea) {
 | |
| 			bAarea = !isspacechar(ch);
 | |
| 			}
 | |
|         bool bSetNewLine = false;
 | |
|         if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 | |
|             // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 | |
|             // Avoid triggering two times on Dos/Win
 | |
|             // End of line
 | |
|             if (state == SCE_C_CHARACTER) {
 | |
|                 ColourTo(styler, i, state);
 | |
|                 state = SCE_C_DEFAULT;
 | |
|             }
 | |
|             styler.SetLineState(currentLine, nContainment);
 | |
|             currentLine++;
 | |
|             bSetNewLine = true;
 | |
| 			if (nContainment & NOT_HEADER)
 | |
| 				nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
 | |
|         }
 | |
| 
 | |
|         if (styler.IsLeadByte(ch)) {
 | |
|             chNext = styler.SafeGetCharAt(i + 2);
 | |
|             chPrev = ' ';
 | |
|             i += 1;
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (state == SCE_C_DEFAULT) {
 | |
|             if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_IDENTIFIER;
 | |
|             } else if (column == 6 && ch == '*') {
 | |
|             // Cobol comment line: asterisk in column 7.
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTLINE;
 | |
|             } else if (ch == '*' && chNext == '>') {
 | |
|             // Cobol inline comment: asterisk, followed by greater than.
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTLINE;
 | |
|             } else if (column == 0 && ch == '*' && chNext != '*') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTLINE;
 | |
|             } else if (column == 0 && ch == '/' && chNext != '*') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTLINE;
 | |
|             } else if (column == 0 && ch == '*' && chNext == '*') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTDOC;
 | |
|             } else if (column == 0 && ch == '/' && chNext == '*') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_COMMENTDOC;
 | |
|             } else if (ch == '"') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_STRING;
 | |
|             } else if (ch == '\'') {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_CHARACTER;
 | |
|             } else if (ch == '?' && column == 0) {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 state = SCE_C_PREPROCESSOR;
 | |
|             } else if (isCOBOLoperator(ch)) {
 | |
|                 ColourTo(styler, i-1, state);
 | |
|                 ColourTo(styler, i, SCE_C_OPERATOR);
 | |
|             }
 | |
|         } else if (state == SCE_C_IDENTIFIER) {
 | |
|             if (!isCOBOLwordchar(ch)) {
 | |
|                 int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
 | |
| 
 | |
|                 if(lStateChange != 0) {
 | |
|                     styler.SetLineState(currentLine, lStateChange);
 | |
|                     nContainment = lStateChange;
 | |
|                 }
 | |
| 
 | |
|                 state = SCE_C_DEFAULT;
 | |
|                 chNext = styler.SafeGetCharAt(i + 1);
 | |
|                 if (ch == '"') {
 | |
|                     state = SCE_C_STRING;
 | |
|                 } else if (ch == '\'') {
 | |
|                     state = SCE_C_CHARACTER;
 | |
|                 } else if (isCOBOLoperator(ch)) {
 | |
|                     ColourTo(styler, i, SCE_C_OPERATOR);
 | |
|                 }
 | |
|             }
 | |
|         } else {
 | |
|             if (state == SCE_C_PREPROCESSOR) {
 | |
|                 if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) {
 | |
|                     ColourTo(styler, i-1, state);
 | |
|                     state = SCE_C_DEFAULT;
 | |
|                 }
 | |
|             } else if (state == SCE_C_COMMENT) {
 | |
|                 if (ch == '\r' || ch == '\n') {
 | |
|                     ColourTo(styler, i, state);
 | |
|                     state = SCE_C_DEFAULT;
 | |
|                 }
 | |
|             } else if (state == SCE_C_COMMENTDOC) {
 | |
|                 if (ch == '\r' || ch == '\n') {
 | |
|                     if (((i > styler.GetStartSegment() + 2) || (
 | |
|                         (initStyle == SCE_C_COMMENTDOC) &&
 | |
|                         (styler.GetStartSegment() == static_cast<unsigned int>(startPos))))) {
 | |
|                             ColourTo(styler, i, state);
 | |
|                             state = SCE_C_DEFAULT;
 | |
|                     }
 | |
|                 }
 | |
|             } else if (state == SCE_C_COMMENTLINE) {
 | |
|                 if (ch == '\r' || ch == '\n') {
 | |
|                     ColourTo(styler, i-1, state);
 | |
|                     state = SCE_C_DEFAULT;
 | |
|                 }
 | |
|             } else if (state == SCE_C_STRING) {
 | |
|                 if (ch == '"') {
 | |
|                     ColourTo(styler, i, state);
 | |
|                     state = SCE_C_DEFAULT;
 | |
|                 }
 | |
|             } else if (state == SCE_C_CHARACTER) {
 | |
|                 if (ch == '\'') {
 | |
|                     ColourTo(styler, i, state);
 | |
|                     state = SCE_C_DEFAULT;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         chPrev = ch;
 | |
|         bNewLine = bSetNewLine;
 | |
| 		if (bNewLine)
 | |
| 			{
 | |
| 			bAarea = false;
 | |
| 			}
 | |
|     }
 | |
|     ColourTo(styler, lengthDoc - 1, state);
 | |
| }
 | |
| 
 | |
| static void FoldCOBOLDoc(unsigned int startPos, int length, int, WordList *[],
 | |
|                             Accessor &styler) {
 | |
|     bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 | |
|     unsigned int endPos = startPos + length;
 | |
|     int visibleChars = 0;
 | |
|     int lineCurrent = styler.GetLine(startPos);
 | |
|     int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
 | |
|     char chNext = styler[startPos];
 | |
| 
 | |
|     bool bNewLine = true;
 | |
|     bool bAarea = !isspacechar(chNext);
 | |
| 	int column = 0;
 | |
| 	bool bComment = false;
 | |
|     for (unsigned int i = startPos; i < endPos; i++) {
 | |
|         char ch = chNext;
 | |
|         chNext = styler.SafeGetCharAt(i + 1);
 | |
| 		++column;
 | |
| 
 | |
|         if (bNewLine) {
 | |
| 			column = 0;
 | |
| 			bComment = (ch == '*' || ch == '/' || ch == '?');
 | |
|         }
 | |
| 		if (column <= 1 && !bAarea) {
 | |
| 			bAarea = !isspacechar(ch);
 | |
| 			}
 | |
|         bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
 | |
|         if (atEOL) {
 | |
| 			int nContainment = styler.GetLineState(lineCurrent);
 | |
|             int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
 | |
| 			if (bAarea && !bComment)
 | |
| 				--lev;
 | |
|             if (visibleChars == 0 && foldCompact)
 | |
|                 lev |= SC_FOLDLEVELWHITEFLAG;
 | |
|             if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
 | |
|                 lev |= SC_FOLDLEVELHEADERFLAG;
 | |
|             if (lev != styler.LevelAt(lineCurrent)) {
 | |
|                 styler.SetLevel(lineCurrent, lev);
 | |
|             }
 | |
| 			if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
 | |
| 				// this level is at the same level or less than the previous line
 | |
| 				// therefore these is nothing for the previous header to collapse, so remove the header
 | |
| 				styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
 | |
| 			}
 | |
|             levelPrev = lev;
 | |
|             visibleChars = 0;
 | |
| 			bAarea = false;
 | |
|             bNewLine = true;
 | |
|             lineCurrent++;
 | |
|         } else {
 | |
|             bNewLine = false;
 | |
|         }
 | |
| 
 | |
| 
 | |
|         if (!isspacechar(ch))
 | |
|             visibleChars++;
 | |
|     }
 | |
| 
 | |
|     // Fill in the real level of the next line, keeping the current flags as they will be filled in later
 | |
|     int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
 | |
|     styler.SetLevel(lineCurrent, levelPrev | flagsNext);
 | |
| }
 | |
| 
 | |
// Descriptions of the keyword lists, in the order classifyWordCOBOL() reads
// them from keywordlists[]; the trailing 0 terminates the array.
static const char * const COBOLWordListDesc[] = {
    "A Keywords",           // keywordlists[0]
    "B Keywords",           // keywordlists[1]
    "Extended Keywords",    // keywordlists[2]
    0
};
 | |
| 
 | |
| LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);
 |