notepad-plus-plus/lexilla/scripts/LexillaData.py

339 lines
14 KiB
Python

#!/usr/bin/env python3
# LexillaData.py - implemented 2013 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.
# Requires FileGenerator from Scintilla so scintilla must be a peer directory of lexilla.
"""
Common code used by Lexilla and SciTE for source file regeneration.
"""
# The LexillaData object exposes information about Lexilla as properties:
# Version properties
# version
# versionDotted
# versionCommad
#
# Date last modified
# dateModified
# yearModified
# mdyModified
# dmyModified
# myModified
#
# Information about lexers and properties defined in lexers
# lexFiles
# sorted list of lexer file stems like LexAbaqus
# lexerModules
# sorted list of module names like lmAbaqus
# lexerProperties
# sorted list of lexer properties like lexer.bash.command.substitution
# propertyDocuments
# dictionary of property documentation { name: document string }
# like lexer.bash.special.parameter: Set shell (default is Bash) special parameters.
# sclexFromName
# dictionary of SCLEX_* IDs { name: SCLEX_ID } like ave: SCLEX_AVE
# fileFromSclex
# dictionary of file names { SCLEX_ID: file name } like SCLEX_AU3: LexAU3.cxx
# lexersXcode
# dictionary of project file UUIDs { file name: [build UUID, file UUID] }
# like LexTCL: [28BA733B24E34D9700272C2D,28BA72C924E34D9100272C2D]
# credits
# list of names of contributors like Atsuo Ishimoto
# This file can be run to see the data it provides.
# Requires Python 3.6 or later
import datetime, pathlib, sys, textwrap
neutralEncoding = "iso-8859-1" # Each byte value is valid in iso-8859-1
def ReadFileAsList(path):
"""Read all the lnes in the file and return as a list of strings without line ends.
"""
with path.open(encoding="utf-8") as f:
return [line.rstrip('\n') for line in f]
def FindModules(lexFile):
""" Return a list of modules found within a lexer implementation file. """
modules = []
partLine = ""
with lexFile.open(encoding=neutralEncoding) as f:
lineNum = 0
for line in f.readlines():
lineNum += 1
line = line.rstrip()
if partLine or line.startswith("LexerModule"):
if ")" in line:
line = partLine + line
original = line
line = line.replace("(", " ")
line = line.replace(")", " ")
line = line.replace(",", " ")
parts = line.split()
lexerName = parts[4]
if not (lexerName.startswith('"') and lexerName.endswith('"')):
print(f"{lexFile}:{lineNum}: Bad LexerModule statement:\n{original}")
sys.exit(1)
lexerName = lexerName.strip('"')
modules.append([parts[1], parts[2], lexerName])
partLine = ""
else:
partLine = partLine + line
return modules
def FindSectionInList(lines, markers):
"""Find a section defined by an initial start marker, an optional secondary
marker and an end marker.
The section is between the secondary/initial start and the end.
Report as a slice object so the section can be extracted or replaced.
Raises an exception if the markers can't be found.
Currently only used for Xcode project files.
"""
start = -1
end = -1
state = 0
for i, line in enumerate(lines):
if markers[0] in line:
if markers[1]:
state = 1
else:
start = i+1
state = 2
elif state == 1:
if markers[1] in line:
start = i+1
state = 2
elif state == 2:
if markers[2] in line:
end = i
state = 3
# Check that section was found
if start == -1:
raise ValueError("Could not find start marker(s) |" + markers[0] + "|" + markers[1] + "|")
if end == -1:
raise ValueError("Could not find end marker " + markers[2])
return slice(start, end)
def FindLexersInXcode(xCodeProject):
""" Return a dictionary { file name: [build UUID, file UUID] } of lexers in Xcode project. """
lines = ReadFileAsList(xCodeProject)
# PBXBuildFile section is a list of all buildable files in the project so extract the file
# basename and its build and file IDs
uidsOfBuild = {}
markersPBXBuildFile = ["Begin PBXBuildFile section", "", "End PBXBuildFile section"]
for buildLine in lines[FindSectionInList(lines, markersPBXBuildFile)]:
# Occurs for each file in the build. Find the UIDs used for the file.
#\t\t[0-9A-F]+ /* [a-zA-Z]+.cxx in sources */ = {isa = PBXBuildFile; fileRef = [0-9A-F]+ /* [a-zA-Z]+ */; };
pieces = buildLine.split()
uid1 = pieces[0]
filename = pieces[2].split(".")[0]
uid2 = pieces[12]
uidsOfBuild[filename] = [uid1, uid2]
# PBXGroup section contains the folders (Lexilla, Lexers, LexLib, ...) so is used to find the lexers
lexers = {}
markersLexers = ["/* Lexers */ =", "children", ");"]
for lexerLine in lines[FindSectionInList(lines, markersLexers)]:
#\t\t\t\t[0-9A-F]+ /* [a-zA-Z]+.cxx */,
uid, _, rest = lexerLine.partition("/* ")
uid = uid.strip()
lexer, _, _ = rest.partition(".")
lexers[lexer] = uidsOfBuild[lexer]
return lexers
# Properties that start with lexer. or fold. are automatically found but there are some
# older properties that don't follow this pattern so must be explicitly listed.
knownIrregularProperties = [
"fold",
"styling.within.preprocessor",
"tab.timmy.whinge.level",
"asp.default.language",
"html.tags.case.sensitive",
"ps.level",
"ps.tokenize",
"sql.backslash.escapes",
"nsis.uservars",
"nsis.ignorecase"
]
def FindProperties(lexFile):
""" Return a set of property names in a lexer implementation file. """
properties = set()
with open(lexFile, encoding=neutralEncoding) as f:
for s in f.readlines():
if ("GetProperty" in s or "DefineProperty" in s) and "\"" in s:
s = s.strip()
if not s.startswith("//"): # Drop comments
propertyName = s.split("\"")[1]
if propertyName.lower() == propertyName:
# Only allow lower case property names
if propertyName in knownIrregularProperties or \
propertyName.startswith("fold.") or \
propertyName.startswith("lexer."):
properties.add(propertyName)
return properties
def FindPropertyDocumentation(lexFile):
""" Return a dictionary { name: document string } of property documentation in a lexer. """
documents = {}
with lexFile.open(encoding=neutralEncoding) as f:
name = ""
for line in f.readlines():
line = line.strip()
if "// property " in line:
propertyName = line.split()[2]
if propertyName.lower() == propertyName:
# Only allow lower case property names
name = propertyName
documents[name] = ""
elif "DefineProperty" in line and "\"" in line:
propertyName = line.split("\"")[1]
if propertyName.lower() == propertyName:
# Only allow lower case property names
name = propertyName
documents[name] = ""
elif name:
if line.startswith("//"):
if documents[name]:
documents[name] += " "
documents[name] += line[2:].strip()
elif line.startswith("\""):
line = line[1:].strip()
if line.endswith(";"):
line = line[:-1].strip()
if line.endswith(")"):
line = line[:-1].strip()
if line.endswith("\""):
line = line[:-1]
# Fix escaped double quotes
line = line.replace("\\\"", "\"")
documents[name] += line
else:
name = ""
for name in list(documents.keys()):
if documents[name] == "":
del documents[name]
return documents
def FindCredits(historyFile):
""" Return a list of contributors in a history file. """
creditList = []
stage = 0
with historyFile.open(encoding="utf-8") as f:
for line in f.readlines():
line = line.strip()
if stage == 0 and line == "<table>":
stage = 1
elif stage == 1 and line == "</table>":
stage = 2
if stage == 1 and line.startswith("<td>"):
credit = line[4:-5]
if "<a" in line:
title, dummy, rest = credit.partition("<a href=")
urlplus, _bracket, end = rest.partition(">")
name = end.split("<")[0]
url = urlplus[1:-1]
credit = title.strip()
if credit:
credit += " "
credit += name + " " + url
creditList.append(credit)
return creditList
def ciKey(a):
""" Return a string lowered to be used when sorting. """
return str(a).lower()
def SortListInsensitive(l):
""" Sort a list of strings case insensitively. """
l.sort(key=ciKey)
class LexillaData:
""" Expose information about Lexilla as properties. """
def __init__(self, scintillaRoot):
# Discover version information
self.version = (scintillaRoot / "version.txt").read_text().strip()
self.versionDotted = self.version[0:-2] + '.' + self.version[-2] + '.' + \
self.version[-1]
self.versionCommad = self.versionDotted.replace(".", ", ") + ', 0'
with (scintillaRoot / "doc" / "Lexilla.html").open() as f:
self.dateModified = [d for d in f.readlines() if "Date.Modified" in d]\
[0].split('\"')[3]
# 20130602
# Lexilla.html
dtModified = datetime.datetime.strptime(self.dateModified, "%Y%m%d")
self.yearModified = self.dateModified[0:4]
monthModified = dtModified.strftime("%B")
dayModified = f"{dtModified.day}"
self.mdyModified = monthModified + " " + dayModified + " " + self.yearModified
# May 22 2013
# Lexilla.html, SciTE.html
self.dmyModified = dayModified + " " + monthModified + " " + self.yearModified
# 22 May 2013
# LexillaHistory.html -- only first should change
self.myModified = monthModified + " " + self.yearModified
# Find all the lexer source code files
lexFilePaths = list((scintillaRoot / "lexers").glob("Lex*.cxx"))
SortListInsensitive(lexFilePaths)
self.lexFiles = [f.stem for f in lexFilePaths]
self.lexerModules = []
lexerProperties = set()
self.propertyDocuments = {}
self.sclexFromName = {}
self.fileFromSclex = {}
for lexFile in lexFilePaths:
modules = FindModules(lexFile)
for module in modules:
self.sclexFromName[module[2]] = module[1]
self.fileFromSclex[module[1]] = lexFile
self.lexerModules.append(module[0])
for prop in FindProperties(lexFile):
lexerProperties.add(prop)
documents = FindPropertyDocumentation(lexFile)
for prop, doc in documents.items():
if prop not in self.propertyDocuments:
self.propertyDocuments[prop] = doc
SortListInsensitive(self.lexerModules)
self.lexerProperties = list(lexerProperties)
SortListInsensitive(self.lexerProperties)
self.lexersXcode = FindLexersInXcode(scintillaRoot /
"src/Lexilla/Lexilla.xcodeproj/project.pbxproj")
self.credits = FindCredits(scintillaRoot / "doc" / "LexillaHistory.html")
def printWrapped(text):
""" Print string wrapped with subsequent lines indented. """
print(textwrap.fill(text, subsequent_indent=" "))
if __name__=="__main__":
sci = LexillaData(pathlib.Path(__file__).resolve().parent.parent)
print(f"Version {sci.version} {sci.versionDotted} {sci.versionCommad}")
print(f"Date last modified {sci.dateModified} {sci.yearModified} {sci.mdyModified}"
f" {sci.dmyModified} {sci.myModified}")
printWrapped(str(len(sci.lexFiles)) + " lexer files: " + ", ".join(sci.lexFiles))
printWrapped(str(len(sci.lexerModules)) + " lexer modules: " + ", ".join(sci.lexerModules))
#~ printWrapped(str(len(sci.lexersXcode)) + " Xcode lexer references: " + ", ".join(
#~ [lex+":"+uids[0]+","+uids[1] for lex, uids in sci.lexersXcode.items()]))
print("Lexer name to ID:")
lexNames = sorted(sci.sclexFromName.keys())
for lexName in lexNames:
sclex = sci.sclexFromName[lexName]
fileName = sci.fileFromSclex[sclex].name
print(" " + lexName + " -> " + sclex + " in " + fileName)
printWrapped("Lexer properties: " + ", ".join(sci.lexerProperties))
print("Lexer property documentation:")
documentProperties = list(sci.propertyDocuments.keys())
SortListInsensitive(documentProperties)
for k in documentProperties:
print(" " + k)
print(textwrap.fill(sci.propertyDocuments[k], initial_indent=" ",
subsequent_indent=" "))
print("Credits:")
for c in sci.credits:
sys.stdout.buffer.write(b" " + c.encode("utf-8") + b"\n")