audk/BaseTools/Source/Python/UPT/Library/CommentParsing.py

594 lines
22 KiB
Python

## @file
# This file is used to define comment parsing interface
#
# Copyright (c) 2011 - 2018, Intel Corporation. All rights reserved.<BR>
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
'''
CommentParsing
'''
##
# Import Modules
#
import re
from Library.StringUtils import GetSplitValueList
from Library.StringUtils import CleanString2
from Library.DataType import HEADER_COMMENT_NOT_STARTED
from Library.DataType import TAB_COMMENT_SPLIT
from Library.DataType import HEADER_COMMENT_LICENSE
from Library.DataType import HEADER_COMMENT_ABSTRACT
from Library.DataType import HEADER_COMMENT_COPYRIGHT
from Library.DataType import HEADER_COMMENT_DESCRIPTION
from Library.DataType import TAB_SPACE_SPLIT
from Library.DataType import TAB_COMMA_SPLIT
from Library.DataType import SUP_MODULE_LIST
from Library.DataType import TAB_VALUE_SPLIT
from Library.DataType import TAB_PCD_VALIDRANGE
from Library.DataType import TAB_PCD_VALIDLIST
from Library.DataType import TAB_PCD_EXPRESSION
from Library.DataType import TAB_PCD_PROMPT
from Library.DataType import TAB_CAPHEX_START
from Library.DataType import TAB_HEX_START
from Library.DataType import PCD_ERR_CODE_MAX_SIZE
from Library.ExpressionValidate import IsValidRangeExpr
from Library.ExpressionValidate import IsValidListExpr
from Library.ExpressionValidate import IsValidLogicalExpr
from Object.POM.CommonObject import TextObject
from Object.POM.CommonObject import PcdErrorObject
import Logger.Log as Logger
from Logger.ToolError import FORMAT_INVALID
from Logger.ToolError import FORMAT_NOT_SUPPORTED
from Logger import StringTable as ST
## ParseHeaderCommentSection
#
# Parse Header comment section lines, extract Abstract, Description, Copyright
# , License lines
#
# @param CommentList: List of (Comment, LineNumber)
# @param FileName: FileName of the comment
#
def ParseHeaderCommentSection(CommentList, FileName = None, IsBinaryHeader = False):
Abstract = ''
Description = ''
Copyright = ''
License = ''
EndOfLine = "\n"
if IsBinaryHeader:
STR_HEADER_COMMENT_START = "@BinaryHeader"
else:
STR_HEADER_COMMENT_START = "@file"
HeaderCommentStage = HEADER_COMMENT_NOT_STARTED
#
# first find the last copyright line
#
Last = 0
for Index in range(len(CommentList)-1, 0, -1):
Line = CommentList[Index][0]
if _IsCopyrightLine(Line):
Last = Index
break
for Item in CommentList:
Line = Item[0]
LineNo = Item[1]
if not Line.startswith(TAB_COMMENT_SPLIT) and Line:
Logger.Error("\nUPT", FORMAT_INVALID, ST.ERR_INVALID_COMMENT_FORMAT, FileName, Item[1])
Comment = CleanString2(Line)[1]
Comment = Comment.strip()
#
# if there are blank lines between License or Description, keep them as they would be
# indication of different block; or in the position that Abstract should be, also keep it
# as it indicates that no abstract
#
if not Comment and HeaderCommentStage not in [HEADER_COMMENT_LICENSE, \
HEADER_COMMENT_DESCRIPTION, HEADER_COMMENT_ABSTRACT]:
continue
if HeaderCommentStage == HEADER_COMMENT_NOT_STARTED:
if Comment.startswith(STR_HEADER_COMMENT_START):
HeaderCommentStage = HEADER_COMMENT_ABSTRACT
else:
License += Comment + EndOfLine
else:
if HeaderCommentStage == HEADER_COMMENT_ABSTRACT:
#
# in case there is no abstract and description
#
if not Comment:
HeaderCommentStage = HEADER_COMMENT_DESCRIPTION
elif _IsCopyrightLine(Comment):
Result, ErrMsg = _ValidateCopyright(Comment)
ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
Copyright += Comment + EndOfLine
HeaderCommentStage = HEADER_COMMENT_COPYRIGHT
else:
Abstract += Comment + EndOfLine
HeaderCommentStage = HEADER_COMMENT_DESCRIPTION
elif HeaderCommentStage == HEADER_COMMENT_DESCRIPTION:
#
# in case there is no description
#
if _IsCopyrightLine(Comment):
Result, ErrMsg = _ValidateCopyright(Comment)
ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
Copyright += Comment + EndOfLine
HeaderCommentStage = HEADER_COMMENT_COPYRIGHT
else:
Description += Comment + EndOfLine
elif HeaderCommentStage == HEADER_COMMENT_COPYRIGHT:
if _IsCopyrightLine(Comment):
Result, ErrMsg = _ValidateCopyright(Comment)
ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
Copyright += Comment + EndOfLine
else:
#
# Contents after copyright line are license, those non-copyright lines in between
# copyright line will be discarded
#
if LineNo > Last:
if License:
License += EndOfLine
License += Comment + EndOfLine
HeaderCommentStage = HEADER_COMMENT_LICENSE
else:
if not Comment and not License:
continue
License += Comment + EndOfLine
return Abstract.strip(), Description.strip(), Copyright.strip(), License.strip()
## _IsCopyrightLine
# check whether current line is copyright line, the criteria is whether there is case insensitive keyword "Copyright"
# followed by zero or more white space characters followed by a "(" character
#
# @param LineContent: the line need to be checked
# @return: True if current line is copyright line, False else
#
def _IsCopyrightLine (LineContent):
LineContent = LineContent.upper()
Result = False
ReIsCopyrightRe = re.compile(r"""(^|\s)COPYRIGHT *\(""", re.DOTALL)
if ReIsCopyrightRe.search(LineContent):
Result = True
return Result
## ParseGenericComment
#
# @param GenericComment: Generic comment list, element of
# (CommentLine, LineNum)
# @param ContainerFile: Input value for filename of Dec file
#
def ParseGenericComment (GenericComment, ContainerFile=None, SkipTag=None):
if ContainerFile:
pass
HelpTxt = None
HelpStr = ''
for Item in GenericComment:
CommentLine = Item[0]
Comment = CleanString2(CommentLine)[1]
if SkipTag is not None and Comment.startswith(SkipTag):
Comment = Comment.replace(SkipTag, '', 1)
HelpStr += Comment + '\n'
if HelpStr:
HelpTxt = TextObject()
if HelpStr.endswith('\n') and not HelpStr.endswith('\n\n') and HelpStr != '\n':
HelpStr = HelpStr[:-1]
HelpTxt.SetString(HelpStr)
return HelpTxt
## ParsePcdErrorCode
#
# @param Value: original ErrorCode value
# @param ContainerFile: Input value for filename of Dec file
# @param LineNum: Line Num
#
def ParsePcdErrorCode (Value = None, ContainerFile = None, LineNum = None):
try:
if Value.strip().startswith((TAB_HEX_START, TAB_CAPHEX_START)):
Base = 16
else:
Base = 10
ErrorCode = int(Value, Base)
if ErrorCode > PCD_ERR_CODE_MAX_SIZE or ErrorCode < 0:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
"The format %s of ErrorCode is not valid, should be UNIT32 type or long type" % Value,
File = ContainerFile,
Line = LineNum)
ErrorCode = '0x%x' % ErrorCode
return ErrorCode
except ValueError as XStr:
if XStr:
pass
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
"The format %s of ErrorCode is not valid, should be UNIT32 type or long type" % Value,
File = ContainerFile,
Line = LineNum)
## ParseDecPcdGenericComment
#
# @param GenericComment: Generic comment list, element of (CommentLine,
# LineNum)
# @param ContainerFile: Input value for filename of Dec file
#
def ParseDecPcdGenericComment (GenericComment, ContainerFile, TokenSpaceGuidCName, CName, MacroReplaceDict):
HelpStr = ''
PromptStr = ''
PcdErr = None
PcdErrList = []
ValidValueNum = 0
ValidRangeNum = 0
ExpressionNum = 0
for (CommentLine, LineNum) in GenericComment:
Comment = CleanString2(CommentLine)[1]
#
# To replace Macro
#
MACRO_PATTERN = '[\t\s]*\$\([A-Z][_A-Z0-9]*\)'
MatchedStrs = re.findall(MACRO_PATTERN, Comment)
for MatchedStr in MatchedStrs:
if MatchedStr:
Macro = MatchedStr.strip().lstrip('$(').rstrip(')').strip()
if Macro in MacroReplaceDict:
Comment = Comment.replace(MatchedStr, MacroReplaceDict[Macro])
if Comment.startswith(TAB_PCD_VALIDRANGE):
if ValidValueNum > 0 or ExpressionNum > 0:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
ST.WRN_MULTI_PCD_RANGES,
File = ContainerFile,
Line = LineNum)
else:
PcdErr = PcdErrorObject()
PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
PcdErr.SetCName(CName)
PcdErr.SetFileLine(Comment)
PcdErr.SetLineNum(LineNum)
ValidRangeNum += 1
ValidRange = Comment.replace(TAB_PCD_VALIDRANGE, "", 1).strip()
Valid, Cause = _CheckRangeExpression(ValidRange)
if Valid:
ValueList = ValidRange.split(TAB_VALUE_SPLIT)
if len(ValueList) > 1:
PcdErr.SetValidValueRange((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
else:
PcdErr.SetValidValueRange(ValidRange)
PcdErrList.append(PcdErr)
else:
Logger.Error("Parser",
FORMAT_NOT_SUPPORTED,
Cause,
ContainerFile,
LineNum)
elif Comment.startswith(TAB_PCD_VALIDLIST):
if ValidRangeNum > 0 or ExpressionNum > 0:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
ST.WRN_MULTI_PCD_RANGES,
File = ContainerFile,
Line = LineNum)
elif ValidValueNum > 0:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
ST.WRN_MULTI_PCD_VALIDVALUE,
File = ContainerFile,
Line = LineNum)
else:
PcdErr = PcdErrorObject()
PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
PcdErr.SetCName(CName)
PcdErr.SetFileLine(Comment)
PcdErr.SetLineNum(LineNum)
ValidValueNum += 1
ValidValueExpr = Comment.replace(TAB_PCD_VALIDLIST, "", 1).strip()
Valid, Cause = _CheckListExpression(ValidValueExpr)
if Valid:
ValidValue = Comment.replace(TAB_PCD_VALIDLIST, "", 1).replace(TAB_COMMA_SPLIT, TAB_SPACE_SPLIT)
ValueList = ValidValue.split(TAB_VALUE_SPLIT)
if len(ValueList) > 1:
PcdErr.SetValidValue((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
else:
PcdErr.SetValidValue(ValidValue)
PcdErrList.append(PcdErr)
else:
Logger.Error("Parser",
FORMAT_NOT_SUPPORTED,
Cause,
ContainerFile,
LineNum)
elif Comment.startswith(TAB_PCD_EXPRESSION):
if ValidRangeNum > 0 or ValidValueNum > 0:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
ST.WRN_MULTI_PCD_RANGES,
File = ContainerFile,
Line = LineNum)
else:
PcdErr = PcdErrorObject()
PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
PcdErr.SetCName(CName)
PcdErr.SetFileLine(Comment)
PcdErr.SetLineNum(LineNum)
ExpressionNum += 1
Expression = Comment.replace(TAB_PCD_EXPRESSION, "", 1).strip()
Valid, Cause = _CheckExpression(Expression)
if Valid:
ValueList = Expression.split(TAB_VALUE_SPLIT)
if len(ValueList) > 1:
PcdErr.SetExpression((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
else:
PcdErr.SetExpression(Expression)
PcdErrList.append(PcdErr)
else:
Logger.Error("Parser",
FORMAT_NOT_SUPPORTED,
Cause,
ContainerFile,
LineNum)
elif Comment.startswith(TAB_PCD_PROMPT):
if PromptStr:
Logger.Error('Parser',
FORMAT_NOT_SUPPORTED,
ST.WRN_MULTI_PCD_PROMPT,
File = ContainerFile,
Line = LineNum)
PromptStr = Comment.replace(TAB_PCD_PROMPT, "", 1).strip()
else:
if Comment:
HelpStr += Comment + '\n'
#
# remove the last EOL if the comment is of format 'FOO\n'
#
if HelpStr.endswith('\n'):
if HelpStr != '\n' and not HelpStr.endswith('\n\n'):
HelpStr = HelpStr[:-1]
return HelpStr, PcdErrList, PromptStr
## ParseDecPcdTailComment
#
# @param TailCommentList: Tail comment list of Pcd, item of format (Comment, LineNum)
# @param ContainerFile: Input value for filename of Dec file
# @retVal SupModuleList: The supported module type list detected
# @retVal HelpStr: The generic help text string detected
#
def ParseDecPcdTailComment (TailCommentList, ContainerFile):
assert(len(TailCommentList) == 1)
TailComment = TailCommentList[0][0]
LineNum = TailCommentList[0][1]
Comment = TailComment.lstrip(" #")
ReFindFirstWordRe = re.compile(r"""^([^ #]*)""", re.DOTALL)
#
# get first word and compare with SUP_MODULE_LIST
#
MatchObject = ReFindFirstWordRe.match(Comment)
if not (MatchObject and MatchObject.group(1) in SUP_MODULE_LIST):
return None, Comment
#
# parse line, it must have supported module type specified
#
if Comment.find(TAB_COMMENT_SPLIT) == -1:
Comment += TAB_COMMENT_SPLIT
SupMode, HelpStr = GetSplitValueList(Comment, TAB_COMMENT_SPLIT, 1)
SupModuleList = []
for Mod in GetSplitValueList(SupMode, TAB_SPACE_SPLIT):
if not Mod:
continue
elif Mod not in SUP_MODULE_LIST:
Logger.Error("UPT",
FORMAT_INVALID,
ST.WRN_INVALID_MODULE_TYPE%Mod,
ContainerFile,
LineNum)
else:
SupModuleList.append(Mod)
return SupModuleList, HelpStr
## _CheckListExpression
#
# @param Expression: Pcd value list expression
#
def _CheckListExpression(Expression):
ListExpr = ''
if TAB_VALUE_SPLIT in Expression:
ListExpr = Expression[Expression.find(TAB_VALUE_SPLIT)+1:]
else:
ListExpr = Expression
return IsValidListExpr(ListExpr)
## _CheckExpression
#
# @param Expression: Pcd value expression
#
def _CheckExpression(Expression):
Expr = ''
if TAB_VALUE_SPLIT in Expression:
Expr = Expression[Expression.find(TAB_VALUE_SPLIT)+1:]
else:
Expr = Expression
return IsValidLogicalExpr(Expr, True)
## _CheckRangeExpression
#
# @param Expression: Pcd range expression
#
def _CheckRangeExpression(Expression):
RangeExpr = ''
if TAB_VALUE_SPLIT in Expression:
RangeExpr = Expression[Expression.find(TAB_VALUE_SPLIT)+1:]
else:
RangeExpr = Expression
return IsValidRangeExpr(RangeExpr)
## ValidateCopyright
#
#
#
def ValidateCopyright(Result, ErrType, FileName, LineNo, ErrMsg):
if not Result:
Logger.Warn("\nUPT", ErrType, FileName, LineNo, ErrMsg)
## _ValidateCopyright
#
# @param Line: Line that contains copyright information, # stripped
#
# @retval Result: True if line is conformed to Spec format, False else
# @retval ErrMsg: the detailed error description
#
def _ValidateCopyright(Line):
if Line:
pass
Result = True
ErrMsg = ''
return Result, ErrMsg
def GenerateTokenList (Comment):
#
# Tokenize Comment using '#' and ' ' as token separators
#
ReplacedComment = None
while Comment != ReplacedComment:
ReplacedComment = Comment
Comment = Comment.replace('##', '#').replace(' ', ' ').replace(' ', '#').strip('# ')
return Comment.split('#')
#
# Comment - Comment to parse
# TypeTokens - A dictionary of type token synonyms
# RemoveTokens - A list of tokens to remove from help text
# ParseVariable - True for parsing [Guids]. Otherwise False
#
def ParseComment (Comment, UsageTokens, TypeTokens, RemoveTokens, ParseVariable):
#
# Initialize return values
#
Usage = None
Type = None
String = None
Comment = Comment[0]
NumTokens = 2
if ParseVariable:
#
# Remove white space around first instance of ':' from Comment if 'Variable'
# is in front of ':' and Variable is the 1st or 2nd token in Comment.
#
List = Comment.split(':', 1)
if len(List) > 1:
SubList = GenerateTokenList (List[0].strip())
if len(SubList) in [1, 2] and SubList[-1] == 'Variable':
if List[1].strip().find('L"') == 0:
Comment = List[0].strip() + ':' + List[1].strip()
#
# Remove first instance of L"<VariableName> from Comment and put into String
# if and only if L"<VariableName>" is the 1st token, the 2nd token. Or
# L"<VariableName>" is the third token immediately following 'Variable:'.
#
End = -1
Start = Comment.find('Variable:L"')
if Start >= 0:
String = Comment[Start + 9:]
End = String[2:].find('"')
else:
Start = Comment.find('L"')
if Start >= 0:
String = Comment[Start:]
End = String[2:].find('"')
if End >= 0:
SubList = GenerateTokenList (Comment[:Start])
if len(SubList) < 2:
Comment = Comment[:Start] + String[End + 3:]
String = String[:End + 3]
Type = 'Variable'
NumTokens = 1
#
# Initialize HelpText to Comment.
# Content will be remove from HelpText as matching tokens are found
#
HelpText = Comment
#
# Tokenize Comment using '#' and ' ' as token separators
#
List = GenerateTokenList (Comment)
#
# Search first two tokens for Usage and Type and remove any matching tokens
# from HelpText
#
for Token in List[0:NumTokens]:
if Usage is None and Token in UsageTokens:
Usage = UsageTokens[Token]
HelpText = HelpText.replace(Token, '')
if Usage is not None or not ParseVariable:
for Token in List[0:NumTokens]:
if Type is None and Token in TypeTokens:
Type = TypeTokens[Token]
HelpText = HelpText.replace(Token, '')
if Usage is not None:
for Token in List[0:NumTokens]:
if Token in RemoveTokens:
HelpText = HelpText.replace(Token, '')
#
# If no Usage token is present and set Usage to UNDEFINED
#
if Usage is None:
Usage = 'UNDEFINED'
#
# If no Type token is present and set Type to UNDEFINED
#
if Type is None:
Type = 'UNDEFINED'
#
# If Type is not 'Variable:', then set String to None
#
if Type != 'Variable':
String = None
#
# Strip ' ' and '#' from the beginning of HelpText
# If HelpText is an empty string after all parsing is
# complete then set HelpText to None
#
HelpText = HelpText.lstrip('# ')
if HelpText == '':
HelpText = None
#
# Return parsing results
#
return Usage, Type, String, HelpText