audk/BaseTools/Source/Python/Ecc/CodeFragmentCollector.py

599 lines
25 KiB
Python
Raw Normal View History

## @file
# preprocess source file
#
# Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
#
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
##
# Import Modules
#
from __future__ import print_function
from __future__ import absolute_import
import re
import Common.LongFilePathOs as os
import sys
if sys.version_info.major == 3:
import antlr4 as antlr
from Ecc.CParser4.CLexer import CLexer
from Ecc.CParser4.CParser import CParser
else:
import antlr3 as antlr
antlr.InputString = antlr.StringStream
from Ecc.CParser3.CLexer import CLexer
from Ecc.CParser3.CParser import CParser
from Ecc import FileProfile
from Ecc.CodeFragment import Comment
from Ecc.CodeFragment import PP_Directive
from Ecc.ParserWarning import Warning
##define T_CHAR_SPACE ' '
##define T_CHAR_NULL '\0'
##define T_CHAR_CR '\r'
##define T_CHAR_TAB '\t'
##define T_CHAR_LF '\n'
##define T_CHAR_SLASH '/'
##define T_CHAR_BACKSLASH '\\'
##define T_CHAR_DOUBLE_QUOTE '\"'
##define T_CHAR_SINGLE_QUOTE '\''
##define T_CHAR_STAR '*'
##define T_CHAR_HASH '#'
(T_CHAR_SPACE, T_CHAR_NULL, T_CHAR_CR, T_CHAR_TAB, T_CHAR_LF, T_CHAR_SLASH, \
T_CHAR_BACKSLASH, T_CHAR_DOUBLE_QUOTE, T_CHAR_SINGLE_QUOTE, T_CHAR_STAR, T_CHAR_HASH) = \
(' ', '\0', '\r', '\t', '\n', '/', '\\', '\"', '\'', '*', '#')
SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')
(T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR) = (0, 1)
(T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS) = (0, 1, 2)
## The collector for source code fragments.
#
# PreprocessFile method should be called prior to ParseFile
#
# GetNext*** procedures mean these procedures will get next token first, then make judgement.
# Get*** procedures mean these procedures will make judgement on current token only.
#
class CodeFragmentCollector:
## The constructor
#
# @param self The object pointer
# @param FileName The file that to be parsed
#
def __init__(self, FileName):
self.Profile = FileProfile.FileProfile(FileName)
self.Profile.FileLinesList.append(T_CHAR_LF)
self.FileName = FileName
self.CurrentLineNumber = 1
self.CurrentOffsetWithinLine = 0
self.__Token = ""
self.__SkippedChars = ""
## __EndOfFile() method
#
# Judge current buffer pos is at file end
#
# @param self The object pointer
# @retval True Current File buffer position is at file end
# @retval False Current File buffer position is NOT at file end
#
def __EndOfFile(self):
NumberOfLines = len(self.Profile.FileLinesList)
SizeOfLastLine = NumberOfLines
if NumberOfLines > 0:
SizeOfLastLine = len(self.Profile.FileLinesList[-1])
if self.CurrentLineNumber == NumberOfLines and self.CurrentOffsetWithinLine >= SizeOfLastLine - 1:
return True
elif self.CurrentLineNumber > NumberOfLines:
return True
else:
return False
## __EndOfLine() method
#
# Judge current buffer pos is at line end
#
# @param self The object pointer
# @retval True Current File buffer position is at line end
# @retval False Current File buffer position is NOT at line end
#
def __EndOfLine(self):
SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
if self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1:
return True
else:
return False
## Rewind() method
#
# Reset file data buffer to the initial state
#
# @param self The object pointer
#
def Rewind(self):
self.CurrentLineNumber = 1
self.CurrentOffsetWithinLine = 0
## __UndoOneChar() method
#
# Go back one char in the file buffer
#
# @param self The object pointer
# @retval True Successfully go back one char
# @retval False Not able to go back one char as file beginning reached
#
def __UndoOneChar(self):
if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
return False
elif self.CurrentOffsetWithinLine == 0:
self.CurrentLineNumber -= 1
self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
else:
self.CurrentOffsetWithinLine -= 1
return True
## __GetOneChar() method
#
# Move forward one char in the file buffer
#
# @param self The object pointer
#
def __GetOneChar(self):
if self.CurrentOffsetWithinLine == len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1:
self.CurrentLineNumber += 1
self.CurrentOffsetWithinLine = 0
else:
self.CurrentOffsetWithinLine += 1
## __CurrentChar() method
#
# Get the char pointed to by the file buffer pointer
#
# @param self The object pointer
# @retval Char Current char
#
def __CurrentChar(self):
CurrentChar = self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]
# if CurrentChar > 255:
# raise Warning("Non-Ascii char found At Line %d, offset %d" % (self.CurrentLineNumber, self.CurrentOffsetWithinLine), self.FileName, self.CurrentLineNumber)
return CurrentChar
## __NextChar() method
#
# Get the one char pass the char pointed to by the file buffer pointer
#
# @param self The object pointer
# @retval Char Next char
#
def __NextChar(self):
if self.CurrentOffsetWithinLine == len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1:
return self.Profile.FileLinesList[self.CurrentLineNumber][0]
else:
return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine + 1]
## __SetCurrentCharValue() method
#
# Modify the value of current char
#
# @param self The object pointer
# @param Value The new value of current char
#
def __SetCurrentCharValue(self, Value):
self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine] = Value
## __SetCharValue() method
#
# Modify the value of current char
#
# @param self The object pointer
# @param Value The new value of current char
#
def __SetCharValue(self, Line, Offset, Value):
self.Profile.FileLinesList[Line - 1][Offset] = Value
## __CurrentLine() method
#
# Get the list that contains current line contents
#
# @param self The object pointer
# @retval List current line contents
#
def __CurrentLine(self):
return self.Profile.FileLinesList[self.CurrentLineNumber - 1]
## __InsertComma() method
#
# Insert ',' to replace PP
#
# @param self The object pointer
# @retval List current line contents
#
def __InsertComma(self, Line):
if self.Profile.FileLinesList[Line - 1][0] != T_CHAR_HASH:
BeforeHashPart = str(self.Profile.FileLinesList[Line - 1]).split(T_CHAR_HASH)[0]
if BeforeHashPart.rstrip().endswith(T_CHAR_COMMA) or BeforeHashPart.rstrip().endswith(';'):
return
if Line - 2 >= 0 and str(self.Profile.FileLinesList[Line - 2]).rstrip().endswith(','):
return
if Line - 2 >= 0 and str(self.Profile.FileLinesList[Line - 2]).rstrip().endswith(';'):
return
if str(self.Profile.FileLinesList[Line]).lstrip().startswith(',') or str(self.Profile.FileLinesList[Line]).lstrip().startswith(';'):
return
self.Profile.FileLinesList[Line - 1].insert(self.CurrentOffsetWithinLine, ',')
## PreprocessFile() method
#
# Preprocess file contents, replace comments with spaces.
# In the end, rewind the file buffer pointer to the beginning
# BUGBUG: No !include statement processing contained in this procedure
# !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
#
# @param self The object pointer
#
def PreprocessFile(self):
self.Rewind()
InComment = False
DoubleSlashComment = False
HashComment = False
PPExtend = False
CommentObj = None
PPDirectiveObj = None
# HashComment in quoted string " " is ignored.
InString = False
InCharLiteral = False
self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
while not self.__EndOfFile():
if not InComment and self.__CurrentChar() == T_CHAR_DOUBLE_QUOTE:
InString = not InString
if not InComment and self.__CurrentChar() == T_CHAR_SINGLE_QUOTE:
InCharLiteral = not InCharLiteral
# meet new line, then no longer in a comment for // and '#'
if self.__CurrentChar() == T_CHAR_LF:
if HashComment and PPDirectiveObj is not None:
if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
PPDirectiveObj.Content += T_CHAR_LF
PPExtend = True
else:
PPExtend = False
EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
if InComment and DoubleSlashComment:
InComment = False
DoubleSlashComment = False
CommentObj.Content += T_CHAR_LF
CommentObj.EndPos = EndLinePos
FileProfile.CommentList.append(CommentObj)
CommentObj = None
if InComment and HashComment and not PPExtend:
InComment = False
HashComment = False
PPDirectiveObj.Content += T_CHAR_LF
PPDirectiveObj.EndPos = EndLinePos
FileProfile.PPDirectiveList.append(PPDirectiveObj)
PPDirectiveObj = None
if InString or InCharLiteral:
CurrentLine = "".join(self.__CurrentLine())
if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)
if InComment and not DoubleSlashComment and not HashComment:
CommentObj.Content += T_CHAR_LF
self.CurrentLineNumber += 1
self.CurrentOffsetWithinLine = 0
# check for */ comment end
elif InComment and not DoubleSlashComment and not HashComment and self.__CurrentChar() == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH:
CommentObj.Content += self.__CurrentChar()
# self.__SetCurrentCharValue(T_CHAR_SPACE)
self.__GetOneChar()
CommentObj.Content += self.__CurrentChar()
# self.__SetCurrentCharValue(T_CHAR_SPACE)
CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
FileProfile.CommentList.append(CommentObj)
CommentObj = None
self.__GetOneChar()
InComment = False
# set comments to spaces
elif InComment:
if HashComment:
# // follows hash PP directive
if self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
InComment = False
HashComment = False
PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
FileProfile.PPDirectiveList.append(PPDirectiveObj)
PPDirectiveObj = None
continue
else:
PPDirectiveObj.Content += self.__CurrentChar()
if PPExtend:
self.__SetCurrentCharValue(T_CHAR_SPACE)
else:
CommentObj.Content += self.__CurrentChar()
# self.__SetCurrentCharValue(T_CHAR_SPACE)
self.__GetOneChar()
# check for // comment
elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
InComment = True
DoubleSlashComment = True
CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
# check for '#' comment
elif self.__CurrentChar() == T_CHAR_HASH and not InString and not InCharLiteral:
InComment = True
HashComment = True
PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
# check for /* comment start
elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
CommentObj.Content += self.__CurrentChar()
# self.__SetCurrentCharValue( T_CHAR_SPACE)
self.__GetOneChar()
CommentObj.Content += self.__CurrentChar()
# self.__SetCurrentCharValue( T_CHAR_SPACE)
self.__GetOneChar()
InComment = True
else:
self.__GetOneChar()
EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
if InComment and DoubleSlashComment:
CommentObj.EndPos = EndLinePos
FileProfile.CommentList.append(CommentObj)
if InComment and HashComment and not PPExtend:
PPDirectiveObj.EndPos = EndLinePos
FileProfile.PPDirectiveList.append(PPDirectiveObj)
self.Rewind()
def PreprocessFileWithClear(self):
self.Rewind()
InComment = False
DoubleSlashComment = False
HashComment = False
PPExtend = False
CommentObj = None
PPDirectiveObj = None
# HashComment in quoted string " " is ignored.
InString = False
InCharLiteral = False
self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
while not self.__EndOfFile():
if not InComment and self.__CurrentChar() == T_CHAR_DOUBLE_QUOTE:
InString = not InString
if not InComment and self.__CurrentChar() == T_CHAR_SINGLE_QUOTE:
InCharLiteral = not InCharLiteral
# meet new line, then no longer in a comment for // and '#'
if self.__CurrentChar() == T_CHAR_LF:
if HashComment and PPDirectiveObj is not None:
if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
PPDirectiveObj.Content += T_CHAR_LF
PPExtend = True
else:
PPExtend = False
EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
if InComment and DoubleSlashComment:
InComment = False
DoubleSlashComment = False
CommentObj.Content += T_CHAR_LF
CommentObj.EndPos = EndLinePos
FileProfile.CommentList.append(CommentObj)
CommentObj = None
if InComment and HashComment and not PPExtend:
InComment = False
HashComment = False
PPDirectiveObj.Content += T_CHAR_LF
PPDirectiveObj.EndPos = EndLinePos
FileProfile.PPDirectiveList.append(PPDirectiveObj)
PPDirectiveObj = None
if InString or InCharLiteral:
CurrentLine = "".join(self.__CurrentLine())
if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)
if InComment and not DoubleSlashComment and not HashComment:
CommentObj.Content += T_CHAR_LF
self.CurrentLineNumber += 1
self.CurrentOffsetWithinLine = 0
# check for */ comment end
elif InComment and not DoubleSlashComment and not HashComment and self.__CurrentChar() == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH:
CommentObj.Content += self.__CurrentChar()
self.__SetCurrentCharValue(T_CHAR_SPACE)
self.__GetOneChar()
CommentObj.Content += self.__CurrentChar()
self.__SetCurrentCharValue(T_CHAR_SPACE)
CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
FileProfile.CommentList.append(CommentObj)
CommentObj = None
self.__GetOneChar()
InComment = False
# set comments to spaces
elif InComment:
if HashComment:
# // follows hash PP directive
if self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
InComment = False
HashComment = False
PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
FileProfile.PPDirectiveList.append(PPDirectiveObj)
PPDirectiveObj = None
continue
else:
PPDirectiveObj.Content += self.__CurrentChar()
# if PPExtend:
# self.__SetCurrentCharValue(T_CHAR_SPACE)
else:
CommentObj.Content += self.__CurrentChar()
self.__SetCurrentCharValue(T_CHAR_SPACE)
self.__GetOneChar()
# check for // comment
elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
InComment = True
DoubleSlashComment = True
CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
# check for '#' comment
elif self.__CurrentChar() == T_CHAR_HASH and not InString and not InCharLiteral:
InComment = True
HashComment = True
PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
# check for /* comment start
elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
CommentObj.Content += self.__CurrentChar()
self.__SetCurrentCharValue( T_CHAR_SPACE)
self.__GetOneChar()
CommentObj.Content += self.__CurrentChar()
self.__SetCurrentCharValue( T_CHAR_SPACE)
self.__GetOneChar()
InComment = True
else:
self.__GetOneChar()
EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
if InComment and DoubleSlashComment:
CommentObj.EndPos = EndLinePos
FileProfile.CommentList.append(CommentObj)
if InComment and HashComment and not PPExtend:
PPDirectiveObj.EndPos = EndLinePos
FileProfile.PPDirectiveList.append(PPDirectiveObj)
self.Rewind()
## ParseFile() method
#
# Parse the file profile buffer to extract fd, fv ... information
# Exception will be raised if syntax error found
#
# @param self The object pointer
#
def ParseFile(self):
self.PreprocessFile()
# restore from ListOfList to ListOfString
self.Profile.FileLinesList = ["".join(list) for list in self.Profile.FileLinesList]
FileStringContents = ''
for fileLine in self.Profile.FileLinesList:
FileStringContents += fileLine
cStream = antlr.InputStream(FileStringContents)
lexer = CLexer(cStream)
tStream = antlr.CommonTokenStream(lexer)
parser = CParser(tStream)
parser.translation_unit()
def ParseFileWithClearedPPDirective(self):
self.PreprocessFileWithClear()
# restore from ListOfList to ListOfString
self.Profile.FileLinesList = ["".join(list) for list in self.Profile.FileLinesList]
FileStringContents = ''
for fileLine in self.Profile.FileLinesList:
FileStringContents += fileLine
cStream = antlr.InputStream(FileStringContents)
lexer = CLexer(cStream)
tStream = antlr.CommonTokenStream(lexer)
parser = CParser(tStream)
parser.translation_unit()
def CleanFileProfileBuffer(self):
FileProfile.CommentList = []
FileProfile.PPDirectiveList = []
FileProfile.PredicateExpressionList = []
FileProfile.FunctionDefinitionList = []
FileProfile.VariableDeclarationList = []
FileProfile.EnumerationDefinitionList = []
FileProfile.StructUnionDefinitionList = []
FileProfile.TypedefDefinitionList = []
FileProfile.FunctionCallingList = []
def PrintFragments(self):
print('################# ' + self.FileName + '#####################')
print('/****************************************/')
print('/*************** COMMENTS ***************/')
print('/****************************************/')
for comment in FileProfile.CommentList:
print(str(comment.StartPos) + comment.Content)
print('/****************************************/')
print('/********* PREPROCESS DIRECTIVES ********/')
print('/****************************************/')
for pp in FileProfile.PPDirectiveList:
print(str(pp.StartPos) + pp.Content)
print('/****************************************/')
print('/********* VARIABLE DECLARATIONS ********/')
print('/****************************************/')
for var in FileProfile.VariableDeclarationList:
print(str(var.StartPos) + var.Modifier + ' '+ var.Declarator)
print('/****************************************/')
print('/********* FUNCTION DEFINITIONS *********/')
print('/****************************************/')
for func in FileProfile.FunctionDefinitionList:
print(str(func.StartPos) + func.Modifier + ' '+ func.Declarator + ' ' + str(func.NamePos))
print('/****************************************/')
print('/************ ENUMERATIONS **************/')
print('/****************************************/')
for enum in FileProfile.EnumerationDefinitionList:
print(str(enum.StartPos) + enum.Content)
print('/****************************************/')
print('/*********** STRUCTS/UNIONS *************/')
print('/****************************************/')
for su in FileProfile.StructUnionDefinitionList:
print(str(su.StartPos) + su.Content)
print('/****************************************/')
print('/********* PREDICATE EXPRESSIONS ********/')
print('/****************************************/')
for predexp in FileProfile.PredicateExpressionList:
print(str(predexp.StartPos) + predexp.Content)
print('/****************************************/')
print('/************** TYPEDEFS ****************/')
print('/****************************************/')
for typedef in FileProfile.TypedefDefinitionList:
print(str(typedef.StartPos) + typedef.ToType)
if __name__ == "__main__":
collector = CodeFragmentCollector(sys.argv[1])
collector.PreprocessFile()
print("For Test.")