# test262/tools/generation/lib/util/find_comments.py
#
# 74 lines
# 2.8 KiB
# Python

# Copyright (C) 2016 the V8 project authors. All rights reserved.
# This code is governed by the BSD license found in the LICENSE file.
def find_comments(source):
    '''Scan a string of JavaScript source and yield one dictionary per
    comment, in the order the comments close.

    Comment patterns that occur inside string literals are reported as well;
    they are distinguished by the `in_string` attribute.

    Each yielded dictionary has the following keys:

    - source: the text of the comment without its delimiters (`//` for a
              single-line comment, `/*` and `*/` for a multi-line comment)
    - firstchar: the zero-indexed position of the `/` that opens the comment
    - lastchar: for a multi-line comment, the position immediately after the
                closing `/`; for a single-line comment, the position of the
                newline that terminates it. In both cases
                `source[firstchar:lastchar]` is the comment including its
                delimiters.
    - lineno: the number of newline characters seen up to and including the
              position where the comment closes. For a single-line comment
              this count includes the terminating newline.
    - in_string: `False` if the comment is a true JavaScript comment;
                 otherwise the quote character (single quote, double quote
                 or back tick) that opened the enclosing string literal.

    A comment that is still open when the input ends (a `//` comment with no
    trailing newline, or a `/*` with no `*/`) is not reported.
    '''
    in_string = False
    in_s_comment = False
    in_m_comment = False
    follows_escape = False
    comment_start = 0
    lineno = 0
    last_idx = len(source) - 1

    for idx, char in enumerate(source):
        if char == '\n':
            lineno += 1

        # Any odd number of consecutive back-slashes begins an escape
        # sequence. The `idx > 0` guard keeps the look-behind from wrapping
        # around to the final character of the input.
        if idx > 0 and source[idx - 1] == '\\':
            follows_escape = not follows_escape
        else:
            follows_escape = False

        if in_s_comment:
            if char == '\n':
                in_s_comment = False
                yield dict(
                    source=source[comment_start + 2:idx],
                    firstchar=comment_start,
                    lastchar=idx,
                    in_string=in_string,
                    lineno=lineno)
                # An unescaped newline also ends a quoted (non-template)
                # string, exactly as it does outside of a comment pattern.
                if in_string and in_string != '`' and not follows_escape:
                    in_string = False
            continue

        if in_m_comment:
            # Require at least one character between the opening `/` and the
            # closing `*/` so that the `*` of `/*` cannot double as the `*`
            # of the closer (i.e. `/*/` leaves the comment open).
            if (idx - comment_start >= 3 and char == '/' and
                    source[idx - 1] == '*'):
                in_m_comment = False
                yield dict(
                    source=source[comment_start + 2:idx - 1],
                    firstchar=comment_start,
                    lastchar=idx + 1,
                    in_string=in_string,
                    lineno=lineno)
            continue

        if in_string and not follows_escape:
            closes_string = (
                char == in_string or (char == '\n' and in_string != '`'))
            if closes_string:
                in_string = False
                # Skip the opener check below: the closing quote must not be
                # mistaken for the start of a new string.
                continue

        # Look one character ahead without running past the end of input.
        next_char = source[idx + 1] if idx < last_idx else ''

        if char == '/' and next_char in ('*', '/'):
            # Comment patterns are recognised inside strings too; the
            # current `in_string` value is reported when the comment closes.
            comment_start = idx
            in_m_comment = next_char == '*'
            in_s_comment = not in_m_comment
        elif not in_string and char in ('\'', '"', '`'):
            # Only open a string when not already inside one, so a different
            # quote character within a string is treated as plain text.
            in_string = char