test262/tools/generation/lib/util/find_comments.py

74 lines
2.8 KiB
Python
Raw Normal View History

2016-03-16 18:58:10 +01:00
# Copyright (C) 2016 the V8 project authors. All rights reserved.
# This code is governed by the BSD license found in the LICENSE file.
def find_comments(source):
'''Parse input string describing JavaScript source and yield dictionaries
describing the JavaScript comments in the order they appear in the source.
This includes comment patterns within string literals.
2016-03-16 18:58:10 +01:00
Each dictionary defines the following attributes:
- source: the source text of the comment
- firstchar: the zero-indexed position of the token that begins the comment
- lastchar: the zero-indexed position of the token that closes the comment
- lineno: the zero-indexed offset of the line on which the comment appears
- in_string: `False` if the comment is a true JavaScript comment, one of
'\'' (single quote), '"' (double quote), or '`' (back tick) if
the comment pattern appears within a string literal.
2016-03-16 18:58:10 +01:00
'''
in_string = False
in_s_comment = False
in_m_comment = False
follows_escape = False
comment = ''
lineno = 0
for idx in xrange(len(source)):
if source[idx] == '\n':
lineno += 1
# Within comments and strings, any odd number of back-slashes begins an
# escape sequence.
if source[idx - 1] == '\\':
follows_escape = not follows_escape
else:
follows_escape = False
if in_s_comment:
if source[idx] == '\n':
in_s_comment = False
yield dict(
source=comment[1:],
firstchar=idx - len(comment) - 1,
lastchar=idx,
in_string=in_string,
2016-03-16 18:58:10 +01:00
lineno=lineno)
continue
elif in_m_comment:
if source[idx - 1] == '*' and source[idx] == '/':
in_m_comment = False
yield dict(
source=comment[1:-1],
firstchar=idx - len(comment) - 1,
lastchar=idx + 1,
in_string=in_string,
2016-03-16 18:58:10 +01:00
lineno=lineno)
continue
elif in_string:
if source[idx] == in_string and not follows_escape:
in_string = False
elif source[idx] == '\n' and in_string != '`' and not follows_escape:
in_string = False
if in_m_comment or in_s_comment:
comment += source[idx]
continue
in_m_comment = source[idx] == '/' and source[idx + 1] == '*'
in_s_comment = source[idx] == '/' and source[idx + 1] == '/'
if in_m_comment or in_s_comment:
comment = ''
elif source[idx] == '\'' or source[idx] == '"' or source[idx] == '`':
in_string = source[idx]