mirror of https://github.com/tc39/test262.git
69 lines
2.4 KiB
Python
69 lines
2.4 KiB
Python
# Copyright (C) 2016 the V8 project authors. All rights reserved.
# This code is governed by the BSD license found in the LICENSE file.

def find_comments(source):
    '''Parse input string describing JavaScript source and yield dictionaries
    describing the JavaScript comments in the order they appear in the source.

    Each dictionary defines the following attributes:

    - source: the text of the comment without its delimiters (the leading
      `//` and the terminating line break, or the enclosing `/*` and `*/`)
    - firstchar: the zero-indexed position of the `/` that begins the comment
    - lastchar: for a single-line comment, the zero-indexed position of the
      line break that ends it; for a multi-line comment, the position
      immediately after the closing `*/`
    - lineno: the number of line breaks consumed up to and including the
      character that closes the comment (a single-line comment on the first
      line therefore reports 1, while a multi-line comment that closes on
      the first line reports 0)

    Text inside single-quoted, double-quoted and back-tick-quoted strings is
    skipped, so comment-like sequences within them are not reported.
    Comments that are still open when the input ends (including a
    single-line comment with no trailing line break) are not reported.
    Regular expression literals are not recognized.
    '''
    quotes = ('\'', '"', '`')

    in_string = False      # False, or the quote character that opened it
    in_s_comment = False   # inside a `//` comment
    in_m_comment = False   # inside a `/* */` comment
    follows_escape = False
    start = 0              # index of the `/` that opened the current comment
    lineno = 0

    for idx, char in enumerate(source):
        # Do not index with `idx - 1` at position 0: that would wrap around
        # and inspect the final character of the input.
        previous = source[idx - 1] if idx else ''

        if char == '\n':
            lineno += 1

        # Within comments and strings, any odd number of back-slashes begins
        # an escape sequence.
        follows_escape = previous == '\\' and not follows_escape

        if in_s_comment:
            if char == '\n':
                in_s_comment = False
                yield dict(
                    source=source[start + 2:idx],
                    firstchar=start,
                    lastchar=idx,
                    lineno=lineno)
            continue

        if in_m_comment:
            # The `*` of the opening `/*` must not double as the `*` of the
            # closing `*/`, so `/*/` does not terminate the comment.
            if char == '/' and previous == '*' and idx - start >= 3:
                in_m_comment = False
                yield dict(
                    source=source[start + 2:idx - 1],
                    firstchar=start,
                    lastchar=idx + 1,
                    lineno=lineno)
            continue

        if in_string:
            if not follows_escape:
                closes_string = char == in_string
                # Only back-tick strings may span an unescaped line break.
                ends_at_newline = char == '\n' and in_string != '`'
                if closes_string or ends_at_newline:
                    in_string = False
            continue

        # Slicing (rather than indexing `idx + 1`) stays in bounds when the
        # input ends with a lone `/`.
        opener = source[idx:idx + 2]
        if opener == '/*':
            in_m_comment = True
            start = idx
        elif opener == '//':
            in_s_comment = True
            start = idx
        elif char in quotes:
            in_string = char