[generation] Tolerate unicode in source files (#1411)

This commit is contained in:
jugglinmike 2018-02-09 11:27:33 -05:00 committed by Leo Balter
parent 18c1e799a0
commit 7b01f9799f
7 changed files with 22 additions and 14 deletions

View File

@ -1,6 +1,7 @@
# Copyright (C) 2016 the V8 project authors. All rights reserved.
# This code is governed by the BSD license found in the LICENSE file.
import codecs
import re
from util.find_comments import find_comments
@ -9,10 +10,10 @@ from util.parse_yaml import parse_yaml
regionStartPattern = re.compile(r'-\s+(\S+)')
class Case:
def __init__(self, file_name):
def __init__(self, file_name, encoding):
self.attribs = dict(meta=None, regions=dict())
with open(file_name) as handle:
with codecs.open(file_name, 'r', encoding) as handle:
self.attribs = self._parse(handle.read())
def _parse(self, source):

View File

@ -14,18 +14,20 @@ class Expander:
self.templates = dict()
self.case_dir = case_dir
def _load_templates(self, template_class):
def _load_templates(self, template_class, encoding):
directory = os.path.join(self.case_dir, template_class)
file_names = map(
lambda x: os.path.join(directory, x),
filter(self.is_template_file, os.listdir(directory))
)
self.templates[template_class] = [Template(x) for x in file_names]
self.templates[template_class] = [
Template(x, encoding) for x in file_names
]
def _get_templates(self, template_class):
def _get_templates(self, template_class, encoding):
if not template_class in self.templates:
self._load_templates(template_class)
self._load_templates(template_class, encoding)
return self.templates[template_class]
@ -49,10 +51,10 @@ class Expander:
yield test
def expand_case(self, file_name, encoding):
case = Case(file_name)
case = Case(file_name, encoding)
template_class = case.attribs['meta']['template']
templates = self.templates.get(template_class)
for template in self._get_templates(template_class):
for template in self._get_templates(template_class, encoding):
yield template.expand(file_name, os.path.basename(file_name[:-5]), case.attribs, encoding)

View File

@ -50,10 +50,10 @@ def indent(text, prefix = ' ', js_value = False):
return '\n'.join(indented)
class Template:
def __init__(self, filename):
def __init__(self, filename, encoding):
self.filename = filename
with open(filename) as template_file:
with codecs.open(filename, 'r', encoding) as template_file:
self.source = template_file.read()
self.attribs = dict()
@ -203,5 +203,6 @@ class Template:
frontmatter = self._frontmatter(case_filename, case_values)
body = self.expand_regions(self.source, case_values)
assert encoding == 'utf-8'
return Test(self.attribs['meta']['path'] + case_name + '.js',
source=codecs.encode(frontmatter + '\n' + body, encoding))

View File

@ -12,7 +12,7 @@ info: |
case info
---*/
before-Third valueSecond value-after
before-Third value (Special characters like `≠` should be tolerated.)Second value-after
/* Improperly-terminated comments should not break the tokenizer *

View File

@ -12,11 +12,13 @@ info: |
case info
---*/
before-First value-between-Third value-after
before-First value-between-Third value (Special characters like `≠` should be tolerated.)-after
before*Second value*between*First value*after
before/* " */Third valueafter
before/* " */Third value (Special characters like `≠` should be tolerated.)after
// Special characters like `≠` should be tolerated.
The following should not be expanded:

View File

@ -22,7 +22,7 @@ First value
//- second
Second value
//- third
Third value
Third value (Special characters like `≠` should be tolerated.)
//- fourth
Quote characters: " ' `
//- teardown

View File

@ -14,6 +14,8 @@ before*/*{ second }*/*between*/*{ first }*/*after
before/* " *//*{ third }*/after
// Special characters like `≠` should be tolerated.
The following should not be expanded:
/* */*{ first }*/