Some fixes for flake8

ZyX 2013-03-10 23:12:07 +04:00
parent 3fd1215091
commit 5da848fa4a
14 changed files with 1466 additions and 1539 deletions

View File

@@ -108,7 +108,7 @@ class Spec(object):
 			else:
 				proceed, echo, fhadproblem = item_func(item, data, context, echoerr)
 				if echo and fhadproblem:
-					echoerr(context=self.cmsg.format(key=context_key(context)+'/list item '+unicode(i)),
+					echoerr(context=self.cmsg.format(key=context_key(context) + '/list item ' + unicode(i)),
 						context_mark=value.mark,
 						problem=msg_func(item),
 						problem_mark=item.mark)
@@ -141,7 +141,7 @@ class Spec(object):
 	def check_tuple(self, value, context_mark, data, context, echoerr, start, end):
 		hadproblem = False
 		for (i, item, spec) in zip(itertools.count(), value, self.specs[start:end]):
-			proceed, ihadproblem = spec.match(item, value.mark, data, context + (('tuple item '+unicode(i), item),), echoerr)
+			proceed, ihadproblem = spec.match(item, value.mark, data, context + (('tuple item ' + unicode(i), item),), echoerr)
 			if ihadproblem:
 				hadproblem = True
 			if not proceed:
@@ -154,9 +154,9 @@ class Spec(object):
 	cmp_funcs = {
 		'le': lambda x, y: x <= y,
 		'lt': lambda x, y: x < y,
 		'ge': lambda x, y: x >= y,
 		'gt': lambda x, y: x > y,
 		'eq': lambda x, y: x == y,
 	}
@@ -424,8 +424,8 @@ main_spec = (Spec(
 	.context_message('Error while loading extensions configuration (key {key})'),
 ).context_message('Error while loading main configuration'))

-term_color_spec=Spec().unsigned().cmp('le', 255).copy
-true_color_spec=Spec().re('^[0-9a-fA-F]{6}$',
+term_color_spec = Spec().unsigned().cmp('le', 255).copy
+true_color_spec = Spec().re('^[0-9a-fA-F]{6}$',
 		lambda value: '"{0}" is not a six-digit hexadecimal unsigned integer written as a string'.format(value)).copy
 colors_spec = (Spec(
 	colors=Spec().unknown_spec(
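
Aside: the `true_color_spec` pattern above accepts exactly six hexadecimal digits and nothing else; for example:

import re

TRUE_COLOR = re.compile('^[0-9a-fA-F]{6}$')
assert TRUE_COLOR.match('ff0000')        # six hex digits: accepted
assert not TRUE_COLOR.match('#ff0000')   # leading '#': rejected
assert not TRUE_COLOR.match('fff')       # shorthand form: rejected
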
@@ -568,7 +568,7 @@ def check_full_segment_data(segment, data, context, echoerr):
 	names = [segment['name']]
 	if segment.get('type', 'function') == 'function':
-		module = segment.get('module', context[0][1].get('default_module', 'powerline.segments.'+ext))
+		module = segment.get('module', context[0][1].get('default_module', 'powerline.segments.' + ext))
 		names.insert(0, unicode(module) + '.' + unicode(names[0]))

 	segment_copy = segment.copy()
@@ -592,7 +592,7 @@ def check_full_segment_data(segment, data, context, echoerr):
 def check_segment_name(name, data, context, echoerr):
 	ext = data['ext']
 	if context[-2][1].get('type', 'function') == 'function':
-		module = context[-2][1].get('module', context[0][1].get('default_module', 'powerline.segments.'+ext))
+		module = context[-2][1].get('module', context[0][1].get('default_module', 'powerline.segments.' + ext))
 		with WithPath(data['import_paths']):
 			try:
 				func = getattr(__import__(unicode(module), fromlist=[unicode(name)]), unicode(name))
@@ -616,9 +616,9 @@ def check_segment_name(name, data, context, echoerr):
 			D_H_G_USED_STR = 'Divider highlight group used: '
 			for line in func.__doc__.split('\n'):
 				if H_G_USED_STR in line:
-					hl_groups.append(line[line.index(H_G_USED_STR)+len(H_G_USED_STR):])
+					hl_groups.append(line[line.index(H_G_USED_STR) + len(H_G_USED_STR):])
 				elif D_H_G_USED_STR in line:
-					divider_hl_group = line[line.index(D_H_G_USED_STR)+len(D_H_G_USED_STR)+2:-3]
+					divider_hl_group = line[line.index(D_H_G_USED_STR) + len(D_H_G_USED_STR) + 2:-3]

 			hadproblem = False
@@ -756,7 +756,7 @@ def check_segment_data_key(key, data, context, echoerr):
 			if 'name' in segment:
 				if key == segment['name']:
 					found = True
-				module = segment.get('module', theme.get('default_module', 'powerline.segments.'+ext))
+				module = segment.get('module', theme.get('default_module', 'powerline.segments.' + ext))
 				if key == unicode(module) + '.' + unicode(segment['name']):
 					found = True
 		if found:
@@ -861,6 +861,7 @@ def check(path=None):
 	))
 	lhadproblem = [False]
+
 	def load_config(stream):
 		r, hadproblem = load(stream)
 		if hadproblem:
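
Aside: `lhadproblem = [False]` above is the usual pre-`nonlocal` idiom (this code still supports Python 2) for letting a nested function set a flag in its enclosing scope; a minimal sketch:

hadproblem = [False]

def on_problem():
	# Mutating the one-element list requires no rebinding, so neither
	# `global` nor `nonlocal` is needed.
	hadproblem[0] = True

on_problem()
assert hadproblem[0] is True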

View File

@@ -1,23 +1,17 @@
-from .error import *
-from .tokens import *
-from .events import *
-from .nodes import *
-from .loader import *
-
 __version__ = '3.10'

+from .loader import Loader
+

 def load(stream, Loader=Loader):
 	"""
 	Parse the first YAML document in a stream
 	and produce the corresponding Python object.
 	"""
 	loader = Loader(stream)
 	try:
 		r = loader.get_single_data()
 		return r, loader.haserrors
 	finally:
 		loader.dispose()
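
Aside: `load()` returns a `(value, haserrors)` pair instead of raising on recoverable problems, so the linter can report every error in one pass. A hedged usage sketch (the import path and file name are assumptions; this view does not show file names):

from markedjson import load  # hypothetical import path

with open('config.json') as stream:  # hypothetical file
	value, haserrors = load(stream)
if haserrors:
	print('configuration contains errors')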

View File

@@ -1,119 +1,117 @@
 __all__ = ['Composer', 'ComposerError']

 from .error import MarkedYAMLError
-from .events import *
-from .nodes import *
+from .events import *  # NOQA
+from .nodes import *  # NOQA


 class ComposerError(MarkedYAMLError):
 	pass


 class Composer:
 	def __init__(self):
 		pass

 	def check_node(self):
 		# Drop the STREAM-START event.
 		if self.check_event(StreamStartEvent):
 			self.get_event()

 		# If there are more documents available?
 		return not self.check_event(StreamEndEvent)

 	def get_node(self):
 		# Get the root node of the next document.
 		if not self.check_event(StreamEndEvent):
 			return self.compose_document()

 	def get_single_node(self):
 		# Drop the STREAM-START event.
 		self.get_event()

 		# Compose a document if the stream is not empty.
 		document = None
 		if not self.check_event(StreamEndEvent):
 			document = self.compose_document()

 		# Ensure that the stream contains no more documents.
 		if not self.check_event(StreamEndEvent):
 			event = self.get_event()
 			raise ComposerError("expected a single document in the stream",
 					document.start_mark, "but found another document",
 					event.start_mark)

 		# Drop the STREAM-END event.
 		self.get_event()

 		return document

 	def compose_document(self):
 		# Drop the DOCUMENT-START event.
 		self.get_event()

 		# Compose the root node.
 		node = self.compose_node(None, None)

 		# Drop the DOCUMENT-END event.
 		self.get_event()

 		return node

 	def compose_node(self, parent, index):
-		event = self.peek_event()
 		self.descend_resolver(parent, index)
 		if self.check_event(ScalarEvent):
 			node = self.compose_scalar_node()
 		elif self.check_event(SequenceStartEvent):
 			node = self.compose_sequence_node()
 		elif self.check_event(MappingStartEvent):
 			node = self.compose_mapping_node()
 		self.ascend_resolver()
 		return node

 	def compose_scalar_node(self):
 		event = self.get_event()
 		tag = event.tag
 		if tag is None or tag == '!':
 			tag = self.resolve(ScalarNode, event.value, event.implicit, event.start_mark)
 		node = ScalarNode(tag, event.value,
 				event.start_mark, event.end_mark, style=event.style)
 		return node

 	def compose_sequence_node(self):
 		start_event = self.get_event()
 		tag = start_event.tag
 		if tag is None or tag == '!':
 			tag = self.resolve(SequenceNode, None, start_event.implicit)
 		node = SequenceNode(tag, [],
 				start_event.start_mark, None,
 				flow_style=start_event.flow_style)
 		index = 0
 		while not self.check_event(SequenceEndEvent):
 			node.value.append(self.compose_node(node, index))
 			index += 1
 		end_event = self.get_event()
 		node.end_mark = end_event.end_mark
 		return node

 	def compose_mapping_node(self):
 		start_event = self.get_event()
 		tag = start_event.tag
 		if tag is None or tag == '!':
 			tag = self.resolve(MappingNode, None, start_event.implicit)
 		node = MappingNode(tag, [],
 				start_event.start_mark, None,
 				flow_style=start_event.flow_style)
 		while not self.check_event(MappingEndEvent):
 			#key_event = self.peek_event()
 			item_key = self.compose_node(node, None)
 			#if item_key in node.value:
 			#	raise ComposerError("while composing a mapping", start_event.start_mark,
 			#			"found duplicate key", key_event.start_mark)
 			item_value = self.compose_node(node, item_key)
 			#node.value[item_key] = item_value
 			node.value.append((item_key, item_value))
 		end_event = self.get_event()
 		node.end_mark = end_event.end_mark
 		return node
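
For orientation: Composer sits between the parser (events) and the constructor (objects), and `get_single_node()` is the entry point that also enforces the single-document rule. A hedged sketch of how the surrounding Loader effectively drives it (the stream name is hypothetical):

loader = Loader(open('config.json'))  # Loader mixes this Composer in
node = loader.get_single_node()       # parser events -> one root node tree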

View File

@@ -1,324 +1,274 @@
 __all__ = ['BaseConstructor', 'Constructor', 'ConstructorError']

-from .error import *
-from .nodes import *
-from .markedvalue import *
-
-import collections, datetime, base64, binascii, re, sys, types
+from .error import MarkedYAMLError
+from .nodes import *  # NOQA
+from .markedvalue import gen_marked_value
+
+import collections
+import types

 from functools import wraps

 try:
 	from __builtin__ import unicode
 except ImportError:
-	unicode = str
+	unicode = str  # NOQA


 def marked(func):
 	@wraps(func)
 	def f(self, node, *args, **kwargs):
 		return gen_marked_value(func(self, node, *args, **kwargs), node.start_mark)
 	return f


 class ConstructorError(MarkedYAMLError):
 	pass


 class BaseConstructor:
 	yaml_constructors = {}
-	yaml_multi_constructors = {}

 	def __init__(self):
 		self.constructed_objects = {}
 		self.state_generators = []
 		self.deep_construct = False

 	def check_data(self):
 		# If there are more documents available?
 		return self.check_node()

 	def get_data(self):
 		# Construct and return the next document.
 		if self.check_node():
 			return self.construct_document(self.get_node())

 	def get_single_data(self):
 		# Ensure that the stream contains a single document and construct it.
 		node = self.get_single_node()
 		if node is not None:
 			return self.construct_document(node)
 		return None

 	def construct_document(self, node):
 		data = self.construct_object(node)
 		while self.state_generators:
 			state_generators = self.state_generators
 			self.state_generators = []
 			for generator in state_generators:
 				for dummy in generator:
 					pass
 		self.constructed_objects = {}
 		self.deep_construct = False
 		return data

 	def construct_object(self, node, deep=False):
 		if node in self.constructed_objects:
 			return self.constructed_objects[node]
 		if deep:
 			old_deep = self.deep_construct
 			self.deep_construct = True
 		constructor = None
 		tag_suffix = None
 		if node.tag in self.yaml_constructors:
 			constructor = self.yaml_constructors[node.tag]
 		else:
-			for tag_prefix in self.yaml_multi_constructors:
-				if node.tag.startswith(tag_prefix):
-					tag_suffix = node.tag[len(tag_prefix):]
-					constructor = self.yaml_multi_constructors[tag_prefix]
-					break
-			else:
-				if None in self.yaml_multi_constructors:
-					tag_suffix = node.tag
-					constructor = self.yaml_multi_constructors[None]
-				elif None in self.yaml_constructors:
-					constructor = self.yaml_constructors[None]
-				elif isinstance(node, ScalarNode):
-					constructor = self.__class__.construct_scalar
-				elif isinstance(node, SequenceNode):
-					constructor = self.__class__.construct_sequence
-				elif isinstance(node, MappingNode):
-					constructor = self.__class__.construct_mapping
+			raise ConstructorError(None, None, 'no constructor for tag %s' % node.tag)
 		if tag_suffix is None:
 			data = constructor(self, node)
 		else:
 			data = constructor(self, tag_suffix, node)
 		if isinstance(data, types.GeneratorType):
 			generator = data
 			data = next(generator)
 			if self.deep_construct:
 				for dummy in generator:
 					pass
 			else:
 				self.state_generators.append(generator)
 		self.constructed_objects[node] = data
 		if deep:
 			self.deep_construct = old_deep
 		return data

 	@marked
 	def construct_scalar(self, node):
 		if not isinstance(node, ScalarNode):
 			raise ConstructorError(None, None,
 					"expected a scalar node, but found %s" % node.id,
 					node.start_mark)
 		return node.value

 	def construct_sequence(self, node, deep=False):
 		if not isinstance(node, SequenceNode):
 			raise ConstructorError(None, None,
 					"expected a sequence node, but found %s" % node.id,
 					node.start_mark)
 		return [self.construct_object(child, deep=deep)
 				for child in node.value]

 	@marked
 	def construct_mapping(self, node, deep=False):
 		if not isinstance(node, MappingNode):
 			raise ConstructorError(None, None,
 					"expected a mapping node, but found %s" % node.id,
 					node.start_mark)
 		mapping = {}
 		for key_node, value_node in node.value:
 			key = self.construct_object(key_node, deep=deep)
 			if not isinstance(key, collections.Hashable):
 				self.echoerr('While constructing a mapping', node.start_mark,
 						'found unhashable key', key_node.start_mark)
 				continue
 			elif type(key.value) != unicode:
 				self.echoerr('Error while constructing a mapping', node.start_mark,
 						'found key that is not a string', key_node.start_mark)
 				continue
 			elif key in mapping:
 				self.echoerr('Error while constructing a mapping', node.start_mark,
 						'found duplicate key', key_node.start_mark)
 				continue
 			value = self.construct_object(value_node, deep=deep)
 			mapping[key] = value
 		return mapping

 	@classmethod
 	def add_constructor(cls, tag, constructor):
 		if not 'yaml_constructors' in cls.__dict__:
 			cls.yaml_constructors = cls.yaml_constructors.copy()
 		cls.yaml_constructors[tag] = constructor


 class Constructor(BaseConstructor):
 	def construct_scalar(self, node):
 		if isinstance(node, MappingNode):
 			for key_node, value_node in node.value:
 				if key_node.tag == 'tag:yaml.org,2002:value':
 					return self.construct_scalar(value_node)
 		return BaseConstructor.construct_scalar(self, node)

 	def flatten_mapping(self, node):
 		merge = []
 		index = 0
 		while index < len(node.value):
 			key_node, value_node = node.value[index]
 			if key_node.tag == 'tag:yaml.org,2002:merge':
 				del node.value[index]
 				if isinstance(value_node, MappingNode):
 					self.flatten_mapping(value_node)
 					merge.extend(value_node.value)
 				elif isinstance(value_node, SequenceNode):
 					submerge = []
 					for subnode in value_node.value:
 						if not isinstance(subnode, MappingNode):
 							raise ConstructorError("while constructing a mapping",
 									node.start_mark,
 									"expected a mapping for merging, but found %s"
 									% subnode.id, subnode.start_mark)
 						self.flatten_mapping(subnode)
 						submerge.append(subnode.value)
 					submerge.reverse()
 					for value in submerge:
 						merge.extend(value)
 				else:
 					raise ConstructorError("while constructing a mapping", node.start_mark,
 							"expected a mapping or list of mappings for merging, but found %s"
 							% value_node.id, value_node.start_mark)
 			elif key_node.tag == 'tag:yaml.org,2002:value':
 				key_node.tag = 'tag:yaml.org,2002:str'
 				index += 1
 			else:
 				index += 1
 		if merge:
 			node.value = merge + node.value

 	def construct_mapping(self, node, deep=False):
 		if isinstance(node, MappingNode):
 			self.flatten_mapping(node)
 		return BaseConstructor.construct_mapping(self, node, deep=deep)

 	@marked
 	def construct_yaml_null(self, node):
 		self.construct_scalar(node)
 		return None

-	bool_values = {
-		'yes': True,
-		'no': False,
-		'true': True,
-		'false': False,
-		'on': True,
-		'off': False,
-	}
-
 	@marked
 	def construct_yaml_bool(self, node):
-		value = self.construct_scalar(node)
-		return self.bool_values[value.lower()]
+		value = self.construct_scalar(node).value
+		return bool(value)

 	@marked
 	def construct_yaml_int(self, node):
-		value = self.construct_scalar(node)
-		value = value.replace('_', '')
+		value = self.construct_scalar(node).value
 		sign = +1
 		if value[0] == '-':
 			sign = -1
 		if value[0] in '+-':
 			value = value[1:]
 		if value == '0':
 			return 0
-		elif value.startswith('0b'):
-			return sign*int(value[2:], 2)
-		elif value.startswith('0x'):
-			return sign*int(value[2:], 16)
-		elif value[0] == '0':
-			return sign*int(value, 8)
-		elif ':' in value:
-			digits = [int(part) for part in value.split(':')]
-			digits.reverse()
-			base = 1
-			value = 0
-			for digit in digits:
-				value += digit*base
-				base *= 60
-			return sign*value
 		else:
-			return sign*int(value)
+			return sign * int(value)

 	@marked
 	def construct_yaml_float(self, node):
-		value = self.construct_scalar(node)
-		value = value.replace('_', '').lower()
+		value = self.construct_scalar(node).value
 		sign = +1
 		if value[0] == '-':
 			sign = -1
 		if value[0] in '+-':
 			value = value[1:]
-		elif ':' in value:
-			digits = [float(part) for part in value.split(':')]
-			digits.reverse()
-			base = 1
-			value = 0.0
-			for digit in digits:
-				value += digit*base
-				base *= 60
-			return sign*value
 		else:
-			return sign*float(value)
+			return sign * float(value)

 	def construct_yaml_str(self, node):
 		return self.construct_scalar(node)

 	def construct_yaml_seq(self, node):
 		data = gen_marked_value([], node.start_mark)
 		yield data
 		data.extend(self.construct_sequence(node))

 	def construct_yaml_map(self, node):
 		data = gen_marked_value({}, node.start_mark)
 		yield data
 		value = self.construct_mapping(node)
 		data.update(value)

 	def construct_undefined(self, node):
 		raise ConstructorError(None, None,
 				"could not determine a constructor for the tag %r" % node.tag,
 				node.start_mark)


 Constructor.add_constructor(
 		'tag:yaml.org,2002:null',
 		Constructor.construct_yaml_null)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:bool',
 		Constructor.construct_yaml_bool)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:int',
 		Constructor.construct_yaml_int)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:float',
 		Constructor.construct_yaml_float)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:str',
 		Constructor.construct_yaml_str)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:seq',
 		Constructor.construct_yaml_seq)

 Constructor.add_constructor(
 		'tag:yaml.org,2002:map',
 		Constructor.construct_yaml_map)

 Constructor.add_constructor(None,
 		Constructor.construct_undefined)
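
Aside: with the multi-constructor lookup gone, unknown tags now fail fast in `construct_object`, and the YAML 1.1 number syntaxes ('0b', '0x', octal, sexagesimal, '_' separators) are no longer accepted. A hedged restatement of the new integer rule as a stand-alone function:

def parse_int(value):
	# Decimal only, as in construct_yaml_int above; hex/octal/binary
	# forms now raise ValueError via int().
	sign = -1 if value[0] == '-' else +1
	if value[0] in '+-':
		value = value[1:]
	return 0 if value == '0' else sign * int(value)

assert parse_int('-42') == -42
# parse_int('0x2a') raises ValueError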

View File

@@ -1,4 +1,3 @@
 __all__ = ['Mark', 'YAMLError', 'MarkedYAMLError', 'echoerr']
@@ -6,82 +5,84 @@ import sys
 def strtrans(s):
 	return s.replace('\t', '>---')


 class Mark:
 	def __init__(self, name, index, line, column, buffer, pointer):
 		self.name = name
 		self.index = index
 		self.line = line
 		self.column = column
 		self.buffer = buffer
 		self.pointer = pointer

 	def get_snippet(self, indent=4, max_length=75):
 		if self.buffer is None:
 			return None
 		head = ''
 		start = self.pointer
-		while start > 0 and self.buffer[start-1] not in '\0\n':
+		while start > 0 and self.buffer[start - 1] not in '\0\n':
 			start -= 1
-			if self.pointer-start > max_length/2-1:
+			if self.pointer - start > max_length / 2 - 1:
 				head = ' ... '
 				start += 5
 				break
 		tail = ''
 		end = self.pointer
 		while end < len(self.buffer) and self.buffer[end] not in '\0\n':
 			end += 1
-			if end-self.pointer > max_length/2-1:
+			if end - self.pointer > max_length / 2 - 1:
 				tail = ' ... '
 				end -= 5
 				break
-		snippet = [self.buffer[start:self.pointer], self.buffer[self.pointer], self.buffer[self.pointer+1:end]]
+		snippet = [self.buffer[start:self.pointer], self.buffer[self.pointer], self.buffer[self.pointer + 1:end]]
 		snippet = [strtrans(s) for s in snippet]
 		return ' ' * indent + head + ''.join(snippet) + tail + '\n' \
 			+ ' ' * (indent + len(head) + len(snippet[0])) + '^'

 	def __str__(self):
 		snippet = self.get_snippet()
 		where = " in \"%s\", line %d, column %d" \
-			% (self.name, self.line+1, self.column+1)
+			% (self.name, self.line + 1, self.column + 1)
 		if snippet is not None:
 			where += ":\n" + snippet
 		if type(where) is str:
 			return where
 		else:
 			return where.encode('utf-8')


 class YAMLError(Exception):
 	pass


 def echoerr(*args, **kwargs):
 	sys.stderr.write('\n')
 	sys.stderr.write(format_error(*args, **kwargs) + '\n')


 def format_error(context=None, context_mark=None, problem=None, problem_mark=None, note=None):
 	lines = []
 	if context is not None:
 		lines.append(context)
 	if context_mark is not None \
 			and (problem is None or problem_mark is None
 				or context_mark.name != problem_mark.name
 				or context_mark.line != problem_mark.line
 				or context_mark.column != problem_mark.column):
 		lines.append(str(context_mark))
 	if problem is not None:
 		lines.append(problem)
 	if problem_mark is not None:
 		lines.append(str(problem_mark))
 	if note is not None:
 		lines.append(note)
 	return '\n'.join(lines)


 class MarkedYAMLError(YAMLError):
 	def __init__(self, context=None, context_mark=None,
 			problem=None, problem_mark=None, note=None):
 		YAMLError.__init__(self, format_error(context, context_mark, problem,
 			problem_mark, note))
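
Aside: `format_error()` assembles only the parts it is given, so callers may pass any subset of context, problem, marks, and note; for example:

msg = format_error(
	context='Error while loading main configuration',
	problem='expected a string')
assert msg == 'Error while loading main configuration\nexpected a string'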

View File

@@ -1,83 +1,97 @@
 # Abstract classes.

 class Event(object):
 	def __init__(self, start_mark=None, end_mark=None):
 		self.start_mark = start_mark
 		self.end_mark = end_mark

 	def __repr__(self):
 		attributes = [key for key in ['implicit', 'value']
 				if hasattr(self, key)]
 		arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
 				for key in attributes])
 		return '%s(%s)' % (self.__class__.__name__, arguments)


 class NodeEvent(Event):
 	def __init__(self, start_mark=None, end_mark=None):
 		self.start_mark = start_mark
 		self.end_mark = end_mark


 class CollectionStartEvent(NodeEvent):
 	def __init__(self, implicit, start_mark=None, end_mark=None,
 			flow_style=None):
 		self.tag = None
 		self.implicit = implicit
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.flow_style = flow_style


 class CollectionEndEvent(Event):
 	pass


 # Implementations.

 class StreamStartEvent(Event):
 	def __init__(self, start_mark=None, end_mark=None, encoding=None):
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.encoding = encoding


 class StreamEndEvent(Event):
 	pass


 class DocumentStartEvent(Event):
 	def __init__(self, start_mark=None, end_mark=None,
 			explicit=None, version=None, tags=None):
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.explicit = explicit
 		self.version = version
 		self.tags = tags


 class DocumentEndEvent(Event):
 	def __init__(self, start_mark=None, end_mark=None,
 			explicit=None):
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.explicit = explicit


 class AliasEvent(NodeEvent):
 	pass


 class ScalarEvent(NodeEvent):
 	def __init__(self, implicit, value,
 			start_mark=None, end_mark=None, style=None):
 		self.tag = None
 		self.implicit = implicit
 		self.value = value
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.style = style


 class SequenceStartEvent(CollectionStartEvent):
 	pass


 class SequenceEndEvent(CollectionEndEvent):
 	pass


 class MappingStartEvent(CollectionStartEvent):
 	pass


 class MappingEndEvent(CollectionEndEvent):
 	pass

View File

@@ -1,26 +1,24 @@
 __all__ = ['Loader']

-from .reader import *
-from .scanner import *
-from .parser import *
-from .composer import *
-from .constructor import *
-from .resolver import *
+from .reader import Reader
+from .scanner import Scanner
+from .parser import Parser
+from .composer import Composer
+from .constructor import Constructor
+from .resolver import Resolver
 from .error import echoerr


 class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
 	def __init__(self, stream):
 		Reader.__init__(self, stream)
 		Scanner.__init__(self)
 		Parser.__init__(self)
 		Composer.__init__(self)
 		Constructor.__init__(self)
 		Resolver.__init__(self)
 		self.haserrors = False

 	def echoerr(self, *args, **kwargs):
 		echoerr(*args, **kwargs)
 		self.haserrors = True
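
Aside: `Loader` wires the pipeline together by multiple inheritance, initializing each stage by name rather than via `super()`, and funnels every stage's `echoerr` through one `haserrors` flag. A minimal sketch of the same pattern with hypothetical stage classes:

class StageOne:
	def __init__(self):
		self.tokens = []

class StageTwo:
	def __init__(self):
		self.events = []

class Pipeline(StageOne, StageTwo):
	def __init__(self):
		StageOne.__init__(self)  # explicit per-base initialization, as above
		StageTwo.__init__(self)
		self.haserrors = False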

View File

@@ -1,26 +1,29 @@
 __all__ = ['gen_marked_value', 'MarkedValue']


 class MarkedValue:
 	def __init__(self, value, mark):
 		self.mark = mark
 		self.value = value


 classcache = {}


 def gen_marked_value(value, mark):
 	if value.__class__ in classcache:
 		Marked = classcache[value.__class__]
 	else:
 		class Marked(MarkedValue):
 			for func in value.__class__.__dict__:
 				if func not in set(('__init__', '__new__', '__getattribute__')):
 					if func in set(('__eq__',)):
 						# HACK to make marked dictionaries always work
 						exec (('def {0}(self, *args):\n'
 								'	return self.value.{0}(*[arg.value if isinstance(arg, MarkedValue) else arg for arg in args])').format(func))
 					else:
 						exec (('def {0}(self, *args, **kwargs):\n'
 								'	return self.value.{0}(*args, **kwargs)\n').format(func))
 		classcache[value.__class__] = Marked
 	return Marked(value, mark)
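
Aside: `gen_marked_value()` builds (and caches) one proxy class per wrapped type, so the result behaves like the underlying value while carrying its source mark. A hedged illustration:

d = gen_marked_value({'answer': 42}, None)  # None stands in for a real Mark
assert d == {'answer': 42}   # the generated __eq__ delegates to the dict
assert d.value['answer'] == 42
assert d.mark is None        # the source position travels with the value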

View File

@@ -1,49 +1,53 @@
 class Node(object):
 	def __init__(self, tag, value, start_mark, end_mark):
 		self.tag = tag
 		self.value = value
 		self.start_mark = start_mark
 		self.end_mark = end_mark

 	def __repr__(self):
 		value = self.value
 		#if isinstance(value, list):
 		#	if len(value) == 0:
 		#		value = '<empty>'
 		#	elif len(value) == 1:
 		#		value = '<1 item>'
 		#	else:
 		#		value = '<%d items>' % len(value)
 		#else:
 		#	if len(value) > 75:
 		#		value = repr(value[:70]+u' ... ')
 		#	else:
 		#		value = repr(value)
 		value = repr(value)
 		return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)


 class ScalarNode(Node):
 	id = 'scalar'

 	def __init__(self, tag, value,
 			start_mark=None, end_mark=None, style=None):
 		self.tag = tag
 		self.value = value
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.style = style


 class CollectionNode(Node):
 	def __init__(self, tag, value,
 			start_mark=None, end_mark=None, flow_style=None):
 		self.tag = tag
 		self.value = value
 		self.start_mark = start_mark
 		self.end_mark = end_mark
 		self.flow_style = flow_style


 class SequenceNode(CollectionNode):
 	id = 'sequence'


 class MappingNode(CollectionNode):
 	id = 'mapping'

View File

@@ -1,255 +1,240 @@
 __all__ = ['Parser', 'ParserError']

 from .error import MarkedYAMLError
-from .tokens import *
-from .events import *
-from .scanner import *
+from .tokens import *  # NOQA
+from .events import *  # NOQA


 class ParserError(MarkedYAMLError):
 	pass


 class Parser:
-	# Since writing a recursive-descendant parser is a straightforward task, we
-	# do not give many comments here.
-
-	DEFAULT_TAGS = {
-		'!': '!',
-		'!!': 'tag:yaml.org,2002:',
-	}
-
 	def __init__(self):
 		self.current_event = None
 		self.yaml_version = None
-		self.tag_handles = {}
 		self.states = []
 		self.marks = []
 		self.state = self.parse_stream_start

 	def dispose(self):
 		# Reset the state attributes (to clear self-references)
 		self.states = []
 		self.state = None

 	def check_event(self, *choices):
 		# Check the type of the next event.
 		if self.current_event is None:
 			if self.state:
 				self.current_event = self.state()
 		if self.current_event is not None:
 			if not choices:
 				return True
 			for choice in choices:
 				if isinstance(self.current_event, choice):
 					return True
 		return False

 	def peek_event(self):
 		# Get the next event.
 		if self.current_event is None:
 			if self.state:
 				self.current_event = self.state()
 		return self.current_event

 	def get_event(self):
 		# Get the next event and proceed further.
 		if self.current_event is None:
 			if self.state:
 				self.current_event = self.state()
 		value = self.current_event
 		self.current_event = None
 		return value

 	# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
 	# implicit_document ::= block_node DOCUMENT-END*
 	# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

 	def parse_stream_start(self):
 		# Parse the stream start.
 		token = self.get_token()
 		event = StreamStartEvent(token.start_mark, token.end_mark,
 				encoding=token.encoding)

 		# Prepare the next state.
 		self.state = self.parse_implicit_document_start

 		return event

 	def parse_implicit_document_start(self):
 		# Parse an implicit document.
 		if not self.check_token(StreamEndToken):
-			self.tag_handles = self.DEFAULT_TAGS
 			token = self.peek_token()
 			start_mark = end_mark = token.start_mark
 			event = DocumentStartEvent(start_mark, end_mark, explicit=False)

 			# Prepare the next state.
 			self.states.append(self.parse_document_end)
 			self.state = self.parse_node

 			return event
 		else:
 			return self.parse_document_start()

 	def parse_document_start(self):
 		# Parse an explicit document.
 		if not self.check_token(StreamEndToken):
 			token = self.peek_token()
-			start_mark = token.start_mark
 			self.echoerr(None, None,
-					"expected '<stream end>', but found %r"
-					% self.peek_token().id,
-					self.peek_token().start_mark)
+					"expected '<stream end>', but found %r" % token.id,
+					token.start_mark)
 			return StreamEndEvent(token.start_mark, token.end_mark)
 		else:
 			# Parse the end of the stream.
 			token = self.get_token()
 			event = StreamEndEvent(token.start_mark, token.end_mark)
 			assert not self.states
 			assert not self.marks
 			self.state = None
 		return event

 	def parse_document_end(self):
 		# Parse the document end.
 		token = self.peek_token()
 		start_mark = end_mark = token.start_mark
 		explicit = False
 		event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)

 		# Prepare the next state.
 		self.state = self.parse_document_start

 		return event

 	def parse_document_content(self):
 		return self.parse_node()

 	def parse_node(self, indentless_sequence=False):
-		start_mark = end_mark = tag_mark = None
+		start_mark = end_mark = None
 		if start_mark is None:
 			start_mark = end_mark = self.peek_token().start_mark
 		event = None
 		implicit = True
 		if self.check_token(ScalarToken):
 			token = self.get_token()
 			end_mark = token.end_mark
 			if token.plain:
 				implicit = (True, False)
 			else:
 				implicit = (False, True)
 			event = ScalarEvent(implicit, token.value,
 					start_mark, end_mark, style=token.style)
 			self.state = self.states.pop()
 		elif self.check_token(FlowSequenceStartToken):
 			end_mark = self.peek_token().end_mark
 			event = SequenceStartEvent(implicit,
 					start_mark, end_mark, flow_style=True)
 			self.state = self.parse_flow_sequence_first_entry
 		elif self.check_token(FlowMappingStartToken):
 			end_mark = self.peek_token().end_mark
 			event = MappingStartEvent(implicit,
 					start_mark, end_mark, flow_style=True)
 			self.state = self.parse_flow_mapping_first_key
 		else:
 			token = self.peek_token()
 			raise ParserError("while parsing a flow node", start_mark,
 					"expected the node content, but found %r" % token.id,
 					token.start_mark)
 		return event

 	def parse_flow_sequence_first_entry(self):
 		token = self.get_token()
 		self.marks.append(token.start_mark)
 		return self.parse_flow_sequence_entry(first=True)

 	def parse_flow_sequence_entry(self, first=False):
 		if not self.check_token(FlowSequenceEndToken):
 			if not first:
 				if self.check_token(FlowEntryToken):
 					self.get_token()
 					if self.check_token(FlowSequenceEndToken):
 						token = self.peek_token()
 						self.echoerr("While parsing a flow sequence", self.marks[-1],
 								"expected sequence value, but got %r" % token.id, token.start_mark)
 				else:
 					token = self.peek_token()
 					raise ParserError("while parsing a flow sequence", self.marks[-1],
 							"expected ',' or ']', but got %r" % token.id, token.start_mark)

 			if not self.check_token(FlowSequenceEndToken):
 				self.states.append(self.parse_flow_sequence_entry)
 				return self.parse_node()
 		token = self.get_token()
 		event = SequenceEndEvent(token.start_mark, token.end_mark)
 		self.state = self.states.pop()
 		self.marks.pop()
 		return event

 	def parse_flow_sequence_entry_mapping_end(self):
 		self.state = self.parse_flow_sequence_entry
 		token = self.peek_token()
 		return MappingEndEvent(token.start_mark, token.start_mark)

 	def parse_flow_mapping_first_key(self):
 		token = self.get_token()
 		self.marks.append(token.start_mark)
 		return self.parse_flow_mapping_key(first=True)

 	def parse_flow_mapping_key(self, first=False):
 		if not self.check_token(FlowMappingEndToken):
 			if not first:
 				if self.check_token(FlowEntryToken):
 					self.get_token()
 					if self.check_token(FlowMappingEndToken):
 						token = self.peek_token()
 						self.echoerr("While parsing a flow mapping", self.marks[-1],
 								"expected mapping key, but got %r" % token.id, token.start_mark)
 				else:
 					token = self.peek_token()
 					raise ParserError("while parsing a flow mapping", self.marks[-1],
 							"expected ',' or '}', but got %r" % token.id, token.start_mark)
 			if self.check_token(KeyToken):
 				token = self.get_token()
 				if not self.check_token(ValueToken,
 						FlowEntryToken, FlowMappingEndToken):
 					self.states.append(self.parse_flow_mapping_value)
 					return self.parse_node()
 				else:
 					token = self.peek_token()
 					raise ParserError("while parsing a flow mapping", self.marks[-1],
 							"expected value, but got %r" % token.id, token.start_mark)
 			elif not self.check_token(FlowMappingEndToken):
 				token = self.peek_token()
 				expect_key = self.check_token(ValueToken, FlowEntryToken)
 				if not expect_key:
 					self.get_token()
 					expect_key = self.check_token(ValueToken)

 				if expect_key:
 					raise ParserError("while parsing a flow mapping", self.marks[-1],
 							"expected string key, but got %r" % token.id, token.start_mark)
 				else:
 					token = self.peek_token()
 					raise ParserError("while parsing a flow mapping", self.marks[-1],
 							"expected ':', but got %r" % token.id, token.start_mark)
 		token = self.get_token()
 		event = MappingEndEvent(token.start_mark, token.end_mark)
 		self.state = self.states.pop()
 		self.marks.pop()
 		return event

 	def parse_flow_mapping_value(self):
 		if self.check_token(ValueToken):
 			token = self.get_token()
 			if not self.check_token(FlowEntryToken, FlowMappingEndToken):
 				self.states.append(self.parse_flow_mapping_key)
 				return self.parse_node()

 		token = self.peek_token()
 		raise ParserError("while parsing a flow mapping", self.marks[-1],
 				"expected mapping value, but got %r" % token.id, token.start_mark)

View File

@@ -1,160 +1,144 @@
# This module contains abstractions for the input stream. You don't have to
# look further, there is no pretty code here.
#
# We define two classes here.
#
#   Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
#   Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
#   reader.peek(length=1) - return the next `length` characters
#   reader.forward(length=1) - move the current position `length` characters ahead.
#   reader.index - the number of the current character.
#   reader.line, reader.column - the line and the column of the current character.

__all__ = ['Reader', 'ReaderError']

from .error import YAMLError, Mark

import codecs
import re

try:
    from __builtin__ import unicode, unichr
except ImportError:
    unicode = str  # NOQA
    unichr = chr  # NOQA


class ReaderError(YAMLError):
    def __init__(self, name, position, character, encoding, reason):
        self.name = name
        self.character = character
        self.position = position
        self.encoding = encoding
        self.reason = reason

    def __str__(self):
        if isinstance(self.character, bytes):
            return "'%s' codec can't decode byte #x%02x: %s\n" \
                "  in \"%s\", position %d" \
                % (self.encoding, ord(self.character), self.reason,
                    self.name, self.position)
        else:
            return "unacceptable character #x%04x: %s\n" \
                "  in \"%s\", position %d" \
                % (self.character, self.reason,
                    self.name, self.position)


class Reader(object):
    # Reader:
    # - determines the data encoding and converts it to a unicode string,
    # - checks if characters are in the allowed range,
    # - adds '\0' to the end.
    # Reader accepts
    #  - a file-like object with its `read` method returning `str`.
    # Yeah, it's ugly and slow.
    def __init__(self, stream):
        self.name = None
        self.stream = None
        self.stream_pointer = 0
        self.eof = True
        self.buffer = ''
        self.pointer = 0
        self.full_buffer = unicode('')
        self.full_pointer = 0
        self.raw_buffer = None
        self.raw_decode = codecs.utf_8_decode
        self.encoding = 'utf-8'
        self.index = 0
        self.line = 0
        self.column = 0

        self.stream = stream
        self.name = getattr(stream, 'name', "<file>")
        self.eof = False
        self.raw_buffer = None

        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
            self.update_raw()
        self.update(1)

    def peek(self, index=0):
        try:
            return self.buffer[self.pointer + index]
        except IndexError:
            self.update(index + 1)
            return self.buffer[self.pointer + index]

    def prefix(self, length=1):
        if self.pointer + length >= len(self.buffer):
            self.update(length)
        return self.buffer[self.pointer:self.pointer + length]

    def forward(self, length=1):
        if self.pointer + length + 1 >= len(self.buffer):
            self.update(length + 1)
        while length:
            ch = self.buffer[self.pointer]
            self.pointer += 1
            self.full_pointer += 1
            self.index += 1
            if ch == '\n':
                self.line += 1
                self.column = 0
            length -= 1

    def get_mark(self):
        return Mark(self.name, self.index, self.line, self.column,
                    self.full_buffer, self.full_pointer)

    NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']')

    def check_printable(self, data):
        match = self.NON_PRINTABLE.search(data)
        if match:
            character = match.group()
            position = self.index + (len(self.buffer) - self.pointer) + match.start()
            raise ReaderError(self.name, position, ord(character), 'unicode', "special characters are not allowed")

    def update(self, length):
        if self.raw_buffer is None:
            return
        self.buffer = self.buffer[self.pointer:]
        self.pointer = 0
        while len(self.buffer) < length:
            if not self.eof:
                self.update_raw()
            try:
                data, converted = self.raw_decode(self.raw_buffer,
                                                  'strict', self.eof)
            except UnicodeDecodeError as exc:
                character = self.raw_buffer[exc.start]
                position = self.stream_pointer - len(self.raw_buffer) + exc.start
                raise ReaderError(self.name, position, character, exc.encoding, exc.reason)
            self.check_printable(data)
            self.buffer += data
            self.full_buffer += data
            self.raw_buffer = self.raw_buffer[converted:]
            if self.eof:
                self.buffer += '\0'
                self.raw_buffer = None
                break

    def update_raw(self, size=4096):
        data = self.stream.read(size)
        if self.raw_buffer is None:
            self.raw_buffer = data
        else:
            self.raw_buffer += data
        self.stream_pointer += len(data)
        if not data:
            self.eof = True
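
The buffering contract above — peek() never consumes, forward() consumes while tracking position, update() decodes and validates lazily in chunks — can be exercised in isolation. A rough self-contained sketch of the same pattern (illustrative names only, not this module's API; encoding detection, printability checks and marks are omitted, and this variant also advances the column on ordinary characters):

import io


class MiniReader(object):
    # Same peek/forward/buffering shape as the Reader above, simplified.
    def __init__(self, stream, chunk=4096):
        self.stream = stream
        self.chunk = chunk
        self.buffer = ''
        self.pointer = 0
        self.eof = False
        self.line = 0
        self.column = 0

    def _fill(self, need):
        # Lazily pull more data until `need` characters are available.
        while not self.eof and len(self.buffer) - self.pointer < need:
            data = self.stream.read(self.chunk)
            if data:
                self.buffer += data
            else:
                self.eof = True
                self.buffer += '\0'  # same end-of-stream sentinel as above

    def peek(self, index=0):
        self._fill(index + 1)
        return self.buffer[self.pointer + index]

    def forward(self, length=1):
        self._fill(length + 1)
        while length:
            ch = self.buffer[self.pointer]
            self.pointer += 1
            if ch == '\n':
                self.line += 1
                self.column = 0
            else:
                self.column += 1
            length -= 1


r = MiniReader(io.StringIO('{"x": 1}\n'))
assert r.peek() == '{' and r.peek(1) == '"'
r.forward(2)
assert (r.line, r.column) == (0, 2)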

View File

@@ -1,135 +1,131 @@
__all__ = ['BaseResolver', 'Resolver']

from .error import MarkedYAMLError
from .nodes import *  # NOQA

import re


class ResolverError(MarkedYAMLError):
    pass


class BaseResolver:
    DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
    DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
    DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'

    yaml_implicit_resolvers = {}
    yaml_path_resolvers = {}

    def __init__(self):
        self.resolver_exact_paths = []
        self.resolver_prefix_paths = []

    @classmethod
    def add_implicit_resolver(cls, tag, regexp, first):
        if 'yaml_implicit_resolvers' not in cls.__dict__:
            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
        if first is None:
            first = [None]
        for ch in first:
            cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))

    def descend_resolver(self, current_node, current_index):
        if not self.yaml_path_resolvers:
            return
        exact_paths = {}
        prefix_paths = []
        if current_node:
            depth = len(self.resolver_prefix_paths)
            for path, kind in self.resolver_prefix_paths[-1]:
                if self.check_resolver_prefix(depth, path, kind,
                                              current_node, current_index):
                    if len(path) > depth:
                        prefix_paths.append((path, kind))
                    else:
                        exact_paths[kind] = self.yaml_path_resolvers[path, kind]
        else:
            for path, kind in self.yaml_path_resolvers:
                if not path:
                    exact_paths[kind] = self.yaml_path_resolvers[path, kind]
                else:
                    prefix_paths.append((path, kind))
        self.resolver_exact_paths.append(exact_paths)
        self.resolver_prefix_paths.append(prefix_paths)

    def ascend_resolver(self):
        if not self.yaml_path_resolvers:
            return
        self.resolver_exact_paths.pop()
        self.resolver_prefix_paths.pop()

    def check_resolver_prefix(self, depth, path, kind,
                              current_node, current_index):
        node_check, index_check = path[depth - 1]
        if isinstance(node_check, str):
            if current_node.tag != node_check:
                return
        elif node_check is not None:
            if not isinstance(current_node, node_check):
                return
        if index_check is True and current_index is not None:
            return
        if (index_check is False or index_check is None) \
                and current_index is None:
            return
        if isinstance(index_check, str):
            if not (isinstance(current_index, ScalarNode)
                    and index_check == current_index.value):
                return
        elif isinstance(index_check, int) and not isinstance(index_check, bool):
            if index_check != current_index:
                return
        return True

    def resolve(self, kind, value, implicit, mark=None):
        if kind is ScalarNode and implicit[0]:
            if value == '':
                resolvers = self.yaml_implicit_resolvers.get('', [])
            else:
                resolvers = self.yaml_implicit_resolvers.get(value[0], [])
            resolvers += self.yaml_implicit_resolvers.get(None, [])
            for tag, regexp in resolvers:
                if regexp.match(value):
                    return tag
            else:
                self.echoerr('While resolving plain scalar', None,
                             'expected floating-point value, integer, null or boolean, but got %r' % value,
                             mark)
                return self.DEFAULT_SCALAR_TAG
        if kind is ScalarNode:
            return self.DEFAULT_SCALAR_TAG
        elif kind is SequenceNode:
            return self.DEFAULT_SEQUENCE_TAG
        elif kind is MappingNode:
            return self.DEFAULT_MAPPING_TAG


class Resolver(BaseResolver):
    pass


Resolver.add_implicit_resolver(
    'tag:yaml.org,2002:bool',
    re.compile(r'''^(?:true|false)$''', re.X),
    list('yYnNtTfFoO'))

Resolver.add_implicit_resolver(
    'tag:yaml.org,2002:float',
    re.compile(r'^-?(?:0|[1-9]\d*)(?=[.eE])(?:\.\d+)?(?:[eE][-+]?\d+)?$', re.X),
    list('-0123456789'))

Resolver.add_implicit_resolver(
    'tag:yaml.org,2002:int',
    re.compile(r'^(?:0|-?[1-9]\d*)$', re.X),
    list('-0123456789'))

Resolver.add_implicit_resolver(
    'tag:yaml.org,2002:null',
    re.compile(r'^null$', re.X),
    ['n'])
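
These four implicit resolvers are what turn a plain (unquoted) scalar into a typed value. A quick standalone check of the same regexes — simplified, since the real resolver indexes candidates by first character through `yaml_implicit_resolvers` rather than trying every pattern:

import re

RESOLVERS = [
    ('tag:yaml.org,2002:bool', re.compile(r'^(?:true|false)$', re.X)),
    ('tag:yaml.org,2002:float', re.compile(r'^-?(?:0|[1-9]\d*)(?=[.eE])(?:\.\d+)?(?:[eE][-+]?\d+)?$', re.X)),
    ('tag:yaml.org,2002:int', re.compile(r'^(?:0|-?[1-9]\d*)$', re.X)),
    ('tag:yaml.org,2002:null', re.compile(r'^null$', re.X)),
]


def resolve_plain(value):
    for tag, regexp in RESOLVERS:
        if regexp.match(value):
            return tag
    return 'tag:yaml.org,2002:str'  # DEFAULT_SCALAR_TAG fallback


assert resolve_plain('true') == 'tag:yaml.org,2002:bool'
assert resolve_plain('-12') == 'tag:yaml.org,2002:int'
assert resolve_plain('1.5e3') == 'tag:yaml.org,2002:float'
assert resolve_plain('oops') == 'tag:yaml.org,2002:str'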

View File

@@ -1,4 +1,3 @@
# Scanner produces tokens of the following types:
# STREAM-START
# STREAM-END
@@ -11,467 +10,459 @@
# FLOW-ENTRY
# KEY
# VALUE
# SCALAR(value, plain, style)
#
# Read comments in the Scanner code for more details.

__all__ = ['Scanner', 'ScannerError']

from .error import MarkedYAMLError
from .tokens import *  # NOQA


class ScannerError(MarkedYAMLError):
    pass


class SimpleKey:
    # See below simple keys treatment.
    def __init__(self, token_number, index, line, column, mark):
        self.token_number = token_number
        self.index = index
        self.line = line
        self.column = column
        self.mark = mark


class Scanner:
    def __init__(self):
        """Initialize the scanner."""
        # It is assumed that Scanner and Reader will have a common descendant.
        # Reader does the dirty work of checking for BOM and converting the
        # input data to Unicode. It also adds NUL to the end.
        #
        # Reader supports the following methods
        #   self.peek(i=0)     # peek the next i-th character
        #   self.prefix(l=1)   # peek the next l characters
        #   self.forward(l=1)  # read the next l characters and move the pointer

        # Have we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Add the STREAM-START token.
        self.fetch_stream_start()

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - after '{', '[', ',' (in the flow context),
        self.allow_simple_key = False

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, index, line, column, mark)
        # A simple key may start with SCALAR(flow), '[', or '{' tokens.
        self.possible_simple_keys = {}

    # Public methods.

    def check_token(self, *choices):
        # Check if the next token is one of the given types.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.tokens[0], choice):
                    return True
        return False

    def peek_token(self):
        # Return the next token, but do not delete it from the queue.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]

    def get_token(self):
        # Return the next token.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)

    # Private methods.

    def need_more_tokens(self):
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True

    def fetch_more_tokens(self):
        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Peek the next character.
        ch = self.peek()

        # Is it the end of stream?
        if ch == '\0':
            return self.fetch_stream_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == '[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == '{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == ']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == '}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ',':
            return self.fetch_flow_entry()

        # Is it the value indicator?
        if ch == ':' and self.flow_level:
            return self.fetch_value()

        # Is it a double-quoted scalar?
        if ch == '\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError("while scanning for the next token", None,
                           "found character %r that cannot start any token" % ch,
                           self.get_mark())

    # Simple keys treatment.

    def next_possible_simple_key(self):
        # Return the number of the nearest possible simple key. Actually we
        # don't need to loop through the whole dictionary. We may replace it
        # with the following code:
        #   if not self.possible_simple_keys:
        #       return None
        #   return self.possible_simple_keys[
        #       min(self.possible_simple_keys.keys())].token_number
        min_token_number = None
        for level in self.possible_simple_keys:
            key = self.possible_simple_keys[level]
            if min_token_number is None or key.token_number < min_token_number:
                min_token_number = key.token_number
        return min_token_number

    def stale_possible_simple_keys(self):
        # Remove entries that are no longer possible simple keys. According to
        # the YAML specification, simple keys
        # - should be limited to a single line.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.line:
                del self.possible_simple_keys[level]

    def save_possible_simple_key(self):
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        # SCALAR(flow), '[', and '{'.

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken + len(self.tokens)
            key = SimpleKey(token_number,
                            self.index, self.line, self.column, self.get_mark())
            self.possible_simple_keys[self.flow_level] = key

    def remove_possible_simple_key(self):
        # Remove the saved possible key position at the current flow level.
        if self.flow_level in self.possible_simple_keys:
            del self.possible_simple_keys[self.flow_level]

    # Fetchers.

    def fetch_stream_start(self):
        # We always add STREAM-START as the first token and STREAM-END as the
        # last token.

        # Read the token.
        mark = self.get_mark()

        # Add STREAM-START.
        self.tokens.append(StreamStartToken(mark, mark,
                                            encoding=self.encoding))

    def fetch_stream_end(self):
        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False
        self.possible_simple_keys = {}

        # Read the token.
        mark = self.get_mark()

        # Add STREAM-END.
        self.tokens.append(StreamEndToken(mark, mark))

        # The stream is finished.
        self.done = True

    def fetch_flow_sequence_start(self):
        self.fetch_flow_collection_start(FlowSequenceStartToken)

    def fetch_flow_mapping_start(self):
        self.fetch_flow_collection_start(FlowMappingStartToken)

    def fetch_flow_collection_start(self, TokenClass):
        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()

        # Increase the flow level.
        self.flow_level += 1

        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True

        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_sequence_end(self):
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Decrease the flow level.
        self.flow_level -= 1

        # No simple keys after ']' or '}'.
        self.allow_simple_key = False

        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_value(self):
        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:
            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(key.token_number - self.tokens_taken,
                               KeyToken(key.mark, key.mark))

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # Add VALUE.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(ValueToken(start_mark, end_mark))

    def fetch_flow_entry(self):
        # Simple keys are allowed after ','.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add FLOW-ENTRY.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(FlowEntryToken(start_mark, end_mark))

    def fetch_double(self):
        # A flow scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after flow scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar())

    def fetch_plain(self):
        self.save_possible_simple_key()

        # No simple keys after plain scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())

    # Checkers.

    def check_plain(self):
        return self.peek() in '0123456789-ntf'

    # Scanners.

    def scan_to_next_token(self):
        while self.peek() in ' \t\n':
            self.forward()

    def scan_flow_scalar(self):
        # See the specification for details.
        # Note that we lose the indentation rules for quoted scalars. Quoted
        # scalars don't need to adhere to indentation because " and ' clearly
        # mark the beginning and the end of them. Therefore we are less
        # restrictive than the specification requires. We only need to check
        # that document separators are not included in scalars.
        chunks = []
        start_mark = self.get_mark()
        quote = self.peek()
        self.forward()
        chunks.extend(self.scan_flow_scalar_non_spaces(start_mark))
        while self.peek() != quote:
            chunks.extend(self.scan_flow_scalar_spaces(start_mark))
            chunks.extend(self.scan_flow_scalar_non_spaces(start_mark))
        self.forward()
        end_mark = self.get_mark()
        return ScalarToken(''.join(chunks), False, start_mark, end_mark, '"')

    ESCAPE_REPLACEMENTS = {
        'b': '\x08',
        't': '\x09',
        'n': '\x0A',
        'f': '\x0C',
        'r': '\x0D',
        '\"': '\"',
        '\\': '\\',
    }

    ESCAPE_CODES = {
        'u': 4,
    }

    def scan_flow_scalar_non_spaces(self, start_mark):
        # See the specification for details.
        chunks = []
        while True:
            length = 0
            while self.peek(length) not in '\"\\\0 \t\n':
                length += 1
            if length:
                chunks.append(self.prefix(length))
                self.forward(length)
            ch = self.peek()
            if ch == '\\':
                self.forward()
                ch = self.peek()
                if ch in self.ESCAPE_REPLACEMENTS:
                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                    self.forward()
                elif ch in self.ESCAPE_CODES:
                    length = self.ESCAPE_CODES[ch]
                    self.forward()
                    for k in range(length):
                        if self.peek(k) not in '0123456789ABCDEFabcdef':
                            raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                               "expected escape sequence of %d hexadecimal numbers, but found %r" %
                                               (length, self.peek(k)), self.get_mark())
                    code = int(self.prefix(length), 16)
                    chunks.append(chr(code))
                    self.forward(length)
                else:
                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                       "found unknown escape character %r" % ch, self.get_mark())
            else:
                return chunks

    def scan_flow_scalar_spaces(self, start_mark):
        # See the specification for details.
        chunks = []
        length = 0
        while self.peek(length) in ' \t':
            length += 1
        whitespaces = self.prefix(length)
        self.forward(length)
        ch = self.peek()
        if ch == '\0':
            raise ScannerError("while scanning a quoted scalar", start_mark,
                               "found unexpected end of stream", self.get_mark())
        elif ch == '\n':
            raise ScannerError("while scanning a quoted scalar", start_mark,
                               "found unexpected line end", self.get_mark())
        else:
            chunks.append(whitespaces)
        return chunks

    def scan_plain(self):
        chunks = []
        start_mark = self.get_mark()
        spaces = []
        while True:
            length = 0
            while True:
                if self.peek(length) not in 'eE.0123456789nul-tr+fas':
                    break
                length += 1
            if length == 0:
                break
            self.allow_simple_key = False
            chunks.extend(spaces)
            chunks.append(self.prefix(length))
            self.forward(length)
        end_mark = self.get_mark()
        return ScalarToken(''.join(chunks), True, start_mark, end_mark)
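
The double-quoted escape tables above are easiest to see end to end on a plain string. A standalone sketch of the same decoding logic — the real scan_flow_scalar_non_spaces consumes characters from the buffered reader rather than indexing a string:

ESCAPE_REPLACEMENTS = {'b': '\x08', 't': '\x09', 'n': '\x0A',
                       'f': '\x0C', 'r': '\x0D', '"': '"', '\\': '\\'}
ESCAPE_CODES = {'u': 4}


def decode_escapes(s):
    out = []
    i = 0
    while i < len(s):
        ch = s[i]
        if ch != '\\':
            out.append(ch)
            i += 1
            continue
        i += 1
        ch = s[i]
        if ch in ESCAPE_REPLACEMENTS:
            # Single-character escapes map straight through the table.
            out.append(ESCAPE_REPLACEMENTS[ch])
            i += 1
        elif ch in ESCAPE_CODES:
            # \uXXXX: read the fixed number of hex digits and decode.
            length = ESCAPE_CODES[ch]
            digits = s[i + 1:i + 1 + length]
            out.append(chr(int(digits, 16)))
            i += 1 + length
        else:
            raise ValueError('found unknown escape character %r' % ch)
    return ''.join(out)


assert decode_escapes(r'a\tb\u0041') == 'a\tbA'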

View File

@@ -1,57 +1,65 @@
class Token(object):
    def __init__(self, start_mark, end_mark):
        self.start_mark = start_mark
        self.end_mark = end_mark

    def __repr__(self):
        attributes = [key for key in self.__dict__
                      if not key.endswith('_mark')]
        attributes.sort()
        arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
                               for key in attributes])
        return '%s(%s)' % (self.__class__.__name__, arguments)


class StreamStartToken(Token):
    id = '<stream start>'

    def __init__(self, start_mark=None, end_mark=None,
                 encoding=None):
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.encoding = encoding


class StreamEndToken(Token):
    id = '<stream end>'


class FlowSequenceStartToken(Token):
    id = '['


class FlowMappingStartToken(Token):
    id = '{'


class FlowSequenceEndToken(Token):
    id = ']'


class FlowMappingEndToken(Token):
    id = '}'


class KeyToken(Token):
    id = '?'


class ValueToken(Token):
    id = ':'


class FlowEntryToken(Token):
    id = ','


class ScalarToken(Token):
    id = '<scalar>'

    def __init__(self, value, plain, start_mark, end_mark, style=None):
        self.value = value
        self.plain = plain
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.style = style
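
Because __repr__ skips the *_mark attributes and sorts the rest, token dumps in error output stay short and deterministic. A hypothetical quick check, with the marks elided as None:

>>> ScalarToken('42', True, None, None)
ScalarToken(plain=True, style=None, value='42')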