diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index fbc41c3..0000000 --- a/.appveyor.yml +++ /dev/null @@ -1,37 +0,0 @@ -# TODO: update this from inside the build to use branch current version -version: '{build}' - -image: -- Visual Studio 2015 - -#cache: -#- 'C:\Python38\' -#- 'C:\Python38-x64' - -environment: - libyaml_repo_url: https://github.com/yaml/libyaml.git - libyaml_refspec: 0.2.5 - PYYAML_TEST_GROUP: all - -# matrix: -# - PYTHON_VER: Python27 -# - PYTHON_VER: Python27-x64 -# - PYTHON_VER: Python35 -# - PYTHON_VER: Python35-x64 -# - PYTHON_VER: Python36 -# - PYTHON_VER: Python36-x64 -# - PYTHON_VER: Python37 -# - PYTHON_VER: Python37-x64 -# - PYTHON_VER: Python38 -# - PYTHON_VER: Python38-x64 -# - PYTHON_VER: Python39 -# - PYTHON_VER: Python39-x64 - -#init: -#- ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) - -build_script: -- ps: packaging\build\appveyor.ps1 - -#on_finish: -#- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) diff --git a/.gitignore b/.gitignore index b59e62a..e531e48 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ /dist/* /build/* /lib/PyYAML.egg-info/* -/lib3/PyYAML.egg-info/* /wheelhouse/* /yaml/_yaml.c MANIFEST diff --git a/CHANGES b/CHANGES index 8d647a5..c37d9be 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,19 @@ For a complete changelog, see: * https://github.com/yaml/pyyaml/commits/ * https://bitbucket.org/xi/pyyaml/commits/ +6.0 (2021-10-13) + +* https://github.com/yaml/pyyaml/pull/327 -- Change README format to Markdown +* https://github.com/yaml/pyyaml/pull/483 -- Add a test for YAML 1.1 types +* https://github.com/yaml/pyyaml/pull/497 -- fix float resolver to ignore `.` and `._` +* https://github.com/yaml/pyyaml/pull/550 -- drop Python 2.7 +* https://github.com/yaml/pyyaml/pull/553 -- Fix spelling of “hexadecimal” +* https://github.com/yaml/pyyaml/pull/556 -- fix representation of Enum subclasses +* https://github.com/yaml/pyyaml/pull/557 -- fix libyaml extension compiler warnings +* https://github.com/yaml/pyyaml/pull/560 -- fix ResourceWarning on leaked file descriptors +* https://github.com/yaml/pyyaml/pull/561 -- always require `Loader` arg to `yaml.load()` +* https://github.com/yaml/pyyaml/pull/564 -- remove remaining direct distutils usage + 5.4.1 (2021-01-20) * https://github.com/yaml/pyyaml/pull/480 -- Fix stub compat with older pyyaml versions that may unwittingly load it diff --git a/MANIFEST.in b/MANIFEST.in index f4051a1..3ab0c4f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,10 +1,7 @@ include CHANGES README LICENSE Makefile pyproject.toml setup.py recursive-include lib/yaml *.py recursive-include lib/_yaml *.py -recursive-include lib3/yaml *.py -recursive-include lib3/_yaml *.py recursive-include examples *.py *.cfg *.yaml recursive-include tests/data * recursive-include tests/lib *.py -recursive-include tests/lib3 *.py recursive-include yaml * diff --git a/Makefile b/Makefile index 69efbdc..34a1d40 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -.PHONY: default build buildext force forceext install installext test testext dist clean +.PHONY: build dist -PYTHON=/usr/bin/python +PYTHON=/usr/bin/python3 TEST= PARAMETERS= @@ -42,3 +42,10 @@ windist: clean: ${PYTHON} setup.py --with-libyaml clean -a + rm -fr \ + dist/ \ + lib/PyYAML.egg-info/ \ + lib/yaml/__pycache__/ \ + tests/lib/__pycache__/ \ + yaml/_yaml.c \ + diff --git a/OAT.xml b/OAT.xml index 4ba24b6..4723d21 100644 --- a/OAT.xml +++ b/OAT.xml @@ -39,10 +39,9 @@ - + - diff --git a/README.OpenSource b/README.OpenSource index 13c8d5b..93e0b50 100644 --- a/README.OpenSource +++ b/README.OpenSource @@ -3,7 +3,7 @@ "Name": "PyYAML", "License": "MIT License", "License File": "LICENSE", - "Version Number": "5.4.1", + "Version Number": "6.0", "Owner": "xuyong59@huawei.com", "Upstream URL": "https://pypi.org/project/PyYAML", "Description": "A YAML parser and emitter for Python" diff --git a/README.md b/README.md index 49c87e7..7d01da2 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,53 @@ -PyYAML - The next generation YAML parser and emitter for Python. +PyYAML +====== -To install, type 'python setup.py install'. +A full-featured YAML processing framework for Python -By default, the setup.py script checks whether LibYAML is installed -and if so, builds and installs LibYAML bindings. To skip the check -and force installation of LibYAML bindings, use the option '--with-libyaml': -'python setup.py --with-libyaml install'. To disable the check and -skip building and installing LibYAML bindings, use '--without-libyaml': -'python setup.py --without-libyaml install'. +## Installation -When LibYAML bindings are installed, you may use fast LibYAML-based -parser and emitter as follows: +To install, type `python setup.py install`. + +By default, the `setup.py` script checks whether LibYAML is installed and if +so, builds and installs LibYAML bindings. +To skip the check and force installation of LibYAML bindings, use the option +`--with-libyaml`: `python setup.py --with-libyaml install`. +To disable the check and skip building and installing LibYAML bindings, use +`--without-libyaml`: `python setup.py --without-libyaml install`. + +When LibYAML bindings are installed, you may use fast LibYAML-based parser and +emitter as follows: >>> yaml.load(stream, Loader=yaml.CLoader) >>> yaml.dump(data, Dumper=yaml.CDumper) -If you don't trust the input stream, you should use: +If you don't trust the input YAML stream, you should use: >>> yaml.safe_load(stream) -PyYAML includes a comprehensive test suite. To run the tests, -type 'python setup.py test'. +## Testing -For more information, check the PyYAML homepage: -'https://github.com/yaml/pyyaml'. +PyYAML includes a comprehensive test suite. +To run the tests, type `python setup.py test`. -For PyYAML tutorial and reference, see: -'http://pyyaml.org/wiki/PyYAMLDocumentation'. +## Further Information -Discuss PyYAML with the maintainers in IRC #pyyaml irc.freenode.net. +* For more information, check the + [PyYAML homepage](https://github.com/yaml/pyyaml). -You may also use the YAML-Core mailing list: -'http://lists.sourceforge.net/lists/listinfo/yaml-core'. +* [PyYAML tutorial and reference](http://pyyaml.org/wiki/PyYAMLDocumentation). -Submit bug reports and feature requests to the PyYAML bug tracker: -'https://github.com/yaml/pyyaml/issues'. +* Discuss PyYAML with the maintainers on + Matrix at https://matrix.to/#/#pyyaml:yaml.io or + IRC #pyyaml irc.libera.chat + +* Submit bug reports and feature requests to the + [PyYAML bug tracker](https://github.com/yaml/pyyaml/issues). + +## License The PyYAML module was written by Kirill Simonov . It is currently maintained by the YAML and Python communities. PyYAML is released under the MIT license. + See the file LICENSE for more details. diff --git a/announcement.msg b/announcement.msg index 9117d51..abc1bbe 100644 --- a/announcement.msg +++ b/announcement.msg @@ -1,31 +1,44 @@ From: Ingy döt Net To: python-list@python.org, python-announce@python.org, yaml-core@lists.sourceforge.net -Subject: [ANN] PyYAML-5.4.1 Released +Subject: [ANN] PyYAML-6.0 Released -======================= -Announcing PyYAML-5.4.1 -======================= +===================== +Announcing PyYAML-6.0 +===================== A new release of PyYAML is now available: -https://github.com/yaml/pyyaml/releases/tag/5.4.1 +https://github.com/yaml/pyyaml/releases/tag/6.0 -This release contains a fix for AttributeError during module import in some -mixed version installations. +The previously-deprecated default loader selection in `yaml.load()` has +been removed; `Loader` is now a required argument. -PyYAML 5.4.1 will be the last release to support Python 2.7 (except for possible -critical bug fix releases). +Support for Python 2.7 and 3.5 has been dropped, and support for Python 3.10 +added. It now includes libyaml 0.2.5 extension wheels for MacOS M1 +(Apple Silicon/arm64), Linux s390x and Linux aarch64. + +Numerous other bugfixes and code cleanups are included in this release. Changes ======= -* https://github.com/yaml/pyyaml/pull/480 -- Fix stub compat with older pyyaml versions that may unwittingly load it +* https://github.com/yaml/pyyaml/pull/327 -- Change README format to Markdown +* https://github.com/yaml/pyyaml/pull/483 -- Add a test for YAML 1.1 types +* https://github.com/yaml/pyyaml/pull/497 -- fix float resolver to ignore `.` and `._` +* https://github.com/yaml/pyyaml/pull/550 -- drop Python 2.7 +* https://github.com/yaml/pyyaml/pull/553 -- Fix spelling of “hexadecimal” +* https://github.com/yaml/pyyaml/pull/556 -- fix representation of Enum subclasses +* https://github.com/yaml/pyyaml/pull/557 -- fix libyaml extension compiler warnings +* https://github.com/yaml/pyyaml/pull/560 -- fix ResourceWarning on leaked file descriptors +* https://github.com/yaml/pyyaml/pull/561 -- always require `Loader` arg to `yaml.load()` +* https://github.com/yaml/pyyaml/pull/564 -- remove remaining direct distutils usage Resources ========= -PyYAML IRC Channel: #pyyaml on irc.freenode.net +PyYAML Matrix: https://matrix.to/#/#pyyaml:yaml.io +PyYAML IRC Channel: #pyyaml on irc.libera.chat PyYAML homepage: https://github.com/yaml/pyyaml PyYAML documentation: http://pyyaml.org/wiki/PyYAMLDocumentation Source and binary installers: https://pypi.org/project/PyYAML/ @@ -83,7 +96,7 @@ The following people are currently responsible for maintaining PyYAML: * Ingy döt Net * Matt Davis -and many thanks to all who have contribributed! +and many thanks to all who have contributed! See: https://github.com/yaml/pyyaml/pulls diff --git a/bundle.json b/bundle.json index 8cbce33..b58a32a 100644 --- a/bundle.json +++ b/bundle.json @@ -1,7 +1,7 @@ { "name": "@ohos/PyYAML", "description": "A YAML parser and emitter for Python.", - "version": "5.4.1", + "version": "6.0", "license": "MIT", "publishAs": "code-segment", "segment": { diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index 3c98819..465041d 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -1,58 +1,29 @@ -from error import * +from .error import * -from tokens import * -from events import * -from nodes import * +from .tokens import * +from .events import * +from .nodes import * -from loader import * -from dumper import * - -__version__ = '5.4.1' +from .loader import * +from .dumper import * +__version__ = '6.0' try: - from cyaml import * + from .cyaml import * __with_libyaml__ = True except ImportError: __with_libyaml__ = False +import io #------------------------------------------------------------------------------ -# Warnings control +# XXX "Warnings control" is now deprecated. Leaving in the API function to not +# break code that uses it. #------------------------------------------------------------------------------ - -# 'Global' warnings state: -_warnings_enabled = { - 'YAMLLoadWarning': True, -} - -# Get or set global warnings' state def warnings(settings=None): if settings is None: - return _warnings_enabled - - if type(settings) is dict: - for key in settings: - if key in _warnings_enabled: - _warnings_enabled[key] = settings[key] - -# Warn when load() is called without Loader=... -class YAMLLoadWarning(RuntimeWarning): - pass - -def load_warning(method): - if _warnings_enabled['YAMLLoadWarning'] is False: - return - - import warnings - - message = ( - "calling yaml.%s() without Loader=... is deprecated, as the " - "default Loader is unsafe. Please read " - "https://msg.pyyaml.org/load for full details." - ) % method - - warnings.warn(message, YAMLLoadWarning, stacklevel=3) + return {} #------------------------------------------------------------------------------ def scan(stream, Loader=Loader): @@ -100,30 +71,22 @@ def compose_all(stream, Loader=Loader): finally: loader.dispose() -def load(stream, Loader=None): +def load(stream, Loader): """ Parse the first YAML document in a stream and produce the corresponding Python object. """ - if Loader is None: - load_warning('load') - Loader = FullLoader - loader = Loader(stream) try: return loader.get_single_data() finally: loader.dispose() -def load_all(stream, Loader=None): +def load_all(stream, Loader): """ Parse all YAML documents in a stream and produce corresponding Python objects. """ - if Loader is None: - load_warning('load_all') - Loader = FullLoader - loader = Loader(stream) try: while loader.check_data(): @@ -200,8 +163,7 @@ def emit(events, stream=None, Dumper=Dumper, """ getvalue = None if stream is None: - from StringIO import StringIO - stream = StringIO() + stream = io.StringIO() getvalue = stream.getvalue dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, allow_unicode=allow_unicode, line_break=line_break) @@ -216,7 +178,7 @@ def emit(events, stream=None, Dumper=Dumper, def serialize_all(nodes, stream=None, Dumper=Dumper, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, + encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None): """ Serialize a sequence of representation trees into a YAML stream. @@ -225,10 +187,9 @@ def serialize_all(nodes, stream=None, Dumper=Dumper, getvalue = None if stream is None: if encoding is None: - from StringIO import StringIO + stream = io.StringIO() else: - from cStringIO import StringIO - stream = StringIO() + stream = io.BytesIO() getvalue = stream.getvalue dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, allow_unicode=allow_unicode, line_break=line_break, @@ -255,7 +216,7 @@ def dump_all(documents, stream=None, Dumper=Dumper, default_style=None, default_flow_style=False, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, + encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None, sort_keys=True): """ Serialize a sequence of Python objects into a YAML stream. @@ -264,10 +225,9 @@ def dump_all(documents, stream=None, Dumper=Dumper, getvalue = None if stream is None: if encoding is None: - from StringIO import StringIO + stream = io.StringIO() else: - from cStringIO import StringIO - stream = StringIO() + stream = io.BytesIO() getvalue = stream.getvalue dumper = Dumper(stream, default_style=default_style, default_flow_style=default_flow_style, @@ -399,13 +359,12 @@ class YAMLObjectMetaclass(type): cls.yaml_dumper.add_representer(cls, cls.to_yaml) -class YAMLObject(object): +class YAMLObject(metaclass=YAMLObjectMetaclass): """ An object that can dump itself to a YAML stream and load itself from a YAML stream. """ - __metaclass__ = YAMLObjectMetaclass __slots__ = () # no direct instantiation, so allow immutable subclasses yaml_loader = [Loader, FullLoader, UnsafeLoader] @@ -414,18 +373,18 @@ class YAMLObject(object): yaml_tag = None yaml_flow_style = None + @classmethod def from_yaml(cls, loader, node): """ Convert a representation node to a Python object. """ return loader.construct_yaml_object(node, cls) - from_yaml = classmethod(from_yaml) + @classmethod def to_yaml(cls, dumper, data): """ Convert a Python object to a representation node. """ return dumper.represent_yaml_object(cls.yaml_tag, data, cls, flow_style=cls.yaml_flow_style) - to_yaml = classmethod(to_yaml) diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py index df85ef6..6d15cb4 100644 --- a/lib/yaml/composer.py +++ b/lib/yaml/composer.py @@ -1,14 +1,14 @@ __all__ = ['Composer', 'ComposerError'] -from error import MarkedYAMLError -from events import * -from nodes import * +from .error import MarkedYAMLError +from .events import * +from .nodes import * class ComposerError(MarkedYAMLError): pass -class Composer(object): +class Composer: def __init__(self): self.anchors = {} @@ -66,14 +66,14 @@ class Composer(object): anchor = event.anchor if anchor not in self.anchors: raise ComposerError(None, None, "found undefined alias %r" - % anchor.encode('utf-8'), event.start_mark) + % anchor, event.start_mark) return self.anchors[anchor] event = self.peek_event() anchor = event.anchor if anchor is not None: if anchor in self.anchors: raise ComposerError("found duplicate anchor %r; first occurrence" - % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + % anchor, self.anchors[anchor].start_mark, "second occurrence", event.start_mark) self.descend_resolver(parent, index) if self.check_event(ScalarEvent): @@ -88,7 +88,7 @@ class Composer(object): def compose_scalar_node(self, anchor): event = self.get_event() tag = event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = self.resolve(ScalarNode, event.value, event.implicit) node = ScalarNode(tag, event.value, event.start_mark, event.end_mark, style=event.style) @@ -99,7 +99,7 @@ class Composer(object): def compose_sequence_node(self, anchor): start_event = self.get_event() tag = start_event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = self.resolve(SequenceNode, None, start_event.implicit) node = SequenceNode(tag, [], start_event.start_mark, None, @@ -117,7 +117,7 @@ class Composer(object): def compose_mapping_node(self, anchor): start_event = self.get_event() tag = start_event.tag - if tag is None or tag == u'!': + if tag is None or tag == '!': tag = self.resolve(MappingNode, None, start_event.implicit) node = MappingNode(tag, [], start_event.start_mark, None, diff --git a/lib/yaml/constructor.py b/lib/yaml/constructor.py index ff4e368..619acd3 100644 --- a/lib/yaml/constructor.py +++ b/lib/yaml/constructor.py @@ -8,46 +8,15 @@ __all__ = [ 'ConstructorError' ] -from error import * -from nodes import * +from .error import * +from .nodes import * -import datetime - -import binascii, re, sys, types +import collections.abc, datetime, base64, binascii, re, sys, types class ConstructorError(MarkedYAMLError): pass - -class timezone(datetime.tzinfo): - def __init__(self, offset): - self._offset = offset - seconds = abs(offset).total_seconds() - self._name = 'UTC%s%02d:%02d' % ( - '-' if offset.days < 0 else '+', - seconds // 3600, - seconds % 3600 // 60 - ) - - def tzname(self, dt=None): - return self._name - - def utcoffset(self, dt=None): - return self._offset - - def dst(self, dt=None): - return datetime.timedelta(0) - - def __copy__(self): - return self.__deepcopy__() - - def __deepcopy__(self, memodict={}): - return self.__class__(self.utcoffset()) - - __repr__ = __str__ = tzname - - -class BaseConstructor(object): +class BaseConstructor: yaml_constructors = {} yaml_multi_constructors = {} @@ -133,7 +102,7 @@ class BaseConstructor(object): data = constructor(self, tag_suffix, node) if isinstance(data, types.GeneratorType): generator = data - data = generator.next() + data = next(generator) if self.deep_construct: for dummy in generator: pass @@ -168,11 +137,9 @@ class BaseConstructor(object): mapping = {} for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError, exc: + if not isinstance(key, collections.abc.Hashable): raise ConstructorError("while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % exc, key_node.start_mark) + "found unhashable key", key_node.start_mark) value = self.construct_object(value_node, deep=deep) mapping[key] = value return mapping @@ -189,33 +156,33 @@ class BaseConstructor(object): pairs.append((key, value)) return pairs + @classmethod def add_constructor(cls, tag, constructor): if not 'yaml_constructors' in cls.__dict__: cls.yaml_constructors = cls.yaml_constructors.copy() cls.yaml_constructors[tag] = constructor - add_constructor = classmethod(add_constructor) + @classmethod def add_multi_constructor(cls, tag_prefix, multi_constructor): if not 'yaml_multi_constructors' in cls.__dict__: cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() cls.yaml_multi_constructors[tag_prefix] = multi_constructor - add_multi_constructor = classmethod(add_multi_constructor) class SafeConstructor(BaseConstructor): def construct_scalar(self, node): if isinstance(node, MappingNode): for key_node, value_node in node.value: - if key_node.tag == u'tag:yaml.org,2002:value': + if key_node.tag == 'tag:yaml.org,2002:value': return self.construct_scalar(value_node) - return BaseConstructor.construct_scalar(self, node) + return super().construct_scalar(node) def flatten_mapping(self, node): merge = [] index = 0 while index < len(node.value): key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': + if key_node.tag == 'tag:yaml.org,2002:merge': del node.value[index] if isinstance(value_node, MappingNode): self.flatten_mapping(value_node) @@ -237,8 +204,8 @@ class SafeConstructor(BaseConstructor): raise ConstructorError("while constructing a mapping", node.start_mark, "expected a mapping or list of mappings for merging, but found %s" % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' index += 1 else: index += 1 @@ -248,19 +215,19 @@ class SafeConstructor(BaseConstructor): def construct_mapping(self, node, deep=False): if isinstance(node, MappingNode): self.flatten_mapping(node) - return BaseConstructor.construct_mapping(self, node, deep=deep) + return super().construct_mapping(node, deep=deep) def construct_yaml_null(self, node): self.construct_scalar(node) return None bool_values = { - u'yes': True, - u'no': False, - u'true': True, - u'false': False, - u'on': True, - u'off': False, + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, } def construct_yaml_bool(self, node): @@ -268,7 +235,7 @@ class SafeConstructor(BaseConstructor): return self.bool_values[value.lower()] def construct_yaml_int(self, node): - value = str(self.construct_scalar(node)) + value = self.construct_scalar(node) value = value.replace('_', '') sign = +1 if value[0] == '-': @@ -301,7 +268,7 @@ class SafeConstructor(BaseConstructor): nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). def construct_yaml_float(self, node): - value = str(self.construct_scalar(node)) + value = self.construct_scalar(node) value = value.replace('_', '').lower() sign = +1 if value[0] == '-': @@ -325,15 +292,23 @@ class SafeConstructor(BaseConstructor): return sign*float(value) def construct_yaml_binary(self, node): - value = self.construct_scalar(node) try: - return str(value).decode('base64') - except (binascii.Error, UnicodeEncodeError), exc: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: raise ConstructorError(None, None, "failed to decode base64 data: %s" % exc, node.start_mark) timestamp_regexp = re.compile( - ur'''^(?P[0-9][0-9][0-9][0-9]) + r'''^(?P[0-9][0-9][0-9][0-9]) -(?P[0-9][0-9]?) -(?P[0-9][0-9]?) (?:(?:[Tt]|[ \t]+) @@ -369,9 +344,9 @@ class SafeConstructor(BaseConstructor): delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) if values['tz_sign'] == '-': delta = -delta - tzinfo = timezone(delta) + tzinfo = datetime.timezone(delta) elif values['tz']: - tzinfo = timezone(datetime.timedelta(0)) + tzinfo = datetime.timezone.utc return datetime.datetime(year, month, day, hour, minute, second, fraction, tzinfo=tzinfo) @@ -425,11 +400,7 @@ class SafeConstructor(BaseConstructor): data.update(value) def construct_yaml_str(self, node): - value = self.construct_scalar(node) - try: - return value.encode('ascii') - except UnicodeEncodeError: - return value + return self.construct_scalar(node) def construct_yaml_seq(self, node): data = [] @@ -454,55 +425,55 @@ class SafeConstructor(BaseConstructor): def construct_undefined(self, node): raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + "could not determine a constructor for the tag %r" % node.tag, node.start_mark) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:null', + 'tag:yaml.org,2002:null', SafeConstructor.construct_yaml_null) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:bool', + 'tag:yaml.org,2002:bool', SafeConstructor.construct_yaml_bool) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:int', + 'tag:yaml.org,2002:int', SafeConstructor.construct_yaml_int) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:float', + 'tag:yaml.org,2002:float', SafeConstructor.construct_yaml_float) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:binary', + 'tag:yaml.org,2002:binary', SafeConstructor.construct_yaml_binary) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', + 'tag:yaml.org,2002:timestamp', SafeConstructor.construct_yaml_timestamp) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:omap', + 'tag:yaml.org,2002:omap', SafeConstructor.construct_yaml_omap) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', + 'tag:yaml.org,2002:pairs', SafeConstructor.construct_yaml_pairs) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:set', + 'tag:yaml.org,2002:set', SafeConstructor.construct_yaml_set) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:str', + 'tag:yaml.org,2002:str', SafeConstructor.construct_yaml_str) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:seq', + 'tag:yaml.org,2002:seq', SafeConstructor.construct_yaml_seq) SafeConstructor.add_constructor( - u'tag:yaml.org,2002:map', + 'tag:yaml.org,2002:map', SafeConstructor.construct_yaml_map) SafeConstructor.add_constructor(None, @@ -521,13 +492,29 @@ class FullConstructor(SafeConstructor): return self.state_keys_blacklist_regexp def construct_python_str(self, node): - return self.construct_scalar(node).encode('utf-8') + return self.construct_scalar(node) def construct_python_unicode(self, node): return self.construct_scalar(node) + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + def construct_python_long(self, node): - return long(self.construct_yaml_int(node)) + return self.construct_yaml_int(node) def construct_python_complex(self, node): return complex(self.construct_scalar(node)) @@ -542,57 +529,53 @@ class FullConstructor(SafeConstructor): if unsafe: try: __import__(name) - except ImportError, exc: + except ImportError as exc: raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + "cannot find module %r (%s)" % (name, exc), mark) if name not in sys.modules: raise ConstructorError("while constructing a Python module", mark, - "module %r is not imported" % name.encode('utf-8'), mark) + "module %r is not imported" % name, mark) return sys.modules[name] def find_python_name(self, name, mark, unsafe=False): if not name: raise ConstructorError("while constructing a Python object", mark, "expected non-empty name appended to the tag", mark) - if u'.' in name: + if '.' in name: module_name, object_name = name.rsplit('.', 1) else: - module_name = '__builtin__' + module_name = 'builtins' object_name = name if unsafe: try: __import__(module_name) - except ImportError, exc: + except ImportError as exc: raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + "cannot find module %r (%s)" % (module_name, exc), mark) if module_name not in sys.modules: raise ConstructorError("while constructing a Python object", mark, - "module %r is not imported" % module_name.encode('utf-8'), mark) + "module %r is not imported" % module_name, mark) module = sys.modules[module_name] if not hasattr(module, object_name): raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" % (object_name.encode('utf-8'), - module.__name__), mark) + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) return getattr(module, object_name) def construct_python_name(self, suffix, node): value = self.construct_scalar(node) if value: raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) + "expected the empty value, but found %r" % value, node.start_mark) return self.find_python_name(suffix, node.start_mark) def construct_python_module(self, suffix, node): value = self.construct_scalar(node) if value: raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) + "expected the empty value, but found %r" % value, node.start_mark) return self.find_python_module(suffix, node.start_mark) - class classobj: pass - def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False, unsafe=False): if not args: @@ -600,16 +583,11 @@ class FullConstructor(SafeConstructor): if not kwds: kwds = {} cls = self.find_python_name(suffix, node.start_mark) - if not (unsafe or isinstance(cls, type) or isinstance(cls, type(self.classobj))): + if not (unsafe or isinstance(cls, type)): raise ConstructorError("while constructing a Python instance", node.start_mark, "expected a class, but found %r" % type(cls), node.start_mark) - if newobj and isinstance(cls, type(self.classobj)) \ - and not args and not kwds: - instance = self.classobj() - instance.__class__ = cls - return instance - elif newobj and isinstance(cls, type): + if newobj and isinstance(cls, type): return cls.__new__(cls, *args, **kwds) else: return cls(*args, **kwds) @@ -681,51 +659,55 @@ class FullConstructor(SafeConstructor): return self.construct_python_object_apply(suffix, node, newobj=True) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/none', + 'tag:yaml.org,2002:python/none', FullConstructor.construct_yaml_null) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/bool', + 'tag:yaml.org,2002:python/bool', FullConstructor.construct_yaml_bool) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/str', + 'tag:yaml.org,2002:python/str', FullConstructor.construct_python_str) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/unicode', + 'tag:yaml.org,2002:python/unicode', FullConstructor.construct_python_unicode) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/int', + 'tag:yaml.org,2002:python/bytes', + FullConstructor.construct_python_bytes) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/int', FullConstructor.construct_yaml_int) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/long', + 'tag:yaml.org,2002:python/long', FullConstructor.construct_python_long) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/float', + 'tag:yaml.org,2002:python/float', FullConstructor.construct_yaml_float) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/complex', + 'tag:yaml.org,2002:python/complex', FullConstructor.construct_python_complex) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/list', + 'tag:yaml.org,2002:python/list', FullConstructor.construct_yaml_seq) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/tuple', + 'tag:yaml.org,2002:python/tuple', FullConstructor.construct_python_tuple) FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/dict', + 'tag:yaml.org,2002:python/dict', FullConstructor.construct_yaml_map) FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/name:', + 'tag:yaml.org,2002:python/name:', FullConstructor.construct_python_name) class UnsafeConstructor(FullConstructor): @@ -745,19 +727,19 @@ class UnsafeConstructor(FullConstructor): instance, state, unsafe=True) UnsafeConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/module:', + 'tag:yaml.org,2002:python/module:', UnsafeConstructor.construct_python_module) UnsafeConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object:', + 'tag:yaml.org,2002:python/object:', UnsafeConstructor.construct_python_object) UnsafeConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/new:', + 'tag:yaml.org,2002:python/object/new:', UnsafeConstructor.construct_python_object_new) UnsafeConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/apply:', + 'tag:yaml.org,2002:python/object/apply:', UnsafeConstructor.construct_python_object_apply) # Constructor is same as UnsafeConstructor. Need to leave this in place in case diff --git a/lib/yaml/cyaml.py b/lib/yaml/cyaml.py index 768b49d..0c21345 100644 --- a/lib/yaml/cyaml.py +++ b/lib/yaml/cyaml.py @@ -6,12 +6,12 @@ __all__ = [ from yaml._yaml import CParser, CEmitter -from constructor import * +from .constructor import * -from serializer import * -from representer import * +from .serializer import * +from .representer import * -from resolver import * +from .resolver import * class CBaseLoader(CParser, BaseConstructor, BaseResolver): diff --git a/lib/yaml/dumper.py b/lib/yaml/dumper.py index f9cd49f..6aadba5 100644 --- a/lib/yaml/dumper.py +++ b/lib/yaml/dumper.py @@ -1,10 +1,10 @@ __all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] -from emitter import * -from serializer import * -from representer import * -from resolver import * +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py index 23c25ca..a664d01 100644 --- a/lib/yaml/emitter.py +++ b/lib/yaml/emitter.py @@ -8,17 +8,13 @@ __all__ = ['Emitter', 'EmitterError'] -import sys - -from error import YAMLError -from events import * - -has_ucs4 = sys.maxunicode > 0xffff +from .error import YAMLError +from .events import * class EmitterError(YAMLError): pass -class ScalarAnalysis(object): +class ScalarAnalysis: def __init__(self, scalar, empty, multiline, allow_flow_plain, allow_block_plain, allow_single_quoted, allow_double_quoted, @@ -32,11 +28,11 @@ class ScalarAnalysis(object): self.allow_double_quoted = allow_double_quoted self.allow_block = allow_block -class Emitter(object): +class Emitter: DEFAULT_TAG_PREFIXES = { - u'!' : u'!', - u'tag:yaml.org,2002:' : u'!!', + '!' : '!', + 'tag:yaml.org,2002:' : '!!', } def __init__(self, stream, canonical=None, indent=None, width=None, @@ -92,8 +88,8 @@ class Emitter(object): self.best_width = 80 if width and width > self.best_indent*2: self.best_width = width - self.best_line_break = u'\n' - if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: self.best_line_break = line_break # Tag prefixes. @@ -163,7 +159,7 @@ class Emitter(object): def expect_stream_start(self): if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not getattr(self.stream, 'encoding', None): + if self.event.encoding and not hasattr(self.stream, 'encoding'): self.encoding = self.event.encoding self.write_stream_start() self.state = self.expect_first_document_start @@ -182,15 +178,14 @@ class Emitter(object): def expect_document_start(self, first=False): if isinstance(self.event, DocumentStartEvent): if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() if self.event.version: version_text = self.prepare_version(self.event.version) self.write_version_directive(version_text) self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() if self.event.tags: - handles = self.event.tags.keys() - handles.sort() + handles = sorted(self.event.tags.keys()) for handle in handles: prefix = self.event.tags[handle] self.tag_prefixes[prefix] = handle @@ -202,13 +197,13 @@ class Emitter(object): and not self.check_empty_document()) if not implicit: self.write_indent() - self.write_indicator(u'---', True) + self.write_indicator('---', True) if self.canonical: self.write_indent() self.state = self.expect_document_root elif isinstance(self.event, StreamEndEvent): if self.open_ended: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() self.write_stream_end() self.state = self.expect_nothing @@ -220,7 +215,7 @@ class Emitter(object): if isinstance(self.event, DocumentEndEvent): self.write_indent() if self.event.explicit: - self.write_indicator(u'...', True) + self.write_indicator('...', True) self.write_indent() self.flush_stream() self.state = self.expect_document_start @@ -243,7 +238,7 @@ class Emitter(object): if isinstance(self.event, AliasEvent): self.expect_alias() elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor(u'&') + self.process_anchor('&') self.process_tag() if isinstance(self.event, ScalarEvent): self.expect_scalar() @@ -265,7 +260,7 @@ class Emitter(object): def expect_alias(self): if self.event.anchor is None: raise EmitterError("anchor is not specified for alias") - self.process_anchor(u'*') + self.process_anchor('*') self.state = self.states.pop() def expect_scalar(self): @@ -277,7 +272,7 @@ class Emitter(object): # Flow sequence handlers. def expect_flow_sequence(self): - self.write_indicator(u'[', True, whitespace=True) + self.write_indicator('[', True, whitespace=True) self.flow_level += 1 self.increase_indent(flow=True) self.state = self.expect_first_flow_sequence_item @@ -286,7 +281,7 @@ class Emitter(object): if isinstance(self.event, SequenceEndEvent): self.indent = self.indents.pop() self.flow_level -= 1 - self.write_indicator(u']', False) + self.write_indicator(']', False) self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: @@ -299,12 +294,12 @@ class Emitter(object): self.indent = self.indents.pop() self.flow_level -= 1 if self.canonical: - self.write_indicator(u',', False) + self.write_indicator(',', False) self.write_indent() - self.write_indicator(u']', False) + self.write_indicator(']', False) self.state = self.states.pop() else: - self.write_indicator(u',', False) + self.write_indicator(',', False) if self.canonical or self.column > self.best_width: self.write_indent() self.states.append(self.expect_flow_sequence_item) @@ -313,7 +308,7 @@ class Emitter(object): # Flow mapping handlers. def expect_flow_mapping(self): - self.write_indicator(u'{', True, whitespace=True) + self.write_indicator('{', True, whitespace=True) self.flow_level += 1 self.increase_indent(flow=True) self.state = self.expect_first_flow_mapping_key @@ -322,7 +317,7 @@ class Emitter(object): if isinstance(self.event, MappingEndEvent): self.indent = self.indents.pop() self.flow_level -= 1 - self.write_indicator(u'}', False) + self.write_indicator('}', False) self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: @@ -331,7 +326,7 @@ class Emitter(object): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True) + self.write_indicator('?', True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) @@ -340,31 +335,31 @@ class Emitter(object): self.indent = self.indents.pop() self.flow_level -= 1 if self.canonical: - self.write_indicator(u',', False) + self.write_indicator(',', False) self.write_indent() - self.write_indicator(u'}', False) + self.write_indicator('}', False) self.state = self.states.pop() else: - self.write_indicator(u',', False) + self.write_indicator(',', False) if self.canonical or self.column > self.best_width: self.write_indent() if not self.canonical and self.check_simple_key(): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True) + self.write_indicator('?', True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) def expect_flow_mapping_simple_value(self): - self.write_indicator(u':', False) + self.write_indicator(':', False) self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) def expect_flow_mapping_value(self): if self.canonical or self.column > self.best_width: self.write_indent() - self.write_indicator(u':', True) + self.write_indicator(':', True) self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) @@ -384,7 +379,7 @@ class Emitter(object): self.state = self.states.pop() else: self.write_indent() - self.write_indicator(u'-', True, indention=True) + self.write_indicator('-', True, indention=True) self.states.append(self.expect_block_sequence_item) self.expect_node(sequence=True) @@ -407,18 +402,18 @@ class Emitter(object): self.states.append(self.expect_block_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: - self.write_indicator(u'?', True, indention=True) + self.write_indicator('?', True, indention=True) self.states.append(self.expect_block_mapping_value) self.expect_node(mapping=True) def expect_block_mapping_simple_value(self): - self.write_indicator(u':', False) + self.write_indicator(':', False) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) def expect_block_mapping_value(self): self.write_indent() - self.write_indicator(u':', True, indention=True) + self.write_indicator(':', True, indention=True) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) @@ -437,7 +432,7 @@ class Emitter(object): return False event = self.events[0] return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == u'') + and event.tag is None and event.implicit and event.value == '') def check_simple_key(self): length = 0 @@ -482,7 +477,7 @@ class Emitter(object): self.prepared_tag = None return if self.event.implicit[0] and tag is None: - tag = u'!' + tag = '!' self.prepared_tag = None else: if (not self.canonical or tag is None) and self.event.implicit: @@ -545,19 +540,18 @@ class Emitter(object): major, minor = version if major != 1: raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return u'%d.%d' % (major, minor) + return '%d.%d' % (major, minor) def prepare_tag_handle(self, handle): if not handle: raise EmitterError("tag handle must not be empty") - if handle[0] != u'!' or handle[-1] != u'!': - raise EmitterError("tag handle must start and end with '!': %r" - % (handle.encode('utf-8'))) + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) for ch in handle[1:-1]: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): raise EmitterError("invalid character %r in the tag handle: %r" - % (ch.encode('utf-8'), handle.encode('utf-8'))) + % (ch, handle)) return handle def prepare_tag_prefix(self, prefix): @@ -565,12 +559,12 @@ class Emitter(object): raise EmitterError("tag prefix must not be empty") chunks = [] start = end = 0 - if prefix[0] == u'!': + if prefix[0] == '!': end = 1 while end < len(prefix): ch = prefix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?!:@&=+$,_.~*\'()[]': + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': end += 1 else: if start < end: @@ -578,32 +572,31 @@ class Emitter(object): start = end = end+1 data = ch.encode('utf-8') for ch in data: - chunks.append(u'%%%02X' % ord(ch)) + chunks.append('%%%02X' % ord(ch)) if start < end: chunks.append(prefix[start:end]) - return u''.join(chunks) + return ''.join(chunks) def prepare_tag(self, tag): if not tag: raise EmitterError("tag must not be empty") - if tag == u'!': + if tag == '!': return tag handle = None suffix = tag - prefixes = self.tag_prefixes.keys() - prefixes.sort() + prefixes = sorted(self.tag_prefixes.keys()) for prefix in prefixes: if tag.startswith(prefix) \ - and (prefix == u'!' or len(prefix) < len(tag)): + and (prefix == '!' or len(prefix) < len(tag)): handle = self.tag_prefixes[prefix] suffix = tag[len(prefix):] chunks = [] start = end = 0 while end < len(suffix): ch = suffix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.~*\'()[]' \ - or (ch == u'!' and handle != u'!'): + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): end += 1 else: if start < end: @@ -611,23 +604,23 @@ class Emitter(object): start = end = end+1 data = ch.encode('utf-8') for ch in data: - chunks.append(u'%%%02X' % ord(ch)) + chunks.append('%%%02X' % ch) if start < end: chunks.append(suffix[start:end]) - suffix_text = u''.join(chunks) + suffix_text = ''.join(chunks) if handle: - return u'%s%s' % (handle, suffix_text) + return '%s%s' % (handle, suffix_text) else: - return u'!<%s>' % suffix_text + return '!<%s>' % suffix_text def prepare_anchor(self, anchor): if not anchor: raise EmitterError("anchor must not be empty") for ch in anchor: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): raise EmitterError("invalid character %r in the anchor: %r" - % (ch.encode('utf-8'), anchor.encode('utf-8'))) + % (ch, anchor)) return anchor def analyze_scalar(self, scalar): @@ -654,7 +647,7 @@ class Emitter(object): space_break = False # Check document indicators. - if scalar.startswith(u'---') or scalar.startswith(u'...'): + if scalar.startswith('---') or scalar.startswith('...'): block_indicators = True flow_indicators = True @@ -663,7 +656,7 @@ class Emitter(object): # Last character or followed by a whitespace. followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') # The previous character is a space. previous_space = False @@ -678,35 +671,35 @@ class Emitter(object): # Check for indicators. if index == 0: # Leading indicators are special characters. - if ch in u'#,[]{}&*!|>\'\"%@`': + if ch in '#,[]{}&*!|>\'\"%@`': flow_indicators = True block_indicators = True - if ch in u'?:': + if ch in '?:': flow_indicators = True if followed_by_whitespace: block_indicators = True - if ch == u'-' and followed_by_whitespace: + if ch == '-' and followed_by_whitespace: flow_indicators = True block_indicators = True else: # Some indicators cannot appear within a scalar as well. - if ch in u',?[]{}': + if ch in ',?[]{}': flow_indicators = True - if ch == u':': + if ch == ':': flow_indicators = True if followed_by_whitespace: block_indicators = True - if ch == u'#' and preceded_by_whitespace: + if ch == '#' and preceded_by_whitespace: flow_indicators = True block_indicators = True # Check for line breaks, special, and unicode characters. - if ch in u'\n\x85\u2028\u2029': + if ch in '\n\x85\u2028\u2029': line_breaks = True - if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD' - or (u'\U00010000' <= ch < u'\U0010ffff')) and ch != u'\uFEFF': + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD' + or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': unicode_characters = True if not self.allow_unicode: special_characters = True @@ -714,7 +707,7 @@ class Emitter(object): special_characters = True # Detect important whitespace combinations. - if ch == u' ': + if ch == ' ': if index == 0: leading_space = True if index == len(scalar)-1: @@ -723,7 +716,7 @@ class Emitter(object): break_space = True previous_space = True previous_break = False - elif ch in u'\n\x85\u2028\u2029': + elif ch in '\n\x85\u2028\u2029': if index == 0: leading_break = True if index == len(scalar)-1: @@ -738,9 +731,9 @@ class Emitter(object): # Prepare for the next character. index += 1 - preceded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') # Let's decide what styles are allowed. allow_flow_plain = True @@ -799,7 +792,7 @@ class Emitter(object): def write_stream_start(self): # Write BOM if needed. if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write(u'\uFEFF'.encode(self.encoding)) + self.stream.write('\uFEFF'.encode(self.encoding)) def write_stream_end(self): self.flush_stream() @@ -809,7 +802,7 @@ class Emitter(object): if self.whitespace or not need_whitespace: data = indicator else: - data = u' '+indicator + data = ' '+indicator self.whitespace = whitespace self.indention = self.indention and indention self.column += len(data) @@ -825,7 +818,7 @@ class Emitter(object): self.write_line_break() if self.column < indent: self.whitespace = True - data = u' '*(indent-self.column) + data = ' '*(indent-self.column) self.column = indent if self.encoding: data = data.encode(self.encoding) @@ -843,14 +836,14 @@ class Emitter(object): self.stream.write(data) def write_version_directive(self, version_text): - data = u'%%YAML %s' % version_text + data = '%%YAML %s' % version_text if self.encoding: data = data.encode(self.encoding) self.stream.write(data) self.write_line_break() def write_tag_directive(self, handle_text, prefix_text): - data = u'%%TAG %s %s' % (handle_text, prefix_text) + data = '%%TAG %s %s' % (handle_text, prefix_text) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) @@ -859,7 +852,7 @@ class Emitter(object): # Scalar streams. def write_single_quoted(self, text, split=True): - self.write_indicator(u'\'', True) + self.write_indicator('\'', True) spaces = False breaks = False start = end = 0 @@ -868,7 +861,7 @@ class Emitter(object): if end < len(text): ch = text[end] if spaces: - if ch is None or ch != u' ': + if ch is None or ch != ' ': if start+1 == end and self.column > self.best_width and split \ and start != 0 and end != len(text): self.write_indent() @@ -880,18 +873,18 @@ class Emitter(object): self.stream.write(data) start = end elif breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': self.write_line_break() for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) self.write_indent() start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': if start < end: data = text[start:end] self.column += len(data) @@ -899,49 +892,49 @@ class Emitter(object): data = data.encode(self.encoding) self.stream.write(data) start = end - if ch == u'\'': - data = u'\'\'' + if ch == '\'': + data = '\'\'' self.column += 2 if self.encoding: data = data.encode(self.encoding) self.stream.write(data) start = end + 1 if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 - self.write_indicator(u'\'', False) + self.write_indicator('\'', False) ESCAPE_REPLACEMENTS = { - u'\0': u'0', - u'\x07': u'a', - u'\x08': u'b', - u'\x09': u't', - u'\x0A': u'n', - u'\x0B': u'v', - u'\x0C': u'f', - u'\x0D': u'r', - u'\x1B': u'e', - u'\"': u'\"', - u'\\': u'\\', - u'\x85': u'N', - u'\xA0': u'_', - u'\u2028': u'L', - u'\u2029': u'P', + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', } def write_double_quoted(self, text, split=True): - self.write_indicator(u'"', True) + self.write_indicator('"', True) start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] - if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ - or not (u'\x20' <= ch <= u'\x7E' + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' or (self.allow_unicode - and (u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD'))): + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): if start < end: data = text[start:end] self.column += len(data) @@ -951,21 +944,21 @@ class Emitter(object): start = end if ch is not None: if ch in self.ESCAPE_REPLACEMENTS: - data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= u'\xFF': - data = u'\\x%02X' % ord(ch) - elif ch <= u'\uFFFF': - data = u'\\u%04X' % ord(ch) + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) else: - data = u'\\U%08X' % ord(ch) + data = '\\U%08X' % ord(ch) self.column += len(data) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) start = end+1 - if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ and self.column+(end-start) > self.best_width and split: - data = text[start:end]+u'\\' + data = text[start:end]+'\\' if start < end: start = end self.column += len(data) @@ -975,30 +968,30 @@ class Emitter(object): self.write_indent() self.whitespace = False self.indention = False - if text[start] == u' ': - data = u'\\' + if text[start] == ' ': + data = '\\' self.column += len(data) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) end += 1 - self.write_indicator(u'"', False) + self.write_indicator('"', False) def determine_block_hints(self, text): - hints = u'' + hints = '' if text: - if text[0] in u' \n\x85\u2028\u2029': - hints += unicode(self.best_indent) - if text[-1] not in u'\n\x85\u2028\u2029': - hints += u'-' - elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': - hints += u'+' + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' return hints def write_folded(self, text): hints = self.determine_block_hints(text) - self.write_indicator(u'>'+hints, True) - if hints[-1:] == u'+': + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': self.open_ended = True self.write_line_break() leading_space = True @@ -1010,13 +1003,13 @@ class Emitter(object): if end < len(text): ch = text[end] if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != u' ' \ - and text[start] == u'\n': + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': self.write_line_break() - leading_space = (ch == u' ') + leading_space = (ch == ' ') for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) @@ -1024,7 +1017,7 @@ class Emitter(object): self.write_indent() start = end elif spaces: - if ch != u' ': + if ch != ' ': if start+1 == end and self.column > self.best_width: self.write_indent() else: @@ -1035,7 +1028,7 @@ class Emitter(object): self.stream.write(data) start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029': + if ch is None or ch in ' \n\x85\u2028\u2029': data = text[start:end] self.column += len(data) if self.encoding: @@ -1045,14 +1038,14 @@ class Emitter(object): self.write_line_break() start = end if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - spaces = (ch == u' ') + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') end += 1 def write_literal(self, text): hints = self.determine_block_hints(text) - self.write_indicator(u'|'+hints, True) - if hints[-1:] == u'+': + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': self.open_ended = True self.write_line_break() breaks = True @@ -1062,9 +1055,9 @@ class Emitter(object): if end < len(text): ch = text[end] if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': + if ch is None or ch not in '\n\x85\u2028\u2029': for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) @@ -1072,7 +1065,7 @@ class Emitter(object): self.write_indent() start = end else: - if ch is None or ch in u'\n\x85\u2028\u2029': + if ch is None or ch in '\n\x85\u2028\u2029': data = text[start:end] if self.encoding: data = data.encode(self.encoding) @@ -1081,7 +1074,7 @@ class Emitter(object): self.write_line_break() start = end if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 def write_plain(self, text, split=True): @@ -1090,7 +1083,7 @@ class Emitter(object): if not text: return if not self.whitespace: - data = u' ' + data = ' ' self.column += len(data) if self.encoding: data = data.encode(self.encoding) @@ -1105,7 +1098,7 @@ class Emitter(object): if end < len(text): ch = text[end] if spaces: - if ch != u' ': + if ch != ' ': if start+1 == end and self.column > self.best_width and split: self.write_indent() self.whitespace = False @@ -1118,11 +1111,11 @@ class Emitter(object): self.stream.write(data) start = end elif breaks: - if ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': self.write_line_break() for br in text[start:end]: - if br == u'\n': + if br == '\n': self.write_line_break() else: self.write_line_break(br) @@ -1131,7 +1124,7 @@ class Emitter(object): self.indention = False start = end else: - if ch is None or ch in u' \n\x85\u2028\u2029': + if ch is None or ch in ' \n\x85\u2028\u2029': data = text[start:end] self.column += len(data) if self.encoding: @@ -1139,6 +1132,6 @@ class Emitter(object): self.stream.write(data) start = end if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') end += 1 diff --git a/lib/yaml/error.py b/lib/yaml/error.py index 577686d..b796b4d 100644 --- a/lib/yaml/error.py +++ b/lib/yaml/error.py @@ -1,7 +1,7 @@ __all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] -class Mark(object): +class Mark: def __init__(self, name, index, line, column, buffer, pointer): self.name = name @@ -16,7 +16,7 @@ class Mark(object): return None head = '' start = self.pointer - while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': start -= 1 if self.pointer-start > max_length/2-1: head = ' ... ' @@ -24,13 +24,13 @@ class Mark(object): break tail = '' end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': end += 1 if end-self.pointer > max_length/2-1: tail = ' ... ' end -= 5 break - snippet = self.buffer[start:end].encode('utf-8') + snippet = self.buffer[start:end] return ' '*indent + head + snippet + tail + '\n' \ + ' '*(indent+self.pointer-start+len(head)) + '^' diff --git a/lib/yaml/loader.py b/lib/yaml/loader.py index 4d773c3..e90c112 100644 --- a/lib/yaml/loader.py +++ b/lib/yaml/loader.py @@ -1,12 +1,12 @@ __all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] -from reader import * -from scanner import * -from parser import * -from composer import * -from constructor import * -from resolver import * +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py index f9e3057..13a5995 100644 --- a/lib/yaml/parser.py +++ b/lib/yaml/parser.py @@ -61,21 +61,21 @@ __all__ = ['Parser', 'ParserError'] -from error import MarkedYAMLError -from tokens import * -from events import * -from scanner import * +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * class ParserError(MarkedYAMLError): pass -class Parser(object): +class Parser: # Since writing a recursive-descendant parser is a straightforward task, we # do not give many comments here. DEFAULT_TAGS = { - u'!': u'!', - u'!!': u'tag:yaml.org,2002:', + '!': '!', + '!!': 'tag:yaml.org,2002:', } def __init__(self): @@ -219,7 +219,7 @@ class Parser(object): self.tag_handles = {} while self.check_token(DirectiveToken): token = self.get_token() - if token.name == u'YAML': + if token.name == 'YAML': if self.yaml_version is not None: raise ParserError(None, None, "found duplicate YAML directive", token.start_mark) @@ -229,11 +229,11 @@ class Parser(object): "found incompatible YAML document (version 1.* is required)", token.start_mark) self.yaml_version = token.value - elif token.name == u'TAG': + elif token.name == 'TAG': handle, prefix = token.value if handle in self.tag_handles: raise ParserError(None, None, - "duplicate tag handle %r" % handle.encode('utf-8'), + "duplicate tag handle %r" % handle, token.start_mark) self.tag_handles[handle] = prefix if self.tag_handles: @@ -303,19 +303,19 @@ class Parser(object): if handle is not None: if handle not in self.tag_handles: raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle.encode('utf-8'), + "found undefined tag handle %r" % handle, tag_mark) tag = self.tag_handles[handle]+suffix else: tag = suffix - #if tag == u'!': + #if tag == '!': # raise ParserError("while parsing a node", start_mark, # "found non-specific tag '!'", tag_mark, # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") if start_mark is None: start_mark = end_mark = self.peek_token().start_mark event = None - implicit = (tag is None or tag == u'!') + implicit = (tag is None or tag == '!') if indentless_sequence and self.check_token(BlockEntryToken): end_mark = self.peek_token().end_mark event = SequenceStartEvent(anchor, tag, implicit, @@ -325,7 +325,7 @@ class Parser(object): if self.check_token(ScalarToken): token = self.get_token() end_mark = token.end_mark - if (token.plain and tag is None) or tag == u'!': + if (token.plain and tag is None) or tag == '!': implicit = (True, False) elif tag is None: implicit = (False, True) @@ -357,7 +357,7 @@ class Parser(object): elif anchor is not None or tag is not None: # Empty scalars are allowed even if a tag or an anchor is # specified. - event = ScalarEvent(anchor, tag, (implicit, False), u'', + event = ScalarEvent(anchor, tag, (implicit, False), '', start_mark, end_mark) self.state = self.states.pop() else: @@ -585,5 +585,5 @@ class Parser(object): return self.process_empty_scalar(self.peek_token().start_mark) def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), u'', mark, mark) + return ScalarEvent(None, None, (True, False), '', mark, mark) diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index 4c42150..774b021 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -17,11 +17,9 @@ __all__ = ['Reader', 'ReaderError'] -from error import YAMLError, Mark +from .error import YAMLError, Mark -import codecs, re, sys - -has_ucs4 = sys.maxunicode > 0xffff +import codecs, re class ReaderError(YAMLError): @@ -33,7 +31,7 @@ class ReaderError(YAMLError): self.reason = reason def __str__(self): - if isinstance(self.character, str): + if isinstance(self.character, bytes): return "'%s' codec can't decode byte #x%02x: %s\n" \ " in \"%s\", position %d" \ % (self.encoding, ord(self.character), self.reason, @@ -46,13 +44,13 @@ class ReaderError(YAMLError): class Reader(object): # Reader: - # - determines the data encoding and converts it to unicode, + # - determines the data encoding and converts it to a unicode string, # - checks if characters are in allowed range, # - adds '\0' to the end. # Reader accepts + # - a `bytes` object, # - a `str` object, - # - a `unicode` object, # - a file-like object with its `read` method returning `str`, # - a file-like object with its `read` method returning `unicode`. @@ -63,7 +61,7 @@ class Reader(object): self.stream = None self.stream_pointer = 0 self.eof = True - self.buffer = u'' + self.buffer = '' self.pointer = 0 self.raw_buffer = None self.raw_decode = None @@ -71,19 +69,19 @@ class Reader(object): self.index = 0 self.line = 0 self.column = 0 - if isinstance(stream, unicode): + if isinstance(stream, str): self.name = "" self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, str): - self.name = "" + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "" self.raw_buffer = stream self.determine_encoding() else: self.stream = stream self.name = getattr(stream, 'name', "") self.eof = False - self.raw_buffer = '' + self.raw_buffer = None self.determine_encoding() def peek(self, index=0): @@ -105,11 +103,11 @@ class Reader(object): ch = self.buffer[self.pointer] self.pointer += 1 self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): self.line += 1 self.column = 0 - elif ch != u'\uFEFF': + elif ch != '\uFEFF': self.column += 1 length -= 1 @@ -122,9 +120,9 @@ class Reader(object): None, None) def determine_encoding(self): - while not self.eof and len(self.raw_buffer) < 2: + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): self.update_raw() - if not isinstance(self.raw_buffer, unicode): + if isinstance(self.raw_buffer, bytes): if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): self.raw_decode = codecs.utf_16_le_decode self.encoding = 'utf-16-le' @@ -136,15 +134,7 @@ class Reader(object): self.encoding = 'utf-8' self.update(1) - if has_ucs4: - NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]' - elif sys.platform.startswith('java'): - # Jython doesn't support lone surrogates https://bugs.jython.org/issue2048 - NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]' - else: - # Need to use eval here due to the above Jython issue - NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'") - NON_PRINTABLE = re.compile(NON_PRINTABLE) + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') def check_printable(self, data): match = self.NON_PRINTABLE.search(data) if match: @@ -165,8 +155,8 @@ class Reader(object): try: data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof) - except UnicodeDecodeError, exc: - character = exc.object[exc.start] + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] if self.stream is not None: position = self.stream_pointer-len(self.raw_buffer)+exc.start else: @@ -180,14 +170,16 @@ class Reader(object): self.buffer += data self.raw_buffer = self.raw_buffer[converted:] if self.eof: - self.buffer += u'\0' + self.buffer += '\0' self.raw_buffer = None break - def update_raw(self, size=1024): + def update_raw(self, size=4096): data = self.stream.read(size) - if data: - self.raw_buffer += data - self.stream_pointer += len(data) + if self.raw_buffer is None: + self.raw_buffer = data else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: self.eof = True diff --git a/lib/yaml/representer.py b/lib/yaml/representer.py index 93e09b6..808ca06 100644 --- a/lib/yaml/representer.py +++ b/lib/yaml/representer.py @@ -2,26 +2,23 @@ __all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', 'RepresenterError'] -from error import * +from .error import * +from .nodes import * -from nodes import * - -import datetime - -import copy_reg, types +import datetime, copyreg, types, base64, collections class RepresenterError(YAMLError): pass -class BaseRepresenter(object): +class BaseRepresenter: yaml_representers = {} yaml_multi_representers = {} def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): self.default_style = default_style - self.default_flow_style = default_flow_style self.sort_keys = sort_keys + self.default_flow_style = default_flow_style self.represented_objects = {} self.object_keeper = [] self.alias_key = None @@ -33,12 +30,6 @@ class BaseRepresenter(object): self.object_keeper = [] self.alias_key = None - def get_classobj_bases(self, cls): - bases = [cls] - for base in cls.__bases__: - bases.extend(self.get_classobj_bases(base)) - return bases - def represent_data(self, data): if self.ignore_aliases(data): self.alias_key = None @@ -53,8 +44,6 @@ class BaseRepresenter(object): #self.represented_objects[alias_key] = None self.object_keeper.append(data) data_types = type(data).__mro__ - if type(data) is types.InstanceType: - data_types = self.get_classobj_bases(data.__class__)+list(data_types) if data_types[0] in self.yaml_representers: node = self.yaml_representers[data_types[0]](self, data) else: @@ -68,22 +57,22 @@ class BaseRepresenter(object): elif None in self.yaml_representers: node = self.yaml_representers[None](self, data) else: - node = ScalarNode(None, unicode(data)) + node = ScalarNode(None, str(data)) #if alias_key is not None: # self.represented_objects[alias_key] = node return node + @classmethod def add_representer(cls, data_type, representer): if not 'yaml_representers' in cls.__dict__: cls.yaml_representers = cls.yaml_representers.copy() cls.yaml_representers[data_type] = representer - add_representer = classmethod(add_representer) + @classmethod def add_multi_representer(cls, data_type, representer): if not 'yaml_multi_representers' in cls.__dict__: cls.yaml_multi_representers = cls.yaml_multi_representers.copy() cls.yaml_multi_representers[data_type] = representer - add_multi_representer = classmethod(add_multi_representer) def represent_scalar(self, tag, value, style=None): if style is None: @@ -118,9 +107,12 @@ class BaseRepresenter(object): self.represented_objects[self.alias_key] = node best_style = True if hasattr(mapping, 'items'): - mapping = mapping.items() + mapping = list(mapping.items()) if self.sort_keys: - mapping.sort() + try: + mapping = sorted(mapping) + except TypeError: + pass for item_key, item_value in mapping: node_key = self.represent_data(item_key) node_value = self.represent_data(item_value) @@ -146,44 +138,31 @@ class SafeRepresenter(BaseRepresenter): return True if isinstance(data, tuple) and data == (): return True - if isinstance(data, (str, unicode, bool, int, float)): + if isinstance(data, (str, bytes, bool, int, float)): return True def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', - u'null') + return self.represent_scalar('tag:yaml.org,2002:null', 'null') def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) + return self.represent_scalar('tag:yaml.org,2002:str', data) - def represent_unicode(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') def represent_bool(self, data): if data: - value = u'true' + value = 'true' else: - value = u'false' - return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) def represent_int(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - def represent_long(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) inf_value = 1e300 while repr(inf_value) != repr(inf_value*inf_value): @@ -191,13 +170,13 @@ class SafeRepresenter(BaseRepresenter): def represent_float(self, data): if data != data or (data == 0.0 and data == 1.0): - value = u'.nan' + value = '.nan' elif data == self.inf_value: - value = u'.inf' + value = '.inf' elif data == -self.inf_value: - value = u'-.inf' + value = '-.inf' else: - value = unicode(repr(data)).lower() + value = repr(data).lower() # Note that in some cases `repr(data)` represents a float number # without the decimal parts. For instance: # >>> repr(1e17) @@ -205,9 +184,9 @@ class SafeRepresenter(BaseRepresenter): # Unfortunately, this is not a valid float representation according # to the definition of the `!!float` tag. We fix this by adding # '.0' before the 'e' symbol. - if u'.' not in value and u'e' in value: - value = value.replace(u'e', u'.0e', 1) - return self.represent_scalar(u'tag:yaml.org,2002:float', value) + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) def represent_list(self, data): #pairs = (len(data) > 0 and isinstance(data, list)) @@ -217,7 +196,7 @@ class SafeRepresenter(BaseRepresenter): # pairs = False # break #if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + return self.represent_sequence('tag:yaml.org,2002:seq', data) #value = [] #for item_key, item_value in data: # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', @@ -225,21 +204,21 @@ class SafeRepresenter(BaseRepresenter): #return SequenceNode(u'tag:yaml.org,2002:pairs', value) def represent_dict(self, data): - return self.represent_mapping(u'tag:yaml.org,2002:map', data) + return self.represent_mapping('tag:yaml.org,2002:map', data) def represent_set(self, data): value = {} for key in data: value[key] = None - return self.represent_mapping(u'tag:yaml.org,2002:set', value) + return self.represent_mapping('tag:yaml.org,2002:set', value) def represent_date(self, data): - value = unicode(data.isoformat()) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) def represent_datetime(self, data): - value = unicode(data.isoformat(' ')) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) def represent_yaml_object(self, tag, data, cls, flow_style=None): if hasattr(data, '__getstate__'): @@ -257,8 +236,8 @@ SafeRepresenter.add_representer(type(None), SafeRepresenter.add_representer(str, SafeRepresenter.represent_str) -SafeRepresenter.add_representer(unicode, - SafeRepresenter.represent_unicode) +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) SafeRepresenter.add_representer(bool, SafeRepresenter.represent_bool) @@ -266,9 +245,6 @@ SafeRepresenter.add_representer(bool, SafeRepresenter.add_representer(int, SafeRepresenter.represent_int) -SafeRepresenter.add_representer(long, - SafeRepresenter.represent_long) - SafeRepresenter.add_representer(float, SafeRepresenter.represent_float) @@ -295,99 +271,27 @@ SafeRepresenter.add_representer(None, class Representer(SafeRepresenter): - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:python/str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - tag = None - try: - data.encode('ascii') - tag = u'tag:yaml.org,2002:python/unicode' - except UnicodeEncodeError: - tag = u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data) - - def represent_long(self, data): - tag = u'tag:yaml.org,2002:int' - if int(data) is not data: - tag = u'tag:yaml.org,2002:python/long' - return self.represent_scalar(tag, unicode(data)) - def represent_complex(self, data): if data.imag == 0.0: - data = u'%r' % data.real + data = '%r' % data.real elif data.real == 0.0: - data = u'%rj' % data.imag + data = '%rj' % data.imag elif data.imag > 0: - data = u'%r+%rj' % (data.real, data.imag) + data = '%r+%rj' % (data.real, data.imag) else: - data = u'%r%rj' % (data.real, data.imag) - return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) def represent_tuple(self, data): - return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) def represent_name(self, data): - name = u'%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') def represent_module(self, data): return self.represent_scalar( - u'tag:yaml.org,2002:python/module:'+data.__name__, u'') - - def represent_instance(self, data): - # For instances of classic classes, we use __getinitargs__ and - # __getstate__ to serialize the data. - - # If data.__getinitargs__ exists, the object must be reconstructed by - # calling cls(**args), where args is a tuple returned by - # __getinitargs__. Otherwise, the cls.__init__ method should never be - # called and the class instance is created by instantiating a trivial - # class and assigning to the instance's __class__ variable. - - # If data.__getstate__ exists, it returns the state of the object. - # Otherwise, the state of the object is data.__dict__. - - # We produce either a !!python/object or !!python/object/new node. - # If data.__getinitargs__ does not exist and state is a dictionary, we - # produce a !!python/object node . Otherwise we produce a - # !!python/object/new node. - - cls = data.__class__ - class_name = u'%s.%s' % (cls.__module__, cls.__name__) - args = None - state = None - if hasattr(data, '__getinitargs__'): - args = list(data.__getinitargs__()) - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__ - if args is None and isinstance(state, dict): - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+class_name, state) - if isinstance(state, dict) and not state: - return self.represent_sequence( - u'tag:yaml.org,2002:python/object/new:'+class_name, args) - value = {} - if args: - value['args'] = args - value['state'] = state - return self.represent_mapping( - u'tag:yaml.org,2002:python/object/new:'+class_name, value) + 'tag:yaml.org,2002:python/module:'+data.__name__, '') def represent_object(self, data): # We use __reduce__ API to save the data. data.__reduce__ returns @@ -407,8 +311,8 @@ class Representer(SafeRepresenter): # !!python/object/apply node. cls = type(data) - if cls in copy_reg.dispatch_table: - reduce = copy_reg.dispatch_table[cls](data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) elif hasattr(data, '__reduce_ex__'): reduce = data.__reduce_ex__(2) elif hasattr(data, '__reduce__'): @@ -427,16 +331,16 @@ class Representer(SafeRepresenter): if function.__name__ == '__newobj__': function = args[0] args = args[1:] - tag = u'tag:yaml.org,2002:python/object/new:' + tag = 'tag:yaml.org,2002:python/object/new:' newobj = True else: - tag = u'tag:yaml.org,2002:python/object/apply:' + tag = 'tag:yaml.org,2002:python/object/apply:' newobj = False - function_name = u'%s.%s' % (function.__module__, function.__name__) + function_name = '%s.%s' % (function.__module__, function.__name__) if not args and not listitems and not dictitems \ and isinstance(state, dict) and newobj: return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+function_name, state) + 'tag:yaml.org,2002:python/object:'+function_name, state) if not listitems and not dictitems \ and isinstance(state, dict) and not state: return self.represent_sequence(tag+function_name, args) @@ -451,14 +355,13 @@ class Representer(SafeRepresenter): value['dictitems'] = dictitems return self.represent_mapping(tag+function_name, value) -Representer.add_representer(str, - Representer.represent_str) - -Representer.add_representer(unicode, - Representer.represent_unicode) - -Representer.add_representer(long, - Representer.represent_long) + def represent_ordered_dict(self, data): + # Provide uniform representation across different Python versions. + data_type = type(data) + tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ + % (data_type.__module__, data_type.__name__) + items = [[key, value] for key, value in data.items()] + return self.represent_sequence(tag, [items]) Representer.add_representer(complex, Representer.represent_complex) @@ -466,11 +369,11 @@ Representer.add_representer(complex, Representer.add_representer(tuple, Representer.represent_tuple) -Representer.add_representer(type, +Representer.add_multi_representer(type, Representer.represent_name) -Representer.add_representer(types.ClassType, - Representer.represent_name) +Representer.add_representer(collections.OrderedDict, + Representer.represent_ordered_dict) Representer.add_representer(types.FunctionType, Representer.represent_name) @@ -481,9 +384,6 @@ Representer.add_representer(types.BuiltinFunctionType, Representer.add_representer(types.ModuleType, Representer.represent_module) -Representer.add_multi_representer(types.InstanceType, - Representer.represent_instance) - Representer.add_multi_representer(object, Representer.represent_object) diff --git a/lib/yaml/resolver.py b/lib/yaml/resolver.py index ba9aeab..3522bda 100644 --- a/lib/yaml/resolver.py +++ b/lib/yaml/resolver.py @@ -1,19 +1,19 @@ __all__ = ['BaseResolver', 'Resolver'] -from error import * -from nodes import * +from .error import * +from .nodes import * import re class ResolverError(YAMLError): pass -class BaseResolver(object): +class BaseResolver: - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' yaml_implicit_resolvers = {} yaml_path_resolvers = {} @@ -22,6 +22,7 @@ class BaseResolver(object): self.resolver_exact_paths = [] self.resolver_prefix_paths = [] + @classmethod def add_implicit_resolver(cls, tag, regexp, first): if not 'yaml_implicit_resolvers' in cls.__dict__: implicit_resolvers = {} @@ -32,8 +33,8 @@ class BaseResolver(object): first = [None] for ch in first: cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - add_implicit_resolver = classmethod(add_implicit_resolver) + @classmethod def add_path_resolver(cls, tag, path, kind=None): # Note: `add_path_resolver` is experimental. The API could be changed. # `new_path` is a pattern that is matched against the path from the @@ -69,10 +70,10 @@ class BaseResolver(object): elif node_check is dict: node_check = MappingNode elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, basestring) \ + and not isinstance(node_check, str) \ and node_check is not None: raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (basestring, int)) \ + if not isinstance(index_check, (str, int)) \ and index_check is not None: raise ResolverError("Invalid index checker: %s" % index_check) new_path.append((node_check, index_check)) @@ -86,7 +87,6 @@ class BaseResolver(object): and kind is not None: raise ResolverError("Invalid node kind: %s" % kind) cls.yaml_path_resolvers[tuple(new_path), kind] = tag - add_path_resolver = classmethod(add_path_resolver) def descend_resolver(self, current_node, current_index): if not self.yaml_path_resolvers: @@ -120,7 +120,7 @@ class BaseResolver(object): def check_resolver_prefix(self, depth, path, kind, current_node, current_index): node_check, index_check = path[depth-1] - if isinstance(node_check, basestring): + if isinstance(node_check, str): if current_node.tag != node_check: return elif node_check is not None: @@ -131,7 +131,7 @@ class BaseResolver(object): if (index_check is False or index_check is None) \ and current_index is None: return - if isinstance(index_check, basestring): + if isinstance(index_check, str): if not (isinstance(current_index, ScalarNode) and index_check == current_index.value): return @@ -142,8 +142,8 @@ class BaseResolver(object): def resolve(self, kind, value, implicit): if kind is ScalarNode and implicit[0]: - if value == u'': - resolvers = self.yaml_implicit_resolvers.get(u'', []) + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) else: resolvers = self.yaml_implicit_resolvers.get(value[0], []) wildcard_resolvers = self.yaml_implicit_resolvers.get(None, []) @@ -168,60 +168,60 @@ class Resolver(BaseResolver): pass Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:bool', - re.compile(ur'''^(?:yes|Yes|YES|no|No|NO + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO |true|True|TRUE|false|False|FALSE |on|On|ON|off|Off|OFF)$''', re.X), - list(u'yYnNtTfFoO')) + list('yYnNtTfFoO')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:float', - re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? - |\.[0-9_]+(?:[eE][-+][0-9]+)? + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9][0-9_]*(?:[eE][-+][0-9]+)? |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* |[-+]?\.(?:inf|Inf|INF) |\.(?:nan|NaN|NAN))$''', re.X), - list(u'-+0123456789.')) + list('-+0123456789.')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:int', - re.compile(ur'''^(?:[-+]?0b[0-1_]+ + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ |[-+]?0[0-7_]+ |[-+]?(?:0|[1-9][0-9_]*) |[-+]?0x[0-9a-fA-F_]+ |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list(u'-+0123456789')) + list('-+0123456789')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:merge', - re.compile(ur'^(?:<<)$'), - [u'<']) + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:null', - re.compile(ur'''^(?: ~ + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ |null|Null|NULL | )$''', re.X), - [u'~', u'n', u'N', u'']) + ['~', 'n', 'N', '']) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:timestamp', - re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? (?:[Tt]|[ \t]+)[0-9][0-9]? :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list(u'0123456789')) + list('0123456789')) Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:value', - re.compile(ur'^(?:=)$'), - [u'=']) + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) # The following resolver is only for documentation purposes. It cannot work # because plain scalars cannot start with '!', '&', or '*'. Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:yaml', - re.compile(ur'^(?:!|&|\*)$'), - list(u'!&*')) + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 098ea7b..de925b0 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -26,13 +26,13 @@ __all__ = ['Scanner', 'ScannerError'] -from error import MarkedYAMLError -from tokens import * +from .error import MarkedYAMLError +from .tokens import * class ScannerError(MarkedYAMLError): pass -class SimpleKey(object): +class SimpleKey: # See below simple keys treatment. def __init__(self, token_number, required, index, line, column, mark): @@ -43,7 +43,7 @@ class SimpleKey(object): self.column = column self.mark = mark -class Scanner(object): +class Scanner: def __init__(self): """Initialize the scanner.""" @@ -169,85 +169,85 @@ class Scanner(object): ch = self.peek() # Is it the end of stream? - if ch == u'\0': + if ch == '\0': return self.fetch_stream_end() # Is it a directive? - if ch == u'%' and self.check_directive(): + if ch == '%' and self.check_directive(): return self.fetch_directive() # Is it the document start? - if ch == u'-' and self.check_document_start(): + if ch == '-' and self.check_document_start(): return self.fetch_document_start() # Is it the document end? - if ch == u'.' and self.check_document_end(): + if ch == '.' and self.check_document_end(): return self.fetch_document_end() # TODO: support for BOM within a stream. - #if ch == u'\uFEFF': + #if ch == '\uFEFF': # return self.fetch_bom() <-- issue BOMToken # Note: the order of the following checks is NOT significant. # Is it the flow sequence start indicator? - if ch == u'[': + if ch == '[': return self.fetch_flow_sequence_start() # Is it the flow mapping start indicator? - if ch == u'{': + if ch == '{': return self.fetch_flow_mapping_start() # Is it the flow sequence end indicator? - if ch == u']': + if ch == ']': return self.fetch_flow_sequence_end() # Is it the flow mapping end indicator? - if ch == u'}': + if ch == '}': return self.fetch_flow_mapping_end() # Is it the flow entry indicator? - if ch == u',': + if ch == ',': return self.fetch_flow_entry() # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): + if ch == '-' and self.check_block_entry(): return self.fetch_block_entry() # Is it the key indicator? - if ch == u'?' and self.check_key(): + if ch == '?' and self.check_key(): return self.fetch_key() # Is it the value indicator? - if ch == u':' and self.check_value(): + if ch == ':' and self.check_value(): return self.fetch_value() # Is it an alias? - if ch == u'*': + if ch == '*': return self.fetch_alias() # Is it an anchor? - if ch == u'&': + if ch == '&': return self.fetch_anchor() # Is it a tag? - if ch == u'!': + if ch == '!': return self.fetch_tag() # Is it a literal scalar? - if ch == u'|' and not self.flow_level: + if ch == '|' and not self.flow_level: return self.fetch_literal() # Is it a folded scalar? - if ch == u'>' and not self.flow_level: + if ch == '>' and not self.flow_level: return self.fetch_folded() # Is it a single quoted scalar? - if ch == u'\'': + if ch == '\'': return self.fetch_single() # Is it a double quoted scalar? - if ch == u'\"': + if ch == '\"': return self.fetch_double() # It must be a plain scalar then. @@ -256,8 +256,8 @@ class Scanner(object): # No? It's an error. Let's produce a nice error message. raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % ch.encode('utf-8'), self.get_mark()) + "found character %r that cannot start any token" % ch, + self.get_mark()) # Simple keys treatment. @@ -283,7 +283,7 @@ class Scanner(object): # - should be no longer than 1024 characters. # Disabling this procedure will allow simple keys of any length and # height (may cause problems if indentation is broken though). - for level in self.possible_simple_keys.keys(): + for level in list(self.possible_simple_keys): key = self.possible_simple_keys[level] if key.line != self.line \ or self.index-key.index > 1024: @@ -691,22 +691,22 @@ class Scanner(object): # DOCUMENT-START: ^ '---' (' '|'\n') if self.column == 0: - if self.prefix(3) == u'---' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return True def check_document_end(self): # DOCUMENT-END: ^ '...' (' '|'\n') if self.column == 0: - if self.prefix(3) == u'...' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return True def check_block_entry(self): # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_key(self): @@ -716,7 +716,7 @@ class Scanner(object): # KEY(block context): '?' (' '|'\n') else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_value(self): @@ -726,7 +726,7 @@ class Scanner(object): # VALUE(block context): ':' (' '|'\n') else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' def check_plain(self): @@ -743,9 +743,9 @@ class Scanner(object): # '-' character) because we want the flow context to be space # independent. ch = self.peek() - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' - and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) # Scanners. @@ -769,14 +769,14 @@ class Scanner(object): # `unwind_indent` before issuing BLOCK-END. # Scanners for block, flow, and plain scalars need to be modified. - if self.index == 0 and self.peek() == u'\uFEFF': + if self.index == 0 and self.peek() == '\uFEFF': self.forward() found = False while not found: - while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() if self.scan_line_break(): if not self.flow_level: @@ -790,15 +790,15 @@ class Scanner(object): self.forward() name = self.scan_directive_name(start_mark) value = None - if name == u'YAML': + if name == 'YAML': value = self.scan_yaml_directive_value(start_mark) end_mark = self.get_mark() - elif name == u'TAG': + elif name == 'TAG': value = self.scan_tag_directive_value(start_mark) end_mark = self.get_mark() else: end_mark = self.get_mark() - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() self.scan_directive_ignored_line(start_mark) return DirectiveToken(name, value, start_mark, end_mark) @@ -807,51 +807,48 @@ class Scanner(object): # See the specification for details. length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) if not length: raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) value = self.prefix(length) self.forward(length) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) return value def scan_yaml_directive_value(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() major = self.scan_yaml_directive_number(start_mark) if self.peek() != '.': raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % self.peek().encode('utf-8'), + "expected a digit or '.', but found %r" % self.peek(), self.get_mark()) self.forward() minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + if self.peek() not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % self.peek().encode('utf-8'), + "expected a digit or ' ', but found %r" % self.peek(), self.get_mark()) return (major, minor) def scan_yaml_directive_number(self, start_mark): # See the specification for details. ch = self.peek() - if not (u'0' <= ch <= u'9'): + if not ('0' <= ch <= '9'): raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected a digit, but found %r" % ch, self.get_mark()) length = 0 - while u'0' <= self.peek(length) <= u'9': + while '0' <= self.peek(length) <= '9': length += 1 value = int(self.prefix(length)) self.forward(length) @@ -859,10 +856,10 @@ class Scanner(object): def scan_tag_directive_value(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == u' ': + while self.peek() == ' ': self.forward() prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix) @@ -871,34 +868,32 @@ class Scanner(object): # See the specification for details. value = self.scan_tag_handle('directive', start_mark) ch = self.peek() - if ch != u' ': + if ch != ' ': raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) return value def scan_tag_directive_prefix(self, start_mark): # See the specification for details. value = self.scan_tag_uri('directive', start_mark) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) return value def scan_directive_ignored_line(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': + if ch not in '\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) self.scan_line_break() def scan_anchor(self, TokenClass): @@ -912,28 +907,28 @@ class Scanner(object): # Therefore we restrict aliases to numbers and ASCII letters. start_mark = self.get_mark() indicator = self.peek() - if indicator == u'*': + if indicator == '*': name = 'alias' else: name = 'anchor' self.forward() length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) if not length: raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) value = self.prefix(length) self.forward(length) ch = self.peek() - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) end_mark = self.get_mark() return TokenClass(value, start_mark, end_mark) @@ -941,40 +936,39 @@ class Scanner(object): # See the specification for details. start_mark = self.get_mark() ch = self.peek(1) - if ch == u'<': + if ch == '<': handle = None self.forward(2) suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != u'>': + if self.peek() != '>': raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek().encode('utf-8'), + "expected '>', but found %r" % self.peek(), self.get_mark()) self.forward() - elif ch in u'\0 \t\r\n\x85\u2028\u2029': + elif ch in '\0 \t\r\n\x85\u2028\u2029': handle = None - suffix = u'!' + suffix = '!' self.forward() else: length = 1 use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': use_handle = True break length += 1 ch = self.peek(length) - handle = u'!' + handle = '!' if use_handle: handle = self.scan_tag_handle('tag', start_mark) else: - handle = u'!' + handle = '!' self.forward() suffix = self.scan_tag_uri('tag', start_mark) ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected ' ', but found %r" % ch, self.get_mark()) value = (handle, suffix) end_mark = self.get_mark() return TagToken(value, start_mark, end_mark) @@ -1005,39 +999,39 @@ class Scanner(object): else: indent = min_indent+increment-1 breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' + line_break = '' # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != u'\0': + while self.column == indent and self.peek() != '\0': chunks.extend(breaks) - leading_non_space = self.peek() not in u' \t' + leading_non_space = self.peek() not in ' \t' length = 0 - while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': length += 1 chunks.append(self.prefix(length)) self.forward(length) line_break = self.scan_line_break() breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != u'\0': + if self.column == indent and self.peek() != '\0': # Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification: - if folded and line_break == u'\n' \ - and leading_non_space and self.peek() not in u' \t': + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': if not breaks: - chunks.append(u' ') + chunks.append(' ') else: chunks.append(line_break) # This is Clark Evans's interpretation (also in the spec # examples): # - #if folded and line_break == u'\n': + #if folded and line_break == '\n': # if not breaks: # if self.peek() not in ' \t': - # chunks.append(u' ') + # chunks.append(' ') # else: # chunks.append(line_break) #else: @@ -1052,7 +1046,7 @@ class Scanner(object): chunks.extend(breaks) # We are done. - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + return ScalarToken(''.join(chunks), False, start_mark, end_mark, style) def scan_block_scalar_indicators(self, start_mark): @@ -1060,21 +1054,21 @@ class Scanner(object): chomping = None increment = None ch = self.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.forward() ch = self.peek() - if ch in u'0123456789': + if ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", self.get_mark()) self.forward() - elif ch in u'0123456789': + elif ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, @@ -1082,31 +1076,31 @@ class Scanner(object): self.get_mark()) self.forward() ch = self.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.forward() ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.get_mark()) + % ch, self.get_mark()) return chomping, increment def scan_block_scalar_ignored_line(self, start_mark): # See the specification for details. - while self.peek() == u' ': + while self.peek() == ' ': self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': self.forward() ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': + if ch not in '\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) self.scan_line_break() def scan_block_scalar_indentation(self): @@ -1114,8 +1108,8 @@ class Scanner(object): chunks = [] max_indent = 0 end_mark = self.get_mark() - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() != u' ': + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': chunks.append(self.scan_line_break()) end_mark = self.get_mark() else: @@ -1128,12 +1122,12 @@ class Scanner(object): # See the specification for details. chunks = [] end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': + while self.column < indent and self.peek() == ' ': self.forward() - while self.peek() in u'\r\n\x85\u2028\u2029': + while self.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': + while self.column < indent and self.peek() == ' ': self.forward() return chunks, end_mark @@ -1158,34 +1152,34 @@ class Scanner(object): chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) self.forward() end_mark = self.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + return ScalarToken(''.join(chunks), False, start_mark, end_mark, style) ESCAPE_REPLACEMENTS = { - u'0': u'\0', - u'a': u'\x07', - u'b': u'\x08', - u't': u'\x09', - u'\t': u'\x09', - u'n': u'\x0A', - u'v': u'\x0B', - u'f': u'\x0C', - u'r': u'\x0D', - u'e': u'\x1B', - u' ': u'\x20', - u'\"': u'\"', - u'\\': u'\\', - u'/': u'/', - u'N': u'\x85', - u'_': u'\xA0', - u'L': u'\u2028', - u'P': u'\u2029', + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + '/': '/', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', } ESCAPE_CODES = { - u'x': 2, - u'u': 4, - u'U': 8, + 'x': 2, + 'u': 4, + 'U': 8, } def scan_flow_scalar_non_spaces(self, double, start_mark): @@ -1193,19 +1187,19 @@ class Scanner(object): chunks = [] while True: length = 0 - while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': length += 1 if length: chunks.append(self.prefix(length)) self.forward(length) ch = self.peek() - if not double and ch == u'\'' and self.peek(1) == u'\'': - chunks.append(u'\'') + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') self.forward(2) - elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + elif (double and ch == '\'') or (not double and ch in '\"\\'): chunks.append(ch) self.forward() - elif double and ch == u'\\': + elif double and ch == '\\': self.forward() ch = self.peek() if ch in self.ESCAPE_REPLACEMENTS: @@ -1215,19 +1209,19 @@ class Scanner(object): length = self.ESCAPE_CODES[ch] self.forward() for k in range(length): - if self.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError("while scanning a double-quoted scalar", start_mark, - "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k).encode('utf-8')), self.get_mark()) + "expected escape sequence of %d hexadecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) code = int(self.prefix(length), 16) - chunks.append(unichr(code)) + chunks.append(chr(code)) self.forward(length) - elif ch in u'\r\n\x85\u2028\u2029': + elif ch in '\r\n\x85\u2028\u2029': self.scan_line_break() chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) else: raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + "found unknown escape character %r" % ch, self.get_mark()) else: return chunks @@ -1235,21 +1229,21 @@ class Scanner(object): # See the specification for details. chunks = [] length = 0 - while self.peek(length) in u' \t': + while self.peek(length) in ' \t': length += 1 whitespaces = self.prefix(length) self.forward(length) ch = self.peek() - if ch == u'\0': + if ch == '\0': raise ScannerError("while scanning a quoted scalar", start_mark, "found unexpected end of stream", self.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': + elif ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: - chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) else: chunks.append(whitespaces) @@ -1262,13 +1256,13 @@ class Scanner(object): # Instead of checking indentation, we check for document # separators. prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': raise ScannerError("while scanning a quoted scalar", start_mark, "found unexpected document separator", self.get_mark()) - while self.peek() in u' \t': + while self.peek() in ' \t': self.forward() - if self.peek() in u'\r\n\x85\u2028\u2029': + if self.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) else: return chunks @@ -1290,15 +1284,15 @@ class Scanner(object): spaces = [] while True: length = 0 - if self.peek() == u'#': + if self.peek() == '#': break while True: ch = self.peek(length) - if ch in u'\0 \t\r\n\x85\u2028\u2029' \ - or (ch == u':' and - self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029' + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029' + (u',[]{}' if self.flow_level else u''))\ - or (self.flow_level and ch in u',?[]{}'): + or (self.flow_level and ch in ',?[]{}'): break length += 1 if length == 0: @@ -1309,10 +1303,10 @@ class Scanner(object): self.forward(length) end_mark = self.get_mark() spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == u'#' \ + if not spaces or self.peek() == '#' \ or (not self.flow_level and self.column < indent): break - return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + return ScalarToken(''.join(chunks), True, start_mark, end_mark) def scan_plain_spaces(self, indent, start_mark): # See the specification for details. @@ -1320,32 +1314,32 @@ class Scanner(object): # We just forbid them completely. Do not use tabs in YAML! chunks = [] length = 0 - while self.peek(length) in u' ': + while self.peek(length) in ' ': length += 1 whitespaces = self.prefix(length) self.forward(length) ch = self.peek() - if ch in u'\r\n\x85\u2028\u2029': + if ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() self.allow_simple_key = True prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return breaks = [] - while self.peek() in u' \r\n\x85\u2028\u2029': + while self.peek() in ' \r\n\x85\u2028\u2029': if self.peek() == ' ': self.forward() else: breaks.append(self.scan_line_break()) prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': return - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: - chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) elif whitespaces: chunks.append(whitespaces) @@ -1356,22 +1350,20 @@ class Scanner(object): # For some strange reasons, the specification does not allow '_' in # tag handles. I have allowed it anyway. ch = self.peek() - if ch != u'!': + if ch != '!': raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected '!', but found %r" % ch, self.get_mark()) length = 1 ch = self.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': length += 1 ch = self.peek(length) - if ch != u'!': + if ch != '!': self.forward(length) raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) + "expected '!', but found %r" % ch, self.get_mark()) length += 1 value = self.prefix(length) self.forward(length) @@ -1383,9 +1375,9 @@ class Scanner(object): chunks = [] length = 0 ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': chunks.append(self.prefix(length)) self.forward(length) length = 0 @@ -1399,26 +1391,25 @@ class Scanner(object): length = 0 if not chunks: raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch.encode('utf-8'), - self.get_mark()) - return u''.join(chunks) + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) def scan_uri_escapes(self, name, start_mark): # See the specification for details. - bytes = [] + codes = [] mark = self.get_mark() - while self.peek() == u'%': + while self.peek() == '%': self.forward() for k in range(2): - if self.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % - (self.peek(k).encode('utf-8')), self.get_mark()) - bytes.append(chr(int(self.prefix(2), 16))) + "expected URI escape sequence of 2 hexadecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) self.forward(2) try: - value = unicode(''.join(bytes), 'utf-8') - except UnicodeDecodeError, exc: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) return value @@ -1432,13 +1423,13 @@ class Scanner(object): # '\u2029 : '\u2029' # default : '' ch = self.peek() - if ch in u'\r\n\x85': - if self.prefix(2) == u'\r\n': + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': self.forward(2) else: self.forward() - return u'\n' - elif ch in u'\u2028\u2029': + return '\n' + elif ch in '\u2028\u2029': self.forward() return ch - return u'' + return '' diff --git a/lib/yaml/serializer.py b/lib/yaml/serializer.py index 0bf1e96..fe911e6 100644 --- a/lib/yaml/serializer.py +++ b/lib/yaml/serializer.py @@ -1,16 +1,16 @@ __all__ = ['Serializer', 'SerializerError'] -from error import YAMLError -from events import * -from nodes import * +from .error import YAMLError +from .events import * +from .nodes import * class SerializerError(YAMLError): pass -class Serializer(object): +class Serializer: - ANCHOR_TEMPLATE = u'id%03d' + ANCHOR_TEMPLATE = 'id%03d' def __init__(self, encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None): diff --git a/lib3/_yaml/__init__.py b/lib3/_yaml/__init__.py deleted file mode 100644 index 7baa8c4..0000000 --- a/lib3/_yaml/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# This is a stub package designed to roughly emulate the _yaml -# extension module, which previously existed as a standalone module -# and has been moved into the `yaml` package namespace. -# It does not perfectly mimic its old counterpart, but should get -# close enough for anyone who's relying on it even when they shouldn't. -import yaml - -# in some circumstances, the yaml module we imoprted may be from a different version, so we need -# to tread carefully when poking at it here (it may not have the attributes we expect) -if not getattr(yaml, '__with_libyaml__', False): - from sys import version_info - - exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError - raise exc("No module named '_yaml'") -else: - from yaml._yaml import * - import warnings - warnings.warn( - 'The _yaml extension module is now located at yaml._yaml' - ' and its location is subject to change. To use the' - ' LibYAML-based parser and emitter, import from `yaml`:' - ' `from yaml import CLoader as Loader, CDumper as Dumper`.', - DeprecationWarning - ) - del warnings - # Don't `del yaml` here because yaml is actually an existing - # namespace member of _yaml. - -__name__ = '_yaml' -# If the module is top-level (i.e. not a part of any specific package) -# then the attribute should be set to ''. -# https://docs.python.org/3.8/library/types.html -__package__ = '' diff --git a/lib3/yaml/__init__.py b/lib3/yaml/__init__.py deleted file mode 100644 index 86d07b5..0000000 --- a/lib3/yaml/__init__.py +++ /dev/null @@ -1,427 +0,0 @@ - -from .error import * - -from .tokens import * -from .events import * -from .nodes import * - -from .loader import * -from .dumper import * - -__version__ = '5.4.1' -try: - from .cyaml import * - __with_libyaml__ = True -except ImportError: - __with_libyaml__ = False - -import io - -#------------------------------------------------------------------------------ -# Warnings control -#------------------------------------------------------------------------------ - -# 'Global' warnings state: -_warnings_enabled = { - 'YAMLLoadWarning': True, -} - -# Get or set global warnings' state -def warnings(settings=None): - if settings is None: - return _warnings_enabled - - if type(settings) is dict: - for key in settings: - if key in _warnings_enabled: - _warnings_enabled[key] = settings[key] - -# Warn when load() is called without Loader=... -class YAMLLoadWarning(RuntimeWarning): - pass - -def load_warning(method): - if _warnings_enabled['YAMLLoadWarning'] is False: - return - - import warnings - - message = ( - "calling yaml.%s() without Loader=... is deprecated, as the " - "default Loader is unsafe. Please read " - "https://msg.pyyaml.org/load for full details." - ) % method - - warnings.warn(message, YAMLLoadWarning, stacklevel=3) - -#------------------------------------------------------------------------------ -def scan(stream, Loader=Loader): - """ - Scan a YAML stream and produce scanning tokens. - """ - loader = Loader(stream) - try: - while loader.check_token(): - yield loader.get_token() - finally: - loader.dispose() - -def parse(stream, Loader=Loader): - """ - Parse a YAML stream and produce parsing events. - """ - loader = Loader(stream) - try: - while loader.check_event(): - yield loader.get_event() - finally: - loader.dispose() - -def compose(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding representation tree. - """ - loader = Loader(stream) - try: - return loader.get_single_node() - finally: - loader.dispose() - -def compose_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding representation trees. - """ - loader = Loader(stream) - try: - while loader.check_node(): - yield loader.get_node() - finally: - loader.dispose() - -def load(stream, Loader=None): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - """ - if Loader is None: - load_warning('load') - Loader = FullLoader - - loader = Loader(stream) - try: - return loader.get_single_data() - finally: - loader.dispose() - -def load_all(stream, Loader=None): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - """ - if Loader is None: - load_warning('load_all') - Loader = FullLoader - - loader = Loader(stream) - try: - while loader.check_data(): - yield loader.get_data() - finally: - loader.dispose() - -def full_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load(stream, FullLoader) - -def full_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load_all(stream, FullLoader) - -def safe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load(stream, SafeLoader) - -def safe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load_all(stream, SafeLoader) - -def unsafe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load(stream, UnsafeLoader) - -def unsafe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load_all(stream, UnsafeLoader) - -def emit(events, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - """ - Emit YAML parsing events into a stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - stream = io.StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - try: - for event in events: - dumper.emit(event) - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize_all(nodes, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of representation trees into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = io.StringIO() - else: - stream = io.BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - try: - dumper.open() - for node in nodes: - dumper.serialize(node) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize(node, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a representation tree into a YAML stream. - If stream is None, return the produced string instead. - """ - return serialize_all([node], stream, Dumper=Dumper, **kwds) - -def dump_all(documents, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - """ - Serialize a sequence of Python objects into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = io.StringIO() - else: - stream = io.BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys) - try: - dumper.open() - for data in documents: - dumper.represent(data) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def dump(data, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a Python object into a YAML stream. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=Dumper, **kwds) - -def safe_dump_all(documents, stream=None, **kwds): - """ - Serialize a sequence of Python objects into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all(documents, stream, Dumper=SafeDumper, **kwds) - -def safe_dump(data, stream=None, **kwds): - """ - Serialize a Python object into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=SafeDumper, **kwds) - -def add_implicit_resolver(tag, regexp, first=None, - Loader=None, Dumper=Dumper): - """ - Add an implicit scalar detector. - If an implicit scalar value matches the given regexp, - the corresponding tag is assigned to the scalar. - first is a sequence of possible initial characters or None. - """ - if Loader is None: - loader.Loader.add_implicit_resolver(tag, regexp, first) - loader.FullLoader.add_implicit_resolver(tag, regexp, first) - loader.UnsafeLoader.add_implicit_resolver(tag, regexp, first) - else: - Loader.add_implicit_resolver(tag, regexp, first) - Dumper.add_implicit_resolver(tag, regexp, first) - -def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper): - """ - Add a path based resolver for the given tag. - A path is a list of keys that forms a path - to a node in the representation tree. - Keys can be string values, integers, or None. - """ - if Loader is None: - loader.Loader.add_path_resolver(tag, path, kind) - loader.FullLoader.add_path_resolver(tag, path, kind) - loader.UnsafeLoader.add_path_resolver(tag, path, kind) - else: - Loader.add_path_resolver(tag, path, kind) - Dumper.add_path_resolver(tag, path, kind) - -def add_constructor(tag, constructor, Loader=None): - """ - Add a constructor for the given tag. - Constructor is a function that accepts a Loader instance - and a node object and produces the corresponding Python object. - """ - if Loader is None: - loader.Loader.add_constructor(tag, constructor) - loader.FullLoader.add_constructor(tag, constructor) - loader.UnsafeLoader.add_constructor(tag, constructor) - else: - Loader.add_constructor(tag, constructor) - -def add_multi_constructor(tag_prefix, multi_constructor, Loader=None): - """ - Add a multi-constructor for the given tag prefix. - Multi-constructor is called for a node if its tag starts with tag_prefix. - Multi-constructor accepts a Loader instance, a tag suffix, - and a node object and produces the corresponding Python object. - """ - if Loader is None: - loader.Loader.add_multi_constructor(tag_prefix, multi_constructor) - loader.FullLoader.add_multi_constructor(tag_prefix, multi_constructor) - loader.UnsafeLoader.add_multi_constructor(tag_prefix, multi_constructor) - else: - Loader.add_multi_constructor(tag_prefix, multi_constructor) - -def add_representer(data_type, representer, Dumper=Dumper): - """ - Add a representer for the given type. - Representer is a function accepting a Dumper instance - and an instance of the given data type - and producing the corresponding representation node. - """ - Dumper.add_representer(data_type, representer) - -def add_multi_representer(data_type, multi_representer, Dumper=Dumper): - """ - Add a representer for the given type. - Multi-representer is a function accepting a Dumper instance - and an instance of the given data type or subtype - and producing the corresponding representation node. - """ - Dumper.add_multi_representer(data_type, multi_representer) - -class YAMLObjectMetaclass(type): - """ - The metaclass for YAMLObject. - """ - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - if isinstance(cls.yaml_loader, list): - for loader in cls.yaml_loader: - loader.add_constructor(cls.yaml_tag, cls.from_yaml) - else: - cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) - - cls.yaml_dumper.add_representer(cls, cls.to_yaml) - -class YAMLObject(metaclass=YAMLObjectMetaclass): - """ - An object that can dump itself to a YAML stream - and load itself from a YAML stream. - """ - - __slots__ = () # no direct instantiation, so allow immutable subclasses - - yaml_loader = [Loader, FullLoader, UnsafeLoader] - yaml_dumper = Dumper - - yaml_tag = None - yaml_flow_style = None - - @classmethod - def from_yaml(cls, loader, node): - """ - Convert a representation node to a Python object. - """ - return loader.construct_yaml_object(node, cls) - - @classmethod - def to_yaml(cls, dumper, data): - """ - Convert a Python object to a representation node. - """ - return dumper.represent_yaml_object(cls.yaml_tag, data, cls, - flow_style=cls.yaml_flow_style) - diff --git a/lib3/yaml/composer.py b/lib3/yaml/composer.py deleted file mode 100644 index 6d15cb4..0000000 --- a/lib3/yaml/composer.py +++ /dev/null @@ -1,139 +0,0 @@ - -__all__ = ['Composer', 'ComposerError'] - -from .error import MarkedYAMLError -from .events import * -from .nodes import * - -class ComposerError(MarkedYAMLError): - pass - -class Composer: - - def __init__(self): - self.anchors = {} - - def check_node(self): - # Drop the STREAM-START event. - if self.check_event(StreamStartEvent): - self.get_event() - - # If there are more documents available? - return not self.check_event(StreamEndEvent) - - def get_node(self): - # Get the root node of the next document. - if not self.check_event(StreamEndEvent): - return self.compose_document() - - def get_single_node(self): - # Drop the STREAM-START event. - self.get_event() - - # Compose a document if the stream is not empty. - document = None - if not self.check_event(StreamEndEvent): - document = self.compose_document() - - # Ensure that the stream contains no more documents. - if not self.check_event(StreamEndEvent): - event = self.get_event() - raise ComposerError("expected a single document in the stream", - document.start_mark, "but found another document", - event.start_mark) - - # Drop the STREAM-END event. - self.get_event() - - return document - - def compose_document(self): - # Drop the DOCUMENT-START event. - self.get_event() - - # Compose the root node. - node = self.compose_node(None, None) - - # Drop the DOCUMENT-END event. - self.get_event() - - self.anchors = {} - return node - - def compose_node(self, parent, index): - if self.check_event(AliasEvent): - event = self.get_event() - anchor = event.anchor - if anchor not in self.anchors: - raise ComposerError(None, None, "found undefined alias %r" - % anchor, event.start_mark) - return self.anchors[anchor] - event = self.peek_event() - anchor = event.anchor - if anchor is not None: - if anchor in self.anchors: - raise ComposerError("found duplicate anchor %r; first occurrence" - % anchor, self.anchors[anchor].start_mark, - "second occurrence", event.start_mark) - self.descend_resolver(parent, index) - if self.check_event(ScalarEvent): - node = self.compose_scalar_node(anchor) - elif self.check_event(SequenceStartEvent): - node = self.compose_sequence_node(anchor) - elif self.check_event(MappingStartEvent): - node = self.compose_mapping_node(anchor) - self.ascend_resolver() - return node - - def compose_scalar_node(self, anchor): - event = self.get_event() - tag = event.tag - if tag is None or tag == '!': - tag = self.resolve(ScalarNode, event.value, event.implicit) - node = ScalarNode(tag, event.value, - event.start_mark, event.end_mark, style=event.style) - if anchor is not None: - self.anchors[anchor] = node - return node - - def compose_sequence_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == '!': - tag = self.resolve(SequenceNode, None, start_event.implicit) - node = SequenceNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - index = 0 - while not self.check_event(SequenceEndEvent): - node.value.append(self.compose_node(node, index)) - index += 1 - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - - def compose_mapping_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == '!': - tag = self.resolve(MappingNode, None, start_event.implicit) - node = MappingNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - while not self.check_event(MappingEndEvent): - #key_event = self.peek_event() - item_key = self.compose_node(node, None) - #if item_key in node.value: - # raise ComposerError("while composing a mapping", start_event.start_mark, - # "found duplicate key", key_event.start_mark) - item_value = self.compose_node(node, item_key) - #node.value[item_key] = item_value - node.value.append((item_key, item_value)) - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - diff --git a/lib3/yaml/constructor.py b/lib3/yaml/constructor.py deleted file mode 100644 index 619acd3..0000000 --- a/lib3/yaml/constructor.py +++ /dev/null @@ -1,748 +0,0 @@ - -__all__ = [ - 'BaseConstructor', - 'SafeConstructor', - 'FullConstructor', - 'UnsafeConstructor', - 'Constructor', - 'ConstructorError' -] - -from .error import * -from .nodes import * - -import collections.abc, datetime, base64, binascii, re, sys, types - -class ConstructorError(MarkedYAMLError): - pass - -class BaseConstructor: - - yaml_constructors = {} - yaml_multi_constructors = {} - - def __init__(self): - self.constructed_objects = {} - self.recursive_objects = {} - self.state_generators = [] - self.deep_construct = False - - def check_data(self): - # If there are more documents available? - return self.check_node() - - def check_state_key(self, key): - """Block special attributes/methods from being set in a newly created - object, to prevent user-controlled methods from being called during - deserialization""" - if self.get_state_keys_blacklist_regexp().match(key): - raise ConstructorError(None, None, - "blacklisted key '%s' in instance state found" % (key,), None) - - def get_data(self): - # Construct and return the next document. - if self.check_node(): - return self.construct_document(self.get_node()) - - def get_single_data(self): - # Ensure that the stream contains a single document and construct it. - node = self.get_single_node() - if node is not None: - return self.construct_document(node) - return None - - def construct_document(self, node): - data = self.construct_object(node) - while self.state_generators: - state_generators = self.state_generators - self.state_generators = [] - for generator in state_generators: - for dummy in generator: - pass - self.constructed_objects = {} - self.recursive_objects = {} - self.deep_construct = False - return data - - def construct_object(self, node, deep=False): - if node in self.constructed_objects: - return self.constructed_objects[node] - if deep: - old_deep = self.deep_construct - self.deep_construct = True - if node in self.recursive_objects: - raise ConstructorError(None, None, - "found unconstructable recursive node", node.start_mark) - self.recursive_objects[node] = None - constructor = None - tag_suffix = None - if node.tag in self.yaml_constructors: - constructor = self.yaml_constructors[node.tag] - else: - for tag_prefix in self.yaml_multi_constructors: - if tag_prefix is not None and node.tag.startswith(tag_prefix): - tag_suffix = node.tag[len(tag_prefix):] - constructor = self.yaml_multi_constructors[tag_prefix] - break - else: - if None in self.yaml_multi_constructors: - tag_suffix = node.tag - constructor = self.yaml_multi_constructors[None] - elif None in self.yaml_constructors: - constructor = self.yaml_constructors[None] - elif isinstance(node, ScalarNode): - constructor = self.__class__.construct_scalar - elif isinstance(node, SequenceNode): - constructor = self.__class__.construct_sequence - elif isinstance(node, MappingNode): - constructor = self.__class__.construct_mapping - if tag_suffix is None: - data = constructor(self, node) - else: - data = constructor(self, tag_suffix, node) - if isinstance(data, types.GeneratorType): - generator = data - data = next(generator) - if self.deep_construct: - for dummy in generator: - pass - else: - self.state_generators.append(generator) - self.constructed_objects[node] = data - del self.recursive_objects[node] - if deep: - self.deep_construct = old_deep - return data - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError(None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - return node.value - - def construct_sequence(self, node, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError(None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - return [self.construct_object(child, deep=deep) - for child in node.value] - - def construct_mapping(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - mapping = {} - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - if not isinstance(key, collections.abc.Hashable): - raise ConstructorError("while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - def construct_pairs(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - pairs = [] - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - value = self.construct_object(value_node, deep=deep) - pairs.append((key, value)) - return pairs - - @classmethod - def add_constructor(cls, tag, constructor): - if not 'yaml_constructors' in cls.__dict__: - cls.yaml_constructors = cls.yaml_constructors.copy() - cls.yaml_constructors[tag] = constructor - - @classmethod - def add_multi_constructor(cls, tag_prefix, multi_constructor): - if not 'yaml_multi_constructors' in cls.__dict__: - cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() - cls.yaml_multi_constructors[tag_prefix] = multi_constructor - -class SafeConstructor(BaseConstructor): - - def construct_scalar(self, node): - if isinstance(node, MappingNode): - for key_node, value_node in node.value: - if key_node.tag == 'tag:yaml.org,2002:value': - return self.construct_scalar(value_node) - return super().construct_scalar(node) - - def flatten_mapping(self, node): - merge = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == 'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - self.flatten_mapping(value_node) - merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - self.flatten_mapping(subnode) - submerge.append(subnode.value) - submerge.reverse() - for value in submerge: - merge.extend(value) - else: - raise ConstructorError("while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == 'tag:yaml.org,2002:value': - key_node.tag = 'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - if merge: - node.value = merge + node.value - - def construct_mapping(self, node, deep=False): - if isinstance(node, MappingNode): - self.flatten_mapping(node) - return super().construct_mapping(node, deep=deep) - - def construct_yaml_null(self, node): - self.construct_scalar(node) - return None - - bool_values = { - 'yes': True, - 'no': False, - 'true': True, - 'false': False, - 'on': True, - 'off': False, - } - - def construct_yaml_bool(self, node): - value = self.construct_scalar(node) - return self.bool_values[value.lower()] - - def construct_yaml_int(self, node): - value = self.construct_scalar(node) - value = value.replace('_', '') - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '0': - return 0 - elif value.startswith('0b'): - return sign*int(value[2:], 2) - elif value.startswith('0x'): - return sign*int(value[2:], 16) - elif value[0] == '0': - return sign*int(value, 8) - elif ':' in value: - digits = [int(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*int(value) - - inf_value = 1e300 - while inf_value != inf_value*inf_value: - inf_value *= inf_value - nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). - - def construct_yaml_float(self, node): - value = self.construct_scalar(node) - value = value.replace('_', '').lower() - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '.inf': - return sign*self.inf_value - elif value == '.nan': - return self.nan_value - elif ':' in value: - digits = [float(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0.0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*float(value) - - def construct_yaml_binary(self, node): - try: - value = self.construct_scalar(node).encode('ascii') - except UnicodeEncodeError as exc: - raise ConstructorError(None, None, - "failed to convert base64 data into ascii: %s" % exc, - node.start_mark) - try: - if hasattr(base64, 'decodebytes'): - return base64.decodebytes(value) - else: - return base64.decodestring(value) - except binascii.Error as exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - timestamp_regexp = re.compile( - r'''^(?P[0-9][0-9][0-9][0-9]) - -(?P[0-9][0-9]?) - -(?P[0-9][0-9]?) - (?:(?:[Tt]|[ \t]+) - (?P[0-9][0-9]?) - :(?P[0-9][0-9]) - :(?P[0-9][0-9]) - (?:\.(?P[0-9]*))? - (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) - (?::(?P[0-9][0-9]))?))?)?$''', re.X) - - def construct_yaml_timestamp(self, node): - value = self.construct_scalar(node) - match = self.timestamp_regexp.match(node.value) - values = match.groupdict() - year = int(values['year']) - month = int(values['month']) - day = int(values['day']) - if not values['hour']: - return datetime.date(year, month, day) - hour = int(values['hour']) - minute = int(values['minute']) - second = int(values['second']) - fraction = 0 - tzinfo = None - if values['fraction']: - fraction = values['fraction'][:6] - while len(fraction) < 6: - fraction += '0' - fraction = int(fraction) - if values['tz_sign']: - tz_hour = int(values['tz_hour']) - tz_minute = int(values['tz_minute'] or 0) - delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) - if values['tz_sign'] == '-': - delta = -delta - tzinfo = datetime.timezone(delta) - elif values['tz']: - tzinfo = datetime.timezone.utc - return datetime.datetime(year, month, day, hour, minute, second, fraction, - tzinfo=tzinfo) - - def construct_yaml_omap(self, node): - # Note: we do not check for duplicate keys, because it's too - # CPU-expensive. - omap = [] - yield omap - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - omap.append((key, value)) - - def construct_yaml_pairs(self, node): - # Note: the same code as `construct_yaml_omap`. - pairs = [] - yield pairs - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - pairs.append((key, value)) - - def construct_yaml_set(self, node): - data = set() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_str(self, node): - return self.construct_scalar(node) - - def construct_yaml_seq(self, node): - data = [] - yield data - data.extend(self.construct_sequence(node)) - - def construct_yaml_map(self, node): - data = {} - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_object(self, node, cls): - data = cls.__new__(cls) - yield data - if hasattr(data, '__setstate__'): - state = self.construct_mapping(node, deep=True) - data.__setstate__(state) - else: - state = self.construct_mapping(node) - data.__dict__.update(state) - - def construct_undefined(self, node): - raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag, - node.start_mark) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:null', - SafeConstructor.construct_yaml_null) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:bool', - SafeConstructor.construct_yaml_bool) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:int', - SafeConstructor.construct_yaml_int) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:float', - SafeConstructor.construct_yaml_float) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:binary', - SafeConstructor.construct_yaml_binary) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:timestamp', - SafeConstructor.construct_yaml_timestamp) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:omap', - SafeConstructor.construct_yaml_omap) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:pairs', - SafeConstructor.construct_yaml_pairs) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:set', - SafeConstructor.construct_yaml_set) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:str', - SafeConstructor.construct_yaml_str) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:seq', - SafeConstructor.construct_yaml_seq) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:map', - SafeConstructor.construct_yaml_map) - -SafeConstructor.add_constructor(None, - SafeConstructor.construct_undefined) - -class FullConstructor(SafeConstructor): - # 'extend' is blacklisted because it is used by - # construct_python_object_apply to add `listitems` to a newly generate - # python instance - def get_state_keys_blacklist(self): - return ['^extend$', '^__.*__$'] - - def get_state_keys_blacklist_regexp(self): - if not hasattr(self, 'state_keys_blacklist_regexp'): - self.state_keys_blacklist_regexp = re.compile('(' + '|'.join(self.get_state_keys_blacklist()) + ')') - return self.state_keys_blacklist_regexp - - def construct_python_str(self, node): - return self.construct_scalar(node) - - def construct_python_unicode(self, node): - return self.construct_scalar(node) - - def construct_python_bytes(self, node): - try: - value = self.construct_scalar(node).encode('ascii') - except UnicodeEncodeError as exc: - raise ConstructorError(None, None, - "failed to convert base64 data into ascii: %s" % exc, - node.start_mark) - try: - if hasattr(base64, 'decodebytes'): - return base64.decodebytes(value) - else: - return base64.decodestring(value) - except binascii.Error as exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - def construct_python_long(self, node): - return self.construct_yaml_int(node) - - def construct_python_complex(self, node): - return complex(self.construct_scalar(node)) - - def construct_python_tuple(self, node): - return tuple(self.construct_sequence(node)) - - def find_python_module(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python module", mark, - "expected non-empty name appended to the tag", mark) - if unsafe: - try: - __import__(name) - except ImportError as exc: - raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name, exc), mark) - if name not in sys.modules: - raise ConstructorError("while constructing a Python module", mark, - "module %r is not imported" % name, mark) - return sys.modules[name] - - def find_python_name(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python object", mark, - "expected non-empty name appended to the tag", mark) - if '.' in name: - module_name, object_name = name.rsplit('.', 1) - else: - module_name = 'builtins' - object_name = name - if unsafe: - try: - __import__(module_name) - except ImportError as exc: - raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name, exc), mark) - if module_name not in sys.modules: - raise ConstructorError("while constructing a Python object", mark, - "module %r is not imported" % module_name, mark) - module = sys.modules[module_name] - if not hasattr(module, object_name): - raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" - % (object_name, module.__name__), mark) - return getattr(module, object_name) - - def construct_python_name(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value, node.start_mark) - return self.find_python_name(suffix, node.start_mark) - - def construct_python_module(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value, node.start_mark) - return self.find_python_module(suffix, node.start_mark) - - def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False, unsafe=False): - if not args: - args = [] - if not kwds: - kwds = {} - cls = self.find_python_name(suffix, node.start_mark) - if not (unsafe or isinstance(cls, type)): - raise ConstructorError("while constructing a Python instance", node.start_mark, - "expected a class, but found %r" % type(cls), - node.start_mark) - if newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - - def set_python_instance_state(self, instance, state, unsafe=False): - if hasattr(instance, '__setstate__'): - instance.__setstate__(state) - else: - slotstate = {} - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if hasattr(instance, '__dict__'): - if not unsafe and state: - for key in state.keys(): - self.check_state_key(key) - instance.__dict__.update(state) - elif state: - slotstate.update(state) - for key, value in slotstate.items(): - if not unsafe: - self.check_state_key(key) - setattr(instance, key, value) - - def construct_python_object(self, suffix, node): - # Format: - # !!python/object:module.name { ... state ... } - instance = self.make_python_instance(suffix, node, newobj=True) - yield instance - deep = hasattr(instance, '__setstate__') - state = self.construct_mapping(node, deep=deep) - self.set_python_instance_state(instance, state) - - def construct_python_object_apply(self, suffix, node, newobj=False): - # Format: - # !!python/object/apply # (or !!python/object/new) - # args: [ ... arguments ... ] - # kwds: { ... keywords ... } - # state: ... state ... - # listitems: [ ... listitems ... ] - # dictitems: { ... dictitems ... } - # or short format: - # !!python/object/apply [ ... arguments ... ] - # The difference between !!python/object/apply and !!python/object/new - # is how an object is created, check make_python_instance for details. - if isinstance(node, SequenceNode): - args = self.construct_sequence(node, deep=True) - kwds = {} - state = {} - listitems = [] - dictitems = {} - else: - value = self.construct_mapping(node, deep=True) - args = value.get('args', []) - kwds = value.get('kwds', {}) - state = value.get('state', {}) - listitems = value.get('listitems', []) - dictitems = value.get('dictitems', {}) - instance = self.make_python_instance(suffix, node, args, kwds, newobj) - if state: - self.set_python_instance_state(instance, state) - if listitems: - instance.extend(listitems) - if dictitems: - for key in dictitems: - instance[key] = dictitems[key] - return instance - - def construct_python_object_new(self, suffix, node): - return self.construct_python_object_apply(suffix, node, newobj=True) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/none', - FullConstructor.construct_yaml_null) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/bool', - FullConstructor.construct_yaml_bool) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/str', - FullConstructor.construct_python_str) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/unicode', - FullConstructor.construct_python_unicode) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/bytes', - FullConstructor.construct_python_bytes) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/int', - FullConstructor.construct_yaml_int) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/long', - FullConstructor.construct_python_long) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/float', - FullConstructor.construct_yaml_float) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/complex', - FullConstructor.construct_python_complex) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/list', - FullConstructor.construct_yaml_seq) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/tuple', - FullConstructor.construct_python_tuple) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/dict', - FullConstructor.construct_yaml_map) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/name:', - FullConstructor.construct_python_name) - -class UnsafeConstructor(FullConstructor): - - def find_python_module(self, name, mark): - return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) - - def find_python_name(self, name, mark): - return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) - - def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): - return super(UnsafeConstructor, self).make_python_instance( - suffix, node, args, kwds, newobj, unsafe=True) - - def set_python_instance_state(self, instance, state): - return super(UnsafeConstructor, self).set_python_instance_state( - instance, state, unsafe=True) - -UnsafeConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/module:', - UnsafeConstructor.construct_python_module) - -UnsafeConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object:', - UnsafeConstructor.construct_python_object) - -UnsafeConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object/new:', - UnsafeConstructor.construct_python_object_new) - -UnsafeConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object/apply:', - UnsafeConstructor.construct_python_object_apply) - -# Constructor is same as UnsafeConstructor. Need to leave this in place in case -# people have extended it directly. -class Constructor(UnsafeConstructor): - pass diff --git a/lib3/yaml/cyaml.py b/lib3/yaml/cyaml.py deleted file mode 100644 index 0c21345..0000000 --- a/lib3/yaml/cyaml.py +++ /dev/null @@ -1,101 +0,0 @@ - -__all__ = [ - 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper' -] - -from yaml._yaml import CParser, CEmitter - -from .constructor import * - -from .serializer import * -from .representer import * - -from .resolver import * - -class CBaseLoader(CParser, BaseConstructor, BaseResolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class CSafeLoader(CParser, SafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class CFullLoader(CParser, FullConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - UnsafeConstructor.__init__(self) - Resolver.__init__(self) - -class CLoader(CParser, Constructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - Constructor.__init__(self) - Resolver.__init__(self) - -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CDumper(CEmitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/lib3/yaml/dumper.py b/lib3/yaml/dumper.py deleted file mode 100644 index 6aadba5..0000000 --- a/lib3/yaml/dumper.py +++ /dev/null @@ -1,62 +0,0 @@ - -__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] - -from .emitter import * -from .serializer import * -from .representer import * -from .resolver import * - -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class Dumper(Emitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py deleted file mode 100644 index a664d01..0000000 --- a/lib3/yaml/emitter.py +++ /dev/null @@ -1,1137 +0,0 @@ - -# Emitter expects events obeying the following grammar: -# stream ::= STREAM-START document* STREAM-END -# document ::= DOCUMENT-START node DOCUMENT-END -# node ::= SCALAR | sequence | mapping -# sequence ::= SEQUENCE-START node* SEQUENCE-END -# mapping ::= MAPPING-START (node node)* MAPPING-END - -__all__ = ['Emitter', 'EmitterError'] - -from .error import YAMLError -from .events import * - -class EmitterError(YAMLError): - pass - -class ScalarAnalysis: - def __init__(self, scalar, empty, multiline, - allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, - allow_block): - self.scalar = scalar - self.empty = empty - self.multiline = multiline - self.allow_flow_plain = allow_flow_plain - self.allow_block_plain = allow_block_plain - self.allow_single_quoted = allow_single_quoted - self.allow_double_quoted = allow_double_quoted - self.allow_block = allow_block - -class Emitter: - - DEFAULT_TAG_PREFIXES = { - '!' : '!', - 'tag:yaml.org,2002:' : '!!', - } - - def __init__(self, stream, canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - - # The stream should have the methods `write` and possibly `flush`. - self.stream = stream - - # Encoding can be overridden by STREAM-START. - self.encoding = None - - # Emitter is a state machine with a stack of states to handle nested - # structures. - self.states = [] - self.state = self.expect_stream_start - - # Current event and the event queue. - self.events = [] - self.event = None - - # The current indentation level and the stack of previous indents. - self.indents = [] - self.indent = None - - # Flow level. - self.flow_level = 0 - - # Contexts. - self.root_context = False - self.sequence_context = False - self.mapping_context = False - self.simple_key_context = False - - # Characteristics of the last emitted character: - # - current position. - # - is it a whitespace? - # - is it an indention character - # (indentation space, '-', '?', or ':')? - self.line = 0 - self.column = 0 - self.whitespace = True - self.indention = True - - # Whether the document requires an explicit document indicator - self.open_ended = False - - # Formatting details. - self.canonical = canonical - self.allow_unicode = allow_unicode - self.best_indent = 2 - if indent and 1 < indent < 10: - self.best_indent = indent - self.best_width = 80 - if width and width > self.best_indent*2: - self.best_width = width - self.best_line_break = '\n' - if line_break in ['\r', '\n', '\r\n']: - self.best_line_break = line_break - - # Tag prefixes. - self.tag_prefixes = None - - # Prepared anchor and tag. - self.prepared_anchor = None - self.prepared_tag = None - - # Scalar analysis and style. - self.analysis = None - self.style = None - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def emit(self, event): - self.events.append(event) - while not self.need_more_events(): - self.event = self.events.pop(0) - self.state() - self.event = None - - # In some cases, we wait for a few next events before emitting. - - def need_more_events(self): - if not self.events: - return True - event = self.events[0] - if isinstance(event, DocumentStartEvent): - return self.need_events(1) - elif isinstance(event, SequenceStartEvent): - return self.need_events(2) - elif isinstance(event, MappingStartEvent): - return self.need_events(3) - else: - return False - - def need_events(self, count): - level = 0 - for event in self.events[1:]: - if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): - level += 1 - elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): - level -= 1 - elif isinstance(event, StreamEndEvent): - level = -1 - if level < 0: - return False - return (len(self.events) < count+1) - - def increase_indent(self, flow=False, indentless=False): - self.indents.append(self.indent) - if self.indent is None: - if flow: - self.indent = self.best_indent - else: - self.indent = 0 - elif not indentless: - self.indent += self.best_indent - - # States. - - # Stream handlers. - - def expect_stream_start(self): - if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not hasattr(self.stream, 'encoding'): - self.encoding = self.event.encoding - self.write_stream_start() - self.state = self.expect_first_document_start - else: - raise EmitterError("expected StreamStartEvent, but got %s" - % self.event) - - def expect_nothing(self): - raise EmitterError("expected nothing, but got %s" % self.event) - - # Document handlers. - - def expect_first_document_start(self): - return self.expect_document_start(first=True) - - def expect_document_start(self, first=False): - if isinstance(self.event, DocumentStartEvent): - if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator('...', True) - self.write_indent() - if self.event.version: - version_text = self.prepare_version(self.event.version) - self.write_version_directive(version_text) - self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() - if self.event.tags: - handles = sorted(self.event.tags.keys()) - for handle in handles: - prefix = self.event.tags[handle] - self.tag_prefixes[prefix] = handle - handle_text = self.prepare_tag_handle(handle) - prefix_text = self.prepare_tag_prefix(prefix) - self.write_tag_directive(handle_text, prefix_text) - implicit = (first and not self.event.explicit and not self.canonical - and not self.event.version and not self.event.tags - and not self.check_empty_document()) - if not implicit: - self.write_indent() - self.write_indicator('---', True) - if self.canonical: - self.write_indent() - self.state = self.expect_document_root - elif isinstance(self.event, StreamEndEvent): - if self.open_ended: - self.write_indicator('...', True) - self.write_indent() - self.write_stream_end() - self.state = self.expect_nothing - else: - raise EmitterError("expected DocumentStartEvent, but got %s" - % self.event) - - def expect_document_end(self): - if isinstance(self.event, DocumentEndEvent): - self.write_indent() - if self.event.explicit: - self.write_indicator('...', True) - self.write_indent() - self.flush_stream() - self.state = self.expect_document_start - else: - raise EmitterError("expected DocumentEndEvent, but got %s" - % self.event) - - def expect_document_root(self): - self.states.append(self.expect_document_end) - self.expect_node(root=True) - - # Node handlers. - - def expect_node(self, root=False, sequence=False, mapping=False, - simple_key=False): - self.root_context = root - self.sequence_context = sequence - self.mapping_context = mapping - self.simple_key_context = simple_key - if isinstance(self.event, AliasEvent): - self.expect_alias() - elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor('&') - self.process_tag() - if isinstance(self.event, ScalarEvent): - self.expect_scalar() - elif isinstance(self.event, SequenceStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_sequence(): - self.expect_flow_sequence() - else: - self.expect_block_sequence() - elif isinstance(self.event, MappingStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_mapping(): - self.expect_flow_mapping() - else: - self.expect_block_mapping() - else: - raise EmitterError("expected NodeEvent, but got %s" % self.event) - - def expect_alias(self): - if self.event.anchor is None: - raise EmitterError("anchor is not specified for alias") - self.process_anchor('*') - self.state = self.states.pop() - - def expect_scalar(self): - self.increase_indent(flow=True) - self.process_scalar() - self.indent = self.indents.pop() - self.state = self.states.pop() - - # Flow sequence handlers. - - def expect_flow_sequence(self): - self.write_indicator('[', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_sequence_item - - def expect_first_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(']', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - def expect_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(',', False) - self.write_indent() - self.write_indicator(']', False) - self.state = self.states.pop() - else: - self.write_indicator(',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - # Flow mapping handlers. - - def expect_flow_mapping(self): - self.write_indicator('{', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_mapping_key - - def expect_first_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator('}', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(',', False) - self.write_indent() - self.write_indicator('}', False) - self.state = self.states.pop() - else: - self.write_indicator(',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_simple_value(self): - self.write_indicator(':', False) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - def expect_flow_mapping_value(self): - if self.canonical or self.column > self.best_width: - self.write_indent() - self.write_indicator(':', True) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - # Block sequence handlers. - - def expect_block_sequence(self): - indentless = (self.mapping_context and not self.indention) - self.increase_indent(flow=False, indentless=indentless) - self.state = self.expect_first_block_sequence_item - - def expect_first_block_sequence_item(self): - return self.expect_block_sequence_item(first=True) - - def expect_block_sequence_item(self, first=False): - if not first and isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - self.write_indicator('-', True, indention=True) - self.states.append(self.expect_block_sequence_item) - self.expect_node(sequence=True) - - # Block mapping handlers. - - def expect_block_mapping(self): - self.increase_indent(flow=False) - self.state = self.expect_first_block_mapping_key - - def expect_first_block_mapping_key(self): - return self.expect_block_mapping_key(first=True) - - def expect_block_mapping_key(self, first=False): - if not first and isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - if self.check_simple_key(): - self.states.append(self.expect_block_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True, indention=True) - self.states.append(self.expect_block_mapping_value) - self.expect_node(mapping=True) - - def expect_block_mapping_simple_value(self): - self.write_indicator(':', False) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - def expect_block_mapping_value(self): - self.write_indent() - self.write_indicator(':', True, indention=True) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - # Checkers. - - def check_empty_sequence(self): - return (isinstance(self.event, SequenceStartEvent) and self.events - and isinstance(self.events[0], SequenceEndEvent)) - - def check_empty_mapping(self): - return (isinstance(self.event, MappingStartEvent) and self.events - and isinstance(self.events[0], MappingEndEvent)) - - def check_empty_document(self): - if not isinstance(self.event, DocumentStartEvent) or not self.events: - return False - event = self.events[0] - return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == '') - - def check_simple_key(self): - length = 0 - if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - length += len(self.prepared_anchor) - if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ - and self.event.tag is not None: - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) - length += len(self.prepared_tag) - if isinstance(self.event, ScalarEvent): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - length += len(self.analysis.scalar) - return (length < 128 and (isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) - and not self.analysis.empty and not self.analysis.multiline) - or self.check_empty_sequence() or self.check_empty_mapping())) - - # Anchor, Tag, and Scalar processors. - - def process_anchor(self, indicator): - if self.event.anchor is None: - self.prepared_anchor = None - return - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - if self.prepared_anchor: - self.write_indicator(indicator+self.prepared_anchor, True) - self.prepared_anchor = None - - def process_tag(self): - tag = self.event.tag - if isinstance(self.event, ScalarEvent): - if self.style is None: - self.style = self.choose_scalar_style() - if ((not self.canonical or tag is None) and - ((self.style == '' and self.event.implicit[0]) - or (self.style != '' and self.event.implicit[1]))): - self.prepared_tag = None - return - if self.event.implicit[0] and tag is None: - tag = '!' - self.prepared_tag = None - else: - if (not self.canonical or tag is None) and self.event.implicit: - self.prepared_tag = None - return - if tag is None: - raise EmitterError("tag is not specified") - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) - if self.prepared_tag: - self.write_indicator(self.prepared_tag, True) - self.prepared_tag = None - - def choose_scalar_style(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.event.style == '"' or self.canonical: - return '"' - if not self.event.style and self.event.implicit[0]: - if (not (self.simple_key_context and - (self.analysis.empty or self.analysis.multiline)) - and (self.flow_level and self.analysis.allow_flow_plain - or (not self.flow_level and self.analysis.allow_block_plain))): - return '' - if self.event.style and self.event.style in '|>': - if (not self.flow_level and not self.simple_key_context - and self.analysis.allow_block): - return self.event.style - if not self.event.style or self.event.style == '\'': - if (self.analysis.allow_single_quoted and - not (self.simple_key_context and self.analysis.multiline)): - return '\'' - return '"' - - def process_scalar(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.style is None: - self.style = self.choose_scalar_style() - split = (not self.simple_key_context) - #if self.analysis.multiline and split \ - # and (not self.style or self.style in '\'\"'): - # self.write_indent() - if self.style == '"': - self.write_double_quoted(self.analysis.scalar, split) - elif self.style == '\'': - self.write_single_quoted(self.analysis.scalar, split) - elif self.style == '>': - self.write_folded(self.analysis.scalar) - elif self.style == '|': - self.write_literal(self.analysis.scalar) - else: - self.write_plain(self.analysis.scalar, split) - self.analysis = None - self.style = None - - # Analyzers. - - def prepare_version(self, version): - major, minor = version - if major != 1: - raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return '%d.%d' % (major, minor) - - def prepare_tag_handle(self, handle): - if not handle: - raise EmitterError("tag handle must not be empty") - if handle[0] != '!' or handle[-1] != '!': - raise EmitterError("tag handle must start and end with '!': %r" % handle) - for ch in handle[1:-1]: - if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_'): - raise EmitterError("invalid character %r in the tag handle: %r" - % (ch, handle)) - return handle - - def prepare_tag_prefix(self, prefix): - if not prefix: - raise EmitterError("tag prefix must not be empty") - chunks = [] - start = end = 0 - if prefix[0] == '!': - end = 1 - while end < len(prefix): - ch = prefix[end] - if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?!:@&=+$,_.~*\'()[]': - end += 1 - else: - if start < end: - chunks.append(prefix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append('%%%02X' % ord(ch)) - if start < end: - chunks.append(prefix[start:end]) - return ''.join(chunks) - - def prepare_tag(self, tag): - if not tag: - raise EmitterError("tag must not be empty") - if tag == '!': - return tag - handle = None - suffix = tag - prefixes = sorted(self.tag_prefixes.keys()) - for prefix in prefixes: - if tag.startswith(prefix) \ - and (prefix == '!' or len(prefix) < len(tag)): - handle = self.tag_prefixes[prefix] - suffix = tag[len(prefix):] - chunks = [] - start = end = 0 - while end < len(suffix): - ch = suffix[end] - if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?:@&=+$,_.~*\'()[]' \ - or (ch == '!' and handle != '!'): - end += 1 - else: - if start < end: - chunks.append(suffix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append('%%%02X' % ch) - if start < end: - chunks.append(suffix[start:end]) - suffix_text = ''.join(chunks) - if handle: - return '%s%s' % (handle, suffix_text) - else: - return '!<%s>' % suffix_text - - def prepare_anchor(self, anchor): - if not anchor: - raise EmitterError("anchor must not be empty") - for ch in anchor: - if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_'): - raise EmitterError("invalid character %r in the anchor: %r" - % (ch, anchor)) - return anchor - - def analyze_scalar(self, scalar): - - # Empty scalar is a special case. - if not scalar: - return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, - allow_flow_plain=False, allow_block_plain=True, - allow_single_quoted=True, allow_double_quoted=True, - allow_block=False) - - # Indicators and special characters. - block_indicators = False - flow_indicators = False - line_breaks = False - special_characters = False - - # Important whitespace combinations. - leading_space = False - leading_break = False - trailing_space = False - trailing_break = False - break_space = False - space_break = False - - # Check document indicators. - if scalar.startswith('---') or scalar.startswith('...'): - block_indicators = True - flow_indicators = True - - # First character or preceded by a whitespace. - preceded_by_whitespace = True - - # Last character or followed by a whitespace. - followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in '\0 \t\r\n\x85\u2028\u2029') - - # The previous character is a space. - previous_space = False - - # The previous character is a break. - previous_break = False - - index = 0 - while index < len(scalar): - ch = scalar[index] - - # Check for indicators. - if index == 0: - # Leading indicators are special characters. - if ch in '#,[]{}&*!|>\'\"%@`': - flow_indicators = True - block_indicators = True - if ch in '?:': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == '-' and followed_by_whitespace: - flow_indicators = True - block_indicators = True - else: - # Some indicators cannot appear within a scalar as well. - if ch in ',?[]{}': - flow_indicators = True - if ch == ':': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == '#' and preceded_by_whitespace: - flow_indicators = True - block_indicators = True - - # Check for line breaks, special, and unicode characters. - if ch in '\n\x85\u2028\u2029': - line_breaks = True - if not (ch == '\n' or '\x20' <= ch <= '\x7E'): - if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' - or '\uE000' <= ch <= '\uFFFD' - or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': - unicode_characters = True - if not self.allow_unicode: - special_characters = True - else: - special_characters = True - - # Detect important whitespace combinations. - if ch == ' ': - if index == 0: - leading_space = True - if index == len(scalar)-1: - trailing_space = True - if previous_break: - break_space = True - previous_space = True - previous_break = False - elif ch in '\n\x85\u2028\u2029': - if index == 0: - leading_break = True - if index == len(scalar)-1: - trailing_break = True - if previous_space: - space_break = True - previous_space = False - previous_break = True - else: - previous_space = False - previous_break = False - - # Prepare for the next character. - index += 1 - preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') - followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') - - # Let's decide what styles are allowed. - allow_flow_plain = True - allow_block_plain = True - allow_single_quoted = True - allow_double_quoted = True - allow_block = True - - # Leading and trailing whitespaces are bad for plain scalars. - if (leading_space or leading_break - or trailing_space or trailing_break): - allow_flow_plain = allow_block_plain = False - - # We do not permit trailing spaces for block scalars. - if trailing_space: - allow_block = False - - # Spaces at the beginning of a new line are only acceptable for block - # scalars. - if break_space: - allow_flow_plain = allow_block_plain = allow_single_quoted = False - - # Spaces followed by breaks, as well as special character are only - # allowed for double quoted scalars. - if space_break or special_characters: - allow_flow_plain = allow_block_plain = \ - allow_single_quoted = allow_block = False - - # Although the plain scalar writer supports breaks, we never emit - # multiline plain scalars. - if line_breaks: - allow_flow_plain = allow_block_plain = False - - # Flow indicators are forbidden for flow plain scalars. - if flow_indicators: - allow_flow_plain = False - - # Block indicators are forbidden for block plain scalars. - if block_indicators: - allow_block_plain = False - - return ScalarAnalysis(scalar=scalar, - empty=False, multiline=line_breaks, - allow_flow_plain=allow_flow_plain, - allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, - allow_double_quoted=allow_double_quoted, - allow_block=allow_block) - - # Writers. - - def flush_stream(self): - if hasattr(self.stream, 'flush'): - self.stream.flush() - - def write_stream_start(self): - # Write BOM if needed. - if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write('\uFEFF'.encode(self.encoding)) - - def write_stream_end(self): - self.flush_stream() - - def write_indicator(self, indicator, need_whitespace, - whitespace=False, indention=False): - if self.whitespace or not need_whitespace: - data = indicator - else: - data = ' '+indicator - self.whitespace = whitespace - self.indention = self.indention and indention - self.column += len(data) - self.open_ended = False - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_indent(self): - indent = self.indent or 0 - if not self.indention or self.column > indent \ - or (self.column == indent and not self.whitespace): - self.write_line_break() - if self.column < indent: - self.whitespace = True - data = ' '*(indent-self.column) - self.column = indent - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_line_break(self, data=None): - if data is None: - data = self.best_line_break - self.whitespace = True - self.indention = True - self.line += 1 - self.column = 0 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_version_directive(self, version_text): - data = '%%YAML %s' % version_text - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - def write_tag_directive(self, handle_text, prefix_text): - data = '%%TAG %s %s' % (handle_text, prefix_text) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - # Scalar streams. - - def write_single_quoted(self, text, split=True): - self.write_indicator('\'', True) - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch is None or ch != ' ': - if start+1 == end and self.column > self.best_width and split \ - and start != 0 and end != len(text): - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - if text[start] == '\n': - self.write_line_break() - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch == '\'': - data = '\'\'' - self.column += 2 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end + 1 - if ch is not None: - spaces = (ch == ' ') - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 - self.write_indicator('\'', False) - - ESCAPE_REPLACEMENTS = { - '\0': '0', - '\x07': 'a', - '\x08': 'b', - '\x09': 't', - '\x0A': 'n', - '\x0B': 'v', - '\x0C': 'f', - '\x0D': 'r', - '\x1B': 'e', - '\"': '\"', - '\\': '\\', - '\x85': 'N', - '\xA0': '_', - '\u2028': 'L', - '\u2029': 'P', - } - - def write_double_quoted(self, text, split=True): - self.write_indicator('"', True) - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ - or not ('\x20' <= ch <= '\x7E' - or (self.allow_unicode - and ('\xA0' <= ch <= '\uD7FF' - or '\uE000' <= ch <= '\uFFFD'))): - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - if ch in self.ESCAPE_REPLACEMENTS: - data = '\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= '\xFF': - data = '\\x%02X' % ord(ch) - elif ch <= '\uFFFF': - data = '\\u%04X' % ord(ch) - else: - data = '\\U%08X' % ord(ch) - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end+1 - if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ - and self.column+(end-start) > self.best_width and split: - data = text[start:end]+'\\' - if start < end: - start = end - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_indent() - self.whitespace = False - self.indention = False - if text[start] == ' ': - data = '\\' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - end += 1 - self.write_indicator('"', False) - - def determine_block_hints(self, text): - hints = '' - if text: - if text[0] in ' \n\x85\u2028\u2029': - hints += str(self.best_indent) - if text[-1] not in '\n\x85\u2028\u2029': - hints += '-' - elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': - hints += '+' - return hints - - def write_folded(self, text): - hints = self.determine_block_hints(text) - self.write_indicator('>'+hints, True) - if hints[-1:] == '+': - self.open_ended = True - self.write_line_break() - leading_space = True - spaces = False - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != ' ' \ - and text[start] == '\n': - self.write_line_break() - leading_space = (ch == ' ') - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - elif spaces: - if ch != ' ': - if start+1 == end and self.column > self.best_width: - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in '\n\x85\u2028\u2029') - spaces = (ch == ' ') - end += 1 - - def write_literal(self, text): - hints = self.determine_block_hints(text) - self.write_indicator('|'+hints, True) - if hints[-1:] == '+': - self.open_ended = True - self.write_line_break() - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - else: - if ch is None or ch in '\n\x85\u2028\u2029': - data = text[start:end] - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 - - def write_plain(self, text, split=True): - if self.root_context: - self.open_ended = True - if not text: - return - if not self.whitespace: - data = ' ' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.whitespace = False - self.indention = False - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch != ' ': - if start+1 == end and self.column > self.best_width and split: - self.write_indent() - self.whitespace = False - self.indention = False - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch not in '\n\x85\u2028\u2029': - if text[start] == '\n': - self.write_line_break() - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - self.whitespace = False - self.indention = False - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - spaces = (ch == ' ') - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 diff --git a/lib3/yaml/error.py b/lib3/yaml/error.py deleted file mode 100644 index b796b4d..0000000 --- a/lib3/yaml/error.py +++ /dev/null @@ -1,75 +0,0 @@ - -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] - -class Mark: - - def __init__(self, name, index, line, column, buffer, pointer): - self.name = name - self.index = index - self.line = line - self.column = column - self.buffer = buffer - self.pointer = pointer - - def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: - return None - head = '' - start = self.pointer - while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': - start -= 1 - if self.pointer-start > max_length/2-1: - head = ' ... ' - start += 5 - break - tail = '' - end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': - end += 1 - if end-self.pointer > max_length/2-1: - tail = ' ... ' - end -= 5 - break - snippet = self.buffer[start:end] - return ' '*indent + head + snippet + tail + '\n' \ - + ' '*(indent+self.pointer-start+len(head)) + '^' - - def __str__(self): - snippet = self.get_snippet() - where = " in \"%s\", line %d, column %d" \ - % (self.name, self.line+1, self.column+1) - if snippet is not None: - where += ":\n"+snippet - return where - -class YAMLError(Exception): - pass - -class MarkedYAMLError(YAMLError): - - def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None, note=None): - self.context = context - self.context_mark = context_mark - self.problem = problem - self.problem_mark = problem_mark - self.note = note - - def __str__(self): - lines = [] - if self.context is not None: - lines.append(self.context) - if self.context_mark is not None \ - and (self.problem is None or self.problem_mark is None - or self.context_mark.name != self.problem_mark.name - or self.context_mark.line != self.problem_mark.line - or self.context_mark.column != self.problem_mark.column): - lines.append(str(self.context_mark)) - if self.problem is not None: - lines.append(self.problem) - if self.problem_mark is not None: - lines.append(str(self.problem_mark)) - if self.note is not None: - lines.append(self.note) - return '\n'.join(lines) - diff --git a/lib3/yaml/events.py b/lib3/yaml/events.py deleted file mode 100644 index f79ad38..0000000 --- a/lib3/yaml/events.py +++ /dev/null @@ -1,86 +0,0 @@ - -# Abstract classes. - -class Event(object): - def __init__(self, start_mark=None, end_mark=None): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] - if hasattr(self, key)] - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -class NodeEvent(Event): - def __init__(self, anchor, start_mark=None, end_mark=None): - self.anchor = anchor - self.start_mark = start_mark - self.end_mark = end_mark - -class CollectionStartEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, - flow_style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class CollectionEndEvent(Event): - pass - -# Implementations. - -class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndEvent(Event): - pass - -class DocumentStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, version=None, tags=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - self.version = version - self.tags = tags - -class DocumentEndEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - -class AliasEvent(NodeEvent): - pass - -class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, value, - start_mark=None, end_mark=None, style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class SequenceStartEvent(CollectionStartEvent): - pass - -class SequenceEndEvent(CollectionEndEvent): - pass - -class MappingStartEvent(CollectionStartEvent): - pass - -class MappingEndEvent(CollectionEndEvent): - pass - diff --git a/lib3/yaml/loader.py b/lib3/yaml/loader.py deleted file mode 100644 index e90c112..0000000 --- a/lib3/yaml/loader.py +++ /dev/null @@ -1,63 +0,0 @@ - -__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] - -from .reader import * -from .scanner import * -from .parser import * -from .composer import * -from .constructor import * -from .resolver import * - -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) - -# UnsafeLoader is the same as Loader (which is and was always unsafe on -# untrusted input). Use of either Loader or UnsafeLoader should be rare, since -# FullLoad should be able to load almost all YAML safely. Loader is left intact -# to ensure backwards compatibility. -class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) diff --git a/lib3/yaml/nodes.py b/lib3/yaml/nodes.py deleted file mode 100644 index c4f070c..0000000 --- a/lib3/yaml/nodes.py +++ /dev/null @@ -1,49 +0,0 @@ - -class Node(object): - def __init__(self, tag, value, start_mark, end_mark): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - value = self.value - #if isinstance(value, list): - # if len(value) == 0: - # value = '' - # elif len(value) == 1: - # value = '<1 item>' - # else: - # value = '<%d items>' % len(value) - #else: - # if len(value) > 75: - # value = repr(value[:70]+u' ... ') - # else: - # value = repr(value) - value = repr(value) - return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) - -class ScalarNode(Node): - id = 'scalar' - def __init__(self, tag, value, - start_mark=None, end_mark=None, style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class CollectionNode(Node): - def __init__(self, tag, value, - start_mark=None, end_mark=None, flow_style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class SequenceNode(CollectionNode): - id = 'sequence' - -class MappingNode(CollectionNode): - id = 'mapping' - diff --git a/lib3/yaml/parser.py b/lib3/yaml/parser.py deleted file mode 100644 index 13a5995..0000000 --- a/lib3/yaml/parser.py +++ /dev/null @@ -1,589 +0,0 @@ - -# The following YAML grammar is LL(1) and is parsed by a recursive descent -# parser. -# -# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -# implicit_document ::= block_node DOCUMENT-END* -# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -# block_node_or_indentless_sequence ::= -# ALIAS -# | properties (block_content | indentless_block_sequence)? -# | block_content -# | indentless_block_sequence -# block_node ::= ALIAS -# | properties block_content? -# | block_content -# flow_node ::= ALIAS -# | properties flow_content? -# | flow_content -# properties ::= TAG ANCHOR? | ANCHOR TAG? -# block_content ::= block_collection | flow_collection | SCALAR -# flow_content ::= flow_collection | SCALAR -# block_collection ::= block_sequence | block_mapping -# flow_collection ::= flow_sequence | flow_mapping -# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -# block_mapping ::= BLOCK-MAPPING_START -# ((KEY block_node_or_indentless_sequence?)? -# (VALUE block_node_or_indentless_sequence?)?)* -# BLOCK-END -# flow_sequence ::= FLOW-SEQUENCE-START -# (flow_sequence_entry FLOW-ENTRY)* -# flow_sequence_entry? -# FLOW-SEQUENCE-END -# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# flow_mapping ::= FLOW-MAPPING-START -# (flow_mapping_entry FLOW-ENTRY)* -# flow_mapping_entry? -# FLOW-MAPPING-END -# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# -# FIRST sets: -# -# stream: { STREAM-START } -# explicit_document: { DIRECTIVE DOCUMENT-START } -# implicit_document: FIRST(block_node) -# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_sequence: { BLOCK-SEQUENCE-START } -# block_mapping: { BLOCK-MAPPING-START } -# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } -# indentless_sequence: { ENTRY } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_sequence: { FLOW-SEQUENCE-START } -# flow_mapping: { FLOW-MAPPING-START } -# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } -# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - -__all__ = ['Parser', 'ParserError'] - -from .error import MarkedYAMLError -from .tokens import * -from .events import * -from .scanner import * - -class ParserError(MarkedYAMLError): - pass - -class Parser: - # Since writing a recursive-descendant parser is a straightforward task, we - # do not give many comments here. - - DEFAULT_TAGS = { - '!': '!', - '!!': 'tag:yaml.org,2002:', - } - - def __init__(self): - self.current_event = None - self.yaml_version = None - self.tag_handles = {} - self.states = [] - self.marks = [] - self.state = self.parse_stream_start - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def check_event(self, *choices): - # Check the type of the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - if self.current_event is not None: - if not choices: - return True - for choice in choices: - if isinstance(self.current_event, choice): - return True - return False - - def peek_event(self): - # Get the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - return self.current_event - - def get_event(self): - # Get the next event and proceed further. - if self.current_event is None: - if self.state: - self.current_event = self.state() - value = self.current_event - self.current_event = None - return value - - # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - # implicit_document ::= block_node DOCUMENT-END* - # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - - def parse_stream_start(self): - - # Parse the stream start. - token = self.get_token() - event = StreamStartEvent(token.start_mark, token.end_mark, - encoding=token.encoding) - - # Prepare the next state. - self.state = self.parse_implicit_document_start - - return event - - def parse_implicit_document_start(self): - - # Parse an implicit document. - if not self.check_token(DirectiveToken, DocumentStartToken, - StreamEndToken): - self.tag_handles = self.DEFAULT_TAGS - token = self.peek_token() - start_mark = end_mark = token.start_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=False) - - # Prepare the next state. - self.states.append(self.parse_document_end) - self.state = self.parse_block_node - - return event - - else: - return self.parse_document_start() - - def parse_document_start(self): - - # Parse any extra document end indicators. - while self.check_token(DocumentEndToken): - self.get_token() - - # Parse an explicit document. - if not self.check_token(StreamEndToken): - token = self.peek_token() - start_mark = token.start_mark - version, tags = self.process_directives() - if not self.check_token(DocumentStartToken): - raise ParserError(None, None, - "expected '', but found %r" - % self.peek_token().id, - self.peek_token().start_mark) - token = self.get_token() - end_mark = token.end_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=True, version=version, tags=tags) - self.states.append(self.parse_document_end) - self.state = self.parse_document_content - else: - # Parse the end of the stream. - token = self.get_token() - event = StreamEndEvent(token.start_mark, token.end_mark) - assert not self.states - assert not self.marks - self.state = None - return event - - def parse_document_end(self): - - # Parse the document end. - token = self.peek_token() - start_mark = end_mark = token.start_mark - explicit = False - if self.check_token(DocumentEndToken): - token = self.get_token() - end_mark = token.end_mark - explicit = True - event = DocumentEndEvent(start_mark, end_mark, - explicit=explicit) - - # Prepare the next state. - self.state = self.parse_document_start - - return event - - def parse_document_content(self): - if self.check_token(DirectiveToken, - DocumentStartToken, DocumentEndToken, StreamEndToken): - event = self.process_empty_scalar(self.peek_token().start_mark) - self.state = self.states.pop() - return event - else: - return self.parse_block_node() - - def process_directives(self): - self.yaml_version = None - self.tag_handles = {} - while self.check_token(DirectiveToken): - token = self.get_token() - if token.name == 'YAML': - if self.yaml_version is not None: - raise ParserError(None, None, - "found duplicate YAML directive", token.start_mark) - major, minor = token.value - if major != 1: - raise ParserError(None, None, - "found incompatible YAML document (version 1.* is required)", - token.start_mark) - self.yaml_version = token.value - elif token.name == 'TAG': - handle, prefix = token.value - if handle in self.tag_handles: - raise ParserError(None, None, - "duplicate tag handle %r" % handle, - token.start_mark) - self.tag_handles[handle] = prefix - if self.tag_handles: - value = self.yaml_version, self.tag_handles.copy() - else: - value = self.yaml_version, None - for key in self.DEFAULT_TAGS: - if key not in self.tag_handles: - self.tag_handles[key] = self.DEFAULT_TAGS[key] - return value - - # block_node_or_indentless_sequence ::= ALIAS - # | properties (block_content | indentless_block_sequence)? - # | block_content - # | indentless_block_sequence - # block_node ::= ALIAS - # | properties block_content? - # | block_content - # flow_node ::= ALIAS - # | properties flow_content? - # | flow_content - # properties ::= TAG ANCHOR? | ANCHOR TAG? - # block_content ::= block_collection | flow_collection | SCALAR - # flow_content ::= flow_collection | SCALAR - # block_collection ::= block_sequence | block_mapping - # flow_collection ::= flow_sequence | flow_mapping - - def parse_block_node(self): - return self.parse_node(block=True) - - def parse_flow_node(self): - return self.parse_node() - - def parse_block_node_or_indentless_sequence(self): - return self.parse_node(block=True, indentless_sequence=True) - - def parse_node(self, block=False, indentless_sequence=False): - if self.check_token(AliasToken): - token = self.get_token() - event = AliasEvent(token.value, token.start_mark, token.end_mark) - self.state = self.states.pop() - else: - anchor = None - tag = None - start_mark = end_mark = tag_mark = None - if self.check_token(AnchorToken): - token = self.get_token() - start_mark = token.start_mark - end_mark = token.end_mark - anchor = token.value - if self.check_token(TagToken): - token = self.get_token() - tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - elif self.check_token(TagToken): - token = self.get_token() - start_mark = tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - if self.check_token(AnchorToken): - token = self.get_token() - end_mark = token.end_mark - anchor = token.value - if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle, - tag_mark) - tag = self.tag_handles[handle]+suffix - else: - tag = suffix - #if tag == '!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") - if start_mark is None: - start_mark = end_mark = self.peek_token().start_mark - event = None - implicit = (tag is None or tag == '!') - if indentless_sequence and self.check_token(BlockEntryToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark) - self.state = self.parse_indentless_sequence_entry - else: - if self.check_token(ScalarToken): - token = self.get_token() - end_mark = token.end_mark - if (token.plain and tag is None) or tag == '!': - implicit = (True, False) - elif tag is None: - implicit = (False, True) - else: - implicit = (False, False) - event = ScalarEvent(anchor, tag, implicit, token.value, - start_mark, end_mark, style=token.style) - self.state = self.states.pop() - elif self.check_token(FlowSequenceStartToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_sequence_first_entry - elif self.check_token(FlowMappingStartToken): - end_mark = self.peek_token().end_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_mapping_first_key - elif block and self.check_token(BlockSequenceStartToken): - end_mark = self.peek_token().start_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_sequence_first_entry - elif block and self.check_token(BlockMappingStartToken): - end_mark = self.peek_token().start_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_mapping_first_key - elif anchor is not None or tag is not None: - # Empty scalars are allowed even if a tag or an anchor is - # specified. - event = ScalarEvent(anchor, tag, (implicit, False), '', - start_mark, end_mark) - self.state = self.states.pop() - else: - if block: - node = 'block' - else: - node = 'flow' - token = self.peek_token() - raise ParserError("while parsing a %s node" % node, start_mark, - "expected the node content, but found %r" % token.id, - token.start_mark) - return event - - # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - - def parse_block_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_sequence_entry() - - def parse_block_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, BlockEndToken): - self.states.append(self.parse_block_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_block_sequence_entry - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block collection", self.marks[-1], - "expected , but found %r" % token.id, token.start_mark) - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - def parse_indentless_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, - KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_indentless_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_indentless_sequence_entry - return self.process_empty_scalar(token.end_mark) - token = self.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark) - self.state = self.states.pop() - return event - - # block_mapping ::= BLOCK-MAPPING_START - # ((KEY block_node_or_indentless_sequence?)? - # (VALUE block_node_or_indentless_sequence?)?)* - # BLOCK-END - - def parse_block_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_mapping_key() - - def parse_block_mapping_key(self): - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_value) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_value - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block mapping", self.marks[-1], - "expected , but found %r" % token.id, token.start_mark) - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_block_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_key) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_block_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - # flow_sequence ::= FLOW-SEQUENCE-START - # (flow_sequence_entry FLOW-ENTRY)* - # flow_sequence_entry? - # FLOW-SEQUENCE-END - # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - # - # Note that while production rules for both flow_sequence_entry and - # flow_mapping_entry are equal, their interpretations are different. - # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - # generate an inline mapping (set syntax). - - def parse_flow_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_sequence_entry(first=True) - - def parse_flow_sequence_entry(self, first=False): - if not self.check_token(FlowSequenceEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow sequence", self.marks[-1], - "expected ',' or ']', but got %r" % token.id, token.start_mark) - - if self.check_token(KeyToken): - token = self.peek_token() - event = MappingStartEvent(None, None, True, - token.start_mark, token.end_mark, - flow_style=True) - self.state = self.parse_flow_sequence_entry_mapping_key - return event - elif not self.check_token(FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry) - return self.parse_flow_node() - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_sequence_entry_mapping_key(self): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_value - return self.process_empty_scalar(token.end_mark) - - def parse_flow_sequence_entry_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_end) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_end - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_sequence_entry_mapping_end - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_sequence_entry_mapping_end(self): - self.state = self.parse_flow_sequence_entry - token = self.peek_token() - return MappingEndEvent(token.start_mark, token.start_mark) - - # flow_mapping ::= FLOW-MAPPING-START - # (flow_mapping_entry FLOW-ENTRY)* - # flow_mapping_entry? - # FLOW-MAPPING-END - # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - - def parse_flow_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_mapping_key(first=True) - - def parse_flow_mapping_key(self, first=False): - if not self.check_token(FlowMappingEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow mapping", self.marks[-1], - "expected ',' or '}', but got %r" % token.id, token.start_mark) - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_value - return self.process_empty_scalar(token.end_mark) - elif not self.check_token(FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_empty_value) - return self.parse_flow_node() - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_key) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_mapping_empty_value(self): - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(self.peek_token().start_mark) - - def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), '', mark, mark) - diff --git a/lib3/yaml/reader.py b/lib3/yaml/reader.py deleted file mode 100644 index 774b021..0000000 --- a/lib3/yaml/reader.py +++ /dev/null @@ -1,185 +0,0 @@ -# This module contains abstractions for the input stream. You don't have to -# looks further, there are no pretty code. -# -# We define two classes here. -# -# Mark(source, line, column) -# It's just a record and its only use is producing nice error messages. -# Parser does not use it for any other purposes. -# -# Reader(source, data) -# Reader determines the encoding of `data` and converts it to unicode. -# Reader provides the following methods and attributes: -# reader.peek(length=1) - return the next `length` characters -# reader.forward(length=1) - move the current position to `length` characters. -# reader.index - the number of the current character. -# reader.line, stream.column - the line and the column of the current character. - -__all__ = ['Reader', 'ReaderError'] - -from .error import YAMLError, Mark - -import codecs, re - -class ReaderError(YAMLError): - - def __init__(self, name, position, character, encoding, reason): - self.name = name - self.character = character - self.position = position - self.encoding = encoding - self.reason = reason - - def __str__(self): - if isinstance(self.character, bytes): - return "'%s' codec can't decode byte #x%02x: %s\n" \ - " in \"%s\", position %d" \ - % (self.encoding, ord(self.character), self.reason, - self.name, self.position) - else: - return "unacceptable character #x%04x: %s\n" \ - " in \"%s\", position %d" \ - % (self.character, self.reason, - self.name, self.position) - -class Reader(object): - # Reader: - # - determines the data encoding and converts it to a unicode string, - # - checks if characters are in allowed range, - # - adds '\0' to the end. - - # Reader accepts - # - a `bytes` object, - # - a `str` object, - # - a file-like object with its `read` method returning `str`, - # - a file-like object with its `read` method returning `unicode`. - - # Yeah, it's ugly and slow. - - def __init__(self, stream): - self.name = None - self.stream = None - self.stream_pointer = 0 - self.eof = True - self.buffer = '' - self.pointer = 0 - self.raw_buffer = None - self.raw_decode = None - self.encoding = None - self.index = 0 - self.line = 0 - self.column = 0 - if isinstance(stream, str): - self.name = "" - self.check_printable(stream) - self.buffer = stream+'\0' - elif isinstance(stream, bytes): - self.name = "" - self.raw_buffer = stream - self.determine_encoding() - else: - self.stream = stream - self.name = getattr(stream, 'name', "") - self.eof = False - self.raw_buffer = None - self.determine_encoding() - - def peek(self, index=0): - try: - return self.buffer[self.pointer+index] - except IndexError: - self.update(index+1) - return self.buffer[self.pointer+index] - - def prefix(self, length=1): - if self.pointer+length >= len(self.buffer): - self.update(length) - return self.buffer[self.pointer:self.pointer+length] - - def forward(self, length=1): - if self.pointer+length+1 >= len(self.buffer): - self.update(length+1) - while length: - ch = self.buffer[self.pointer] - self.pointer += 1 - self.index += 1 - if ch in '\n\x85\u2028\u2029' \ - or (ch == '\r' and self.buffer[self.pointer] != '\n'): - self.line += 1 - self.column = 0 - elif ch != '\uFEFF': - self.column += 1 - length -= 1 - - def get_mark(self): - if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) - else: - return Mark(self.name, self.index, self.line, self.column, - None, None) - - def determine_encoding(self): - while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): - self.update_raw() - if isinstance(self.raw_buffer, bytes): - if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = codecs.utf_16_le_decode - self.encoding = 'utf-16-le' - elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = codecs.utf_16_be_decode - self.encoding = 'utf-16-be' - else: - self.raw_decode = codecs.utf_8_decode - self.encoding = 'utf-8' - self.update(1) - - NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') - def check_printable(self, data): - match = self.NON_PRINTABLE.search(data) - if match: - character = match.group() - position = self.index+(len(self.buffer)-self.pointer)+match.start() - raise ReaderError(self.name, position, ord(character), - 'unicode', "special characters are not allowed") - - def update(self, length): - if self.raw_buffer is None: - return - self.buffer = self.buffer[self.pointer:] - self.pointer = 0 - while len(self.buffer) < length: - if not self.eof: - self.update_raw() - if self.raw_decode is not None: - try: - data, converted = self.raw_decode(self.raw_buffer, - 'strict', self.eof) - except UnicodeDecodeError as exc: - character = self.raw_buffer[exc.start] - if self.stream is not None: - position = self.stream_pointer-len(self.raw_buffer)+exc.start - else: - position = exc.start - raise ReaderError(self.name, position, character, - exc.encoding, exc.reason) - else: - data = self.raw_buffer - converted = len(data) - self.check_printable(data) - self.buffer += data - self.raw_buffer = self.raw_buffer[converted:] - if self.eof: - self.buffer += '\0' - self.raw_buffer = None - break - - def update_raw(self, size=4096): - data = self.stream.read(size) - if self.raw_buffer is None: - self.raw_buffer = data - else: - self.raw_buffer += data - self.stream_pointer += len(data) - if not data: - self.eof = True diff --git a/lib3/yaml/representer.py b/lib3/yaml/representer.py deleted file mode 100644 index 3b0b192..0000000 --- a/lib3/yaml/representer.py +++ /dev/null @@ -1,389 +0,0 @@ - -__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', - 'RepresenterError'] - -from .error import * -from .nodes import * - -import datetime, copyreg, types, base64, collections - -class RepresenterError(YAMLError): - pass - -class BaseRepresenter: - - yaml_representers = {} - yaml_multi_representers = {} - - def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): - self.default_style = default_style - self.sort_keys = sort_keys - self.default_flow_style = default_flow_style - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent(self, data): - node = self.represent_data(data) - self.serialize(node) - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent_data(self, data): - if self.ignore_aliases(data): - self.alias_key = None - else: - self.alias_key = id(data) - if self.alias_key is not None: - if self.alias_key in self.represented_objects: - node = self.represented_objects[self.alias_key] - #if node is None: - # raise RepresenterError("recursive objects are not allowed: %r" % data) - return node - #self.represented_objects[alias_key] = None - self.object_keeper.append(data) - data_types = type(data).__mro__ - if data_types[0] in self.yaml_representers: - node = self.yaml_representers[data_types[0]](self, data) - else: - for data_type in data_types: - if data_type in self.yaml_multi_representers: - node = self.yaml_multi_representers[data_type](self, data) - break - else: - if None in self.yaml_multi_representers: - node = self.yaml_multi_representers[None](self, data) - elif None in self.yaml_representers: - node = self.yaml_representers[None](self, data) - else: - node = ScalarNode(None, str(data)) - #if alias_key is not None: - # self.represented_objects[alias_key] = node - return node - - @classmethod - def add_representer(cls, data_type, representer): - if not 'yaml_representers' in cls.__dict__: - cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[data_type] = representer - - @classmethod - def add_multi_representer(cls, data_type, representer): - if not 'yaml_multi_representers' in cls.__dict__: - cls.yaml_multi_representers = cls.yaml_multi_representers.copy() - cls.yaml_multi_representers[data_type] = representer - - def represent_scalar(self, tag, value, style=None): - if style is None: - style = self.default_style - node = ScalarNode(tag, value, style=style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - return node - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item in sequence: - node_item = self.represent_data(item) - if not (isinstance(node_item, ScalarNode) and not node_item.style): - best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - if self.sort_keys: - try: - mapping = sorted(mapping) - except TypeError: - pass - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def ignore_aliases(self, data): - return False - -class SafeRepresenter(BaseRepresenter): - - def ignore_aliases(self, data): - if data is None: - return True - if isinstance(data, tuple) and data == (): - return True - if isinstance(data, (str, bytes, bool, int, float)): - return True - - def represent_none(self, data): - return self.represent_scalar('tag:yaml.org,2002:null', 'null') - - def represent_str(self, data): - return self.represent_scalar('tag:yaml.org,2002:str', data) - - def represent_binary(self, data): - if hasattr(base64, 'encodebytes'): - data = base64.encodebytes(data).decode('ascii') - else: - data = base64.encodestring(data).decode('ascii') - return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') - - def represent_bool(self, data): - if data: - value = 'true' - else: - value = 'false' - return self.represent_scalar('tag:yaml.org,2002:bool', value) - - def represent_int(self, data): - return self.represent_scalar('tag:yaml.org,2002:int', str(data)) - - inf_value = 1e300 - while repr(inf_value) != repr(inf_value*inf_value): - inf_value *= inf_value - - def represent_float(self, data): - if data != data or (data == 0.0 and data == 1.0): - value = '.nan' - elif data == self.inf_value: - value = '.inf' - elif data == -self.inf_value: - value = '-.inf' - else: - value = repr(data).lower() - # Note that in some cases `repr(data)` represents a float number - # without the decimal parts. For instance: - # >>> repr(1e17) - # '1e17' - # Unfortunately, this is not a valid float representation according - # to the definition of the `!!float` tag. We fix this by adding - # '.0' before the 'e' symbol. - if '.' not in value and 'e' in value: - value = value.replace('e', '.0e', 1) - return self.represent_scalar('tag:yaml.org,2002:float', value) - - def represent_list(self, data): - #pairs = (len(data) > 0 and isinstance(data, list)) - #if pairs: - # for item in data: - # if not isinstance(item, tuple) or len(item) != 2: - # pairs = False - # break - #if not pairs: - return self.represent_sequence('tag:yaml.org,2002:seq', data) - #value = [] - #for item_key, item_value in data: - # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', - # [(item_key, item_value)])) - #return SequenceNode(u'tag:yaml.org,2002:pairs', value) - - def represent_dict(self, data): - return self.represent_mapping('tag:yaml.org,2002:map', data) - - def represent_set(self, data): - value = {} - for key in data: - value[key] = None - return self.represent_mapping('tag:yaml.org,2002:set', value) - - def represent_date(self, data): - value = data.isoformat() - return self.represent_scalar('tag:yaml.org,2002:timestamp', value) - - def represent_datetime(self, data): - value = data.isoformat(' ') - return self.represent_scalar('tag:yaml.org,2002:timestamp', value) - - def represent_yaml_object(self, tag, data, cls, flow_style=None): - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__.copy() - return self.represent_mapping(tag, state, flow_style=flow_style) - - def represent_undefined(self, data): - raise RepresenterError("cannot represent an object", data) - -SafeRepresenter.add_representer(type(None), - SafeRepresenter.represent_none) - -SafeRepresenter.add_representer(str, - SafeRepresenter.represent_str) - -SafeRepresenter.add_representer(bytes, - SafeRepresenter.represent_binary) - -SafeRepresenter.add_representer(bool, - SafeRepresenter.represent_bool) - -SafeRepresenter.add_representer(int, - SafeRepresenter.represent_int) - -SafeRepresenter.add_representer(float, - SafeRepresenter.represent_float) - -SafeRepresenter.add_representer(list, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(tuple, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(dict, - SafeRepresenter.represent_dict) - -SafeRepresenter.add_representer(set, - SafeRepresenter.represent_set) - -SafeRepresenter.add_representer(datetime.date, - SafeRepresenter.represent_date) - -SafeRepresenter.add_representer(datetime.datetime, - SafeRepresenter.represent_datetime) - -SafeRepresenter.add_representer(None, - SafeRepresenter.represent_undefined) - -class Representer(SafeRepresenter): - - def represent_complex(self, data): - if data.imag == 0.0: - data = '%r' % data.real - elif data.real == 0.0: - data = '%rj' % data.imag - elif data.imag > 0: - data = '%r+%rj' % (data.real, data.imag) - else: - data = '%r%rj' % (data.real, data.imag) - return self.represent_scalar('tag:yaml.org,2002:python/complex', data) - - def represent_tuple(self, data): - return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) - - def represent_name(self, data): - name = '%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') - - def represent_module(self, data): - return self.represent_scalar( - 'tag:yaml.org,2002:python/module:'+data.__name__, '') - - def represent_object(self, data): - # We use __reduce__ API to save the data. data.__reduce__ returns - # a tuple of length 2-5: - # (function, args, state, listitems, dictitems) - - # For reconstructing, we calls function(*args), then set its state, - # listitems, and dictitems if they are not None. - - # A special case is when function.__name__ == '__newobj__'. In this - # case we create the object with args[0].__new__(*args). - - # Another special case is when __reduce__ returns a string - we don't - # support it. - - # We produce a !!python/object, !!python/object/new or - # !!python/object/apply node. - - cls = type(data) - if cls in copyreg.dispatch_table: - reduce = copyreg.dispatch_table[cls](data) - elif hasattr(data, '__reduce_ex__'): - reduce = data.__reduce_ex__(2) - elif hasattr(data, '__reduce__'): - reduce = data.__reduce__() - else: - raise RepresenterError("cannot represent an object", data) - reduce = (list(reduce)+[None]*5)[:5] - function, args, state, listitems, dictitems = reduce - args = list(args) - if state is None: - state = {} - if listitems is not None: - listitems = list(listitems) - if dictitems is not None: - dictitems = dict(dictitems) - if function.__name__ == '__newobj__': - function = args[0] - args = args[1:] - tag = 'tag:yaml.org,2002:python/object/new:' - newobj = True - else: - tag = 'tag:yaml.org,2002:python/object/apply:' - newobj = False - function_name = '%s.%s' % (function.__module__, function.__name__) - if not args and not listitems and not dictitems \ - and isinstance(state, dict) and newobj: - return self.represent_mapping( - 'tag:yaml.org,2002:python/object:'+function_name, state) - if not listitems and not dictitems \ - and isinstance(state, dict) and not state: - return self.represent_sequence(tag+function_name, args) - value = {} - if args: - value['args'] = args - if state or not isinstance(state, dict): - value['state'] = state - if listitems: - value['listitems'] = listitems - if dictitems: - value['dictitems'] = dictitems - return self.represent_mapping(tag+function_name, value) - - def represent_ordered_dict(self, data): - # Provide uniform representation across different Python versions. - data_type = type(data) - tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ - % (data_type.__module__, data_type.__name__) - items = [[key, value] for key, value in data.items()] - return self.represent_sequence(tag, [items]) - -Representer.add_representer(complex, - Representer.represent_complex) - -Representer.add_representer(tuple, - Representer.represent_tuple) - -Representer.add_representer(type, - Representer.represent_name) - -Representer.add_representer(collections.OrderedDict, - Representer.represent_ordered_dict) - -Representer.add_representer(types.FunctionType, - Representer.represent_name) - -Representer.add_representer(types.BuiltinFunctionType, - Representer.represent_name) - -Representer.add_representer(types.ModuleType, - Representer.represent_module) - -Representer.add_multi_representer(object, - Representer.represent_object) - diff --git a/lib3/yaml/resolver.py b/lib3/yaml/resolver.py deleted file mode 100644 index 013896d..0000000 --- a/lib3/yaml/resolver.py +++ /dev/null @@ -1,227 +0,0 @@ - -__all__ = ['BaseResolver', 'Resolver'] - -from .error import * -from .nodes import * - -import re - -class ResolverError(YAMLError): - pass - -class BaseResolver: - - DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' - - yaml_implicit_resolvers = {} - yaml_path_resolvers = {} - - def __init__(self): - self.resolver_exact_paths = [] - self.resolver_prefix_paths = [] - - @classmethod - def add_implicit_resolver(cls, tag, regexp, first): - if not 'yaml_implicit_resolvers' in cls.__dict__: - implicit_resolvers = {} - for key in cls.yaml_implicit_resolvers: - implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] - cls.yaml_implicit_resolvers = implicit_resolvers - if first is None: - first = [None] - for ch in first: - cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - - @classmethod - def add_path_resolver(cls, tag, path, kind=None): - # Note: `add_path_resolver` is experimental. The API could be changed. - # `new_path` is a pattern that is matched against the path from the - # root to the node that is being considered. `node_path` elements are - # tuples `(node_check, index_check)`. `node_check` is a node class: - # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` - # matches any kind of a node. `index_check` could be `None`, a boolean - # value, a string value, or a number. `None` and `False` match against - # any _value_ of sequence and mapping nodes. `True` matches against - # any _key_ of a mapping node. A string `index_check` matches against - # a mapping value that corresponds to a scalar key which content is - # equal to the `index_check` value. An integer `index_check` matches - # against a sequence value with the index equal to `index_check`. - if not 'yaml_path_resolvers' in cls.__dict__: - cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() - new_path = [] - for element in path: - if isinstance(element, (list, tuple)): - if len(element) == 2: - node_check, index_check = element - elif len(element) == 1: - node_check = element[0] - index_check = True - else: - raise ResolverError("Invalid path element: %s" % element) - else: - node_check = None - index_check = element - if node_check is str: - node_check = ScalarNode - elif node_check is list: - node_check = SequenceNode - elif node_check is dict: - node_check = MappingNode - elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, str) \ - and node_check is not None: - raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (str, int)) \ - and index_check is not None: - raise ResolverError("Invalid index checker: %s" % index_check) - new_path.append((node_check, index_check)) - if kind is str: - kind = ScalarNode - elif kind is list: - kind = SequenceNode - elif kind is dict: - kind = MappingNode - elif kind not in [ScalarNode, SequenceNode, MappingNode] \ - and kind is not None: - raise ResolverError("Invalid node kind: %s" % kind) - cls.yaml_path_resolvers[tuple(new_path), kind] = tag - - def descend_resolver(self, current_node, current_index): - if not self.yaml_path_resolvers: - return - exact_paths = {} - prefix_paths = [] - if current_node: - depth = len(self.resolver_prefix_paths) - for path, kind in self.resolver_prefix_paths[-1]: - if self.check_resolver_prefix(depth, path, kind, - current_node, current_index): - if len(path) > depth: - prefix_paths.append((path, kind)) - else: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - for path, kind in self.yaml_path_resolvers: - if not path: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - prefix_paths.append((path, kind)) - self.resolver_exact_paths.append(exact_paths) - self.resolver_prefix_paths.append(prefix_paths) - - def ascend_resolver(self): - if not self.yaml_path_resolvers: - return - self.resolver_exact_paths.pop() - self.resolver_prefix_paths.pop() - - def check_resolver_prefix(self, depth, path, kind, - current_node, current_index): - node_check, index_check = path[depth-1] - if isinstance(node_check, str): - if current_node.tag != node_check: - return - elif node_check is not None: - if not isinstance(current_node, node_check): - return - if index_check is True and current_index is not None: - return - if (index_check is False or index_check is None) \ - and current_index is None: - return - if isinstance(index_check, str): - if not (isinstance(current_index, ScalarNode) - and index_check == current_index.value): - return - elif isinstance(index_check, int) and not isinstance(index_check, bool): - if index_check != current_index: - return - return True - - def resolve(self, kind, value, implicit): - if kind is ScalarNode and implicit[0]: - if value == '': - resolvers = self.yaml_implicit_resolvers.get('', []) - else: - resolvers = self.yaml_implicit_resolvers.get(value[0], []) - wildcard_resolvers = self.yaml_implicit_resolvers.get(None, []) - for tag, regexp in resolvers + wildcard_resolvers: - if regexp.match(value): - return tag - implicit = implicit[1] - if self.yaml_path_resolvers: - exact_paths = self.resolver_exact_paths[-1] - if kind in exact_paths: - return exact_paths[kind] - if None in exact_paths: - return exact_paths[None] - if kind is ScalarNode: - return self.DEFAULT_SCALAR_TAG - elif kind is SequenceNode: - return self.DEFAULT_SEQUENCE_TAG - elif kind is MappingNode: - return self.DEFAULT_MAPPING_TAG - -class Resolver(BaseResolver): - pass - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:bool', - re.compile(r'''^(?:yes|Yes|YES|no|No|NO - |true|True|TRUE|false|False|FALSE - |on|On|ON|off|Off|OFF)$''', re.X), - list('yYnNtTfFoO')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:float', - re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? - |\.[0-9_]+(?:[eE][-+][0-9]+)? - |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* - |[-+]?\.(?:inf|Inf|INF) - |\.(?:nan|NaN|NAN))$''', re.X), - list('-+0123456789.')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:int', - re.compile(r'''^(?:[-+]?0b[0-1_]+ - |[-+]?0[0-7_]+ - |[-+]?(?:0|[1-9][0-9_]*) - |[-+]?0x[0-9a-fA-F_]+ - |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list('-+0123456789')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:merge', - re.compile(r'^(?:<<)$'), - ['<']) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:null', - re.compile(r'''^(?: ~ - |null|Null|NULL - | )$''', re.X), - ['~', 'n', 'N', '']) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:timestamp', - re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] - |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? - (?:[Tt]|[ \t]+)[0-9][0-9]? - :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? - (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list('0123456789')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:value', - re.compile(r'^(?:=)$'), - ['=']) - -# The following resolver is only for documentation purposes. It cannot work -# because plain scalars cannot start with '!', '&', or '*'. -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:yaml', - re.compile(r'^(?:!|&|\*)$'), - list('!&*')) - diff --git a/lib3/yaml/scanner.py b/lib3/yaml/scanner.py deleted file mode 100644 index 7437ede..0000000 --- a/lib3/yaml/scanner.py +++ /dev/null @@ -1,1435 +0,0 @@ - -# Scanner produces tokens of the following types: -# STREAM-START -# STREAM-END -# DIRECTIVE(name, value) -# DOCUMENT-START -# DOCUMENT-END -# BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START -# BLOCK-END -# FLOW-SEQUENCE-START -# FLOW-MAPPING-START -# FLOW-SEQUENCE-END -# FLOW-MAPPING-END -# BLOCK-ENTRY -# FLOW-ENTRY -# KEY -# VALUE -# ALIAS(value) -# ANCHOR(value) -# TAG(value) -# SCALAR(value, plain, style) -# -# Read comments in the Scanner code for more details. -# - -__all__ = ['Scanner', 'ScannerError'] - -from .error import MarkedYAMLError -from .tokens import * - -class ScannerError(MarkedYAMLError): - pass - -class SimpleKey: - # See below simple keys treatment. - - def __init__(self, token_number, required, index, line, column, mark): - self.token_number = token_number - self.required = required - self.index = index - self.line = line - self.column = column - self.mark = mark - -class Scanner: - - def __init__(self): - """Initialize the scanner.""" - # It is assumed that Scanner and Reader will have a common descendant. - # Reader do the dirty work of checking for BOM and converting the - # input data to Unicode. It also adds NUL to the end. - # - # Reader supports the following methods - # self.peek(i=0) # peek the next i-th character - # self.prefix(l=1) # peek the next l characters - # self.forward(l=1) # read the next l characters and move the pointer. - - # Had we reached the end of the stream? - self.done = False - - # The number of unclosed '{' and '['. `flow_level == 0` means block - # context. - self.flow_level = 0 - - # List of processed tokens that are not yet emitted. - self.tokens = [] - - # Add the STREAM-START token. - self.fetch_stream_start() - - # Number of tokens that were emitted through the `get_token` method. - self.tokens_taken = 0 - - # The current indentation level. - self.indent = -1 - - # Past indentation levels. - self.indents = [] - - # Variables related to simple keys treatment. - - # A simple key is a key that is not denoted by the '?' indicator. - # Example of simple keys: - # --- - # block simple key: value - # ? not a simple key: - # : { flow simple key: value } - # We emit the KEY token before all keys, so when we find a potential - # simple key, we try to locate the corresponding ':' indicator. - # Simple keys should be limited to a single line and 1024 characters. - - # Can a simple key start at the current position? A simple key may - # start: - # - at the beginning of the line, not counting indentation spaces - # (in block context), - # - after '{', '[', ',' (in the flow context), - # - after '?', ':', '-' (in the block context). - # In the block context, this flag also signifies if a block collection - # may start at the current position. - self.allow_simple_key = True - - # Keep track of possible simple keys. This is a dictionary. The key - # is `flow_level`; there can be no more that one possible simple key - # for each level. The value is a SimpleKey record: - # (token_number, required, index, line, column, mark) - # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), - # '[', or '{' tokens. - self.possible_simple_keys = {} - - # Public methods. - - def check_token(self, *choices): - # Check if the next token is one of the given types. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - # Return the next token, but do not delete if from the queue. - # Return None if no more tokens. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - return self.tokens[0] - else: - return None - - def get_token(self): - # Return the next token. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - self.tokens_taken += 1 - return self.tokens.pop(0) - - # Private methods. - - def need_more_tokens(self): - if self.done: - return False - if not self.tokens: - return True - # The current token may be a potential simple key, so we - # need to look further. - self.stale_possible_simple_keys() - if self.next_possible_simple_key() == self.tokens_taken: - return True - - def fetch_more_tokens(self): - - # Eat whitespaces and comments until we reach the next token. - self.scan_to_next_token() - - # Remove obsolete possible simple keys. - self.stale_possible_simple_keys() - - # Compare the current indentation and column. It may add some tokens - # and decrease the current indentation level. - self.unwind_indent(self.column) - - # Peek the next character. - ch = self.peek() - - # Is it the end of stream? - if ch == '\0': - return self.fetch_stream_end() - - # Is it a directive? - if ch == '%' and self.check_directive(): - return self.fetch_directive() - - # Is it the document start? - if ch == '-' and self.check_document_start(): - return self.fetch_document_start() - - # Is it the document end? - if ch == '.' and self.check_document_end(): - return self.fetch_document_end() - - # TODO: support for BOM within a stream. - #if ch == '\uFEFF': - # return self.fetch_bom() <-- issue BOMToken - - # Note: the order of the following checks is NOT significant. - - # Is it the flow sequence start indicator? - if ch == '[': - return self.fetch_flow_sequence_start() - - # Is it the flow mapping start indicator? - if ch == '{': - return self.fetch_flow_mapping_start() - - # Is it the flow sequence end indicator? - if ch == ']': - return self.fetch_flow_sequence_end() - - # Is it the flow mapping end indicator? - if ch == '}': - return self.fetch_flow_mapping_end() - - # Is it the flow entry indicator? - if ch == ',': - return self.fetch_flow_entry() - - # Is it the block entry indicator? - if ch == '-' and self.check_block_entry(): - return self.fetch_block_entry() - - # Is it the key indicator? - if ch == '?' and self.check_key(): - return self.fetch_key() - - # Is it the value indicator? - if ch == ':' and self.check_value(): - return self.fetch_value() - - # Is it an alias? - if ch == '*': - return self.fetch_alias() - - # Is it an anchor? - if ch == '&': - return self.fetch_anchor() - - # Is it a tag? - if ch == '!': - return self.fetch_tag() - - # Is it a literal scalar? - if ch == '|' and not self.flow_level: - return self.fetch_literal() - - # Is it a folded scalar? - if ch == '>' and not self.flow_level: - return self.fetch_folded() - - # Is it a single quoted scalar? - if ch == '\'': - return self.fetch_single() - - # Is it a double quoted scalar? - if ch == '\"': - return self.fetch_double() - - # It must be a plain scalar then. - if self.check_plain(): - return self.fetch_plain() - - # No? It's an error. Let's produce a nice error message. - raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" % ch, - self.get_mark()) - - # Simple keys treatment. - - def next_possible_simple_key(self): - # Return the number of the nearest possible simple key. Actually we - # don't need to loop through the whole dictionary. We may replace it - # with the following code: - # if not self.possible_simple_keys: - # return None - # return self.possible_simple_keys[ - # min(self.possible_simple_keys.keys())].token_number - min_token_number = None - for level in self.possible_simple_keys: - key = self.possible_simple_keys[level] - if min_token_number is None or key.token_number < min_token_number: - min_token_number = key.token_number - return min_token_number - - def stale_possible_simple_keys(self): - # Remove entries that are no longer possible simple keys. According to - # the YAML specification, simple keys - # - should be limited to a single line, - # - should be no longer than 1024 characters. - # Disabling this procedure will allow simple keys of any length and - # height (may cause problems if indentation is broken though). - for level in list(self.possible_simple_keys): - key = self.possible_simple_keys[level] - if key.line != self.line \ - or self.index-key.index > 1024: - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - del self.possible_simple_keys[level] - - def save_possible_simple_key(self): - # The next token may start a simple key. We check if it's possible - # and save its position. This function is called for - # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - - # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.column - - # The next token might be a simple key. Let's save it's number and - # position. - if self.allow_simple_key: - self.remove_possible_simple_key() - token_number = self.tokens_taken+len(self.tokens) - key = SimpleKey(token_number, required, - self.index, self.line, self.column, self.get_mark()) - self.possible_simple_keys[self.flow_level] = key - - def remove_possible_simple_key(self): - # Remove the saved possible key position at the current flow level. - if self.flow_level in self.possible_simple_keys: - key = self.possible_simple_keys[self.flow_level] - - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - - del self.possible_simple_keys[self.flow_level] - - # Indentation functions. - - def unwind_indent(self, column): - - ## In flow context, tokens should respect indentation. - ## Actually the condition should be `self.indent >= column` according to - ## the spec. But this condition will prohibit intuitively correct - ## constructions such as - ## key : { - ## } - #if self.flow_level and self.indent > column: - # raise ScannerError(None, None, - # "invalid indentation or unclosed '[' or '{'", - # self.get_mark()) - - # In the flow context, indentation is ignored. We make the scanner less - # restrictive then specification requires. - if self.flow_level: - return - - # In block context, we may need to issue the BLOCK-END tokens. - while self.indent > column: - mark = self.get_mark() - self.indent = self.indents.pop() - self.tokens.append(BlockEndToken(mark, mark)) - - def add_indent(self, column): - # Check if we need to increase indentation. - if self.indent < column: - self.indents.append(self.indent) - self.indent = column - return True - return False - - # Fetchers. - - def fetch_stream_start(self): - # We always add STREAM-START as the first token and STREAM-END as the - # last token. - - # Read the token. - mark = self.get_mark() - - # Add STREAM-START. - self.tokens.append(StreamStartToken(mark, mark, - encoding=self.encoding)) - - - def fetch_stream_end(self): - - # Set the current indentation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - self.possible_simple_keys = {} - - # Read the token. - mark = self.get_mark() - - # Add STREAM-END. - self.tokens.append(StreamEndToken(mark, mark)) - - # The steam is finished. - self.done = True - - def fetch_directive(self): - - # Set the current indentation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Scan and add DIRECTIVE. - self.tokens.append(self.scan_directive()) - - def fetch_document_start(self): - self.fetch_document_indicator(DocumentStartToken) - - def fetch_document_end(self): - self.fetch_document_indicator(DocumentEndToken) - - def fetch_document_indicator(self, TokenClass): - - # Set the current indentation to -1. - self.unwind_indent(-1) - - # Reset simple keys. Note that there could not be a block collection - # after '---'. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Add DOCUMENT-START or DOCUMENT-END. - start_mark = self.get_mark() - self.forward(3) - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_start(self): - self.fetch_flow_collection_start(FlowSequenceStartToken) - - def fetch_flow_mapping_start(self): - self.fetch_flow_collection_start(FlowMappingStartToken) - - def fetch_flow_collection_start(self, TokenClass): - - # '[' and '{' may start a simple key. - self.save_possible_simple_key() - - # Increase the flow level. - self.flow_level += 1 - - # Simple keys are allowed after '[' and '{'. - self.allow_simple_key = True - - # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEndToken) - - def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEndToken) - - def fetch_flow_collection_end(self, TokenClass): - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Decrease the flow level. - self.flow_level -= 1 - - # No simple keys after ']' or '}'. - self.allow_simple_key = False - - # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_entry(self): - - # Simple keys are allowed after ','. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add FLOW-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(FlowEntryToken(start_mark, end_mark)) - - def fetch_block_entry(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a new entry? - if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) - - # It's an error for the block entry to occur in the flow context, - # but we let the parser detect this. - else: - pass - - # Simple keys are allowed after '-'. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add BLOCK-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(BlockEntryToken(start_mark, end_mark)) - - def fetch_key(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a key (not necessary a simple)? - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after '?' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add KEY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(KeyToken(start_mark, end_mark)) - - def fetch_value(self): - - # Do we determine a simple key? - if self.flow_level in self.possible_simple_keys: - - # Add KEY. - key = self.possible_simple_keys[self.flow_level] - del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.mark, key.mark)) - - # If this key starts a new block mapping, we need to add - # BLOCK-MAPPING-START. - if not self.flow_level: - if self.add_indent(key.column): - self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) - - # There cannot be two simple keys one after another. - self.allow_simple_key = False - - # It must be a part of a complex key. - else: - - # Block context needs additional checks. - # (Do we really need them? They will be caught by the parser - # anyway.) - if not self.flow_level: - - # We are allowed to start a complex value if and only if - # we can start a simple key. - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.get_mark()) - - # If this value starts a new block mapping, we need to add - # BLOCK-MAPPING-START. It will be detected as an error later by - # the parser. - if not self.flow_level: - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after ':' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add VALUE. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(ValueToken(start_mark, end_mark)) - - def fetch_alias(self): - - # ALIAS could be a simple key. - self.save_possible_simple_key() - - # No simple keys after ALIAS. - self.allow_simple_key = False - - # Scan and add ALIAS. - self.tokens.append(self.scan_anchor(AliasToken)) - - def fetch_anchor(self): - - # ANCHOR could start a simple key. - self.save_possible_simple_key() - - # No simple keys after ANCHOR. - self.allow_simple_key = False - - # Scan and add ANCHOR. - self.tokens.append(self.scan_anchor(AnchorToken)) - - def fetch_tag(self): - - # TAG could start a simple key. - self.save_possible_simple_key() - - # No simple keys after TAG. - self.allow_simple_key = False - - # Scan and add TAG. - self.tokens.append(self.scan_tag()) - - def fetch_literal(self): - self.fetch_block_scalar(style='|') - - def fetch_folded(self): - self.fetch_block_scalar(style='>') - - def fetch_block_scalar(self, style): - - # A simple key may follow a block scalar. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Scan and add SCALAR. - self.tokens.append(self.scan_block_scalar(style)) - - def fetch_single(self): - self.fetch_flow_scalar(style='\'') - - def fetch_double(self): - self.fetch_flow_scalar(style='"') - - def fetch_flow_scalar(self, style): - - # A flow scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after flow scalars. - self.allow_simple_key = False - - # Scan and add SCALAR. - self.tokens.append(self.scan_flow_scalar(style)) - - def fetch_plain(self): - - # A plain scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after plain scalars. But note that `scan_plain` will - # change this flag if the scan is finished at the beginning of the - # line. - self.allow_simple_key = False - - # Scan and add SCALAR. May change `allow_simple_key`. - self.tokens.append(self.scan_plain()) - - # Checkers. - - def check_directive(self): - - # DIRECTIVE: ^ '%' ... - # The '%' indicator is already checked. - if self.column == 0: - return True - - def check_document_start(self): - - # DOCUMENT-START: ^ '---' (' '|'\n') - if self.column == 0: - if self.prefix(3) == '---' \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return True - - def check_document_end(self): - - # DOCUMENT-END: ^ '...' (' '|'\n') - if self.column == 0: - if self.prefix(3) == '...' \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return True - - def check_block_entry(self): - - # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_key(self): - - # KEY(flow context): '?' - if self.flow_level: - return True - - # KEY(block context): '?' (' '|'\n') - else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_value(self): - - # VALUE(flow context): ':' - if self.flow_level: - return True - - # VALUE(block context): ':' (' '|'\n') - else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_plain(self): - - # A plain scalar may start with any non-space character except: - # '-', '?', ':', ',', '[', ']', '{', '}', - # '#', '&', '*', '!', '|', '>', '\'', '\"', - # '%', '@', '`'. - # - # It may also start with - # '-', '?', ':' - # if it is followed by a non-space character. - # - # Note that we limit the last rule to the block context (except the - # '-' character) because we want the flow context to be space - # independent. - ch = self.peek() - return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' - and (ch == '-' or (not self.flow_level and ch in '?:'))) - - # Scanners. - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if : - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == '\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - - def scan_directive(self): - # See the specification for details. - start_mark = self.get_mark() - self.forward() - name = self.scan_directive_name(start_mark) - value = None - if name == 'YAML': - value = self.scan_yaml_directive_value(start_mark) - end_mark = self.get_mark() - elif name == 'TAG': - value = self.scan_tag_directive_value(start_mark) - end_mark = self.get_mark() - else: - end_mark = self.get_mark() - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - self.scan_directive_ignored_line(start_mark) - return DirectiveToken(name, value, start_mark, end_mark) - - def scan_directive_name(self, start_mark): - # See the specification for details. - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - return value - - def scan_yaml_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - major = self.scan_yaml_directive_number(start_mark) - if self.peek() != '.': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" % self.peek(), - self.get_mark()) - self.forward() - minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" % self.peek(), - self.get_mark()) - return (major, minor) - - def scan_yaml_directive_number(self, start_mark): - # See the specification for details. - ch = self.peek() - if not ('0' <= ch <= '9'): - raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch, self.get_mark()) - length = 0 - while '0' <= self.peek(length) <= '9': - length += 1 - value = int(self.prefix(length)) - self.forward(length) - return value - - def scan_tag_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == ' ': - self.forward() - prefix = self.scan_tag_directive_prefix(start_mark) - return (handle, prefix) - - def scan_tag_directive_handle(self, start_mark): - # See the specification for details. - value = self.scan_tag_handle('directive', start_mark) - ch = self.peek() - if ch != ' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - return value - - def scan_tag_directive_prefix(self, start_mark): - # See the specification for details. - value = self.scan_tag_uri('directive', start_mark) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - return value - - def scan_directive_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in '\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % ch, self.get_mark()) - self.scan_line_break() - - def scan_anchor(self, TokenClass): - # The specification does not restrict characters for anchors and - # aliases. This may lead to problems, for instance, the document: - # [ *alias, value ] - # can be interpreted in two ways, as - # [ "value" ] - # and - # [ *alias , "value" ] - # Therefore we restrict aliases to numbers and ASCII letters. - start_mark = self.get_mark() - indicator = self.peek() - if indicator == '*': - name = 'alias' - else: - name = 'anchor' - self.forward() - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - end_mark = self.get_mark() - return TokenClass(value, start_mark, end_mark) - - def scan_tag(self): - # See the specification for details. - start_mark = self.get_mark() - ch = self.peek(1) - if ch == '<': - handle = None - self.forward(2) - suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != '>': - raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek(), - self.get_mark()) - self.forward() - elif ch in '\0 \t\r\n\x85\u2028\u2029': - handle = None - suffix = '!' - self.forward() - else: - length = 1 - use_handle = False - while ch not in '\0 \r\n\x85\u2028\u2029': - if ch == '!': - use_handle = True - break - length += 1 - ch = self.peek(length) - handle = '!' - if use_handle: - handle = self.scan_tag_handle('tag', start_mark) - else: - handle = '!' - self.forward() - suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - value = (handle, suffix) - end_mark = self.get_mark() - return TagToken(value, start_mark, end_mark) - - def scan_block_scalar(self, style): - # See the specification for details. - - if style == '>': - folded = True - else: - folded = False - - chunks = [] - start_mark = self.get_mark() - - # Scan the header. - self.forward() - chomping, increment = self.scan_block_scalar_indicators(start_mark) - self.scan_block_scalar_ignored_line(start_mark) - - # Determine the indentation level and go to the first non-empty line. - min_indent = self.indent+1 - if min_indent < 1: - min_indent = 1 - if increment is None: - breaks, max_indent, end_mark = self.scan_block_scalar_indentation() - indent = max(min_indent, max_indent) - else: - indent = min_indent+increment-1 - breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = '' - - # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != '\0': - chunks.extend(breaks) - leading_non_space = self.peek() not in ' \t' - length = 0 - while self.peek(length) not in '\0\r\n\x85\u2028\u2029': - length += 1 - chunks.append(self.prefix(length)) - self.forward(length) - line_break = self.scan_line_break() - breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != '\0': - - # Unfortunately, folding rules are ambiguous. - # - # This is the folding according to the specification: - - if folded and line_break == '\n' \ - and leading_non_space and self.peek() not in ' \t': - if not breaks: - chunks.append(' ') - else: - chunks.append(line_break) - - # This is Clark Evans's interpretation (also in the spec - # examples): - # - #if folded and line_break == '\n': - # if not breaks: - # if self.peek() not in ' \t': - # chunks.append(' ') - # else: - # chunks.append(line_break) - #else: - # chunks.append(line_break) - else: - break - - # Chomp the tail. - if chomping is not False: - chunks.append(line_break) - if chomping is True: - chunks.extend(breaks) - - # We are done. - return ScalarToken(''.join(chunks), False, start_mark, end_mark, - style) - - def scan_block_scalar_indicators(self, start_mark): - # See the specification for details. - chomping = None - increment = None - ch = self.peek() - if ch in '+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch in '0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - elif ch in '0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - ch = self.peek() - if ch in '+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % ch, self.get_mark()) - return chomping, increment - - def scan_block_scalar_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in '\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" % ch, - self.get_mark()) - self.scan_line_break() - - def scan_block_scalar_indentation(self): - # See the specification for details. - chunks = [] - max_indent = 0 - end_mark = self.get_mark() - while self.peek() in ' \r\n\x85\u2028\u2029': - if self.peek() != ' ': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - else: - self.forward() - if self.column > max_indent: - max_indent = self.column - return chunks, max_indent, end_mark - - def scan_block_scalar_breaks(self, indent): - # See the specification for details. - chunks = [] - end_mark = self.get_mark() - while self.column < indent and self.peek() == ' ': - self.forward() - while self.peek() in '\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - while self.column < indent and self.peek() == ' ': - self.forward() - return chunks, end_mark - - def scan_flow_scalar(self, style): - # See the specification for details. - # Note that we loose indentation rules for quoted scalars. Quoted - # scalars don't need to adhere indentation because " and ' clearly - # mark the beginning and the end of them. Therefore we are less - # restrictive then the specification requires. We only need to check - # that document separators are not included in scalars. - if style == '"': - double = True - else: - double = False - chunks = [] - start_mark = self.get_mark() - quote = self.peek() - self.forward() - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - while self.peek() != quote: - chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - self.forward() - end_mark = self.get_mark() - return ScalarToken(''.join(chunks), False, start_mark, end_mark, - style) - - ESCAPE_REPLACEMENTS = { - '0': '\0', - 'a': '\x07', - 'b': '\x08', - 't': '\x09', - '\t': '\x09', - 'n': '\x0A', - 'v': '\x0B', - 'f': '\x0C', - 'r': '\x0D', - 'e': '\x1B', - ' ': '\x20', - '\"': '\"', - '\\': '\\', - '/': '/', - 'N': '\x85', - '_': '\xA0', - 'L': '\u2028', - 'P': '\u2029', - } - - ESCAPE_CODES = { - 'x': 2, - 'u': 4, - 'U': 8, - } - - def scan_flow_scalar_non_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - length = 0 - while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': - length += 1 - if length: - chunks.append(self.prefix(length)) - self.forward(length) - ch = self.peek() - if not double and ch == '\'' and self.peek(1) == '\'': - chunks.append('\'') - self.forward(2) - elif (double and ch == '\'') or (not double and ch in '\"\\'): - chunks.append(ch) - self.forward() - elif double and ch == '\\': - self.forward() - ch = self.peek() - if ch in self.ESCAPE_REPLACEMENTS: - chunks.append(self.ESCAPE_REPLACEMENTS[ch]) - self.forward() - elif ch in self.ESCAPE_CODES: - length = self.ESCAPE_CODES[ch] - self.forward() - for k in range(length): - if self.peek(k) not in '0123456789ABCDEFabcdef': - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k)), self.get_mark()) - code = int(self.prefix(length), 16) - chunks.append(chr(code)) - self.forward(length) - elif ch in '\r\n\x85\u2028\u2029': - self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) - else: - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch, self.get_mark()) - else: - return chunks - - def scan_flow_scalar_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - length = 0 - while self.peek(length) in ' \t': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch == '\0': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.get_mark()) - elif ch in '\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != '\n': - chunks.append(line_break) - elif not breaks: - chunks.append(' ') - chunks.extend(breaks) - else: - chunks.append(whitespaces) - return chunks - - def scan_flow_scalar_breaks(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - # Instead of checking indentation, we check for document - # separators. - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document separator", self.get_mark()) - while self.peek() in ' \t': - self.forward() - if self.peek() in '\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - else: - return chunks - - def scan_plain(self): - # See the specification for details. - # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ',' or '?'. - # We also keep track of the `allow_simple_key` flag here. - # Indentation rules are loosed for the flow context. - chunks = [] - start_mark = self.get_mark() - end_mark = start_mark - indent = self.indent+1 - # We allow zero indentation for scalars, but then we need to check for - # document separators at the beginning of the line. - #if indent == 0: - # indent = 1 - spaces = [] - while True: - length = 0 - if self.peek() == '#': - break - while True: - ch = self.peek(length) - if ch in '\0 \t\r\n\x85\u2028\u2029' \ - or (ch == ':' and - self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029' - + (u',[]{}' if self.flow_level else u''))\ - or (self.flow_level and ch in ',?[]{}'): - break - length += 1 - if length == 0: - break - self.allow_simple_key = False - chunks.extend(spaces) - chunks.append(self.prefix(length)) - self.forward(length) - end_mark = self.get_mark() - spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == '#' \ - or (not self.flow_level and self.column < indent): - break - return ScalarToken(''.join(chunks), True, start_mark, end_mark) - - def scan_plain_spaces(self, indent, start_mark): - # See the specification for details. - # The specification is really confusing about tabs in plain scalars. - # We just forbid them completely. Do not use tabs in YAML! - chunks = [] - length = 0 - while self.peek(length) in ' ': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch in '\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - self.allow_simple_key = True - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return - breaks = [] - while self.peek() in ' \r\n\x85\u2028\u2029': - if self.peek() == ' ': - self.forward() - else: - breaks.append(self.scan_line_break()) - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return - if line_break != '\n': - chunks.append(line_break) - elif not breaks: - chunks.append(' ') - chunks.extend(breaks) - elif whitespaces: - chunks.append(whitespaces) - return chunks - - def scan_tag_handle(self, name, start_mark): - # See the specification for details. - # For some strange reasons, the specification does not allow '_' in - # tag handles. I have allowed it anyway. - ch = self.peek() - if ch != '!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch, self.get_mark()) - length = 1 - ch = self.peek(length) - if ch != ' ': - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if ch != '!': - self.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch, self.get_mark()) - length += 1 - value = self.prefix(length) - self.forward(length) - return value - - def scan_tag_uri(self, name, start_mark): - # See the specification for details. - # Note: we do not check if URI is well-formed. - chunks = [] - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?:@&=+$,_.!~*\'()[]%': - if ch == '%': - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - chunks.append(self.scan_uri_escapes(name, start_mark)) - else: - length += 1 - ch = self.peek(length) - if length: - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch, self.get_mark()) - return ''.join(chunks) - - def scan_uri_escapes(self, name, start_mark): - # See the specification for details. - codes = [] - mark = self.get_mark() - while self.peek() == '%': - self.forward() - for k in range(2): - if self.peek(k) not in '0123456789ABCDEFabcdef': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" - % self.peek(k), self.get_mark()) - codes.append(int(self.prefix(2), 16)) - self.forward(2) - try: - value = bytes(codes).decode('utf-8') - except UnicodeDecodeError as exc: - raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) - return value - - def scan_line_break(self): - # Transforms: - # '\r\n' : '\n' - # '\r' : '\n' - # '\n' : '\n' - # '\x85' : '\n' - # '\u2028' : '\u2028' - # '\u2029 : '\u2029' - # default : '' - ch = self.peek() - if ch in '\r\n\x85': - if self.prefix(2) == '\r\n': - self.forward(2) - else: - self.forward() - return '\n' - elif ch in '\u2028\u2029': - self.forward() - return ch - return '' diff --git a/lib3/yaml/serializer.py b/lib3/yaml/serializer.py deleted file mode 100644 index fe911e6..0000000 --- a/lib3/yaml/serializer.py +++ /dev/null @@ -1,111 +0,0 @@ - -__all__ = ['Serializer', 'SerializerError'] - -from .error import YAMLError -from .events import * -from .nodes import * - -class SerializerError(YAMLError): - pass - -class Serializer: - - ANCHOR_TEMPLATE = 'id%03d' - - def __init__(self, encoding=None, - explicit_start=None, explicit_end=None, version=None, tags=None): - self.use_encoding = encoding - self.use_explicit_start = explicit_start - self.use_explicit_end = explicit_end - self.use_version = version - self.use_tags = tags - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - self.closed = None - - def open(self): - if self.closed is None: - self.emit(StreamStartEvent(encoding=self.use_encoding)) - self.closed = False - elif self.closed: - raise SerializerError("serializer is closed") - else: - raise SerializerError("serializer is already opened") - - def close(self): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif not self.closed: - self.emit(StreamEndEvent()) - self.closed = True - - #def __del__(self): - # self.close() - - def serialize(self, node): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif self.closed: - raise SerializerError("serializer is closed") - self.emit(DocumentStartEvent(explicit=self.use_explicit_start, - version=self.use_version, tags=self.use_tags)) - self.anchor_node(node) - self.serialize_node(node, None, None) - self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - - def anchor_node(self, node): - if node in self.anchors: - if self.anchors[node] is None: - self.anchors[node] = self.generate_anchor(node) - else: - self.anchors[node] = None - if isinstance(node, SequenceNode): - for item in node.value: - self.anchor_node(item) - elif isinstance(node, MappingNode): - for key, value in node.value: - self.anchor_node(key) - self.anchor_node(value) - - def generate_anchor(self, node): - self.last_anchor_id += 1 - return self.ANCHOR_TEMPLATE % self.last_anchor_id - - def serialize_node(self, node, parent, index): - alias = self.anchors[node] - if node in self.serialized_nodes: - self.emit(AliasEvent(alias)) - else: - self.serialized_nodes[node] = True - self.descend_resolver(parent, index) - if isinstance(node, ScalarNode): - detected_tag = self.resolve(ScalarNode, node.value, (True, False)) - default_tag = self.resolve(ScalarNode, node.value, (False, True)) - implicit = (node.tag == detected_tag), (node.tag == default_tag) - self.emit(ScalarEvent(alias, node.tag, implicit, node.value, - style=node.style)) - elif isinstance(node, SequenceNode): - implicit = (node.tag - == self.resolve(SequenceNode, node.value, True)) - self.emit(SequenceStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - index = 0 - for item in node.value: - self.serialize_node(item, node, index) - index += 1 - self.emit(SequenceEndEvent()) - elif isinstance(node, MappingNode): - implicit = (node.tag - == self.resolve(MappingNode, node.value, True)) - self.emit(MappingStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - for key, value in node.value: - self.serialize_node(key, node, None) - self.serialize_node(value, node, key) - self.emit(MappingEndEvent()) - self.ascend_resolver() - diff --git a/lib3/yaml/tokens.py b/lib3/yaml/tokens.py deleted file mode 100644 index 4d0b48a..0000000 --- a/lib3/yaml/tokens.py +++ /dev/null @@ -1,104 +0,0 @@ - -class Token(object): - def __init__(self, start_mark, end_mark): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] - attributes.sort() - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -#class BOMToken(Token): -# id = '' - -class DirectiveToken(Token): - id = '' - def __init__(self, name, value, start_mark, end_mark): - self.name = name - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class DocumentStartToken(Token): - id = '' - -class DocumentEndToken(Token): - id = '' - -class StreamStartToken(Token): - id = '' - def __init__(self, start_mark=None, end_mark=None, - encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndToken(Token): - id = '' - -class BlockSequenceStartToken(Token): - id = '' - -class BlockMappingStartToken(Token): - id = '' - -class BlockEndToken(Token): - id = '' - -class FlowSequenceStartToken(Token): - id = '[' - -class FlowMappingStartToken(Token): - id = '{' - -class FlowSequenceEndToken(Token): - id = ']' - -class FlowMappingEndToken(Token): - id = '}' - -class KeyToken(Token): - id = '?' - -class ValueToken(Token): - id = ':' - -class BlockEntryToken(Token): - id = '-' - -class FlowEntryToken(Token): - id = ',' - -class AliasToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class AnchorToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class TagToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class ScalarToken(Token): - id = '' - def __init__(self, value, plain, start_mark, end_mark, style=None): - self.value = value - self.plain = plain - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - diff --git a/packaging/build/FixVS9CMake.reg b/packaging/build/FixVS9CMake.reg deleted file mode 100644 index 51c444b..0000000 --- a/packaging/build/FixVS9CMake.reg +++ /dev/null @@ -1,76 +0,0 @@ -Windows Registry Editor Version 5.00 - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{600dd186-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectIA64Platform.dll" -@="Win64 (Itanium) Platform Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{600dd187-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectIA64Platform.dll" -@="ClIA64CodeGeneration Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{600dd188-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectIA64Platform.dll" -@="ClIA64General Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{600dd189-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectIA64Platform.dll" -@="ClIA64AdditionalOptions Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{656d875f-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectAMD64Platform.dll" -@="ClAMD64CodeGeneration Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{656d8760-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectAMD64Platform.dll" -@="ClAMD64General Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{656d8763-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectAMD64Platform.dll" -@="Win64 (AMD64) Platform Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\CLSID\{656d8766-2429-11d7-8bf6-00b0d03daa06}] -"InprocServer32"="C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\VC\\vcpackages\\VCProjectAMD64Platform.dll" -@="ClAMD64AdditionalOptions Class" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\VC\VC_OBJECTS_PLATFORM_INFO] - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\VC\VC_OBJECTS_PLATFORM_INFO\Win64 (AMD64)] -@="{656d8763-2429-11d7-8bf6-00b0d03daa06}" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\VC\VC_OBJECTS_PLATFORM_INFO\Win64 (AMD64)\ToolDefaultExtensionLists] -"VCCLCompilerTool"="*.cpp;*.cxx;*.cc;*.c" -"VCLinkerTool"="*.obj;*.res;*.lib;*.rsc" -"VCLibrarianTool"="*.obj;*.res;*.lib;*.rsc" -"VCMIDLTool"="*.idl;*.odl" -"VCCustomBuildTool"="*.bat" -"VCResourceCompilerTool"="*.rc" -"VCPreBuildEventTool"="*.bat" -"VCPreLinkEventTool"="*.bat" -"VCPostBuildEventTool"="*.bat" -"VCBscMakeTool"="*.sbr" -"VCNMakeTool"="" -"VCWebServiceProxyGeneratorTool"="*.sdl;*.wsdl" -"VCWebDeploymentTool"="" -"VCALinkTool"="*.resources" -"VCManagedResourceCompilerTool"="*.resx" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\VC\VC_OBJECTS_PLATFORM_INFO\Win64 (Itanium)] -@="{600dd186-2429-11d7-8bf6-00b0d03daa06}" - -[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VCExpress\9.0\VC\VC_OBJECTS_PLATFORM_INFO\Win64 (Itanium)\ToolDefaultExtensionLists] -"VCCLCompilerTool"="*.cpp;*.cxx;*.cc;*.c" -"VCLinkerTool"="*.obj;*.res;*.lib;*.rsc" -"VCLibrarianTool"="*.obj;*.res;*.lib;*.rsc" -"VCMIDLTool"="*.idl;*.odl" -"VCCustomBuildTool"="*.bat" -"VCResourceCompilerTool"="*.rc" -"VCPreBuildEventTool"="*.bat" -"VCPreLinkEventTool"="*.bat" -"VCPostBuildEventTool"="*.bat" -"VCBscMakeTool"="*.sbr" -"VCNMakeTool"="" -"VCWebServiceProxyGeneratorTool"="*.sdl;*.wsdl" -"VCWebDeploymentTool"="" -"VCALinkTool"="*.resources" -"VCManagedResourceCompilerTool"="*.resx" - diff --git a/packaging/build/appveyor.ps1 b/packaging/build/appveyor.ps1 deleted file mode 100644 index d4cc6cf..0000000 --- a/packaging/build/appveyor.ps1 +++ /dev/null @@ -1,138 +0,0 @@ -# TODO: run in PR/test mode (larger matrix) vs "all-in-one" artifact/packaging mode -# TODO: use dynamic matrix so PRs are multi-job and tag builds are one (consolidate artifacts) -# TODO: consider secure credential storage for inline upload on tags? Or keep that all manual/OOB for security... -# TODO: refactor libyaml/pyyaml tests to enable first-class output for AppVeyor -# TODO: get version number from setup.py and/or lib(3)/__version__ -# Update-AppveyorBuild -Version $dynamic_version - -Function Invoke-Exe([scriptblock]$sb) { - & $sb - $exitcode = $LASTEXITCODE - If($exitcode -ne 0) { - throw "exe failed with nonzero exit code $exitcode" - } -} - -Function Bootstrap() { - - # ensure python 3.9 is present (current Appveyor VS2015 image doesn't include it) - If(-not $(Test-Path C:\Python39)) { - Invoke-Exe { choco.exe install python3 --version=3.9.1 -i --forcex86 --force --params="/InstallDir:C:\Python39" --no-progress } - } - - If(-not $(Test-Path C:\Python39-x64)) { - Invoke-Exe { choco.exe install python3 --version=3.9.1 -i --force --params="/InstallDir:C:\Python39-x64" --no-progress } - } - - Write-Output "patching Windows SDK bits for distutils" - - # patch 7.0/7.1 vcvars SDK bits up to work with distutils query - Set-Content -Path 'C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\amd64\vcvarsamd64.bat' '@CALL "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars64.bat"' - Set-Content -Path 'C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64\vcvars64.bat' '@CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /Release /x64' - - # patch VS9 x64 CMake config for VS Express, hide `reg.exe` stderr noise - Invoke-Exe { $noise = reg.exe import packaging\build\FixVS9CMake.reg 2>&1 } - - Copy-Item -Path "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\vcpackages\AMD64.VCPlatform.config" -Destination "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\vcpackages\AMD64.VCPlatform.Express.config" -Force - Copy-Item -Path "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\vcpackages\Itanium.VCPlatform.config" -Destination "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\vcpackages\Itanium.VCPlatform.Express.config" -Force - - # git spews all over stderr unless we tell it not to - $env:GIT_REDIRECT_STDERR="2>&1"; - - $libyaml_repo_url = If($env:libyaml_repo_url) { $env:libyaml_repo_url } Else { "https://github.com/yaml/libyaml.git" } - $libyaml_refspec = If($env:libyaml_refspec) { $env:libyaml_refspec } Else { "master" } - - Write-Output "cloning libyaml from $libyaml_repo_url / $libyaml_refspec" - - If(-not $(Test-Path .\libyaml)) { - Invoke-Exe { git clone -b $libyaml_refspec $libyaml_repo_url 2>&1 } - } -} - -Function Build-Wheel($python_path) { - - #$python_path = Join-Path C:\ $env:PYTHON_VER - $python = Join-Path $python_path python.exe - - Write-Output "building pyyaml wheel for $python_path" - - # query distutils for the VC version used to build this Python; translate to a VS version to choose the right generator - $python_vs_buildver = Invoke-Exe { & $python -c "from distutils.version import LooseVersion; from distutils.msvc9compiler import get_build_version; print(LooseVersion(str(get_build_version())).version[0])" } - - $python_cmake_generator = switch($python_vs_buildver) { - "9" { "Visual Studio 9 2008" } - "10" { "Visual Studio 10 2010" } - "14" { "Visual Studio 14 2015" } - default { throw "Python was built with unknown VS build version: $python_vs_buildver" } - } - - # query arch this python was built for - $python_arch = Invoke-Exe { & $python -c "from distutils.util import get_platform; print(str(get_platform()))" } - - if($python_arch -eq 'win-amd64') { - $python_cmake_generator += " Win64" - $vcvars_arch = "x64" - } - - # snarf VS vars (paths, etc) for the matching VS version and arch that built this Python - $raw_vars_out = Invoke-Exe { cmd.exe /c "`"C:\Program Files (x86)\Microsoft Visual Studio $($python_vs_buildver).0\VC\vcvarsall.bat`" $vcvars_arch & set" } - foreach($kv in $raw_vars_out) { - If($kv -match "=") { - $kv = $kv.Split("=", 2) - Set-Item -Force "env:$kv[0]" $kv[1] - } - Else { - Write-Output $kv - } - } - - # ensure pip is current (some appveyor pips are not) - Invoke-Exe { & $python -W "ignore:DEPRECATION" -m pip install --upgrade pip } - - # ensure required-for-build packages are present and up-to-date - Invoke-Exe { & $python -W "ignore:DEPRECATION" -m pip install --upgrade cython wheel setuptools --no-warn-script-location } - - pushd libyaml - Invoke-Exe { git clean -fdx } - popd - - mkdir libyaml\build - - pushd libyaml\build - Invoke-Exe { cmake.exe -G $python_cmake_generator -DYAML_STATIC_LIB_NAME=yaml .. } - Invoke-Exe { cmake.exe --build . --config Release } - popd - - Invoke-Exe { & $python setup.py --with-libyaml build_ext -I libyaml\include -L libyaml\build\Release -D YAML_DECLARE_STATIC build test bdist_wheel } -} - -Function Upload-Artifacts() { - Write-Output "uploading artifacts..." - - foreach($wheel in @(Resolve-Path dist\*.whl)) { - Push-AppveyorArtifact $wheel - } -} - -Bootstrap - -$pythons = @( -"C:\Python27" -"C:\Python27-x64" -"C:\Python36" -"C:\Python36-x64" -"C:\Python37" -"C:\Python37-x64" -"C:\Python38" -"C:\Python38-x64" -"C:\Python39" -"C:\Python39-x64" -) - -#$pythons = @("C:\$($env:PYTHON_VER)") - -foreach($python in $pythons) { - Build-Wheel $python -} - -Upload-Artifacts diff --git a/packaging/build/macos.sh b/packaging/build/macos.sh deleted file mode 100644 index 3e629ab..0000000 --- a/packaging/build/macos.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -eux - -# doesn't really matter which Python we use, so long as it can run cibuildwheels, and we're consistent within the -# build, since cibuildwheel is internally managing looping over all the Pythons for us. -export PYBIN=/usr/bin/python3 - -${PYBIN} -V -${PYBIN} -m pip install -U --user cibuildwheel -# run cibuildwheel; we can skip CIBW_ENVIRONMENT since the Mac version will directly inherit the envvars we set to -# force Cython and --with-libyaml. cibuildwheel will install Cython before each version is built. We expect that -# the calling environment will set CIBW_SKIP or CIBW_BUILD to control which Pythons we build for. (eg, CIBW_SKIP='pp* cp27* cp35*') - -# we're using a private build of libyaml, so set paths to favor that instead of whatever's laying around -export C_INCLUDE_PATH=$(cd libyaml/include; pwd):${C_INCLUDE_PATH:-} -export LIBRARY_PATH=$(cd libyaml/src/.libs; pwd):${LIBRARY_PATH:-} -export LD_LIBRARY_PATH=$(cd libyaml/src/.libs; pwd):${LD_LIBRARY_PATH:-} - -export PYYAML_FORCE_CYTHON=1 -export PYYAML_FORCE_LIBYAML=1 - -if [[ ${PYYAML_RUN_TESTS:-1} -eq 1 ]]; then - # tweak CIBW behavior to run our tests for us - export CIBW_BEFORE_BUILD='pip install Cython && make testall PYTHON=python' -else - echo "skipping test suite..." -fi - -export CIBW_TEST_COMMAND='python {project}/packaging/build/smoketest.py' - -${PYBIN} -m cibuildwheel --platform macos . - -mkdir -p dist -mv wheelhouse/* dist/ - -# ensure exactly one artifact -shopt -s nullglob -DISTFILES=(dist/*.whl) -if [[ ${#DISTFILES[@]} -ne 1 ]]; then - echo -e "unexpected dist content:\n\n$(ls)" - exit 1 -fi diff --git a/packaging/build/manylinux.sh b/packaging/build/manylinux.sh deleted file mode 100644 index 46f5dec..0000000 --- a/packaging/build/manylinux.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash - -set -eux - -PYBIN="/opt/python/${PYTHON_TAG}/bin/python" - -# modern tools don't allow us to pass eg, --with-libyaml, so we force it via env -export PYYAML_FORCE_CYTHON=1 -export PYYAML_FORCE_LIBYAML=1 - -# we're using a private build of libyaml, so set paths to favor that instead of whatever's laying around -export C_INCLUDE_PATH=libyaml/include:${C_INCLUDE_PATH:-} -export LIBRARY_PATH=libyaml/src/.libs:${LIBRARY_PATH:-} -export LD_LIBRARY_PATH=libyaml/src/.libs:${LD_LIBRARY_PATH:-} - -# install deps -echo "::group::installing build deps" -# FIXME: installing Cython here won't be necessary once we fix tests, since the build is PEP517 and declares its own deps -"${PYBIN}" -m pip install build==0.1.0 Cython -echo "::endgroup::" - -if [[ ${PYYAML_RUN_TESTS:-1} -eq 1 ]]; then - echo "::group::running test suite" - # FIXME: split tests out for easier direct execution w/o Makefile - # run full test suite - make testall PYTHON="${PYBIN}" - echo "::endgroup::" -else - echo "skipping test suite..." -fi - - -if [[ ${PYYAML_BUILD_WHEELS:-0} -eq 1 ]]; then - echo "::group::building wheels" - "${PYBIN}" -m build -w -o tempwheel . - echo "::endgroup::" - - echo "::group::validating wheels" - - for whl in tempwheel/*.whl; do - auditwheel repair --plat "${AW_PLAT}" "$whl" -w dist/ - done - - # ensure exactly one finished artifact - shopt -s nullglob - DISTFILES=(dist/*.whl) - if [[ ${#DISTFILES[@]} -ne 1 ]]; then - echo -e "unexpected dist content:\n\n$(ls)" - exit 1 - fi - - "${PYBIN}" -m pip install dist/*.whl - - "${PYBIN}" packaging/build/smoketest.py - - ls -1 dist/ - - echo "::endgroup::" - -else - echo "skipping wheel build..." -fi diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b186858..0000000 --- a/setup.cfg +++ /dev/null @@ -1,25 +0,0 @@ - -# The INCLUDE and LIB directories to build the '_yaml' extension. -# You may also set them using the options '-I' and '-L'. -[build_ext] - -# List of directories to search for 'yaml.h' (separated by ':'). -#include_dirs=/usr/local/include:../../include - -# List of directories to search for 'libyaml.a' (separated by ':'). -#library_dirs=/usr/local/lib:../../lib - -# An alternative compiler to build the extension. -#compiler=mingw32 - -# Additional preprocessor definitions might be required. -#define=YAML_DECLARE_STATIC - -# The following options are used to build PyYAML Windows installer -# for Python 2.7 on my PC: -#include_dirs=../../../libyaml/tags/0.1.4/include -#library_dirs=../../../libyaml/tags/0.1.4/win32/vs2008/output/release/lib -#define=YAML_DECLARE_STATIC - -[metadata] -license_file = LICENSE \ No newline at end of file diff --git a/setup.py b/setup.py index d7476c6..548b19f 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ NAME = 'PyYAML' -VERSION = '5.4.1' +VERSION = '6.0' DESCRIPTION = "YAML parser and emitter for Python" LONG_DESCRIPTION = """\ YAML is a data serialization format designed for human readability @@ -27,13 +27,12 @@ CLASSIFIERS = [ "Operating System :: OS Independent", "Programming Language :: Cython", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", @@ -65,11 +64,15 @@ int main(void) { """ -import sys, os, os.path, platform, warnings +import sys, os, os.path, pathlib, platform, shutil, tempfile, warnings + +# for newer setuptools, enable the embedded distutils before importing setuptools/distutils to avoid warnings +os.environ['SETUPTOOLS_USE_DISTUTILS'] = 'local' -from distutils import log from setuptools import setup, Command, Distribution as _Distribution, Extension as _Extension from setuptools.command.build_ext import build_ext as _build_ext +# NB: distutils imports must remain below setuptools to ensure we use the embedded version +from distutils import log from distutils.errors import DistutilsError, CompileError, LinkError, DistutilsPlatformError with_cython = False @@ -248,14 +251,28 @@ class test(Command): def run(self): build_cmd = self.get_finalized_command('build') build_cmd.run() - sys.path.insert(0, build_cmd.build_lib) - if sys.version_info[0] < 3: + + # running the tests this way can pollute the post-MANIFEST build sources + # (see https://github.com/yaml/pyyaml/issues/527#issuecomment-921058344) + # until we remove the test command, run tests from an ephemeral copy of the intermediate build sources + tempdir = tempfile.TemporaryDirectory(prefix='test_pyyaml') + + try: + # have to create a subdir since we don't get dir_exists_ok on copytree until 3.8 + temp_test_path = pathlib.Path(tempdir.name) / 'pyyaml' + shutil.copytree(build_cmd.build_lib, temp_test_path) + sys.path.insert(0, str(temp_test_path)) sys.path.insert(0, 'tests/lib') - else: - sys.path.insert(0, 'tests/lib3') - import test_all - if not test_all.main([]): - raise DistutilsError("Tests failed") + + import test_all + if not test_all.main([]): + raise DistutilsError("Tests failed") + finally: + try: + # this can fail under Windows; best-effort cleanup + tempdir.cleanup() + except Exception: + pass cmdclass = { @@ -282,7 +299,7 @@ if __name__ == '__main__': classifiers=CLASSIFIERS, project_urls=PROJECT_URLS, - package_dir={'': {2: 'lib', 3: 'lib3'}[sys.version_info[0]]}, + package_dir={'': 'lib'}, packages=['yaml', '_yaml'], ext_modules=[ Extension('yaml._yaml', ['yaml/_yaml.pyx'], @@ -292,5 +309,5 @@ if __name__ == '__main__': distclass=Distribution, cmdclass=cmdclass, - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*', + python_requires='>=3.6', ) diff --git a/tests/data/construct-python-name-module.code b/tests/data/construct-python-name-module.code index 6f39148..b8a4b6f 100644 --- a/tests/data/construct-python-name-module.code +++ b/tests/data/construct-python-name-module.code @@ -1 +1 @@ -[str, yaml.Loader, yaml.dump, abs, yaml.tokens] +[str, yaml.Loader, yaml.dump, abs, yaml.tokens, signal.Handlers] diff --git a/tests/data/construct-python-name-module.data b/tests/data/construct-python-name-module.data index f0c9712..f1a2c24 100644 --- a/tests/data/construct-python-name-module.data +++ b/tests/data/construct-python-name-module.data @@ -3,3 +3,4 @@ - !!python/name:yaml.dump - !!python/name:abs - !!python/module:yaml.tokens +- !!python/name:signal.Handlers diff --git a/tests/data/yaml11.schema b/tests/data/yaml11.schema new file mode 100644 index 0000000..e2791aa --- /dev/null +++ b/tests/data/yaml11.schema @@ -0,0 +1,264 @@ +# https://github.com/perlpunk/yaml-test-schema/blob/master/data/schema-yaml11.yaml +--- +'!!bool FALSE': ['bool', 'false()', 'false'] +'!!bool False': ['bool', 'false()', 'false'] +'!!bool N': ['bool', 'false()', "false"] +'!!bool NO': ['bool', 'false()', "false"] +'!!bool No': ['bool', 'false()', "false"] +'!!bool OFF': ['bool', 'false()', "false"] +'!!bool ON': ['bool', 'true()', "true"] +'!!bool Off': ['bool', 'false()', "false"] +'!!bool On': ['bool', 'true()', "true"] +'!!bool TRUE': ['bool', 'true()', 'true'] +'!!bool True': ['bool', 'true()', 'true'] +'!!bool Y': ['bool', 'true()', "true"] +'!!bool YES': ['bool', 'true()', "true"] +'!!bool Yes': ['bool', 'true()', "true"] +'!!bool false': ['bool', 'false()', 'false'] +'!!bool n': ['bool', 'false()', "false"] +'!!bool no': ['bool', 'false()', "false"] +'!!bool off': ['bool', 'false()', "false"] +'!!bool on': ['bool', 'true()', "true"] +'!!bool true': ['bool', 'true()', 'true'] +'!!bool y': ['bool', 'true()', "true"] +'!!bool yes': ['bool', 'true()', "true"] +'!!float +.INF': ['inf', 'inf()', '.inf'] +'!!float +.Inf': ['inf', 'inf()', '.inf'] +'!!float +.inf': ['inf', 'inf()', '.inf'] +'!!float +0.3e+3': ['float', '300.0', '300.0'] +'!!float -.INF': ['inf', 'inf-neg()', '-.inf'] +'!!float -.Inf': ['inf', 'inf-neg()', '-.inf'] +'!!float -.inf': ['inf', 'inf-neg()', '-.inf'] +'!!float -3.14': ['float', '-3.14', '-3.14'] +'!!float .0': ['float', '0.0', '0.0'] +'!!float .14': ['float', '0.14', '0.14'] +'!!float .1_4': ['float', '0.14', '0.14'] +'!!float .3E-1': ['float', '0.03', '0.03'] +'!!float .3e+3': ['float', '300.0', '300.0'] +'!!float .INF': ['inf', 'inf()', '.inf'] +'!!float .Inf': ['inf', 'inf()', '.inf'] +'!!float .NAN': ['nan', 'nan()', '.nan'] +'!!float .NaN': ['nan', 'nan()', '.nan'] +'!!float .inf': ['inf', 'inf()', '.inf'] +'!!float .nan': ['nan', 'nan()', '.nan'] +'!!float 0.0': ['float', '0.0', '0.0'] +'!!float 001.23': ['float', '1.23', '1.23'] +'!!float 190:20:30.15': ['float', '685230.15', '685230.15'] +'!!float 3.': ['float', '3.0', '3.0'] +'!!float 3.14': ['float', '3.14', '3.14'] +'!!float 3.3e+3': ['float', '3300.0', '3300.0'] +'!!float 85.230_15e+03': ['float', '85230.15', '85230.15'] +'!!float 85_230.15': ['float', '85230.15', '85230.15'] +'!!int +0': ['int', '0', '0'] +'!!int +0100_200': ['int', '32896', '32896'] +'!!int +0b100': ['int', '4', '4'] +'!!int +190:20:30': ['int', '685230', '685230'] +'!!int +23': ['int', '23', '23'] +'!!int -0': ['int', '0', '0'] +'!!int -0100_200': ['int', '-32896', '-32896'] +'!!int -0b101': ['int', '-5', '-5'] +'!!int -0x30': ['int', '-48', '-48'] +'!!int -190:20:30': ['int', '-685230', '-685230'] +'!!int -23': ['int', '-23', '-23'] +'!!int 0': ['int', '0', '0'] +'!!int 00': ['int', '0', '0'] +'!!int 0011': ['int', '9', '9'] +'!!int 010': ['int', '8', '8'] +'!!int 02_0': ['int', '16', '16'] +'!!int 07': ['int', '7', '7'] +'!!int 0b0': ['int', '0', '0'] +'!!int 0b100_101': ['int', '37', '37'] +'!!int 0x0': ['int', '0', '0'] +'!!int 0x10': ['int', '16', '16'] +'!!int 0x2_0': ['int', '32', '32'] +'!!int 0x42': ['int', '66', '66'] +'!!int 0xa': ['int', '10', '10'] +'!!int 100_000': ['int', '100000', '100000'] +'!!int 190:20:30': ['int', '685230', '685230'] +'!!int 23': ['int', '23', '23'] +'!!null #empty': ['null', 'null()', "null"] +'!!null NULL': ['null', 'null()', "null"] +'!!null Null': ['null', 'null()', "null"] +'!!null null': ['null', 'null()', 'null'] +'!!null ~': ['null', 'null()', 'null'] +'!!str #empty': ['str', '', "''"] +'!!str +.INF': ['str', '+.INF', "'+.INF'"] +'!!str +.Inf': ['str', '+.Inf', "'+.Inf'"] +'!!str +.inf': ['str', '+.inf', "'+.inf'"] +'!!str +0': ['str', '+0', "'+0'"] +'!!str +0.3e+3': ['str', '+0.3e+3', "'+0.3e+3'"] +'!!str +0.3e3': ['str', '+0.3e3', "+0.3e3"] +'!!str +0100_200': ['str', '+0100_200', "'+0100_200'"] +'!!str +0b100': ['str', '+0b100', "'+0b100'"] +'!!str +190:20:30': ['str', '+190:20:30', "'+190:20:30'"] +'!!str +23': ['str', '+23', "'+23'"] +'!!str -.INF': ['str', '-.INF', "'-.INF'"] +'!!str -.Inf': ['str', '-.Inf', "'-.Inf'"] +'!!str -.inf': ['str', '-.inf', "'-.inf'"] +'!!str -0': ['str', '-0', "'-0'"] +'!!str -0100_200': ['str', '-0100_200', "'-0100_200'"] +'!!str -0b101': ['str', '-0b101', "'-0b101'"] +'!!str -0x30': ['str', '-0x30', "'-0x30'"] +'!!str -190:20:30': ['str', '-190:20:30', "'-190:20:30'"] +'!!str -23': ['str', '-23', "'-23'"] +'!!str -3.14': ['str', '-3.14', "'-3.14'"] +'!!str .': ['str', '.', '.'] +'!!str .0': ['str', '.0', "'.0'"] +'!!str .14': ['str', '.14', "'.14'"] +'!!str .1_4': ['str', '.1_4', "'.1_4'"] +'!!str .3E-1': ['str', '.3E-1', "'.3E-1'"] +'!!str .3e+3': ['str', '.3e+3', "'.3e+3'"] +'!!str .3e3': ['str', '.3e3', ".3e3"] +'!!str .INF': ['str', '.INF', "'.INF'"] +'!!str .Inf': ['str', '.Inf', "'.Inf'"] +'!!str .NAN': ['str', '.NAN', "'.NAN'"] +'!!str .NaN': ['str', '.NaN', "'.NaN'"] +'!!str ._': ['str', '._', '._'] +'!!str ._14': ['str', '._14', '._14'] +'!!str .inf': ['str', '.inf', "'.inf'"] +'!!str .nan': ['str', '.nan', "'.nan'"] +'!!str 0': ['str', '0', "'0'"] +'!!str 0.0': ['str', '0.0', "'0.0'"] +'!!str 0.3e3': ['str', '0.3e3', "0.3e3"] +'!!str 00': ['str', '00', "'00'"] +'!!str 001.23': ['str', '001.23', "'001.23'"] +'!!str 0011': ['str', '0011', "'0011'"] +'!!str 010': ['str', '010', "'010'"] +'!!str 02_0': ['str', '02_0', "'02_0'"] +'!!str 07': ['str', '07', "'07'"] +'!!str 0b0': ['str', '0b0', "'0b0'"] +'!!str 0b100_101': ['str', '0b100_101', "'0b100_101'"] +'!!str 0o0': ['str', '0o0', "0o0"] +'!!str 0o10': ['str', '0o10', "0o10"] +'!!str 0o7': ['str', '0o7', "0o7"] +'!!str 0x0': ['str', '0x0', "'0x0'"] +'!!str 0x2_0': ['str', '0x2_0', "'0x2_0'"] +'!!str 0xa': ['str', '0xa', "'0xa'"] +'!!str 100_000': ['str', '100_000', "'100_000'"] +'!!str 190:20:30': ['str', '190:20:30', "'190:20:30'"] +'!!str 190:20:30.15': ['str', '190:20:30.15', "'190:20:30.15'"] +'!!str 23': ['str', '23', "'23'"] +'!!str 3.': ['str', '3.', "'3.'"] +'!!str 3.14': ['str', '3.14', "'3.14'"] +'!!str 3.3e+3': ['str', '3.3e+3', "'3.3e+3'"] +'!!str 85.230_15e+03': ['str', '85.230_15e+03', "'85.230_15e+03'"] +'!!str 85_230.15': ['str', '85_230.15', "'85_230.15'"] +'!!str FALSE': ['str', 'FALSE', "'FALSE'"] +'!!str False': ['str', 'False', "'False'"] +'!!str N': ['str', 'N', "'N'"] +'!!str NO': ['str', 'NO', "'NO'"] +'!!str NULL': ['str', 'NULL', "'NULL'"] +'!!str Null': ['str', 'Null', "'Null'"] +'!!str OFF': ['str', 'OFF', "'OFF'"] +'!!str ON': ['str', 'ON', "'ON'"] +'!!str Off': ['str', 'Off', "'Off'"] +'!!str On': ['str', 'On', "'On'"] +'!!str TRUE': ['str', 'TRUE', "'TRUE'"] +'!!str True': ['str', 'True', "'True'"] +'!!str Y': ['str', 'Y', "'Y'"] +'!!str YES': ['str', 'YES', "'YES'"] +'!!str Yes': ['str', 'Yes', "'Yes'"] +'!!str _._': ['str', '_._', '_._'] +'!!str false': ['str', 'false', "'false'"] +'!!str n': ['str', 'n', "'n'"] +'!!str no': ['str', 'no', "'no'"] +'!!str null': ['str', 'null', "'null'"] +'!!str off': ['str', 'off', "'off'"] +'!!str on': ['str', 'on', "'on'"] +'!!str true': ['str', 'true', "'true'"] +'!!str y': ['str', 'y', "'y'"] +'!!str yes': ['str', 'yes', "'yes'"] +'!!str ~': ['str', '~', "'~'"] +'#empty': ['null', 'null()', "null"] +'+.INF': ['inf', 'inf()', '.inf'] +'+.Inf': ['inf', 'inf()', '.inf'] +'+.inf': ['inf', 'inf()', '.inf'] +'+0': ['int', '0', '0'] +'+0.3e+3': ['float', '300.0', '300.0'] +'+0.3e3': ['str', '+0.3e3', '+0.3e3'] +'+0100_200': ['int', '32896', '32896'] +'+0b100': ['int', '4', '4'] +'+190:20:30': ['int', '685230', '685230'] +'+23': ['int', '23', '23'] +'+3.14': ['float', '3.14', '3.14'] +'-.INF': ['inf', 'inf-neg()', '-.inf'] +'-.Inf': ['inf', 'inf-neg()', '-.inf'] +'-.inf': ['inf', 'inf-neg()', '-.inf'] +'-0': ['int', '0', '0'] +'-0100_200': ['int', '-32896', '-32896'] +'-0b101': ['int', '-5', '-5'] +'-0x30': ['int', '-48', '-48'] +'-190:20:30': ['int', '-685230', '-685230'] +'-23': ['int', '-23', '-23'] +'-3.14': ['float', '-3.14', '-3.14'] +'.': ['str', '.', '.'] +'.0': ['float', '0.0', '0.0'] +'.14': ['float', '0.14', '0.14'] +'.1_4': ['float', '0.14', '0.14'] +'.3E-1': ['float', '0.03', '0.03'] +'.3e+3': ['float', '300.0', '300.0'] +'.3e3': ['str', '.3e3', '.3e3'] +'.INF': ['inf', 'inf()', '.inf'] +'.Inf': ['inf', 'inf()', '.inf'] +'.NAN': ['nan', 'nan()', '.nan'] +'.NaN': ['nan', 'nan()', '.nan'] +'._': ['str', '._', '._'] +'._14': ['str', '._14', '._14'] +'.inf': ['inf', 'inf()', '.inf'] +'.nan': ['nan', 'nan()', '.nan'] +'0': ['int', '0', '0'] +'0.0': ['float', '0.0', '0.0'] +'0.3e3': ['str', '0.3e3', '0.3e3'] +'00': ['int', '0', '0'] +'001.23': ['float', '1.23', '1.23'] +'0011': ['int', '9', '9'] +'010': ['int', '8', '8'] +'02_0': ['int', '16', '16'] +'07': ['int', '7', '7'] +'08': ['str', '08', '08'] +'0b0': ['int', '0', '0'] +'0b100_101': ['int', '37', '37'] +'0o0': ['str', '0o0', '0o0'] +'0o10': ['str', '0o10', '0o10'] +'0o7': ['str', '0o7', '0o7'] +'0x0': ['int', '0', '0'] +'0x10': ['int', '16', '16'] +'0x2_0': ['int', '32', '32'] +'0x42': ['int', '66', '66'] +'0xa': ['int', '10', '10'] +'100_000': ['int', '100000', '100000'] +'190:20:30': ['int', '685230', '685230'] +'190:20:30.15': ['float', '685230.15', '685230.15'] +'23': ['int', '23', '23'] +'3.': ['float', '3.0', '3.0'] +'3.14': ['float', '3.14', '3.14'] +'3.3e+3': ['float', '3300', '3300.0'] +'3e3': ['str', '3e3', '3e3'] +'85.230_15e+03': ['float', '85230.15', '85230.15'] +'85_230.15': ['float', '85230.15', '85230.15'] +'FALSE': ['bool', 'false()', 'false'] +'False': ['bool', 'false()', 'false'] +'N': ['bool', 'false()', "false"] +'NO': ['bool', 'false()', "false"] +'NULL': ['null', 'null()', "null"] +'Null': ['null', 'null()', "null"] +'OFF': ['bool', 'false()', "false"] +'ON': ['bool', 'true()', "true"] +'Off': ['bool', 'false()', "false"] +'On': ['bool', 'true()', "true"] +'TRUE': ['bool', 'true()', 'true'] +'True': ['bool', 'true()', 'true'] +'Y': ['bool', 'true()', "true"] +'YES': ['bool', 'true()', "true"] +'Yes': ['bool', 'true()', "true"] +'_._': ['str', '_._', '_._'] +'false': ['bool', 'false()', 'false'] +'n': ['bool', 'false()', "false"] +'no': ['bool', 'false()', "false"] +'null': ['null', 'null()', "null"] +'off': ['bool', 'false()', "false"] +'on': ['bool', 'true()', "true"] +'true': ['bool', 'true()', 'true'] +'y': ['bool', 'true()', "true"] +'yes': ['bool', 'true()', "true"] +'~': ['null', 'null()', "null"] diff --git a/tests/data/yaml11.schema-skip b/tests/data/yaml11.schema-skip new file mode 100644 index 0000000..2d78a61 --- /dev/null +++ b/tests/data/yaml11.schema-skip @@ -0,0 +1,7 @@ +load: { + 'Y': 1, 'y': 1, 'N': 1, 'n': 1, + '!!bool Y': 1, '!!bool N': 1, '!!bool n': 1, '!!bool y': 1, + } +dump: { + '!!str N': 1, '!!str Y': 1, '!!str n': 1, '!!str y': 1, + } diff --git a/tests/lib/canonical.py b/tests/lib/canonical.py index 020e6db..a8b4e3a 100644 --- a/tests/lib/canonical.py +++ b/tests/lib/canonical.py @@ -7,10 +7,12 @@ class CanonicalError(yaml.YAMLError): class CanonicalScanner: def __init__(self, data): - try: - self.data = unicode(data, 'utf-8')+u'\0' - except UnicodeDecodeError: - raise CanonicalError("utf-8 stream is expected") + if isinstance(data, bytes): + try: + data = data.decode('utf-8') + except UnicodeDecodeError: + raise CanonicalError("utf-8 stream is expected") + self.data = data+'\0' self.index = 0 self.tokens = [] self.scanned = False @@ -49,63 +51,63 @@ class CanonicalScanner: while True: self.find_token() ch = self.data[self.index] - if ch == u'\0': + if ch == '\0': self.tokens.append(yaml.StreamEndToken(None, None)) break - elif ch == u'%': + elif ch == '%': self.tokens.append(self.scan_directive()) - elif ch == u'-' and self.data[self.index:self.index+3] == u'---': + elif ch == '-' and self.data[self.index:self.index+3] == '---': self.index += 3 self.tokens.append(yaml.DocumentStartToken(None, None)) - elif ch == u'[': + elif ch == '[': self.index += 1 self.tokens.append(yaml.FlowSequenceStartToken(None, None)) - elif ch == u'{': + elif ch == '{': self.index += 1 self.tokens.append(yaml.FlowMappingStartToken(None, None)) - elif ch == u']': + elif ch == ']': self.index += 1 self.tokens.append(yaml.FlowSequenceEndToken(None, None)) - elif ch == u'}': + elif ch == '}': self.index += 1 self.tokens.append(yaml.FlowMappingEndToken(None, None)) - elif ch == u'?': + elif ch == '?': self.index += 1 self.tokens.append(yaml.KeyToken(None, None)) - elif ch == u':': + elif ch == ':': self.index += 1 self.tokens.append(yaml.ValueToken(None, None)) - elif ch == u',': + elif ch == ',': self.index += 1 self.tokens.append(yaml.FlowEntryToken(None, None)) - elif ch == u'*' or ch == u'&': + elif ch == '*' or ch == '&': self.tokens.append(self.scan_alias()) - elif ch == u'!': + elif ch == '!': self.tokens.append(self.scan_tag()) - elif ch == u'"': + elif ch == '"': self.tokens.append(self.scan_scalar()) else: raise CanonicalError("invalid token") self.scanned = True - DIRECTIVE = u'%YAML 1.1' + DIRECTIVE = '%YAML 1.1' def scan_directive(self): if self.data[self.index:self.index+len(self.DIRECTIVE)] == self.DIRECTIVE and \ - self.data[self.index+len(self.DIRECTIVE)] in u' \n\0': + self.data[self.index+len(self.DIRECTIVE)] in ' \n\0': self.index += len(self.DIRECTIVE) return yaml.DirectiveToken('YAML', (1, 1), None, None) else: raise CanonicalError("invalid directive") def scan_alias(self): - if self.data[self.index] == u'*': + if self.data[self.index] == '*': TokenClass = yaml.AliasToken else: TokenClass = yaml.AnchorToken self.index += 1 start = self.index - while self.data[self.index] not in u', \n\0': + while self.data[self.index] not in ', \n\0': self.index += 1 value = self.data[start:self.index] return TokenClass(value, None, None) @@ -113,17 +115,17 @@ class CanonicalScanner: def scan_tag(self): self.index += 1 start = self.index - while self.data[self.index] not in u' \n\0': + while self.data[self.index] not in ' \n\0': self.index += 1 value = self.data[start:self.index] if not value: - value = u'!' - elif value[0] == u'!': + value = '!' + elif value[0] == '!': value = 'tag:yaml.org,2002:'+value[1:] - elif value[0] == u'<' and value[-1] == u'>': + elif value[0] == '<' and value[-1] == '>': value = value[1:-1] else: - value = u'!'+value + value = '!'+value return yaml.TagToken(value, None, None) QUOTE_CODES = { @@ -133,23 +135,22 @@ class CanonicalScanner: } QUOTE_REPLACES = { - u'\\': u'\\', - u'\"': u'\"', - u' ': u' ', - u'a': u'\x07', - u'b': u'\x08', - u'e': u'\x1B', - u'f': u'\x0C', - u'n': u'\x0A', - u'r': u'\x0D', - u't': u'\x09', - u'v': u'\x0B', - u'N': u'\u0085', - u'L': u'\u2028', - u'P': u'\u2029', - u'_': u'_', - u'0': u'\x00', - + '\\': '\\', + '\"': '\"', + ' ': ' ', + 'a': '\x07', + 'b': '\x08', + 'e': '\x1B', + 'f': '\x0C', + 'n': '\x0A', + 'r': '\x0D', + 't': '\x09', + 'v': '\x0B', + 'N': '\u0085', + 'L': '\u2028', + 'P': '\u2029', + '_': '_', + '0': '\x00', } def scan_scalar(self): @@ -157,32 +158,32 @@ class CanonicalScanner: chunks = [] start = self.index ignore_spaces = False - while self.data[self.index] != u'"': - if self.data[self.index] == u'\\': + while self.data[self.index] != '"': + if self.data[self.index] == '\\': ignore_spaces = False chunks.append(self.data[start:self.index]) self.index += 1 ch = self.data[self.index] self.index += 1 - if ch == u'\n': + if ch == '\n': ignore_spaces = True elif ch in self.QUOTE_CODES: length = self.QUOTE_CODES[ch] code = int(self.data[self.index:self.index+length], 16) - chunks.append(unichr(code)) + chunks.append(chr(code)) self.index += length else: if ch not in self.QUOTE_REPLACES: raise CanonicalError("invalid escape code") chunks.append(self.QUOTE_REPLACES[ch]) start = self.index - elif self.data[self.index] == u'\n': + elif self.data[self.index] == '\n': chunks.append(self.data[start:self.index]) - chunks.append(u' ') + chunks.append(' ') self.index += 1 start = self.index ignore_spaces = True - elif ignore_spaces and self.data[self.index] == u' ': + elif ignore_spaces and self.data[self.index] == ' ': self.index += 1 start = self.index else: @@ -190,17 +191,17 @@ class CanonicalScanner: self.index += 1 chunks.append(self.data[start:self.index]) self.index += 1 - return yaml.ScalarToken(u''.join(chunks), False, None, None) + return yaml.ScalarToken(''.join(chunks), False, None, None) def find_token(self): found = False while not found: - while self.data[self.index] in u' \t': + while self.data[self.index] in ' \t': self.index += 1 - if self.data[self.index] == u'#': - while self.data[self.index] != u'\n': + if self.data[self.index] == '#': + while self.data[self.index] != '\n': self.index += 1 - if self.data[self.index] == u'\n': + if self.data[self.index] == '\n': self.index += 1 else: found = True diff --git a/tests/lib/test_appliance.py b/tests/lib/test_appliance.py index 5ec4575..0c5cda1 100644 --- a/tests/lib/test_appliance.py +++ b/tests/lib/test_appliance.py @@ -3,8 +3,6 @@ import sys, os, os.path, types, traceback, pprint DATA = 'tests/data' -has_ucs4 = sys.maxunicode > 0xffff - def find_test_functions(collections): if not isinstance(collections, list): collections = [collections] @@ -12,9 +10,7 @@ def find_test_functions(collections): for collection in collections: if not isinstance(collection, dict): collection = vars(collection) - keys = collection.keys() - keys.sort() - for key in keys: + for key in sorted(collection): value = collection[key] if isinstance(value, types.FunctionType) and hasattr(value, 'unittest'): functions.append(value) @@ -25,13 +21,10 @@ def find_test_filenames(directory): for filename in os.listdir(directory): if os.path.isfile(os.path.join(directory, filename)): base, ext = os.path.splitext(filename) - if base.endswith('-py3'): - continue - if not has_ucs4 and base.find('-ucs4-') > -1: + if base.endswith('-py2'): continue filenames.setdefault(base, []).append(ext) - filenames = filenames.items() - filenames.sort() + filenames = sorted(filenames.items()) return filenames def parse_arguments(args): @@ -58,16 +51,13 @@ def parse_arguments(args): return include_functions, include_filenames, verbose def execute(function, filenames, verbose): - if hasattr(function, 'unittest_name'): - name = function.unittest_name - else: - name = function.func_name + name = function.__name__ if verbose: sys.stdout.write('='*75+'\n') sys.stdout.write('%s(%s)...\n' % (name, ', '.join(filenames))) try: function(verbose=verbose, *filenames) - except Exception, exc: + except Exception as exc: info = sys.exc_info() if isinstance(exc, AssertionError): kind = 'FAILURE' @@ -113,7 +103,8 @@ def display(results, verbose): for filename in filenames: sys.stdout.write('-'*75+'\n') sys.stdout.write('%s:\n' % filename) - data = open(filename, 'rb').read() + with open(filename, 'r', errors='replace') as file: + data = file.read() sys.stdout.write(data) if data and data[-1] != '\n': sys.stdout.write('\n') @@ -131,9 +122,9 @@ def run(collections, args=None): include_functions, include_filenames, verbose = parse_arguments(args) results = [] for function in test_functions: - if include_functions and function.func_name not in include_functions: + if include_functions and function.__name__ not in include_functions: continue - if function.unittest: + if function.unittest and function.unittest is not True: for base, exts in test_filenames: if include_filenames and base not in include_filenames: continue diff --git a/tests/lib/test_canonical.py b/tests/lib/test_canonical.py index a851ef2..7bded81 100644 --- a/tests/lib/test_canonical.py +++ b/tests/lib/test_canonical.py @@ -2,32 +2,35 @@ import yaml, canonical def test_canonical_scanner(canonical_filename, verbose=False): - data = open(canonical_filename, 'rb').read() + with open(canonical_filename, 'rb') as file: + data = file.read() tokens = list(yaml.canonical_scan(data)) assert tokens, tokens if verbose: for token in tokens: - print token + print(token) test_canonical_scanner.unittest = ['.canonical'] def test_canonical_parser(canonical_filename, verbose=False): - data = open(canonical_filename, 'rb').read() + with open(canonical_filename, 'rb') as file: + data = file.read() events = list(yaml.canonical_parse(data)) assert events, events if verbose: for event in events: - print event + print(event) test_canonical_parser.unittest = ['.canonical'] def test_canonical_error(data_filename, canonical_filename, verbose=False): - data = open(data_filename, 'rb').read() + with open(data_filename, 'rb') as file: + data = file.read() try: output = list(yaml.canonical_load_all(data)) - except yaml.YAMLError, exc: + except yaml.YAMLError as exc: if verbose: - print exc + print(exc) else: raise AssertionError("expected an exception") diff --git a/tests/lib/test_constructor.py b/tests/lib/test_constructor.py index c76df5e..0783a21 100644 --- a/tests/lib/test_constructor.py +++ b/tests/lib/test_constructor.py @@ -3,14 +3,14 @@ import yaml import pprint import datetime -try: - set -except NameError: - from sets import Set as set import yaml.tokens +# Import any packages here that need to be referenced in .code files. +import signal + def execute(code): - exec code + global value + exec(code) return value def _make_objects(): @@ -114,7 +114,7 @@ def _make_objects(): else: return False - class AnObject(object): + class AnObject: def __new__(cls, foo=None, bar=None, baz=None): self = object.__new__(cls) self.foo = foo @@ -158,20 +158,6 @@ def _make_objects(): def __setstate__(self, state): self.foo, self.bar, self.baz = state - class InitArgs(AnInstance): - def __getinitargs__(self): - return (self.foo, self.bar, self.baz) - def __getstate__(self): - return {} - - class InitArgsWithState(AnInstance): - def __getinitargs__(self): - return (self.foo, self.bar) - def __getstate__(self): - return self.baz - def __setstate__(self, state): - self.baz = state - class NewArgs(AnObject): def __getnewargs__(self): return (self.foo, self.bar, self.baz) @@ -186,6 +172,10 @@ def _make_objects(): def __setstate__(self, state): self.baz = state + InitArgs = NewArgs + + InitArgsWithState = NewArgsWithState + class Reduce(AnObject): def __reduce__(self): return self.__class__, (self.foo, self.bar, self.baz) @@ -196,7 +186,7 @@ def _make_objects(): def __setstate__(self, state): self.baz = state - class Slots(object): + class Slots: __slots__ = ("foo", "bar", "baz") def __init__(self, foo=None, bar=None, baz=None): self.foo = foo @@ -237,7 +227,7 @@ def _make_objects(): class MyFullLoader(yaml.FullLoader): def get_state_keys_blacklist(self): - return super(MyFullLoader, self).get_state_keys_blacklist() + ['^mymethod$', '^wrong_.*$'] + return super().get_state_keys_blacklist() + ['^mymethod$', '^wrong_.*$'] today = datetime.date.today() @@ -257,8 +247,6 @@ def _serialize_value(data): return '{%s}' % ', '.join(items) elif isinstance(data, datetime.datetime): return repr(data.utctimetuple()) - elif isinstance(data, unicode): - return data.encode('utf-8') elif isinstance(data, float) and data != data: return '?' else: @@ -269,26 +257,28 @@ def test_constructor_types(data_filename, code_filename, verbose=False): native1 = None native2 = None try: - native1 = list(yaml.load_all(open(data_filename, 'rb'), Loader=MyLoader)) + with open(data_filename, 'rb') as file: + native1 = list(yaml.load_all(file, Loader=MyLoader)) if len(native1) == 1: native1 = native1[0] - native2 = _load_code(open(code_filename, 'rb').read()) + with open(code_filename, 'rb') as file: + native2 = _load_code(file.read()) try: if native1 == native2: return except TypeError: pass if verbose: - print "SERIALIZED NATIVE1:" - print _serialize_value(native1) - print "SERIALIZED NATIVE2:" - print _serialize_value(native2) + print("SERIALIZED NATIVE1:") + print(_serialize_value(native1)) + print("SERIALIZED NATIVE2:") + print(_serialize_value(native2)) assert _serialize_value(native1) == _serialize_value(native2), (native1, native2) finally: if verbose: - print "NATIVE1:" + print("NATIVE1:") pprint.pprint(native1) - print "NATIVE2:" + print("NATIVE2:") pprint.pprint(native2) test_constructor_types.unittest = ['.data', '.code'] @@ -296,7 +286,8 @@ test_constructor_types.unittest = ['.data', '.code'] def test_subclass_blacklist_types(data_filename, verbose=False): _make_objects() try: - yaml.load(open(data_filename, 'rb').read(), MyFullLoader) + with open(data_filename, 'rb') as file: + yaml.load(file.read(), MyFullLoader) except yaml.YAMLError as exc: if verbose: print("%s:" % exc.__class__.__name__, exc) @@ -305,18 +296,6 @@ def test_subclass_blacklist_types(data_filename, verbose=False): test_subclass_blacklist_types.unittest = ['.subclass_blacklist'] -def test_timezone_copy(verbose=False): - import copy - tzinfo = yaml.constructor.timezone(datetime.timedelta(0)) - - tz_copy = copy.copy(tzinfo) - tz_deepcopy = copy.deepcopy(tzinfo) - - if tzinfo.tzname() != tz_copy.tzname() != tz_deepcopy.tzname(): - raise AssertionError("Timezones should be equal") - -test_timezone_copy.unittest = [] - if __name__ == '__main__': import sys, test_constructor sys.modules['test_constructor'] = sys.modules['__main__'] diff --git a/tests/lib/test_dump_load.py b/tests/lib/test_dump_load.py new file mode 100644 index 0000000..f3b10b0 --- /dev/null +++ b/tests/lib/test_dump_load.py @@ -0,0 +1,23 @@ +import yaml + +def test_dump(verbose=False): + assert yaml.dump(['foo']) +test_dump.unittest = True + +def test_load_no_loader(verbose=False): + try: + yaml.load("- foo\n") + except TypeError: + return True + assert(False, "load() require Loader=...") +test_load_no_loader.unittest = True + +def test_load_safeloader(verbose=False): + assert yaml.load("- foo\n", Loader=yaml.SafeLoader) +test_load_safeloader.unittest = True + +if __name__ == '__main__': + import sys, test_load + sys.modules['test_load'] = sys.modules['__main__'] + import test_appliance + test_appliance.run(globals()) diff --git a/tests/lib/test_emitter.py b/tests/lib/test_emitter.py index 61fd941..5eba0a3 100644 --- a/tests/lib/test_emitter.py +++ b/tests/lib/test_emitter.py @@ -15,23 +15,25 @@ def _compare_events(events1, events2): assert event1.value == event2.value, (event1, event2) def test_emitter_on_data(data_filename, canonical_filename, verbose=False): - events = list(yaml.parse(open(data_filename, 'rb'))) + with open(data_filename, 'rb') as file: + events = list(yaml.parse(file)) output = yaml.emit(events) if verbose: - print "OUTPUT:" - print output + print("OUTPUT:") + print(output) new_events = list(yaml.parse(output)) _compare_events(events, new_events) test_emitter_on_data.unittest = ['.data', '.canonical'] def test_emitter_on_canonical(canonical_filename, verbose=False): - events = list(yaml.parse(open(canonical_filename, 'rb'))) + with open(canonical_filename, 'rb') as file: + events = list(yaml.parse(file)) for canonical in [False, True]: output = yaml.emit(events, canonical=canonical) if verbose: - print "OUTPUT (canonical=%s):" % canonical - print output + print("OUTPUT (canonical=%s):" % canonical) + print(output) new_events = list(yaml.parse(output)) _compare_events(events, new_events) @@ -39,7 +41,8 @@ test_emitter_on_canonical.unittest = ['.canonical'] def test_emitter_styles(data_filename, canonical_filename, verbose=False): for filename in [data_filename, canonical_filename]: - events = list(yaml.parse(open(filename, 'rb'))) + with open(filename, 'rb') as file: + events = list(yaml.parse(file)) for flow_style in [False, True]: for style in ['|', '>', '"', '\'', '']: styled_events = [] @@ -56,8 +59,8 @@ def test_emitter_styles(data_filename, canonical_filename, verbose=False): styled_events.append(event) output = yaml.emit(styled_events) if verbose: - print "OUTPUT (filename=%r, flow_style=%r, style=%r)" % (filename, flow_style, style) - print output + print("OUTPUT (filename=%r, flow_style=%r, style=%r)" % (filename, flow_style, style)) + print(output) new_events = list(yaml.parse(output)) _compare_events(events, new_events) @@ -86,11 +89,12 @@ class EventsLoader(yaml.Loader): EventsLoader.add_constructor(None, EventsLoader.construct_event) def test_emitter_events(events_filename, verbose=False): - events = list(yaml.load(open(events_filename, 'rb'), Loader=EventsLoader)) + with open(events_filename, 'rb') as file: + events = list(yaml.load(file, Loader=EventsLoader)) output = yaml.emit(events) if verbose: - print "OUTPUT:" - print output + print("OUTPUT:") + print(output) new_events = list(yaml.parse(output)) _compare_events(events, new_events) diff --git a/tests/lib/test_errors.py b/tests/lib/test_errors.py index 32423c1..45f233f 100644 --- a/tests/lib/test_errors.py +++ b/tests/lib/test_errors.py @@ -3,10 +3,11 @@ import yaml, test_emitter def test_loader_error(error_filename, verbose=False): try: - list(yaml.load_all(open(error_filename, 'rb'), yaml.FullLoader)) - except yaml.YAMLError, exc: + with open(error_filename, 'rb') as file: + list(yaml.load_all(file, yaml.FullLoader)) + except yaml.YAMLError as exc: if verbose: - print "%s:" % exc.__class__.__name__, exc + print("%s:" % exc.__class__.__name__, exc) else: raise AssertionError("expected an exception") @@ -14,10 +15,11 @@ test_loader_error.unittest = ['.loader-error'] def test_loader_error_string(error_filename, verbose=False): try: - list(yaml.load_all(open(error_filename, 'rb').read(), yaml.FullLoader)) - except yaml.YAMLError, exc: + with open(error_filename, 'rb') as file: + list(yaml.load_all(file.read(), yaml.FullLoader)) + except yaml.YAMLError as exc: if verbose: - print "%s:" % exc.__class__.__name__, exc + print("%s:" % exc.__class__.__name__, exc) else: raise AssertionError("expected an exception") @@ -25,37 +27,39 @@ test_loader_error_string.unittest = ['.loader-error'] def test_loader_error_single(error_filename, verbose=False): try: - yaml.load(open(error_filename, 'rb').read(), yaml.FullLoader) - except yaml.YAMLError, exc: + with open(error_filename, 'rb') as file: + yaml.load(file.read(), yaml.FullLoader) + except yaml.YAMLError as exc: if verbose: - print "%s:" % exc.__class__.__name__, exc + print("%s:" % exc.__class__.__name__, exc) else: raise AssertionError("expected an exception") test_loader_error_single.unittest = ['.single-loader-error'] def test_emitter_error(error_filename, verbose=False): - events = list(yaml.load(open(error_filename, 'rb'), - Loader=test_emitter.EventsLoader)) + with open(error_filename, 'rb') as file: + events = list(yaml.load(file, Loader=test_emitter.EventsLoader)) try: yaml.emit(events) - except yaml.YAMLError, exc: + except yaml.YAMLError as exc: if verbose: - print "%s:" % exc.__class__.__name__, exc + print("%s:" % exc.__class__.__name__, exc) else: raise AssertionError("expected an exception") test_emitter_error.unittest = ['.emitter-error'] def test_dumper_error(error_filename, verbose=False): - code = open(error_filename, 'rb').read() + with open(error_filename, 'rb') as file: + code = file.read() try: import yaml - from StringIO import StringIO - exec code - except yaml.YAMLError, exc: + from io import StringIO + exec(code) + except yaml.YAMLError as exc: if verbose: - print "%s:" % exc.__class__.__name__, exc + print("%s:" % exc.__class__.__name__, exc) else: raise AssertionError("expected an exception") diff --git a/tests/lib/test_input_output.py b/tests/lib/test_input_output.py index 12e95b1..6d82c0d 100644 --- a/tests/lib/test_input_output.py +++ b/tests/lib/test_input_output.py @@ -1,39 +1,31 @@ import yaml -import codecs, StringIO, tempfile, os, os.path - -def _unicode_open(file, encoding, errors='strict'): - info = codecs.lookup(encoding) - if isinstance(info, tuple): - reader = info[2] - writer = info[3] - else: - reader = info.streamreader - writer = info.streamwriter - srw = codecs.StreamReaderWriter(file, reader, writer, errors) - srw.encoding = encoding - return srw +import codecs, io, tempfile, os, os.path def test_unicode_input(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') + with open(unicode_filename, 'rb') as file: + data = file.read().decode('utf-8') value = ' '.join(data.split()) - output = yaml.full_load(_unicode_open(StringIO.StringIO(data.encode('utf-8')), 'utf-8')) + output = yaml.full_load(data) assert output == value, (output, value) - for input in [data, data.encode('utf-8'), + output = yaml.full_load(io.StringIO(data)) + assert output == value, (output, value) + for input in [data.encode('utf-8'), codecs.BOM_UTF8+data.encode('utf-8'), codecs.BOM_UTF16_BE+data.encode('utf-16-be'), codecs.BOM_UTF16_LE+data.encode('utf-16-le')]: if verbose: - print "INPUT:", repr(input[:10]), "..." + print("INPUT:", repr(input[:10]), "...") output = yaml.full_load(input) assert output == value, (output, value) - output = yaml.full_load(StringIO.StringIO(input)) + output = yaml.full_load(io.BytesIO(input)) assert output == value, (output, value) test_unicode_input.unittest = ['.unicode'] def test_unicode_input_errors(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') + with open(unicode_filename, 'rb') as file: + data = file.read().decode('utf-8') for input in [data.encode('utf-16-be'), data.encode('utf-16-le'), codecs.BOM_UTF8+data.encode('utf-16-be'), @@ -41,70 +33,77 @@ def test_unicode_input_errors(unicode_filename, verbose=False): try: yaml.full_load(input) - except yaml.YAMLError, exc: + except yaml.YAMLError as exc: if verbose: - print exc + print(exc) else: raise AssertionError("expected an exception") try: - yaml.full_load(StringIO.StringIO(input)) - except yaml.YAMLError, exc: + yaml.full_load(io.BytesIO(input)) + except yaml.YAMLError as exc: if verbose: - print exc + print(exc) else: raise AssertionError("expected an exception") test_unicode_input_errors.unittest = ['.unicode'] def test_unicode_output(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') + with open(unicode_filename, 'rb') as file: + data = file.read().decode('utf-8') value = ' '.join(data.split()) for allow_unicode in [False, True]: data1 = yaml.dump(value, allow_unicode=allow_unicode) for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']: - stream = StringIO.StringIO() - yaml.dump(value, _unicode_open(stream, 'utf-8'), encoding=encoding, allow_unicode=allow_unicode) + stream = io.StringIO() + yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) data2 = stream.getvalue() data3 = yaml.dump(value, encoding=encoding, allow_unicode=allow_unicode) - stream = StringIO.StringIO() - yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) - data4 = stream.getvalue() + if encoding is not None: + assert isinstance(data3, bytes) + data3 = data3.decode(encoding) + stream = io.BytesIO() + if encoding is None: + try: + yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) + except TypeError as exc: + if verbose: + print(exc) + data4 = None + else: + raise AssertionError("expected an exception") + else: + yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) + data4 = stream.getvalue() + if verbose: + print("BYTES:", data4[:50]) + data4 = data4.decode(encoding) assert isinstance(data1, str), (type(data1), encoding) - data1.decode('utf-8') assert isinstance(data2, str), (type(data2), encoding) - data2.decode('utf-8') - if encoding is None: - assert isinstance(data3, unicode), (type(data3), encoding) - assert isinstance(data4, unicode), (type(data4), encoding) - else: - assert isinstance(data3, str), (type(data3), encoding) - data3.decode(encoding) - assert isinstance(data4, str), (type(data4), encoding) - data4.decode(encoding) test_unicode_output.unittest = ['.unicode'] def test_file_output(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') + with open(unicode_filename, 'rb') as file: + data = file.read().decode('utf-8') handle, filename = tempfile.mkstemp() os.close(handle) try: - stream = StringIO.StringIO() + stream = io.StringIO() yaml.dump(data, stream, allow_unicode=True) data1 = stream.getvalue() - stream = open(filename, 'wb') - yaml.dump(data, stream, allow_unicode=True) - stream.close() - data2 = open(filename, 'rb').read() - stream = open(filename, 'wb') + stream = io.BytesIO() yaml.dump(data, stream, encoding='utf-16-le', allow_unicode=True) - stream.close() - data3 = open(filename, 'rb').read().decode('utf-16-le')[1:].encode('utf-8') - stream = _unicode_open(open(filename, 'wb'), 'utf-8') - yaml.dump(data, stream, allow_unicode=True) - stream.close() - data4 = open(filename, 'rb').read() + data2 = stream.getvalue().decode('utf-16-le')[1:] + with open(filename, 'w', encoding='utf-16-le') as stream: + yaml.dump(data, stream, allow_unicode=True) + with open(filename, 'r', encoding='utf-16-le') as file: + data3 = file.read() + with open(filename, 'wb') as stream: + yaml.dump(data, stream, encoding='utf-8', allow_unicode=True) + with open(filename, 'r', encoding='utf-8') as file: + data4 = file.read() assert data1 == data2, (data1, data2) assert data1 == data3, (data1, data3) assert data1 == data4, (data1, data4) @@ -115,23 +114,25 @@ def test_file_output(unicode_filename, verbose=False): test_file_output.unittest = ['.unicode'] def test_unicode_transfer(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') + with open(unicode_filename, 'rb') as file: + data = file.read().decode('utf-8') for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']: input = data if encoding is not None: - input = (u'\ufeff'+input).encode(encoding) + input = ('\ufeff'+input).encode(encoding) output1 = yaml.emit(yaml.parse(input), allow_unicode=True) - stream = StringIO.StringIO() - yaml.emit(yaml.parse(input), _unicode_open(stream, 'utf-8'), - allow_unicode=True) - output2 = stream.getvalue() if encoding is None: - assert isinstance(output1, unicode), (type(output1), encoding) + stream = io.StringIO() else: - assert isinstance(output1, str), (type(output1), encoding) - output1.decode(encoding) - assert isinstance(output2, str), (type(output2), encoding) - output2.decode('utf-8') + stream = io.BytesIO() + yaml.emit(yaml.parse(input), stream, allow_unicode=True) + output2 = stream.getvalue() + assert isinstance(output1, str), (type(output1), encoding) + if encoding is None: + assert isinstance(output2, str), (type(output1), encoding) + else: + assert isinstance(output2, bytes), (type(output1), encoding) + output2.decode(encoding) test_unicode_transfer.unittest = ['.unicode'] diff --git a/tests/lib/test_mark.py b/tests/lib/test_mark.py index f30a121..02d8411 100644 --- a/tests/lib/test_mark.py +++ b/tests/lib/test_mark.py @@ -2,7 +2,8 @@ import yaml def test_marks(marks_filename, verbose=False): - inputs = open(marks_filename, 'rb').read().split('---\n')[1:] + with open(marks_filename, 'r') as file: + inputs = file.read().split('---\n')[1:] for input in inputs: index = 0 line = 0 @@ -14,10 +15,10 @@ def test_marks(marks_filename, verbose=False): else: column += 1 index += 1 - mark = yaml.Mark(marks_filename, index, line, column, unicode(input), index) + mark = yaml.Mark(marks_filename, index, line, column, input, index) snippet = mark.get_snippet(indent=2, max_length=79) if verbose: - print snippet + print(snippet) assert isinstance(snippet, str), type(snippet) assert snippet.count('\n') == 1, snippet.count('\n') data, pointer = snippet.split('\n') diff --git a/tests/lib/test_multi_constructor.py b/tests/lib/test_multi_constructor.py index f6e28fe..dcff2ce 100644 --- a/tests/lib/test_multi_constructor.py +++ b/tests/lib/test_multi_constructor.py @@ -29,8 +29,10 @@ class Multi2(yaml.FullLoader): pass def test_multi_constructor(input_filename, code_filename, verbose=False): - input = open(input_filename, 'rb').read().decode('utf-8') - native = _load_code(open(code_filename, 'rb').read()) + with open(input_filename, 'rb') as file: + input = file.read().decode('utf-8') + with open(code_filename, 'rb') as file: + native = _load_code(file.read()) # default multi constructor for ! and !! tags Multi1.add_multi_constructor('!', myconstructor1) diff --git a/tests/lib/test_reader.py b/tests/lib/test_reader.py index 3576ae6..ec88746 100644 --- a/tests/lib/test_reader.py +++ b/tests/lib/test_reader.py @@ -1,31 +1,34 @@ import yaml.reader -import codecs def _run_reader(data, verbose): try: stream = yaml.reader.Reader(data) - while stream.peek() != u'\0': + while stream.peek() != '\0': stream.forward() - except yaml.reader.ReaderError, exc: + except yaml.reader.ReaderError as exc: if verbose: - print exc + print(exc) else: raise AssertionError("expected an exception") def test_stream_error(error_filename, verbose=False): - _run_reader(open(error_filename, 'rb'), verbose) - _run_reader(open(error_filename, 'rb').read(), verbose) + with open(error_filename, 'rb') as file: + _run_reader(file, verbose) + with open(error_filename, 'rb') as file: + _run_reader(file.read(), verbose) for encoding in ['utf-8', 'utf-16-le', 'utf-16-be']: try: - data = unicode(open(error_filename, 'rb').read(), encoding) + with open(error_filename, 'rb') as file: + data = file.read().decode(encoding) break except UnicodeDecodeError: pass else: return _run_reader(data, verbose) - _run_reader(codecs.open(error_filename, encoding=encoding), verbose) + with open(error_filename, encoding=encoding) as file: + _run_reader(file, verbose) test_stream_error.unittest = ['.stream-error'] diff --git a/tests/lib/test_recursive.py b/tests/lib/test_recursive.py index 04c5798..527b258 100644 --- a/tests/lib/test_recursive.py +++ b/tests/lib/test_recursive.py @@ -23,24 +23,26 @@ class AnInstanceWithState(AnInstance): self.foo, self.bar = state['attributes'] def test_recursive(recursive_filename, verbose=False): - exec open(recursive_filename, 'rb').read() - value1 = value + context = globals().copy() + with open(recursive_filename, 'rb') as file: + exec(file.read(), context) + value1 = context['value'] output1 = None value2 = None output2 = None try: output1 = yaml.dump(value1) - value2 = yaml.load(output1, yaml.UnsafeLoader) + value2 = yaml.unsafe_load(output1) output2 = yaml.dump(value2) assert output1 == output2, (output1, output2) finally: if verbose: - #print "VALUE1:", value1 - #print "VALUE2:", value2 - print "OUTPUT1:" - print output1 - print "OUTPUT2:" - print output2 + print("VALUE1:", value1) + print("VALUE2:", value2) + print("OUTPUT1:") + print(output1) + print("OUTPUT2:") + print(output2) test_recursive.unittest = ['.recursive'] diff --git a/tests/lib/test_representer.py b/tests/lib/test_representer.py index a82a32a..f3095bf 100644 --- a/tests/lib/test_representer.py +++ b/tests/lib/test_representer.py @@ -7,7 +7,8 @@ def test_representer_types(code_filename, verbose=False): test_constructor._make_objects() for allow_unicode in [False, True]: for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']: - native1 = test_constructor._load_code(open(code_filename, 'rb').read()) + with open(code_filename, 'rb') as file: + native1 = test_constructor._load_code(file.read()) native2 = None try: output = yaml.dump(native1, Dumper=test_constructor.MyDumper, @@ -21,19 +22,19 @@ def test_representer_types(code_filename, verbose=False): value1 = test_constructor._serialize_value(native1) value2 = test_constructor._serialize_value(native2) if verbose: - print "SERIALIZED NATIVE1:" - print value1 - print "SERIALIZED NATIVE2:" - print value2 + print("SERIALIZED NATIVE1:") + print(value1) + print("SERIALIZED NATIVE2:") + print(value2) assert value1 == value2, (native1, native2) finally: if verbose: - print "NATIVE1:" + print("NATIVE1:") pprint.pprint(native1) - print "NATIVE2:" + print("NATIVE2:") pprint.pprint(native2) - print "OUTPUT:" - print output + print("OUTPUT:") + print(output) test_representer_types.unittest = ['.code'] diff --git a/tests/lib/test_resolver.py b/tests/lib/test_resolver.py index 5566750..d893941 100644 --- a/tests/lib/test_resolver.py +++ b/tests/lib/test_resolver.py @@ -6,17 +6,19 @@ def test_implicit_resolver(data_filename, detect_filename, verbose=False): correct_tag = None node = None try: - correct_tag = open(detect_filename, 'rb').read().strip() - node = yaml.compose(open(data_filename, 'rb')) + with open(detect_filename, 'r') as file: + correct_tag = file.read().strip() + with open(data_filename, 'rb') as file: + node = yaml.compose(file) assert isinstance(node, yaml.SequenceNode), node for scalar in node.value: assert isinstance(scalar, yaml.ScalarNode), scalar assert scalar.tag == correct_tag, (scalar.tag, correct_tag) finally: if verbose: - print "CORRECT TAG:", correct_tag + print("CORRECT TAG:", correct_tag) if hasattr(node, 'value'): - print "CHILDREN:" + print("CHILDREN:") pprint.pprint(node.value) test_implicit_resolver.unittest = ['.data', '.detect'] @@ -29,15 +31,15 @@ def _make_path_loader_and_dumper(): class MyDumper(yaml.Dumper): pass - yaml.add_path_resolver(u'!root', [], + yaml.add_path_resolver('!root', [], Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver(u'!root/scalar', [], str, + yaml.add_path_resolver('!root/scalar', [], str, Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver(u'!root/key11/key12/*', ['key11', 'key12'], + yaml.add_path_resolver('!root/key11/key12/*', ['key11', 'key12'], Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver(u'!root/key21/1/*', ['key21', 1], + yaml.add_path_resolver('!root/key21/1/*', ['key21', 1], Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver(u'!root/key31/*/*/key14/map', ['key31', None, None, 'key14'], dict, + yaml.add_path_resolver('!root/key31/*/*/key14/map', ['key31', None, None, 'key14'], dict, Loader=MyLoader, Dumper=MyDumper) return MyLoader, MyDumper @@ -58,8 +60,10 @@ def _convert_node(node): def test_path_resolver_loader(data_filename, path_filename, verbose=False): _make_path_loader_and_dumper() - nodes1 = list(yaml.compose_all(open(data_filename, 'rb').read(), Loader=MyLoader)) - nodes2 = list(yaml.compose_all(open(path_filename, 'rb').read())) + with open(data_filename, 'rb') as file: + nodes1 = list(yaml.compose_all(file.read(), Loader=MyLoader)) + with open(path_filename, 'rb') as file: + nodes2 = list(yaml.compose_all(file.read())) try: for node1, node2 in zip(nodes1, nodes2): data1 = _convert_node(node1) @@ -67,22 +71,24 @@ def test_path_resolver_loader(data_filename, path_filename, verbose=False): assert data1 == data2, (data1, data2) finally: if verbose: - print yaml.serialize_all(nodes1) + print(yaml.serialize_all(nodes1)) test_path_resolver_loader.unittest = ['.data', '.path'] def test_path_resolver_dumper(data_filename, path_filename, verbose=False): _make_path_loader_and_dumper() for filename in [data_filename, path_filename]: - output = yaml.serialize_all(yaml.compose_all(open(filename, 'rb')), Dumper=MyDumper) + with open(filename, 'rb') as file: + output = yaml.serialize_all(yaml.compose_all(file), Dumper=MyDumper) if verbose: - print output + print(output) nodes1 = yaml.compose_all(output) - nodes2 = yaml.compose_all(open(data_filename, 'rb')) - for node1, node2 in zip(nodes1, nodes2): - data1 = _convert_node(node1) - data2 = _convert_node(node2) - assert data1 == data2, (data1, data2) + with open(data_filename, 'rb') as file: + nodes2 = yaml.compose_all(file) + for node1, node2 in zip(nodes1, nodes2): + data1 = _convert_node(node1) + data2 = _convert_node(node2) + assert data1 == data2, (data1, data2) test_path_resolver_dumper.unittest = ['.data', '.path'] diff --git a/tests/lib/test_schema.py b/tests/lib/test_schema.py new file mode 100644 index 0000000..f3370ec --- /dev/null +++ b/tests/lib/test_schema.py @@ -0,0 +1,152 @@ +import yaml +import sys +import pprint +import math + +def check_bool(value, expected): + if expected == 'false()' and value is False: + return 1 + if expected == 'true()' and value is True: + return 1 + print(value) + print(expected) + return 0 + +def check_int(value, expected): + if (int(expected) == value): + return 1 + print(value) + print(expected) + return 0 + +def check_float(value, expected): + if expected == 'inf()': + if value == math.inf: + return 1 + elif expected == 'inf-neg()': + if value == -math.inf: + return 1 + elif expected == 'nan()': + if math.isnan(value): + return 1 + elif (float(expected) == value): + return 1 + else: + print(value) + print(expected) + return 0 + +def check_str(value, expected): + if value == expected: + return 1 + print(value) + print(expected) + return 0 + + +def _fail(input, test): + print("Input: >>" + input + "<<") + print(test) + +# The tests/data/yaml11.schema file is copied from +# https://github.com/perlpunk/yaml-test-schema/blob/master/data/schema-yaml11.yaml +def test_implicit_resolver(data_filename, skip_filename, verbose=False): + types = { + 'str': [str, check_str], + 'int': [int, check_int], + 'float': [float, check_float], + 'inf': [float, check_float], + 'nan': [float, check_float], + 'bool': [bool, check_bool], + } + with open(skip_filename, 'rb') as file: + skipdata = yaml.load(file, Loader=yaml.SafeLoader) + skip_load = skipdata['load'] + skip_dump = skipdata['dump'] + if verbose: + print(skip_load) + with open(data_filename, 'rb') as file: + tests = yaml.load(file, Loader=yaml.SafeLoader) + + i = 0 + fail = 0 + for i, (input, test) in enumerate(sorted(tests.items())): + if verbose: + print('-------------------- ' + str(i)) + + # Skip known loader bugs + if input in skip_load: + continue + + exp_type = test[0] + data = test[1] + exp_dump = test[2] + + # Test loading + try: + loaded = yaml.safe_load(input) + except: + print("Error:", sys.exc_info()[0], '(', sys.exc_info()[1], ')') + fail+=1 + _fail(input, test) + continue + + if verbose: + print(input) + print(test) + print(loaded) + print(type(loaded)) + + if exp_type == 'null': + if loaded is None: + pass + else: + fail+=1 + _fail(input, test) + else: + t = types[exp_type][0] + code = types[exp_type][1] + if isinstance(loaded, t): + if code(loaded, data): + pass + else: + fail+=1 + _fail(input, test) + else: + fail+=1 + _fail(input, test) + + # Skip known dumper bugs + if input in skip_dump: + continue + + dump = yaml.safe_dump(loaded, explicit_end=False) + # strip trailing newlines and footers + if dump.endswith('\n...\n'): + dump = dump[:-5] + if dump.endswith('\n'): + dump = dump[:-1] + if dump == exp_dump: + pass + else: + print("Compare: >>" + dump + "<< >>" + exp_dump + "<<") + fail+=1 + _fail(input, test) + +# if i >= 80: +# break + + if fail > 0: + print("Failed " + str(fail) + " / " + str(i) + " tests") + assert(False) + else: + print("Passed " + str(i) + " tests") + print("Skipped " + str(len(skip_load)) + " load tests") + print("Skipped " + str(len(skip_dump)) + " dump tests") + +test_implicit_resolver.unittest = ['.schema', '.schema-skip'] + +if __name__ == '__main__': + import test_appliance + test_appliance.run(globals()) + diff --git a/tests/lib/test_sort_keys.py b/tests/lib/test_sort_keys.py index 08072d2..d9de8e6 100644 --- a/tests/lib/test_sort_keys.py +++ b/tests/lib/test_sort_keys.py @@ -3,12 +3,14 @@ import pprint import sys def test_sort_keys(input_filename, sorted_filename, verbose=False): - input = open(input_filename, 'rb').read().decode('utf-8') - sorted = open(sorted_filename, 'rb').read().decode('utf-8') + with open(input_filename, 'rb') as file: + input = file.read().decode('utf-8') + with open(sorted_filename, 'rb') as file: + sorted = file.read().decode('utf-8') data = yaml.load(input, Loader=yaml.FullLoader) dump_sorted = yaml.dump(data, default_flow_style=False, sort_keys=True) dump_unsorted = yaml.dump(data, default_flow_style=False, sort_keys=False) - dump_unsorted = yaml.dump(data, default_flow_style=False, sort_keys=False, Dumper=yaml.SafeDumper) + dump_unsorted_safe = yaml.dump(data, default_flow_style=False, sort_keys=False, Dumper=yaml.SafeDumper) if verbose: print("INPUT:") print(input) @@ -17,8 +19,9 @@ def test_sort_keys(input_filename, sorted_filename, verbose=False): assert dump_sorted == sorted - - + if sys.version_info>=(3,7): + assert dump_unsorted == input + assert dump_unsorted_safe == input test_sort_keys.unittest = ['.sort', '.sorted'] diff --git a/tests/lib/test_structure.py b/tests/lib/test_structure.py index 61bcb80..cbd4c3e 100644 --- a/tests/lib/test_structure.py +++ b/tests/lib/test_structure.py @@ -34,23 +34,27 @@ def _convert_structure(loader): def test_structure(data_filename, structure_filename, verbose=False): nodes1 = [] - nodes2 = eval(open(structure_filename, 'rb').read()) + with open(structure_filename, 'r') as file: + nodes2 = eval(file.read()) try: - loader = yaml.Loader(open(data_filename, 'rb')) - while loader.check_event(): - if loader.check_event(yaml.StreamStartEvent, yaml.StreamEndEvent, - yaml.DocumentStartEvent, yaml.DocumentEndEvent): - loader.get_event() - continue - nodes1.append(_convert_structure(loader)) + with open(data_filename, 'rb') as file: + loader = yaml.Loader(file) + while loader.check_event(): + if loader.check_event( + yaml.StreamStartEvent, yaml.StreamEndEvent, + yaml.DocumentStartEvent, yaml.DocumentEndEvent + ): + loader.get_event() + continue + nodes1.append(_convert_structure(loader)) if len(nodes1) == 1: nodes1 = nodes1[0] assert nodes1 == nodes2, (nodes1, nodes2) finally: if verbose: - print "NODES1:" + print("NODES1:") pprint.pprint(nodes1) - print "NODES2:" + print("NODES2:") pprint.pprint(nodes2) test_structure.unittest = ['.data', '.structure'] @@ -62,7 +66,7 @@ def _compare_events(events1, events2, full=False): if isinstance(event1, yaml.AliasEvent) and full: assert event1.anchor == event2.anchor, (event1, event2) if isinstance(event1, (yaml.ScalarEvent, yaml.CollectionStartEvent)): - if (event1.tag not in [None, u'!'] and event2.tag not in [None, u'!']) or full: + if (event1.tag not in [None, '!'] and event2.tag not in [None, '!']) or full: assert event1.tag == event2.tag, (event1, event2) if isinstance(event1, yaml.ScalarEvent): assert event1.value == event2.value, (event1, event2) @@ -71,14 +75,16 @@ def test_parser(data_filename, canonical_filename, verbose=False): events1 = None events2 = None try: - events1 = list(yaml.parse(open(data_filename, 'rb'))) - events2 = list(yaml.canonical_parse(open(canonical_filename, 'rb'))) + with open(data_filename, 'rb') as file: + events1 = list(yaml.parse(file)) + with open(canonical_filename, 'rb') as file: + events2 = list(yaml.canonical_parse(file)) _compare_events(events1, events2) finally: if verbose: - print "EVENTS1:" + print("EVENTS1:") pprint.pprint(events1) - print "EVENTS2:" + print("EVENTS2:") pprint.pprint(events2) test_parser.unittest = ['.data', '.canonical'] @@ -87,14 +93,16 @@ def test_parser_on_canonical(canonical_filename, verbose=False): events1 = None events2 = None try: - events1 = list(yaml.parse(open(canonical_filename, 'rb'))) - events2 = list(yaml.canonical_parse(open(canonical_filename, 'rb'))) + with open(canonical_filename, 'rb') as file: + events1 = list(yaml.parse(file)) + with open(canonical_filename, 'rb') as file: + events2 = list(yaml.canonical_parse(file)) _compare_events(events1, events2, full=True) finally: if verbose: - print "EVENTS1:" + print("EVENTS1:") pprint.pprint(events1) - print "EVENTS2:" + print("EVENTS2:") pprint.pprint(events2) test_parser_on_canonical.unittest = ['.canonical'] @@ -117,16 +125,18 @@ def test_composer(data_filename, canonical_filename, verbose=False): nodes1 = None nodes2 = None try: - nodes1 = list(yaml.compose_all(open(data_filename, 'rb'))) - nodes2 = list(yaml.canonical_compose_all(open(canonical_filename, 'rb'))) + with open(data_filename, 'rb') as file: + nodes1 = list(yaml.compose_all(file)) + with open(canonical_filename, 'rb') as file: + nodes2 = list(yaml.canonical_compose_all(file)) assert len(nodes1) == len(nodes2), (len(nodes1), len(nodes2)) for node1, node2 in zip(nodes1, nodes2): _compare_nodes(node1, node2) finally: if verbose: - print "NODES1:" + print("NODES1:") pprint.pprint(nodes1) - print "NODES2:" + print("NODES2:") pprint.pprint(nodes2) test_composer.unittest = ['.data', '.canonical'] @@ -139,12 +149,12 @@ def _make_loader(): return tuple(yaml.Loader.construct_sequence(self, node)) def construct_mapping(self, node): pairs = self.construct_pairs(node) - pairs.sort() + pairs.sort(key=(lambda i: str(i))) return pairs def construct_undefined(self, node): return self.construct_scalar(node) - MyLoader.add_constructor(u'tag:yaml.org,2002:map', MyLoader.construct_mapping) + MyLoader.add_constructor('tag:yaml.org,2002:map', MyLoader.construct_mapping) MyLoader.add_constructor(None, MyLoader.construct_undefined) def _make_canonical_loader(): @@ -155,12 +165,12 @@ def _make_canonical_loader(): return tuple(yaml.CanonicalLoader.construct_sequence(self, node)) def construct_mapping(self, node): pairs = self.construct_pairs(node) - pairs.sort() + pairs.sort(key=(lambda i: str(i))) return pairs def construct_undefined(self, node): return self.construct_scalar(node) - MyCanonicalLoader.add_constructor(u'tag:yaml.org,2002:map', MyCanonicalLoader.construct_mapping) + MyCanonicalLoader.add_constructor('tag:yaml.org,2002:map', MyCanonicalLoader.construct_mapping) MyCanonicalLoader.add_constructor(None, MyCanonicalLoader.construct_undefined) def test_constructor(data_filename, canonical_filename, verbose=False): @@ -169,14 +179,16 @@ def test_constructor(data_filename, canonical_filename, verbose=False): native1 = None native2 = None try: - native1 = list(yaml.load_all(open(data_filename, 'rb'), Loader=MyLoader)) - native2 = list(yaml.load_all(open(canonical_filename, 'rb'), Loader=MyCanonicalLoader)) + with open(data_filename, 'rb') as file: + native1 = list(yaml.load_all(file, Loader=MyLoader)) + with open(canonical_filename, 'rb') as file: + native2 = list(yaml.load_all(file, Loader=MyCanonicalLoader)) assert native1 == native2, (native1, native2) finally: if verbose: - print "NATIVE1:" + print("NATIVE1:") pprint.pprint(native1) - print "NATIVE2:" + print("NATIVE2:") pprint.pprint(native2) test_constructor.unittest = ['.data', '.canonical'] diff --git a/tests/lib/test_tokens.py b/tests/lib/test_tokens.py index 9613fa0..89ce7ac 100644 --- a/tests/lib/test_tokens.py +++ b/tests/lib/test_tokens.py @@ -44,15 +44,17 @@ _replaces = { def test_tokens(data_filename, tokens_filename, verbose=False): tokens1 = [] - tokens2 = open(tokens_filename, 'rb').read().split() + with open(tokens_filename, 'r') as file: + tokens2 = file.read().split() try: - for token in yaml.scan(open(data_filename, 'rb')): - if not isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)): - tokens1.append(_replaces[token.__class__]) + with open(data_filename, 'rb') as file: + for token in yaml.scan(file): + if not isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)): + tokens1.append(_replaces[token.__class__]) finally: if verbose: - print "TOKENS1:", ' '.join(tokens1) - print "TOKENS2:", ' '.join(tokens2) + print("TOKENS1:", ' '.join(tokens1)) + print("TOKENS2:", ' '.join(tokens2)) assert len(tokens1) == len(tokens2), (tokens1, tokens2) for token1, token2 in zip(tokens1, tokens2): assert token1 == token2, (token1, token2) @@ -63,8 +65,9 @@ def test_scanner(data_filename, canonical_filename, verbose=False): for filename in [data_filename, canonical_filename]: tokens = [] try: - for token in yaml.scan(open(filename, 'rb')): - tokens.append(token.__class__.__name__) + with open(filename, 'rb') as file: + for token in yaml.scan(file): + tokens.append(token.__class__.__name__) finally: if verbose: pprint.pprint(tokens) diff --git a/tests/lib/test_yaml.py b/tests/lib/test_yaml.py index 352cd8d..a5c10a3 100644 --- a/tests/lib/test_yaml.py +++ b/tests/lib/test_yaml.py @@ -1,4 +1,5 @@ +from test_dump_load import * from test_mark import * from test_reader import * from test_canonical import * @@ -14,6 +15,8 @@ from test_input_output import * from test_sort_keys import * from test_multi_constructor import * +from test_schema import * + if __name__ == '__main__': import test_appliance test_appliance.run(globals()) diff --git a/tests/lib/test_yaml_ext.py b/tests/lib/test_yaml_ext.py index dfe2618..e1c4bb8 100644 --- a/tests/lib/test_yaml_ext.py +++ b/tests/lib/test_yaml_ext.py @@ -117,8 +117,8 @@ def _tear_down(): def test_c_version(verbose=False): if verbose: - print yaml._yaml.get_version() - print yaml._yaml.get_version_string() + print(_yaml.get_version()) + print(_yaml.get_version_string()) assert ("%s.%s.%s" % yaml._yaml.get_version()) == yaml._yaml.get_version_string(), \ (_yaml.get_version(), yaml._yaml.get_version_string()) @@ -148,20 +148,20 @@ def _compare_scanners(py_data, c_data, verbose): assert py_end == c_end, (py_end, c_end) finally: if verbose: - print "PY_TOKENS:" + print("PY_TOKENS:") pprint.pprint(py_tokens) - print "C_TOKENS:" + print("C_TOKENS:") pprint.pprint(c_tokens) def test_c_scanner(data_filename, canonical_filename, verbose=False): - _compare_scanners(open(data_filename, 'rb'), - open(data_filename, 'rb'), verbose) - _compare_scanners(open(data_filename, 'rb').read(), - open(data_filename, 'rb').read(), verbose) - _compare_scanners(open(canonical_filename, 'rb'), - open(canonical_filename, 'rb'), verbose) - _compare_scanners(open(canonical_filename, 'rb').read(), - open(canonical_filename, 'rb').read(), verbose) + with open(data_filename, 'rb') as file1, open(data_filename, 'rb') as file2: + _compare_scanners(file1, file2, verbose) + with open(data_filename, 'rb') as file1, open(data_filename, 'rb') as file2: + _compare_scanners(file1.read(), file2.read(), verbose) + with open(canonical_filename, 'rb') as file1, open(canonical_filename, 'rb') as file2: + _compare_scanners(file1, file2, verbose) + with open(canonical_filename, 'rb') as file1, open(canonical_filename, 'rb') as file2: + _compare_scanners(file1.read(), file2.read(), verbose) test_c_scanner.unittest = ['.data', '.canonical'] test_c_scanner.skip = ['.skip-ext'] @@ -181,20 +181,20 @@ def _compare_parsers(py_data, c_data, verbose): assert py_value == c_value, (py_event, c_event, attribute) finally: if verbose: - print "PY_EVENTS:" + print("PY_EVENTS:") pprint.pprint(py_events) - print "C_EVENTS:" + print("C_EVENTS:") pprint.pprint(c_events) def test_c_parser(data_filename, canonical_filename, verbose=False): - _compare_parsers(open(data_filename, 'rb'), - open(data_filename, 'rb'), verbose) - _compare_parsers(open(data_filename, 'rb').read(), - open(data_filename, 'rb').read(), verbose) - _compare_parsers(open(canonical_filename, 'rb'), - open(canonical_filename, 'rb'), verbose) - _compare_parsers(open(canonical_filename, 'rb').read(), - open(canonical_filename, 'rb').read(), verbose) + with open(data_filename, 'rb') as file1, open(data_filename, 'rb') as file2: + _compare_parsers(file1, file2, verbose) + with open(data_filename, 'rb') as file1, open(data_filename, 'rb') as file2: + _compare_parsers(file1.read(), file2.read(), verbose) + with open(canonical_filename, 'rb') as file1, open(canonical_filename, 'rb') as file2: + _compare_parsers(file1, file2, verbose) + with open(canonical_filename, 'rb') as file1, open(canonical_filename, 'rb') as file2: + _compare_parsers(file1.read(), file2.read(), verbose) test_c_parser.unittest = ['.data', '.canonical'] test_c_parser.skip = ['.skip-ext'] @@ -203,7 +203,7 @@ def _compare_emitters(data, verbose): events = list(yaml.parse(data, Loader=yaml.PyLoader)) c_data = yaml.emit(events, Dumper=yaml.CDumper) if verbose: - print c_data + print(c_data) py_events = list(yaml.parse(c_data, Loader=yaml.PyLoader)) c_events = list(yaml.parse(c_data, Loader=yaml.CLoader)) try: @@ -215,8 +215,8 @@ def _compare_emitters(data, verbose): value = getattr(event, attribute, None) py_value = getattr(py_event, attribute, None) c_value = getattr(c_event, attribute, None) - if attribute == 'tag' and value in [None, u'!'] \ - and py_value in [None, u'!'] and c_value in [None, u'!']: + if attribute == 'tag' and value in [None, '!'] \ + and py_value in [None, '!'] and c_value in [None, '!']: continue if attribute == 'explicit' and (py_value or c_value): continue @@ -224,16 +224,18 @@ def _compare_emitters(data, verbose): assert value == c_value, (event, c_event, attribute) finally: if verbose: - print "EVENTS:" + print("EVENTS:") pprint.pprint(events) - print "PY_EVENTS:" + print("PY_EVENTS:") pprint.pprint(py_events) - print "C_EVENTS:" + print("C_EVENTS:") pprint.pprint(c_events) def test_c_emitter(data_filename, canonical_filename, verbose=False): - _compare_emitters(open(data_filename, 'rb').read(), verbose) - _compare_emitters(open(canonical_filename, 'rb').read(), verbose) + with open(data_filename, 'rb') as file: + _compare_emitters(file.read(), verbose) + with open(canonical_filename, 'rb') as file: + _compare_emitters(file.read(), verbose) test_c_emitter.unittest = ['.data', '.canonical'] test_c_emitter.skip = ['.skip-ext'] @@ -248,7 +250,7 @@ def test_large_file(verbose=False): return with tempfile.TemporaryFile() as temp_file: for i in range(2**(SIZE_FILE-SIZE_ITERATION-SIZE_LINE) + 1): - temp_file.write(('-' + (' ' * (2**SIZE_LINE-4))+ '{}\n')*(2**SIZE_ITERATION)) + temp_file.write(bytes(('-' + (' ' * (2**SIZE_LINE-4))+ '{}\n')*(2**SIZE_ITERATION), 'utf-8')) temp_file.seek(0) yaml.load(temp_file, Loader=yaml.CLoader) @@ -261,11 +263,7 @@ def wrap_ext_function(function): function(*args, **kwds) finally: _tear_down() - try: - wrapper.func_name = '%s_ext' % function.func_name - except TypeError: - pass - wrapper.unittest_name = '%s_ext' % function.func_name + wrapper.__name__ = '%s_ext' % function.__name__ wrapper.unittest = function.unittest wrapper.skip = getattr(function, 'skip', [])+['.skip-ext'] return wrapper @@ -277,15 +275,13 @@ def wrap_ext(collections): for collection in collections: if not isinstance(collection, dict): collection = vars(collection) - keys = collection.keys() - keys.sort() - for key in keys: + for key in sorted(collection): value = collection[key] if isinstance(value, types.FunctionType) and hasattr(value, 'unittest'): functions.append(wrap_ext_function(value)) for function in functions: - assert function.unittest_name not in globals() - globals()[function.unittest_name] = function + assert function.__name__ not in globals() + globals()[function.__name__] = function import test_tokens, test_structure, test_errors, test_resolver, test_constructor, \ test_emitter, test_representer, test_recursive, test_input_output diff --git a/tests/lib3/canonical.py b/tests/lib3/canonical.py deleted file mode 100644 index a8b4e3a..0000000 --- a/tests/lib3/canonical.py +++ /dev/null @@ -1,361 +0,0 @@ - -import yaml, yaml.composer, yaml.constructor, yaml.resolver - -class CanonicalError(yaml.YAMLError): - pass - -class CanonicalScanner: - - def __init__(self, data): - if isinstance(data, bytes): - try: - data = data.decode('utf-8') - except UnicodeDecodeError: - raise CanonicalError("utf-8 stream is expected") - self.data = data+'\0' - self.index = 0 - self.tokens = [] - self.scanned = False - - def check_token(self, *choices): - if not self.scanned: - self.scan() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - if not self.scanned: - self.scan() - if self.tokens: - return self.tokens[0] - - def get_token(self, choice=None): - if not self.scanned: - self.scan() - token = self.tokens.pop(0) - if choice and not isinstance(token, choice): - raise CanonicalError("unexpected token "+repr(token)) - return token - - def get_token_value(self): - token = self.get_token() - return token.value - - def scan(self): - self.tokens.append(yaml.StreamStartToken(None, None)) - while True: - self.find_token() - ch = self.data[self.index] - if ch == '\0': - self.tokens.append(yaml.StreamEndToken(None, None)) - break - elif ch == '%': - self.tokens.append(self.scan_directive()) - elif ch == '-' and self.data[self.index:self.index+3] == '---': - self.index += 3 - self.tokens.append(yaml.DocumentStartToken(None, None)) - elif ch == '[': - self.index += 1 - self.tokens.append(yaml.FlowSequenceStartToken(None, None)) - elif ch == '{': - self.index += 1 - self.tokens.append(yaml.FlowMappingStartToken(None, None)) - elif ch == ']': - self.index += 1 - self.tokens.append(yaml.FlowSequenceEndToken(None, None)) - elif ch == '}': - self.index += 1 - self.tokens.append(yaml.FlowMappingEndToken(None, None)) - elif ch == '?': - self.index += 1 - self.tokens.append(yaml.KeyToken(None, None)) - elif ch == ':': - self.index += 1 - self.tokens.append(yaml.ValueToken(None, None)) - elif ch == ',': - self.index += 1 - self.tokens.append(yaml.FlowEntryToken(None, None)) - elif ch == '*' or ch == '&': - self.tokens.append(self.scan_alias()) - elif ch == '!': - self.tokens.append(self.scan_tag()) - elif ch == '"': - self.tokens.append(self.scan_scalar()) - else: - raise CanonicalError("invalid token") - self.scanned = True - - DIRECTIVE = '%YAML 1.1' - - def scan_directive(self): - if self.data[self.index:self.index+len(self.DIRECTIVE)] == self.DIRECTIVE and \ - self.data[self.index+len(self.DIRECTIVE)] in ' \n\0': - self.index += len(self.DIRECTIVE) - return yaml.DirectiveToken('YAML', (1, 1), None, None) - else: - raise CanonicalError("invalid directive") - - def scan_alias(self): - if self.data[self.index] == '*': - TokenClass = yaml.AliasToken - else: - TokenClass = yaml.AnchorToken - self.index += 1 - start = self.index - while self.data[self.index] not in ', \n\0': - self.index += 1 - value = self.data[start:self.index] - return TokenClass(value, None, None) - - def scan_tag(self): - self.index += 1 - start = self.index - while self.data[self.index] not in ' \n\0': - self.index += 1 - value = self.data[start:self.index] - if not value: - value = '!' - elif value[0] == '!': - value = 'tag:yaml.org,2002:'+value[1:] - elif value[0] == '<' and value[-1] == '>': - value = value[1:-1] - else: - value = '!'+value - return yaml.TagToken(value, None, None) - - QUOTE_CODES = { - 'x': 2, - 'u': 4, - 'U': 8, - } - - QUOTE_REPLACES = { - '\\': '\\', - '\"': '\"', - ' ': ' ', - 'a': '\x07', - 'b': '\x08', - 'e': '\x1B', - 'f': '\x0C', - 'n': '\x0A', - 'r': '\x0D', - 't': '\x09', - 'v': '\x0B', - 'N': '\u0085', - 'L': '\u2028', - 'P': '\u2029', - '_': '_', - '0': '\x00', - } - - def scan_scalar(self): - self.index += 1 - chunks = [] - start = self.index - ignore_spaces = False - while self.data[self.index] != '"': - if self.data[self.index] == '\\': - ignore_spaces = False - chunks.append(self.data[start:self.index]) - self.index += 1 - ch = self.data[self.index] - self.index += 1 - if ch == '\n': - ignore_spaces = True - elif ch in self.QUOTE_CODES: - length = self.QUOTE_CODES[ch] - code = int(self.data[self.index:self.index+length], 16) - chunks.append(chr(code)) - self.index += length - else: - if ch not in self.QUOTE_REPLACES: - raise CanonicalError("invalid escape code") - chunks.append(self.QUOTE_REPLACES[ch]) - start = self.index - elif self.data[self.index] == '\n': - chunks.append(self.data[start:self.index]) - chunks.append(' ') - self.index += 1 - start = self.index - ignore_spaces = True - elif ignore_spaces and self.data[self.index] == ' ': - self.index += 1 - start = self.index - else: - ignore_spaces = False - self.index += 1 - chunks.append(self.data[start:self.index]) - self.index += 1 - return yaml.ScalarToken(''.join(chunks), False, None, None) - - def find_token(self): - found = False - while not found: - while self.data[self.index] in ' \t': - self.index += 1 - if self.data[self.index] == '#': - while self.data[self.index] != '\n': - self.index += 1 - if self.data[self.index] == '\n': - self.index += 1 - else: - found = True - -class CanonicalParser: - - def __init__(self): - self.events = [] - self.parsed = False - - def dispose(self): - pass - - # stream: STREAM-START document* STREAM-END - def parse_stream(self): - self.get_token(yaml.StreamStartToken) - self.events.append(yaml.StreamStartEvent(None, None)) - while not self.check_token(yaml.StreamEndToken): - if self.check_token(yaml.DirectiveToken, yaml.DocumentStartToken): - self.parse_document() - else: - raise CanonicalError("document is expected, got "+repr(self.tokens[0])) - self.get_token(yaml.StreamEndToken) - self.events.append(yaml.StreamEndEvent(None, None)) - - # document: DIRECTIVE? DOCUMENT-START node - def parse_document(self): - node = None - if self.check_token(yaml.DirectiveToken): - self.get_token(yaml.DirectiveToken) - self.get_token(yaml.DocumentStartToken) - self.events.append(yaml.DocumentStartEvent(None, None)) - self.parse_node() - self.events.append(yaml.DocumentEndEvent(None, None)) - - # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping) - def parse_node(self): - if self.check_token(yaml.AliasToken): - self.events.append(yaml.AliasEvent(self.get_token_value(), None, None)) - else: - anchor = None - if self.check_token(yaml.AnchorToken): - anchor = self.get_token_value() - tag = None - if self.check_token(yaml.TagToken): - tag = self.get_token_value() - if self.check_token(yaml.ScalarToken): - self.events.append(yaml.ScalarEvent(anchor, tag, (False, False), self.get_token_value(), None, None)) - elif self.check_token(yaml.FlowSequenceStartToken): - self.events.append(yaml.SequenceStartEvent(anchor, tag, None, None)) - self.parse_sequence() - elif self.check_token(yaml.FlowMappingStartToken): - self.events.append(yaml.MappingStartEvent(anchor, tag, None, None)) - self.parse_mapping() - else: - raise CanonicalError("SCALAR, '[', or '{' is expected, got "+repr(self.tokens[0])) - - # sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END - def parse_sequence(self): - self.get_token(yaml.FlowSequenceStartToken) - if not self.check_token(yaml.FlowSequenceEndToken): - self.parse_node() - while not self.check_token(yaml.FlowSequenceEndToken): - self.get_token(yaml.FlowEntryToken) - if not self.check_token(yaml.FlowSequenceEndToken): - self.parse_node() - self.get_token(yaml.FlowSequenceEndToken) - self.events.append(yaml.SequenceEndEvent(None, None)) - - # mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? MAPPING-END - def parse_mapping(self): - self.get_token(yaml.FlowMappingStartToken) - if not self.check_token(yaml.FlowMappingEndToken): - self.parse_map_entry() - while not self.check_token(yaml.FlowMappingEndToken): - self.get_token(yaml.FlowEntryToken) - if not self.check_token(yaml.FlowMappingEndToken): - self.parse_map_entry() - self.get_token(yaml.FlowMappingEndToken) - self.events.append(yaml.MappingEndEvent(None, None)) - - # map_entry: KEY node VALUE node - def parse_map_entry(self): - self.get_token(yaml.KeyToken) - self.parse_node() - self.get_token(yaml.ValueToken) - self.parse_node() - - def parse(self): - self.parse_stream() - self.parsed = True - - def get_event(self): - if not self.parsed: - self.parse() - return self.events.pop(0) - - def check_event(self, *choices): - if not self.parsed: - self.parse() - if self.events: - if not choices: - return True - for choice in choices: - if isinstance(self.events[0], choice): - return True - return False - - def peek_event(self): - if not self.parsed: - self.parse() - return self.events[0] - -class CanonicalLoader(CanonicalScanner, CanonicalParser, - yaml.composer.Composer, yaml.constructor.Constructor, yaml.resolver.Resolver): - - def __init__(self, stream): - if hasattr(stream, 'read'): - stream = stream.read() - CanonicalScanner.__init__(self, stream) - CanonicalParser.__init__(self) - yaml.composer.Composer.__init__(self) - yaml.constructor.Constructor.__init__(self) - yaml.resolver.Resolver.__init__(self) - -yaml.CanonicalLoader = CanonicalLoader - -def canonical_scan(stream): - return yaml.scan(stream, Loader=CanonicalLoader) - -yaml.canonical_scan = canonical_scan - -def canonical_parse(stream): - return yaml.parse(stream, Loader=CanonicalLoader) - -yaml.canonical_parse = canonical_parse - -def canonical_compose(stream): - return yaml.compose(stream, Loader=CanonicalLoader) - -yaml.canonical_compose = canonical_compose - -def canonical_compose_all(stream): - return yaml.compose_all(stream, Loader=CanonicalLoader) - -yaml.canonical_compose_all = canonical_compose_all - -def canonical_load(stream): - return yaml.load(stream, Loader=CanonicalLoader) - -yaml.canonical_load = canonical_load - -def canonical_load_all(stream): - return yaml.load_all(stream, Loader=CanonicalLoader) - -yaml.canonical_load_all = canonical_load_all - diff --git a/tests/lib3/test_all.py b/tests/lib3/test_all.py deleted file mode 100644 index 72a5067..0000000 --- a/tests/lib3/test_all.py +++ /dev/null @@ -1,15 +0,0 @@ - -import sys, yaml, test_appliance - -def main(args=None): - collections = [] - import test_yaml - collections.append(test_yaml) - if yaml.__with_libyaml__: - import test_yaml_ext - collections.append(test_yaml_ext) - return test_appliance.run(collections, args) - -if __name__ == '__main__': - main() - diff --git a/tests/lib3/test_appliance.py b/tests/lib3/test_appliance.py deleted file mode 100644 index b6f956d..0000000 --- a/tests/lib3/test_appliance.py +++ /dev/null @@ -1,147 +0,0 @@ - -import sys, os, os.path, types, traceback, pprint - -DATA = 'tests/data' - -def find_test_functions(collections): - if not isinstance(collections, list): - collections = [collections] - functions = [] - for collection in collections: - if not isinstance(collection, dict): - collection = vars(collection) - for key in sorted(collection): - value = collection[key] - if isinstance(value, types.FunctionType) and hasattr(value, 'unittest'): - functions.append(value) - return functions - -def find_test_filenames(directory): - filenames = {} - for filename in os.listdir(directory): - if os.path.isfile(os.path.join(directory, filename)): - base, ext = os.path.splitext(filename) - if base.endswith('-py2'): - continue - filenames.setdefault(base, []).append(ext) - filenames = sorted(filenames.items()) - return filenames - -def parse_arguments(args): - if args is None: - args = sys.argv[1:] - verbose = False - if '-v' in args: - verbose = True - args.remove('-v') - if '--verbose' in args: - verbose = True - args.remove('--verbose') - if 'YAML_TEST_VERBOSE' in os.environ: - verbose = True - include_functions = [] - if args: - include_functions.append(args.pop(0)) - if 'YAML_TEST_FUNCTIONS' in os.environ: - include_functions.extend(os.environ['YAML_TEST_FUNCTIONS'].split()) - include_filenames = [] - include_filenames.extend(args) - if 'YAML_TEST_FILENAMES' in os.environ: - include_filenames.extend(os.environ['YAML_TEST_FILENAMES'].split()) - return include_functions, include_filenames, verbose - -def execute(function, filenames, verbose): - name = function.__name__ - if verbose: - sys.stdout.write('='*75+'\n') - sys.stdout.write('%s(%s)...\n' % (name, ', '.join(filenames))) - try: - function(verbose=verbose, *filenames) - except Exception as exc: - info = sys.exc_info() - if isinstance(exc, AssertionError): - kind = 'FAILURE' - else: - kind = 'ERROR' - if verbose: - traceback.print_exc(limit=1, file=sys.stdout) - else: - sys.stdout.write(kind[0]) - sys.stdout.flush() - else: - kind = 'SUCCESS' - info = None - if not verbose: - sys.stdout.write('.') - sys.stdout.flush() - return (name, filenames, kind, info) - -def display(results, verbose): - if results and not verbose: - sys.stdout.write('\n') - total = len(results) - failures = 0 - errors = 0 - for name, filenames, kind, info in results: - if kind == 'SUCCESS': - continue - if kind == 'FAILURE': - failures += 1 - if kind == 'ERROR': - errors += 1 - sys.stdout.write('='*75+'\n') - sys.stdout.write('%s(%s): %s\n' % (name, ', '.join(filenames), kind)) - if kind == 'ERROR': - traceback.print_exception(file=sys.stdout, *info) - else: - sys.stdout.write('Traceback (most recent call last):\n') - traceback.print_tb(info[2], file=sys.stdout) - sys.stdout.write('%s: see below\n' % info[0].__name__) - sys.stdout.write('~'*75+'\n') - for arg in info[1].args: - pprint.pprint(arg, stream=sys.stdout) - for filename in filenames: - sys.stdout.write('-'*75+'\n') - sys.stdout.write('%s:\n' % filename) - data = open(filename, 'r', errors='replace').read() - sys.stdout.write(data) - if data and data[-1] != '\n': - sys.stdout.write('\n') - sys.stdout.write('='*75+'\n') - sys.stdout.write('TESTS: %s\n' % total) - if failures: - sys.stdout.write('FAILURES: %s\n' % failures) - if errors: - sys.stdout.write('ERRORS: %s\n' % errors) - return not (failures or errors) - -def run(collections, args=None): - test_functions = find_test_functions(collections) - test_filenames = find_test_filenames(DATA) - include_functions, include_filenames, verbose = parse_arguments(args) - results = [] - for function in test_functions: - if include_functions and function.__name__ not in include_functions: - continue - if function.unittest: - for base, exts in test_filenames: - if include_filenames and base not in include_filenames: - continue - filenames = [] - for ext in function.unittest: - if ext not in exts: - break - filenames.append(os.path.join(DATA, base+ext)) - else: - skip_exts = getattr(function, 'skip', []) - for skip_ext in skip_exts: - if skip_ext in exts: - break - else: - result = execute(function, filenames, verbose) - results.append(result) - else: - result = execute(function, [], verbose) - results.append(result) - return display(results, verbose=verbose) - diff --git a/tests/lib3/test_build.py b/tests/lib3/test_build.py deleted file mode 100644 index df3f943..0000000 --- a/tests/lib3/test_build.py +++ /dev/null @@ -1,10 +0,0 @@ - -if __name__ == '__main__': - import sys, os, distutils.util - build_lib = 'build/lib' - build_lib_ext = os.path.join('build', 'lib.{}-{}.{}'.format(distutils.util.get_platform(), *sys.version_info)) - sys.path.insert(0, build_lib) - sys.path.insert(0, build_lib_ext) - import test_yaml, test_appliance - test_appliance.run(test_yaml) - diff --git a/tests/lib3/test_build_ext.py b/tests/lib3/test_build_ext.py deleted file mode 100644 index fa843f6..0000000 --- a/tests/lib3/test_build_ext.py +++ /dev/null @@ -1,11 +0,0 @@ - - -if __name__ == '__main__': - import sys, os, distutils.util - build_lib = 'build/lib' - build_lib_ext = os.path.join('build', 'lib.{}-{}.{}'.format(distutils.util.get_platform(), *sys.version_info)) - sys.path.insert(0, build_lib) - sys.path.insert(0, build_lib_ext) - import test_yaml_ext, test_appliance - test_appliance.run(test_yaml_ext) - diff --git a/tests/lib3/test_canonical.py b/tests/lib3/test_canonical.py deleted file mode 100644 index a3b1153..0000000 --- a/tests/lib3/test_canonical.py +++ /dev/null @@ -1,40 +0,0 @@ - -import yaml, canonical - -def test_canonical_scanner(canonical_filename, verbose=False): - data = open(canonical_filename, 'rb').read() - tokens = list(yaml.canonical_scan(data)) - assert tokens, tokens - if verbose: - for token in tokens: - print(token) - -test_canonical_scanner.unittest = ['.canonical'] - -def test_canonical_parser(canonical_filename, verbose=False): - data = open(canonical_filename, 'rb').read() - events = list(yaml.canonical_parse(data)) - assert events, events - if verbose: - for event in events: - print(event) - -test_canonical_parser.unittest = ['.canonical'] - -def test_canonical_error(data_filename, canonical_filename, verbose=False): - data = open(data_filename, 'rb').read() - try: - output = list(yaml.canonical_load_all(data)) - except yaml.YAMLError as exc: - if verbose: - print(exc) - else: - raise AssertionError("expected an exception") - -test_canonical_error.unittest = ['.data', '.canonical'] -test_canonical_error.skip = ['.empty'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_constructor.py b/tests/lib3/test_constructor.py deleted file mode 100644 index f9a5077..0000000 --- a/tests/lib3/test_constructor.py +++ /dev/null @@ -1,298 +0,0 @@ - -import yaml -import pprint - -import datetime -import yaml.tokens - -def execute(code): - global value - exec(code) - return value - -def _make_objects(): - global MyLoader, MyDumper, MyTestClass1, MyTestClass2, MyTestClass3, YAMLObject1, YAMLObject2, \ - AnObject, AnInstance, AState, ACustomState, InitArgs, InitArgsWithState, \ - NewArgs, NewArgsWithState, Reduce, ReduceWithState, Slots, MyInt, MyList, MyDict, \ - FixedOffset, today, execute, MyFullLoader - - class MyLoader(yaml.Loader): - pass - class MyDumper(yaml.Dumper): - pass - - class MyTestClass1: - def __init__(self, x, y=0, z=0): - self.x = x - self.y = y - self.z = z - def __eq__(self, other): - if isinstance(other, MyTestClass1): - return self.__class__, self.__dict__ == other.__class__, other.__dict__ - else: - return False - - def construct1(constructor, node): - mapping = constructor.construct_mapping(node) - return MyTestClass1(**mapping) - def represent1(representer, native): - return representer.represent_mapping("!tag1", native.__dict__) - - def my_time_constructor(constructor, node): - seq = constructor.construct_sequence(node) - dt = seq[0] - tz = None - try: - tz = dt.tzinfo.tzname(dt) - except: - pass - return [dt, tz] - - yaml.add_constructor("!tag1", construct1, Loader=MyLoader) - yaml.add_constructor("!MyTime", my_time_constructor, Loader=MyLoader) - yaml.add_representer(MyTestClass1, represent1, Dumper=MyDumper) - - class MyTestClass2(MyTestClass1, yaml.YAMLObject): - yaml_loader = MyLoader - yaml_dumper = MyDumper - yaml_tag = "!tag2" - def from_yaml(cls, constructor, node): - x = constructor.construct_yaml_int(node) - return cls(x=x) - from_yaml = classmethod(from_yaml) - def to_yaml(cls, representer, native): - return representer.represent_scalar(cls.yaml_tag, str(native.x)) - to_yaml = classmethod(to_yaml) - - class MyTestClass3(MyTestClass2): - yaml_tag = "!tag3" - def from_yaml(cls, constructor, node): - mapping = constructor.construct_mapping(node) - if '=' in mapping: - x = mapping['='] - del mapping['='] - mapping['x'] = x - return cls(**mapping) - from_yaml = classmethod(from_yaml) - def to_yaml(cls, representer, native): - return representer.represent_mapping(cls.yaml_tag, native.__dict__) - to_yaml = classmethod(to_yaml) - - class YAMLObject1(yaml.YAMLObject): - yaml_loader = MyLoader - yaml_dumper = MyDumper - yaml_tag = '!foo' - def __init__(self, my_parameter=None, my_another_parameter=None): - self.my_parameter = my_parameter - self.my_another_parameter = my_another_parameter - def __eq__(self, other): - if isinstance(other, YAMLObject1): - return self.__class__, self.__dict__ == other.__class__, other.__dict__ - else: - return False - - class YAMLObject2(yaml.YAMLObject): - yaml_loader = MyLoader - yaml_dumper = MyDumper - yaml_tag = '!bar' - def __init__(self, foo=1, bar=2, baz=3): - self.foo = foo - self.bar = bar - self.baz = baz - def __getstate__(self): - return {1: self.foo, 2: self.bar, 3: self.baz} - def __setstate__(self, state): - self.foo = state[1] - self.bar = state[2] - self.baz = state[3] - def __eq__(self, other): - if isinstance(other, YAMLObject2): - return self.__class__, self.__dict__ == other.__class__, other.__dict__ - else: - return False - - class AnObject: - def __new__(cls, foo=None, bar=None, baz=None): - self = object.__new__(cls) - self.foo = foo - self.bar = bar - self.baz = baz - return self - def __cmp__(self, other): - return cmp((type(self), self.foo, self.bar, self.baz), - (type(other), other.foo, other.bar, other.baz)) - def __eq__(self, other): - return type(self) is type(other) and \ - (self.foo, self.bar, self.baz) == (other.foo, other.bar, other.baz) - - class AnInstance: - def __init__(self, foo=None, bar=None, baz=None): - self.foo = foo - self.bar = bar - self.baz = baz - def __cmp__(self, other): - return cmp((type(self), self.foo, self.bar, self.baz), - (type(other), other.foo, other.bar, other.baz)) - def __eq__(self, other): - return type(self) is type(other) and \ - (self.foo, self.bar, self.baz) == (other.foo, other.bar, other.baz) - - class AState(AnInstance): - def __getstate__(self): - return { - '_foo': self.foo, - '_bar': self.bar, - '_baz': self.baz, - } - def __setstate__(self, state): - self.foo = state['_foo'] - self.bar = state['_bar'] - self.baz = state['_baz'] - - class ACustomState(AnInstance): - def __getstate__(self): - return (self.foo, self.bar, self.baz) - def __setstate__(self, state): - self.foo, self.bar, self.baz = state - - class NewArgs(AnObject): - def __getnewargs__(self): - return (self.foo, self.bar, self.baz) - def __getstate__(self): - return {} - - class NewArgsWithState(AnObject): - def __getnewargs__(self): - return (self.foo, self.bar) - def __getstate__(self): - return self.baz - def __setstate__(self, state): - self.baz = state - - InitArgs = NewArgs - - InitArgsWithState = NewArgsWithState - - class Reduce(AnObject): - def __reduce__(self): - return self.__class__, (self.foo, self.bar, self.baz) - - class ReduceWithState(AnObject): - def __reduce__(self): - return self.__class__, (self.foo, self.bar), self.baz - def __setstate__(self, state): - self.baz = state - - class Slots: - __slots__ = ("foo", "bar", "baz") - def __init__(self, foo=None, bar=None, baz=None): - self.foo = foo - self.bar = bar - self.baz = baz - - def __eq__(self, other): - return type(self) is type(other) and \ - (self.foo, self.bar, self.baz) == (other.foo, other.bar, other.baz) - - class MyInt(int): - def __eq__(self, other): - return type(self) is type(other) and int(self) == int(other) - - class MyList(list): - def __init__(self, n=1): - self.extend([None]*n) - def __eq__(self, other): - return type(self) is type(other) and list(self) == list(other) - - class MyDict(dict): - def __init__(self, n=1): - for k in range(n): - self[k] = None - def __eq__(self, other): - return type(self) is type(other) and dict(self) == dict(other) - - class FixedOffset(datetime.tzinfo): - def __init__(self, offset, name): - self.__offset = datetime.timedelta(minutes=offset) - self.__name = name - def utcoffset(self, dt): - return self.__offset - def tzname(self, dt): - return self.__name - def dst(self, dt): - return datetime.timedelta(0) - - class MyFullLoader(yaml.FullLoader): - def get_state_keys_blacklist(self): - return super().get_state_keys_blacklist() + ['^mymethod$', '^wrong_.*$'] - - today = datetime.date.today() - -def _load_code(expression): - return eval(expression) - -def _serialize_value(data): - if isinstance(data, list): - return '[%s]' % ', '.join(map(_serialize_value, data)) - elif isinstance(data, dict): - items = [] - for key, value in data.items(): - key = _serialize_value(key) - value = _serialize_value(value) - items.append("%s: %s" % (key, value)) - items.sort() - return '{%s}' % ', '.join(items) - elif isinstance(data, datetime.datetime): - return repr(data.utctimetuple()) - elif isinstance(data, float) and data != data: - return '?' - else: - return str(data) - -def test_constructor_types(data_filename, code_filename, verbose=False): - _make_objects() - native1 = None - native2 = None - try: - native1 = list(yaml.load_all(open(data_filename, 'rb'), Loader=MyLoader)) - if len(native1) == 1: - native1 = native1[0] - native2 = _load_code(open(code_filename, 'rb').read()) - try: - if native1 == native2: - return - except TypeError: - pass - if verbose: - print("SERIALIZED NATIVE1:") - print(_serialize_value(native1)) - print("SERIALIZED NATIVE2:") - print(_serialize_value(native2)) - assert _serialize_value(native1) == _serialize_value(native2), (native1, native2) - finally: - if verbose: - print("NATIVE1:") - pprint.pprint(native1) - print("NATIVE2:") - pprint.pprint(native2) - -test_constructor_types.unittest = ['.data', '.code'] - -def test_subclass_blacklist_types(data_filename, verbose=False): - _make_objects() - try: - yaml.load(open(data_filename, 'rb').read(), MyFullLoader) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_subclass_blacklist_types.unittest = ['.subclass_blacklist'] - -if __name__ == '__main__': - import sys, test_constructor - sys.modules['test_constructor'] = sys.modules['__main__'] - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_emitter.py b/tests/lib3/test_emitter.py deleted file mode 100644 index 90d1652..0000000 --- a/tests/lib3/test_emitter.py +++ /dev/null @@ -1,100 +0,0 @@ - -import yaml - -def _compare_events(events1, events2): - assert len(events1) == len(events2), (events1, events2) - for event1, event2 in zip(events1, events2): - assert event1.__class__ == event2.__class__, (event1, event2) - if isinstance(event1, yaml.NodeEvent): - assert event1.anchor == event2.anchor, (event1, event2) - if isinstance(event1, yaml.CollectionStartEvent): - assert event1.tag == event2.tag, (event1, event2) - if isinstance(event1, yaml.ScalarEvent): - if True not in event1.implicit+event2.implicit: - assert event1.tag == event2.tag, (event1, event2) - assert event1.value == event2.value, (event1, event2) - -def test_emitter_on_data(data_filename, canonical_filename, verbose=False): - events = list(yaml.parse(open(data_filename, 'rb'))) - output = yaml.emit(events) - if verbose: - print("OUTPUT:") - print(output) - new_events = list(yaml.parse(output)) - _compare_events(events, new_events) - -test_emitter_on_data.unittest = ['.data', '.canonical'] - -def test_emitter_on_canonical(canonical_filename, verbose=False): - events = list(yaml.parse(open(canonical_filename, 'rb'))) - for canonical in [False, True]: - output = yaml.emit(events, canonical=canonical) - if verbose: - print("OUTPUT (canonical=%s):" % canonical) - print(output) - new_events = list(yaml.parse(output)) - _compare_events(events, new_events) - -test_emitter_on_canonical.unittest = ['.canonical'] - -def test_emitter_styles(data_filename, canonical_filename, verbose=False): - for filename in [data_filename, canonical_filename]: - events = list(yaml.parse(open(filename, 'rb'))) - for flow_style in [False, True]: - for style in ['|', '>', '"', '\'', '']: - styled_events = [] - for event in events: - if isinstance(event, yaml.ScalarEvent): - event = yaml.ScalarEvent(event.anchor, event.tag, - event.implicit, event.value, style=style) - elif isinstance(event, yaml.SequenceStartEvent): - event = yaml.SequenceStartEvent(event.anchor, event.tag, - event.implicit, flow_style=flow_style) - elif isinstance(event, yaml.MappingStartEvent): - event = yaml.MappingStartEvent(event.anchor, event.tag, - event.implicit, flow_style=flow_style) - styled_events.append(event) - output = yaml.emit(styled_events) - if verbose: - print("OUTPUT (filename=%r, flow_style=%r, style=%r)" % (filename, flow_style, style)) - print(output) - new_events = list(yaml.parse(output)) - _compare_events(events, new_events) - -test_emitter_styles.unittest = ['.data', '.canonical'] - -class EventsLoader(yaml.Loader): - - def construct_event(self, node): - if isinstance(node, yaml.ScalarNode): - mapping = {} - else: - mapping = self.construct_mapping(node) - class_name = str(node.tag[1:])+'Event' - if class_name in ['AliasEvent', 'ScalarEvent', 'SequenceStartEvent', 'MappingStartEvent']: - mapping.setdefault('anchor', None) - if class_name in ['ScalarEvent', 'SequenceStartEvent', 'MappingStartEvent']: - mapping.setdefault('tag', None) - if class_name in ['SequenceStartEvent', 'MappingStartEvent']: - mapping.setdefault('implicit', True) - if class_name == 'ScalarEvent': - mapping.setdefault('implicit', (False, True)) - mapping.setdefault('value', '') - value = getattr(yaml, class_name)(**mapping) - return value - -EventsLoader.add_constructor(None, EventsLoader.construct_event) - -def test_emitter_events(events_filename, verbose=False): - events = list(yaml.load(open(events_filename, 'rb'), Loader=EventsLoader)) - output = yaml.emit(events) - if verbose: - print("OUTPUT:") - print(output) - new_events = list(yaml.parse(output)) - _compare_events(events, new_events) - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_errors.py b/tests/lib3/test_errors.py deleted file mode 100644 index 9ef9bbe..0000000 --- a/tests/lib3/test_errors.py +++ /dev/null @@ -1,67 +0,0 @@ - -import yaml, test_emitter - -def test_loader_error(error_filename, verbose=False): - try: - list(yaml.load_all(open(error_filename, 'rb'), yaml.FullLoader)) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_loader_error.unittest = ['.loader-error'] - -def test_loader_error_string(error_filename, verbose=False): - try: - list(yaml.load_all(open(error_filename, 'rb').read(), yaml.FullLoader)) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_loader_error_string.unittest = ['.loader-error'] - -def test_loader_error_single(error_filename, verbose=False): - try: - yaml.load(open(error_filename, 'rb').read(), yaml.FullLoader) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_loader_error_single.unittest = ['.single-loader-error'] - -def test_emitter_error(error_filename, verbose=False): - events = list(yaml.load(open(error_filename, 'rb'), - Loader=test_emitter.EventsLoader)) - try: - yaml.emit(events) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_emitter_error.unittest = ['.emitter-error'] - -def test_dumper_error(error_filename, verbose=False): - code = open(error_filename, 'rb').read() - try: - import yaml - from io import StringIO - exec(code) - except yaml.YAMLError as exc: - if verbose: - print("%s:" % exc.__class__.__name__, exc) - else: - raise AssertionError("expected an exception") - -test_dumper_error.unittest = ['.dumper-error'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_input_output.py b/tests/lib3/test_input_output.py deleted file mode 100644 index 52ac342..0000000 --- a/tests/lib3/test_input_output.py +++ /dev/null @@ -1,136 +0,0 @@ - -import yaml -import codecs, io, tempfile, os, os.path - -def test_unicode_input(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') - value = ' '.join(data.split()) - output = yaml.full_load(data) - assert output == value, (output, value) - output = yaml.full_load(io.StringIO(data)) - assert output == value, (output, value) - for input in [data.encode('utf-8'), - codecs.BOM_UTF8+data.encode('utf-8'), - codecs.BOM_UTF16_BE+data.encode('utf-16-be'), - codecs.BOM_UTF16_LE+data.encode('utf-16-le')]: - if verbose: - print("INPUT:", repr(input[:10]), "...") - output = yaml.full_load(input) - assert output == value, (output, value) - output = yaml.full_load(io.BytesIO(input)) - assert output == value, (output, value) - -test_unicode_input.unittest = ['.unicode'] - -def test_unicode_input_errors(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') - for input in [data.encode('utf-16-be'), - data.encode('utf-16-le'), - codecs.BOM_UTF8+data.encode('utf-16-be'), - codecs.BOM_UTF8+data.encode('utf-16-le')]: - - try: - yaml.full_load(input) - except yaml.YAMLError as exc: - if verbose: - print(exc) - else: - raise AssertionError("expected an exception") - try: - yaml.full_load(io.BytesIO(input)) - except yaml.YAMLError as exc: - if verbose: - print(exc) - else: - raise AssertionError("expected an exception") - -test_unicode_input_errors.unittest = ['.unicode'] - -def test_unicode_output(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') - value = ' '.join(data.split()) - for allow_unicode in [False, True]: - data1 = yaml.dump(value, allow_unicode=allow_unicode) - for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']: - stream = io.StringIO() - yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) - data2 = stream.getvalue() - data3 = yaml.dump(value, encoding=encoding, allow_unicode=allow_unicode) - if encoding is not None: - assert isinstance(data3, bytes) - data3 = data3.decode(encoding) - stream = io.BytesIO() - if encoding is None: - try: - yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) - except TypeError as exc: - if verbose: - print(exc) - data4 = None - else: - raise AssertionError("expected an exception") - else: - yaml.dump(value, stream, encoding=encoding, allow_unicode=allow_unicode) - data4 = stream.getvalue() - if verbose: - print("BYTES:", data4[:50]) - data4 = data4.decode(encoding) - - assert isinstance(data1, str), (type(data1), encoding) - assert isinstance(data2, str), (type(data2), encoding) - -test_unicode_output.unittest = ['.unicode'] - -def test_file_output(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') - handle, filename = tempfile.mkstemp() - os.close(handle) - try: - stream = io.StringIO() - yaml.dump(data, stream, allow_unicode=True) - data1 = stream.getvalue() - stream = io.BytesIO() - yaml.dump(data, stream, encoding='utf-16-le', allow_unicode=True) - data2 = stream.getvalue().decode('utf-16-le')[1:] - stream = open(filename, 'w', encoding='utf-16-le') - yaml.dump(data, stream, allow_unicode=True) - stream.close() - data3 = open(filename, 'r', encoding='utf-16-le').read() - stream = open(filename, 'wb') - yaml.dump(data, stream, encoding='utf-8', allow_unicode=True) - stream.close() - data4 = open(filename, 'r', encoding='utf-8').read() - assert data1 == data2, (data1, data2) - assert data1 == data3, (data1, data3) - assert data1 == data4, (data1, data4) - finally: - if os.path.exists(filename): - os.unlink(filename) - -test_file_output.unittest = ['.unicode'] - -def test_unicode_transfer(unicode_filename, verbose=False): - data = open(unicode_filename, 'rb').read().decode('utf-8') - for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']: - input = data - if encoding is not None: - input = ('\ufeff'+input).encode(encoding) - output1 = yaml.emit(yaml.parse(input), allow_unicode=True) - if encoding is None: - stream = io.StringIO() - else: - stream = io.BytesIO() - yaml.emit(yaml.parse(input), stream, allow_unicode=True) - output2 = stream.getvalue() - assert isinstance(output1, str), (type(output1), encoding) - if encoding is None: - assert isinstance(output2, str), (type(output1), encoding) - else: - assert isinstance(output2, bytes), (type(output1), encoding) - output2.decode(encoding) - -test_unicode_transfer.unittest = ['.unicode'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) diff --git a/tests/lib3/test_mark.py b/tests/lib3/test_mark.py deleted file mode 100644 index 09eea2e..0000000 --- a/tests/lib3/test_mark.py +++ /dev/null @@ -1,32 +0,0 @@ - -import yaml - -def test_marks(marks_filename, verbose=False): - inputs = open(marks_filename, 'r').read().split('---\n')[1:] - for input in inputs: - index = 0 - line = 0 - column = 0 - while input[index] != '*': - if input[index] == '\n': - line += 1 - column = 0 - else: - column += 1 - index += 1 - mark = yaml.Mark(marks_filename, index, line, column, input, index) - snippet = mark.get_snippet(indent=2, max_length=79) - if verbose: - print(snippet) - assert isinstance(snippet, str), type(snippet) - assert snippet.count('\n') == 1, snippet.count('\n') - data, pointer = snippet.split('\n') - assert len(data) < 82, len(data) - assert data[len(pointer)-1] == '*', data[len(pointer)-1] - -test_marks.unittest = ['.marks'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_multi_constructor.py b/tests/lib3/test_multi_constructor.py deleted file mode 100644 index f6e28fe..0000000 --- a/tests/lib3/test_multi_constructor.py +++ /dev/null @@ -1,63 +0,0 @@ -import yaml -import pprint -import sys - -def _load_code(expression): - return eval(expression) - -def myconstructor1(constructor, tag, node): - seq = constructor.construct_sequence(node) - return {tag: seq } - -def myconstructor2(constructor, tag, node): - seq = constructor.construct_sequence(node) - string = '' - try: - i = tag.index('!') + 1 - except: - try: - i = tag.rindex(':') + 1 - except: - pass - if i >= 0: - tag = tag[i:] - return { tag: seq } - -class Multi1(yaml.FullLoader): - pass -class Multi2(yaml.FullLoader): - pass - -def test_multi_constructor(input_filename, code_filename, verbose=False): - input = open(input_filename, 'rb').read().decode('utf-8') - native = _load_code(open(code_filename, 'rb').read()) - - # default multi constructor for ! and !! tags - Multi1.add_multi_constructor('!', myconstructor1) - Multi1.add_multi_constructor('tag:yaml.org,2002:', myconstructor1) - - data = yaml.load(input, Loader=Multi1) - if verbose: - print('Multi1:') - print(data) - print(native) - assert(data == native) - - - # default multi constructor for all tags - Multi2.add_multi_constructor(None, myconstructor2) - - data = yaml.load(input, Loader=Multi2) - if verbose: - print('Multi2:') - print(data) - print(native) - assert(data == native) - - -test_multi_constructor.unittest = ['.multi', '.code'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_reader.py b/tests/lib3/test_reader.py deleted file mode 100644 index c07b346..0000000 --- a/tests/lib3/test_reader.py +++ /dev/null @@ -1,34 +0,0 @@ - -import yaml.reader - -def _run_reader(data, verbose): - try: - stream = yaml.reader.Reader(data) - while stream.peek() != '\0': - stream.forward() - except yaml.reader.ReaderError as exc: - if verbose: - print(exc) - else: - raise AssertionError("expected an exception") - -def test_stream_error(error_filename, verbose=False): - _run_reader(open(error_filename, 'rb'), verbose) - _run_reader(open(error_filename, 'rb').read(), verbose) - for encoding in ['utf-8', 'utf-16-le', 'utf-16-be']: - try: - data = open(error_filename, 'rb').read().decode(encoding) - break - except UnicodeDecodeError: - pass - else: - return - _run_reader(data, verbose) - _run_reader(open(error_filename, encoding=encoding), verbose) - -test_stream_error.unittest = ['.stream-error'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_recursive.py b/tests/lib3/test_recursive.py deleted file mode 100644 index 08042c8..0000000 --- a/tests/lib3/test_recursive.py +++ /dev/null @@ -1,51 +0,0 @@ - -import yaml - -class AnInstance: - - def __init__(self, foo, bar): - self.foo = foo - self.bar = bar - - def __repr__(self): - try: - return "%s(foo=%r, bar=%r)" % (self.__class__.__name__, - self.foo, self.bar) - except RuntimeError: - return "%s(foo=..., bar=...)" % self.__class__.__name__ - -class AnInstanceWithState(AnInstance): - - def __getstate__(self): - return {'attributes': [self.foo, self.bar]} - - def __setstate__(self, state): - self.foo, self.bar = state['attributes'] - -def test_recursive(recursive_filename, verbose=False): - context = globals().copy() - exec(open(recursive_filename, 'rb').read(), context) - value1 = context['value'] - output1 = None - value2 = None - output2 = None - try: - output1 = yaml.dump(value1) - value2 = yaml.unsafe_load(output1) - output2 = yaml.dump(value2) - assert output1 == output2, (output1, output2) - finally: - if verbose: - print("VALUE1:", value1) - print("VALUE2:", value2) - print("OUTPUT1:") - print(output1) - print("OUTPUT2:") - print(output2) - -test_recursive.unittest = ['.recursive'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_representer.py b/tests/lib3/test_representer.py deleted file mode 100644 index 10d4a8f..0000000 --- a/tests/lib3/test_representer.py +++ /dev/null @@ -1,43 +0,0 @@ - -import yaml -import test_constructor -import pprint - -def test_representer_types(code_filename, verbose=False): - test_constructor._make_objects() - for allow_unicode in [False, True]: - for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']: - native1 = test_constructor._load_code(open(code_filename, 'rb').read()) - native2 = None - try: - output = yaml.dump(native1, Dumper=test_constructor.MyDumper, - allow_unicode=allow_unicode, encoding=encoding) - native2 = yaml.load(output, Loader=test_constructor.MyLoader) - try: - if native1 == native2: - continue - except TypeError: - pass - value1 = test_constructor._serialize_value(native1) - value2 = test_constructor._serialize_value(native2) - if verbose: - print("SERIALIZED NATIVE1:") - print(value1) - print("SERIALIZED NATIVE2:") - print(value2) - assert value1 == value2, (native1, native2) - finally: - if verbose: - print("NATIVE1:") - pprint.pprint(native1) - print("NATIVE2:") - pprint.pprint(native2) - print("OUTPUT:") - print(output) - -test_representer_types.unittest = ['.code'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_resolver.py b/tests/lib3/test_resolver.py deleted file mode 100644 index f059dab..0000000 --- a/tests/lib3/test_resolver.py +++ /dev/null @@ -1,92 +0,0 @@ - -import yaml -import pprint - -def test_implicit_resolver(data_filename, detect_filename, verbose=False): - correct_tag = None - node = None - try: - correct_tag = open(detect_filename, 'r').read().strip() - node = yaml.compose(open(data_filename, 'rb')) - assert isinstance(node, yaml.SequenceNode), node - for scalar in node.value: - assert isinstance(scalar, yaml.ScalarNode), scalar - assert scalar.tag == correct_tag, (scalar.tag, correct_tag) - finally: - if verbose: - print("CORRECT TAG:", correct_tag) - if hasattr(node, 'value'): - print("CHILDREN:") - pprint.pprint(node.value) - -test_implicit_resolver.unittest = ['.data', '.detect'] - -def _make_path_loader_and_dumper(): - global MyLoader, MyDumper - - class MyLoader(yaml.Loader): - pass - class MyDumper(yaml.Dumper): - pass - - yaml.add_path_resolver('!root', [], - Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver('!root/scalar', [], str, - Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver('!root/key11/key12/*', ['key11', 'key12'], - Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver('!root/key21/1/*', ['key21', 1], - Loader=MyLoader, Dumper=MyDumper) - yaml.add_path_resolver('!root/key31/*/*/key14/map', ['key31', None, None, 'key14'], dict, - Loader=MyLoader, Dumper=MyDumper) - - return MyLoader, MyDumper - -def _convert_node(node): - if isinstance(node, yaml.ScalarNode): - return (node.tag, node.value) - elif isinstance(node, yaml.SequenceNode): - value = [] - for item in node.value: - value.append(_convert_node(item)) - return (node.tag, value) - elif isinstance(node, yaml.MappingNode): - value = [] - for key, item in node.value: - value.append((_convert_node(key), _convert_node(item))) - return (node.tag, value) - -def test_path_resolver_loader(data_filename, path_filename, verbose=False): - _make_path_loader_and_dumper() - nodes1 = list(yaml.compose_all(open(data_filename, 'rb').read(), Loader=MyLoader)) - nodes2 = list(yaml.compose_all(open(path_filename, 'rb').read())) - try: - for node1, node2 in zip(nodes1, nodes2): - data1 = _convert_node(node1) - data2 = _convert_node(node2) - assert data1 == data2, (data1, data2) - finally: - if verbose: - print(yaml.serialize_all(nodes1)) - -test_path_resolver_loader.unittest = ['.data', '.path'] - -def test_path_resolver_dumper(data_filename, path_filename, verbose=False): - _make_path_loader_and_dumper() - for filename in [data_filename, path_filename]: - output = yaml.serialize_all(yaml.compose_all(open(filename, 'rb')), Dumper=MyDumper) - if verbose: - print(output) - nodes1 = yaml.compose_all(output) - nodes2 = yaml.compose_all(open(data_filename, 'rb')) - for node1, node2 in zip(nodes1, nodes2): - data1 = _convert_node(node1) - data2 = _convert_node(node2) - assert data1 == data2, (data1, data2) - -test_path_resolver_dumper.unittest = ['.data', '.path'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_sort_keys.py b/tests/lib3/test_sort_keys.py deleted file mode 100644 index f3f8a74..0000000 --- a/tests/lib3/test_sort_keys.py +++ /dev/null @@ -1,29 +0,0 @@ -import yaml -import pprint -import sys - -def test_sort_keys(input_filename, sorted_filename, verbose=False): - input = open(input_filename, 'rb').read().decode('utf-8') - sorted = open(sorted_filename, 'rb').read().decode('utf-8') - data = yaml.load(input, Loader=yaml.FullLoader) - dump_sorted = yaml.dump(data, default_flow_style=False, sort_keys=True) - dump_unsorted = yaml.dump(data, default_flow_style=False, sort_keys=False) - dump_unsorted_safe = yaml.dump(data, default_flow_style=False, sort_keys=False, Dumper=yaml.SafeDumper) - if verbose: - print("INPUT:") - print(input) - print("DATA:") - print(data) - - assert dump_sorted == sorted - - if sys.version_info>=(3,7): - assert dump_unsorted == input - assert dump_unsorted_safe == input - -test_sort_keys.unittest = ['.sort', '.sorted'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_structure.py b/tests/lib3/test_structure.py deleted file mode 100644 index 6d6f59d..0000000 --- a/tests/lib3/test_structure.py +++ /dev/null @@ -1,187 +0,0 @@ - -import yaml, canonical -import pprint - -def _convert_structure(loader): - if loader.check_event(yaml.ScalarEvent): - event = loader.get_event() - if event.tag or event.anchor or event.value: - return True - else: - return None - elif loader.check_event(yaml.SequenceStartEvent): - loader.get_event() - sequence = [] - while not loader.check_event(yaml.SequenceEndEvent): - sequence.append(_convert_structure(loader)) - loader.get_event() - return sequence - elif loader.check_event(yaml.MappingStartEvent): - loader.get_event() - mapping = [] - while not loader.check_event(yaml.MappingEndEvent): - key = _convert_structure(loader) - value = _convert_structure(loader) - mapping.append((key, value)) - loader.get_event() - return mapping - elif loader.check_event(yaml.AliasEvent): - loader.get_event() - return '*' - else: - loader.get_event() - return '?' - -def test_structure(data_filename, structure_filename, verbose=False): - nodes1 = [] - nodes2 = eval(open(structure_filename, 'r').read()) - try: - loader = yaml.Loader(open(data_filename, 'rb')) - while loader.check_event(): - if loader.check_event(yaml.StreamStartEvent, yaml.StreamEndEvent, - yaml.DocumentStartEvent, yaml.DocumentEndEvent): - loader.get_event() - continue - nodes1.append(_convert_structure(loader)) - if len(nodes1) == 1: - nodes1 = nodes1[0] - assert nodes1 == nodes2, (nodes1, nodes2) - finally: - if verbose: - print("NODES1:") - pprint.pprint(nodes1) - print("NODES2:") - pprint.pprint(nodes2) - -test_structure.unittest = ['.data', '.structure'] - -def _compare_events(events1, events2, full=False): - assert len(events1) == len(events2), (len(events1), len(events2)) - for event1, event2 in zip(events1, events2): - assert event1.__class__ == event2.__class__, (event1, event2) - if isinstance(event1, yaml.AliasEvent) and full: - assert event1.anchor == event2.anchor, (event1, event2) - if isinstance(event1, (yaml.ScalarEvent, yaml.CollectionStartEvent)): - if (event1.tag not in [None, '!'] and event2.tag not in [None, '!']) or full: - assert event1.tag == event2.tag, (event1, event2) - if isinstance(event1, yaml.ScalarEvent): - assert event1.value == event2.value, (event1, event2) - -def test_parser(data_filename, canonical_filename, verbose=False): - events1 = None - events2 = None - try: - events1 = list(yaml.parse(open(data_filename, 'rb'))) - events2 = list(yaml.canonical_parse(open(canonical_filename, 'rb'))) - _compare_events(events1, events2) - finally: - if verbose: - print("EVENTS1:") - pprint.pprint(events1) - print("EVENTS2:") - pprint.pprint(events2) - -test_parser.unittest = ['.data', '.canonical'] - -def test_parser_on_canonical(canonical_filename, verbose=False): - events1 = None - events2 = None - try: - events1 = list(yaml.parse(open(canonical_filename, 'rb'))) - events2 = list(yaml.canonical_parse(open(canonical_filename, 'rb'))) - _compare_events(events1, events2, full=True) - finally: - if verbose: - print("EVENTS1:") - pprint.pprint(events1) - print("EVENTS2:") - pprint.pprint(events2) - -test_parser_on_canonical.unittest = ['.canonical'] - -def _compare_nodes(node1, node2): - assert node1.__class__ == node2.__class__, (node1, node2) - assert node1.tag == node2.tag, (node1, node2) - if isinstance(node1, yaml.ScalarNode): - assert node1.value == node2.value, (node1, node2) - else: - assert len(node1.value) == len(node2.value), (node1, node2) - for item1, item2 in zip(node1.value, node2.value): - if not isinstance(item1, tuple): - item1 = (item1,) - item2 = (item2,) - for subnode1, subnode2 in zip(item1, item2): - _compare_nodes(subnode1, subnode2) - -def test_composer(data_filename, canonical_filename, verbose=False): - nodes1 = None - nodes2 = None - try: - nodes1 = list(yaml.compose_all(open(data_filename, 'rb'))) - nodes2 = list(yaml.canonical_compose_all(open(canonical_filename, 'rb'))) - assert len(nodes1) == len(nodes2), (len(nodes1), len(nodes2)) - for node1, node2 in zip(nodes1, nodes2): - _compare_nodes(node1, node2) - finally: - if verbose: - print("NODES1:") - pprint.pprint(nodes1) - print("NODES2:") - pprint.pprint(nodes2) - -test_composer.unittest = ['.data', '.canonical'] - -def _make_loader(): - global MyLoader - - class MyLoader(yaml.Loader): - def construct_sequence(self, node): - return tuple(yaml.Loader.construct_sequence(self, node)) - def construct_mapping(self, node): - pairs = self.construct_pairs(node) - pairs.sort(key=(lambda i: str(i))) - return pairs - def construct_undefined(self, node): - return self.construct_scalar(node) - - MyLoader.add_constructor('tag:yaml.org,2002:map', MyLoader.construct_mapping) - MyLoader.add_constructor(None, MyLoader.construct_undefined) - -def _make_canonical_loader(): - global MyCanonicalLoader - - class MyCanonicalLoader(yaml.CanonicalLoader): - def construct_sequence(self, node): - return tuple(yaml.CanonicalLoader.construct_sequence(self, node)) - def construct_mapping(self, node): - pairs = self.construct_pairs(node) - pairs.sort(key=(lambda i: str(i))) - return pairs - def construct_undefined(self, node): - return self.construct_scalar(node) - - MyCanonicalLoader.add_constructor('tag:yaml.org,2002:map', MyCanonicalLoader.construct_mapping) - MyCanonicalLoader.add_constructor(None, MyCanonicalLoader.construct_undefined) - -def test_constructor(data_filename, canonical_filename, verbose=False): - _make_loader() - _make_canonical_loader() - native1 = None - native2 = None - try: - native1 = list(yaml.load_all(open(data_filename, 'rb'), Loader=MyLoader)) - native2 = list(yaml.load_all(open(canonical_filename, 'rb'), Loader=MyCanonicalLoader)) - assert native1 == native2, (native1, native2) - finally: - if verbose: - print("NATIVE1:") - pprint.pprint(native1) - print("NATIVE2:") - pprint.pprint(native2) - -test_constructor.unittest = ['.data', '.canonical'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_tokens.py b/tests/lib3/test_tokens.py deleted file mode 100644 index 828945a..0000000 --- a/tests/lib3/test_tokens.py +++ /dev/null @@ -1,77 +0,0 @@ - -import yaml -import pprint - -# Tokens mnemonic: -# directive: % -# document_start: --- -# document_end: ... -# alias: * -# anchor: & -# tag: ! -# scalar _ -# block_sequence_start: [[ -# block_mapping_start: {{ -# block_end: ]} -# flow_sequence_start: [ -# flow_sequence_end: ] -# flow_mapping_start: { -# flow_mapping_end: } -# entry: , -# key: ? -# value: : - -_replaces = { - yaml.DirectiveToken: '%', - yaml.DocumentStartToken: '---', - yaml.DocumentEndToken: '...', - yaml.AliasToken: '*', - yaml.AnchorToken: '&', - yaml.TagToken: '!', - yaml.ScalarToken: '_', - yaml.BlockSequenceStartToken: '[[', - yaml.BlockMappingStartToken: '{{', - yaml.BlockEndToken: ']}', - yaml.FlowSequenceStartToken: '[', - yaml.FlowSequenceEndToken: ']', - yaml.FlowMappingStartToken: '{', - yaml.FlowMappingEndToken: '}', - yaml.BlockEntryToken: ',', - yaml.FlowEntryToken: ',', - yaml.KeyToken: '?', - yaml.ValueToken: ':', -} - -def test_tokens(data_filename, tokens_filename, verbose=False): - tokens1 = [] - tokens2 = open(tokens_filename, 'r').read().split() - try: - for token in yaml.scan(open(data_filename, 'rb')): - if not isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)): - tokens1.append(_replaces[token.__class__]) - finally: - if verbose: - print("TOKENS1:", ' '.join(tokens1)) - print("TOKENS2:", ' '.join(tokens2)) - assert len(tokens1) == len(tokens2), (tokens1, tokens2) - for token1, token2 in zip(tokens1, tokens2): - assert token1 == token2, (token1, token2) - -test_tokens.unittest = ['.data', '.tokens'] - -def test_scanner(data_filename, canonical_filename, verbose=False): - for filename in [data_filename, canonical_filename]: - tokens = [] - try: - for token in yaml.scan(open(filename, 'rb')): - tokens.append(token.__class__.__name__) - finally: - if verbose: - pprint.pprint(tokens) - -test_scanner.unittest = ['.data', '.canonical'] - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_yaml.py b/tests/lib3/test_yaml.py deleted file mode 100644 index 352cd8d..0000000 --- a/tests/lib3/test_yaml.py +++ /dev/null @@ -1,20 +0,0 @@ - -from test_mark import * -from test_reader import * -from test_canonical import * -from test_tokens import * -from test_structure import * -from test_errors import * -from test_resolver import * -from test_constructor import * -from test_emitter import * -from test_representer import * -from test_recursive import * -from test_input_output import * -from test_sort_keys import * -from test_multi_constructor import * - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tests/lib3/test_yaml_ext.py b/tests/lib3/test_yaml_ext.py deleted file mode 100644 index 264df0d..0000000 --- a/tests/lib3/test_yaml_ext.py +++ /dev/null @@ -1,292 +0,0 @@ - -import yaml._yaml, yaml -import types, pprint, tempfile, sys, os - -yaml.PyBaseLoader = yaml.BaseLoader -yaml.PySafeLoader = yaml.SafeLoader -yaml.PyLoader = yaml.Loader -yaml.PyBaseDumper = yaml.BaseDumper -yaml.PySafeDumper = yaml.SafeDumper -yaml.PyDumper = yaml.Dumper - -old_scan = yaml.scan -def new_scan(stream, Loader=yaml.CLoader): - return old_scan(stream, Loader) - -old_parse = yaml.parse -def new_parse(stream, Loader=yaml.CLoader): - return old_parse(stream, Loader) - -old_compose = yaml.compose -def new_compose(stream, Loader=yaml.CLoader): - return old_compose(stream, Loader) - -old_compose_all = yaml.compose_all -def new_compose_all(stream, Loader=yaml.CLoader): - return old_compose_all(stream, Loader) - -old_load = yaml.load -def new_load(stream, Loader=yaml.CLoader): - return old_load(stream, Loader) - -old_load_all = yaml.load_all -def new_load_all(stream, Loader=yaml.CLoader): - return old_load_all(stream, Loader) - -old_safe_load = yaml.safe_load -def new_safe_load(stream): - return old_load(stream, yaml.CSafeLoader) - -old_safe_load_all = yaml.safe_load_all -def new_safe_load_all(stream): - return old_load_all(stream, yaml.CSafeLoader) - -old_emit = yaml.emit -def new_emit(events, stream=None, Dumper=yaml.CDumper, **kwds): - return old_emit(events, stream, Dumper, **kwds) - -old_serialize = yaml.serialize -def new_serialize(node, stream, Dumper=yaml.CDumper, **kwds): - return old_serialize(node, stream, Dumper, **kwds) - -old_serialize_all = yaml.serialize_all -def new_serialize_all(nodes, stream=None, Dumper=yaml.CDumper, **kwds): - return old_serialize_all(nodes, stream, Dumper, **kwds) - -old_dump = yaml.dump -def new_dump(data, stream=None, Dumper=yaml.CDumper, **kwds): - return old_dump(data, stream, Dumper, **kwds) - -old_dump_all = yaml.dump_all -def new_dump_all(documents, stream=None, Dumper=yaml.CDumper, **kwds): - return old_dump_all(documents, stream, Dumper, **kwds) - -old_safe_dump = yaml.safe_dump -def new_safe_dump(data, stream=None, **kwds): - return old_dump(data, stream, yaml.CSafeDumper, **kwds) - -old_safe_dump_all = yaml.safe_dump_all -def new_safe_dump_all(documents, stream=None, **kwds): - return old_dump_all(documents, stream, yaml.CSafeDumper, **kwds) - -def _set_up(): - yaml.BaseLoader = yaml.CBaseLoader - yaml.SafeLoader = yaml.CSafeLoader - yaml.Loader = yaml.CLoader - yaml.BaseDumper = yaml.CBaseDumper - yaml.SafeDumper = yaml.CSafeDumper - yaml.Dumper = yaml.CDumper - yaml.scan = new_scan - yaml.parse = new_parse - yaml.compose = new_compose - yaml.compose_all = new_compose_all - yaml.load = new_load - yaml.load_all = new_load_all - yaml.safe_load = new_safe_load - yaml.safe_load_all = new_safe_load_all - yaml.emit = new_emit - yaml.serialize = new_serialize - yaml.serialize_all = new_serialize_all - yaml.dump = new_dump - yaml.dump_all = new_dump_all - yaml.safe_dump = new_safe_dump - yaml.safe_dump_all = new_safe_dump_all - -def _tear_down(): - yaml.BaseLoader = yaml.PyBaseLoader - yaml.SafeLoader = yaml.PySafeLoader - yaml.Loader = yaml.PyLoader - yaml.BaseDumper = yaml.PyBaseDumper - yaml.SafeDumper = yaml.PySafeDumper - yaml.Dumper = yaml.PyDumper - yaml.scan = old_scan - yaml.parse = old_parse - yaml.compose = old_compose - yaml.compose_all = old_compose_all - yaml.load = old_load - yaml.load_all = old_load_all - yaml.safe_load = old_safe_load - yaml.safe_load_all = old_safe_load_all - yaml.emit = old_emit - yaml.serialize = old_serialize - yaml.serialize_all = old_serialize_all - yaml.dump = old_dump - yaml.dump_all = old_dump_all - yaml.safe_dump = old_safe_dump - yaml.safe_dump_all = old_safe_dump_all - -def test_c_version(verbose=False): - if verbose: - print(_yaml.get_version()) - print(_yaml.get_version_string()) - assert ("%s.%s.%s" % yaml._yaml.get_version()) == yaml._yaml.get_version_string(), \ - (_yaml.get_version(), yaml._yaml.get_version_string()) - -def test_deprecate_yaml_module(): - import _yaml - assert _yaml.__package__ == '' - assert isinstance(_yaml.get_version(), str) - -def _compare_scanners(py_data, c_data, verbose): - py_tokens = list(yaml.scan(py_data, Loader=yaml.PyLoader)) - c_tokens = [] - try: - for token in yaml.scan(c_data, Loader=yaml.CLoader): - c_tokens.append(token) - assert len(py_tokens) == len(c_tokens), (len(py_tokens), len(c_tokens)) - for py_token, c_token in zip(py_tokens, c_tokens): - assert py_token.__class__ == c_token.__class__, (py_token, c_token) - if hasattr(py_token, 'value'): - assert py_token.value == c_token.value, (py_token, c_token) - if isinstance(py_token, yaml.StreamEndToken): - continue - py_start = (py_token.start_mark.index, py_token.start_mark.line, py_token.start_mark.column) - py_end = (py_token.end_mark.index, py_token.end_mark.line, py_token.end_mark.column) - c_start = (c_token.start_mark.index, c_token.start_mark.line, c_token.start_mark.column) - c_end = (c_token.end_mark.index, c_token.end_mark.line, c_token.end_mark.column) - assert py_start == c_start, (py_start, c_start) - assert py_end == c_end, (py_end, c_end) - finally: - if verbose: - print("PY_TOKENS:") - pprint.pprint(py_tokens) - print("C_TOKENS:") - pprint.pprint(c_tokens) - -def test_c_scanner(data_filename, canonical_filename, verbose=False): - _compare_scanners(open(data_filename, 'rb'), - open(data_filename, 'rb'), verbose) - _compare_scanners(open(data_filename, 'rb').read(), - open(data_filename, 'rb').read(), verbose) - _compare_scanners(open(canonical_filename, 'rb'), - open(canonical_filename, 'rb'), verbose) - _compare_scanners(open(canonical_filename, 'rb').read(), - open(canonical_filename, 'rb').read(), verbose) - -test_c_scanner.unittest = ['.data', '.canonical'] -test_c_scanner.skip = ['.skip-ext'] - -def _compare_parsers(py_data, c_data, verbose): - py_events = list(yaml.parse(py_data, Loader=yaml.PyLoader)) - c_events = [] - try: - for event in yaml.parse(c_data, Loader=yaml.CLoader): - c_events.append(event) - assert len(py_events) == len(c_events), (len(py_events), len(c_events)) - for py_event, c_event in zip(py_events, c_events): - for attribute in ['__class__', 'anchor', 'tag', 'implicit', - 'value', 'explicit', 'version', 'tags']: - py_value = getattr(py_event, attribute, None) - c_value = getattr(c_event, attribute, None) - assert py_value == c_value, (py_event, c_event, attribute) - finally: - if verbose: - print("PY_EVENTS:") - pprint.pprint(py_events) - print("C_EVENTS:") - pprint.pprint(c_events) - -def test_c_parser(data_filename, canonical_filename, verbose=False): - _compare_parsers(open(data_filename, 'rb'), - open(data_filename, 'rb'), verbose) - _compare_parsers(open(data_filename, 'rb').read(), - open(data_filename, 'rb').read(), verbose) - _compare_parsers(open(canonical_filename, 'rb'), - open(canonical_filename, 'rb'), verbose) - _compare_parsers(open(canonical_filename, 'rb').read(), - open(canonical_filename, 'rb').read(), verbose) - -test_c_parser.unittest = ['.data', '.canonical'] -test_c_parser.skip = ['.skip-ext'] - -def _compare_emitters(data, verbose): - events = list(yaml.parse(data, Loader=yaml.PyLoader)) - c_data = yaml.emit(events, Dumper=yaml.CDumper) - if verbose: - print(c_data) - py_events = list(yaml.parse(c_data, Loader=yaml.PyLoader)) - c_events = list(yaml.parse(c_data, Loader=yaml.CLoader)) - try: - assert len(events) == len(py_events), (len(events), len(py_events)) - assert len(events) == len(c_events), (len(events), len(c_events)) - for event, py_event, c_event in zip(events, py_events, c_events): - for attribute in ['__class__', 'anchor', 'tag', 'implicit', - 'value', 'explicit', 'version', 'tags']: - value = getattr(event, attribute, None) - py_value = getattr(py_event, attribute, None) - c_value = getattr(c_event, attribute, None) - if attribute == 'tag' and value in [None, '!'] \ - and py_value in [None, '!'] and c_value in [None, '!']: - continue - if attribute == 'explicit' and (py_value or c_value): - continue - assert value == py_value, (event, py_event, attribute) - assert value == c_value, (event, c_event, attribute) - finally: - if verbose: - print("EVENTS:") - pprint.pprint(events) - print("PY_EVENTS:") - pprint.pprint(py_events) - print("C_EVENTS:") - pprint.pprint(c_events) - -def test_c_emitter(data_filename, canonical_filename, verbose=False): - _compare_emitters(open(data_filename, 'rb').read(), verbose) - _compare_emitters(open(canonical_filename, 'rb').read(), verbose) - -test_c_emitter.unittest = ['.data', '.canonical'] -test_c_emitter.skip = ['.skip-ext'] - -def test_large_file(verbose=False): - SIZE_LINE = 24 - SIZE_ITERATION = 0 - SIZE_FILE = 31 - if sys.maxsize <= 2**32: - return - if os.environ.get('PYYAML_TEST_GROUP', '') != 'all': - return - with tempfile.TemporaryFile() as temp_file: - for i in range(2**(SIZE_FILE-SIZE_ITERATION-SIZE_LINE) + 1): - temp_file.write(bytes(('-' + (' ' * (2**SIZE_LINE-4))+ '{}\n')*(2**SIZE_ITERATION), 'utf-8')) - temp_file.seek(0) - yaml.load(temp_file, Loader=yaml.CLoader) - -test_large_file.unittest = None - -def wrap_ext_function(function): - def wrapper(*args, **kwds): - _set_up() - try: - function(*args, **kwds) - finally: - _tear_down() - wrapper.__name__ = '%s_ext' % function.__name__ - wrapper.unittest = function.unittest - wrapper.skip = getattr(function, 'skip', [])+['.skip-ext'] - return wrapper - -def wrap_ext(collections): - functions = [] - if not isinstance(collections, list): - collections = [collections] - for collection in collections: - if not isinstance(collection, dict): - collection = vars(collection) - for key in sorted(collection): - value = collection[key] - if isinstance(value, types.FunctionType) and hasattr(value, 'unittest'): - functions.append(wrap_ext_function(value)) - for function in functions: - assert function.__name__ not in globals() - globals()[function.__name__] = function - -import test_tokens, test_structure, test_errors, test_resolver, test_constructor, \ - test_emitter, test_representer, test_recursive, test_input_output -wrap_ext([test_tokens, test_structure, test_errors, test_resolver, test_constructor, - test_emitter, test_representer, test_recursive, test_input_output]) - -if __name__ == '__main__': - import test_appliance - test_appliance.run(globals()) - diff --git a/tox.ini b/tox.ini index 8694f6d..d736a6c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,pypy,py35,py36,py37,py38,py39 +envlist = pypy3,py36,py37,py38,py39,py310 [testenv] deps = diff --git a/yaml/_yaml.h b/yaml/_yaml.h index 21fd6a9..e3984c4 100644 --- a/yaml/_yaml.h +++ b/yaml/_yaml.h @@ -1,18 +1,8 @@ #include -#if PY_MAJOR_VERSION < 3 - -#define PyUnicode_FromString(s) PyUnicode_DecodeUTF8((s), strlen(s), "strict") - -#else - -#define PyString_CheckExact PyBytes_CheckExact -#define PyString_AS_STRING PyBytes_AS_STRING -#define PyString_GET_SIZE PyBytes_GET_SIZE -#define PyString_FromStringAndSize PyBytes_FromStringAndSize - -#endif +#define PyUnicode_FromYamlString(s) PyUnicode_FromString((const char *)(void *)(s)) +#define PyBytes_AS_Yaml_STRING(s) ((yaml_char_t *)PyBytes_AS_STRING(s)) #ifdef _MSC_VER /* MS Visual C++ 6.0 */ #if _MSC_VER == 1200 diff --git a/yaml/_yaml.pxd b/yaml/_yaml.pxd index 7937c9d..713244d 100644 --- a/yaml/_yaml.pxd +++ b/yaml/_yaml.pxd @@ -2,18 +2,25 @@ cdef extern from "_yaml.h": void malloc(int l) - void memcpy(char *d, char *s, int l) + void memcpy(void *d, void *s, int l) int strlen(char *s) int PyString_CheckExact(object o) int PyUnicode_CheckExact(object o) char *PyString_AS_STRING(object o) - int PyString_GET_SIZE(object o) - object PyString_FromStringAndSize(char *v, int l) object PyUnicode_FromString(char *u) object PyUnicode_DecodeUTF8(char *u, int s, char *e) object PyUnicode_AsUTF8String(object o) int PY_MAJOR_VERSION + ctypedef unsigned char yaml_char_t + + object PyUnicode_FromYamlString(void *u) + yaml_char_t *PyBytes_AS_Yaml_STRING(object o) + const char *PyBytes_AS_STRING(object o) + int PyBytes_CheckExact(object o) + int PyBytes_GET_SIZE(object o) + object PyBytes_FromStringAndSize(char *v, int l) + ctypedef enum: SIZEOF_VOID_P ctypedef enum yaml_encoding_t: @@ -85,10 +92,10 @@ cdef extern from "_yaml.h": YAML_MAPPING_START_EVENT YAML_MAPPING_END_EVENT - ctypedef int yaml_read_handler_t(void *data, char *buffer, + ctypedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, size_t *size_read) except 0 - ctypedef int yaml_write_handler_t(void *data, char *buffer, + ctypedef int yaml_write_handler_t(void *data, unsigned char *buffer, size_t size) except 0 ctypedef struct yaml_mark_t: @@ -99,8 +106,8 @@ cdef extern from "_yaml.h": int major int minor ctypedef struct yaml_tag_directive_t: - char *handle - char *prefix + yaml_char_t *handle + yaml_char_t *prefix ctypedef struct _yaml_token_stream_start_data_t: yaml_encoding_t encoding @@ -208,23 +215,23 @@ cdef extern from "_yaml.h": int implicit) int yaml_document_end_event_initialize(yaml_event_t *event, int implicit) - int yaml_alias_event_initialize(yaml_event_t *event, char *anchor) + int yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor) int yaml_scalar_event_initialize(yaml_event_t *event, - char *anchor, char *tag, char *value, size_t length, + yaml_char_t *anchor, yaml_char_t *tag, yaml_char_t *value, int length, int plain_implicit, int quoted_implicit, yaml_scalar_style_t style) int yaml_sequence_start_event_initialize(yaml_event_t *event, - char *anchor, char *tag, int implicit, yaml_sequence_style_t style) + yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_sequence_style_t style) int yaml_sequence_end_event_initialize(yaml_event_t *event) int yaml_mapping_start_event_initialize(yaml_event_t *event, - char *anchor, char *tag, int implicit, yaml_mapping_style_t style) + yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_mapping_style_t style) int yaml_mapping_end_event_initialize(yaml_event_t *event) void yaml_event_delete(yaml_event_t *event) int yaml_parser_initialize(yaml_parser_t *parser) void yaml_parser_delete(yaml_parser_t *parser) void yaml_parser_set_input_string(yaml_parser_t *parser, - char *input, size_t size) + const unsigned char *input, size_t size) void yaml_parser_set_input(yaml_parser_t *parser, yaml_read_handler_t *handler, void *data) void yaml_parser_set_encoding(yaml_parser_t *parser, diff --git a/yaml/_yaml.pyx b/yaml/_yaml.pyx index ff4efe8..e3e93e2 100644 --- a/yaml/_yaml.pyx +++ b/yaml/_yaml.pyx @@ -2,12 +2,9 @@ import yaml def get_version_string(): - cdef char *value + cdef const char *value value = yaml_get_version_string() - if PY_MAJOR_VERSION < 3: - return value - else: - return PyUnicode_FromString(value) + return PyUnicode_FromString(value) def get_version(): cdef int major, minor, patch @@ -275,10 +272,7 @@ cdef class CParser: try: self.stream_name = stream.name except AttributeError: - if PY_MAJOR_VERSION < 3: - self.stream_name = '' - else: - self.stream_name = u'' + self.stream_name = u'' self.stream_cache = None self.stream_cache_len = 0 self.stream_cache_pos = 0 @@ -286,23 +280,14 @@ cdef class CParser: else: if PyUnicode_CheckExact(stream) != 0: stream = PyUnicode_AsUTF8String(stream) - if PY_MAJOR_VERSION < 3: - self.stream_name = '' - else: - self.stream_name = u'' + self.stream_name = u'' self.unicode_source = 1 else: - if PY_MAJOR_VERSION < 3: - self.stream_name = '' - else: - self.stream_name = u'' - if PyString_CheckExact(stream) == 0: - if PY_MAJOR_VERSION < 3: - raise TypeError("a string or stream input is required") - else: - raise TypeError(u"a string or stream input is required") + self.stream_name = u'' + if PyBytes_CheckExact(stream) == 0: + raise TypeError(u"a string or stream input is required") self.stream = stream - yaml_parser_set_input_string(&self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream)) + yaml_parser_set_input_string(&self.parser, PyBytes_AS_Yaml_STRING(stream), PyBytes_GET_SIZE(stream)) self.current_token = None self.current_event = None self.anchors = {} @@ -318,12 +303,8 @@ cdef class CParser: if self.parser.error == YAML_MEMORY_ERROR: return MemoryError elif self.parser.error == YAML_READER_ERROR: - if PY_MAJOR_VERSION < 3: - return ReaderError(self.stream_name, self.parser.problem_offset, - self.parser.problem_value, '?', self.parser.problem) - else: - return ReaderError(self.stream_name, self.parser.problem_offset, - self.parser.problem_value, u'?', PyUnicode_FromString(self.parser.problem)) + return ReaderError(self.stream_name, self.parser.problem_offset, + self.parser.problem_value, u'?', PyUnicode_FromString(self.parser.problem)) elif self.parser.error == YAML_SCANNER_ERROR \ or self.parser.error == YAML_PARSER_ERROR: context_mark = None @@ -340,22 +321,13 @@ cdef class CParser: self.parser.problem_mark.column, None, None) context = None if self.parser.context != NULL: - if PY_MAJOR_VERSION < 3: - context = self.parser.context - else: - context = PyUnicode_FromString(self.parser.context) - if PY_MAJOR_VERSION < 3: - problem = self.parser.problem - else: - problem = PyUnicode_FromString(self.parser.problem) + context = PyUnicode_FromString(self.parser.context) + problem = PyUnicode_FromString(self.parser.problem) if self.parser.error == YAML_SCANNER_ERROR: return ScannerError(context, context_mark, problem, problem_mark) else: return ParserError(context, context_mark, problem, problem_mark) - if PY_MAJOR_VERSION < 3: - raise ValueError("no parser error") - else: - raise ValueError(u"no parser error") + raise ValueError(u"no parser error") def raw_scan(self): cdef yaml_token_t token @@ -414,8 +386,8 @@ cdef class CParser: token.data.version_directive.minor), start_mark, end_mark) elif token.type == YAML_TAG_DIRECTIVE_TOKEN: - handle = PyUnicode_FromString(token.data.tag_directive.handle) - prefix = PyUnicode_FromString(token.data.tag_directive.prefix) + handle = PyUnicode_FromYamlString(token.data.tag_directive.handle) + prefix = PyUnicode_FromYamlString(token.data.tag_directive.prefix) return DirectiveToken(u"TAG", (handle, prefix), start_mark, end_mark) elif token.type == YAML_DOCUMENT_START_TOKEN: @@ -445,19 +417,19 @@ cdef class CParser: elif token.type == YAML_VALUE_TOKEN: return ValueToken(start_mark, end_mark) elif token.type == YAML_ALIAS_TOKEN: - value = PyUnicode_FromString(token.data.alias.value) + value = PyUnicode_FromYamlString(token.data.alias.value) return AliasToken(value, start_mark, end_mark) elif token.type == YAML_ANCHOR_TOKEN: - value = PyUnicode_FromString(token.data.anchor.value) + value = PyUnicode_FromYamlString(token.data.anchor.value) return AnchorToken(value, start_mark, end_mark) elif token.type == YAML_TAG_TOKEN: - handle = PyUnicode_FromString(token.data.tag.handle) - suffix = PyUnicode_FromString(token.data.tag.suffix) + handle = PyUnicode_FromYamlString(token.data.tag.handle) + suffix = PyUnicode_FromYamlString(token.data.tag.suffix) if not handle: handle = None return TagToken((handle, suffix), start_mark, end_mark) elif token.type == YAML_SCALAR_TOKEN: - value = PyUnicode_DecodeUTF8(token.data.scalar.value, + value = PyUnicode_DecodeUTF8(token.data.scalar.value, token.data.scalar.length, 'strict') plain = False style = None @@ -475,10 +447,7 @@ cdef class CParser: return ScalarToken(value, plain, start_mark, end_mark, style) else: - if PY_MAJOR_VERSION < 3: - raise ValueError("unknown token type") - else: - raise ValueError(u"unknown token type") + raise ValueError(u"unknown token type") def get_token(self): if self.current_token is not None: @@ -571,8 +540,8 @@ cdef class CParser: tags = {} tag_directive = event.data.document_start.tag_directives.start while tag_directive != event.data.document_start.tag_directives.end: - handle = PyUnicode_FromString(tag_directive.handle) - prefix = PyUnicode_FromString(tag_directive.prefix) + handle = PyUnicode_FromYamlString(tag_directive.handle) + prefix = PyUnicode_FromYamlString(tag_directive.prefix) tags[handle] = prefix tag_directive = tag_directive+1 return DocumentStartEvent(start_mark, end_mark, @@ -583,16 +552,16 @@ cdef class CParser: explicit = True return DocumentEndEvent(start_mark, end_mark, explicit) elif event.type == YAML_ALIAS_EVENT: - anchor = PyUnicode_FromString(event.data.alias.anchor) + anchor = PyUnicode_FromYamlString(event.data.alias.anchor) return AliasEvent(anchor, start_mark, end_mark) elif event.type == YAML_SCALAR_EVENT: anchor = None if event.data.scalar.anchor != NULL: - anchor = PyUnicode_FromString(event.data.scalar.anchor) + anchor = PyUnicode_FromYamlString(event.data.scalar.anchor) tag = None if event.data.scalar.tag != NULL: - tag = PyUnicode_FromString(event.data.scalar.tag) - value = PyUnicode_DecodeUTF8(event.data.scalar.value, + tag = PyUnicode_FromYamlString(event.data.scalar.tag) + value = PyUnicode_DecodeUTF8(event.data.scalar.value, event.data.scalar.length, 'strict') plain_implicit = False if event.data.scalar.plain_implicit == 1: @@ -617,10 +586,10 @@ cdef class CParser: elif event.type == YAML_SEQUENCE_START_EVENT: anchor = None if event.data.sequence_start.anchor != NULL: - anchor = PyUnicode_FromString(event.data.sequence_start.anchor) + anchor = PyUnicode_FromYamlString(event.data.sequence_start.anchor) tag = None if event.data.sequence_start.tag != NULL: - tag = PyUnicode_FromString(event.data.sequence_start.tag) + tag = PyUnicode_FromYamlString(event.data.sequence_start.tag) implicit = False if event.data.sequence_start.implicit == 1: implicit = True @@ -634,10 +603,10 @@ cdef class CParser: elif event.type == YAML_MAPPING_START_EVENT: anchor = None if event.data.mapping_start.anchor != NULL: - anchor = PyUnicode_FromString(event.data.mapping_start.anchor) + anchor = PyUnicode_FromYamlString(event.data.mapping_start.anchor) tag = None if event.data.mapping_start.tag != NULL: - tag = PyUnicode_FromString(event.data.mapping_start.tag) + tag = PyUnicode_FromYamlString(event.data.mapping_start.tag) implicit = False if event.data.mapping_start.implicit == 1: implicit = True @@ -653,10 +622,7 @@ cdef class CParser: elif event.type == YAML_MAPPING_END_EVENT: return MappingEndEvent(start_mark, end_mark) else: - if PY_MAJOR_VERSION < 3: - raise ValueError("unknown event type") - else: - raise ValueError(u"unknown event type") + raise ValueError(u"unknown event type") def get_event(self): if self.current_event is not None: @@ -712,12 +678,8 @@ cdef class CParser: self.parsed_event.start_mark.line, self.parsed_event.start_mark.column, None, None) - if PY_MAJOR_VERSION < 3: - raise ComposerError("expected a single document in the stream", - document.start_mark, "but found another document", mark) - else: - raise ComposerError(u"expected a single document in the stream", - document.start_mark, u"but found another document", mark) + raise ComposerError(u"expected a single document in the stream", + document.start_mark, u"but found another document", mark) return document cdef object _compose_document(self): @@ -731,29 +693,26 @@ cdef class CParser: cdef object _compose_node(self, object parent, object index): self._parse_next_event() if self.parsed_event.type == YAML_ALIAS_EVENT: - anchor = PyUnicode_FromString(self.parsed_event.data.alias.anchor) + anchor = PyUnicode_FromYamlString(self.parsed_event.data.alias.anchor) if anchor not in self.anchors: mark = Mark(self.stream_name, self.parsed_event.start_mark.index, self.parsed_event.start_mark.line, self.parsed_event.start_mark.column, None, None) - if PY_MAJOR_VERSION < 3: - raise ComposerError(None, None, "found undefined alias", mark) - else: - raise ComposerError(None, None, u"found undefined alias", mark) + raise ComposerError(None, None, u"found undefined alias", mark) yaml_event_delete(&self.parsed_event) return self.anchors[anchor] anchor = None if self.parsed_event.type == YAML_SCALAR_EVENT \ and self.parsed_event.data.scalar.anchor != NULL: - anchor = PyUnicode_FromString(self.parsed_event.data.scalar.anchor) + anchor = PyUnicode_FromYamlString(self.parsed_event.data.scalar.anchor) elif self.parsed_event.type == YAML_SEQUENCE_START_EVENT \ and self.parsed_event.data.sequence_start.anchor != NULL: - anchor = PyUnicode_FromString(self.parsed_event.data.sequence_start.anchor) + anchor = PyUnicode_FromYamlString(self.parsed_event.data.sequence_start.anchor) elif self.parsed_event.type == YAML_MAPPING_START_EVENT \ and self.parsed_event.data.mapping_start.anchor != NULL: - anchor = PyUnicode_FromString(self.parsed_event.data.mapping_start.anchor) + anchor = PyUnicode_FromYamlString(self.parsed_event.data.mapping_start.anchor) if anchor is not None: if anchor in self.anchors: mark = Mark(self.stream_name, @@ -761,12 +720,8 @@ cdef class CParser: self.parsed_event.start_mark.line, self.parsed_event.start_mark.column, None, None) - if PY_MAJOR_VERSION < 3: - raise ComposerError("found duplicate anchor; first occurrence", - self.anchors[anchor].start_mark, "second occurrence", mark) - else: - raise ComposerError(u"found duplicate anchor; first occurrence", - self.anchors[anchor].start_mark, u"second occurrence", mark) + raise ComposerError(u"found duplicate anchor; first occurrence", + self.anchors[anchor].start_mark, u"second occurrence", mark) self.descend_resolver(parent, index) if self.parsed_event.type == YAML_SCALAR_EVENT: node = self._compose_scalar_node(anchor) @@ -788,7 +743,7 @@ cdef class CParser: self.parsed_event.end_mark.line, self.parsed_event.end_mark.column, None, None) - value = PyUnicode_DecodeUTF8(self.parsed_event.data.scalar.value, + value = PyUnicode_DecodeUTF8(self.parsed_event.data.scalar.value, self.parsed_event.data.scalar.length, 'strict') plain_implicit = False if self.parsed_event.data.scalar.plain_implicit == 1: @@ -801,7 +756,7 @@ cdef class CParser: and self.parsed_event.data.scalar.tag[1] == c'\0'): tag = self.resolve(ScalarNode, value, (plain_implicit, quoted_implicit)) else: - tag = PyUnicode_FromString(self.parsed_event.data.scalar.tag) + tag = PyUnicode_FromYamlString(self.parsed_event.data.scalar.tag) style = None if self.parsed_event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE: style = u'' @@ -834,7 +789,7 @@ cdef class CParser: and self.parsed_event.data.sequence_start.tag[1] == c'\0'): tag = self.resolve(SequenceNode, None, implicit) else: - tag = PyUnicode_FromString(self.parsed_event.data.sequence_start.tag) + tag = PyUnicode_FromYamlString(self.parsed_event.data.sequence_start.tag) flow_style = None if self.parsed_event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE: flow_style = True @@ -873,7 +828,7 @@ cdef class CParser: and self.parsed_event.data.mapping_start.tag[1] == c'\0'): tag = self.resolve(MappingNode, None, implicit) else: - tag = PyUnicode_FromString(self.parsed_event.data.mapping_start.tag) + tag = PyUnicode_FromYamlString(self.parsed_event.data.mapping_start.tag) flow_style = None if self.parsed_event.data.mapping_start.style == YAML_FLOW_MAPPING_STYLE: flow_style = True @@ -905,7 +860,7 @@ cdef class CParser: raise error return 1 -cdef int input_handler(void *data, char *buffer, size_t size, size_t *read) except 0: +cdef int input_handler(void *data, unsigned char *buffer, size_t size, size_t *read) except 0: cdef CParser parser parser = data if parser.stream_cache is None: @@ -913,18 +868,15 @@ cdef int input_handler(void *data, char *buffer, size_t size, size_t *read) exce if PyUnicode_CheckExact(value) != 0: value = PyUnicode_AsUTF8String(value) parser.unicode_source = 1 - if PyString_CheckExact(value) == 0: - if PY_MAJOR_VERSION < 3: - raise TypeError("a string value is expected") - else: - raise TypeError(u"a string value is expected") + if PyBytes_CheckExact(value) == 0: + raise TypeError(u"a string value is expected") parser.stream_cache = value parser.stream_cache_pos = 0 - parser.stream_cache_len = PyString_GET_SIZE(value) - if (parser.stream_cache_len - parser.stream_cache_pos) < size: + parser.stream_cache_len = PyBytes_GET_SIZE(value) + if (parser.stream_cache_len - parser.stream_cache_pos) < size: size = parser.stream_cache_len - parser.stream_cache_pos if size > 0: - memcpy(buffer, PyString_AS_STRING(parser.stream_cache) + memcpy(buffer, PyBytes_AS_STRING(parser.stream_cache) + parser.stream_cache_pos, size) read[0] = size parser.stream_cache_pos += size @@ -957,12 +909,8 @@ cdef class CEmitter: raise MemoryError self.stream = stream self.dump_unicode = 0 - if PY_MAJOR_VERSION < 3: - if getattr3(stream, 'encoding', None): - self.dump_unicode = 1 - else: - if hasattr(stream, u'encoding'): - self.dump_unicode = 1 + if hasattr(stream, u'encoding'): + self.dump_unicode = 1 self.use_encoding = encoding yaml_emitter_set_output(&self.emitter, output_handler, self) if canonical: @@ -1003,15 +951,9 @@ cdef class CEmitter: if self.emitter.error == YAML_MEMORY_ERROR: return MemoryError elif self.emitter.error == YAML_EMITTER_ERROR: - if PY_MAJOR_VERSION < 3: - problem = self.emitter.problem - else: - problem = PyUnicode_FromString(self.emitter.problem) + problem = PyUnicode_FromString(self.emitter.problem) return EmitterError(problem) - if PY_MAJOR_VERSION < 3: - raise ValueError("no emitter error") - else: - raise ValueError(u"no emitter error") + raise ValueError(u"no emitter error") cdef int _object_to_event(self, object event_object, yaml_event_t *event) except 0: cdef yaml_encoding_t encoding @@ -1023,9 +965,9 @@ cdef class CEmitter: cdef int implicit cdef int plain_implicit cdef int quoted_implicit - cdef char *anchor - cdef char *tag - cdef char *value + cdef yaml_char_t *anchor + cdef yaml_char_t *tag + cdef yaml_char_t *value cdef int length cdef yaml_scalar_style_t scalar_style cdef yaml_sequence_style_t sequence_style @@ -1054,10 +996,7 @@ cdef class CEmitter: tag_directives_end = NULL if event_object.tags: if len(event_object.tags) > 128: - if PY_MAJOR_VERSION < 3: - raise ValueError("too many tags") - else: - raise ValueError(u"too many tags") + raise ValueError(u"too many tags") tag_directives_start = tag_directives_value tag_directives_end = tag_directives_value cache = [] @@ -1066,21 +1005,15 @@ cdef class CEmitter: if PyUnicode_CheckExact(handle): handle = PyUnicode_AsUTF8String(handle) cache.append(handle) - if not PyString_CheckExact(handle): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag handle must be a string") - else: - raise TypeError(u"tag handle must be a string") - tag_directives_end.handle = PyString_AS_STRING(handle) + if not PyBytes_CheckExact(handle): + raise TypeError(u"tag handle must be a string") + tag_directives_end.handle = PyBytes_AS_Yaml_STRING(handle) if PyUnicode_CheckExact(prefix): prefix = PyUnicode_AsUTF8String(prefix) cache.append(prefix) - if not PyString_CheckExact(prefix): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag prefix must be a string") - else: - raise TypeError(u"tag prefix must be a string") - tag_directives_end.prefix = PyString_AS_STRING(prefix) + if not PyBytes_CheckExact(prefix): + raise TypeError(u"tag prefix must be a string") + tag_directives_end.prefix = PyBytes_AS_Yaml_STRING(prefix) tag_directives_end = tag_directives_end+1 implicit = 1 if event_object.explicit: @@ -1098,12 +1031,9 @@ cdef class CEmitter: anchor_object = event_object.anchor if PyUnicode_CheckExact(anchor_object): anchor_object = PyUnicode_AsUTF8String(anchor_object) - if not PyString_CheckExact(anchor_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("anchor must be a string") - else: - raise TypeError(u"anchor must be a string") - anchor = PyString_AS_STRING(anchor_object) + if not PyBytes_CheckExact(anchor_object): + raise TypeError(u"anchor must be a string") + anchor = PyBytes_AS_Yaml_STRING(anchor_object) if yaml_alias_event_initialize(event, anchor) == 0: raise MemoryError elif event_class is ScalarEvent: @@ -1112,33 +1042,24 @@ cdef class CEmitter: if anchor_object is not None: if PyUnicode_CheckExact(anchor_object): anchor_object = PyUnicode_AsUTF8String(anchor_object) - if not PyString_CheckExact(anchor_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("anchor must be a string") - else: - raise TypeError(u"anchor must be a string") - anchor = PyString_AS_STRING(anchor_object) + if not PyBytes_CheckExact(anchor_object): + raise TypeError(u"anchor must be a string") + anchor = PyBytes_AS_Yaml_STRING(anchor_object) tag = NULL tag_object = event_object.tag if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) value_object = event_object.value if PyUnicode_CheckExact(value_object): value_object = PyUnicode_AsUTF8String(value_object) - if not PyString_CheckExact(value_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("value must be a string") - else: - raise TypeError(u"value must be a string") - value = PyString_AS_STRING(value_object) - length = PyString_GET_SIZE(value_object) + if not PyBytes_CheckExact(value_object): + raise TypeError(u"value must be a string") + value = PyBytes_AS_Yaml_STRING(value_object) + length = PyBytes_GET_SIZE(value_object) plain_implicit = 0 quoted_implicit = 0 if event_object.implicit is not None: @@ -1163,23 +1084,17 @@ cdef class CEmitter: if anchor_object is not None: if PyUnicode_CheckExact(anchor_object): anchor_object = PyUnicode_AsUTF8String(anchor_object) - if not PyString_CheckExact(anchor_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("anchor must be a string") - else: - raise TypeError(u"anchor must be a string") - anchor = PyString_AS_STRING(anchor_object) + if not PyBytes_CheckExact(anchor_object): + raise TypeError(u"anchor must be a string") + anchor = PyBytes_AS_Yaml_STRING(anchor_object) tag = NULL tag_object = event_object.tag if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) implicit = 0 if event_object.implicit: implicit = 1 @@ -1195,23 +1110,17 @@ cdef class CEmitter: if anchor_object is not None: if PyUnicode_CheckExact(anchor_object): anchor_object = PyUnicode_AsUTF8String(anchor_object) - if not PyString_CheckExact(anchor_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("anchor must be a string") - else: - raise TypeError(u"anchor must be a string") - anchor = PyString_AS_STRING(anchor_object) + if not PyBytes_CheckExact(anchor_object): + raise TypeError(u"anchor must be a string") + anchor = PyBytes_AS_Yaml_STRING(anchor_object) tag = NULL tag_object = event_object.tag if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) implicit = 0 if event_object.implicit: implicit = 1 @@ -1226,10 +1135,7 @@ cdef class CEmitter: elif event_class is MappingEndEvent: yaml_mapping_end_event_initialize(event) else: - if PY_MAJOR_VERSION < 3: - raise TypeError("invalid event %s" % event_object) - else: - raise TypeError(u"invalid event %s" % event_object) + raise TypeError(u"invalid event %s" % event_object) return 1 def emit(self, event_object): @@ -1259,23 +1165,14 @@ cdef class CEmitter: raise error self.closed = 0 elif self.closed == 1: - if PY_MAJOR_VERSION < 3: - raise SerializerError("serializer is closed") - else: - raise SerializerError(u"serializer is closed") + raise SerializerError(u"serializer is closed") else: - if PY_MAJOR_VERSION < 3: - raise SerializerError("serializer is already opened") - else: - raise SerializerError(u"serializer is already opened") + raise SerializerError(u"serializer is already opened") def close(self): cdef yaml_event_t event if self.closed == -1: - if PY_MAJOR_VERSION < 3: - raise SerializerError("serializer is not opened") - else: - raise SerializerError(u"serializer is not opened") + raise SerializerError(u"serializer is not opened") elif self.closed == 0: yaml_stream_end_event_initialize(&event) if yaml_emitter_emit(&self.emitter, &event) == 0: @@ -1291,15 +1188,9 @@ cdef class CEmitter: cdef yaml_tag_directive_t *tag_directives_start cdef yaml_tag_directive_t *tag_directives_end if self.closed == -1: - if PY_MAJOR_VERSION < 3: - raise SerializerError("serializer is not opened") - else: - raise SerializerError(u"serializer is not opened") + raise SerializerError(u"serializer is not opened") elif self.closed == 1: - if PY_MAJOR_VERSION < 3: - raise SerializerError("serializer is closed") - else: - raise SerializerError(u"serializer is closed") + raise SerializerError(u"serializer is closed") cache = [] version_directive = NULL if self.use_version: @@ -1310,10 +1201,7 @@ cdef class CEmitter: tag_directives_end = NULL if self.use_tags: if len(self.use_tags) > 128: - if PY_MAJOR_VERSION < 3: - raise ValueError("too many tags") - else: - raise ValueError(u"too many tags") + raise ValueError(u"too many tags") tag_directives_start = tag_directives_value tag_directives_end = tag_directives_value for handle in self.use_tags: @@ -1321,21 +1209,15 @@ cdef class CEmitter: if PyUnicode_CheckExact(handle): handle = PyUnicode_AsUTF8String(handle) cache.append(handle) - if not PyString_CheckExact(handle): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag handle must be a string") - else: - raise TypeError(u"tag handle must be a string") - tag_directives_end.handle = PyString_AS_STRING(handle) + if not PyBytes_CheckExact(handle): + raise TypeError(u"tag handle must be a string") + tag_directives_end.handle = PyBytes_AS_Yaml_STRING(handle) if PyUnicode_CheckExact(prefix): prefix = PyUnicode_AsUTF8String(prefix) cache.append(prefix) - if not PyString_CheckExact(prefix): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag prefix must be a string") - else: - raise TypeError(u"tag prefix must be a string") - tag_directives_end.prefix = PyString_AS_STRING(prefix) + if not PyBytes_CheckExact(prefix): + raise TypeError(u"tag prefix must be a string") + tag_directives_end.prefix = PyBytes_AS_Yaml_STRING(prefix) tag_directives_end = tag_directives_end+1 if yaml_document_start_event_initialize(&event, version_directive, tag_directives_start, tag_directives_end, @@ -1376,9 +1258,9 @@ cdef class CEmitter: cdef int implicit cdef int plain_implicit cdef int quoted_implicit - cdef char *anchor - cdef char *tag - cdef char *value + cdef yaml_char_t *anchor + cdef yaml_char_t *tag + cdef yaml_char_t *value cdef int length cdef int item_index cdef yaml_scalar_style_t scalar_style @@ -1389,12 +1271,9 @@ cdef class CEmitter: if anchor_object is not None: if PyUnicode_CheckExact(anchor_object): anchor_object = PyUnicode_AsUTF8String(anchor_object) - if not PyString_CheckExact(anchor_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("anchor must be a string") - else: - raise TypeError(u"anchor must be a string") - anchor = PyString_AS_STRING(anchor_object) + if not PyBytes_CheckExact(anchor_object): + raise TypeError(u"anchor must be a string") + anchor = PyBytes_AS_Yaml_STRING(anchor_object) if node in self.serialized_nodes: if yaml_alias_event_initialize(&event, anchor) == 0: raise MemoryError @@ -1417,22 +1296,16 @@ cdef class CEmitter: if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) value_object = node.value if PyUnicode_CheckExact(value_object): value_object = PyUnicode_AsUTF8String(value_object) - if not PyString_CheckExact(value_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("value must be a string") - else: - raise TypeError(u"value must be a string") - value = PyString_AS_STRING(value_object) - length = PyString_GET_SIZE(value_object) + if not PyBytes_CheckExact(value_object): + raise TypeError(u"value must be a string") + value = PyBytes_AS_Yaml_STRING(value_object) + length = PyBytes_GET_SIZE(value_object) style_object = node.style scalar_style = YAML_PLAIN_SCALAR_STYLE if style_object == "'" or style_object == u"'": @@ -1458,12 +1331,9 @@ cdef class CEmitter: if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) sequence_style = YAML_BLOCK_SEQUENCE_STYLE if node.flow_style: sequence_style = YAML_FLOW_SEQUENCE_STYLE @@ -1490,12 +1360,9 @@ cdef class CEmitter: if tag_object is not None: if PyUnicode_CheckExact(tag_object): tag_object = PyUnicode_AsUTF8String(tag_object) - if not PyString_CheckExact(tag_object): - if PY_MAJOR_VERSION < 3: - raise TypeError("tag must be a string") - else: - raise TypeError(u"tag must be a string") - tag = PyString_AS_STRING(tag_object) + if not PyBytes_CheckExact(tag_object): + raise TypeError(u"tag must be a string") + tag = PyBytes_AS_Yaml_STRING(tag_object) mapping_style = YAML_BLOCK_MAPPING_STYLE if node.flow_style: mapping_style = YAML_FLOW_MAPPING_STYLE @@ -1515,11 +1382,13 @@ cdef class CEmitter: self.ascend_resolver() return 1 -cdef int output_handler(void *data, char *buffer, size_t size) except 0: +cdef int output_handler(void *data, unsigned char *bufferu, size_t size) except 0: cdef CEmitter emitter + cdef char *buffer + buffer = bufferu emitter = data if emitter.dump_unicode == 0: - value = PyString_FromStringAndSize(buffer, size) + value = PyBytes_FromStringAndSize(buffer, size) else: value = PyUnicode_DecodeUTF8(buffer, size, 'strict') emitter.stream.write(value)