From de09f5861b927c7e289b30145e07ec4fa9834797 Mon Sep 17 00:00:00 2001 From: Tom Prince Date: Tue, 30 Oct 2018 18:01:06 +0000 Subject: [PATCH] Bug 1492128: Vendor pathlib2==2.3.2; r=firefox-build-system-reviewers,gps Differential Revision: https://phabricator.services.mozilla.com/D10145 --HG-- extra : moz-landing-system : lando --- build/virtualenv_packages.txt | 2 + third_party/python/pathlib2/CHANGELOG.rst | 137 + third_party/python/pathlib2/LICENSE.rst | 23 + third_party/python/pathlib2/MANIFEST.in | 10 + third_party/python/pathlib2/PKG-INFO | 72 + third_party/python/pathlib2/README.rst | 52 + third_party/python/pathlib2/VERSION | 1 + .../python/pathlib2/pathlib2/__init__.py | 1670 ++++++++++++ third_party/python/pathlib2/requirements.txt | 3 + third_party/python/pathlib2/setup.cfg | 13 + third_party/python/pathlib2/setup.py | 49 + .../python/pathlib2/tests/test_pathlib2.py | 2401 +++++++++++++++++ third_party/python/requirements.in | 1 + third_party/python/requirements.txt | 16 + third_party/python/scandir/LICENSE.txt | 27 + third_party/python/scandir/MANIFEST.in | 6 + third_party/python/scandir/PKG-INFO | 238 ++ third_party/python/scandir/README.rst | 211 ++ third_party/python/scandir/_scandir.c | 1833 +++++++++++++ third_party/python/scandir/benchmark.py | 192 ++ third_party/python/scandir/osdefs.h | 48 + third_party/python/scandir/scandir.py | 693 +++++ third_party/python/scandir/setup.cfg | 4 + third_party/python/scandir/setup.py | 80 + third_party/python/scandir/test/run_tests.py | 25 + .../python/scandir/test/test_scandir.py | 320 +++ third_party/python/scandir/test/test_walk.py | 213 ++ third_party/python/scandir/winreparse.h | 53 + 28 files changed, 8393 insertions(+) create mode 100644 third_party/python/pathlib2/CHANGELOG.rst create mode 100644 third_party/python/pathlib2/LICENSE.rst create mode 100644 third_party/python/pathlib2/MANIFEST.in create mode 100644 third_party/python/pathlib2/PKG-INFO create mode 100644 
third_party/python/pathlib2/README.rst create mode 100644 third_party/python/pathlib2/VERSION create mode 100644 third_party/python/pathlib2/pathlib2/__init__.py create mode 100644 third_party/python/pathlib2/requirements.txt create mode 100644 third_party/python/pathlib2/setup.cfg create mode 100644 third_party/python/pathlib2/setup.py create mode 100644 third_party/python/pathlib2/tests/test_pathlib2.py create mode 100644 third_party/python/scandir/LICENSE.txt create mode 100644 third_party/python/scandir/MANIFEST.in create mode 100644 third_party/python/scandir/PKG-INFO create mode 100644 third_party/python/scandir/README.rst create mode 100644 third_party/python/scandir/_scandir.c create mode 100644 third_party/python/scandir/benchmark.py create mode 100644 third_party/python/scandir/osdefs.h create mode 100644 third_party/python/scandir/scandir.py create mode 100644 third_party/python/scandir/setup.cfg create mode 100644 third_party/python/scandir/setup.py create mode 100644 third_party/python/scandir/test/run_tests.py create mode 100644 third_party/python/scandir/test/test_scandir.py create mode 100644 third_party/python/scandir/test/test_walk.py create mode 100644 third_party/python/scandir/winreparse.h diff --git a/build/virtualenv_packages.txt b/build/virtualenv_packages.txt index 68b667fb3bbc..ee44ab56a5db 100644 --- a/build/virtualenv_packages.txt +++ b/build/virtualenv_packages.txt @@ -20,6 +20,7 @@ mozilla.pth:third_party/python/funcsigs mozilla.pth:third_party/python/futures mozilla.pth:third_party/python/more-itertools mozilla.pth:third_party/python/mozilla-version +mozilla.pth:third_party/python/pathlib2 mozilla.pth:third_party/python/gyp/pylib mozilla.pth:third_party/python/python-hglib mozilla.pth:third_party/python/pluggy @@ -33,6 +34,7 @@ mozilla.pth:third_party/python/pystache mozilla.pth:third_party/python/pyyaml/lib mozilla.pth:third_party/python/requests mozilla.pth:third_party/python/requests-unixsocket 
+mozilla.pth:third_party/python/scandir mozilla.pth:third_party/python/slugid mozilla.pth:third_party/python/py mozilla.pth:third_party/python/pytest/src diff --git a/third_party/python/pathlib2/CHANGELOG.rst b/third_party/python/pathlib2/CHANGELOG.rst new file mode 100644 index 000000000000..8739b2dc73a2 --- /dev/null +++ b/third_party/python/pathlib2/CHANGELOG.rst @@ -0,0 +1,137 @@ +History +------- + +Version 2.3.2 +^^^^^^^^^^^^^ + +- Hotfix for broken setup.py. + +Version 2.3.1 +^^^^^^^^^^^^^ + +- Fix tests for systems where filesystem encoding only supports ascii + (reported by yurivict, fixed with help of honnibal, see issue #30). + +- Use modern setuptools syntax for specifying conditional scandir + dependency (see issue #31). + +- Remove legacy use of support module from old pathlib module (see + issue #39). This fixes the tests for Python 3.6. + +- Drop the "from __future__ import unicode_literals" and -Qnew tests + as it introduced subtle bugs in the tests, and maintaining separate + test modules for these legacy features seems not worth the effort. + +- Drop Python 3.2 support, as scandir no longer supports it. + +Version 2.3.0 +^^^^^^^^^^^^^ + +- Sync with upstream pathlib from CPython 3.6.1 (7d1017d). + +Version 2.2.1 +^^^^^^^^^^^^^ + +- Fix conditional scandir dependency in wheel (reported by AvdN, see + issue #20 and pull request #21). + +Version 2.2.0 +^^^^^^^^^^^^^ + +- Sync with upstream pathlib from CPython 3.5.2 and 3.6.0: fix various + exceptions, empty glob pattern, scandir, __fspath__. + +- Support unicode strings to be used to construct paths in Python 2 + (reported by native-api, see issue #13 and pull request #15). + +Version 2.1.0 +^^^^^^^^^^^^^ + +- Sync with upstream pathlib from CPython 3.5.0: gethomedir, home, + expanduser. + +Version 2.0.1 +^^^^^^^^^^^^^ + +- Fix TypeError exceptions in write_bytes and write_text (contributed + by Emanuele Gaifas, see pull request #2). 
+ +Version 2.0 +^^^^^^^^^^^ + +- Sync with upstream pathlib from CPython: read_text, write_text, + read_bytes, write_bytes, __enter__, __exit__, samefile. +- Use travis and appveyor for continuous integration. +- Fixed some bugs in test code. + +Version 1.0.1 +^^^^^^^^^^^^^ + +- Pull request #4: Python 2.6 compatibility by eevee. + +Version 1.0 +^^^^^^^^^^^ + +This version brings ``pathlib`` up to date with the official Python 3.4 +release, and also fixes a couple of 2.7-specific issues. + +- Python issue #20765: Add missing documentation for PurePath.with_name() + and PurePath.with_suffix(). +- Fix test_mkdir_parents when the working directory has additional bits + set (such as the setgid or sticky bits). +- Python issue #20111: pathlib.Path.with_suffix() now sanity checks the + given suffix. +- Python issue #19918: Fix PurePath.relative_to() under Windows. +- Python issue #19921: When Path.mkdir() is called with parents=True, any + missing parent is created with the default permissions, ignoring the mode + argument (mimicking the POSIX "mkdir -p" command). +- Python issue #19887: Improve the Path.resolve() algorithm to support + certain symlink chains. +- Make pathlib usable under Python 2.7 with unicode pathnames (only pure + ASCII, though). +- Issue #21: fix TypeError under Python 2.7 when using new division. +- Add tox support for easier testing. + +Version 0.97 +^^^^^^^^^^^^ + +This version brings ``pathlib`` up to date with the final API specified +in :pep:`428`. The changes are too long to list here, it is recommended +to read the `documentation `_. + +.. warning:: + The API in this version is partially incompatible with pathlib 0.8 and + earlier. Be sure to check your code for possible breakage! + +Version 0.8 +^^^^^^^^^^^ + +- Add PurePath.name and PurePath.anchor. +- Add Path.owner and Path.group. +- Add Path.replace(). +- Add Path.as_uri(). +- Issue #10: when creating a file with Path.open(), don't set the executable + bit. 
+- Issue #11: fix comparisons with non-Path objects. + +Version 0.7 +^^^^^^^^^^^ + +- Add '**' (recursive) patterns to Path.glob(). +- Fix openat() support after the API refactoring in Python 3.3 beta1. +- Add a *target_is_directory* argument to Path.symlink_to() + +Version 0.6 +^^^^^^^^^^^ + +- Add Path.is_file() and Path.is_symlink() +- Add Path.glob() and Path.rglob() +- Add PurePath.match() + +Version 0.5 +^^^^^^^^^^^ + +- Add Path.mkdir(). +- Add Python 2.7 compatibility by Michele Lacchia. +- Make parent() raise ValueError when the level is greater than the path + length. diff --git a/third_party/python/pathlib2/LICENSE.rst b/third_party/python/pathlib2/LICENSE.rst new file mode 100644 index 000000000000..1715d3d7a2cd --- /dev/null +++ b/third_party/python/pathlib2/LICENSE.rst @@ -0,0 +1,23 @@ +The MIT License (MIT) + +Copyright (c) 2014-2017 Matthias C. M. Troffaes +Copyright (c) 2012-2014 Antoine Pitrou and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/third_party/python/pathlib2/MANIFEST.in b/third_party/python/pathlib2/MANIFEST.in new file mode 100644 index 000000000000..2f03369dc681 --- /dev/null +++ b/third_party/python/pathlib2/MANIFEST.in @@ -0,0 +1,10 @@ +include *.py +recursive-include pathlib2 *.py +recursive-include tests *.py +include *.rst +include VERSION +include requirements.txt +exclude .travis.yml +exclude appveyor.yml +exclude codecov.yml +prune appveyor diff --git a/third_party/python/pathlib2/PKG-INFO b/third_party/python/pathlib2/PKG-INFO new file mode 100644 index 000000000000..f32d0caec2b2 --- /dev/null +++ b/third_party/python/pathlib2/PKG-INFO @@ -0,0 +1,72 @@ +Metadata-Version: 1.1 +Name: pathlib2 +Version: 2.3.2 +Summary: Object-oriented filesystem paths +Home-page: https://pypi.python.org/pypi/pathlib2/ +Author: Matthias C. M. Troffaes +Author-email: matthias.troffaes@gmail.com +License: MIT +Download-URL: https://pypi.python.org/pypi/pathlib2/ +Description-Content-Type: UNKNOWN +Description: The `old pathlib `_ + module on bitbucket is in bugfix-only mode. + The goal of pathlib2 is to provide a backport of + `standard pathlib `_ + module which tracks the standard library module, + so all the newest features of the standard pathlib can be + used also on older Python versions. + + Download + -------- + + Standalone releases are available on PyPI: + http://pypi.python.org/pypi/pathlib2/ + + Development + ----------- + + The main development takes place in the Python standard library: see + the `Python developer's guide `_. + In particular, new features should be submitted to the + `Python bug tracker `_. + + Issues that occur in this backport, but that do not occur not in the + standard Python pathlib module can be submitted on + the `pathlib2 bug tracker `_. + + Documentation + ------------- + + Refer to the + `standard pathlib `_ + documentation. + + .. 
|travis| image:: https://travis-ci.org/mcmtroffaes/pathlib2.png?branch=develop + :target: https://travis-ci.org/mcmtroffaes/pathlib2 + :alt: travis-ci + + .. |appveyor| image:: https://ci.appveyor.com/api/projects/status/baddx3rpet2wyi2c?svg=true + :target: https://ci.appveyor.com/project/mcmtroffaes/pathlib2 + :alt: appveyor + + .. |codecov| image:: https://codecov.io/gh/mcmtroffaes/pathlib2/branch/develop/graph/badge.svg + :target: https://codecov.io/gh/mcmtroffaes/pathlib2 + :alt: codecov + + +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: System :: Filesystems diff --git a/third_party/python/pathlib2/README.rst b/third_party/python/pathlib2/README.rst new file mode 100644 index 000000000000..d276fdcd033a --- /dev/null +++ b/third_party/python/pathlib2/README.rst @@ -0,0 +1,52 @@ +pathlib2 +======== + +|appveyor| |travis| |codecov| + +Fork of pathlib aiming to support the full stdlib Python API. + +The `old pathlib `_ +module on bitbucket is in bugfix-only mode. +The goal of pathlib2 is to provide a backport of +`standard pathlib `_ +module which tracks the standard library module, +so all the newest features of the standard pathlib can be +used also on older Python versions. 
+ +Download +-------- + +Standalone releases are available on PyPI: +http://pypi.python.org/pypi/pathlib2/ + +Development +----------- + +The main development takes place in the Python standard library: see +the `Python developer's guide `_. +In particular, new features should be submitted to the +`Python bug tracker `_. + +Issues that occur in this backport, but that do not occur not in the +standard Python pathlib module can be submitted on +the `pathlib2 bug tracker `_. + +Documentation +------------- + +Refer to the +`standard pathlib `_ +documentation. + +.. |travis| image:: https://travis-ci.org/mcmtroffaes/pathlib2.png?branch=develop + :target: https://travis-ci.org/mcmtroffaes/pathlib2 + :alt: travis-ci + +.. |appveyor| image:: https://ci.appveyor.com/api/projects/status/baddx3rpet2wyi2c?svg=true + :target: https://ci.appveyor.com/project/mcmtroffaes/pathlib2 + :alt: appveyor + +.. |codecov| image:: https://codecov.io/gh/mcmtroffaes/pathlib2/branch/develop/graph/badge.svg + :target: https://codecov.io/gh/mcmtroffaes/pathlib2 + :alt: codecov + diff --git a/third_party/python/pathlib2/VERSION b/third_party/python/pathlib2/VERSION new file mode 100644 index 000000000000..f90b1afc082f --- /dev/null +++ b/third_party/python/pathlib2/VERSION @@ -0,0 +1 @@ +2.3.2 diff --git a/third_party/python/pathlib2/pathlib2/__init__.py b/third_party/python/pathlib2/pathlib2/__init__.py new file mode 100644 index 000000000000..2eb41e309e3f --- /dev/null +++ b/third_party/python/pathlib2/pathlib2/__init__.py @@ -0,0 +1,1670 @@ +# Copyright (c) 2014-2017 Matthias C. M. Troffaes +# Copyright (c) 2012-2014 Antoine Pitrou and contributors +# Distributed under the terms of the MIT License. 
+ +import ctypes +import fnmatch +import functools +import io +import ntpath +import os +import posixpath +import re +import six +import sys +from collections import Sequence +from errno import EINVAL, ENOENT, ENOTDIR, EEXIST, EPERM, EACCES +from operator import attrgetter + +from stat import ( + S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO) +try: + from urllib import quote as urlquote_from_bytes +except ImportError: + from urllib.parse import quote_from_bytes as urlquote_from_bytes + + +try: + intern = intern +except NameError: + intern = sys.intern + +supports_symlinks = True +if os.name == 'nt': + import nt + if sys.getwindowsversion()[:2] >= (6, 0) and sys.version_info >= (3, 2): + from nt import _getfinalpathname + else: + supports_symlinks = False + _getfinalpathname = None +else: + nt = None + +try: + from os import scandir as os_scandir +except ImportError: + from scandir import scandir as os_scandir + +__all__ = [ + "PurePath", "PurePosixPath", "PureWindowsPath", + "Path", "PosixPath", "WindowsPath", + ] + +# +# Internals +# + + +def _py2_fsencode(parts): + # py2 => minimal unicode support + assert six.PY2 + return [part.encode('ascii') if isinstance(part, six.text_type) + else part for part in parts] + + +def _try_except_fileexistserror(try_func, except_func, else_func=None): + if sys.version_info >= (3, 3): + try: + try_func() + except FileExistsError as exc: + except_func(exc) + else: + if else_func is not None: + else_func() + else: + try: + try_func() + except EnvironmentError as exc: + if exc.errno != EEXIST: + raise + else: + except_func(exc) + else: + if else_func is not None: + else_func() + + +def _try_except_filenotfounderror(try_func, except_func): + if sys.version_info >= (3, 3): + try: + try_func() + except FileNotFoundError as exc: + except_func(exc) + else: + try: + try_func() + except EnvironmentError as exc: + if exc.errno != ENOENT: + raise + else: + except_func(exc) + + +def _try_except_permissionerror_iter(try_iter, 
except_iter): + if sys.version_info >= (3, 3): + try: + for x in try_iter(): + yield x + except PermissionError as exc: + for x in except_iter(exc): + yield x + else: + try: + for x in try_iter(): + yield x + except EnvironmentError as exc: + if exc.errno not in (EPERM, EACCES): + raise + else: + for x in except_iter(exc): + yield x + + +def _win32_get_unique_path_id(path): + # get file information, needed for samefile on older Python versions + # see http://timgolden.me.uk/python/win32_how_do_i/ + # see_if_two_files_are_the_same_file.html + from ctypes import POINTER, Structure, WinError + from ctypes.wintypes import DWORD, HANDLE, BOOL + + class FILETIME(Structure): + _fields_ = [("datetime_lo", DWORD), + ("datetime_hi", DWORD), + ] + + class BY_HANDLE_FILE_INFORMATION(Structure): + _fields_ = [("attributes", DWORD), + ("created_at", FILETIME), + ("accessed_at", FILETIME), + ("written_at", FILETIME), + ("volume", DWORD), + ("file_hi", DWORD), + ("file_lo", DWORD), + ("n_links", DWORD), + ("index_hi", DWORD), + ("index_lo", DWORD), + ] + + CreateFile = ctypes.windll.kernel32.CreateFileW + CreateFile.argtypes = [ctypes.c_wchar_p, DWORD, DWORD, ctypes.c_void_p, + DWORD, DWORD, HANDLE] + CreateFile.restype = HANDLE + GetFileInformationByHandle = ( + ctypes.windll.kernel32.GetFileInformationByHandle) + GetFileInformationByHandle.argtypes = [ + HANDLE, POINTER(BY_HANDLE_FILE_INFORMATION)] + GetFileInformationByHandle.restype = BOOL + CloseHandle = ctypes.windll.kernel32.CloseHandle + CloseHandle.argtypes = [HANDLE] + CloseHandle.restype = BOOL + GENERIC_READ = 0x80000000 + FILE_SHARE_READ = 0x00000001 + FILE_FLAG_BACKUP_SEMANTICS = 0x02000000 + OPEN_EXISTING = 3 + if os.path.isdir(path): + flags = FILE_FLAG_BACKUP_SEMANTICS + else: + flags = 0 + hfile = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, + None, OPEN_EXISTING, flags, None) + if hfile == 0xffffffff: + if sys.version_info >= (3, 3): + raise FileNotFoundError(path) + else: + exc = OSError("file not found: 
path") + exc.errno = ENOENT + raise exc + info = BY_HANDLE_FILE_INFORMATION() + success = GetFileInformationByHandle(hfile, info) + CloseHandle(hfile) + if success == 0: + raise WinError() + return info.volume, info.index_hi, info.index_lo + + +def _is_wildcard_pattern(pat): + # Whether this pattern needs actual matching using fnmatch, or can + # be looked up directly as a file. + return "*" in pat or "?" in pat or "[" in pat + + +class _Flavour(object): + + """A flavour implements a particular (platform-specific) set of path + semantics.""" + + def __init__(self): + self.join = self.sep.join + + def parse_parts(self, parts): + if six.PY2: + parts = _py2_fsencode(parts) + parsed = [] + sep = self.sep + altsep = self.altsep + drv = root = '' + it = reversed(parts) + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv, root, rel = self.splitroot(part) + if sep in rel: + for x in reversed(rel.split(sep)): + if x and x != '.': + parsed.append(intern(x)) + else: + if rel and rel != '.': + parsed.append(intern(rel)) + if drv or root: + if not drv: + # If no drive is present, try to find one in the previous + # parts. This makes the result of parsing e.g. + # ("C:", "/", "a") reasonably intuitive. + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv = self.splitroot(part)[0] + if drv: + break + break + if drv or root: + parsed.append(drv + root) + parsed.reverse() + return drv, root, parsed + + def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): + """ + Join the two paths represented by the respective + (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. 
+ """ + if root2: + if not drv2 and drv: + return drv, root2, [drv + root2] + parts2[1:] + elif drv2: + if drv2 == drv or self.casefold(drv2) == self.casefold(drv): + # Same drive => second path is relative to the first + return drv, root, parts + parts2[1:] + else: + # Second path is non-anchored (common case) + return drv, root, parts + parts2 + return drv2, root2, parts2 + + +class _WindowsFlavour(_Flavour): + # Reference for Windows paths can be found at + # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx + + sep = '\\' + altsep = '/' + has_drv = True + pathmod = ntpath + + is_supported = (os.name == 'nt') + + drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') + ext_namespace_prefix = '\\\\?\\' + + reserved_names = ( + set(['CON', 'PRN', 'AUX', 'NUL']) | + set(['COM%d' % i for i in range(1, 10)]) | + set(['LPT%d' % i for i in range(1, 10)]) + ) + + # Interesting findings about extended paths: + # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported + # but '\\?\c:/a' is not + # - extended paths are always absolute; "relative" extended paths will + # fail. + + def splitroot(self, part, sep=sep): + first = part[0:1] + second = part[1:2] + if (second == sep and first == sep): + # XXX extended paths should also disable the collapsing of "." + # components (according to MSDN docs). + prefix, part = self._split_extended_path(part) + first = part[0:1] + second = part[1:2] + else: + prefix = '' + third = part[2:3] + if (second == sep and first == sep and third != sep): + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvvv root + # \\machine\mountpoint\directory\etc\... 
+ # directory ^^^^^^^^^^^^^^ + index = part.find(sep, 2) + if index != -1: + index2 = part.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 != index + 1: + if index2 == -1: + index2 = len(part) + if prefix: + return prefix + part[1:index2], sep, part[index2 + 1:] + else: + return part[:index2], sep, part[index2 + 1:] + drv = root = '' + if second == ':' and first in self.drive_letters: + drv = part[:2] + part = part[2:] + first = third + if first == sep: + root = first + part = part.lstrip(sep) + return prefix + drv, root, part + + def casefold(self, s): + return s.lower() + + def casefold_parts(self, parts): + return [p.lower() for p in parts] + + def resolve(self, path, strict=False): + s = str(path) + if not s: + return os.getcwd() + previous_s = None + if _getfinalpathname is not None: + if strict: + return self._ext_to_normal(_getfinalpathname(s)) + else: + # End of the path after the first one not found + tail_parts = [] + while True: + try: + s = self._ext_to_normal(_getfinalpathname(s)) + except FileNotFoundError: + previous_s = s + s, tail = os.path.split(s) + tail_parts.append(tail) + if previous_s == s: + return path + else: + return os.path.join(s, *reversed(tail_parts)) + # Means fallback on absolute + return None + + def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): + prefix = '' + if s.startswith(ext_prefix): + prefix = s[:4] + s = s[4:] + if s.startswith('UNC\\'): + prefix += s[:3] + s = '\\' + s[3:] + return prefix, s + + def _ext_to_normal(self, s): + # Turn back an extended path into a normal DOS-like path + return self._split_extended_path(s)[1] + + def is_reserved(self, parts): + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). + # We err on the side of caution and return True for paths which are + # not considered reserved by Windows. 
+ if not parts: + return False + if parts[0].startswith('\\\\'): + # UNC paths are never reserved + return False + return parts[-1].partition('.')[0].upper() in self.reserved_names + + def make_uri(self, path): + # Under Windows, file URIs use the UTF-8 encoding. + drive = path.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + rest = path.as_posix()[2:].lstrip('/') + return 'file:///%s/%s' % ( + drive, urlquote_from_bytes(rest.encode('utf-8'))) + else: + # It's a path on a network drive => 'file://host/share/a/b' + return 'file:' + urlquote_from_bytes( + path.as_posix().encode('utf-8')) + + def gethomedir(self, username): + if 'HOME' in os.environ: + userhome = os.environ['HOME'] + elif 'USERPROFILE' in os.environ: + userhome = os.environ['USERPROFILE'] + elif 'HOMEPATH' in os.environ: + try: + drv = os.environ['HOMEDRIVE'] + except KeyError: + drv = '' + userhome = drv + os.environ['HOMEPATH'] + else: + raise RuntimeError("Can't determine home directory") + + if username: + # Try to guess user home directory. By default all users + # directories are located in the same place and are named by + # corresponding usernames. If current user home directory points + # to nonstandard place, this guess is likely wrong. 
+ if os.environ['USERNAME'] != username: + drv, root, parts = self.parse_parts((userhome,)) + if parts[-1] != os.environ['USERNAME']: + raise RuntimeError("Can't determine home directory " + "for %r" % username) + parts[-1] = username + if drv or root: + userhome = drv + root + self.join(parts[1:]) + else: + userhome = self.join(parts) + return userhome + + +class _PosixFlavour(_Flavour): + sep = '/' + altsep = '' + has_drv = False + pathmod = posixpath + + is_supported = (os.name != 'nt') + + def splitroot(self, part, sep=sep): + if part and part[0] == sep: + stripped_part = part.lstrip(sep) + # According to POSIX path resolution: + # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/ + # xbd_chap04.html#tag_04_11 + # "A pathname that begins with two successive slashes may be + # interpreted in an implementation-defined manner, although more + # than two leading slashes shall be treated as a single slash". + if len(part) - len(stripped_part) == 2: + return '', sep * 2, stripped_part + else: + return '', sep, stripped_part + else: + return '', '', part + + def casefold(self, s): + return s + + def casefold_parts(self, parts): + return parts + + def resolve(self, path, strict=False): + sep = self.sep + accessor = path._accessor + seen = {} + + def _resolve(path, rest): + if rest.startswith(sep): + path = '' + + for name in rest.split(sep): + if not name or name == '.': + # current dir + continue + if name == '..': + # parent dir + path, _, _ = path.rpartition(sep) + continue + newpath = path + sep + name + if newpath in seen: + # Already seen this path + path = seen[newpath] + if path is not None: + # use cached value + continue + # The symlink is not resolved, so we must have a symlink + # loop. + raise RuntimeError("Symlink loop from %r" % newpath) + # Resolve the symbolic link + try: + target = accessor.readlink(newpath) + except OSError as e: + if e.errno != EINVAL and strict: + raise + # Not a symlink, or non-strict mode. 
We just leave the path + # untouched. + path = newpath + else: + seen[newpath] = None # not resolved symlink + path = _resolve(path, target) + seen[newpath] = path # resolved symlink + + return path + # NOTE: according to POSIX, getcwd() cannot contain path components + # which are symlinks. + base = '' if path.is_absolute() else os.getcwd() + return _resolve(base, str(path)) or sep + + def is_reserved(self, parts): + return False + + def make_uri(self, path): + # We represent the path using the local filesystem encoding, + # for portability to other applications. + bpath = bytes(path) + return 'file://' + urlquote_from_bytes(bpath) + + def gethomedir(self, username): + if not username: + try: + return os.environ['HOME'] + except KeyError: + import pwd + return pwd.getpwuid(os.getuid()).pw_dir + else: + import pwd + try: + return pwd.getpwnam(username).pw_dir + except KeyError: + raise RuntimeError("Can't determine home directory " + "for %r" % username) + + +_windows_flavour = _WindowsFlavour() +_posix_flavour = _PosixFlavour() + + +class _Accessor: + + """An accessor implements a particular (system-specific or not) way of + accessing paths on the filesystem.""" + + +class _NormalAccessor(_Accessor): + + def _wrap_strfunc(strfunc): + @functools.wraps(strfunc) + def wrapped(pathobj, *args): + return strfunc(str(pathobj), *args) + return staticmethod(wrapped) + + def _wrap_binary_strfunc(strfunc): + @functools.wraps(strfunc) + def wrapped(pathobjA, pathobjB, *args): + return strfunc(str(pathobjA), str(pathobjB), *args) + return staticmethod(wrapped) + + stat = _wrap_strfunc(os.stat) + + lstat = _wrap_strfunc(os.lstat) + + open = _wrap_strfunc(os.open) + + listdir = _wrap_strfunc(os.listdir) + + scandir = _wrap_strfunc(os_scandir) + + chmod = _wrap_strfunc(os.chmod) + + if hasattr(os, "lchmod"): + lchmod = _wrap_strfunc(os.lchmod) + else: + def lchmod(self, pathobj, mode): + raise NotImplementedError("lchmod() not available on this system") + + mkdir = 
_wrap_strfunc(os.mkdir) + + unlink = _wrap_strfunc(os.unlink) + + rmdir = _wrap_strfunc(os.rmdir) + + rename = _wrap_binary_strfunc(os.rename) + + if sys.version_info >= (3, 3): + replace = _wrap_binary_strfunc(os.replace) + + if nt: + if supports_symlinks: + symlink = _wrap_binary_strfunc(os.symlink) + else: + def symlink(a, b, target_is_directory): + raise NotImplementedError( + "symlink() not available on this system") + else: + # Under POSIX, os.symlink() takes two args + @staticmethod + def symlink(a, b, target_is_directory): + return os.symlink(str(a), str(b)) + + utime = _wrap_strfunc(os.utime) + + # Helper for resolve() + def readlink(self, path): + return os.readlink(path) + + +_normal_accessor = _NormalAccessor() + + +# +# Globbing helpers +# + +def _make_selector(pattern_parts): + pat = pattern_parts[0] + child_parts = pattern_parts[1:] + if pat == '**': + cls = _RecursiveWildcardSelector + elif '**' in pat: + raise ValueError( + "Invalid pattern: '**' can only be an entire path component") + elif _is_wildcard_pattern(pat): + cls = _WildcardSelector + else: + cls = _PreciseSelector + return cls(pat, child_parts) + + +if hasattr(functools, "lru_cache"): + _make_selector = functools.lru_cache()(_make_selector) + + +class _Selector: + + """A selector matches a specific glob pattern part against the children + of a given path.""" + + def __init__(self, child_parts): + self.child_parts = child_parts + if child_parts: + self.successor = _make_selector(child_parts) + self.dironly = True + else: + self.successor = _TerminatingSelector() + self.dironly = False + + def select_from(self, parent_path): + """Iterate over all child paths of `parent_path` matched by this + selector. 
This can contain parent_path itself.""" + path_cls = type(parent_path) + is_dir = path_cls.is_dir + exists = path_cls.exists + scandir = parent_path._accessor.scandir + if not is_dir(parent_path): + return iter([]) + return self._select_from(parent_path, is_dir, exists, scandir) + + +class _TerminatingSelector: + + def _select_from(self, parent_path, is_dir, exists, scandir): + yield parent_path + + +class _PreciseSelector(_Selector): + + def __init__(self, name, child_parts): + self.name = name + _Selector.__init__(self, child_parts) + + def _select_from(self, parent_path, is_dir, exists, scandir): + def try_iter(): + path = parent_path._make_child_relpath(self.name) + if (is_dir if self.dironly else exists)(path): + for p in self.successor._select_from( + path, is_dir, exists, scandir): + yield p + + def except_iter(exc): + return + yield + + for x in _try_except_permissionerror_iter(try_iter, except_iter): + yield x + + +class _WildcardSelector(_Selector): + + def __init__(self, pat, child_parts): + self.pat = re.compile(fnmatch.translate(pat)) + _Selector.__init__(self, child_parts) + + def _select_from(self, parent_path, is_dir, exists, scandir): + def try_iter(): + cf = parent_path._flavour.casefold + entries = list(scandir(parent_path)) + for entry in entries: + if not self.dironly or entry.is_dir(): + name = entry.name + casefolded = cf(name) + if self.pat.match(casefolded): + path = parent_path._make_child_relpath(name) + for p in self.successor._select_from( + path, is_dir, exists, scandir): + yield p + + def except_iter(exc): + return + yield + + for x in _try_except_permissionerror_iter(try_iter, except_iter): + yield x + + +class _RecursiveWildcardSelector(_Selector): + + def __init__(self, pat, child_parts): + _Selector.__init__(self, child_parts) + + def _iterate_directories(self, parent_path, is_dir, scandir): + yield parent_path + + def try_iter(): + entries = list(scandir(parent_path)) + for entry in entries: + if entry.is_dir() and not 
entry.is_symlink(): + path = parent_path._make_child_relpath(entry.name) + for p in self._iterate_directories(path, is_dir, scandir): + yield p + + def except_iter(exc): + return + yield + + for x in _try_except_permissionerror_iter(try_iter, except_iter): + yield x + + def _select_from(self, parent_path, is_dir, exists, scandir): + def try_iter(): + yielded = set() + try: + successor_select = self.successor._select_from + for starting_point in self._iterate_directories( + parent_path, is_dir, scandir): + for p in successor_select( + starting_point, is_dir, exists, scandir): + if p not in yielded: + yield p + yielded.add(p) + finally: + yielded.clear() + + def except_iter(exc): + return + yield + + for x in _try_except_permissionerror_iter(try_iter, except_iter): + yield x + + +# +# Public API +# + +class _PathParents(Sequence): + + """This object provides sequence-like access to the logical ancestors + of a path. Don't try to construct it yourself.""" + __slots__ = ('_pathcls', '_drv', '_root', '_parts') + + def __init__(self, path): + # We don't store the instance to avoid reference cycles + self._pathcls = type(path) + self._drv = path._drv + self._root = path._root + self._parts = path._parts + + def __len__(self): + if self._drv or self._root: + return len(self._parts) - 1 + else: + return len(self._parts) + + def __getitem__(self, idx): + if idx < 0 or idx >= len(self): + raise IndexError(idx) + return self._pathcls._from_parsed_parts(self._drv, self._root, + self._parts[:-idx - 1]) + + def __repr__(self): + return "<{0}.parents>".format(self._pathcls.__name__) + + +class PurePath(object): + + """PurePath represents a filesystem path and offers operations which + don't imply any actual filesystem I/O. Depending on your system, + instantiating a PurePath will return either a PurePosixPath or a + PureWindowsPath object. You can also instantiate either of these classes + directly, regardless of your system. 
@classmethod
def _parse_args(cls, args):
    """Canonicalize constructor arguments without building an instance.

    Each argument is either a PurePath (its parsed parts are reused) or an
    object convertible to a native ``str`` path; the collected pieces are
    handed to the flavour's parse_parts().

    Raises TypeError for arguments that do not resolve to a str.
    """
    collected = []
    for arg in args:
        if isinstance(arg, PurePath):
            collected += arg._parts
            continue
        if sys.version_info >= (3, 6):
            arg = os.fspath(arg)
        elif hasattr(arg, "__fspath__"):
            # duck typing for older Python versions
            arg = arg.__fspath__()
        if isinstance(arg, str):
            # Force-cast str subclasses to str (issue #21127)
            collected.append(str(arg))
        elif six.PY2 and isinstance(arg, six.text_type):
            # PY2 unicode: cast to str using the filesystem encoding
            collected.append(arg.encode(sys.getfilesystemencoding()))
        else:
            raise TypeError(
                "argument should be a str object or an os.PathLike "
                "object returning str, not %r"
                % type(arg))
    return cls._flavour.parse_parts(collected)
def as_posix(self):
    """Return str(self) with every occurrence of the flavour's separator
    replaced by a forward slash (/)."""
    separator = self._flavour.sep
    native = str(self)
    return native.replace(separator, '/')
def as_uri(self):
    """Return the path as a 'file' URI, as produced by the flavour's
    make_uri().

    Raises ValueError when the path is not absolute.
    """
    if self.is_absolute():
        return self._flavour.make_uri(self)
    raise ValueError("relative path can't be expressed as a file URI")
def with_name(self, name):
    """Return a new path whose final component is replaced by ``name``.

    Raises ValueError if this path has an empty name, or if ``name`` is
    empty, ends with a separator, carries a drive or root, or does not parse
    to exactly one component.
    """
    if not self.name:
        raise ValueError("%r has an empty name" % (self,))
    flavour = self._flavour
    drv, root, parsed = flavour.parse_parts((name,))
    ends_with_sep = bool(name) and name[-1] in (flavour.sep, flavour.altsep)
    if not name or ends_with_sep or drv or root or len(parsed) != 1:
        raise ValueError("Invalid name %r" % name)
    siblings = self._parts[:-1]
    return self._from_parsed_parts(self._drv, self._root, siblings + [name])
def relative_to(self, *other):
    """Return this path expressed relative to the path built from
    ``other``.

    Raises TypeError when no argument is given, and ValueError when this
    path does not start with the other path.
    """
    # Drive and root are treated as separate parts here, i.e.:
    #   Path('c:/').relative_to('c:') gives Path('/')
    #   Path('c:/').relative_to('/') raises ValueError
    if not other:
        raise TypeError("need at least one argument")
    drv, root, parts = self._drv, self._root, self._parts
    own = [drv, root] + parts[1:] if root else parts
    to_drv, to_root, to_parts = self._parse_args(other)
    base = [to_drv, to_root] + to_parts[1:] if to_root else to_parts
    n = len(base)
    fold = self._flavour.casefold_parts
    if n == 0:
        # An empty base only matches a path without drive and root.
        mismatch = root or drv
    else:
        mismatch = fold(own[:n]) != fold(base)
    if mismatch:
        formatted = self._format_parsed_parts(to_drv, to_root, to_parts)
        raise ValueError("{0!r} does not start with {1!r}"
                         .format(str(self), str(formatted)))
    return self._from_parsed_parts('', root if n == 1 else '', own[n:])
def is_absolute(self):
    """True if the path is absolute: it has a root and, when the flavour
    uses drives, a drive as well."""
    if self._root:
        if self._flavour.has_drv:
            return bool(self._drv)
        return True
    return False
+ """ + cf = self._flavour.casefold + path_pattern = cf(path_pattern) + drv, root, pat_parts = self._flavour.parse_parts((path_pattern,)) + if not pat_parts: + raise ValueError("empty pattern") + if drv and drv != cf(self._drv): + return False + if root and root != cf(self._root): + return False + parts = self._cparts + if drv or root: + if len(pat_parts) != len(parts): + return False + pat_parts = pat_parts[1:] + elif len(pat_parts) > len(parts): + return False + for part, pat in zip(reversed(parts), reversed(pat_parts)): + if not fnmatch.fnmatchcase(part, pat): + return False + return True + + +# Can't subclass os.PathLike from PurePath and keep the constructor +# optimizations in PurePath._parse_args(). +if sys.version_info >= (3, 6): + os.PathLike.register(PurePath) + + +class PurePosixPath(PurePath): + _flavour = _posix_flavour + __slots__ = () + + +class PureWindowsPath(PurePath): + _flavour = _windows_flavour + __slots__ = () + + +# Filesystem-accessing classes + + +class Path(PurePath): + __slots__ = ( + '_accessor', + '_closed', + ) + + def __new__(cls, *args, **kwargs): + if cls is Path: + cls = WindowsPath if os.name == 'nt' else PosixPath + self = cls._from_parts(args, init=False) + if not self._flavour.is_supported: + raise NotImplementedError("cannot instantiate %r on your system" + % (cls.__name__,)) + self._init() + return self + + def _init(self, + # Private non-constructor arguments + template=None, + ): + self._closed = False + if template is not None: + self._accessor = template._accessor + else: + self._accessor = _normal_accessor + + def _make_child_relpath(self, part): + # This is an optimization used for dir walking. `part` must be + # a single part relative to this path. 
def samefile(self, other_path):
    """Return whether ``other_path`` refers to the same file as this path.

    ``other_path`` may be any object with a stat() method or something
    os.stat() accepts.  When os.path has no samestat(), the comparison
    falls back to the unique path ids of both paths.
    """
    if not hasattr(os.path, "samestat"):
        own_name = six.text_type(self)
        other_name = six.text_type(other_path)
        own_id = _win32_get_unique_path_id(own_name)
        other_id = _win32_get_unique_path_id(other_name)
        return own_id == other_id
    own_stat = self.stat()
    try:
        other_stat = other_path.stat()
    except AttributeError:
        # Not a path object: let os.stat() deal with it.
        other_stat = os.stat(other_path)
    return os.path.samestat(own_stat, other_stat)
def glob(self, pattern):
    """Iterate over this subtree and yield all existing files (of any
    kind, including directories) matching the given pattern.

    Raises ValueError for an empty pattern or one that normalizes to no
    components at all (such as '.'), and NotImplementedError for patterns
    carrying a drive or root.  Because this is a generator, those errors
    surface when iteration starts.
    """
    if not pattern:
        raise ValueError("Unacceptable pattern: {0!r}".format(pattern))
    folded = self._flavour.casefold(pattern)
    drv, root, pattern_parts = self._flavour.parse_parts((folded,))
    if drv or root:
        raise NotImplementedError("Non-relative patterns are unsupported")
    if not pattern_parts:
        # Parsing left nothing to match; without this guard the selector
        # factory would fail with an IndexError on pattern_parts[0].
        raise ValueError("Unacceptable pattern: {0!r}".format(pattern))
    selector = _make_selector(tuple(pattern_parts))
    for p in selector.select_from(self):
        yield p
def open(self, mode='r', buffering=-1, encoding=None,
         errors=None, newline=None):
    """Open the file pointed to by this path and return a file object, as
    io.open() does.

    On Python 3.3 and later the path's own _opener is passed as the
    ``opener`` argument.
    """
    if self._closed:
        self._raise_closed()
    extra = {}
    if sys.version_info >= (3, 3):
        extra['opener'] = self._opener
    return io.open(str(self), mode, buffering, encoding, errors, newline,
                   **extra)
def touch(self, mode=0o666, exist_ok=True):
    """Create this file with the given access mode, if it doesn't exist.

    With ``exist_ok`` true, the accessor's utime() is tried first and the
    method returns as soon as that succeeds; otherwise the file is opened
    with O_CREAT | O_WRONLY (plus O_EXCL when ``exist_ok`` is false) and the
    descriptor is closed again.
    """
    if self._closed:
        self._raise_closed()
    if exist_ok:
        # First try to bump modification time
        # Implementation note: GNU touch uses the UTIME_NOW option of
        # the utimensat() / futimens() functions.
        try:
            self._accessor.utime(self, None)
        except OSError:
            # Avoid exception chaining: fall through to creation below.
            bumped = False
        else:
            bumped = True
        if bumped:
            return
        flags = os.O_CREAT | os.O_WRONLY
    else:
        flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL
    os.close(self._raw_open(flags, mode))
+ """ + if self._closed: + self._raise_closed() + + def _try_func(): + self._accessor.mkdir(self, mode) + + def _exc_func(exc): + if not parents or self.parent == self: + raise exc + self.parent.mkdir(parents=True, exist_ok=True) + self.mkdir(mode, parents=False, exist_ok=exist_ok) + + try: + _try_except_filenotfounderror(_try_func, _exc_func) + except OSError: + if not exist_ok or not self.is_dir(): + raise + + def chmod(self, mode): + """ + Change the permissions of the path, like os.chmod(). + """ + if self._closed: + self._raise_closed() + self._accessor.chmod(self, mode) + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + if self._closed: + self._raise_closed() + self._accessor.lchmod(self, mode) + + def unlink(self): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + if self._closed: + self._raise_closed() + self._accessor.unlink(self) + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + if self._closed: + self._raise_closed() + self._accessor.rmdir(self) + + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + if self._closed: + self._raise_closed() + return self._accessor.lstat(self) + + def rename(self, target): + """ + Rename this path to the given path. + """ + if self._closed: + self._raise_closed() + self._accessor.rename(self, target) + + def replace(self, target): + """ + Rename this path to the given path, clobbering the existing + destination if it exists. 
def exists(self):
    """Whether this path exists.

    A stat() failure with errno ENOENT or ENOTDIR means "does not exist";
    any other OSError is propagated to the caller.
    """
    try:
        self.stat()
    except OSError as err:
        if err.errno in (ENOENT, ENOTDIR):
            return False
        raise
    else:
        return True
def expanduser(self):
    """Return a new path with a leading ~ or ~user component replaced by
    the home directory reported by the flavour's gethomedir().

    The path itself is returned unchanged when it has a drive or root, has
    no parts, or its first part does not start with '~'.
    """
    parts = self._parts
    if self._drv or self._root:
        return self
    if not parts or parts[0][:1] != '~':
        return self
    # Everything after the tilde is the user name ('' for plain '~').
    homedir = self._flavour.gethomedir(parts[0][1:])
    return self._from_parts([homedir] + parts[1:])
def readfile(filename):
    """Read ``filename`` as UTF-8 text and return its content split on
    newline characters, as a list of strings."""
    stream = io.open(filename, encoding="utf-8")
    try:
        text = stream.read()
    finally:
        stream.close()
    return text.split("\n")
Troffaes', + author_email='matthias.troffaes@gmail.com', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Software Development :: Libraries', + 'Topic :: System :: Filesystems', + ], + download_url='https://pypi.python.org/pypi/pathlib2/', + url='https://pypi.python.org/pypi/pathlib2/', + install_requires=[ + 'six', + 'scandir;python_version<"3.5"', + ], +) diff --git a/third_party/python/pathlib2/tests/test_pathlib2.py b/third_party/python/pathlib2/tests/test_pathlib2.py new file mode 100644 index 000000000000..9bbb49863c2c --- /dev/null +++ b/third_party/python/pathlib2/tests/test_pathlib2.py @@ -0,0 +1,2401 @@ +# Copyright (c) 2014-2017 Matthias C. M. Troffaes +# Copyright (c) 2012-2014 Antoine Pitrou and contributors +# Distributed under the terms of the MIT License. 
# Backported from 3.4
def fs_is_case_insensitive(directory):
    """Detects if the file system for the specified directory is
    case-insensitive.

    A temporary file is created in ``directory`` and looked up again under
    a name with different letter case.  The temporary file is always
    removed.  OSError other than ENOENT from the lookup is propagated.
    """
    base_fp, base_path = tempfile.mkstemp(dir=directory)
    # mkstemp() hands over an open descriptor; only the name is needed.
    # Closing it right away avoids leaking the fd and lets the unlink in
    # the finally clause succeed on platforms that refuse to delete open
    # files.
    os.close(base_fp)
    try:
        case_path = base_path.upper()
        if case_path == base_path:
            case_path = base_path.lower()
        try:
            return os.path.samefile(base_path, case_path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            # The other-case name does not exist: case-sensitive.
            return False
    finally:
        os.unlink(base_path)
[sep, 'c'])) + + +class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase): + flavour = pathlib._posix_flavour + + def test_parse_parts(self): + check = self._check_parse_parts + # Collapsing of excess leading slashes, except for the double-slash + # special case. + check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) + check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) + check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) + # Paths which look like NT paths aren't treated specially + check(['c:a'], ('', '', ['c:a'])) + check(['c:\\a'], ('', '', ['c:\\a'])) + check(['\\a'], ('', '', ['\\a'])) + + def test_splitroot(self): + f = self.flavour.splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a/b'), ('', '', 'a/b')) + self.assertEqual(f('a/b/'), ('', '', 'a/b/')) + self.assertEqual(f('/a'), ('', '/', 'a')) + self.assertEqual(f('/a/b'), ('', '/', 'a/b')) + self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) + # The root is collapsed when there are redundant slashes + # except when there are exactly two leading slashes, which + # is a special case in POSIX. 
+ self.assertEqual(f('//a'), ('', '//', 'a')) + self.assertEqual(f('///a'), ('', '/', 'a')) + self.assertEqual(f('///a/b'), ('', '/', 'a/b')) + # Paths which look like NT paths aren't treated specially + self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) + self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) + self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + + +class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): + flavour = pathlib._windows_flavour + + def test_parse_parts(self): + check = self._check_parse_parts + # First part is anchored + check(['c:'], ('c:', '', ['c:'])) + check(['c:/'], ('c:', '\\', ['c:\\'])) + check(['/'], ('', '\\', ['\\'])) + check(['c:a'], ('c:', '', ['c:', 'a'])) + check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) + check(['/a'], ('', '\\', ['\\', 'a'])) + # UNC paths + check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) + # Second part is anchored, so that the first part is ignored + check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) + check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + # UNC paths + check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Collapsing and stripping excess slashes + check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) + # UNC paths + check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Extended paths + check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) + check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) + check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) + # Extended UNC paths (format is "\\?\UNC\server\share") + check(['//?/UNC/b/c'], + ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) + check(['//?/UNC/b/c/d'], + ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) + # Second part has a root but not drive + check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) + check(['Z:/a', 
'/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + check(['//?/Z:/a', '/b', 'c'], + ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) + + def test_splitroot(self): + f = self.flavour.splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a\\b'), ('', '', 'a\\b')) + self.assertEqual(f('\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) + self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) + # Redundant slashes in the root are collapsed + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) + self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) + self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) + # Valid UNC paths + self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) + # These are non-UNC paths (according to ntpath.py and test_ntpath) + # However, command.com says such paths are invalid, so it's + # difficult to know what the right semantics are + self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + + +# +# Tests for the pure classes +# + +with_fsencode = unittest.skipIf( + sys.version_info < (3, 2), + 'os.fsencode has been introduced in version 3.2') + + +class _BasePurePathTest(object): + + # keys are canonical paths, values are list of tuples of arguments + # supposed to produce equal paths + equivalences = { + 'a/b': [ + ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), + ('a/b/',), ('a//b',), ('a//b//',), + # empty components get removed + ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), + ], + '/b/c/d': [ + ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), + ('/a', '/b/c', 'd'), + # empty components get removed + ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), + ], + } + + def setUp(self): + p = 
self.cls('a') + self.flavour = p._flavour + self.sep = self.flavour.sep + self.altsep = self.flavour.altsep + + def test_constructor_common(self): + P = self.cls + p = P('a') + self.assertIsInstance(p, P) + + class PathLike: + def __fspath__(self): + return "a/b/c" + + P('a', 'b', 'c') + P('/a', 'b', 'c') + P('a/b/c') + P('/a/b/c') + P(PathLike()) + self.assertEqual(P(P('a')), P('a')) + self.assertEqual(P(P('a'), 'b'), P('a/b')) + self.assertEqual(P(P('a'), P('b')), P('a/b')) + self.assertEqual(P(P('a'), P('b'), P('c')), P(PathLike())) + + def _check_str_subclass(self, *args): + # Issue #21127: it should be possible to construct a PurePath object + # from a str subclass instance, and it then gets converted to + # a pure str object. + class StrSubclass(str): + pass + P = self.cls + p = P(*(StrSubclass(x) for x in args)) + self.assertEqual(p, P(*args)) + for part in p.parts: + self.assertIs(type(part), str) + + def test_str_subclass_common(self): + self._check_str_subclass('') + self._check_str_subclass('.') + self._check_str_subclass('a') + self._check_str_subclass('a/b.txt') + self._check_str_subclass('/a/b.txt') + + def test_join_common(self): + P = self.cls + p = P('a/b') + pp = p.joinpath('c') + self.assertEqual(pp, P('a/b/c')) + self.assertIs(type(pp), type(p)) + pp = p.joinpath('c', 'd') + self.assertEqual(pp, P('a/b/c/d')) + pp = p.joinpath(P('c')) + self.assertEqual(pp, P('a/b/c')) + pp = p.joinpath('/c') + self.assertEqual(pp, P('/c')) + + def test_div_common(self): + # Basically the same as joinpath() + P = self.cls + p = P('a/b') + pp = p / 'c' + self.assertEqual(pp, P('a/b/c')) + self.assertIs(type(pp), type(p)) + pp = p / 'c/d' + self.assertEqual(pp, P('a/b/c/d')) + pp = p / 'c' / 'd' + self.assertEqual(pp, P('a/b/c/d')) + pp = 'c' / p / 'd' + self.assertEqual(pp, P('c/a/b/d')) + pp = p / P('c') + self.assertEqual(pp, P('a/b/c')) + pp = p / '/c' + self.assertEqual(pp, P('/c')) + + def _check_str(self, expected, args): + p = self.cls(*args) + 
self.assertEqual(str(p), expected.replace('/', self.sep)) + + def test_str_common(self): + # Canonicalized paths roundtrip + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + self._check_str(pathstr, (pathstr,)) + # Special case for the empty path + self._check_str('.', ('',)) + # Other tests for str() are in test_equivalences() + + def test_as_posix_common(self): + P = self.cls + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + self.assertEqual(P(pathstr).as_posix(), pathstr) + # Other tests for as_posix() are in test_equivalences() + + @with_fsencode + def test_as_bytes_common(self): + sep = os.fsencode(self.sep) + P = self.cls + self.assertEqual(bytes(P('a/b')), b'a' + sep + b'b') + + def test_as_uri_common(self): + P = self.cls + with self.assertRaises(ValueError): + P('a').as_uri() + with self.assertRaises(ValueError): + P().as_uri() + + def test_repr_common(self): + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + p = self.cls(pathstr) + clsname = p.__class__.__name__ + r = repr(p) + # The repr() is in the form ClassName("forward-slashes path") + self.assertTrue(r.startswith(clsname + '('), r) + self.assertTrue(r.endswith(')'), r) + inner = r[len(clsname) + 1: -1] + self.assertEqual(eval(inner), p.as_posix()) + # The repr() roundtrips + q = eval(r, pathlib.__dict__) + self.assertIs(q.__class__, p.__class__) + self.assertEqual(q, p) + self.assertEqual(repr(q), r) + + def test_eq_common(self): + P = self.cls + self.assertEqual(P('a/b'), P('a/b')) + self.assertEqual(P('a/b'), P('a', 'b')) + self.assertNotEqual(P('a/b'), P('a')) + self.assertNotEqual(P('a/b'), P('/a/b')) + self.assertNotEqual(P('a/b'), P()) + self.assertNotEqual(P('/a/b'), P('/')) + self.assertNotEqual(P(), P('/')) + self.assertNotEqual(P(), "") + self.assertNotEqual(P(), {}) + self.assertNotEqual(P(), int) + + def test_match_common(self): + P = self.cls + self.assertRaises(ValueError, P('a').match, '') + self.assertRaises(ValueError, P('a').match, '.') + 
# Simple relative pattern + self.assertTrue(P('b.py').match('b.py')) + self.assertTrue(P('a/b.py').match('b.py')) + self.assertTrue(P('/a/b.py').match('b.py')) + self.assertFalse(P('a.py').match('b.py')) + self.assertFalse(P('b/py').match('b.py')) + self.assertFalse(P('/a.py').match('b.py')) + self.assertFalse(P('b.py/c').match('b.py')) + # Wilcard relative pattern + self.assertTrue(P('b.py').match('*.py')) + self.assertTrue(P('a/b.py').match('*.py')) + self.assertTrue(P('/a/b.py').match('*.py')) + self.assertFalse(P('b.pyc').match('*.py')) + self.assertFalse(P('b./py').match('*.py')) + self.assertFalse(P('b.py/c').match('*.py')) + # Multi-part relative pattern + self.assertTrue(P('ab/c.py').match('a*/*.py')) + self.assertTrue(P('/d/ab/c.py').match('a*/*.py')) + self.assertFalse(P('a.py').match('a*/*.py')) + self.assertFalse(P('/dab/c.py').match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').match('a*/*.py')) + # Absolute pattern + self.assertTrue(P('/b.py').match('/*.py')) + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('a/b.py').match('/*.py')) + self.assertFalse(P('/a/b.py').match('/*.py')) + # Multi-part absolute pattern + self.assertTrue(P('/a/b.py').match('/a/*.py')) + self.assertFalse(P('/ab.py').match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').match('/a/*.py')) + + def test_ordering_common(self): + # Ordering is tuple-alike + def assertLess(a, b): + self.assertLess(a, b) + self.assertGreater(b, a) + P = self.cls + a = P('a') + b = P('a/b') + c = P('abc') + d = P('b') + assertLess(a, b) + assertLess(a, c) + assertLess(a, d) + assertLess(b, c) + assertLess(c, d) + P = self.cls + a = P('/a') + b = P('/a/b') + c = P('/abc') + d = P('/b') + assertLess(a, b) + assertLess(a, c) + assertLess(a, d) + assertLess(b, c) + assertLess(c, d) + if sys.version_info > (3,): + with self.assertRaises(TypeError): + P() < {} + else: + P() < {} + + def test_parts_common(self): + # `parts` returns a tuple + sep = self.sep + P = self.cls + p = P('a/b') + parts = 
p.parts + self.assertEqual(parts, ('a', 'b')) + # The object gets reused + self.assertIs(parts, p.parts) + # When the path is absolute, the anchor is a separate part + p = P('/a/b') + parts = p.parts + self.assertEqual(parts, (sep, 'a', 'b')) + + def test_fspath_common(self): + P = self.cls + p = P('a/b') + self._check_str(p.__fspath__(), ('a/b',)) + if sys.version_info >= (3, 6): + self._check_str(os.fspath(p), ('a/b',)) + + def test_equivalences(self): + for k, tuples in self.equivalences.items(): + canon = k.replace('/', self.sep) + posix = k.replace(self.sep, '/') + if canon != posix: + tuples = tuples + [ + tuple(part.replace('/', self.sep) for part in t) + for t in tuples + ] + tuples.append((posix, )) + pcanon = self.cls(canon) + for t in tuples: + p = self.cls(*t) + self.assertEqual(p, pcanon, "failed with args {0}".format(t)) + self.assertEqual(hash(p), hash(pcanon)) + self.assertEqual(str(p), canon) + self.assertEqual(p.as_posix(), posix) + + def test_parent_common(self): + # Relative + P = self.cls + p = P('a/b/c') + self.assertEqual(p.parent, P('a/b')) + self.assertEqual(p.parent.parent, P('a')) + self.assertEqual(p.parent.parent.parent, P()) + self.assertEqual(p.parent.parent.parent.parent, P()) + # Anchored + p = P('/a/b/c') + self.assertEqual(p.parent, P('/a/b')) + self.assertEqual(p.parent.parent, P('/a')) + self.assertEqual(p.parent.parent.parent, P('/')) + self.assertEqual(p.parent.parent.parent.parent, P('/')) + + def test_parents_common(self): + # Relative + P = self.cls + p = P('a/b/c') + par = p.parents + self.assertEqual(len(par), 3) + self.assertEqual(par[0], P('a/b')) + self.assertEqual(par[1], P('a')) + self.assertEqual(par[2], P('.')) + self.assertEqual(list(par), [P('a/b'), P('a'), P('.')]) + with self.assertRaises(IndexError): + par[-1] + with self.assertRaises(IndexError): + par[3] + with self.assertRaises(TypeError): + par[0] = p + # Anchored + p = P('/a/b/c') + par = p.parents + self.assertEqual(len(par), 3) + 
self.assertEqual(par[0], P('/a/b')) + self.assertEqual(par[1], P('/a')) + self.assertEqual(par[2], P('/')) + self.assertEqual(list(par), [P('/a/b'), P('/a'), P('/')]) + with self.assertRaises(IndexError): + par[3] + + def test_drive_common(self): + P = self.cls + self.assertEqual(P('a/b').drive, '') + self.assertEqual(P('/a/b').drive, '') + self.assertEqual(P('').drive, '') + + def test_root_common(self): + P = self.cls + sep = self.sep + self.assertEqual(P('').root, '') + self.assertEqual(P('a/b').root, '') + self.assertEqual(P('/').root, sep) + self.assertEqual(P('/a/b').root, sep) + + def test_anchor_common(self): + P = self.cls + sep = self.sep + self.assertEqual(P('').anchor, '') + self.assertEqual(P('a/b').anchor, '') + self.assertEqual(P('/').anchor, sep) + self.assertEqual(P('/a/b').anchor, sep) + + def test_name_common(self): + P = self.cls + self.assertEqual(P('').name, '') + self.assertEqual(P('.').name, '') + self.assertEqual(P('/').name, '') + self.assertEqual(P('a/b').name, 'b') + self.assertEqual(P('/a/b').name, 'b') + self.assertEqual(P('/a/b/.').name, 'b') + self.assertEqual(P('a/b.py').name, 'b.py') + self.assertEqual(P('/a/b.py').name, 'b.py') + + def test_suffix_common(self): + P = self.cls + self.assertEqual(P('').suffix, '') + self.assertEqual(P('.').suffix, '') + self.assertEqual(P('..').suffix, '') + self.assertEqual(P('/').suffix, '') + self.assertEqual(P('a/b').suffix, '') + self.assertEqual(P('/a/b').suffix, '') + self.assertEqual(P('/a/b/.').suffix, '') + self.assertEqual(P('a/b.py').suffix, '.py') + self.assertEqual(P('/a/b.py').suffix, '.py') + self.assertEqual(P('a/.hgrc').suffix, '') + self.assertEqual(P('/a/.hgrc').suffix, '') + self.assertEqual(P('a/.hg.rc').suffix, '.rc') + self.assertEqual(P('/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('/a/Some name. 
Ending with a dot.').suffix, '') + + def test_suffixes_common(self): + P = self.cls + self.assertEqual(P('').suffixes, []) + self.assertEqual(P('.').suffixes, []) + self.assertEqual(P('/').suffixes, []) + self.assertEqual(P('a/b').suffixes, []) + self.assertEqual(P('/a/b').suffixes, []) + self.assertEqual(P('/a/b/.').suffixes, []) + self.assertEqual(P('a/b.py').suffixes, ['.py']) + self.assertEqual(P('/a/b.py').suffixes, ['.py']) + self.assertEqual(P('a/.hgrc').suffixes, []) + self.assertEqual(P('/a/.hgrc').suffixes, []) + self.assertEqual(P('a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) + + def test_stem_common(self): + P = self.cls + self.assertEqual(P('').stem, '') + self.assertEqual(P('.').stem, '') + self.assertEqual(P('..').stem, '..') + self.assertEqual(P('/').stem, '') + self.assertEqual(P('a/b').stem, 'b') + self.assertEqual(P('a/b.py').stem, 'b') + self.assertEqual(P('a/.hgrc').stem, '.hgrc') + self.assertEqual(P('a/.hg.rc').stem, '.hg') + self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('a/Some name. Ending with a dot.').stem, + 'Some name. 
Ending with a dot.') + + def test_with_name_common(self): + P = self.cls + self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/b').with_name('d.xml'), P('/a/d.xml')) + self.assertEqual(P('a/b.py').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/b.py').with_name('d.xml'), P('/a/d.xml')) + self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) + self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) + self.assertRaises(ValueError, P('').with_name, 'd.xml') + self.assertRaises(ValueError, P('.').with_name, 'd.xml') + self.assertRaises(ValueError, P('/').with_name, 'd.xml') + self.assertRaises(ValueError, P('a/b').with_name, '') + self.assertRaises(ValueError, P('a/b').with_name, '/c') + self.assertRaises(ValueError, P('a/b').with_name, 'c/') + self.assertRaises(ValueError, P('a/b').with_name, 'c/d') + + def test_with_suffix_common(self): + P = self.cls + self.assertEqual(P('a/b').with_suffix('.gz'), P('a/b.gz')) + self.assertEqual(P('/a/b').with_suffix('.gz'), P('/a/b.gz')) + self.assertEqual(P('a/b.py').with_suffix('.gz'), P('a/b.gz')) + self.assertEqual(P('/a/b.py').with_suffix('.gz'), P('/a/b.gz')) + # Stripping suffix + self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) + self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + # Path doesn't have a "filename" component + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('.').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + # Invalid suffix + self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('a/b').with_suffix, '/') + self.assertRaises(ValueError, P('a/b').with_suffix, '.') + self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') + self.assertRaises(ValueError, P('a/b').with_suffix, './.d') + 
self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') + + def test_relative_to_common(self): + P = self.cls + p = P('a/b') + self.assertRaises(TypeError, p.relative_to) + if six.PY3: + self.assertRaises(TypeError, p.relative_to, b'a') + self.assertEqual(p.relative_to(P()), P('a/b')) + self.assertEqual(p.relative_to(''), P('a/b')) + self.assertEqual(p.relative_to(P('a')), P('b')) + self.assertEqual(p.relative_to('a'), P('b')) + self.assertEqual(p.relative_to('a/'), P('b')) + self.assertEqual(p.relative_to(P('a/b')), P()) + self.assertEqual(p.relative_to('a/b'), P()) + # With several args + self.assertEqual(p.relative_to('a', 'b'), P()) + # Unrelated paths + self.assertRaises(ValueError, p.relative_to, P('c')) + self.assertRaises(ValueError, p.relative_to, P('a/b/c')) + self.assertRaises(ValueError, p.relative_to, P('a/c')) + self.assertRaises(ValueError, p.relative_to, P('/a')) + p = P('/a/b') + self.assertEqual(p.relative_to(P('/')), P('a/b')) + self.assertEqual(p.relative_to('/'), P('a/b')) + self.assertEqual(p.relative_to(P('/a')), P('b')) + self.assertEqual(p.relative_to('/a'), P('b')) + self.assertEqual(p.relative_to('/a/'), P('b')) + self.assertEqual(p.relative_to(P('/a/b')), P()) + self.assertEqual(p.relative_to('/a/b'), P()) + # Unrelated paths + self.assertRaises(ValueError, p.relative_to, P('/c')) + self.assertRaises(ValueError, p.relative_to, P('/a/b/c')) + self.assertRaises(ValueError, p.relative_to, P('/a/c')) + self.assertRaises(ValueError, p.relative_to, P()) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('a')) + + def test_pickling_common(self): + P = self.cls + p = P('/a/b') + for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(p, proto) + pp = pickle.loads(dumped) + self.assertIs(pp.__class__, p.__class__) + self.assertEqual(pp, p) + self.assertEqual(hash(pp), hash(p)) + self.assertEqual(str(pp), str(p)) + + # note: this is a new test not part of upstream + # test 
that unicode works on Python 2 + @unittest.skipIf( + six.unichr(0x0100).encode( + sys.getfilesystemencoding(), "replace") == b"?", + "file system encoding only supports ascii") + def test_unicode(self): + self.cls(six.unichr(0x0100)) + + +class PurePosixPathTest(_BasePurePathTest, unittest.TestCase): + cls = pathlib.PurePosixPath + + def test_root(self): + P = self.cls + self.assertEqual(P('/a/b').root, '/') + self.assertEqual(P('///a/b').root, '/') + # POSIX special case for two leading slashes + self.assertEqual(P('//a/b').root, '//') + + def test_eq(self): + P = self.cls + self.assertNotEqual(P('a/b'), P('A/b')) + self.assertEqual(P('/a'), P('///a')) + self.assertNotEqual(P('/a'), P('//a')) + + def test_as_uri(self): + P = self.cls + self.assertEqual(P('/').as_uri(), 'file:///') + self.assertEqual(P('/a/b.c').as_uri(), 'file:///a/b.c') + self.assertEqual(P('/a/b%#c').as_uri(), 'file:///a/b%25%23c') + + @with_fsencode + def test_as_uri_non_ascii(self): + from urllib.parse import quote_from_bytes + P = self.cls + try: + os.fsencode('\xe9') + except UnicodeEncodeError: + self.skipTest("\\xe9 cannot be encoded to the filesystem encoding") + self.assertEqual(P('/a/b\xe9').as_uri(), + 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) + + def test_match(self): + P = self.cls + self.assertFalse(P('A.py').match('a.PY')) + + def test_is_absolute(self): + P = self.cls + self.assertFalse(P().is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertTrue(P('/').is_absolute()) + self.assertTrue(P('/a').is_absolute()) + self.assertTrue(P('/a/b/').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + + def test_is_reserved(self): + P = self.cls + self.assertIs(False, P('').is_reserved()) + self.assertIs(False, P('/').is_reserved()) + self.assertIs(False, P('/foo/bar').is_reserved()) + self.assertIs(False, P('/dev/con/PRN/NUL').is_reserved()) + + def test_join(self): + P 
= self.cls + p = P('//a') + pp = p.joinpath('b') + self.assertEqual(pp, P('//a/b')) + pp = P('/a').joinpath('//c') + self.assertEqual(pp, P('//c')) + pp = P('//a').joinpath('/c') + self.assertEqual(pp, P('/c')) + + def test_div(self): + # Basically the same as joinpath() + P = self.cls + p = P('//a') + pp = p / 'b' + self.assertEqual(pp, P('//a/b')) + pp = P('/a') / '//c' + self.assertEqual(pp, P('//c')) + pp = P('//a') / '/c' + self.assertEqual(pp, P('/c')) + + +class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): + cls = pathlib.PureWindowsPath + + equivalences = _BasePurePathTest.equivalences.copy() + equivalences.update({ + 'c:a': [('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a')], + 'c:/a': [ + ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'), + ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'), + ], + '//a/b/': [('//a/b',)], + '//a/b/c': [ + ('//a/b', 'c'), ('//a/b/', 'c'), + ], + }) + + def test_str(self): + p = self.cls('a/b/c') + self.assertEqual(str(p), 'a\\b\\c') + p = self.cls('c:/a/b/c') + self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('//a/b') + self.assertEqual(str(p), '\\\\a\\b\\') + p = self.cls('//a/b/c') + self.assertEqual(str(p), '\\\\a\\b\\c') + p = self.cls('//a/b/c/d') + self.assertEqual(str(p), '\\\\a\\b\\c\\d') + + def test_str_subclass(self): + self._check_str_subclass('c:') + self._check_str_subclass('c:a') + self._check_str_subclass('c:a\\b.txt') + self._check_str_subclass('c:\\') + self._check_str_subclass('c:\\a') + self._check_str_subclass('c:\\a\\b.txt') + self._check_str_subclass('\\\\some\\share') + self._check_str_subclass('\\\\some\\share\\a') + self._check_str_subclass('\\\\some\\share\\a\\b.txt') + + def test_eq(self): + P = self.cls + self.assertEqual(P('c:a/b'), P('c:a/b')) + self.assertEqual(P('c:a/b'), P('c:', 'a', 'b')) + self.assertNotEqual(P('c:a/b'), P('d:a/b')) + self.assertNotEqual(P('c:a/b'), P('c:/a/b')) + self.assertNotEqual(P('/a/b'), P('c:/a/b')) + # Case-insensitivity + self.assertEqual(P('a/B'), P('A/b')) + 
self.assertEqual(P('C:a/B'), P('c:A/b')) + self.assertEqual(P('//Some/SHARE/a/B'), P('//somE/share/A/b')) + + @with_fsencode + def test_as_uri(self): + P = self.cls + with self.assertRaises(ValueError): + P('/a/b').as_uri() + with self.assertRaises(ValueError): + P('c:a/b').as_uri() + self.assertEqual(P('c:/').as_uri(), 'file:///c:/') + self.assertEqual(P('c:/a/b.c').as_uri(), 'file:///c:/a/b.c') + self.assertEqual(P('c:/a/b%#c').as_uri(), 'file:///c:/a/b%25%23c') + self.assertEqual(P('c:/a/b\xe9').as_uri(), 'file:///c:/a/b%C3%A9') + self.assertEqual(P('//some/share/').as_uri(), 'file://some/share/') + self.assertEqual(P('//some/share/a/b.c').as_uri(), + 'file://some/share/a/b.c') + self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), + 'file://some/share/a/b%25%23c%C3%A9') + + def test_match_common(self): + P = self.cls + # Absolute patterns + self.assertTrue(P('c:/b.py').match('/*.py')) + self.assertTrue(P('c:/b.py').match('c:*.py')) + self.assertTrue(P('c:/b.py').match('c:/*.py')) + self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('b.py').match('c:*.py')) + self.assertFalse(P('b.py').match('c:/*.py')) + self.assertFalse(P('c:b.py').match('/*.py')) + self.assertFalse(P('c:b.py').match('c:/*.py')) + self.assertFalse(P('/b.py').match('c:*.py')) + self.assertFalse(P('/b.py').match('c:/*.py')) + # UNC patterns + self.assertTrue(P('//some/share/a.py').match('/*.py')) + self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) + # Case-insensitivity + self.assertTrue(P('B.py').match('b.PY')) + self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) + self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + + def test_ordering_common(self): + # Case-insensitivity + def assertOrderedEqual(a, b): + self.assertLessEqual(a, b) + 
self.assertGreaterEqual(b, a) + P = self.cls + p = P('c:A/b') + q = P('C:a/B') + assertOrderedEqual(p, q) + self.assertFalse(p < q) + self.assertFalse(p > q) + p = P('//some/Share/A/b') + q = P('//Some/SHARE/a/B') + assertOrderedEqual(p, q) + self.assertFalse(p < q) + self.assertFalse(p > q) + + def test_parts(self): + P = self.cls + p = P('c:a/b') + parts = p.parts + self.assertEqual(parts, ('c:', 'a', 'b')) + p = P('c:/a/b') + parts = p.parts + self.assertEqual(parts, ('c:\\', 'a', 'b')) + p = P('//a/b/c/d') + parts = p.parts + self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) + + def test_parent(self): + # Anchored + P = self.cls + p = P('z:a/b/c') + self.assertEqual(p.parent, P('z:a/b')) + self.assertEqual(p.parent.parent, P('z:a')) + self.assertEqual(p.parent.parent.parent, P('z:')) + self.assertEqual(p.parent.parent.parent.parent, P('z:')) + p = P('z:/a/b/c') + self.assertEqual(p.parent, P('z:/a/b')) + self.assertEqual(p.parent.parent, P('z:/a')) + self.assertEqual(p.parent.parent.parent, P('z:/')) + self.assertEqual(p.parent.parent.parent.parent, P('z:/')) + p = P('//a/b/c/d') + self.assertEqual(p.parent, P('//a/b/c')) + self.assertEqual(p.parent.parent, P('//a/b')) + self.assertEqual(p.parent.parent.parent, P('//a/b')) + + def test_parents(self): + # Anchored + P = self.cls + p = P('z:a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:a')) + self.assertEqual(par[1], P('z:')) + self.assertEqual(list(par), [P('z:a'), P('z:')]) + with self.assertRaises(IndexError): + par[2] + p = P('z:/a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:/a')) + self.assertEqual(par[1], P('z:/')) + self.assertEqual(list(par), [P('z:/a'), P('z:/')]) + with self.assertRaises(IndexError): + par[2] + p = P('//a/b/c/d') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('//a/b/c')) + self.assertEqual(par[1], P('//a/b')) + self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) + 
with self.assertRaises(IndexError): + par[2] + + def test_drive(self): + P = self.cls + self.assertEqual(P('c:').drive, 'c:') + self.assertEqual(P('c:a/b').drive, 'c:') + self.assertEqual(P('c:/').drive, 'c:') + self.assertEqual(P('c:/a/b/').drive, 'c:') + self.assertEqual(P('//a/b').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + + def test_root(self): + P = self.cls + self.assertEqual(P('c:').root, '') + self.assertEqual(P('c:a/b').root, '') + self.assertEqual(P('c:/').root, '\\') + self.assertEqual(P('c:/a/b/').root, '\\') + self.assertEqual(P('//a/b').root, '\\') + self.assertEqual(P('//a/b/').root, '\\') + self.assertEqual(P('//a/b/c/d').root, '\\') + + def test_anchor(self): + P = self.cls + self.assertEqual(P('c:').anchor, 'c:') + self.assertEqual(P('c:a/b').anchor, 'c:') + self.assertEqual(P('c:/').anchor, 'c:\\') + self.assertEqual(P('c:/a/b/').anchor, 'c:\\') + self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') + + def test_name(self): + P = self.cls + self.assertEqual(P('c:').name, '') + self.assertEqual(P('c:/').name, '') + self.assertEqual(P('c:a/b').name, 'b') + self.assertEqual(P('c:/a/b').name, 'b') + self.assertEqual(P('c:a/b.py').name, 'b.py') + self.assertEqual(P('c:/a/b.py').name, 'b.py') + self.assertEqual(P('//My.py/Share.php').name, '') + self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') + + def test_suffix(self): + P = self.cls + self.assertEqual(P('c:').suffix, '') + self.assertEqual(P('c:/').suffix, '') + self.assertEqual(P('c:a/b').suffix, '') + self.assertEqual(P('c:/a/b').suffix, '') + self.assertEqual(P('c:a/b.py').suffix, '.py') + self.assertEqual(P('c:/a/b.py').suffix, '.py') + self.assertEqual(P('c:a/.hgrc').suffix, '') + self.assertEqual(P('c:/a/.hgrc').suffix, '') + self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') + 
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('//My.py/Share.php').suffix, '') + self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') + + def test_suffixes(self): + P = self.cls + self.assertEqual(P('c:').suffixes, []) + self.assertEqual(P('c:/').suffixes, []) + self.assertEqual(P('c:a/b').suffixes, []) + self.assertEqual(P('c:/a/b').suffixes, []) + self.assertEqual(P('c:a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:a/.hgrc').suffixes, []) + self.assertEqual(P('c:/a/.hgrc').suffixes, []) + self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('//My.py/Share.php').suffixes, []) + self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) + + def test_stem(self): + P = self.cls + self.assertEqual(P('c:').stem, '') + self.assertEqual(P('c:.').stem, '') + self.assertEqual(P('c:..').stem, '..') + self.assertEqual(P('c:/').stem, '') + self.assertEqual(P('c:a/b').stem, 'b') + self.assertEqual(P('c:a/b.py').stem, 'b') + self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') + self.assertEqual(P('c:a/.hg.rc').stem, '.hg') + self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, + 'Some name. 
Ending with a dot.') + + def test_with_name(self): + P = self.cls + self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) + self.assertEqual( + P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual( + P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) + self.assertRaises(ValueError, P('c:').with_name, 'd.xml') + self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') + self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') + self.assertRaises(ValueError, P('c:a/b').with_name, 'd:') + self.assertRaises(ValueError, P('c:a/b').with_name, 'd:e') + self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') + + def test_with_suffix(self): + P = self.cls + self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) + self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) + # Path doesn't have a "filename" component + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('.').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') + # Invalid suffix + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') + 
self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') + + def test_relative_to(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) + self.assertEqual(p.relative_to('c:foO'), P('Bar')) + self.assertEqual(p.relative_to('c:foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR')), P()) + self.assertEqual(p.relative_to('c:foO/baR'), P()) + # Unrelated paths + self.assertRaises(ValueError, p.relative_to, P()) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) + p = P('C:/Foo/Bar') + self.assertEqual(p.relative_to(P('c:')), P('/Foo/Bar')) + self.assertEqual(p.relative_to('c:'), P('/Foo/Bar')) + self.assertEqual(str(p.relative_to(P('c:'))), '\\Foo\\Bar') + self.assertEqual(str(p.relative_to('c:')), '\\Foo\\Bar') + self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) + self.assertEqual(p.relative_to('c:/foO'), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) + self.assertEqual(p.relative_to('c:/foO/baR'), P()) + # Unrelated paths + self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) + 
self.assertRaises(ValueError, p.relative_to, P('C:Foo')) + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('d:/')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + # UNC paths + p = P('//Server/Share/Foo/Bar') + self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) + # Unrelated paths + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) + + def test_is_absolute(self): + P = self.cls + # Under NT, only paths with both a drive and a root are absolute + self.assertFalse(P().is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertFalse(P('/').is_absolute()) + self.assertFalse(P('/a').is_absolute()) + self.assertFalse(P('/a/b/').is_absolute()) + self.assertFalse(P('c:').is_absolute()) + self.assertFalse(P('c:a').is_absolute()) + self.assertFalse(P('c:a/b/').is_absolute()) + self.assertTrue(P('c:/').is_absolute()) + self.assertTrue(P('c:/a').is_absolute()) + self.assertTrue(P('c:/a/b/').is_absolute()) + # UNC paths are absolute by definition + self.assertTrue(P('//a/b').is_absolute()) + self.assertTrue(P('//a/b/').is_absolute()) + 
self.assertTrue(P('//a/b/c').is_absolute()) + self.assertTrue(P('//a/b/c/d').is_absolute()) + + def test_join(self): + P = self.cls + p = P('C:/a/b') + pp = p.joinpath('x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + pp = p.joinpath('D:x/y') + self.assertEqual(pp, P('D:x/y')) + pp = p.joinpath('D:/x/y') + self.assertEqual(pp, P('D:/x/y')) + pp = p.joinpath('//host/share/x/y') + self.assertEqual(pp, P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + pp = p.joinpath('c:x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('c:/x/y') + self.assertEqual(pp, P('C:/x/y')) + + def test_div(self): + # Basically the same as joinpath() + P = self.cls + p = P('C:/a/b') + self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) + self.assertEqual(p / '/x/y', P('C:/x/y')) + self.assertEqual(p / '/x' / 'y', P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + self.assertEqual(p / 'D:x/y', P('D:x/y')) + self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) + self.assertEqual(p / 'D:/x/y', P('D:/x/y')) + self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) + self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. 
+ self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'c:/x/y', P('C:/x/y')) + + def test_is_reserved(self): + P = self.cls + self.assertIs(False, P('').is_reserved()) + self.assertIs(False, P('/').is_reserved()) + self.assertIs(False, P('/foo/bar').is_reserved()) + self.assertIs(True, P('con').is_reserved()) + self.assertIs(True, P('NUL').is_reserved()) + self.assertIs(True, P('NUL.txt').is_reserved()) + self.assertIs(True, P('com1').is_reserved()) + self.assertIs(True, P('com9.bar').is_reserved()) + self.assertIs(False, P('bar.com9').is_reserved()) + self.assertIs(True, P('lpt1').is_reserved()) + self.assertIs(True, P('lpt9.bar').is_reserved()) + self.assertIs(False, P('bar.lpt9').is_reserved()) + # Only the last component matters + self.assertIs(False, P('c:/NUL/con/baz').is_reserved()) + # UNC paths are never reserved + self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) + + +class PurePathTest(_BasePurePathTest, unittest.TestCase): + cls = pathlib.PurePath + + def test_concrete_class(self): + p = self.cls('a') + self.assertIs( + type(p), + pathlib.PureWindowsPath + if os.name == 'nt' else pathlib.PurePosixPath) + + def test_different_flavours_unequal(self): + p = pathlib.PurePosixPath('a') + q = pathlib.PureWindowsPath('a') + self.assertNotEqual(p, q) + + @unittest.skipIf(sys.version_info < (3, 0), + 'Most types are orderable in Python 2') + def test_different_flavours_unordered(self): + p = pathlib.PurePosixPath('a') + q = pathlib.PureWindowsPath('a') + with self.assertRaises(TypeError): + p < q + with self.assertRaises(TypeError): + p <= q + with self.assertRaises(TypeError): + p > q + with self.assertRaises(TypeError): + p >= q + + +# +# Tests for the concrete classes +# + +# Make sure any symbolic links in the base test path are resolved +BASE = os.path.realpath(TESTFN) + + +def join(*x): + return os.path.join(BASE, *x) + + +def rel_join(*x): + return os.path.join(TESTFN, *x) + + +only_nt = unittest.skipIf(os.name != 'nt', + 
'test requires a Windows-compatible system') +only_posix = unittest.skipIf(os.name == 'nt', + 'test requires a POSIX-compatible system') + + +@only_posix +class PosixPathAsPureTest(PurePosixPathTest): + cls = pathlib.PosixPath + + +@only_nt +class WindowsPathAsPureTest(PureWindowsPathTest): + cls = pathlib.WindowsPath + + def test_owner(self): + P = self.cls + with self.assertRaises(NotImplementedError): + P('c:/').owner() + + def test_group(self): + P = self.cls + with self.assertRaises(NotImplementedError): + P('c:/').group() + + +class _BasePathTest(object): + """Tests for the FS-accessing functionalities of the Path classes.""" + + # (BASE) + # | + # |-- brokenLink -> non-existing + # |-- dirA + # | `-- linkC -> ../dirB + # |-- dirB + # | |-- fileB + # | `-- linkD -> ../dirB + # |-- dirC + # | |-- dirD + # | | `-- fileD + # | `-- fileC + # |-- dirE # No permissions + # |-- fileA + # |-- linkA -> fileA + # `-- linkB -> dirB + # + + def setUp(self): + def cleanup(): + os.chmod(join('dirE'), 0o777) + support.rmtree(BASE) + self.addCleanup(cleanup) + os.mkdir(BASE) + os.mkdir(join('dirA')) + os.mkdir(join('dirB')) + os.mkdir(join('dirC')) + os.mkdir(join('dirC', 'dirD')) + os.mkdir(join('dirE')) + with open(join('fileA'), 'wb') as f: + f.write(b"this is file A\n") + with open(join('dirB', 'fileB'), 'wb') as f: + f.write(b"this is file B\n") + with open(join('dirC', 'fileC'), 'wb') as f: + f.write(b"this is file C\n") + with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: + f.write(b"this is file D\n") + os.chmod(join('dirE'), 0) + if support_can_symlink(): + # Relative symlinks + os.symlink('fileA', join('linkA')) + os.symlink('non-existing', join('brokenLink')) + self.dirlink('dirB', join('linkB')) + self.dirlink(os.path.join('..', 'dirB'), join('dirA', 'linkC')) + # This one goes upwards, creating a loop + self.dirlink(os.path.join('..', 'dirB'), join('dirB', 'linkD')) + + if os.name == 'nt': + # Workaround for http://bugs.python.org/issue13772 + def 
dirlink(self, src, dest): + os.symlink(src, dest, target_is_directory=True) + else: + def dirlink(self, src, dest): + os.symlink(src, dest) + + def assertSame(self, path_a, path_b): + self.assertTrue(os.path.samefile(str(path_a), str(path_b)), + "%r and %r don't point to the same file" % + (path_a, path_b)) + + def assertFileNotFound(self, func, *args, **kwargs): + if sys.version_info >= (3, 3): + with self.assertRaises(FileNotFoundError) as cm: + func(*args, **kwargs) + else: + with self.assertRaises(OSError) as cm: + # Python 2.6 kludge for http://bugs.python.org/issue7853 + try: + func(*args, **kwargs) + except: # noqa: E722 + raise + self.assertEqual(cm.exception.errno, errno.ENOENT) + + def assertFileExists(self, func, *args, **kwargs): + if sys.version_info >= (3, 3): + with self.assertRaises(FileExistsError) as cm: + func(*args, **kwargs) + else: + with self.assertRaises(OSError) as cm: + # Python 2.6 kludge for http://bugs.python.org/issue7853 + try: + func(*args, **kwargs) + except: # noqa: E722 + raise + self.assertEqual(cm.exception.errno, errno.EEXIST) + + def _test_cwd(self, p): + q = self.cls(os.getcwd()) + self.assertEqual(p, q) + self.assertEqual(str(p), str(q)) + self.assertIs(type(p), type(q)) + self.assertTrue(p.is_absolute()) + + def test_cwd(self): + p = self.cls.cwd() + self._test_cwd(p) + + def _test_home(self, p): + q = self.cls(os.path.expanduser('~')) + self.assertEqual(p, q) + self.assertEqual(str(p), str(q)) + self.assertIs(type(p), type(q)) + self.assertTrue(p.is_absolute()) + + def test_home(self): + p = self.cls.home() + self._test_home(p) + + def test_samefile(self): + fileA_path = os.path.join(BASE, 'fileA') + fileB_path = os.path.join(BASE, 'dirB', 'fileB') + p = self.cls(fileA_path) + pp = self.cls(fileA_path) + q = self.cls(fileB_path) + self.assertTrue(p.samefile(fileA_path)) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile(fileB_path)) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + 
non_existent = os.path.join(BASE, 'foo') + r = self.cls(non_existent) + self.assertFileNotFound(p.samefile, r) + self.assertFileNotFound(p.samefile, non_existent) + self.assertFileNotFound(r.samefile, p) + self.assertFileNotFound(r.samefile, non_existent) + self.assertFileNotFound(r.samefile, r) + self.assertFileNotFound(r.samefile, non_existent) + + def test_empty_path(self): + # The empty path points to '.' + p = self.cls('') + self.assertEqual(p.stat(), os.stat('.')) + + def test_expanduser_common(self): + P = self.cls + p = P('~') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~'))) + p = P('foo') + self.assertEqual(p.expanduser(), p) + p = P('/~') + self.assertEqual(p.expanduser(), p) + p = P('../~') + self.assertEqual(p.expanduser(), p) + p = P(P('').absolute().anchor) / '~' + self.assertEqual(p.expanduser(), p) + + def test_exists(self): + P = self.cls + p = P(BASE) + self.assertIs(True, p.exists()) + self.assertIs(True, (p / 'dirA').exists()) + self.assertIs(True, (p / 'fileA').exists()) + self.assertIs(False, (p / 'fileA' / 'bah').exists()) + if support_can_symlink(): + self.assertIs(True, (p / 'linkA').exists()) + self.assertIs(True, (p / 'linkB').exists()) + self.assertIs(True, (p / 'linkB' / 'fileB').exists()) + self.assertIs(False, (p / 'linkA' / 'bah').exists()) + self.assertIs(False, (p / 'foo').exists()) + self.assertIs(False, P('/xyzzy').exists()) + + def test_open_common(self): + p = self.cls(BASE) + with (p / 'fileA').open('r') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), "this is file A\n") + with (p / 'fileA').open('rb') as f: + self.assertIsInstance(f, io.BufferedIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + with (p / 'fileA').open('rb', buffering=0) as f: + self.assertIsInstance(f, io.RawIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + + def test_read_write_bytes(self): + p = self.cls(BASE) + (p / 'fileA').write_bytes(b'abcdefg') + self.assertEqual((p / 
'fileA').read_bytes(), b'abcdefg') + # check that trying to write str does not truncate the file + with self.assertRaises(TypeError) as cm: + (p / 'fileA').write_bytes(six.u('somestr')) + self.assertTrue(str(cm.exception).startswith('data must be')) + self.assertEqual((p / 'fileA').read_bytes(), b'abcdefg') + + def test_read_write_text(self): + p = self.cls(BASE) + (p / 'fileA').write_text(six.u('\u00e4bcdefg'), encoding='latin-1') + self.assertEqual((p / 'fileA').read_text( + encoding='utf-8', errors='ignore'), six.u('bcdefg')) + # check that trying to write bytes does not truncate the file + with self.assertRaises(TypeError) as cm: + (p / 'fileA').write_text(b'somebytes') + self.assertTrue(str(cm.exception).startswith('data must be')) + self.assertEqual((p / 'fileA').read_text(encoding='latin-1'), + six.u('\u00e4bcdefg')) + + def test_iterdir(self): + P = self.cls + p = P(BASE) + it = p.iterdir() + paths = set(it) + expected = ['dirA', 'dirB', 'dirC', 'dirE', 'fileA'] + if support_can_symlink(): + expected += ['linkA', 'linkB', 'brokenLink'] + self.assertEqual(paths, set(P(BASE, q) for q in expected)) + + @support_skip_unless_symlink + def test_iterdir_symlink(self): + # __iter__ on a symlink to a directory + P = self.cls + p = P(BASE, 'linkB') + paths = set(p.iterdir()) + expected = set(P(BASE, 'linkB', q) for q in ['fileB', 'linkD']) + self.assertEqual(paths, expected) + + def test_iterdir_nodir(self): + # __iter__ on something that is not a directory + p = self.cls(BASE, 'fileA') + with self.assertRaises(OSError) as cm: + # Python 2.6 kludge for http://bugs.python.org/issue7853 + try: + next(p.iterdir()) + except: # noqa: E722 + raise + # ENOENT or EINVAL under Windows, ENOTDIR otherwise + # (see issue #12802) + self.assertIn(cm.exception.errno, (errno.ENOTDIR, + errno.ENOENT, errno.EINVAL)) + + def test_glob_common(self): + def _check(glob, expected): + self.assertEqual(set(glob), set(P(BASE, q) for q in expected)) + P = self.cls + p = P(BASE) + it = 
p.glob("fileA") + self.assertIsInstance(it, collections_abc.Iterator) + _check(it, ["fileA"]) + _check(p.glob("fileB"), []) + _check(p.glob("dir*/file*"), ["dirB/fileB", "dirC/fileC"]) + if not support_can_symlink(): + _check(p.glob("*A"), ['dirA', 'fileA']) + else: + _check(p.glob("*A"), ['dirA', 'fileA', 'linkA']) + if not support_can_symlink(): + _check(p.glob("*B/*"), ['dirB/fileB']) + else: + _check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD', + 'linkB/fileB', 'linkB/linkD']) + if not support_can_symlink(): + _check(p.glob("*/fileB"), ['dirB/fileB']) + else: + _check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB']) + + def test_rglob_common(self): + def _check(glob, expected): + self.assertEqual(set(glob), set(P(BASE, q) for q in expected)) + P = self.cls + p = P(BASE) + it = p.rglob("fileA") + self.assertIsInstance(it, collections_abc.Iterator) + _check(it, ["fileA"]) + _check(p.rglob("fileB"), ["dirB/fileB"]) + _check(p.rglob("*/fileA"), []) + if not support_can_symlink(): + _check(p.rglob("*/fileB"), ["dirB/fileB"]) + else: + _check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB", + "linkB/fileB", "dirA/linkC/fileB"]) + _check(p.rglob("file*"), ["fileA", "dirB/fileB", + "dirC/fileC", "dirC/dirD/fileD"]) + p = P(BASE, "dirC") + _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) + + @support_skip_unless_symlink + def test_rglob_symlink_loop(self): + # Don't get fooled by symlink loops (Issue #26012) + P = self.cls + p = P(BASE) + given = set(p.rglob('*')) + expect = set([ + 'brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + ]) + self.assertEqual(given, set([p / x for x in expect])) + + def test_glob_dotdot(self): + # ".." 
is not special in globs + P = self.cls + p = P(BASE) + self.assertEqual(set(p.glob("..")), set([P(BASE, "..")])) + self.assertEqual(set(p.glob("dirA/../file*")), + set([P(BASE, "dirA/../fileA")])) + self.assertEqual(set(p.glob("../xyzzy")), set()) + + def _check_resolve(self, p, expected, strict=True): + q = p.resolve(strict) + self.assertEqual(q, expected) + + # this can be used to check both relative and absolute resolutions + _check_resolve_relative = _check_resolve_absolute = _check_resolve + + @support_skip_unless_symlink + def test_resolve_common(self): + P = self.cls + p = P(BASE, 'foo') + with self.assertRaises(OSError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + # Non-strict + self.assertEqual(str(p.resolve(strict=False)), + os.path.join(BASE, 'foo')) + p = P(BASE, 'foo', 'in', 'spam') + self.assertEqual(str(p.resolve(strict=False)), + os.path.join(BASE, 'foo', 'in', 'spam')) + p = P(BASE, '..', 'foo', 'in', 'spam') + self.assertEqual(str(p.resolve(strict=False)), + os.path.abspath(os.path.join('foo', 'in', 'spam'))) + # These are all relative symlinks + p = P(BASE, 'dirB', 'fileB') + self._check_resolve_relative(p, p) + p = P(BASE, 'linkA') + self._check_resolve_relative(p, P(BASE, 'fileA')) + p = P(BASE, 'dirA', 'linkC', 'fileB') + self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB')) + p = P(BASE, 'dirB', 'linkD', 'fileB') + self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB')) + # Non-strict + p = P(BASE, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB', 'foo', 'in', + 'spam'), False) + p = P(BASE, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') + if os.name == 'nt': + # In Windows, if linkC points to dirB, 'dirA\linkC\..' + # resolves to 'dirA' without resolving linkC first. + self._check_resolve_relative(p, P(BASE, 'dirA', 'foo', 'in', + 'spam'), False) + else: + # In Posix, if linkC points to dirB, 'dirA/linkC/..' + # resolves to 'dirB/..' 
first before resolving to parent of dirB. + self._check_resolve_relative( + p, P(BASE, 'foo', 'in', 'spam'), False) + # Now create absolute symlinks + d = tempfile.mkdtemp(suffix='-dirD') + self.addCleanup(support.rmtree, d) + os.symlink(os.path.join(d), join('dirA', 'linkX')) + os.symlink(join('dirB'), os.path.join(d, 'linkY')) + p = P(BASE, 'dirA', 'linkX', 'linkY', 'fileB') + self._check_resolve_absolute(p, P(BASE, 'dirB', 'fileB')) + # Non-strict + p = P(BASE, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(BASE, 'dirB', 'foo', 'in', 'spam'), + False) + p = P(BASE, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') + if os.name == 'nt': + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative( + p, P(BASE, 'foo', 'in', 'spam'), False) + + @support_skip_unless_symlink + def test_resolve_dot(self): + # See https://bitbucket.org/pitrou/pathlib/issue/9/ + # pathresolve-fails-on-complex-symlinks + p = self.cls(BASE) + self.dirlink('.', join('0')) + self.dirlink(os.path.join('0', '0'), join('1')) + self.dirlink(os.path.join('1', '1'), join('2')) + q = p / '2' + self.assertEqual(q.resolve(strict=True), p) + r = q / '3' / '4' + self.assertFileNotFound(r.resolve, strict=True) + # Non-strict + self.assertEqual(r.resolve(strict=False), p / '3' / '4') + + def test_with(self): + p = self.cls(BASE) + it = p.iterdir() + it2 = p.iterdir() + next(it2) + with p: + pass + # I/O operation on closed path + self.assertRaises(ValueError, next, it) + self.assertRaises(ValueError, next, it2) + self.assertRaises(ValueError, p.open) + self.assertRaises(ValueError, p.resolve) + self.assertRaises(ValueError, p.absolute) + self.assertRaises(ValueError, p.__enter__) + + def 
test_chmod(self): + p = self.cls(BASE) / 'fileA' + mode = p.stat().st_mode + # Clear writable bit + new_mode = mode & ~0o222 + p.chmod(new_mode) + self.assertEqual(p.stat().st_mode, new_mode) + # Set writable bit + new_mode = mode | 0o222 + p.chmod(new_mode) + self.assertEqual(p.stat().st_mode, new_mode) + + # XXX also need a test for lchmod + + def test_stat(self): + p = self.cls(BASE) / 'fileA' + st = p.stat() + self.assertEqual(p.stat(), st) + # Change file mode by flipping write bit + p.chmod(st.st_mode ^ 0o222) + self.addCleanup(p.chmod, st.st_mode) + self.assertNotEqual(p.stat(), st) + + @support_skip_unless_symlink + def test_lstat(self): + p = self.cls(BASE) / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.lstat()) + + def test_lstat_nosymlink(self): + p = self.cls(BASE) / 'fileA' + st = p.stat() + self.assertEqual(st, p.lstat()) + + @unittest.skipUnless(pwd, "the pwd module is needed for this test") + def test_owner(self): + p = self.cls(BASE) / 'fileA' + uid = p.stat().st_uid + try: + name = pwd.getpwuid(uid).pw_name + except KeyError: + self.skipTest( + "user %d doesn't have an entry in the system database" % uid) + self.assertEqual(name, p.owner()) + + @unittest.skipUnless(grp, "the grp module is needed for this test") + def test_group(self): + p = self.cls(BASE) / 'fileA' + gid = p.stat().st_gid + try: + name = grp.getgrgid(gid).gr_name + except KeyError: + self.skipTest( + "group %d doesn't have an entry in the system database" % gid) + self.assertEqual(name, p.group()) + + def test_unlink(self): + p = self.cls(BASE) / 'fileA' + p.unlink() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_rmdir(self): + p = self.cls(BASE) / 'dirA' + for q in p.iterdir(): + q.unlink() + p.rmdir() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_rename(self): + P = self.cls(BASE) + p = P / 'fileA' + size = p.stat().st_size + # Renaming to another path + q = P / 'dirA' / 'fileAA' + p.rename(q) + 
self.assertEqual(q.stat().st_size, size) + self.assertFileNotFound(p.stat) + # Renaming to a str of a relative path + r = rel_join('fileAAA') + q.rename(r) + self.assertEqual(os.stat(r).st_size, size) + self.assertFileNotFound(q.stat) + + def test_replace(self): + P = self.cls(BASE) + p = P / 'fileA' + if sys.version_info < (3, 3): + self.assertRaises(NotImplementedError, p.replace, p) + return + size = p.stat().st_size + # Replacing a non-existing path + q = P / 'dirA' / 'fileAA' + p.replace(q) + self.assertEqual(q.stat().st_size, size) + self.assertFileNotFound(p.stat) + # Replacing another (existing) path + r = rel_join('dirB', 'fileB') + q.replace(r) + self.assertEqual(os.stat(r).st_size, size) + self.assertFileNotFound(q.stat) + + def test_touch_common(self): + P = self.cls(BASE) + p = P / 'newfileA' + self.assertFalse(p.exists()) + p.touch() + self.assertTrue(p.exists()) + # Rewind the mtime sufficiently far in the past to work around + # filesystem-specific timestamp granularity. 
+ old_mtime = p.stat().st_mtime - 10 + os.utime(str(p), (old_mtime, old_mtime)) + # The file mtime should be refreshed by calling touch() again + p.touch() + self.assertGreaterEqual(p.stat().st_mtime, old_mtime) + # Now with exist_ok=False + p = P / 'newfileB' + self.assertFalse(p.exists()) + p.touch(mode=0o700, exist_ok=False) + self.assertTrue(p.exists()) + self.assertRaises(OSError, p.touch, exist_ok=False) + + def test_touch_nochange(self): + P = self.cls(BASE) + p = P / 'fileA' + p.touch() + with p.open('rb') as f: + self.assertEqual(f.read().strip(), b"this is file A") + + def test_mkdir(self): + P = self.cls(BASE) + p = P / 'newdirA' + self.assertFalse(p.exists()) + p.mkdir() + self.assertTrue(p.exists()) + self.assertTrue(p.is_dir()) + with self.assertRaises(OSError) as cm: + # Python 2.6 kludge for http://bugs.python.org/issue7853 + try: + p.mkdir() + except: # noqa: E722 + raise + self.assertEqual(cm.exception.errno, errno.EEXIST) + + def test_mkdir_parents(self): + # Creating a chain of directories + p = self.cls(BASE, 'newdirB', 'newdirC') + self.assertFalse(p.exists()) + with self.assertRaises(OSError) as cm: + p.mkdir() + self.assertEqual(cm.exception.errno, errno.ENOENT) + p.mkdir(parents=True) + self.assertTrue(p.exists()) + self.assertTrue(p.is_dir()) + with self.assertRaises(OSError) as cm: + p.mkdir(parents=True) + self.assertEqual(cm.exception.errno, errno.EEXIST) + # test `mode` arg + mode = stat.S_IMODE(p.stat().st_mode) # default mode + p = self.cls(BASE, 'newdirD', 'newdirE') + p.mkdir(0o555, parents=True) + self.assertTrue(p.exists()) + self.assertTrue(p.is_dir()) + if os.name != 'nt': + # the directory's permissions follow the mode argument + self.assertEqual(stat.S_IMODE(p.stat().st_mode), 0o7555 & mode) + # the parent's permissions follow the default process settings + self.assertEqual(stat.S_IMODE(p.parent.stat().st_mode), mode) + + def test_mkdir_exist_ok(self): + p = self.cls(BASE, 'dirB') + st_ctime_first = p.stat().st_ctime + 
self.assertTrue(p.exists()) + self.assertTrue(p.is_dir()) + self.assertFileExists(p.mkdir) + p.mkdir(exist_ok=True) + self.assertTrue(p.exists()) + self.assertEqual(p.stat().st_ctime, st_ctime_first) + + def test_mkdir_exist_ok_with_parent(self): + p = self.cls(BASE, 'dirC') + self.assertTrue(p.exists()) + self.assertFileExists(p.mkdir) + p = p / 'newdirC' + p.mkdir(parents=True) + st_ctime_first = p.stat().st_ctime + self.assertTrue(p.exists()) + self.assertFileExists(p.mkdir, parents=True) + p.mkdir(parents=True, exist_ok=True) + self.assertTrue(p.exists()) + self.assertEqual(p.stat().st_ctime, st_ctime_first) + + def test_mkdir_exist_ok_root(self): + # Issue #25803: A drive root could raise PermissionError on Windows + self.cls('/').resolve().mkdir(exist_ok=True) + self.cls('/').resolve().mkdir(parents=True, exist_ok=True) + + @only_nt # XXX: not sure how to test this on POSIX + def test_mkdir_with_unknown_drive(self): + for d in 'ZYXWVUTSRQPONMLKJIHGFEDCBA': + p = self.cls(d + ':\\') + if not p.is_dir(): + break + else: + self.skipTest("cannot find a drive that doesn't exist") + with self.assertRaises(OSError): + (p / 'child' / 'path').mkdir(parents=True) + + def test_mkdir_with_child_file(self): + p = self.cls(BASE, 'dirB', 'fileB') + self.assertTrue(p.exists()) + # An exception is raised when the last path component is an existing + # regular file, regardless of whether exist_ok is true or not. + self.assertFileExists(p.mkdir, parents=True) + self.assertFileExists(p.mkdir, parents=True, exist_ok=True) + + def test_mkdir_no_parents_file(self): + p = self.cls(BASE, 'fileA') + self.assertTrue(p.exists()) + # An exception is raised when the last path component is an existing + # regular file, regardless of whether exist_ok is true or not. 
+ self.assertFileExists(p.mkdir) + self.assertFileExists(p.mkdir, exist_ok=True) + + def test_mkdir_concurrent_parent_creation(self): + for pattern_num in range(32): + p = self.cls(BASE, 'dirCPC%d' % pattern_num) + self.assertFalse(p.exists()) + + def my_mkdir(path, mode=0o777): + path = str(path) + # Emulate another process that would create the directory + # just before we try to create it ourselves. We do it + # in all possible pattern combinations, assuming that this + # function is called at most 5 times (dirCPC/dir1/dir2, + # dirCPC/dir1, dirCPC, dirCPC/dir1, dirCPC/dir1/dir2). + if pattern.pop(): + os.mkdir(path, mode) # from another process + concurrently_created.add(path) + os.mkdir(path, mode) # our real call + + pattern = [bool(pattern_num & (1 << n)) for n in range(5)] + concurrently_created = set() + p12 = p / 'dir1' / 'dir2' + + def _try_func(): + with mock.patch("pathlib2._normal_accessor.mkdir", my_mkdir): + p12.mkdir(parents=True, exist_ok=False) + + def _exc_func(exc): + self.assertIn(str(p12), concurrently_created) + + def _else_func(): + self.assertNotIn(str(p12), concurrently_created) + + pathlib._try_except_fileexistserror( + _try_func, _exc_func, _else_func) + self.assertTrue(p.exists()) + + @support_skip_unless_symlink + def test_symlink_to(self): + P = self.cls(BASE) + target = P / 'fileA' + # Symlinking a path target + link = P / 'dirA' / 'linkAA' + link.symlink_to(target) + self.assertEqual(link.stat(), target.stat()) + self.assertNotEqual(link.lstat(), target.stat()) + # Symlinking a str target + link = P / 'dirA' / 'linkAAA' + link.symlink_to(str(target)) + self.assertEqual(link.stat(), target.stat()) + self.assertNotEqual(link.lstat(), target.stat()) + self.assertFalse(link.is_dir()) + # Symlinking to a directory + target = P / 'dirB' + link = P / 'dirA' / 'linkAAAA' + link.symlink_to(target, target_is_directory=True) + self.assertEqual(link.stat(), target.stat()) + self.assertNotEqual(link.lstat(), target.stat()) + 
self.assertTrue(link.is_dir()) + self.assertTrue(list(link.iterdir())) + + def test_is_dir(self): + P = self.cls(BASE) + self.assertTrue((P / 'dirA').is_dir()) + self.assertFalse((P / 'fileA').is_dir()) + self.assertFalse((P / 'non-existing').is_dir()) + self.assertFalse((P / 'fileA' / 'bah').is_dir()) + if support_can_symlink(): + self.assertFalse((P / 'linkA').is_dir()) + self.assertTrue((P / 'linkB').is_dir()) + self.assertFalse((P / 'brokenLink').is_dir()) + + def test_is_file(self): + P = self.cls(BASE) + self.assertTrue((P / 'fileA').is_file()) + self.assertFalse((P / 'dirA').is_file()) + self.assertFalse((P / 'non-existing').is_file()) + self.assertFalse((P / 'fileA' / 'bah').is_file()) + if support_can_symlink(): + self.assertTrue((P / 'linkA').is_file()) + self.assertFalse((P / 'linkB').is_file()) + self.assertFalse((P / 'brokenLink').is_file()) + + def test_is_symlink(self): + P = self.cls(BASE) + self.assertFalse((P / 'fileA').is_symlink()) + self.assertFalse((P / 'dirA').is_symlink()) + self.assertFalse((P / 'non-existing').is_symlink()) + self.assertFalse((P / 'fileA' / 'bah').is_symlink()) + if support_can_symlink(): + self.assertTrue((P / 'linkA').is_symlink()) + self.assertTrue((P / 'linkB').is_symlink()) + self.assertTrue((P / 'brokenLink').is_symlink()) + + def test_is_fifo_false(self): + P = self.cls(BASE) + self.assertFalse((P / 'fileA').is_fifo()) + self.assertFalse((P / 'dirA').is_fifo()) + self.assertFalse((P / 'non-existing').is_fifo()) + self.assertFalse((P / 'fileA' / 'bah').is_fifo()) + + @unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required") + @unittest.skipIf(android_not_root, "mkfifo not allowed, non root user") + def test_is_fifo_true(self): + P = self.cls(BASE, 'myfifo') + os.mkfifo(str(P)) + self.assertTrue(P.is_fifo()) + self.assertFalse(P.is_socket()) + self.assertFalse(P.is_file()) + + def test_is_socket_false(self): + P = self.cls(BASE) + self.assertFalse((P / 'fileA').is_socket()) + self.assertFalse((P / 
'dirA').is_socket()) + self.assertFalse((P / 'non-existing').is_socket()) + self.assertFalse((P / 'fileA' / 'bah').is_socket()) + + @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") + def test_is_socket_true(self): + P = self.cls(BASE, 'mysock') + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self.addCleanup(sock.close) + try: + sock.bind(str(P)) + except OSError as e: + if "AF_UNIX path too long" in str(e): + self.skipTest("cannot bind Unix socket: " + str(e)) + self.assertTrue(P.is_socket()) + self.assertFalse(P.is_fifo()) + self.assertFalse(P.is_file()) + + def test_is_block_device_false(self): + P = self.cls(BASE) + self.assertFalse((P / 'fileA').is_block_device()) + self.assertFalse((P / 'dirA').is_block_device()) + self.assertFalse((P / 'non-existing').is_block_device()) + self.assertFalse((P / 'fileA' / 'bah').is_block_device()) + + def test_is_char_device_false(self): + P = self.cls(BASE) + self.assertFalse((P / 'fileA').is_char_device()) + self.assertFalse((P / 'dirA').is_char_device()) + self.assertFalse((P / 'non-existing').is_char_device()) + self.assertFalse((P / 'fileA' / 'bah').is_char_device()) + + @only_posix + def test_is_char_device_true(self): + # Under Unix, /dev/null should generally be a char device + P = self.cls('/dev/null') + if not P.exists(): + self.skipTest("/dev/null required") + self.assertTrue(P.is_char_device()) + self.assertFalse(P.is_block_device()) + self.assertFalse(P.is_file()) + + def test_pickling_common(self): + p = self.cls(BASE, 'fileA') + for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(p, proto) + pp = pickle.loads(dumped) + self.assertEqual(pp.stat(), p.stat()) + + def test_parts_interning(self): + P = self.cls + p = P('/usr/bin/foo') + q = P('/usr/local/bin') + # 'usr' + self.assertIs(p.parts[1], q.parts[1]) + # 'bin' + self.assertIs(p.parts[2], q.parts[3]) + + def _check_complex_symlinks(self, link0_target): + # Test solving a non-looping chain of 
symlinks (issue #19887) + P = self.cls(BASE) + self.dirlink(os.path.join('link0', 'link0'), join('link1')) + self.dirlink(os.path.join('link1', 'link1'), join('link2')) + self.dirlink(os.path.join('link2', 'link2'), join('link3')) + self.dirlink(link0_target, join('link0')) + + # Resolve absolute paths + p = (P / 'link0').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = (P / 'link1').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = (P / 'link2').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = (P / 'link3').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + + # Resolve relative paths + old_path = os.getcwd() + os.chdir(BASE) + try: + p = self.cls('link0').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = self.cls('link1').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = self.cls('link2').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + p = self.cls('link3').resolve() + self.assertEqual(p, P) + self.assertEqual(str(p), BASE) + finally: + os.chdir(old_path) + + @support_skip_unless_symlink + def test_complex_symlinks_absolute(self): + self._check_complex_symlinks(BASE) + + @support_skip_unless_symlink + def test_complex_symlinks_relative(self): + self._check_complex_symlinks('.') + + @support_skip_unless_symlink + def test_complex_symlinks_relative_dot_dot(self): + self._check_complex_symlinks(os.path.join('dirA', '..')) + + +class PathTest(_BasePathTest, unittest.TestCase): + cls = pathlib.Path + + def test_concrete_class(self): + p = self.cls('a') + self.assertIs( + type(p), + pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath) + + def test_unsupported_flavour(self): + if os.name == 'nt': + self.assertRaises(NotImplementedError, pathlib.PosixPath) + else: + self.assertRaises(NotImplementedError, pathlib.WindowsPath) + + def test_glob_empty_pattern(self): + p = self.cls() + with 
self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('')) + + +@only_posix +class PosixPathTest(_BasePathTest, unittest.TestCase): + cls = pathlib.PosixPath + + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(RuntimeError): + print(path.resolve(strict=True)) + + def _check_symlink_loop_nonstrict(self, *args): + path = self.cls(*args) + with self.assertRaises(RuntimeError): + print(path.resolve(strict=False)) + + def test_open_mode(self): + old_mask = os.umask(0) + self.addCleanup(os.umask, old_mask) + p = self.cls(BASE) + with (p / 'new_file').open('wb'): + pass + st = os.stat(join('new_file')) + self.assertEqual(stat.S_IMODE(st.st_mode), 0o666) + os.umask(0o022) + with (p / 'other_new_file').open('wb'): + pass + st = os.stat(join('other_new_file')) + self.assertEqual(stat.S_IMODE(st.st_mode), 0o644) + + def test_touch_mode(self): + old_mask = os.umask(0) + self.addCleanup(os.umask, old_mask) + p = self.cls(BASE) + (p / 'new_file').touch() + st = os.stat(join('new_file')) + self.assertEqual(stat.S_IMODE(st.st_mode), 0o666) + os.umask(0o022) + (p / 'other_new_file').touch() + st = os.stat(join('other_new_file')) + self.assertEqual(stat.S_IMODE(st.st_mode), 0o644) + (p / 'masked_new_file').touch(mode=0o750) + st = os.stat(join('masked_new_file')) + self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) + + @support_skip_unless_symlink + def test_resolve_loop(self): + # Loops with relative symlinks + os.symlink('linkX/inside', join('linkX')) + self._check_symlink_loop(BASE, 'linkX') + os.symlink('linkY', join('linkY')) + self._check_symlink_loop(BASE, 'linkY') + os.symlink('linkZ/../linkZ', join('linkZ')) + self._check_symlink_loop(BASE, 'linkZ') + # Non-strict + self._check_symlink_loop_nonstrict(BASE, 'linkZ', 'foo') + # Loops with absolute symlinks + os.symlink(join('linkU/inside'), join('linkU')) + self._check_symlink_loop(BASE, 'linkU') + os.symlink(join('linkV'), join('linkV')) + 
self._check_symlink_loop(BASE, 'linkV') + os.symlink(join('linkW/../linkW'), join('linkW')) + self._check_symlink_loop(BASE, 'linkW') + # Non-strict + self._check_symlink_loop_nonstrict(BASE, 'linkW', 'foo') + + def test_glob(self): + P = self.cls + p = P(BASE) + given = set(p.glob("FILEa")) + expect = set() if not support.fs_is_case_insensitive(BASE) else given + self.assertEqual(given, expect) + self.assertEqual(set(p.glob("FILEa*")), set()) + + def test_rglob(self): + P = self.cls + p = P(BASE, "dirC") + given = set(p.rglob("FILEd")) + expect = set() if not support.fs_is_case_insensitive(BASE) else given + self.assertEqual(given, expect) + self.assertEqual(set(p.rglob("FILEd*")), set()) + + @unittest.skipUnless(hasattr(pwd, 'getpwall'), + 'pwd module does not expose getpwall()') + def test_expanduser(self): + P = self.cls + support.import_module('pwd') + import pwd + pwdent = pwd.getpwuid(os.getuid()) + username = pwdent.pw_name + userhome = pwdent.pw_dir.rstrip('/') or '/' + # find arbitrary different user (if exists) + for pwdent in pwd.getpwall(): + othername = pwdent.pw_name + otherhome = pwdent.pw_dir.rstrip('/') + if othername != username and otherhome: + break + + p1 = P('~/Documents') + p2 = P('~' + username + '/Documents') + p3 = P('~' + othername + '/Documents') + p4 = P('../~' + username + '/Documents') + p5 = P('/~' + username + '/Documents') + p6 = P('') + p7 = P('~fakeuser/Documents') + + with support.EnvironmentVarGuard() as env: + env.unset('HOME') + + self.assertEqual(p1.expanduser(), P(userhome) / 'Documents') + self.assertEqual(p2.expanduser(), P(userhome) / 'Documents') + self.assertEqual(p3.expanduser(), P(otherhome) / 'Documents') + self.assertEqual(p4.expanduser(), p4) + self.assertEqual(p5.expanduser(), p5) + self.assertEqual(p6.expanduser(), p6) + self.assertRaises(RuntimeError, p7.expanduser) + + env.set('HOME', '/tmp') + self.assertEqual(p1.expanduser(), P('/tmp/Documents')) + self.assertEqual(p2.expanduser(), P(userhome) / 
'Documents') + self.assertEqual(p3.expanduser(), P(otherhome) / 'Documents') + self.assertEqual(p4.expanduser(), p4) + self.assertEqual(p5.expanduser(), p5) + self.assertEqual(p6.expanduser(), p6) + self.assertRaises(RuntimeError, p7.expanduser) + + +@only_nt +class WindowsPathTest(_BasePathTest, unittest.TestCase): + cls = pathlib.WindowsPath + + def test_glob(self): + P = self.cls + p = P(BASE) + self.assertEqual(set(p.glob("FILEa")), set([P(BASE, "fileA")])) + + def test_rglob(self): + P = self.cls + p = P(BASE, "dirC") + self.assertEqual(set(p.rglob("FILEd")), + set([P(BASE, "dirC/dirD/fileD")])) + + def test_expanduser(self): + P = self.cls + with support.EnvironmentVarGuard() as env: + env.unset('HOME') + env.unset('USERPROFILE') + env.unset('HOMEPATH') + env.unset('HOMEDRIVE') + env.set('USERNAME', 'alice') + + # test that the path returns unchanged + p1 = P('~/My Documents') + p2 = P('~alice/My Documents') + p3 = P('~bob/My Documents') + p4 = P('/~/My Documents') + p5 = P('d:~/My Documents') + p6 = P('') + self.assertRaises(RuntimeError, p1.expanduser) + self.assertRaises(RuntimeError, p2.expanduser) + self.assertRaises(RuntimeError, p3.expanduser) + self.assertEqual(p4.expanduser(), p4) + self.assertEqual(p5.expanduser(), p5) + self.assertEqual(p6.expanduser(), p6) + + def check(): + env.unset('USERNAME') + self.assertEqual(p1.expanduser(), + P('C:/Users/alice/My Documents')) + self.assertRaises(KeyError, p2.expanduser) + env.set('USERNAME', 'alice') + self.assertEqual(p2.expanduser(), + P('C:/Users/alice/My Documents')) + self.assertEqual(p3.expanduser(), + P('C:/Users/bob/My Documents')) + self.assertEqual(p4.expanduser(), p4) + self.assertEqual(p5.expanduser(), p5) + self.assertEqual(p6.expanduser(), p6) + + # test the first lookup key in the env vars + env.set('HOME', 'C:\\Users\\alice') + check() + + # test that HOMEPATH is available instead + env.unset('HOME') + env.set('HOMEPATH', 'C:\\Users\\alice') + check() + + env.set('HOMEDRIVE', 'C:\\') + 
env.set('HOMEPATH', 'Users\\alice') + check() + + env.unset('HOMEDRIVE') + env.unset('HOMEPATH') + env.set('USERPROFILE', 'C:\\Users\\alice') + check() + + +def main(): + unittest.main(__name__) + + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/python/requirements.in b/third_party/python/requirements.in index 119a70fdbfb0..f61cffa76e5d 100644 --- a/third_party/python/requirements.in +++ b/third_party/python/requirements.in @@ -3,6 +3,7 @@ blessings==1.7 jsmin==2.1.0 json-e==2.7.0 mozilla-version==0.3.0 +pathlib2==2.3.2 pip-tools==3.0.0 pipenv==2018.5.18 psutil==5.4.3 diff --git a/third_party/python/requirements.txt b/third_party/python/requirements.txt index 5c3465ce82d8..3a9f3156e7a7 100644 --- a/third_party/python/requirements.txt +++ b/third_party/python/requirements.txt @@ -38,6 +38,9 @@ more-itertools==4.3.0 \ # via pytest mozilla-version==0.3.0 \ --hash=sha256:97f428f6a87f1a0569e03c39e446eeed87c3ec5d8300319d41e8348ef832e8ea +pathlib2==2.3.2 \ + --hash=sha256:8eb170f8d0d61825e09a95b38be068299ddeda82f35e96c3301a8a5e7604cb83 \ + --hash=sha256:d1aa2a11ba7b8f7b21ab852b1fb5afb277e1bb99d5dfc663380b5015c0d80c5a pip-tools==3.0.0 \ --hash=sha256:4a94997602848f77ff02f660c0fcdfeaf316924ebb236c865f9742ce212aa6f9 \ --hash=sha256:e45e5198ce3799068642ebb0e7c9be5520bcff944c0186f79c1199a2759c970a @@ -71,6 +74,19 @@ python-hglib==2.4 \ requests==2.9.1 \ --hash=sha256:113fbba5531a9e34945b7d36b33a084e8ba5d0664b703c81a7c572d91919a5b8 \ --hash=sha256:c577815dd00f1394203fc44eb979724b098f88264a9ef898ee45b8e5e9cf587f +scandir==1.9.0 \ + --hash=sha256:04b8adb105f2ed313a7c2ef0f1cf7aff4871aa7a1883fa4d8c44b5551ab052d6 \ + --hash=sha256:1444134990356c81d12f30e4b311379acfbbcd03e0bab591de2696a3b126d58e \ + --hash=sha256:1b5c314e39f596875e5a95dd81af03730b338c277c54a454226978d5ba95dbb6 \ + --hash=sha256:346619f72eb0ddc4cf355ceffd225fa52506c92a2ff05318cfabd02a144e7c4e \ + --hash=sha256:44975e209c4827fc18a3486f257154d34ec6eaec0f90fef0cca1caa482db7064 \ + 
--hash=sha256:61859fd7e40b8c71e609c202db5b0c1dbec0d5c7f1449dec2245575bdc866792 \ + --hash=sha256:a5e232a0bf188362fa00123cc0bb842d363a292de7126126df5527b6a369586a \ + --hash=sha256:c14701409f311e7a9b7ec8e337f0815baf7ac95776cc78b419a1e6d49889a383 \ + --hash=sha256:c7708f29d843fc2764310732e41f0ce27feadde453261859ec0fca7865dfc41b \ + --hash=sha256:c9009c527929f6e25604aec39b0a43c3f831d2947d89d6caaab22f057b7055c8 \ + --hash=sha256:f5c71e29b4e2af7ccdc03a020c626ede51da471173b4a6ad1e904f2b2e04b4bd \ + # via pathlib2 six==1.10.0 \ --hash=sha256:0ff78c403d9bccf5a425a6d31a12aa6b47f1c21ca4dc2573a7e2f32a97335eb1 \ --hash=sha256:105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a diff --git a/third_party/python/scandir/LICENSE.txt b/third_party/python/scandir/LICENSE.txt new file mode 100644 index 000000000000..0759f503f213 --- /dev/null +++ b/third_party/python/scandir/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2012, Ben Hoyt +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +* Neither the name of Ben Hoyt nor the names of its contributors may be used +to endorse or promote products derived from this software without specific +prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/python/scandir/MANIFEST.in b/third_party/python/scandir/MANIFEST.in new file mode 100644 index 000000000000..7524c5385a6c --- /dev/null +++ b/third_party/python/scandir/MANIFEST.in @@ -0,0 +1,6 @@ +include *.py +include *.c +include *.h +include *.txt +include *.rst +include test/*.py diff --git a/third_party/python/scandir/PKG-INFO b/third_party/python/scandir/PKG-INFO new file mode 100644 index 000000000000..e4625662b17a --- /dev/null +++ b/third_party/python/scandir/PKG-INFO @@ -0,0 +1,238 @@ +Metadata-Version: 1.1 +Name: scandir +Version: 1.9.0 +Summary: scandir, a better directory iterator and faster os.walk() +Home-page: https://github.com/benhoyt/scandir +Author: Ben Hoyt +Author-email: benhoyt@gmail.com +License: New BSD License +Description-Content-Type: UNKNOWN +Description: + scandir, a better directory iterator and faster os.walk() + ========================================================= + + .. image:: https://img.shields.io/pypi/v/scandir.svg + :target: https://pypi.python.org/pypi/scandir + :alt: scandir on PyPI (Python Package Index) + + .. image:: https://travis-ci.org/benhoyt/scandir.svg?branch=master + :target: https://travis-ci.org/benhoyt/scandir + :alt: Travis CI tests (Linux) + + .. 
image:: https://ci.appveyor.com/api/projects/status/github/benhoyt/scandir?branch=master&svg=true + :target: https://ci.appveyor.com/project/benhoyt/scandir + :alt: Appveyor tests (Windows) + + + ``scandir()`` is a directory iteration function like ``os.listdir()``, + except that instead of returning a list of bare filenames, it yields + ``DirEntry`` objects that include file type and stat information along + with the name. Using ``scandir()`` increases the speed of ``os.walk()`` + by 2-20 times (depending on the platform and file system) by avoiding + unnecessary calls to ``os.stat()`` in most cases. + + + Now included in a Python near you! + ---------------------------------- + + ``scandir`` has been included in the Python 3.5 standard library as + ``os.scandir()``, and the related performance improvements to + ``os.walk()`` have also been included. So if you're lucky enough to be + using Python 3.5 (release date September 13, 2015) you get the benefit + immediately, otherwise just + `download this module from PyPI `_, + install it with ``pip install scandir``, and then do something like + this in your code: + + .. code-block:: python + + # Use the built-in version of scandir/walk if possible, otherwise + # use the scandir module version + try: + from os import scandir, walk + except ImportError: + from scandir import scandir, walk + + `PEP 471 `_, which is the + PEP that proposes including ``scandir`` in the Python standard library, + was `accepted `_ + in July 2014 by Victor Stinner, the BDFL-delegate for the PEP. + + This ``scandir`` module is intended to work on Python 2.6+ and Python + 3.2+ (and it has been tested on those versions). + + + Background + ---------- + + Python's built-in ``os.walk()`` is significantly slower than it needs to be, + because -- in addition to calling ``listdir()`` on each directory -- it calls + ``stat()`` on each file to determine whether the filename is a directory or not. 
+ But both ``FindFirstFile`` / ``FindNextFile`` on Windows and ``readdir`` on Linux/OS + X already tell you whether the files returned are directories or not, so + no further ``stat`` system calls are needed. In short, you can reduce the number + of system calls from about 2N to N, where N is the total number of files and + directories in the tree. + + In practice, removing all those extra system calls makes ``os.walk()`` about + **7-50 times as fast on Windows, and about 3-10 times as fast on Linux and Mac OS + X.** So we're not talking about micro-optimizations. See more benchmarks + in the "Benchmarks" section below. + + Somewhat relatedly, many people have also asked for a version of + ``os.listdir()`` that yields filenames as it iterates instead of returning them + as one big list. This improves memory efficiency for iterating very large + directories. + + So as well as a faster ``walk()``, scandir adds a new ``scandir()`` function. + They're pretty easy to use, but see "The API" below for the full docs. + + + Benchmarks + ---------- + + Below are results showing how many times as fast ``scandir.walk()`` is than + ``os.walk()`` on various systems, found by running ``benchmark.py`` with no + arguments: + + ==================== ============== ============= + System version Python version Times as fast + ==================== ============== ============= + Windows 7 64-bit 2.7.7 64-bit 10.4 + Windows 7 64-bit SSD 2.7.7 64-bit 10.3 + Windows 7 64-bit NFS 2.7.6 64-bit 36.8 + Windows 7 64-bit SSD 3.4.1 64-bit 9.9 + Windows 7 64-bit SSD 3.5.0 64-bit 9.5 + CentOS 6.2 64-bit 2.6.6 64-bit 3.9 + Ubuntu 14.04 64-bit 2.7.6 64-bit 5.8 + Mac OS X 10.9.3 2.7.5 64-bit 3.8 + ==================== ============== ============= + + All of the above tests were done using the fast C version of scandir + (source code in ``_scandir.c``). + + Note that the gains are less than the above on smaller directories and greater + on larger directories. 
This is why ``benchmark.py`` creates a test directory + tree with a standardized size. + + + The API + ------- + + walk() + ~~~~~~ + + The API for ``scandir.walk()`` is exactly the same as ``os.walk()``, so just + `read the Python docs `_. + + scandir() + ~~~~~~~~~ + + The full docs for ``scandir()`` and the ``DirEntry`` objects it yields are + available in the `Python documentation here `_. + But below is a brief summary as well. + + scandir(path='.') -> iterator of DirEntry objects for given path + + Like ``listdir``, ``scandir`` calls the operating system's directory + iteration system calls to get the names of the files in the given + ``path``, but it's different from ``listdir`` in two ways: + + * Instead of returning bare filename strings, it returns lightweight + ``DirEntry`` objects that hold the filename string and provide + simple methods that allow access to the additional data the + operating system may have returned. + + * It returns a generator instead of a list, so that ``scandir`` acts + as a true iterator instead of returning the full list immediately. + + ``scandir()`` yields a ``DirEntry`` object for each file and + sub-directory in ``path``. Just like ``listdir``, the ``'.'`` + and ``'..'`` pseudo-directories are skipped, and the entries are + yielded in system-dependent order. 
Each ``DirEntry`` object has the + following attributes and methods: + + * ``name``: the entry's filename, relative to the scandir ``path`` + argument (corresponds to the return values of ``os.listdir``) + + * ``path``: the entry's full path name (not necessarily an absolute + path) -- the equivalent of ``os.path.join(scandir_path, entry.name)`` + + * ``is_dir(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_dir()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + + * ``is_file(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_file()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + + * ``is_symlink()``: similar to ``pathlib.Path.is_symlink()``, but the + return value is cached on the ``DirEntry`` object; doesn't require a + system call in most cases + + * ``stat(*, follow_symlinks=True)``: like ``os.stat()``, but the + return value is cached on the ``DirEntry`` object; does not require a + system call on Windows (except for symlinks); don't follow symbolic links + (like ``os.lstat()``) if ``follow_symlinks`` is False + + * ``inode()``: return the inode number of the entry; the return value + is cached on the ``DirEntry`` object + + Here's a very simple example of ``scandir()`` showing use of the + ``DirEntry.name`` attribute and the ``DirEntry.is_dir()`` method: + + .. code-block:: python + + def subdirs(path): + """Yield directory names not starting with '.' under given path.""" + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_dir(): + yield entry.name + + This ``subdirs()`` function will be significantly faster with scandir + than ``os.listdir()`` and ``os.path.isdir()`` on both Windows and POSIX + systems, especially on medium-sized or large directories. 
+ + + Further reading + --------------- + + * `The Python docs for scandir `_ + * `PEP 471 `_, the + (now-accepted) Python Enhancement Proposal that proposed adding + ``scandir`` to the standard library -- a lot of details here, + including rejected ideas and previous discussion + + + Flames, comments, bug reports + ----------------------------- + + Please send flames, comments, and questions about scandir to Ben Hoyt: + + http://benhoyt.com/ + + File bug reports for the version in the Python 3.5 standard library + `here `_, or file bug reports + or feature requests for this module at the GitHub project page: + + https://github.com/benhoyt/scandir + +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Topic :: System :: Filesystems +Classifier: Topic :: System :: Operating System +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: Implementation :: CPython diff --git a/third_party/python/scandir/README.rst b/third_party/python/scandir/README.rst new file mode 100644 index 000000000000..a5537517dda2 --- /dev/null +++ b/third_party/python/scandir/README.rst @@ -0,0 +1,211 @@ + +scandir, a better directory iterator and faster os.walk() +========================================================= + +.. image:: https://img.shields.io/pypi/v/scandir.svg + :target: https://pypi.python.org/pypi/scandir + :alt: scandir on PyPI (Python Package Index) + +.. 
image:: https://travis-ci.org/benhoyt/scandir.svg?branch=master + :target: https://travis-ci.org/benhoyt/scandir + :alt: Travis CI tests (Linux) + +.. image:: https://ci.appveyor.com/api/projects/status/github/benhoyt/scandir?branch=master&svg=true + :target: https://ci.appveyor.com/project/benhoyt/scandir + :alt: Appveyor tests (Windows) + + +``scandir()`` is a directory iteration function like ``os.listdir()``, +except that instead of returning a list of bare filenames, it yields +``DirEntry`` objects that include file type and stat information along +with the name. Using ``scandir()`` increases the speed of ``os.walk()`` +by 2-20 times (depending on the platform and file system) by avoiding +unnecessary calls to ``os.stat()`` in most cases. + + +Now included in a Python near you! +---------------------------------- + +``scandir`` has been included in the Python 3.5 standard library as +``os.scandir()``, and the related performance improvements to +``os.walk()`` have also been included. So if you're lucky enough to be +using Python 3.5 (release date September 13, 2015) you get the benefit +immediately, otherwise just +`download this module from PyPI `_, +install it with ``pip install scandir``, and then do something like +this in your code: + +.. code-block:: python + + # Use the built-in version of scandir/walk if possible, otherwise + # use the scandir module version + try: + from os import scandir, walk + except ImportError: + from scandir import scandir, walk + +`PEP 471 `_, which is the +PEP that proposes including ``scandir`` in the Python standard library, +was `accepted `_ +in July 2014 by Victor Stinner, the BDFL-delegate for the PEP. + +This ``scandir`` module is intended to work on Python 2.6+ and Python +3.2+ (and it has been tested on those versions). 
+ + +Background +---------- + +Python's built-in ``os.walk()`` is significantly slower than it needs to be, +because -- in addition to calling ``listdir()`` on each directory -- it calls +``stat()`` on each file to determine whether the filename is a directory or not. +But both ``FindFirstFile`` / ``FindNextFile`` on Windows and ``readdir`` on Linux/OS +X already tell you whether the files returned are directories or not, so +no further ``stat`` system calls are needed. In short, you can reduce the number +of system calls from about 2N to N, where N is the total number of files and +directories in the tree. + +In practice, removing all those extra system calls makes ``os.walk()`` about +**7-50 times as fast on Windows, and about 3-10 times as fast on Linux and Mac OS +X.** So we're not talking about micro-optimizations. See more benchmarks +in the "Benchmarks" section below. + +Somewhat relatedly, many people have also asked for a version of +``os.listdir()`` that yields filenames as it iterates instead of returning them +as one big list. This improves memory efficiency for iterating very large +directories. + +So as well as a faster ``walk()``, scandir adds a new ``scandir()`` function. +They're pretty easy to use, but see "The API" below for the full docs. 
+ + +Benchmarks +---------- + +Below are results showing how many times as fast ``scandir.walk()`` is than +``os.walk()`` on various systems, found by running ``benchmark.py`` with no +arguments: + +==================== ============== ============= +System version Python version Times as fast +==================== ============== ============= +Windows 7 64-bit 2.7.7 64-bit 10.4 +Windows 7 64-bit SSD 2.7.7 64-bit 10.3 +Windows 7 64-bit NFS 2.7.6 64-bit 36.8 +Windows 7 64-bit SSD 3.4.1 64-bit 9.9 +Windows 7 64-bit SSD 3.5.0 64-bit 9.5 +CentOS 6.2 64-bit 2.6.6 64-bit 3.9 +Ubuntu 14.04 64-bit 2.7.6 64-bit 5.8 +Mac OS X 10.9.3 2.7.5 64-bit 3.8 +==================== ============== ============= + +All of the above tests were done using the fast C version of scandir +(source code in ``_scandir.c``). + +Note that the gains are less than the above on smaller directories and greater +on larger directories. This is why ``benchmark.py`` creates a test directory +tree with a standardized size. + + +The API +------- + +walk() +~~~~~~ + +The API for ``scandir.walk()`` is exactly the same as ``os.walk()``, so just +`read the Python docs `_. + +scandir() +~~~~~~~~~ + +The full docs for ``scandir()`` and the ``DirEntry`` objects it yields are +available in the `Python documentation here `_. +But below is a brief summary as well. + + scandir(path='.') -> iterator of DirEntry objects for given path + +Like ``listdir``, ``scandir`` calls the operating system's directory +iteration system calls to get the names of the files in the given +``path``, but it's different from ``listdir`` in two ways: + +* Instead of returning bare filename strings, it returns lightweight + ``DirEntry`` objects that hold the filename string and provide + simple methods that allow access to the additional data the + operating system may have returned. + +* It returns a generator instead of a list, so that ``scandir`` acts + as a true iterator instead of returning the full list immediately. 
+ +``scandir()`` yields a ``DirEntry`` object for each file and +sub-directory in ``path``. Just like ``listdir``, the ``'.'`` +and ``'..'`` pseudo-directories are skipped, and the entries are +yielded in system-dependent order. Each ``DirEntry`` object has the +following attributes and methods: + +* ``name``: the entry's filename, relative to the scandir ``path`` + argument (corresponds to the return values of ``os.listdir``) + +* ``path``: the entry's full path name (not necessarily an absolute + path) -- the equivalent of ``os.path.join(scandir_path, entry.name)`` + +* ``is_dir(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_dir()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + +* ``is_file(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_file()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + +* ``is_symlink()``: similar to ``pathlib.Path.is_symlink()``, but the + return value is cached on the ``DirEntry`` object; doesn't require a + system call in most cases + +* ``stat(*, follow_symlinks=True)``: like ``os.stat()``, but the + return value is cached on the ``DirEntry`` object; does not require a + system call on Windows (except for symlinks); don't follow symbolic links + (like ``os.lstat()``) if ``follow_symlinks`` is False + +* ``inode()``: return the inode number of the entry; the return value + is cached on the ``DirEntry`` object + +Here's a very simple example of ``scandir()`` showing use of the +``DirEntry.name`` attribute and the ``DirEntry.is_dir()`` method: + +.. code-block:: python + + def subdirs(path): + """Yield directory names not starting with '.' 
under given path.""" + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_dir(): + yield entry.name + +This ``subdirs()`` function will be significantly faster with scandir +than ``os.listdir()`` and ``os.path.isdir()`` on both Windows and POSIX +systems, especially on medium-sized or large directories. + + +Further reading +--------------- + +* `The Python docs for scandir <https://docs.python.org/3.5/library/os.html#os.scandir>`_ +* `PEP 471 <https://www.python.org/dev/peps/pep-0471/>`_, the + (now-accepted) Python Enhancement Proposal that proposed adding + ``scandir`` to the standard library -- a lot of details here, + including rejected ideas and previous discussion + + +Flames, comments, bug reports +----------------------------- + +Please send flames, comments, and questions about scandir to Ben Hoyt: + +http://benhoyt.com/ + +File bug reports for the version in the Python 3.5 standard library +`here <https://docs.python.org/3.5/bugs.html>`_, or file bug reports +or feature requests for this module at the GitHub project page: + +https://github.com/benhoyt/scandir diff --git a/third_party/python/scandir/_scandir.c b/third_party/python/scandir/_scandir.c new file mode 100644 index 000000000000..b35f17041d08 --- /dev/null +++ b/third_party/python/scandir/_scandir.c @@ -0,0 +1,1833 @@ +/* C speedups for scandir module + +This is divided into four sections (each prefixed with a "SECTION:" +comment): + +1) Python 2/3 compatibility +2) Helper utilities from posixmodule.c, fileutils.h, etc +3) SECTION: Main DirEntry and scandir implementation, taken from + Python 3.5's posixmodule.c +4) Module and method definitions and initialization code + +*/ + +#include +#include +#include +#include "osdefs.h" + +#ifdef MS_WINDOWS +#include +#include "winreparse.h" +#else +#include +#ifndef HAVE_DIRENT_H +#define HAVE_DIRENT_H 1 +#endif +#endif + +#define MODNAME "scandir" + + +/* SECTION: Python 2/3 compatibility */ + +#if PY_MAJOR_VERSION >= 3 +#define INIT_ERROR return NULL +#else +#define INIT_ERROR return +// Because on PyPy, Py_FileSystemDefaultEncoding is (was) defined to be
/* Windows replacement for "struct stat", used when the interpreter headers
 * do not already provide _Py_stat_struct.  Instances are zero-filled and then
 * populated by _Py_attribute_data_to_stat() from a
 * BY_HANDLE_FILE_INFORMATION record. */
struct _Py_stat_struct {
    unsigned long st_dev;               /* taken from dwVolumeSerialNumber */
    unsigned __int64 st_ino;            /* nFileIndexHigh:nFileIndexLow combined */
    unsigned short st_mode;             /* synthesized by attributes_to_mode();
                                           type bits become S_IFLNK for symlinks */
    int st_nlink;                       /* taken from nNumberOfLinks */
    int st_uid;                         /* never assigned after the zero-fill */
    int st_gid;                         /* never assigned after the zero-fill */
    unsigned long st_rdev;              /* copy of st_dev */
    __int64 st_size;                    /* nFileSizeHigh:nFileSizeLow combined */
    time_t st_atime;                    /* from ftLastAccessTime, whole seconds */
    int st_atime_nsec;                  /* sub-second part of st_atime, in ns */
    time_t st_mtime;                    /* from ftLastWriteTime, whole seconds */
    int st_mtime_nsec;                  /* sub-second part of st_mtime, in ns */
    time_t st_ctime;                    /* from ftCreationTime, whole seconds */
    int st_ctime_nsec;                  /* sub-second part of st_ctime, in ns */
    unsigned long st_file_attributes;   /* raw dwFileAttributes bits */
};
*/ + *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + +/* Below, we *know* that ugo+r is 0444 */ +#if _S_IREAD != 0400 +#error Unsupported C library +#endif +static int +attributes_to_mode(DWORD attr) +{ + int m = 0; + if (attr & FILE_ATTRIBUTE_DIRECTORY) + m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ + else + m |= _S_IFREG; + if (attr & FILE_ATTRIBUTE_READONLY) + m |= 0444; + else + m |= 0666; + return m; +} + +void +_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->dwFileAttributes); + result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; + result->st_dev = info->dwVolumeSerialNumber; + result->st_rdev = result->st_dev; + FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->nNumberOfLinks; + result->st_ino = (((unsigned __int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow; + if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { + /* first clear the S_IFMT bits */ + result->st_mode ^= (result->st_mode & S_IFMT); + /* now set the bits that make this a symlink */ + result->st_mode |= S_IFLNK; + } + result->st_file_attributes = info->dwFileAttributes; +} + +static BOOL +get_target_path(HANDLE hdl, wchar_t **target_path) +{ + int buf_size, result_length; + wchar_t *buf; + + /* We have a good handle to the target, use it to determine + the target path name (then we'll call lstat on it). 
*/ + buf_size = GetFinalPathNameByHandleW(hdl, 0, 0, + VOLUME_NAME_DOS); + if(!buf_size) + return FALSE; + + buf = PyMem_New(wchar_t, buf_size+1); + if (!buf) { + SetLastError(ERROR_OUTOFMEMORY); + return FALSE; + } + + result_length = GetFinalPathNameByHandleW(hdl, + buf, buf_size, VOLUME_NAME_DOS); + + if(!result_length) { + PyMem_Free(buf); + return FALSE; + } + + if(!CloseHandle(hdl)) { + PyMem_Free(buf); + return FALSE; + } + + buf[result_length] = 0; + + *target_path = buf; + return TRUE; +} + +static int +win32_get_reparse_tag(HANDLE reparse_point_handle, ULONG *reparse_tag) +{ + char target_buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; + REPARSE_DATA_BUFFER *rdb = (REPARSE_DATA_BUFFER *)target_buffer; + DWORD n_bytes_returned; + + if (0 == DeviceIoControl( + reparse_point_handle, + FSCTL_GET_REPARSE_POINT, + NULL, 0, /* in buffer */ + target_buffer, sizeof(target_buffer), + &n_bytes_returned, + NULL)) /* we're not using OVERLAPPED_IO */ + return FALSE; + + if (reparse_tag) + *reparse_tag = rdb->ReparseTag; + + return TRUE; +} + +static void +find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData, + BY_HANDLE_FILE_INFORMATION *info, + ULONG *reparse_tag) +{ + memset(info, 0, sizeof(*info)); + info->dwFileAttributes = pFileData->dwFileAttributes; + info->ftCreationTime = pFileData->ftCreationTime; + info->ftLastAccessTime = pFileData->ftLastAccessTime; + info->ftLastWriteTime = pFileData->ftLastWriteTime; + info->nFileSizeHigh = pFileData->nFileSizeHigh; + info->nFileSizeLow = pFileData->nFileSizeLow; +/* info->nNumberOfLinks = 1; */ + if (pFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + *reparse_tag = pFileData->dwReserved0; + else + *reparse_tag = 0; +} + +static BOOL +attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) +{ + HANDLE hFindFile; + WIN32_FIND_DATAW FileData; + hFindFile = FindFirstFileW(pszFile, &FileData); + if (hFindFile == INVALID_HANDLE_VALUE) + return FALSE; + FindClose(hFindFile); + 
find_data_to_file_info_w(&FileData, info, reparse_tag); + return TRUE; +} + +static int +win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) +{ + int code; + HANDLE hFile, hFile2; + BY_HANDLE_FILE_INFORMATION info; + ULONG reparse_tag = 0; + wchar_t *target_path; + const wchar_t *dot; + + hFile = CreateFileW( + path, + FILE_READ_ATTRIBUTES, /* desired access */ + 0, /* share mode */ + NULL, /* security attributes */ + OPEN_EXISTING, + /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */ + /* FILE_FLAG_OPEN_REPARSE_POINT does not follow the symlink. + Because of this, calls like GetFinalPathNameByHandle will return + the symlink path again and not the actual final path. */ + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS| + FILE_FLAG_OPEN_REPARSE_POINT, + NULL); + + if (hFile == INVALID_HANDLE_VALUE) { + /* Either the target doesn't exist, or we don't have access to + get a handle to it. If the former, we need to return an error. + If the latter, we can use attributes_from_dir. */ + if (GetLastError() != ERROR_SHARING_VIOLATION) + return -1; + /* Could not get attributes on open file. Fall back to + reading the directory. */ + if (!attributes_from_dir_w(path, &info, &reparse_tag)) + /* Very strange. This should not fail now */ + return -1; + if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (traverse) { + /* Should traverse, but could not open reparse point handle */ + SetLastError(ERROR_SHARING_VIOLATION); + return -1; + } + } + } else { + if (!GetFileInformationByHandle(hFile, &info)) { + CloseHandle(hFile); + return -1; + } + if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (!win32_get_reparse_tag(hFile, &reparse_tag)) + return -1; + + /* Close the outer open file handle now that we're about to + reopen it with different flags. 
*/ + if (!CloseHandle(hFile)) + return -1; + + if (traverse) { + /* In order to call GetFinalPathNameByHandle we need to open + the file without the reparse handling flag set. */ + hFile2 = CreateFileW( + path, FILE_READ_ATTRIBUTES, FILE_SHARE_READ, + NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS, + NULL); + if (hFile2 == INVALID_HANDLE_VALUE) + return -1; + + if (!get_target_path(hFile2, &target_path)) + return -1; + + code = win32_xstat_impl_w(target_path, result, FALSE); + PyMem_Free(target_path); + return code; + } + } else + CloseHandle(hFile); + } + _Py_attribute_data_to_stat(&info, reparse_tag, result); + + /* Set S_IEXEC if it is an .exe, .bat, ... */ + dot = wcsrchr(path, '.'); + if (dot) { + if (_wcsicmp(dot, L".bat") == 0 || _wcsicmp(dot, L".cmd") == 0 || + _wcsicmp(dot, L".exe") == 0 || _wcsicmp(dot, L".com") == 0) + result->st_mode |= 0111; + } + return 0; +} + +static int +win32_xstat_w(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) +{ + /* Protocol violation: we explicitly clear errno, instead of + setting it to a POSIX error. Callers should use GetLastError. */ + int code = win32_xstat_impl_w(path, result, traverse); + errno = 0; + return code; +} + +static int +win32_lstat_w(const wchar_t* path, struct _Py_stat_struct *result) +{ + return win32_xstat_w(path, result, FALSE); +} + +static int +win32_stat_w(const wchar_t* path, struct _Py_stat_struct *result) +{ + return win32_xstat_w(path, result, TRUE); +} + +#endif /* MS_WINDOWS */ + +static PyTypeObject StatResultType; + +static PyObject *billion = NULL; + +static newfunc structseq_new; + +static PyObject * +statresult_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyStructSequence *result; + int i; + + result = (PyStructSequence*)structseq_new(type, args, kwds); + if (!result) + return NULL; + /* If we have been initialized from a tuple, + st_?time might be set to None. Initialize it + from the int slots. 
*/ + for (i = 7; i <= 9; i++) { + if (result->ob_item[i+3] == Py_None) { + Py_DECREF(Py_None); + Py_INCREF(result->ob_item[i]); + result->ob_item[i+3] = result->ob_item[i]; + } + } + return (PyObject*)result; +} + +/* If true, st_?time is float. */ +static int _stat_float_times = 1; + +static void +fill_time(PyObject *v, int index, time_t sec, unsigned long nsec) +{ +#if SIZEOF_TIME_T > SIZEOF_LONG + PyObject *s = PyLong_FromLongLong((PY_LONG_LONG)sec); +#else +#if PY_MAJOR_VERSION >= 3 + PyObject *s = PyLong_FromLong((long)sec); +#else + PyObject *s = PyInt_FromLong((long)sec); +#endif +#endif + PyObject *ns_fractional = PyLong_FromUnsignedLong(nsec); + PyObject *s_in_ns = NULL; + PyObject *ns_total = NULL; + PyObject *float_s = NULL; + + if (!(s && ns_fractional)) + goto exit; + + s_in_ns = PyNumber_Multiply(s, billion); + if (!s_in_ns) + goto exit; + + ns_total = PyNumber_Add(s_in_ns, ns_fractional); + if (!ns_total) + goto exit; + + if (_stat_float_times) { + float_s = PyFloat_FromDouble(sec + 1e-9*nsec); + if (!float_s) + goto exit; + } + else { + float_s = s; + Py_INCREF(float_s); + } + + PyStructSequence_SET_ITEM(v, index, s); + PyStructSequence_SET_ITEM(v, index+3, float_s); + PyStructSequence_SET_ITEM(v, index+6, ns_total); + s = NULL; + float_s = NULL; + ns_total = NULL; +exit: + Py_XDECREF(s); + Py_XDECREF(ns_fractional); + Py_XDECREF(s_in_ns); + Py_XDECREF(ns_total); + Py_XDECREF(float_s); +} + +#ifdef MS_WINDOWS +#define HAVE_STAT_NSEC 1 +#define HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES 1 +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE +#define ST_BLKSIZE_IDX 16 +#else +#define ST_BLKSIZE_IDX 15 +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BLOCKS +#define ST_BLOCKS_IDX (ST_BLKSIZE_IDX+1) +#else +#define ST_BLOCKS_IDX ST_BLKSIZE_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_RDEV +#define ST_RDEV_IDX (ST_BLOCKS_IDX+1) +#else +#define ST_RDEV_IDX ST_BLOCKS_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_FLAGS +#define ST_FLAGS_IDX (ST_RDEV_IDX+1) +#else +#define ST_FLAGS_IDX 
/* Pack a system stat C structure into a new StatResultType struct sequence.
 *
 * Slots 0-6 hold mode, inode, device, link count, uid, gid and size.
 * Slots 7-9 (integer seconds) together with their float and nanosecond
 * companions are written by fill_time().  The remaining, platform-dependent
 * slots are addressed through the ST_*_IDX macros and only exist when the
 * matching HAVE_STRUCT_STAT_* macro is defined.
 *
 * Individual conversions are not checked as they are stored; a single
 * PyErr_Occurred() test at the end detects any failure.
 *
 * Returns a new reference, or NULL with an exception set. */
static PyObject*
_pystat_fromstructstat(STRUCT_STAT *st)
{
    unsigned long ansec, mnsec, cnsec;
    PyObject *v = PyStructSequence_New(&StatResultType);
    if (v == NULL)
        return NULL;

    PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long)st->st_mode));
#ifdef HAVE_LARGEFILE_SUPPORT
    PyStructSequence_SET_ITEM(v, 1,
                              PyLong_FromUnsignedLongLong(st->st_ino));
#else
    PyStructSequence_SET_ITEM(v, 1, PyLong_FromUnsignedLong((unsigned long)st->st_ino));
#endif
#ifdef MS_WINDOWS
    PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLong(st->st_dev));
#else
    PyStructSequence_SET_ITEM(v, 2, _PyLong_FromDev(st->st_dev));
#endif
    PyStructSequence_SET_ITEM(v, 3, PyLong_FromLong((long)st->st_nlink));
#if defined(MS_WINDOWS)
    /* uid and gid are always reported as 0 on Windows */
    PyStructSequence_SET_ITEM(v, 4, PyLong_FromLong(0));
    PyStructSequence_SET_ITEM(v, 5, PyLong_FromLong(0));
#else
    PyStructSequence_SET_ITEM(v, 4, _PyLong_FromUid(st->st_uid));
    PyStructSequence_SET_ITEM(v, 5, _PyLong_FromGid(st->st_gid));
#endif
#ifdef HAVE_LARGEFILE_SUPPORT
    PyStructSequence_SET_ITEM(v, 6,
                              PyLong_FromLongLong((PY_LONG_LONG)st->st_size));
#else
    PyStructSequence_SET_ITEM(v, 6, PyLong_FromLong(st->st_size));
#endif

    /* Pick whichever sub-second field naming this platform's stat struct
       uses; fall back to 0 ns when none is available. */
#if defined(HAVE_STAT_TV_NSEC)
    ansec = st->st_atim.tv_nsec;
    mnsec = st->st_mtim.tv_nsec;
    cnsec = st->st_ctim.tv_nsec;
#elif defined(HAVE_STAT_TV_NSEC2)
    ansec = st->st_atimespec.tv_nsec;
    mnsec = st->st_mtimespec.tv_nsec;
    cnsec = st->st_ctimespec.tv_nsec;
#elif defined(HAVE_STAT_NSEC)
    ansec = st->st_atime_nsec;
    mnsec = st->st_mtime_nsec;
    cnsec = st->st_ctime_nsec;
#else
    ansec = mnsec = cnsec = 0;
#endif
    /* Each call fills slot i, i+3 and i+6 (int seconds, float, total ns). */
    fill_time(v, 7, st->st_atime, ansec);
    fill_time(v, 8, st->st_mtime, mnsec);
    fill_time(v, 9, st->st_ctime, cnsec);

#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    PyStructSequence_SET_ITEM(v, ST_BLKSIZE_IDX,
                              PyLong_FromLong((long)st->st_blksize));
#endif
#ifdef HAVE_STRUCT_STAT_ST_BLOCKS
    PyStructSequence_SET_ITEM(v, ST_BLOCKS_IDX,
                              PyLong_FromLong((long)st->st_blocks));
#endif
#ifdef HAVE_STRUCT_STAT_ST_RDEV
    PyStructSequence_SET_ITEM(v, ST_RDEV_IDX,
                              PyLong_FromLong((long)st->st_rdev));
#endif
#ifdef HAVE_STRUCT_STAT_ST_GEN
    PyStructSequence_SET_ITEM(v, ST_GEN_IDX,
                              PyLong_FromLong((long)st->st_gen));
#endif
#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
    {
      PyObject *val;
      unsigned long bsec,bnsec;
      bsec = (long)st->st_birthtime;
#ifdef HAVE_STAT_TV_NSEC2
      bnsec = st->st_birthtimespec.tv_nsec;
#else
      bnsec = 0;
#endif
      /* Birth time follows the same float/int switch as the other times. */
      if (_stat_float_times) {
        val = PyFloat_FromDouble(bsec + 1e-9*bnsec);
      } else {
        val = PyLong_FromLong((long)bsec);
      }
      PyStructSequence_SET_ITEM(v, ST_BIRTHTIME_IDX,
                                val);
    }
#endif
#ifdef HAVE_STRUCT_STAT_ST_FLAGS
    PyStructSequence_SET_ITEM(v, ST_FLAGS_IDX,
                              PyLong_FromLong((long)st->st_flags));
#endif
#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES
    PyStructSequence_SET_ITEM(v, ST_FILE_ATTRIBUTES_IDX,
                              PyLong_FromUnsignedLong(st->st_file_attributes));
#endif

    /* Deferred error check covering every conversion above. */
    if (PyErr_Occurred()) {
        Py_DECREF(v);
        return NULL;
    }

    return v;
}
/* Argument converter that turns a Python path object into a path_t.
 *
 * o: the object to convert, or NULL when invoked as the cleanup call.
 * p: the path_t to fill; its function_name, argument_name and nullable
 *    members are read here and are expected to be set by the caller.
 *
 * Returns:
 *   0                     on failure, with a Python exception set;
 *   1                     for the cleanup call (o == NULL) and for an
 *                         accepted None (nothing to release afterwards);
 *   Py_CLEANUP_SUPPORTED  on success; path->cleanup then owns a reference
 *                         that path_cleanup() releases, and path->wide or
 *                         path->narrow points into that object.
 *
 * On Windows a text path is stored in path->wide; everywhere else, and for
 * bytes input, the path is stored in path->narrow.  path->fd is always set
 * to -1. */
static int
path_converter(PyObject *o, void *p) {
    path_t *path = (path_t *)p;
    PyObject *unicode, *bytes;
    Py_ssize_t length;
    char *narrow;

/* Raise `exc` with a message prefixed by "<function_name>: " when a function
   name was supplied; the argument name (default "path") is passed as the
   value for a %s in `fmt`. */
#define FORMAT_EXCEPTION(exc, fmt) \
    PyErr_Format(exc, "%s%s" fmt, \
        path->function_name ? path->function_name : "", \
        path->function_name ? ": "                : "", \
        path->argument_name ? path->argument_name : "path")

    /* Py_CLEANUP_SUPPORTED support */
    if (o == NULL) {
        path_cleanup(path);
        return 1;
    }

    /* ensure it's always safe to call path_cleanup() */
    path->cleanup = NULL;

    if (o == Py_None) {
        if (!path->nullable) {
            FORMAT_EXCEPTION(PyExc_TypeError,
                             "can't specify None for %s argument");
            return 0;
        }
        path->wide = NULL;
        path->narrow = NULL;
        path->length = 0;
        path->object = o;
        path->fd = -1;
        return 1;
    }

    /* First try to treat the argument as text. */
    unicode = PyUnicode_FromObject(o);
    if (unicode) {
#ifdef MS_WINDOWS
        wchar_t *wide;

        wide = PyUnicode_AsUnicodeAndSize(unicode, &length);
        if (!wide) {
            Py_DECREF(unicode);
            return 0;
        }
        if (length > 32767) {
            FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows");
            Py_DECREF(unicode);
            return 0;
        }
        /* A length mismatch means a NUL sits inside the string. */
        if (wcslen(wide) != length) {
            FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character");
            Py_DECREF(unicode);
            return 0;
        }

        path->wide = wide;
        path->narrow = NULL;
        path->length = length;
        path->object = o;
        path->fd = -1;
        /* `wide` points into `unicode`, so keep it alive until cleanup */
        path->cleanup = unicode;
        return Py_CLEANUP_SUPPORTED;
#else
        /* Non-Windows: encode the text to bytes and continue below. */
#if PY_MAJOR_VERSION >= 3
        if (!PyUnicode_FSConverter(unicode, &bytes))
            bytes = NULL;
#else
        bytes = PyUnicode_AsEncodedString(unicode, FS_ENCODING, "strict");
#endif
        Py_DECREF(unicode);
#endif
    }
    else {
        /* Not text: discard that error and try a bytes-like object. */
        PyErr_Clear();
#if PY_MAJOR_VERSION >= 3
        if (PyObject_CheckBuffer(o)) {
            bytes = PyBytes_FromObject(o);
        }
#else
        if (PyString_Check(o)) {
            bytes = o;
            Py_INCREF(bytes);
        }
#endif
        else
            bytes = NULL;
        if (!bytes) {
            PyErr_Clear();
        }
    }

    if (!bytes) {
        /* Keep an encoding error if one is pending; otherwise report that
           the argument type is unsupported. */
        if (!PyErr_Occurred())
            FORMAT_EXCEPTION(PyExc_TypeError, "illegal type for %s parameter");
        return 0;
    }

#ifdef MS_WINDOWS
    /* Nonzero means the deprecation warning was turned into an exception. */
    if (win32_warn_bytes_api()) {
        Py_DECREF(bytes);
        return 0;
    }
#endif

    length = PyBytes_GET_SIZE(bytes);
#ifdef MS_WINDOWS
    if (length > MAX_PATH-1) {
        FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows");
        Py_DECREF(bytes);
        return 0;
    }
#endif

    narrow = PyBytes_AS_STRING(bytes);
    /* A length mismatch means a NUL sits inside the byte string. */
    if ((size_t)length != strlen(narrow)) {
        FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s");
        Py_DECREF(bytes);
        return 0;
    }

    path->wide = NULL;
    path->narrow = narrow;
    path->length = length;
    path->object = o;
    path->fd = -1;
    /* `narrow` points into `bytes`, so keep it alive until cleanup */
    path->cleanup = bytes;
    return Py_CLEANUP_SUPPORTED;
}
"|$p:DirEntry.stat"; +#else +static char *follow_symlinks_format = "|i:DirEntry.stat"; +#endif + +typedef struct { + PyObject_HEAD + PyObject *name; + PyObject *path; + PyObject *stat; + PyObject *lstat; +#ifdef MS_WINDOWS + struct _Py_stat_struct win32_lstat; + unsigned __int64 win32_file_index; + int got_file_index; +#if PY_MAJOR_VERSION < 3 + int name_path_bytes; +#endif +#else /* POSIX */ +#ifdef HAVE_DIRENT_D_TYPE + unsigned char d_type; +#endif + ino_t d_ino; +#endif +} DirEntry; + +static void +DirEntry_dealloc(DirEntry *entry) +{ + Py_XDECREF(entry->name); + Py_XDECREF(entry->path); + Py_XDECREF(entry->stat); + Py_XDECREF(entry->lstat); + Py_TYPE(entry)->tp_free((PyObject *)entry); +} + +/* Forward reference */ +static int +DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits); + +/* Set exception and return -1 on error, 0 for False, 1 for True */ +static int +DirEntry_is_symlink(DirEntry *self) +{ +#ifdef MS_WINDOWS + return (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; +#elif defined(HAVE_DIRENT_D_TYPE) + /* POSIX */ + if (self->d_type != DT_UNKNOWN) + return self->d_type == DT_LNK; + else + return DirEntry_test_mode(self, 0, S_IFLNK); +#else + /* POSIX without d_type */ + return DirEntry_test_mode(self, 0, S_IFLNK); +#endif +} + +static PyObject * +DirEntry_py_is_symlink(DirEntry *self) +{ + int result; + + result = DirEntry_is_symlink(self); + if (result == -1) + return NULL; + return PyBool_FromLong(result); +} + +static PyObject * +DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) +{ + int result; + struct _Py_stat_struct st; + +#ifdef MS_WINDOWS + wchar_t *path; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (follow_symlinks) + result = win32_stat_w(path, &st); + else + result = win32_lstat_w(path, &st); + + if (result != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } +#else /* POSIX */ + PyObject *bytes; + char *path; + +#if 
/* Test whether the entry's file type equals `mode_bits` (S_IFDIR, S_IFREG or
 * S_IFLNK), following symlinks when `follow_symlinks` is nonzero.
 *
 * Set exception and return -1 on error, 0 for False, 1 for True.
 *
 * Where the directory listing already supplied the type (Windows lstat data
 * or POSIX d_type) no stat call is made unless a symlink has to be followed
 * or d_type is DT_UNKNOWN.  On platforms with neither, the stat path below is
 * always taken. */
static int
DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits)
{
    PyObject *stat = NULL;
    PyObject *st_mode = NULL;
    long mode;
    int result;
#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE)
    int is_symlink;
    int need_stat;
#endif
#ifdef MS_WINDOWS
    unsigned long dir_bits;
#endif
    _Py_IDENTIFIER(st_mode);

#ifdef MS_WINDOWS
    is_symlink = (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK;
    need_stat = follow_symlinks && is_symlink;
#elif defined(HAVE_DIRENT_D_TYPE)
    is_symlink = self->d_type == DT_LNK;
    need_stat = self->d_type == DT_UNKNOWN || (follow_symlinks && is_symlink);
#endif

    /* NOTE: the "if (need_stat) {" opened here is closed inside the matching
       #if further down; without d_type support the stat code runs
       unconditionally. */
#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE)
    if (need_stat) {
#endif
        stat = DirEntry_get_stat(self, follow_symlinks);
        if (!stat) {
            /* Where this file's compatibility macro maps
               PyExc_FileNotFoundError to PyExc_OSError, every OSError
               matches this test. */
            if (PyErr_ExceptionMatches(PyExc_FileNotFoundError)) {
                /* If file doesn't exist (anymore), then return False
                   (i.e., say it's not a file/directory) */
                PyErr_Clear();
                return 0;
            }
            goto error;
        }
        st_mode = _PyObject_GetAttrId(stat, &PyId_st_mode);
        if (!st_mode)
            goto error;

        mode = PyLong_AsLong(st_mode);
        if (mode == -1 && PyErr_Occurred())
            goto error;
        Py_CLEAR(st_mode);
        Py_CLEAR(stat);
        result = (mode & S_IFMT) == mode_bits;
#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE)
    }
    else if (is_symlink) {
        /* A symlink that is not being followed is neither dir nor file. */
        assert(mode_bits != S_IFLNK);
        result = 0;
    }
    else {
        /* Type is known from the listing and the entry is not a symlink. */
        assert(mode_bits == S_IFDIR || mode_bits == S_IFREG);
#ifdef MS_WINDOWS
        dir_bits = self->win32_lstat.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY;
        if (mode_bits == S_IFDIR)
            result = dir_bits != 0;
        else
            result = dir_bits == 0;
#else /* POSIX */
        if (mode_bits == S_IFDIR)
            result = self->d_type == DT_DIR;
        else
            result = self->d_type == DT_REG;
#endif
    }
#endif

    return result;

error:
    Py_XDECREF(st_mode);
    Py_XDECREF(stat);
    return -1;
}
follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFDIR); +} + +static PyObject * +DirEntry_is_file(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, follow_symlinks_format, + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFREG); +} + +static PyObject * +DirEntry_inode(DirEntry *self) +{ +#ifdef MS_WINDOWS + if (!self->got_file_index) { + wchar_t *path; + struct _Py_stat_struct stat; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (win32_lstat_w(path, &stat) != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } + + self->win32_file_index = stat.st_ino; + self->got_file_index = 1; + } + return PyLong_FromUnsignedLongLong(self->win32_file_index); +#else /* POSIX */ +#ifdef HAVE_LARGEFILE_SUPPORT + return PyLong_FromUnsignedLongLong(self->d_ino); +#else + return PyLong_FromUnsignedLong((unsigned long)self->d_ino); +#endif +#endif +} + +#if PY_MAJOR_VERSION < 3 && defined(MS_WINDOWS) + +PyObject *DirEntry_name_getter(DirEntry *self, void *closure) { + if (self->name_path_bytes) { + return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(self->name), + PyUnicode_GetSize(self->name), "strict"); + } else { + Py_INCREF(self->name); + return self->name; + } +} + +PyObject *DirEntry_path_getter(DirEntry *self, void *closure) { + if (self->name_path_bytes) { + return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(self->path), + PyUnicode_GetSize(self->path), "strict"); + } else { + Py_INCREF(self->path); + return self->path; + } +} + +static PyGetSetDef DirEntry_getset[] = { + {"name", (getter)DirEntry_name_getter, NULL, + "the entry's base filename, relative to scandir() \"path\" argument", NULL}, + {"path", (getter)DirEntry_path_getter, NULL, + "the entry's full path name; equivalent to 
os.path.join(scandir_path, entry.name)", NULL}, + {NULL} +}; + +#else + +static PyMemberDef DirEntry_members[] = { + {"name", T_OBJECT_EX, offsetof(DirEntry, name), READONLY, + "the entry's base filename, relative to scandir() \"path\" argument"}, + {"path", T_OBJECT_EX, offsetof(DirEntry, path), READONLY, + "the entry's full path name; equivalent to os.path.join(scandir_path, entry.name)"}, + {NULL} +}; + +#endif + +static PyObject * +DirEntry_repr(DirEntry *self) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("<DirEntry %R>", self->name); +#elif defined(MS_WINDOWS) + PyObject *name; + PyObject *name_repr; + PyObject *entry_repr; + + name = DirEntry_name_getter(self, NULL); + if (!name) + return NULL; + name_repr = PyObject_Repr(name); + Py_DECREF(name); + if (!name_repr) + return NULL; + entry_repr = PyString_FromFormat("<DirEntry %s>", PyString_AsString(name_repr)); + Py_DECREF(name_repr); + return entry_repr; +#else + PyObject *name_repr; + PyObject *entry_repr; + + name_repr = PyObject_Repr(self->name); + if (!name_repr) + return NULL; + entry_repr = PyString_FromFormat("<DirEntry %s>", PyString_AsString(name_repr)); + Py_DECREF(name_repr); + return entry_repr; +#endif +} + +static PyMethodDef DirEntry_methods[] = { + {"is_dir", (PyCFunction)DirEntry_is_dir, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a directory; cached per entry" + }, + {"is_file", (PyCFunction)DirEntry_is_file, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a file; cached per entry" + }, + {"is_symlink", (PyCFunction)DirEntry_py_is_symlink, METH_NOARGS, + "return True if the entry is a symbolic link; cached per entry" + }, + {"stat", (PyCFunction)DirEntry_stat, METH_VARARGS | METH_KEYWORDS, + "return stat_result object for the entry; cached per entry" + }, + {"inode", (PyCFunction)DirEntry_inode, METH_NOARGS, + "return inode of the entry; cached per entry", + }, + {NULL} +}; + +static PyTypeObject DirEntryType = { + PyVarObject_HEAD_INIT(NULL, 0) + MODNAME ".DirEntry", /* tp_name */ + 
sizeof(DirEntry), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)DirEntry_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)DirEntry_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + DirEntry_methods, /* tp_methods */ +#if PY_MAJOR_VERSION < 3 && defined(MS_WINDOWS) + NULL, /* tp_members */ + DirEntry_getset, /* tp_getset */ +#else + DirEntry_members, /* tp_members */ + NULL, /* tp_getset */ +#endif +}; + +#ifdef MS_WINDOWS + +static wchar_t * +join_path_filenameW(wchar_t *path_wide, wchar_t* filename) +{ + Py_ssize_t path_len; + Py_ssize_t size; + wchar_t *result; + wchar_t ch; + + if (!path_wide) { /* Default arg: "." 
*/ + path_wide = L"."; + path_len = 1; + } + else { + path_len = wcslen(path_wide); + } + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + wcslen(filename) + 1; + result = PyMem_New(wchar_t, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + wcscpy(result, path_wide); + if (path_len > 0) { + ch = result[path_len - 1]; + if (ch != SEP && ch != ALTSEP && ch != L':') + result[path_len++] = SEP; + wcscpy(result + path_len, filename); + } + return result; +} + +static PyObject * +DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW) +{ + DirEntry *entry; + BY_HANDLE_FILE_INFORMATION file_info; + ULONG reparse_tag; + wchar_t *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + entry->got_file_index = 0; +#if PY_MAJOR_VERSION < 3 + entry->name_path_bytes = path->object && PyBytes_Check(path->object); +#endif + + entry->name = PyUnicode_FromWideChar(dataW->cFileName, wcslen(dataW->cFileName)); + if (!entry->name) + goto error; + + joined_path = join_path_filenameW(path->wide, dataW->cFileName); + if (!joined_path) + goto error; + + entry->path = PyUnicode_FromWideChar(joined_path, wcslen(joined_path)); + PyMem_Free(joined_path); + if (!entry->path) + goto error; + + find_data_to_file_info_w(dataW, &file_info, &reparse_tag); + _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); + + return (PyObject *)entry; + +error: + Py_DECREF(entry); + return NULL; +} + +#else /* POSIX */ + +static char * +join_path_filename(char *path_narrow, char* filename, Py_ssize_t filename_len) +{ + Py_ssize_t path_len; + Py_ssize_t size; + char *result; + + if (!path_narrow) { /* Default arg: "." 
*/ + path_narrow = "."; + path_len = 1; + } + else { + path_len = strlen(path_narrow); + } + + if (filename_len == -1) + filename_len = strlen(filename); + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + filename_len + 1; + result = PyMem_New(char, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + strcpy(result, path_narrow); + if (path_len > 0 && result[path_len - 1] != '/') + result[path_len++] = '/'; + strcpy(result + path_len, filename); + return result; +} + +static PyObject * +DirEntry_from_posix_info(path_t *path, char *name, Py_ssize_t name_len, + ino_t d_ino +#ifdef HAVE_DIRENT_D_TYPE + , unsigned char d_type +#endif + ) +{ + DirEntry *entry; + char *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + + joined_path = join_path_filename(path->narrow, name, name_len); + if (!joined_path) + goto error; + + if (!path->narrow || !PyBytes_Check(path->object)) { +#if PY_MAJOR_VERSION >= 3 + entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); + entry->path = PyUnicode_DecodeFSDefault(joined_path); +#else + entry->name = PyUnicode_Decode(name, name_len, + FS_ENCODING, "strict"); + entry->path = PyUnicode_Decode(joined_path, strlen(joined_path), + FS_ENCODING, "strict"); +#endif + } + else { + entry->name = PyBytes_FromStringAndSize(name, name_len); + entry->path = PyBytes_FromString(joined_path); + } + PyMem_Free(joined_path); + if (!entry->name || !entry->path) + goto error; + +#ifdef HAVE_DIRENT_D_TYPE + entry->d_type = d_type; +#endif + entry->d_ino = d_ino; + + return (PyObject *)entry; + +error: + Py_XDECREF(entry); + return NULL; +} + +#endif + + +typedef struct { + PyObject_HEAD + path_t path; +#ifdef MS_WINDOWS + HANDLE handle; + WIN32_FIND_DATAW file_data; + int first_time; +#else /* POSIX */ + DIR *dirp; +#endif +} ScandirIterator; + +#ifdef MS_WINDOWS + +static void 
+ScandirIterator_close(ScandirIterator *iterator) +{ + if (iterator->handle == INVALID_HANDLE_VALUE) + return; + + Py_BEGIN_ALLOW_THREADS + FindClose(iterator->handle); + Py_END_ALLOW_THREADS + iterator->handle = INVALID_HANDLE_VALUE; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + WIN32_FIND_DATAW *file_data = &iterator->file_data; + BOOL success; + + /* Happens if the iterator is iterated twice */ + if (iterator->handle == INVALID_HANDLE_VALUE) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + if (!iterator->first_time) { + Py_BEGIN_ALLOW_THREADS + success = FindNextFileW(iterator->handle, file_data); + Py_END_ALLOW_THREADS + if (!success) { + if (GetLastError() != ERROR_NO_MORE_FILES) + return path_error(&iterator->path); + /* No more files found in directory, stop iterating */ + break; + } + } + iterator->first_time = 0; + + /* Skip over . and .. */ + if (wcscmp(file_data->cFileName, L".") != 0 && + wcscmp(file_data->cFileName, L"..") != 0) + return DirEntry_from_find_data(&iterator->path, file_data); + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#else /* POSIX */ + +static void +ScandirIterator_close(ScandirIterator *iterator) +{ + if (!iterator->dirp) + return; + + Py_BEGIN_ALLOW_THREADS + closedir(iterator->dirp); + Py_END_ALLOW_THREADS + iterator->dirp = NULL; + return; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + struct dirent *direntp; + Py_ssize_t name_len; + int is_dot; + + /* Happens if the iterator is iterated twice */ + if (!iterator->dirp) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + direntp = readdir(iterator->dirp); + Py_END_ALLOW_THREADS + + if (!direntp) { + if (errno != 0) + return path_error(&iterator->path); + /* No more files found in directory, stop 
iterating */ + break; + } + + /* Skip over . and .. */ + name_len = NAMLEN(direntp); + is_dot = direntp->d_name[0] == '.' && + (name_len == 1 || (direntp->d_name[1] == '.' && name_len == 2)); + if (!is_dot) { + return DirEntry_from_posix_info(&iterator->path, direntp->d_name, + name_len, direntp->d_ino +#ifdef HAVE_DIRENT_D_TYPE + , direntp->d_type +#endif + ); + } + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#endif + +static void +ScandirIterator_dealloc(ScandirIterator *iterator) +{ + ScandirIterator_close(iterator); + Py_XDECREF(iterator->path.object); + path_cleanup(&iterator->path); + Py_TYPE(iterator)->tp_free((PyObject *)iterator); +} + +static PyTypeObject ScandirIteratorType = { + PyVarObject_HEAD_INIT(NULL, 0) + MODNAME ".ScandirIterator", /* tp_name */ + sizeof(ScandirIterator), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)ScandirIterator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)ScandirIterator_iternext, /* tp_iternext */ +}; + +static PyObject * +posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs) +{ + ScandirIterator *iterator; + static char *keywords[] = {"path", NULL}; +#ifdef MS_WINDOWS + wchar_t *path_strW; +#else + char *path; +#endif + + iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); + if (!iterator) + return NULL; + memset(&iterator->path, 0, sizeof(path_t)); + 
iterator->path.function_name = "scandir"; + iterator->path.nullable = 1; + +#ifdef MS_WINDOWS + iterator->handle = INVALID_HANDLE_VALUE; +#else + iterator->dirp = NULL; +#endif + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O&:scandir", keywords, + path_converter, &iterator->path)) + goto error; + + /* path_converter doesn't keep path.object around, so do it + manually for the lifetime of the iterator here (the refcount + is decremented in ScandirIterator_dealloc) + */ + Py_XINCREF(iterator->path.object); + +#ifdef MS_WINDOWS + if (iterator->path.narrow) { + PyErr_SetString(PyExc_TypeError, + "os.scandir() doesn't support bytes path on Windows, use Unicode instead"); + goto error; + } + iterator->first_time = 1; + + path_strW = join_path_filenameW(iterator->path.wide, L"*.*"); + if (!path_strW) + goto error; + + Py_BEGIN_ALLOW_THREADS + iterator->handle = FindFirstFileW(path_strW, &iterator->file_data); + Py_END_ALLOW_THREADS + + PyMem_Free(path_strW); + + if (iterator->handle == INVALID_HANDLE_VALUE) { + path_error(&iterator->path); + goto error; + } +#else /* POSIX */ + if (iterator->path.narrow) + path = iterator->path.narrow; + else + path = "."; + + errno = 0; + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(path); + Py_END_ALLOW_THREADS + + if (!iterator->dirp) { + path_error(&iterator->path); + goto error; + } +#endif + + return (PyObject *)iterator; + +error: + Py_DECREF(iterator); + return NULL; +} + + +/* SECTION: Module and method definitions and initialization code */ + +static PyMethodDef scandir_methods[] = { + {"scandir", (PyCFunction)posix_scandir, + METH_VARARGS | METH_KEYWORDS, + posix_scandir__doc__}, + {NULL, NULL}, +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_scandir", + NULL, + 0, + scandir_methods, + NULL, + NULL, + NULL, + NULL, +}; +#endif + +#if PY_MAJOR_VERSION >= 3 +PyObject * +PyInit__scandir(void) +{ + PyObject *module = PyModule_Create(&moduledef); +#else +void 
+init_scandir(void) +{ + PyObject *module = Py_InitModule("_scandir", scandir_methods); +#endif + if (module == NULL) { + INIT_ERROR; + } + + billion = PyLong_FromLong(1000000000); + if (!billion) + INIT_ERROR; + + stat_result_desc.fields[7].name = PyStructSequence_UnnamedField; + stat_result_desc.fields[8].name = PyStructSequence_UnnamedField; + stat_result_desc.fields[9].name = PyStructSequence_UnnamedField; + PyStructSequence_InitType(&StatResultType, &stat_result_desc); + structseq_new = StatResultType.tp_new; + StatResultType.tp_new = statresult_new; + + if (PyType_Ready(&ScandirIteratorType) < 0) + INIT_ERROR; + if (PyType_Ready(&DirEntryType) < 0) + INIT_ERROR; + + PyModule_AddObject(module, "DirEntry", (PyObject *)&DirEntryType); + +#if PY_MAJOR_VERSION >= 3 + return module; +#endif +} diff --git a/third_party/python/scandir/benchmark.py b/third_party/python/scandir/benchmark.py new file mode 100644 index 000000000000..89a4b9d8916d --- /dev/null +++ b/third_party/python/scandir/benchmark.py @@ -0,0 +1,192 @@ +"""Simple benchmark to compare the speed of scandir.walk() with os.walk().""" + +import optparse +import os +import stat +import sys +import timeit + +import warnings +with warnings.catch_warnings(record=True): + import scandir + +DEPTH = 4 +NUM_DIRS = 5 +NUM_FILES = 50 + + +def os_walk_pre_35(top, topdown=True, onerror=None, followlinks=False): + """Pre Python 3.5 implementation of os.walk() that doesn't use scandir.""" + islink, join, isdir = os.path.islink, os.path.join, os.path.isdir + + try: + names = os.listdir(top) + except OSError as err: + if onerror is not None: + onerror(err) + return + + dirs, nondirs = [], [] + for name in names: + if isdir(join(top, name)): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield top, dirs, nondirs + for name in dirs: + new_path = join(top, name) + if followlinks or not islink(new_path): + for x in os_walk_pre_35(new_path, topdown, onerror, followlinks): + yield x + if not topdown: + 
yield top, dirs, nondirs + + +def create_tree(path, depth=DEPTH): + """Create a directory tree at path with given depth, and NUM_DIRS and + NUM_FILES at each level. + """ + os.mkdir(path) + for i in range(NUM_FILES): + filename = os.path.join(path, 'file{0:03}.txt'.format(i)) + with open(filename, 'wb') as f: + f.write(b'foo') + if depth <= 1: + return + for i in range(NUM_DIRS): + dirname = os.path.join(path, 'dir{0:03}'.format(i)) + create_tree(dirname, depth - 1) + + +def get_tree_size(path): + """Return total size of all files in directory tree at path.""" + size = 0 + try: + for entry in scandir.scandir(path): + if entry.is_symlink(): + pass + elif entry.is_dir(): + size += get_tree_size(os.path.join(path, entry.name)) + else: + size += entry.stat().st_size + except OSError: + pass + return size + + +def benchmark(path, get_size=False): + sizes = {} + + if get_size: + def do_os_walk(): + size = 0 + for root, dirs, files in os.walk(path): + for filename in files: + fullname = os.path.join(root, filename) + st = os.lstat(fullname) + if not stat.S_ISLNK(st.st_mode): + size += st.st_size + sizes['os_walk'] = size + + def do_scandir_walk(): + sizes['scandir_walk'] = get_tree_size(path) + + else: + def do_os_walk(): + for root, dirs, files in os.walk(path): + pass + + def do_scandir_walk(): + for root, dirs, files in scandir.walk(path): + pass + + # Run this once first to cache things, so we're not benchmarking I/O + print("Priming the system's cache...") + do_scandir_walk() + + # Use the best of 3 time for each of them to eliminate high outliers + os_walk_time = 1000000 + scandir_walk_time = 1000000 + N = 3 + for i in range(N): + print('Benchmarking walks on {0}, repeat {1}/{2}...'.format( + path, i + 1, N)) + os_walk_time = min(os_walk_time, timeit.timeit(do_os_walk, number=1)) + scandir_walk_time = min(scandir_walk_time, + timeit.timeit(do_scandir_walk, number=1)) + + if get_size: + if sizes['os_walk'] == sizes['scandir_walk']: + equality = 'equal' + else: + 
equality = 'NOT EQUAL!' + print('os.walk size {0}, scandir.walk size {1} -- {2}'.format( + sizes['os_walk'], sizes['scandir_walk'], equality)) + + print('os.walk took {0:.3f}s, scandir.walk took {1:.3f}s -- {2:.1f}x as fast'.format( + os_walk_time, scandir_walk_time, os_walk_time / scandir_walk_time)) + + +if __name__ == '__main__': + usage = """Usage: benchmark.py [-h] [tree_dir] + +Create a large directory tree named "benchtree" (relative to this script) and +benchmark os.walk() versus scandir.walk(). If tree_dir is specified, benchmark +using it instead of creating a tree.""" + parser = optparse.OptionParser(usage=usage) + parser.add_option('-s', '--size', action='store_true', + help='get size of directory tree while walking') + parser.add_option('-c', '--scandir', type='choice', choices=['best', 'generic', 'c', 'python', 'os'], default='best', + help='version of scandir() to use, default "%default"') + options, args = parser.parse_args() + + if args: + tree_dir = args[0] + else: + tree_dir = os.path.join(os.path.dirname(__file__), 'benchtree') + if not os.path.exists(tree_dir): + print('Creating tree at {0}: depth={1}, num_dirs={2}, num_files={3}'.format( + tree_dir, DEPTH, NUM_DIRS, NUM_FILES)) + create_tree(tree_dir) + + if options.scandir == 'generic': + scandir.scandir = scandir.scandir_generic + elif options.scandir == 'c': + if scandir.scandir_c is None: + print("ERROR: Compiled C version of scandir not found!") + sys.exit(1) + scandir.scandir = scandir.scandir_c + elif options.scandir == 'python': + if scandir.scandir_python is None: + print("ERROR: Python version of scandir not found!") + sys.exit(1) + scandir.scandir = scandir.scandir_python + elif options.scandir == 'os': + if not hasattr(os, 'scandir'): + print("ERROR: Python 3.5's os.scandir() not found!") + sys.exit(1) + scandir.scandir = os.scandir + elif hasattr(os, 'scandir'): + scandir.scandir = os.scandir + + if scandir.scandir == getattr(os, 'scandir', None): + print("Using Python 3.5's 
builtin os.scandir()") + elif scandir.scandir == scandir.scandir_c: + print('Using fast C version of scandir') + elif scandir.scandir == scandir.scandir_python: + print('Using slower ctypes version of scandir') + elif scandir.scandir == scandir.scandir_generic: + print('Using very slow generic version of scandir') + else: + print('ERROR: Unsure which version of scandir we are using!') + sys.exit(1) + + if hasattr(os, 'scandir'): + os.walk = os_walk_pre_35 + print('Comparing against pre-Python 3.5 version of os.walk()') + else: + print('Comparing against builtin version of os.walk()') + + benchmark(tree_dir, get_size=options.size) diff --git a/third_party/python/scandir/osdefs.h b/third_party/python/scandir/osdefs.h new file mode 100644 index 000000000000..d678ca3b4dbe --- /dev/null +++ b/third_party/python/scandir/osdefs.h @@ -0,0 +1,48 @@ +// from CPython +#ifndef Py_OSDEFS_H +#define Py_OSDEFS_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Operating system dependencies */ + +#ifdef MS_WINDOWS +#define SEP L'\\' +#define ALTSEP L'/' +#define MAXPATHLEN 256 +#define DELIM L';' +#endif + +/* Filename separator */ +#ifndef SEP +#define SEP L'/' +#endif + +/* Max pathname length */ +#ifdef __hpux +#include <sys/param.h> +#include <limits.h> +#ifndef PATH_MAX +#define PATH_MAX MAXPATHLEN +#endif +#endif + +#ifndef MAXPATHLEN +#if defined(PATH_MAX) && PATH_MAX > 1024 +#define MAXPATHLEN PATH_MAX +#else +#define MAXPATHLEN 1024 +#endif +#endif + +/* Search path entry delimiter */ +#ifndef DELIM +#define DELIM L':' +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OSDEFS_H */ diff --git a/third_party/python/scandir/scandir.py b/third_party/python/scandir/scandir.py new file mode 100644 index 000000000000..aac7208e8e93 --- /dev/null +++ b/third_party/python/scandir/scandir.py @@ -0,0 +1,693 @@ +"""scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib + +scandir() is a generator version of os.listdir() that returns an +iterator over files in a directory, 
and also exposes the extra +information most OSes provide while iterating files in a directory +(such as type and stat information). + +This module also includes a version of os.walk() that uses scandir() +to speed it up significantly. + +See README.md or https://github.com/benhoyt/scandir for rationale and +docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for +more details on its inclusion into Python 3.5 + +scandir is released under the new BSD 3-clause license. See +LICENSE.txt for the full license text. +""" + +from __future__ import division + +from errno import ENOENT +from os import listdir, lstat, stat, strerror +from os.path import join, islink +from stat import S_IFDIR, S_IFLNK, S_IFREG +import collections +import sys + +try: + import _scandir +except ImportError: + _scandir = None + +try: + import ctypes +except ImportError: + ctypes = None + +if _scandir is None and ctypes is None: + import warnings + warnings.warn("scandir can't find the compiled _scandir C module " + "or ctypes, using slow generic fallback") + +__version__ = '1.9.0' +__all__ = ['scandir', 'walk'] + +# Windows FILE_ATTRIBUTE constants for interpreting the +# FIND_DATA.dwFileAttributes member +FILE_ATTRIBUTE_ARCHIVE = 32 +FILE_ATTRIBUTE_COMPRESSED = 2048 +FILE_ATTRIBUTE_DEVICE = 64 +FILE_ATTRIBUTE_DIRECTORY = 16 +FILE_ATTRIBUTE_ENCRYPTED = 16384 +FILE_ATTRIBUTE_HIDDEN = 2 +FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768 +FILE_ATTRIBUTE_NORMAL = 128 +FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192 +FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072 +FILE_ATTRIBUTE_OFFLINE = 4096 +FILE_ATTRIBUTE_READONLY = 1 +FILE_ATTRIBUTE_REPARSE_POINT = 1024 +FILE_ATTRIBUTE_SPARSE_FILE = 512 +FILE_ATTRIBUTE_SYSTEM = 4 +FILE_ATTRIBUTE_TEMPORARY = 256 +FILE_ATTRIBUTE_VIRTUAL = 65536 + +IS_PY3 = sys.version_info >= (3, 0) + +if IS_PY3: + unicode = str # Because Python <= 3.2 doesn't have u'unicode' syntax + + +class GenericDirEntry(object): + __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path') + + def 
__init__(self, scandir_path, name): + self._scandir_path = scandir_path + self.name = name + self._stat = None + self._lstat = None + self._path = None + + @property + def path(self): + if self._path is None: + self._path = join(self._scandir_path, self.name) + return self._path + + def stat(self, follow_symlinks=True): + if follow_symlinks: + if self._stat is None: + self._stat = stat(self.path) + return self._stat + else: + if self._lstat is None: + self._lstat = lstat(self.path) + return self._lstat + + # The code duplication below is intentional: this is for slightly + # better performance on systems that fall back to GenericDirEntry. + # It avoids an additional attribute lookup and method call, which + # are relatively slow on CPython. + def is_dir(self, follow_symlinks=True): + try: + st = self.stat(follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno != ENOENT: + raise + return False # Path doesn't exist or is a broken symlink + return st.st_mode & 0o170000 == S_IFDIR + + def is_file(self, follow_symlinks=True): + try: + st = self.stat(follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno != ENOENT: + raise + return False # Path doesn't exist or is a broken symlink + return st.st_mode & 0o170000 == S_IFREG + + def is_symlink(self): + try: + st = self.stat(follow_symlinks=False) + except OSError as e: + if e.errno != ENOENT: + raise + return False # Path doesn't exist or is a broken symlink + return st.st_mode & 0o170000 == S_IFLNK + + def inode(self): + st = self.stat(follow_symlinks=False) + return st.st_ino + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + +def _scandir_generic(path=unicode('.')): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. 
+ """ + for name in listdir(path): + yield GenericDirEntry(path, name) + + +if IS_PY3 and sys.platform == 'win32': + def scandir_generic(path=unicode('.')): + if isinstance(path, bytes): + raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead") + return _scandir_generic(path) + scandir_generic.__doc__ = _scandir_generic.__doc__ +else: + scandir_generic = _scandir_generic + + +scandir_c = None +scandir_python = None + + +if sys.platform == 'win32': + if ctypes is not None: + from ctypes import wintypes + + # Various constants from windows.h + INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value + ERROR_FILE_NOT_FOUND = 2 + ERROR_NO_MORE_FILES = 18 + IO_REPARSE_TAG_SYMLINK = 0xA000000C + + # Numer of seconds between 1601-01-01 and 1970-01-01 + SECONDS_BETWEEN_EPOCHS = 11644473600 + + kernel32 = ctypes.windll.kernel32 + + # ctypes wrappers for (wide string versions of) FindFirstFile, + # FindNextFile, and FindClose + FindFirstFile = kernel32.FindFirstFileW + FindFirstFile.argtypes = [ + wintypes.LPCWSTR, + ctypes.POINTER(wintypes.WIN32_FIND_DATAW), + ] + FindFirstFile.restype = wintypes.HANDLE + + FindNextFile = kernel32.FindNextFileW + FindNextFile.argtypes = [ + wintypes.HANDLE, + ctypes.POINTER(wintypes.WIN32_FIND_DATAW), + ] + FindNextFile.restype = wintypes.BOOL + + FindClose = kernel32.FindClose + FindClose.argtypes = [wintypes.HANDLE] + FindClose.restype = wintypes.BOOL + + Win32StatResult = collections.namedtuple('Win32StatResult', [ + 'st_mode', + 'st_ino', + 'st_dev', + 'st_nlink', + 'st_uid', + 'st_gid', + 'st_size', + 'st_atime', + 'st_mtime', + 'st_ctime', + 'st_atime_ns', + 'st_mtime_ns', + 'st_ctime_ns', + 'st_file_attributes', + ]) + + def filetime_to_time(filetime): + """Convert Win32 FILETIME to time since Unix epoch in seconds.""" + total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime + return total / 10000000 - SECONDS_BETWEEN_EPOCHS + + def find_data_to_stat(data): + """Convert Win32 FIND_DATA struct to 
stat_result.""" + # First convert Win32 dwFileAttributes to st_mode + attributes = data.dwFileAttributes + st_mode = 0 + if attributes & FILE_ATTRIBUTE_DIRECTORY: + st_mode |= S_IFDIR | 0o111 + else: + st_mode |= S_IFREG + if attributes & FILE_ATTRIBUTE_READONLY: + st_mode |= 0o444 + else: + st_mode |= 0o666 + if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and + data.dwReserved0 == IO_REPARSE_TAG_SYMLINK): + st_mode ^= st_mode & 0o170000 + st_mode |= S_IFLNK + + st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow + st_atime = filetime_to_time(data.ftLastAccessTime) + st_mtime = filetime_to_time(data.ftLastWriteTime) + st_ctime = filetime_to_time(data.ftCreationTime) + + # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev, + # st_nlink, st_uid, st_gid + return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size, + st_atime, st_mtime, st_ctime, + int(st_atime * 1000000000), + int(st_mtime * 1000000000), + int(st_ctime * 1000000000), + attributes) + + class Win32DirEntryPython(object): + __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode') + + def __init__(self, scandir_path, name, find_data): + self._scandir_path = scandir_path + self.name = name + self._stat = None + self._lstat = None + self._find_data = find_data + self._path = None + self._inode = None + + @property + def path(self): + if self._path is None: + self._path = join(self._scandir_path, self.name) + return self._path + + def stat(self, follow_symlinks=True): + if follow_symlinks: + if self._stat is None: + if self.is_symlink(): + # It's a symlink, call link-following stat() + self._stat = stat(self.path) + else: + # Not a symlink, stat is same as lstat value + if self._lstat is None: + self._lstat = find_data_to_stat(self._find_data) + self._stat = self._lstat + return self._stat + else: + if self._lstat is None: + # Lazily convert to stat object, because it's slow + # in Python, and often we only need is_dir() etc + self._lstat = 
find_data_to_stat(self._find_data) + return self._lstat + + def is_dir(self, follow_symlinks=True): + is_symlink = self.is_symlink() + if follow_symlinks and is_symlink: + try: + return self.stat().st_mode & 0o170000 == S_IFDIR + except OSError as e: + if e.errno != ENOENT: + raise + return False + elif is_symlink: + return False + else: + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY != 0) + + def is_file(self, follow_symlinks=True): + is_symlink = self.is_symlink() + if follow_symlinks and is_symlink: + try: + return self.stat().st_mode & 0o170000 == S_IFREG + except OSError as e: + if e.errno != ENOENT: + raise + return False + elif is_symlink: + return False + else: + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY == 0) + + def is_symlink(self): + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_REPARSE_POINT != 0 and + self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK) + + def inode(self): + if self._inode is None: + self._inode = lstat(self.path).st_ino + return self._inode + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def win_error(error, filename): + exc = WindowsError(error, ctypes.FormatError(error)) + exc.filename = filename + return exc + + def _scandir_python(path=unicode('.')): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. 
+ """ + # Call FindFirstFile and handle errors + if isinstance(path, bytes): + is_bytes = True + filename = join(path.decode('mbcs', 'strict'), '*.*') + else: + is_bytes = False + filename = join(path, '*.*') + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + handle = FindFirstFile(filename, data_p) + if handle == INVALID_HANDLE_VALUE: + error = ctypes.GetLastError() + if error == ERROR_FILE_NOT_FOUND: + # No files, don't yield anything + return + raise win_error(error, path) + + # Call FindNextFile in a loop, stopping when no more files + try: + while True: + # Skip '.' and '..' (current and parent directory), but + # otherwise yield (filename, stat_result) tuple + name = data.cFileName + if name not in ('.', '..'): + if is_bytes: + name = name.encode('mbcs', 'replace') + yield Win32DirEntryPython(path, name, data) + + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + success = FindNextFile(handle, data_p) + if not success: + error = ctypes.GetLastError() + if error == ERROR_NO_MORE_FILES: + break + raise win_error(error, path) + finally: + if not FindClose(handle): + raise win_error(ctypes.GetLastError(), path) + + if IS_PY3: + def scandir_python(path=unicode('.')): + if isinstance(path, bytes): + raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead") + return _scandir_python(path) + scandir_python.__doc__ = _scandir_python.__doc__ + else: + scandir_python = _scandir_python + + if _scandir is not None: + scandir_c = _scandir.scandir + DirEntry_c = _scandir.DirEntry + + if _scandir is not None: + scandir = scandir_c + DirEntry = DirEntry_c + elif ctypes is not None: + scandir = scandir_python + DirEntry = Win32DirEntryPython + else: + scandir = scandir_generic + DirEntry = GenericDirEntry + + +# Linux, OS X, and BSD implementation +elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform: + have_dirent_d_type = (sys.platform != 'sunos5') + + if ctypes is not None and 
have_dirent_d_type: + import ctypes.util + + DIR_p = ctypes.c_void_p + + # Rather annoying how the dirent struct is slightly different on each + # platform. The only fields we care about are d_name and d_type. + class Dirent(ctypes.Structure): + if sys.platform.startswith('linux'): + _fields_ = ( + ('d_ino', ctypes.c_ulong), + ('d_off', ctypes.c_long), + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + elif 'openbsd' in sys.platform: + _fields_ = ( + ('d_ino', ctypes.c_uint64), + ('d_off', ctypes.c_uint64), + ('d_reclen', ctypes.c_uint16), + ('d_type', ctypes.c_uint8), + ('d_namlen', ctypes.c_uint8), + ('__d_padding', ctypes.c_uint8 * 4), + ('d_name', ctypes.c_char * 256), + ) + else: + _fields_ = ( + ('d_ino', ctypes.c_uint32), # must be uint32, not ulong + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_namlen', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + + DT_UNKNOWN = 0 + DT_DIR = 4 + DT_REG = 8 + DT_LNK = 10 + + Dirent_p = ctypes.POINTER(Dirent) + Dirent_pp = ctypes.POINTER(Dirent_p) + + libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) + opendir = libc.opendir + opendir.argtypes = [ctypes.c_char_p] + opendir.restype = DIR_p + + readdir_r = libc.readdir_r + readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp] + readdir_r.restype = ctypes.c_int + + closedir = libc.closedir + closedir.argtypes = [DIR_p] + closedir.restype = ctypes.c_int + + file_system_encoding = sys.getfilesystemencoding() + + class PosixDirEntry(object): + __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode') + + def __init__(self, scandir_path, name, d_type, inode): + self._scandir_path = scandir_path + self.name = name + self._d_type = d_type + self._inode = inode + self._stat = None + self._lstat = None + self._path = None + + @property + def path(self): + if self._path is None: + self._path = join(self._scandir_path, self.name) + return self._path + + def 
stat(self, follow_symlinks=True): + if follow_symlinks: + if self._stat is None: + if self.is_symlink(): + self._stat = stat(self.path) + else: + if self._lstat is None: + self._lstat = lstat(self.path) + self._stat = self._lstat + return self._stat + else: + if self._lstat is None: + self._lstat = lstat(self.path) + return self._lstat + + def is_dir(self, follow_symlinks=True): + if (self._d_type == DT_UNKNOWN or + (follow_symlinks and self.is_symlink())): + try: + st = self.stat(follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno != ENOENT: + raise + return False + return st.st_mode & 0o170000 == S_IFDIR + else: + return self._d_type == DT_DIR + + def is_file(self, follow_symlinks=True): + if (self._d_type == DT_UNKNOWN or + (follow_symlinks and self.is_symlink())): + try: + st = self.stat(follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno != ENOENT: + raise + return False + return st.st_mode & 0o170000 == S_IFREG + else: + return self._d_type == DT_REG + + def is_symlink(self): + if self._d_type == DT_UNKNOWN: + try: + st = self.stat(follow_symlinks=False) + except OSError as e: + if e.errno != ENOENT: + raise + return False + return st.st_mode & 0o170000 == S_IFLNK + else: + return self._d_type == DT_LNK + + def inode(self): + return self._inode + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def posix_error(filename): + errno = ctypes.get_errno() + exc = OSError(errno, strerror(errno)) + exc.filename = filename + return exc + + def scandir_python(path=unicode('.')): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. 
+ """ + if isinstance(path, bytes): + opendir_path = path + is_bytes = True + else: + opendir_path = path.encode(file_system_encoding) + is_bytes = False + dir_p = opendir(opendir_path) + if not dir_p: + raise posix_error(path) + try: + result = Dirent_p() + while True: + entry = Dirent() + if readdir_r(dir_p, entry, result): + raise posix_error(path) + if not result: + break + name = entry.d_name + if name not in (b'.', b'..'): + if not is_bytes: + name = name.decode(file_system_encoding) + yield PosixDirEntry(path, name, entry.d_type, entry.d_ino) + finally: + if closedir(dir_p): + raise posix_error(path) + + if _scandir is not None: + scandir_c = _scandir.scandir + DirEntry_c = _scandir.DirEntry + + if _scandir is not None: + scandir = scandir_c + DirEntry = DirEntry_c + elif ctypes is not None: + scandir = scandir_python + DirEntry = PosixDirEntry + else: + scandir = scandir_generic + DirEntry = GenericDirEntry + + +# Some other system -- no d_type or stat information +else: + scandir = scandir_generic + DirEntry = GenericDirEntry + + +def _walk(top, topdown=True, onerror=None, followlinks=False): + """Like Python 3.5's implementation of os.walk() -- faster than + the pre-Python 3.5 version as it uses scandir() internally. + """ + dirs = [] + nondirs = [] + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. os.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. 
+ try: + scandir_it = scandir(top) + except OSError as error: + if onerror is not None: + onerror(error) + return + + while True: + try: + try: + entry = next(scandir_it) + except StopIteration: + break + except OSError as error: + if onerror is not None: + onerror(error) + return + + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider that the entry is not + # a directory, same behaviour than os.path.isdir(). + is_dir = False + + if is_dir: + dirs.append(entry.name) + else: + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: recurse into sub-directory, but exclude symlinks to + # directories if followlinks is False + if followlinks: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider that the + # entry is not a symbolic link, same behaviour than + # os.path.islink(). + is_symlink = False + walk_into = not is_symlink + + if walk_into: + for entry in walk(entry.path, topdown, onerror, followlinks): + yield entry + + # Yield before recursion if going top down + if topdown: + yield top, dirs, nondirs + + # Recurse into sub-directories + for name in dirs: + new_path = join(top, name) + # Issue #23605: os.path.islink() is used instead of caching + # entry.is_symlink() result during the loop on os.scandir() because + # the caller can replace the directory entry during the "yield" + # above. 
+ if followlinks or not islink(new_path): + for entry in walk(new_path, topdown, onerror, followlinks): + yield entry + else: + # Yield after recursion if going bottom up + yield top, dirs, nondirs + + +if IS_PY3 or sys.platform != 'win32': + walk = _walk +else: + # Fix for broken unicode handling on Windows on Python 2.x, see: + # https://github.com/benhoyt/scandir/issues/54 + file_system_encoding = sys.getfilesystemencoding() + + def walk(top, topdown=True, onerror=None, followlinks=False): + if isinstance(top, bytes): + top = top.decode(file_system_encoding) + return _walk(top, topdown, onerror, followlinks) diff --git a/third_party/python/scandir/setup.cfg b/third_party/python/scandir/setup.cfg new file mode 100644 index 000000000000..8bfd5a12f85b --- /dev/null +++ b/third_party/python/scandir/setup.cfg @@ -0,0 +1,4 @@ +[egg_info] +tag_build = +tag_date = 0 + diff --git a/third_party/python/scandir/setup.py b/third_party/python/scandir/setup.py new file mode 100644 index 000000000000..5987c54ea826 --- /dev/null +++ b/third_party/python/scandir/setup.py @@ -0,0 +1,80 @@ +"""Run "python setup.py install" to install scandir.""" + +try: + from setuptools import setup, Extension + from setuptools.command.build_ext import build_ext as base_build_ext +except ImportError: + import warnings + import sys + val = sys.exc_info()[1] + + warnings.warn("import of setuptools failed %r" % val) + from distutils.core import setup, Extension + from distutils.command.build_ext import build_ext as base_build_ext + +import os +import re +import sys +import logging + +# Get version without importing scandir because that will lock the +# .pyd file (if scandir is already installed) so it can't be +# overwritten during the install process +with open(os.path.join(os.path.dirname(__file__), 'scandir.py')) as f: + for line in f: + match = re.match(r"__version__.*'([0-9.]+)'", line) + if match: + version = match.group(1) + break + else: + raise Exception("Couldn't find version in setup.py") 
+ +with open('README.rst') as f: + long_description = f.read() + + +class BuildExt(base_build_ext): + + # the extension is optional since in case of lack of c the api + # there is a ctypes fallback and a slow python fallback + + def build_extension(self, ext): + try: + base_build_ext.build_extension(self, ext) + except Exception: + exception = sys.exc_info()[0] + logging.warn("building the %s failed with %s", ext.name, exception) + +extension = Extension('_scandir', ['_scandir.c'], optional=True) + + +setup( + name='scandir', + version=version, + author='Ben Hoyt', + author_email='benhoyt@gmail.com', + url='https://github.com/benhoyt/scandir', + license='New BSD License', + description='scandir, a better directory iterator and faster os.walk()', + long_description=long_description, + py_modules=['scandir'], + ext_modules=[extension], + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Operating System :: OS Independent', + 'License :: OSI Approved :: BSD License', + 'Programming Language :: Python', + 'Topic :: System :: Filesystems', + 'Topic :: System :: Operating System', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: Implementation :: CPython', + ], cmdclass={'build_ext': BuildExt}, +) diff --git a/third_party/python/scandir/test/run_tests.py b/third_party/python/scandir/test/run_tests.py new file mode 100644 index 000000000000..409ad97c917e --- /dev/null +++ b/third_party/python/scandir/test/run_tests.py @@ -0,0 +1,25 @@ +"""Run all unit tests.""" + +import glob +import os +import sys + +if sys.version_info[:2] < (2, 7): + import unittest2 as unittest +else: + import unittest + + +def 
main(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + test_files = glob.glob(os.path.join(test_dir, 'test_*.py')) + test_names = [os.path.basename(f)[:-3] for f in test_files] + + sys.path.insert(0, os.path.join(test_dir, '..')) + + suite = unittest.defaultTestLoader.loadTestsFromNames(test_names) + result = unittest.TextTestRunner(verbosity=2).run(suite) + sys.exit(1 if (result.errors or result.failures) else 0) + +if __name__ == '__main__': + main() diff --git a/third_party/python/scandir/test/test_scandir.py b/third_party/python/scandir/test/test_scandir.py new file mode 100644 index 000000000000..8e8d1a3ed515 --- /dev/null +++ b/third_party/python/scandir/test/test_scandir.py @@ -0,0 +1,320 @@ +"""Tests for scandir.scandir().""" + +from __future__ import unicode_literals + +import os +import shutil +import sys +import time + +if sys.version_info[:2] < (2, 7): + import unittest2 as unittest +else: + import unittest + +try: + import scandir + has_scandir = True +except ImportError: + has_scandir = False + +FILE_ATTRIBUTE_DIRECTORY = 16 + +TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'testdir')) + +IS_PY3 = sys.version_info >= (3, 0) + +if IS_PY3: + int_types = int +else: + int_types = (int, long) + str = unicode + + +if hasattr(os, 'symlink'): + try: + link_name = os.path.join(os.path.dirname(__file__), '_testlink') + os.symlink(__file__, link_name) + os.remove(link_name) + symlinks_supported = True + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + symlinks_supported = False +else: + symlinks_supported = False + + +def create_file(path, contents='1234'): + with open(path, 'w') as f: + f.write(contents) + + +def setup_main(): + join = os.path.join + + os.mkdir(TEST_PATH) + os.mkdir(join(TEST_PATH, 'subdir')) + create_file(join(TEST_PATH, 'file1.txt')) + create_file(join(TEST_PATH, 'file2.txt'), contents='12345678') + + os.mkdir(join(TEST_PATH, 'subdir', 'unidir\u018F')) + 
create_file(join(TEST_PATH, 'subdir', 'file1.txt')) + create_file(join(TEST_PATH, 'subdir', 'unicod\u018F.txt')) + + create_file(join(TEST_PATH, 'subdir', 'unidir\u018F', 'file1.txt')) + + os.mkdir(join(TEST_PATH, 'linkdir')) + + +def setup_symlinks(): + join = os.path.join + + os.mkdir(join(TEST_PATH, 'linkdir', 'linksubdir')) + create_file(join(TEST_PATH, 'linkdir', 'file1.txt')) + + os.symlink(os.path.abspath(join(TEST_PATH, 'linkdir', 'file1.txt')), + join(TEST_PATH, 'linkdir', 'link_to_file')) + + dir_name = os.path.abspath(join(TEST_PATH, 'linkdir', 'linksubdir')) + dir_link = join(TEST_PATH, 'linkdir', 'link_to_dir') + if sys.version_info >= (3, 3): + # "target_is_directory" was only added in Python 3.3 + os.symlink(dir_name, dir_link, target_is_directory=True) + else: + os.symlink(dir_name, dir_link) + + +def teardown(): + try: + shutil.rmtree(TEST_PATH) + except OSError: + # why does the above fail sometimes? + time.sleep(0.1) + shutil.rmtree(TEST_PATH) + + +class TestMixin(object): + def setUp(self): + if not os.path.exists(TEST_PATH): + setup_main() + if symlinks_supported and not os.path.exists( + os.path.join(TEST_PATH, 'linkdir', 'linksubdir')): + setup_symlinks() + + if not hasattr(unittest.TestCase, 'skipTest'): + def skipTest(self, reason): + sys.stdout.write('skipped {0!r} '.format(reason)) + + def test_basic(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([(e.name, e.is_dir()) for e in entries], + [('file1.txt', False), ('file2.txt', False), + ('linkdir', True), ('subdir', True)]) + self.assertEqual([e.path for e in entries], + [os.path.join(TEST_PATH, e.name) for e in entries]) + + def test_dir_entry(self): + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + e = entries['file1.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = entries['file2.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = 
entries['subdir'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [True, False, False]) + + self.assertEqual(entries['file1.txt'].stat().st_size, 4) + self.assertEqual(entries['file2.txt'].stat().st_size, 8) + + def test_stat(self): + entries = list(self.scandir_func(TEST_PATH)) + for entry in entries: + os_stat = os.stat(os.path.join(TEST_PATH, entry.name)) + scandir_stat = entry.stat() + self.assertEqual(os_stat.st_mode, scandir_stat.st_mode) + # TODO: be nice to figure out why these aren't identical on Windows and on PyPy + # * Windows: they seem to be a few microseconds to tens of seconds out + # * PyPy: for some reason os_stat's times are nanosecond, scandir's are not + self.assertAlmostEqual(os_stat.st_mtime, scandir_stat.st_mtime, delta=1) + self.assertAlmostEqual(os_stat.st_ctime, scandir_stat.st_ctime, delta=1) + if entry.is_file(): + self.assertEqual(os_stat.st_size, scandir_stat.st_size) + + def test_returns_iter(self): + it = self.scandir_func(TEST_PATH) + entry = next(it) + assert hasattr(entry, 'name') + + def check_file_attributes(self, result): + self.assertTrue(hasattr(result, 'st_file_attributes')) + self.assertTrue(isinstance(result.st_file_attributes, int_types)) + self.assertTrue(0 <= result.st_file_attributes <= 0xFFFFFFFF) + + def test_file_attributes(self): + if sys.platform != 'win32' or not self.has_file_attributes: + # st_file_attributes is Win32 specific (but can't use + # unittest.skipUnless on Python 2.6) + return self.skipTest('st_file_attributes not supported') + + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + + # test st_file_attributes on a file (FILE_ATTRIBUTE_DIRECTORY not set) + result = entries['file1.txt'].stat() + self.check_file_attributes(result) + self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, 0) + + # test st_file_attributes on a directory (FILE_ATTRIBUTE_DIRECTORY set) + result = entries['subdir'].stat() + self.check_file_attributes(result) + 
self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, + FILE_ATTRIBUTE_DIRECTORY) + + def test_path(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([os.path.basename(e.name) for e in entries], + ['file1.txt', 'file2.txt', 'linkdir', 'subdir']) + self.assertEqual([os.path.normpath(os.path.join(TEST_PATH, e.name)) for e in entries], + [os.path.normpath(e.path) for e in entries]) + + def test_symlink(self): + if not symlinks_supported: + return self.skipTest('symbolic links not supported') + + entries = sorted(self.scandir_func(os.path.join(TEST_PATH, 'linkdir')), + key=lambda e: e.name) + + self.assertEqual([(e.name, e.is_symlink()) for e in entries], + [('file1.txt', False), + ('link_to_dir', True), + ('link_to_file', True), + ('linksubdir', False)]) + + self.assertEqual([(e.name, e.is_file(), e.is_file(follow_symlinks=False)) + for e in entries], + [('file1.txt', True, True), + ('link_to_dir', False, False), + ('link_to_file', True, False), + ('linksubdir', False, False)]) + + self.assertEqual([(e.name, e.is_dir(), e.is_dir(follow_symlinks=False)) + for e in entries], + [('file1.txt', False, False), + ('link_to_dir', True, False), + ('link_to_file', False, False), + ('linksubdir', True, True)]) + + def test_bytes(self): + # Check that unicode filenames are returned correctly as bytes in output + path = os.path.join(TEST_PATH, 'subdir').encode(sys.getfilesystemencoding(), 'replace') + self.assertTrue(isinstance(path, bytes)) + + # Python 3.6 on Windows fixes the bytes filename thing by using UTF-8 + if IS_PY3 and sys.platform == 'win32': + if not (sys.version_info >= (3, 6) and self.scandir_func == os.scandir): + self.assertRaises(TypeError, self.scandir_func, path) + return + + entries = [e for e in self.scandir_func(path) if e.name.startswith(b'unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, bytes)) + 
self.assertTrue(isinstance(entry.path, bytes)) + + # b'unicod?.txt' on Windows, b'unicod\xc6\x8f.txt' (UTF-8) or similar on POSIX + entry_name = 'unicod\u018f.txt'.encode(sys.getfilesystemencoding(), 'replace') + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, entry_name)) + + def test_unicode(self): + # Check that unicode filenames are returned correctly as (unicode) str in output + path = os.path.join(TEST_PATH, 'subdir') + if not IS_PY3: + path = path.decode(sys.getfilesystemencoding(), 'replace') + self.assertTrue(isinstance(path, str)) + entries = [e for e in self.scandir_func(path) if e.name.startswith('unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + + entry_name = 'unicod\u018f.txt' + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, 'unicod\u018f.txt')) + + # Check that it handles unicode input properly + path = os.path.join(TEST_PATH, 'subdir', 'unidir\u018f') + self.assertTrue(isinstance(path, str)) + entries = list(self.scandir_func(path)) + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + self.assertEqual(entry.name, 'file1.txt') + self.assertEqual(entry.path, os.path.join(path, 'file1.txt')) + + def test_walk_unicode_handling(self): + encoding = sys.getfilesystemencoding() + dirname_unicode = u'test_unicode_dir' + dirname_bytes = dirname_unicode.encode(encoding) + dirpath = os.path.join(TEST_PATH.encode(encoding), dirname_bytes) + try: + os.makedirs(dirpath) + + if sys.platform != 'win32': + # test bytes + self.assertTrue(isinstance(dirpath, bytes)) + for (path, dirs, files) in scandir.walk(dirpath): + self.assertTrue(isinstance(path, bytes)) + + # test unicode + text_type = str if IS_PY3 else unicode + dirpath_unicode = text_type(dirpath, encoding) + 
self.assertTrue(isinstance(dirpath_unicode, text_type)) + for (path, dirs, files) in scandir.walk(dirpath_unicode): + self.assertTrue(isinstance(path, text_type)) + finally: + shutil.rmtree(dirpath) + +if has_scandir: + class TestScandirGeneric(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_generic + self.has_file_attributes = False + TestMixin.setUp(self) + + + if getattr(scandir, 'scandir_python', None): + class TestScandirPython(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_python + self.has_file_attributes = True + TestMixin.setUp(self) + + + if getattr(scandir, 'scandir_c', None): + class TestScandirC(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_c + self.has_file_attributes = True + TestMixin.setUp(self) + + + class TestScandirDirEntry(unittest.TestCase): + def setUp(self): + if not os.path.exists(TEST_PATH): + setup_main() + + def test_iter_returns_dir_entry(self): + it = scandir.scandir(TEST_PATH) + entry = next(it) + assert isinstance(entry, scandir.DirEntry) + + +if hasattr(os, 'scandir'): + class TestScandirOS(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = os.scandir + self.has_file_attributes = True + TestMixin.setUp(self) diff --git a/third_party/python/scandir/test/test_walk.py b/third_party/python/scandir/test/test_walk.py new file mode 100644 index 000000000000..7995f3adba43 --- /dev/null +++ b/third_party/python/scandir/test/test_walk.py @@ -0,0 +1,213 @@ +"""Tests for scandir.walk(), copied from CPython's tests for os.walk().""" + +import os +import shutil +import sys + +if sys.version_info[:2] < (2, 7): + import unittest2 as unittest +else: + import unittest + +import scandir + +walk_func = scandir.walk + + +class TestWalk(unittest.TestCase): + testfn = os.path.join(os.path.dirname(__file__), 'temp') + + def test_traversal(self): + # Build: + # TESTFN/ + # TEST1/ a file kid and two directory kids + 
# tmp1 + # SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ a file kid and a dirsymlink kid + # tmp3 + # link/ a symlink to TESTFN.2 + # TEST2/ + # tmp4 a lone file + walk_path = os.path.join(self.testfn, "TEST1") + sub1_path = os.path.join(walk_path, "SUB1") + sub11_path = os.path.join(sub1_path, "SUB11") + sub2_path = os.path.join(walk_path, "SUB2") + tmp1_path = os.path.join(walk_path, "tmp1") + tmp2_path = os.path.join(sub1_path, "tmp2") + tmp3_path = os.path.join(sub2_path, "tmp3") + link_path = os.path.join(sub2_path, "link") + t2_path = os.path.join(self.testfn, "TEST2") + tmp4_path = os.path.join(self.testfn, "TEST2", "tmp4") + + # Create stuff. + os.makedirs(sub11_path) + os.makedirs(sub2_path) + os.makedirs(t2_path) + for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path: + f = open(path, "w") + f.write("I'm " + path + " and proud of it. Blame test_os.\n") + f.close() + has_symlink = hasattr(os, "symlink") + if has_symlink: + try: + if sys.platform == 'win32' and sys.version_info >= (3, 2): + # "target_is_directory" was only added in Python 3.2 (on Windows) + os.symlink(os.path.abspath(t2_path), link_path, target_is_directory=True) + else: + os.symlink(os.path.abspath(t2_path), link_path) + sub2_tree = (sub2_path, ["link"], ["tmp3"]) + except NotImplementedError: + sub2_tree = (sub2_path, [], ["tmp3"]) + else: + sub2_tree = (sub2_path, [], ["tmp3"]) + + # Walk top-down. + all = list(walk_func(walk_path)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: TESTFN, SUB1, SUB11, SUB2 + # flipped: TESTFN, SUB2, SUB1, SUB11 + flipped = all[0][1][0] != "SUB1" + all[0][1].sort() + self.assertEqual(all[0], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 + flipped], (sub11_path, [], [])) + self.assertEqual(all[3 - 2 * flipped], sub2_tree) + + # Prune the search. 
+ all = [] + for root, dirs, files in walk_func(walk_path): + all.append((root, dirs, files)) + # Don't descend into SUB1. + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (walk_path, ["SUB2"], ["tmp1"])) + self.assertEqual(all[1], sub2_tree) + + # Walk bottom-up. + all = list(walk_func(walk_path, topdown=False)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: SUB11, SUB1, SUB2, TESTFN + # flipped: SUB2, SUB11, SUB1, TESTFN + flipped = all[3][1][0] != "SUB1" + all[3][1].sort() + self.assertEqual(all[3], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[flipped], (sub11_path, [], [])) + self.assertEqual(all[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 - 2 * flipped], sub2_tree) + + if has_symlink: + # Walk, following symlinks. + for root, dirs, files in walk_func(walk_path, followlinks=True): + if root == link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with followlinks=True") + + # Test creating a directory and adding it to dirnames + sub3_path = os.path.join(walk_path, "SUB3") + all = [] + for root, dirs, files in walk_func(walk_path): + all.append((root, dirs, files)) + if 'SUB1' in dirs: + os.makedirs(sub3_path) + dirs.append('SUB3') + all.sort() + self.assertEqual(os.path.split(all[-1][0])[1], 'SUB3') + + def tearDown(self): + # Tear everything down. This is a decent use for bottom-up on + # Windows, which doesn't have a recursive delete command. The + # (not so) subtlety is that rmdir will fail unless the dir's + # kids are removed first, so bottom up is essential. 
+ for root, dirs, files in os.walk(self.testfn, topdown=False): + for name in files: + os.remove(os.path.join(root, name)) + for name in dirs: + dirname = os.path.join(root, name) + if not os.path.islink(dirname): + os.rmdir(dirname) + else: + os.remove(dirname) + os.rmdir(self.testfn) + + +class TestWalkSymlink(unittest.TestCase): + temp_dir = os.path.join(os.path.dirname(__file__), 'temp') + + def setUp(self): + os.mkdir(self.temp_dir) + self.dir_name = os.path.join(self.temp_dir, 'dir') + os.mkdir(self.dir_name) + open(os.path.join(self.dir_name, 'subfile'), 'w').close() + self.file_name = os.path.join(self.temp_dir, 'file') + open(self.file_name, 'w').close() + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_symlink_to_file(self): + if not hasattr(os, 'symlink'): + return + + try: + os.symlink(self.file_name, os.path.join(self.temp_dir, + 'link_to_file')) + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + return + + output = sorted(walk_func(self.temp_dir)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir']) + self.assertEqual(files, ['file', 'link_to_file']) + + self.assertEqual(len(output), 2) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + + def test_symlink_to_directory(self): + if not hasattr(os, 'symlink'): + return + + link_name = os.path.join(self.temp_dir, 'link_to_dir') + try: + if sys.platform == 'win32' and sys.version_info >= (3, 2): + # "target_is_directory" was only added in Python 3.2 (on Windows) + os.symlink(self.dir_name, link_name, target_is_directory=True) + else: + os.symlink(self.dir_name, link_name) + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + return + + output = sorted(walk_func(self.temp_dir)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir', 'link_to_dir']) + self.assertEqual(files, ['file']) + + 
self.assertEqual(len(output), 2) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + + output = sorted(walk_func(self.temp_dir, followlinks=True)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir', 'link_to_dir']) + self.assertEqual(files, ['file']) + + self.assertEqual(len(output), 3) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + self.assertEqual(os.path.basename(output[2][0]), 'link_to_dir') + self.assertEqual(output[2][1], []) + self.assertEqual(output[2][2], ['subfile']) diff --git a/third_party/python/scandir/winreparse.h b/third_party/python/scandir/winreparse.h new file mode 100644 index 000000000000..66f7775dd2e8 --- /dev/null +++ b/third_party/python/scandir/winreparse.h @@ -0,0 +1,53 @@ +#ifndef Py_WINREPARSE_H +#define Py_WINREPARSE_H + +#ifdef MS_WINDOWS +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* The following structure was copied from + http://msdn.microsoft.com/en-us/library/ff552012.aspx as the required + include doesn't seem to be present in the Windows SDK (at least as included + with Visual Studio Express). 
*/ +typedef struct _REPARSE_DATA_BUFFER { + ULONG ReparseTag; + USHORT ReparseDataLength; + USHORT Reserved; + union { + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + ULONG Flags; + WCHAR PathBuffer[1]; + } SymbolicLinkReparseBuffer; + + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + WCHAR PathBuffer[1]; + } MountPointReparseBuffer; + + struct { + UCHAR DataBuffer[1]; + } GenericReparseBuffer; + }; +} REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER; + +#define REPARSE_DATA_BUFFER_HEADER_SIZE FIELD_OFFSET(REPARSE_DATA_BUFFER,\ + GenericReparseBuffer) +#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE ( 16 * 1024 ) + +#ifdef __cplusplus +} +#endif + +#endif /* MS_WINDOWS */ + +#endif /* !Py_WINREPARSE_H */