From 8f1fff4dab430ec0c7b0ecf373561981f51f00c7 Mon Sep 17 00:00:00 2001 From: Daniel Tsiang <74436899+DanielTsiang@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:31:33 +0100 Subject: [PATCH 1/3] Catch PermissionError in fsspec.core when attempting to auto_mkdir Catch `PermissionError` in fsspec.core when attempting to auto_mkdir for parent dir, as the user may not have permission to create the parent dir. This should hopefully allow the user to carry on without error if the parent dir already exists. If not, a later exception should be thrown when trying to write to a file when the parent dir doesn't exist. This attempts to address the following issue: * https://github.com/fsspec/filesystem_spec/issues/1404 Comments on how to improve this PR are welcome, as this is my first time contributing to the repo. --- fsspec/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fsspec/core.py b/fsspec/core.py index 23c0db535..9b0c8daa2 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -290,7 +290,11 @@ def open_files( fs.auto_mkdir = auto_mkdir elif "r" not in mode and auto_mkdir: parents = {fs._parent(path) for path in paths} - [fs.makedirs(parent, exist_ok=True) for parent in parents] + try: + [fs.makedirs(parent, exist_ok=True) for parent in parents] + except PermissionError: + # may not have permission to make parent dir + pass return OpenFiles( [ OpenFile( From 230786e668278c02bf82cdf4124245105b21e900 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 2 Nov 2023 10:11:00 -0400 Subject: [PATCH 2/3] Update fsspec/core.py --- fsspec/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fsspec/core.py b/fsspec/core.py index 9b0c8daa2..dd5e9c4f0 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -290,11 +290,11 @@ def open_files( fs.auto_mkdir = auto_mkdir elif "r" not in mode and auto_mkdir: parents = {fs._parent(path) for path in paths} - try: - [fs.makedirs(parent, exist_ok=True) for parent in parents] - except PermissionError: - # may not have permission to make parent dir - pass + for parent in parents: + try: + fs.makedirs(parent, exist_ok=True) + except PermissionError: + pass return OpenFiles( [ OpenFile( From b17221628922ab7898986ea8bc39028ae56ea491 Mon Sep 17 00:00:00 2001 From: Daniel Tsiang <74436899+DanielTsiang@users.noreply.github.com> Date: Sun, 5 Nov 2023 23:30:00 +0000 Subject: [PATCH 3/3] Merge master branch of filesystem_spec (#6) Merge master branch of filesystem_spec into PR branch --------- Co-authored-by: Martin Durant Co-authored-by: Guido Diepen Co-authored-by: Martin Durant --- .github/workflows/main.yaml | 32 +++++++++++++++---------------- ci/environment-friends.yml | 2 +- ci/environment-py38.yml | 2 +- ci/environment-typecheck.yml | 4 +++- ci/environment-win.yml | 2 +- fsspec/generic.py | 4 ++++ fsspec/implementations/webhdfs.py | 30 +++++++++++++++++++++++++---- fsspec/utils.py | 6 +++--- pyproject.toml | 2 +- setup.cfg | 4 ++-- 10 files changed, 58 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 590fafead..dd3a8d3fe 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -71,22 +71,22 @@ jobs: python-version: "3.11" - uses: pre-commit/action@main - typecheck: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup conda - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: ci/environment-typecheck.yml - - - name: mypy - shell: bash -l {0} - run: | - mypy fsspec - +# typecheck: +# runs-on: ubuntu-latest +# steps: +# - name: Checkout +# uses: actions/checkout@v3 +# +# - name: Setup conda +# uses: mamba-org/setup-micromamba@v1 +# with: +# environment-file: ci/environment-typecheck.yml +# +# - name: mypy +# shell: bash -l {0} +# run: | +# mypy fsspec +# downstream: name: downstream runs-on: ubuntu-latest diff --git a/ci/environment-friends.yml b/ci/environment-friends.yml index 1d6b7d61b..b2b3c1b25 100644 --- a/ci/environment-friends.yml +++ b/ci/environment-friends.yml @@ -4,7 +4,7 @@ channels: dependencies: - python=3.9 - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 6bd0e1afb..41ee00898 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -21,7 +21,7 @@ dependencies: - pyftpdlib - cloudpickle - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/ci/environment-typecheck.yml b/ci/environment-typecheck.yml index 59e6abdf5..1b7c482f5 100644 --- a/ci/environment-typecheck.yml +++ b/ci/environment-typecheck.yml @@ -2,7 +2,7 @@ name: test_env channels: - conda-forge dependencies: - - mypy=1.3 + - mypy=1.4.1 - pyarrow - python=3.8 - pip @@ -10,4 +10,6 @@ dependencies: - types-paramiko - types-requests - types-tqdm + - types-paramiko + - types-PyYAML - types-ujson diff --git a/ci/environment-win.yml b/ci/environment-win.yml index 8d22ffc3a..e621fff90 100644 --- a/ci/environment-win.yml +++ b/ci/environment-win.yml @@ -14,7 +14,7 @@ dependencies: - fastparquet - pandas - pytest - - pytest-asyncio + - pytest-asyncio !=0.22.0 - pytest-benchmark - pytest-cov - pytest-mock diff --git a/fsspec/generic.py b/fsspec/generic.py index 85301cc5f..290bb436a 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -171,6 +171,10 @@ def __init__(self, default_method="default", **kwargs): self.method = default_method super().__init__(**kwargs) + def _parent(self, path): + fs = _resolve_fs(path, self.method) + return fs.unstrip_protocol(fs._parent(path)) + def _strip_protocol(self, path): # normalization only fs = _resolve_fs(path, self.method) diff --git a/fsspec/implementations/webhdfs.py b/fsspec/implementations/webhdfs.py index 2a57170ea..53e0df53b 100644 --- a/fsspec/implementations/webhdfs.py +++ b/fsspec/implementations/webhdfs.py @@ -21,7 +21,7 @@ class WebHDFS(AbstractFileSystem): """ Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways. - Three auth mechanisms are supported: + Four auth mechanisms are supported: insecure: no auth is done, and the user is assumed to be whoever they say they are (parameter ``user``), or a predefined value such as @@ -34,6 +34,8 @@ class WebHDFS(AbstractFileSystem): service. Indeed, this client can also generate such tokens when not insecure. Note that tokens expire, but can be renewed (by a previously specified user) and may allow for proxying. + basic-auth: used when both parameter ``user`` and parameter ``password`` + are provided. """ @@ -47,6 +49,7 @@ def __init__( kerberos=False, token=None, user=None, + password=None, proxy_to=None, kerb_kwargs=None, data_proxy=None, @@ -68,6 +71,9 @@ def __init__( given user: str or None If given, assert the user name to connect with + password: str or None + If given, assert the password to use for basic auth. If password + is provided, user must be provided also proxy_to: str or None If given, the user has the authority to proxy, and this value is the user in who's name actions are taken @@ -102,8 +108,19 @@ def __init__( " token" ) self.pars["delegation"] = token - if user is not None: - self.pars["user.name"] = user + self.user = user + self.password = password + + if password is not None: + if user is None: + raise ValueError( + "If passing a password, the user must also be" + "set in order to set up the basic-auth" + ) + else: + if user is not None: + self.pars["user.name"] = user + if proxy_to is not None: self.pars["doas"] = proxy_to if kerberos and user is not None: @@ -126,8 +143,13 @@ def _connect(self): self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs) + if self.user is not None and self.password is not None: + from requests.auth import HTTPBasicAuth + + self.session.auth = HTTPBasicAuth(self.user, self.password) + def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs): - url = self.url + quote(path or "") + url = self._apply_proxy(self.url + quote(path or "")) args = kwargs.copy() args.update(self.pars) args["op"] = op.upper() diff --git a/fsspec/utils.py b/fsspec/utils.py index 38d878a06..34f1ad821 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -320,7 +320,7 @@ def tokenize(*args: Any, **kwargs: Any) -> str: h = md5(str(args).encode()) except ValueError: # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380 - h = md5(str(args).encode(), usedforsecurity=False) # type: ignore[call-arg] + h = md5(str(args).encode(), usedforsecurity=False) return h.hexdigest() @@ -631,7 +631,7 @@ def atomic_write(path: str, mode: str = "wb"): def _translate(pat, STAR, QUESTION_MARK): # Copied from: https://github.com/python/cpython/pull/106703. - res = [] + res: list[str] = [] add = res.append i, n = 0, len(pat) while i < n: @@ -709,7 +709,7 @@ def glob_translate(pat): # recursive=True, include_hidden=True, seps=None """Translate a pathname with shell wildcards to a regular expression.""" if os.path.altsep: - seps = (os.path.sep, os.path.altsep) + seps = os.path.sep + os.path.altsep else: seps = os.path.sep escaped_seps = "".join(map(re.escape, seps)) diff --git a/pyproject.toml b/pyproject.toml index cfa8bbaf9..ab9055633 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -target_version = ['py37', 'py38'] +target_version = ['py310'] line-length = 88 skip-string-normalization = false exclude = ''' diff --git a/setup.cfg b/setup.cfg index 42e7ad282..67467cd20 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,8 +43,8 @@ follow_imports = normal ignore_missing_imports = True enable_error_code = ignore-without-code,truthy-bool,truthy-iterable,unused-awaitable -disallow_untyped_decorators = True -strict_equality = True +disallow_untyped_decorators = False +strict_equality = False warn_redundant_casts = True warn_unused_configs = True warn_unused_ignores = True