Skip to content

Commit

Permalink
fix: allow protocol prefixed paths for webhdfs (#1761)
Browse files Browse the repository at this point in the history
  • Loading branch information
buckwheat445 authored Dec 11, 2024
1 parent 5ac1500 commit c36066c
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
13 changes: 13 additions & 0 deletions fsspec/implementations/tests/test_webhdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,16 @@ def test_write_read_verify_file_with_equals(hdfs_cluster):
assert len(file_info) == 1
assert file_info[0]["name"] == file_path
assert file_info[0]["size"] == len(content)


def test_protocol_prefixed_path(hdfs_cluster):
    """Exercise mkdir/exists/ls with a ``webhdfs://``-prefixed path.

    The path is passed to the filesystem methods with its protocol, host
    and port still attached; the test expects the directory to be created,
    to be reported as existing, and to list as empty.
    """
    proxy_map = {"worker.example.com": "localhost"}
    fs = WebHDFS(hdfs_cluster, user="testuser", data_proxy=proxy_map)
    target = "webhdfs://localhost:50070/user/testuser/test_dir"

    fs.mkdir(target)
    assert fs.exists(target)

    # A freshly created directory should have no entries.
    entries = fs.ls(target, detail=True)
    assert len(entries) == 0
3 changes: 2 additions & 1 deletion fsspec/implementations/webhdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ def _connect(self):
self.session.auth = HTTPBasicAuth(self.user, self.password)

def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
url = self._apply_proxy(self.url + quote(path or "", safe="/="))
path = self._strip_protocol(path) if path is not None else ""
url = self._apply_proxy(self.url + quote(path, safe="/="))
args = kwargs.copy()
args.update(self.pars)
args["op"] = op.upper()
Expand Down

0 comments on commit c36066c

Please sign in to comment.