From 7630cd417b6632ae5cf775b1777317c9cf2d55ea Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 25 May 2021 12:46:31 +0300 Subject: [PATCH 1/6] spec: support exists() on directories pre-cache --- adlfs/spec.py | 4 +++- adlfs/tests/test_spec.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 3402bdef..dc34c76f 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1275,6 +1275,7 @@ async def _exists(self, path): except KeyError: pass + full_path = path container_name, path = self.split_path(path) if not path: @@ -1286,7 +1287,8 @@ async def _exists(self, path): async with self.service_client.get_blob_client(container_name, path) as bc: exists = await bc.exists() - return exists + + return exists or await super()._exists(full_path) async def _pipe_file(self, path, value, overwrite=True, **kwargs): """Set the bytes of given file""" diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 6e9db5cb..e8635a44 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -1206,3 +1206,21 @@ def test_exists(storage): assert not fs.exists("non-existent-container/") assert fs.exists("") assert not fs.exists("data/not-a-key") + + +def test_exists_directory(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + + fs.mkdir("temp_exists") + fs.touch("temp_exists/data/data.txt") + fs.touch("temp_exists/data/something/data.txt") + fs.invalidate_cache() + + assert fs.exists("temp_exists/data/something/") + assert fs.exists("temp_exists/data/something") + assert fs.exists("temp_exists/data/") + assert fs.exists("temp_exists/data") + assert fs.exists("temp_exists/") + assert fs.exists("temp_exists") From d710efe9703e5e8b4f626018593902aee94202bf Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 8 Jun 2021 10:46:53 +0300 Subject: [PATCH 2/6] use list_blobs to find --- adlfs/spec.py | 16 ++++++++++++---- 1 file changed, 12 
insertions(+), 4 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index dc34c76f..6e024c14 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1275,8 +1275,8 @@ async def _exists(self, path): except KeyError: pass - full_path = path - container_name, path = self.split_path(path) + full_path = self._strip_protocol(path) + container_name, path = self.split_path(full_path) if not path: if container_name: @@ -1286,9 +1286,17 @@ async def _exists(self, path): return True async with self.service_client.get_blob_client(container_name, path) as bc: - exists = await bc.exists() + if await bc.exists(): + return True - return exists or await super()._exists(full_path) + dir_path = path.lstrip("/") + "/" + async with self.service_client.get_container_client( + container=container_name + ) as container_client: + async for blob in container_client.list_blobs(name_starts_with=dir_path): + return True + else: + return False async def _pipe_file(self, path, value, overwrite=True, **kwargs): """Set the bytes of given file""" From 3626d863bd5a80e66f217d99d411a4e763f129a6 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 8 Jun 2021 10:47:29 +0300 Subject: [PATCH 3/6] set results_per_page=1 to be more efficient --- adlfs/spec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 6e024c14..fa67dbcc 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1293,7 +1293,9 @@ async def _exists(self, path): async with self.service_client.get_container_client( container=container_name ) as container_client: - async for blob in container_client.list_blobs(name_starts_with=dir_path): + async for blob in container_client.list_blobs( + results_per_page=1, name_starts_with=dir_path + ): return True else: return False From d9fe4a50eb317a6f6e72fa3310d59316f46f4d8e Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 8 Jun 2021 10:50:08 +0300 Subject: [PATCH 4/6] strip the trailing slash --- adlfs/spec.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index fa67dbcc..b2bd1f80 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1289,7 +1289,7 @@ async def _exists(self, path): if await bc.exists(): return True - dir_path = path.lstrip("/") + "/" + dir_path = path.rstrip("/") + "/" async with self.service_client.get_container_client( container=container_name ) as container_client: From 9d02238e7fb285f724cb6fadbcd011e572de17a4 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 8 Jun 2021 10:50:30 +0300 Subject: [PATCH 5/6] remove redundant strip_proto call --- adlfs/spec.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index b2bd1f80..2bd3cfef 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1275,8 +1275,7 @@ async def _exists(self, path): except KeyError: pass - full_path = self._strip_protocol(path) - container_name, path = self.split_path(full_path) + container_name, path = self.split_path(path) if not path: if container_name: From bebfe964fa90706c1608f23bb96583fd0c89d90b Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 8 Jun 2021 10:52:13 +0300 Subject: [PATCH 6/6] add the changelog entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4ad3b5..6038b766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ **Change Log** +v0.7.5 +------ +- `exists()` calls now also check whether a directory with that name exists. Previously, this was only checked against the cache. + v0.7.4 ------ - Added the location_mode parameter to AzureBlobFileSystem object, and set default to "primary" to enable Access Control Lists and RA-GRS access. Valid values are "primary" and "secondary"