From 1d4f348b71653121ff7afd45704644f061259acb Mon Sep 17 00:00:00 2001 From: Israel Martinez Date: Tue, 20 Feb 2024 13:08:33 -0500 Subject: [PATCH 1/3] Add fetch_wasabi_file() utility function. Includes a unit test. --- cosipy/util/__init__.py | 1 + cosipy/util/data_fetching.py | 23 +++++++++++++++++++++++ tests/util/test_data_fetching.py | 8 ++++++++ 3 files changed, 32 insertions(+) create mode 100644 cosipy/util/__init__.py create mode 100644 cosipy/util/data_fetching.py create mode 100644 tests/util/test_data_fetching.py diff --git a/cosipy/util/__init__.py b/cosipy/util/__init__.py new file mode 100644 index 00000000..8289e516 --- /dev/null +++ b/cosipy/util/__init__.py @@ -0,0 +1 @@ +from .data_fetching import fetch_wasabi_file diff --git a/cosipy/util/data_fetching.py b/cosipy/util/data_fetching.py new file mode 100644 index 00000000..f4324ffe --- /dev/null +++ b/cosipy/util/data_fetching.py @@ -0,0 +1,23 @@ +import subprocess, os + +def fetch_wasabi_file(file, + output = None, + override = False, + bucket = 'cosi-pipeline-public', + endpoint = 'https://s3.us-west-1.wasabisys.com', + access_key_id = 'GBAL6XATQZNRV3GFH9Y4', + access_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'): + + if output is None: + output = file.split('/')[-1] + + if os.path.exists(output) and not override: + raise RuntimeError(f"File {output} already exists.") + + subprocess.run(['aws', 's3api', 'get-object', + '--bucket', bucket, + '--key', file, + '--endpoint-url', endpoint, + output], + env = os.environ.copy() | {'AWS_ACCESS_KEY_ID':access_key_id, + 'AWS_SECRET_ACCESS_KEY':access_key}) diff --git a/tests/util/test_data_fetching.py b/tests/util/test_data_fetching.py new file mode 100644 index 00000000..7b0d8510 --- /dev/null +++ b/tests/util/test_data_fetching.py @@ -0,0 +1,8 @@ +from cosipy.util import fetch_wasabi_file + +fetch_wasabi_file('test_file.txt', override = True) + +f = open('test_file.txt') + +assert f.read() == 'Small file used for testing purposes.\n' + From 
7354b2193edd53a8cac2e6ad88efe6955677bb7c Mon Sep 17 00:00:00 2001 From: Israel Martinez Date: Tue, 20 Feb 2024 15:13:40 -0500 Subject: [PATCH 2/3] Use awscli python instead of calling subprocess. Fixes issue with PATH in Jupyter. Also add docstring --- cosipy/util/data_fetching.py | 45 +++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/cosipy/util/data_fetching.py b/cosipy/util/data_fetching.py index f4324ffe..1d8dce02 100644 --- a/cosipy/util/data_fetching.py +++ b/cosipy/util/data_fetching.py @@ -1,23 +1,46 @@ -import subprocess, os +import os +from awscli.clidriver import create_clidriver def fetch_wasabi_file(file, output = None, override = False, bucket = 'cosi-pipeline-public', endpoint = 'https://s3.us-west-1.wasabisys.com', - access_key_id = 'GBAL6XATQZNRV3GFH9Y4', - access_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'): + access_key = 'GBAL6XATQZNRV3GFH9Y4', + secret_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'): + """ + Download a file from COSI's Wasabi acccount. + Parameters + ---------- + file: str + Full path to file in Wasabi + output: str + Full path to the downloaded file in the local system. By default it will use + the current durectory and the same file name as the input file. 
+ bucket: str + Passed to aws --bucket option + endpoint: str: + Passed to aws --endpoint-url option + access_key: str + AWS_ACCESS_KEY_ID + secret_key: str + AWS_SECRET_ACCESS_KEY + """ + if output is None: output = file.split('/')[-1] if os.path.exists(output) and not override: raise RuntimeError(f"File {output} already exists.") - - subprocess.run(['aws', 's3api', 'get-object', - '--bucket', bucket, - '--key', file, - '--endpoint-url', endpoint, - output], - env = os.environ.copy() | {'AWS_ACCESS_KEY_ID':access_key_id, - 'AWS_SECRET_ACCESS_KEY':access_key}) + + cli = create_clidriver() + + cli.session.set_credentials(access_key, secret_key) + + cli.main(['s3api', 'get-object', + '--bucket', bucket, + '--key', file, + '--endpoint-url', endpoint, + output]) + From a8af8c45c738f0759cc60eb1484de9723f8c7984 Mon Sep 17 00:00:00 2001 From: Israel Martinez Date: Tue, 20 Feb 2024 15:58:26 -0500 Subject: [PATCH 3/3] Add wasabi function to docs --- cosipy/util/data_fetching.py | 12 ++++++------ docs/api/index.rst | 2 +- docs/api/util.rst | 4 ++++ docs/conf.py | 3 ++- 4 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 docs/api/util.rst diff --git a/cosipy/util/data_fetching.py b/cosipy/util/data_fetching.py index 1d8dce02..bef3e66c 100644 --- a/cosipy/util/data_fetching.py +++ b/cosipy/util/data_fetching.py @@ -13,18 +13,18 @@ def fetch_wasabi_file(file, Parameters ---------- - file: str + file : str Full path to file in Wasabi - output: str + output : str, optional Full path to the downloaded file in the local system. By default it will use the current durectory and the same file name as the input file. 
- bucket: str + bucket : str, optional Passed to aws --bucket option - endpoint: str: + endpoint : str, optional Passed to aws --endpoint-url option - access_key: str + access_key : str, optional AWS_ACCESS_KEY_ID - secret_key: str + secret_key : str, optional AWS_SECRET_ACCESS_KEY """ diff --git a/docs/api/index.rst b/docs/api/index.rst index bccae241..79f566d5 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -18,6 +18,6 @@ If you are instead interested in an overview on how to use cosipy, see out `tuto threeml ts_map image_deconvolution + util - diff --git a/docs/api/util.rst b/docs/api/util.rst new file mode 100644 index 00000000..962a8863 --- /dev/null +++ b/docs/api/util.rst @@ -0,0 +1,4 @@ +Utilities +========= + +.. autofunction:: cosipy.util.fetch_wasabi_file diff --git a/docs/conf.py b/docs/conf.py index 57a82fdb..3d93c3f6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -65,7 +65,8 @@ 'scoords', 'pandas', 'tqdm', - 'scipy'] + 'scipy', + 'awscli'] # There seems to be a conflict between unittest.mock (used by sphinx) and metaclasses # The cosipy.threeml.custom_functions.Band_Eflux includes a metaclass from