From 9be6e9c153c596b9fa8a90259b66cf61688fda18 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 31 Oct 2024 13:25:45 -0600 Subject: [PATCH 01/13] Add `harmony-py` as a dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 1ff3a8824..fc5e71b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ fiona geopandas h5netcdf h5py +harmony-py holoviews hvplot matplotlib From 192cee254b92665238a8952ef15e882915c57037 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 31 Oct 2024 17:47:18 -0600 Subject: [PATCH 02/13] Re-write `get_capabilities` to use `harmony-py` --- icepyx/core/harmony.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py index 8d03ecc5d..cc812b008 100644 --- a/icepyx/core/harmony.py +++ b/icepyx/core/harmony.py @@ -1,13 +1,14 @@ from typing import Any -import requests - -from icepyx.core.urls import CAPABILITIES_BASE_URL +import harmony def get_capabilities(concept_id: str) -> dict[str, Any]: - response = requests.get( - CAPABILITIES_BASE_URL, - params={"collectionId": concept_id}, - ) - return response.json() + capabilities_request = harmony.CapabilitiesRequest(concept_id=concept_id) + # TODO: This will work if the user has a .netrc file available but the other + # auth options might fail. We might need to add harmony client auth to the + # icepyx auth package. + harmony_client = harmony.Client() + response = harmony_client.submit(capabilities_request) + + return response From e7ff043d51c293b48d09c2ed685c12487fdc6bd4 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 11 Nov 2024 14:57:31 -0700 Subject: [PATCH 03/13] Harmony api: use `EarthdataAuthMixin` --- icepyx/core/harmony.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py index cc812b008..33274a5ac 100644 --- a/icepyx/core/harmony.py +++ b/icepyx/core/harmony.py @@ -2,13 +2,22 @@ import harmony +from icepyx.core.auth import EarthdataAuthMixin -def get_capabilities(concept_id: str) -> dict[str, Any]: - capabilities_request = harmony.CapabilitiesRequest(concept_id=concept_id) - # TODO: This will work if the user has a .netrc file available but the other - # auth options might fail. We might need to add harmony client auth to the - # icepyx auth package. - harmony_client = harmony.Client() - response = harmony_client.submit(capabilities_request) - return response +class HarmonyApi(EarthdataAuthMixin): + def __init__(self): + # initialize authentication properties + EarthdataAuthMixin.__init__(self) + self.harmony_client = harmony.Client( + auth=( + self.auth.username, + self.auth.password, + ), + ) + + def get_capabilities(self, concept_id: str) -> dict[str, Any]: + capabilities_request = harmony.CapabilitiesRequest(concept_id=concept_id) + response = self.harmony_client.submit(capabilities_request) + + return response From d80a4ce1ef1b9afa908b2baeaf7676c3450adc13 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 12 Nov 2024 14:28:43 -0700 Subject: [PATCH 04/13] WIP begin implementing use of harmony-py Subset orders can now be submitted and the job completes. Some next steps: * Support downloads from harmony * Support non-subset downloads. This might be a little more complicated, as it does not appear that the harmony API can support non-subset orders (it always uses spatial/temporal constraints to do subsetting, not just filtering). So we may need to use `earthaccess` to download our granule list instead. * Remove code supporting variable subsetting. This is not supported by Harmony, but could be in the future. --- icepyx/core/APIformatting.py | 28 ++--- icepyx/core/granules.py | 227 ++++++++++------------------------- icepyx/core/harmony.py | 43 ++++++- icepyx/core/query.py | 35 +++--- icepyx/core/spatial.py | 58 +++++++++ icepyx/core/types/api.py | 9 +- 6 files changed, 201 insertions(+), 199 deletions(-) diff --git a/icepyx/core/APIformatting.py b/icepyx/core/APIformatting.py index dc20c6878..307084057 100644 --- a/icepyx/core/APIformatting.py +++ b/icepyx/core/APIformatting.py @@ -1,13 +1,12 @@ """Generate and format information for submitting to API (CMR and NSIDC).""" import datetime as dt -from typing import Any, Generic, Literal, Optional, TypeVar, Union, overload +from typing import Any, Generic, Literal, Optional, TypeVar, overload from icepyx.core.exceptions import ExhaustiveTypeGuardException, TypeGuardException -from icepyx.core.types import ( +from icepyx.core.harmony import HarmonyTemporal +from icepyx.core.types.api import ( CMRParams, - EGIParamsSubset, - EGIRequiredParams, ) # ---------------------------------------------------------------------- @@ -38,18 +37,17 @@ def _fmt_temporal(start, end, key): assert isinstance(start, dt.datetime) assert isinstance(end, dt.datetime) - if key == "temporal": + if key == "temporal": # search option. fmt_timerange = ( start.strftime("%Y-%m-%dT%H:%M:%SZ") + "," + end.strftime("%Y-%m-%dT%H:%M:%SZ") ) - elif key == "time": - fmt_timerange = ( - start.strftime("%Y-%m-%dT%H:%M:%S") - + "," - + end.strftime("%Y-%m-%dT%H:%M:%S") - ) + elif key == "time": # subsetting option. + # Format for harmony. This will do subsetting. + # TODO: change `key` to something more clear. `temporal` is the key + # passed into Harmony, so this is very confusing! + fmt_timerange: HarmonyTemporal = {"start": start, "stop": end} else: raise ValueError("An invalid time key was submitted for formatting.") @@ -212,20 +210,22 @@ def __get__( self, instance: 'Parameters[Literal["required"]]', owner: Any, - ) -> EGIRequiredParams: ... + ): # -> EGIRequiredParams: ... + ... @overload def __get__( self, instance: 'Parameters[Literal["subset"]]', owner: Any, - ) -> EGIParamsSubset: ... + ): # -> EGIParamsSubset: ... + ... def __get__( self, instance: "Parameters", owner: Any, - ) -> Union[CMRParams, EGIRequiredParams, EGIParamsSubset]: + ) -> CMRParams: """ Returns the dictionary of formatted keys associated with the parameter object. diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index d6a519048..286767b7d 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -8,24 +8,24 @@ import re import time from typing import Union -from xml.etree import ElementTree as ET import zipfile import numpy as np import requests -from requests.compat import unquote import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin -from icepyx.core.cmr import CMR_PROVIDER +from icepyx.core.cmr import CMR_PROVIDER, get_concept_id import icepyx.core.exceptions -from icepyx.core.types import ( +from icepyx.core.harmony import HarmonyApi +from icepyx.core.types.api import ( CMRParams, - EGIRequiredParamsDownload, - EGIRequiredParamsSearch, ) -from icepyx.core.urls import DOWNLOAD_BASE_URL, GRANULE_SEARCH_BASE_URL, ORDER_BASE_URL -from icepyx.uat import EDL_ACCESS_TOKEN +from icepyx.core.urls import DOWNLOAD_BASE_URL, GRANULE_SEARCH_BASE_URL + +# TODO: mix this into existing classes rather than declaring as a global +# variable. +HARMONY_API = HarmonyApi() def info(grans: list[dict]) -> dict[str, Union[int, float]]: @@ -191,7 +191,6 @@ def __init__( def get_avail( self, CMRparams: CMRParams, - reqparams: EGIRequiredParamsSearch, cloud: bool = False, ): """ @@ -222,9 +221,7 @@ def get_avail( query.Query.avail_granules """ - assert ( - CMRparams is not None and reqparams is not None - ), "Missing required input parameter dictionaries" + assert CMRparams is not None, "Missing required input parameter dictionary" # if not hasattr(self, 'avail'): self.avail = [] @@ -232,14 +229,12 @@ def get_avail( headers = { "Accept": "application/json", "Client-Id": "icepyx", - "Authorization": f"Bearer {EDL_ACCESS_TOKEN}", } # note we should also check for errors whenever we ping NSIDC-API - # make a function to check for errors params = apifmt.combine_params( CMRparams, - {k: reqparams[k] for k in ["short_name", "version", "page_size"]}, {"provider": CMR_PROVIDER}, ) @@ -292,7 +287,7 @@ def get_avail( def place_order( self, CMRparams: CMRParams, - reqparams: EGIRequiredParamsDownload, + reqparams, # : EGIRequiredParamsDownload, subsetparams, verbose, subset=True, @@ -337,10 +332,13 @@ def place_order( -------- query.Query.order_granules """ - raise icepyx.core.exceptions.RefactoringException + # raise icepyx.core.exceptions.RefactoringException self.get_avail(CMRparams, reqparams) + # TODO: the harmony API may not support non-subsetting. So we may need + # to provide a list of granules for harmony to download, or use a + # different API. if subset is False: request_params = apifmt.combine_params( CMRparams, reqparams, {"agent": "NO"} @@ -348,158 +346,63 @@ def place_order( else: request_params = apifmt.combine_params(CMRparams, reqparams, subsetparams) - order_fn = ".order_restart" + concept_id = get_concept_id( + product=request_params["short_name"], version=request_params["version"] + ) - total_pages = int(np.ceil(len(self.avail) / reqparams["page_size"])) - print( - "Total number of data order requests is ", - total_pages, - " for ", - len(self.avail), - " granules.", + # TODO: At this point, the request parameters have been formatted into + # strings. `harmony-py` expects python objects (e.g., `dt.datetime` for + # temporal values) + + # Place the order. + # TODO: there are probably other options we want to more generically + # expose here. E.g., instead of just accepting a `bounding_box` of a + # particular flavor, we want to be able to pass in a polygon? + job_id = HARMONY_API.place_order( + concept_id=concept_id, + # TODO: why the double-nested bbox dict here? + bounding_box=subsetparams["bbox"]["bbox"], + temporal=subsetparams["time"], ) - if reqparams["page_num"] > 0: - pagenums = [reqparams["page_num"]] - else: - pagenums = range(1, total_pages + 1) + # TODO/Question: should this be changed from a list to a single value? + # There will only be one harmony job per request (I think) + self.orderIDs = [job_id] + order_fn = ".order_restart" + with open(order_fn, "w") as fid: + json.dump({"orderIDs": self.orderIDs}, fid) - for page_num in pagenums: + print("order ID: ", job_id) + status = HARMONY_API.check_order_status(job_id) + print("Initial status of your harmony order request: ", status["status"]) + # TODO: confirm which status responses we might expect. "running", + # "paused", or "canceled" are documented here: + # https://harmony.earthdata.nasa.gov/docs#getting-job-status + # I have also seen `running` and `running_with_errors`. + while status["status"].startswith("running"): print( - "Data request ", - page_num, - " of ", - total_pages, - " is submitting to NSIDC", + "Your harmony order status is still ", + status["status"], + ". Please continue waiting... this may take a few moments.", ) - breakpoint() - request_params.update({"page_num": page_num}) - - request = self.session.get(ORDER_BASE_URL, params=request_params) - - # DevGoal: use the request response/number to do some error handling/ - # give the user better messaging for failures - # print(request.content) - # root = ET.fromstring(request.content) - # print([subset_agent.attrib for subset_agent in root.iter('SubsetAgent')]) - - if verbose is True: - print("Request HTTP response: ", request.status_code) - # print('Order request URL: ', request.url) - - # Raise bad request: Loop will stop for bad response code. - request.raise_for_status() - esir_root = ET.fromstring(request.content) - if verbose is True: - print("Order request URL: ", unquote(request.url)) - print( - "Order request response XML content: ", - request.content.decode("utf-8"), - ) - - # Look up order ID - orderlist = [] - for order in esir_root.findall("./order/"): - # if verbose is True: - # print(order) - orderlist.append(order.text) - orderID = orderlist[0] - print("order ID: ", orderID) - - # Create status URL - statusURL = f"{ORDER_BASE_URL}/{orderID}" - if verbose is True: - print("status URL: ", statusURL) - - # Find order status - request_response = self.session.get(statusURL) - if verbose is True: - print( - "HTTP response from order response URL: ", - request_response.status_code, - ) - - # Raise bad request: Loop will stop for bad response code. - request_response.raise_for_status() - request_root = ET.fromstring(request_response.content) - statuslist = [] - for status in request_root.findall("./requestStatus/"): - statuslist.append(status.text) - status = statuslist[0] - print("Initial status of your order request at NSIDC is: ", status) - - loop_root = None - # If status is already finished without going into pending/processing - if status.startswith("complete"): - loop_response = self.session.get(statusURL) - loop_root = ET.fromstring(loop_response.content) - - # Continue loop while request is still processing - while status == "pending" or status == "processing": - print( - "Your order status is still ", - status, - " at NSIDC. Please continue waiting... this may take a few moments.", - ) - # print('Status is not complete. Trying again') - time.sleep(10) - loop_response = self.session.get(statusURL) - - # Raise bad request: Loop will stop for bad response code. - loop_response.raise_for_status() - loop_root = ET.fromstring(loop_response.content) - - # find status - statuslist = [] - for status in loop_root.findall("./requestStatus/"): - statuslist.append(status.text) - status = statuslist[0] - # print('Retry request status is: ', status) - if status == "pending" or status == "processing": - continue - - if not isinstance(loop_root, ET.Element): - # The typechecker needs help knowing that at this point loop_root is - # set, as it can't tell that the conditionals above are supposed to be - # exhaustive. - raise icepyx.core.exceptions.ExhaustiveTypeGuardException - - # Order can either complete, complete_with_errors, or fail: - # Provide complete_with_errors error message: - if status == "complete_with_errors" or status == "failed": - messagelist = [] - for message in loop_root.findall("./processInfo/"): - messagelist.append(message.text) - print("Your order is: ", status) - print("NSIDC provided these error messages:") - pprint.pprint(messagelist) - - if status == "complete" or status == "complete_with_errors": - print("Your order is:", status) - messagelist = [] - for message in loop_root.findall("./processInfo/info"): - messagelist.append(message.text) - if messagelist != []: - print("NSIDC returned these messages") - pprint.pprint(messagelist) - if not hasattr(self, "orderIDs"): - self.orderIDs = [] - - self.orderIDs.append(orderID) - else: - print("Request failed.") - - # DevGoal: save orderIDs more frequently than just at the end for large orders - # (e.g. for len(reqparams['page_num']) > 5 or 10 or something) - # Save orderIDs to file to avoid resubmitting order in case kernel breaks down. - # save orderIDs for every 5 orders when more than 10 orders are submitted. - if reqparams["page_num"] >= 10: - with open(order_fn, "w") as fid: - json.dump({"orderIDs": self.orderIDs}, fid) - - # --- Output the final orderIDs - with open(order_fn, "w") as fid: - json.dump({"orderIDs": self.orderIDs}, fid) + # Requesting the status too often can result in a 500 error. + time.sleep(5) + status = HARMONY_API.check_order_status(job_id) + + if status["status"] == "complete_with_errors" or status["status"] == "failed": + print("Your order is: ", status["status"]) + print("Harmony provided these error messages:") + pprint.pprint(status["errors"]) + + # TODO: consider always printing the status message. There's no need for + # this check, and the message is relevant regardless of if there are + # errors or not. We could check for a failure status instead. + if status["status"] == "complete" or status["status"] == "complete_with_errors": + print("Your order is:", status["status"]) + print("Harmony returned this message:") + pprint.pprint(status["message"]) + else: + print(f"Request failed with status {status['status']}.") return self.orderIDs diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py index 33274a5ac..270083c7e 100644 --- a/icepyx/core/harmony.py +++ b/icepyx/core/harmony.py @@ -1,10 +1,16 @@ -from typing import Any +import datetime as dt +from typing import Any, NotRequired, TypedDict, Union import harmony from icepyx.core.auth import EarthdataAuthMixin +class HarmonyTemporal(TypedDict): + start: NotRequired[dt.datetime] + stop: NotRequired[dt.datetime] + + class HarmonyApi(EarthdataAuthMixin): def __init__(self): # initialize authentication properties @@ -21,3 +27,38 @@ def get_capabilities(self, concept_id: str) -> dict[str, Any]: response = self.harmony_client.submit(capabilities_request) return response + + def place_order( + self, + concept_id: str, + # These are optional subset parameters + bounding_box: Union[harmony.BBox, None] = None, + temporal: Union[HarmonyTemporal, None] = None, + ) -> str: + """Places a Harmony order with the given parameters. + + Return a string representing a job ID. + + TODO/Question: it looks like this code will always use the provided + parameters to do subsetting. Are there cases where we just want the data + downloaded as whole granules? If so, we may need to use another API to + do so? + """ + collection = harmony.Collection(id=concept_id) + request = harmony.Request( + collection=collection, + spatial=bounding_box, + temporal=temporal, + ) + + if not request.is_valid(): + # TODO: consider more specific error class & message + raise RuntimeError("Failed to create valid request") + + job_id = self.harmony_client.submit(request) + + return job_id + + def check_order_status(self, job_id: str): + status = self.harmony_client.status(job_id) + return status diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 573ca8b1b..5f93b0375 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1,7 +1,7 @@ import datetime as dt from functools import cached_property import pprint -from typing import Optional, Union, cast +from typing import Optional, Union import geopandas as gpd import holoviews as hv @@ -17,11 +17,8 @@ import icepyx.core.is2ref as is2ref import icepyx.core.spatial as spat import icepyx.core.temporal as tp -from icepyx.core.types import ( +from icepyx.core.types.api import ( CMRParams, - EGIParamsSubset, - EGIRequiredParams, - EGIRequiredParamsDownload, ) import icepyx.core.validate_inputs as val from icepyx.core.variables import Variables as Variables @@ -597,7 +594,7 @@ def CMRparams(self) -> CMRParams: return self._CMRparams.fmted_keys @property - def reqparams(self) -> EGIRequiredParams: + def reqparams(self): # -> EGIRequiredParams: """ Display the required key:value pairs that will be submitted. It generates the dictionary if it does not already exist. @@ -613,8 +610,6 @@ def reqparams(self) -> EGIRequiredParams: >>> reg_a.reqparams # doctest: +SKIP {'short_name': 'ATL06', 'version': '006', 'page_size': 2000, 'page_num': 1, 'request_mode': 'async', 'include_meta': 'Y', 'client_string': 'icepyx'} """ - raise RefactoringException - if not hasattr(self, "_reqparams"): self._reqparams = apifmt.Parameters("required", reqtype="search") self._reqparams.build_params(product=self.product, version=self._version) @@ -624,7 +619,7 @@ def reqparams(self) -> EGIRequiredParams: # @property # DevQuestion: if I make this a property, I get a "dict" object is not callable # when I try to give input kwargs... what approach should I be taking? - def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: + def subsetparams(self, **kwargs): # -> Union[EGIParamsSubset, dict[Never, Never]]: """ Display the subsetting key:value pairs that will be submitted. It generates the dictionary if it does not already exist @@ -650,7 +645,7 @@ def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: {'time': '2019-02-20T00:00:00,2019-02-28T23:59:59', 'bbox': '-55.0,68.0,-48.0,71.0'} """ - raise RefactoringException + # raise RefactoringException if not hasattr(self, "_subsetparams"): self._subsetparams = apifmt.Parameters("subset") @@ -665,6 +660,7 @@ def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: else: # If the user has supplied a subset list of variables, append the # icepyx required variables to the Coverage dict + # TODO: this is not supported in Harmony. if "Coverage" in kwargs: var_list = [ "orbit_info/sc_orient", @@ -690,13 +686,13 @@ def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: self._subsetparams.build_params( geom_filepath=self._spatial._geom_file, extent_type=self._spatial._ext_type, - spatial_extent=self._spatial.fmt_for_EGI(), + spatial_extent=self._spatial.fmt_for_harmony(), **kwargs, ) else: self._subsetparams.build_params( extent_type=self._spatial._ext_type, - spatial_extent=self._spatial.fmt_for_EGI(), + spatial_extent=self._spatial.fmt_for_harmony(), **kwargs, ) @@ -1024,12 +1020,13 @@ def order_granules( . Retry request status is: complete """ - breakpoint() - raise RefactoringException - - if not hasattr(self, "reqparams"): - self.reqparams + # breakpoint() + # raise RefactoringException + # TODO: this probably shouldn't be mutated based on which method is being called... + # It is also very confusing to have both `self.reqparams` and + # `self._reqparams`, each of which does something different! + self.reqparams if self._reqparams._reqtype == "search": self._reqparams._reqtype = "download" @@ -1065,7 +1062,7 @@ def order_granules( tempCMRparams["readable_granule_name[]"] = gran self.granules.place_order( tempCMRparams, - cast(EGIRequiredParamsDownload, self.reqparams), + self.reqparams, self.subsetparams(**kwargs), verbose, subset, @@ -1075,7 +1072,7 @@ def order_granules( else: self.granules.place_order( self.CMRparams, - cast(EGIRequiredParamsDownload, self.reqparams), + self.reqparams, self.subsetparams(**kwargs), verbose, subset, diff --git a/icepyx/core/spatial.py b/icepyx/core/spatial.py index fef61846f..b282b559b 100644 --- a/icepyx/core/spatial.py +++ b/icepyx/core/spatial.py @@ -4,6 +4,7 @@ import warnings import geopandas as gpd +import harmony import numpy as np from numpy.typing import NDArray from shapely.geometry import Polygon, box @@ -821,3 +822,60 @@ def fmt_for_EGI(self) -> str: else: raise icepyx.core.exceptions.ExhaustiveTypeGuardException + + def fmt_for_harmony(self) -> dict[str, harmony.BBox]: + """ + Format the spatial extent input into format expected by `harmony-py`. + + Returns a dictionary with keys mapping to `harmony.Request` kwargs, with + values appropriately formated for the harmony request. + + `harmony-py` can take two different spatial parameters: + + * `spatial`: "Bounding box spatial constraints on the data or Well Known + Text (WKT) string describing the spatial constraints." The "Bounding + box" is expected to be a `harmony.BBox`. + * `shape`: "a file path to an ESRI Shapefile zip, GeoJSON file, or KML + file to use for spatial subsetting. Note: not all collections support + shapefile subsetting" + + Question: is `spatial` the same as `shape`, in terms of performance? If + so, we could be consistent and always turn the input into geojson and + pass that along to harmony. Otherwise we should choose `spatial` if the + extent_type is bounding, otherwise `shape`. + Answer: No! They're not the same. They map to different harmony + parameters and each is a different service. E.g., some collections may + have bounding box subsetting while others have shape subsetting (or + both). + TODO: think more about how we verify if certain inputs are valid for + harmony. E.g., do we need to check the capabilities of each and + cross-check that with user inputs to determine which action to take? + Also: Does `icepyx` always perform subsetting based on user input? If + not, how do we determine which parameters are for finding granules vs + performing subetting? + + Question: is there any way to pass in a geojson string directly, so that + we do not have to mock out a file just for harmony? Answer: no, not + direcly. `harmony-py` wants a path to a file on disk. We may want to + have the function that submits the request to harmony with `harmony-py` + accept something that's easily-serializable to a geojson file so that it + can manage the lifespan of the file. It would be best (I think) to avoid + writing tmp files to disk in this function, because it doesn't know when + the request gets made/when to cleanup the file. That means that we may + leave stray files on the user's computer. Ideally, we would be able to + pass `harmony-py` a bytes object (or a shapely Polygon!) + """ + # Begin with bounding box because this is the simplest case. + if self.extent_type == "bounding_box": + harmony_kwargs = { + "bbox": harmony.BBox( + w=self.extent[0], + s=self.extent[1], + e=self.extent[2], + n=self.extent[3], + ) + } + + return harmony_kwargs + else: + raise NotImplementedError diff --git a/icepyx/core/types/api.py b/icepyx/core/types/api.py index b29ba8fb4..9af9bb9d6 100644 --- a/icepyx/core/types/api.py +++ b/icepyx/core/types/api.py @@ -1,11 +1,14 @@ -from typing import Literal, TypedDict, Union +from typing import TypedDict, Union -from typing_extensions import NotRequired from pydantic import BaseModel +from typing_extensions import NotRequired CMRParamsBase = TypedDict( "CMRParamsBase", { + "short_name": str, + "version": str, + "page_size": int, "temporal": NotRequired[str], "options[readable_granule_name][pattern]": NotRequired[str], "options[spatial][or]": NotRequired[str], @@ -25,4 +28,4 @@ class CMRParamsWithPolygon(CMRParamsBase): CMRParams = Union[CMRParamsWithBbox, CMRParamsWithPolygon] -class HarmonyCoverageAPIParamsBase(BaseModel): +class HarmonyCoverageAPIParamsBase(BaseModel): ... From 98709d7c8c82404d303d3c2d41f227f36ab8141f Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 12 Nov 2024 14:44:53 -0700 Subject: [PATCH 05/13] WIP add TODO --- icepyx/core/granules.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 286767b7d..bcec24592 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -334,6 +334,8 @@ def place_order( """ # raise icepyx.core.exceptions.RefactoringException + # TODO: this returns granules for collections that do not match our + # query. Why? Is this expected or a bug? self.get_avail(CMRparams, reqparams) # TODO: the harmony API may not support non-subsetting. So we may need From a83fc62bc05d53c6c167be8dfe78cc2a63874058 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 11:43:45 -0700 Subject: [PATCH 06/13] WIP very confused... The handling of various parameters is VERY confusing. What is necessary for CMR vs subsetting/ordering is not clear and conifg is mutated by various parts of the code, making it difficult to track down where values come from/get set/overridden --- icepyx/core/APIformatting.py | 3 ++ icepyx/core/granules.py | 22 +++++++-------- icepyx/core/harmony.py | 2 +- icepyx/core/query.py | 55 +++++++++++++++++++++--------------- 4 files changed, 47 insertions(+), 35 deletions(-) diff --git a/icepyx/core/APIformatting.py b/icepyx/core/APIformatting.py index 307084057..a87b2efd6 100644 --- a/icepyx/core/APIformatting.py +++ b/icepyx/core/APIformatting.py @@ -265,6 +265,9 @@ def __init__( self, partype: T, values: Optional[dict] = None, + # TODO: 'download" reqtype appears to never get used. `None` is most + # common, and `search` is passed in when creating required cmr + # parameters to search for matching granules. reqtype: Optional[Literal["search", "download"]] = None, ): assert partype in [ diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index bcec24592..8a9a0103f 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -249,6 +249,7 @@ def get_avail( headers=headers, params=apifmt.to_string(params), ) + breakpoint() try: cmr_search_after = response.headers["CMR-Search-After"] @@ -287,7 +288,6 @@ def get_avail( def place_order( self, CMRparams: CMRParams, - reqparams, # : EGIRequiredParamsDownload, subsetparams, verbose, subset=True, @@ -305,7 +305,7 @@ def place_order( Dictionary of properly formatted CMR search parameters. reqparams : Dictionary of properly formatted parameters required for searching, ordering, - or downloading from NSIDC (via their EGI system). + or downloading from NSASA (via harmony). subsetparams : dictionary Dictionary of properly formatted subsetting parameters. An empty dictionary is passed as input here when subsetting is set to False in query methods. @@ -336,26 +336,22 @@ def place_order( # TODO: this returns granules for collections that do not match our # query. Why? Is this expected or a bug? - self.get_avail(CMRparams, reqparams) + breakpoint() + self.get_avail(CMRparams) # TODO: the harmony API may not support non-subsetting. So we may need # to provide a list of granules for harmony to download, or use a # different API. if subset is False: - request_params = apifmt.combine_params( - CMRparams, reqparams, {"agent": "NO"} - ) + request_params = apifmt.combine_params(CMRparams, {"agent": "NO"}) else: - request_params = apifmt.combine_params(CMRparams, reqparams, subsetparams) + request_params = apifmt.combine_params(CMRparams, subsetparams) concept_id = get_concept_id( - product=request_params["short_name"], version=request_params["version"] + product=request_params["short_name"], + version=request_params["version"], ) - # TODO: At this point, the request parameters have been formatted into - # strings. `harmony-py` expects python objects (e.g., `dt.datetime` for - # temporal values) - # Place the order. # TODO: there are probably other options we want to more generically # expose here. E.g., instead of just accepting a `bounding_box` of a @@ -381,6 +377,8 @@ def place_order( # "paused", or "canceled" are documented here: # https://harmony.earthdata.nasa.gov/docs#getting-job-status # I have also seen `running` and `running_with_errors`. + # The list of possible statues are here: + # https://github.com/nasa/harmony/blob/8b2eb47feab5283d237f3679ac8e09f50e85038f/db/db.sql#L8 while status["status"].startswith("running"): print( "Your harmony order status is still ", diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py index 270083c7e..8143956a6 100644 --- a/icepyx/core/harmony.py +++ b/icepyx/core/harmony.py @@ -59,6 +59,6 @@ def place_order( return job_id - def check_order_status(self, job_id: str): + def check_order_status(self, job_id: str) -> dict[str, Any]: status = self.harmony_client.status(job_id) return status diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 5f93b0375..1b2659b36 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -571,8 +571,13 @@ def CMRparams(self) -> CMRParams: # print(self._CMRparams) # print(self._CMRparams.fmted_keys) - # dictionary of optional CMR parameters - kwargs = {} + # create a dictionary of required and optional CMR parameters. + # Start by copying the required keys, formatted for CMR requests. + # TODO: This gets set at some point in the code, but later `CMRparams` + # lacks the values in `cmr_reqparams`! Why?? + self.cmr_reqparams + kwargs = self._cmr_reqparams.fmted_keys.copy() + # temporal CMR parameters if hasattr(self, "_temporal") and self.product != "ATL11": kwargs["start"] = self._temporal._start @@ -594,9 +599,9 @@ def CMRparams(self) -> CMRParams: return self._CMRparams.fmted_keys @property - def reqparams(self): # -> EGIRequiredParams: + def cmr_reqparams(self): """ - Display the required key:value pairs that will be submitted. + Display the required key:value pairs that will be submitted for a CMR request. It generates the dictionary if it does not already exist. Examples @@ -611,10 +616,12 @@ def reqparams(self): # -> EGIRequiredParams: {'short_name': 'ATL06', 'version': '006', 'page_size': 2000, 'page_num': 1, 'request_mode': 'async', 'include_meta': 'Y', 'client_string': 'icepyx'} """ if not hasattr(self, "_reqparams"): - self._reqparams = apifmt.Parameters("required", reqtype="search") - self._reqparams.build_params(product=self.product, version=self._version) + self._cmr_reqparams = apifmt.Parameters("required", reqtype="search") + self._cmr_reqparams.build_params( + product=self.product, version=self._version + ) - return self._reqparams.fmted_keys + return self._cmr_reqparams.fmted_keys # @property # DevQuestion: if I make this a property, I get a "dict" object is not callable @@ -955,7 +962,7 @@ def avail_granules( try: self.granules.avail except AttributeError: - self.granules.get_avail(self.CMRparams, self.reqparams) + self.granules.get_avail(self.CMRparams) if ids or cycles or tracks or cloud: # list of outputs in order of ids, cycles, tracks, cloud @@ -1023,19 +1030,17 @@ def order_granules( # breakpoint() # raise RefactoringException - # TODO: this probably shouldn't be mutated based on which method is being called... - # It is also very confusing to have both `self.reqparams` and - # `self._reqparams`, each of which does something different! - self.reqparams - if self._reqparams._reqtype == "search": - self._reqparams._reqtype = "download" + # This call ensures that `self._cmr_reqparams` is set. + self.cmr_reqparams + if self._cmr_reqparams._reqtype == "search": + self._cmr_reqparams._reqtype = "download" - if "email" in self._reqparams.fmted_keys or email is False: - self._reqparams.build_params(**self._reqparams.fmted_keys) + if "email" in self._cmr_reqparams.fmted_keys or email is False: + self._cmr_reqparams.build_params(**self._cmr_reqparams.fmted_keys) elif email is True: user_profile = self.auth.get_user_profile() # pyright: ignore[reportAttributeAccessIssue] - self._reqparams.build_params( - **self._reqparams.fmted_keys, email=user_profile["email_address"] + self._cmr_reqparams.build_params( + **self._cmr_reqparams.fmted_keys, email=user_profile["email_address"] ) if subset is False: @@ -1051,19 +1056,26 @@ def order_granules( # and also if it already has a list of avail granules (if not, need to create one and add session) # Place multiple orders, one per granule, if readable_granule_name is used. + # TODO/Question: is "readable granule name" a thing in harmony? This may + # only be relevant for non-subset requests that will be handled by e.g., + # `earthaccess`. + # Answer: there appears to be a `granuleName` parameter that harmony can + # take for its own internal queries to CMR, but it is unclear how that + # works. See https://harmony.earthdata.nasa.gov/docs#query-parameters + # `harmony-py` accepts `granule_name` as an input. if "readable_granule_name[]" in self.CMRparams: gran_name_list = self.CMRparams["readable_granule_name[]"] tempCMRparams = self.CMRparams if len(gran_name_list) > 1: print( - "NSIDC only allows ordering of one granule by name at a time; your orders will be placed accordingly." + "Harmony only allows ordering of one granule by name at a time;" + " your orders will be placed accordingly." ) for gran in gran_name_list: tempCMRparams["readable_granule_name[]"] = gran self.granules.place_order( tempCMRparams, - self.reqparams, - self.subsetparams(**kwargs), + self.subsetparams(granule_name=gran, **kwargs), verbose, subset, geom_filepath=self._spatial._geom_file, @@ -1072,7 +1084,6 @@ def order_granules( else: self.granules.place_order( self.CMRparams, - self.reqparams, self.subsetparams(**kwargs), verbose, subset, From 4a668f667f2e999889b49e188ed556df022a51f7 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:04:40 -0700 Subject: [PATCH 07/13] WIP Extract subset-specific ordering code to its own function Makes it more clear that non-subset orders are not working right now. --- icepyx/core/granules.py | 138 +++++++++++++++++++++------------------- icepyx/core/harmony.py | 4 +- icepyx/core/query.py | 11 ++-- 3 files changed, 82 insertions(+), 71 deletions(-) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 8a9a0103f..b786618b8 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -249,7 +249,6 @@ def get_avail( headers=headers, params=apifmt.to_string(params), ) - breakpoint() try: cmr_search_after = response.headers["CMR-Search-After"] @@ -282,71 +281,11 @@ def get_avail( len(self.avail) > 0 ), "Your search returned no results; try different search parameters" - # DevNote: currently, default subsetting DOES NOT include variable subsetting, - # only spatial and temporal - # DevGoal: add kwargs to allow subsetting and more control over request options. - def place_order( + def _place_harmony_subset_order( self, - CMRparams: CMRParams, + request_params, subsetparams, - verbose, - subset=True, - geom_filepath=None, - ): - """ - Place an order for the available granules for the query object. - Adds the list of zipped files (orders) to the granules data object (which is - stored as the `granules` attribute of the query object). - You must be logged in to Earthdata to use this function. - - Parameters - ---------- - CMRparams : - Dictionary of properly formatted CMR search parameters. - reqparams : - Dictionary of properly formatted parameters required for searching, ordering, - or downloading from NSASA (via harmony). - subsetparams : dictionary - Dictionary of properly formatted subsetting parameters. An empty dictionary - is passed as input here when subsetting is set to False in query methods. - verbose : boolean, default False - Print out all feedback available from the order process. - Progress information is automatically printed regardless of the value of verbose. - subset : boolean, default True - Apply subsetting to the data order from the NSIDC, returning only data that meets the - subset parameters. - Spatial and temporal subsetting based on the input parameters happens - by default when subset=True, but additional subsetting options are available. - Spatial subsetting returns all data that are within the area of interest - (but not complete granules. - This eliminates false-positive granules returned by the metadata-level search) - geom_filepath : string, default None - String of the full filename and path when the spatial input is a file. - - Notes - ----- - This function is used by query.Query.order_granules(), which automatically - feeds in the required parameters. - - See Also - -------- - query.Query.order_granules - """ - # raise icepyx.core.exceptions.RefactoringException - - # TODO: this returns granules for collections that do not match our - # query. Why? Is this expected or a bug? - breakpoint() - self.get_avail(CMRparams) - - # TODO: the harmony API may not support non-subsetting. So we may need - # to provide a list of granules for harmony to download, or use a - # different API. - if subset is False: - request_params = apifmt.combine_params(CMRparams, {"agent": "NO"}) - else: - request_params = apifmt.combine_params(CMRparams, subsetparams) - + ) -> list[str]: concept_id = get_concept_id( product=request_params["short_name"], version=request_params["version"], @@ -406,6 +345,77 @@ def place_order( return self.orderIDs + def _place_non_subset_order( + self, + CMRparams: CMRParams, + ): + # TODO: use e.g., `earthaccess` to download files un-processed by harmony. + raise NotImplementedError("Support for non-subset orders is not implemented.") + self.get_avail(CMRparams) + request_params = apifmt.combine_params(CMRparams, {"agent": "NO"}) + + # DevNote: currently, default subsetting DOES NOT include variable subsetting, + # only spatial and temporal + # DevGoal: add kwargs to allow subsetting and more control over request options. + def place_order( + self, + CMRparams: CMRParams, + subsetparams, + verbose, + subset=True, + geom_filepath=None, + ): + """ + Place an order for the available granules for the query object. + Adds the list of zipped files (orders) to the granules data object (which is + stored as the `granules` attribute of the query object). + You must be logged in to Earthdata to use this function. + + Parameters + ---------- + CMRparams : + Dictionary of properly formatted CMR search parameters. + reqparams : + Dictionary of properly formatted parameters required for searching, ordering, + or downloading from NSASA (via harmony). + subsetparams : dictionary + Dictionary of properly formatted subsetting parameters. An empty dictionary + is passed as input here when subsetting is set to False in query methods. + verbose : boolean, default False + Print out all feedback available from the order process. + Progress information is automatically printed regardless of the value of verbose. + subset : boolean, default True + Apply subsetting to the data order from the NSIDC, returning only data that meets the + subset parameters. + Spatial and temporal subsetting based on the input parameters happens + by default when subset=True, but additional subsetting options are available. + Spatial subsetting returns all data that are within the area of interest + (but not complete granules. + This eliminates false-positive granules returned by the metadata-level search) + geom_filepath : string, default None + String of the full filename and path when the spatial input is a file. + + Notes + ----- + This function is used by query.Query.order_granules(), which automatically + feeds in the required parameters. + + See Also + -------- + query.Query.order_granules + """ + if subset is False: + self._place_non_subset_order(CMRparams) + else: + # TODO: why are we combining these like this? We could just pass in + # these values separately. Eventually would like to collapse this + # down into just one `HarmonySubsetParams`. + request_params = apifmt.combine_params(CMRparams, subsetparams) + return self._place_harmony_subset_order( + request_params, + subsetparams, + ) + def download(self, verbose, path, restart=False): """ Downloads the data for the object's orderIDs, which are generated by ordering data diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py index 8143956a6..bf0d1cb3e 100644 --- a/icepyx/core/harmony.py +++ b/icepyx/core/harmony.py @@ -46,9 +46,7 @@ def place_order( """ collection = harmony.Collection(id=concept_id) request = harmony.Request( - collection=collection, - spatial=bounding_box, - temporal=temporal, + collection=collection, spatial=bounding_box, temporal=temporal ) if not request.is_valid(): diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 1b2659b36..1252cf116 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1052,6 +1052,9 @@ def order_granules( ): del self._subsetparams + # TODO: this shouldn't be necessary: + cmr_params = {**self.CMRparams, **self.cmr_reqparams} + # REFACTOR: add checks here to see if the granules object has been created, # and also if it already has a list of avail granules (if not, need to create one and add session) @@ -1063,9 +1066,9 @@ def order_granules( # take for its own internal queries to CMR, but it is unclear how that # works. See https://harmony.earthdata.nasa.gov/docs#query-parameters # `harmony-py` accepts `granule_name` as an input. - if "readable_granule_name[]" in self.CMRparams: - gran_name_list = self.CMRparams["readable_granule_name[]"] - tempCMRparams = self.CMRparams + if "readable_granule_name[]" in cmr_params: + gran_name_list = cmr_params["readable_granule_name[]"] + tempCMRparams = cmr_params.copy() if len(gran_name_list) > 1: print( "Harmony only allows ordering of one granule by name at a time;" @@ -1083,7 +1086,7 @@ def order_granules( else: self.granules.place_order( - self.CMRparams, + cmr_params, self.subsetparams(**kwargs), verbose, subset, From 5ad82cf0089d0b06253f2b52fd82dfffce6b0274 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:08:20 -0700 Subject: [PATCH 08/13] WIP initialize `self.orderIDs` and append to it There can be more than one job submitted by a user. Make it easy to track all of the submitted jobs by initializing to a empty list and then appending to it as necessary --- icepyx/core/granules.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index b786618b8..fc0b8dc27 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -185,6 +185,8 @@ def __init__( # self.files = files # session = session + orderIDs: list[str] = [] + # ---------------------------------------------------------------------- # Methods @@ -302,9 +304,9 @@ def _place_harmony_subset_order( temporal=subsetparams["time"], ) - # TODO/Question: should this be changed from a list to a single value? - # There will only be one harmony job per request (I think) - self.orderIDs = [job_id] + # Append this job to the list of order ids. + self.orderIDs.append(job_id) + order_fn = ".order_restart" with open(order_fn, "w") as fid: json.dump({"orderIDs": self.orderIDs}, fid) From 2cacbd7c21b89e634f6f7eb6295fc1b849352815 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:15:33 -0700 Subject: [PATCH 09/13] WIP stub out functions to simplify getting params --- icepyx/core/query.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 1252cf116..c03310a78 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -623,6 +623,27 @@ def cmr_reqparams(self): return self._cmr_reqparams.fmted_keys + def get_harmony_subset_order_params(self): + """TODO: this function will return the parameters for a harmony subset order. + + Params are formatted for use with an `HarmonyAPI` instance. This + function will return all of the required and optional parameters. + """ + + def get_cmr_search_params(self): + """TODO: this function will return the parameters for a CMR search query. + + Params are formatted for a CMR search URL. This function will return all + of the required and optional parameters. + """ + + def get_non_subset_order_params(self): + """TODO: this function will return the parameters for a non-subset order via earthaccess + + Params are formatted for input into `earthaccess`. This function will + return all of the required and optional parameters. + """ + # @property # DevQuestion: if I make this a property, I get a "dict" object is not callable # when I try to give input kwargs... what approach should I be taking? From 0da719f5ea4970aa63ebc2efd4bb52fa748a4b5e Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:30:10 -0700 Subject: [PATCH 10/13] WIP remove `granules.place_order` in favor of `granules.place_harmon_subset_order` separate concerns --- icepyx/core/granules.py | 84 +++++++++++++++++------------------------ icepyx/core/query.py | 14 +++---- 2 files changed, 39 insertions(+), 59 deletions(-) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index fc0b8dc27..52b97265a 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -283,11 +283,37 @@ def get_avail( len(self.avail) > 0 ), "Your search returned no results; try different search parameters" - def _place_harmony_subset_order( + def place_harmony_subset_order( self, - request_params, + # TODO: remove `CMRparams` from this function. This should only take subset params. + CMRparams, subsetparams, + # TODO: support passing in a geometry filepath + geom_filepath=None, ) -> list[str]: + """ + Place a harmony subset order. + + Parameters + ---------- + CMRparams : + Dictionary of properly formatted CMR search parameters. + subsetparams : dictionary + Dictionary of properly formatted subsetting parameters. An empty dictionary + is passed as input here when subsetting is set to False in query methods. + geom_filepath : string, default None + String of the full filename and path when the spatial input is a file. + + Notes + ----- + This function is used by query.Query.order_granules(), which automatically + feeds in the required parameters. + + See Also + -------- + query.Query.order_granules + """ + request_params = apifmt.combine_params(CMRparams, subsetparams) concept_id = get_concept_id( product=request_params["short_name"], version=request_params["version"], @@ -347,55 +373,20 @@ def _place_harmony_subset_order( return self.orderIDs - def _place_non_subset_order( + def place_non_subset_order( self, CMRparams: CMRParams, - ): - # TODO: use e.g., `earthaccess` to download files un-processed by harmony. - raise NotImplementedError("Support for non-subset orders is not implemented.") - self.get_avail(CMRparams) - request_params = apifmt.combine_params(CMRparams, {"agent": "NO"}) - - # DevNote: currently, default subsetting DOES NOT include variable subsetting, - # only spatial and temporal - # DevGoal: add kwargs to allow subsetting and more control over request options. - def place_order( - self, - CMRparams: CMRParams, - subsetparams, - verbose, - subset=True, - geom_filepath=None, ): """ - Place an order for the available granules for the query object. - Adds the list of zipped files (orders) to the granules data object (which is - stored as the `granules` attribute of the query object). - You must be logged in to Earthdata to use this function. + Place an order for data files with `earthaccess`. Parameters ---------- CMRparams : Dictionary of properly formatted CMR search parameters. - reqparams : - Dictionary of properly formatted parameters required for searching, ordering, - or downloading from NSASA (via harmony). subsetparams : dictionary Dictionary of properly formatted subsetting parameters. An empty dictionary is passed as input here when subsetting is set to False in query methods. - verbose : boolean, default False - Print out all feedback available from the order process. - Progress information is automatically printed regardless of the value of verbose. - subset : boolean, default True - Apply subsetting to the data order from the NSIDC, returning only data that meets the - subset parameters. - Spatial and temporal subsetting based on the input parameters happens - by default when subset=True, but additional subsetting options are available. - Spatial subsetting returns all data that are within the area of interest - (but not complete granules. - This eliminates false-positive granules returned by the metadata-level search) - geom_filepath : string, default None - String of the full filename and path when the spatial input is a file. Notes ----- @@ -406,17 +397,10 @@ def place_order( -------- query.Query.order_granules """ - if subset is False: - self._place_non_subset_order(CMRparams) - else: - # TODO: why are we combining these like this? We could just pass in - # these values separately. Eventually would like to collapse this - # down into just one `HarmonySubsetParams`. - request_params = apifmt.combine_params(CMRparams, subsetparams) - return self._place_harmony_subset_order( - request_params, - subsetparams, - ) + # TODO: use e.g., `earthaccess` to download files un-processed by harmony. + raise NotImplementedError("Support for non-subset orders is not implemented.") + self.get_avail(CMRparams) + request_params = apifmt.combine_params(CMRparams, {"agent": "NO"}) def download(self, verbose, path, restart=False): """ diff --git a/icepyx/core/query.py b/icepyx/core/query.py index c03310a78..3b07a695f 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1016,7 +1016,7 @@ def order_granules( Print out all feedback available from the order process. Progress information is automatically printed regardless of the value of verbose. subset : - Apply subsetting to the data order from the NSIDC, returning only data that meets the + Apply subsetting to the data order from Harmony, returning only data that meets the subset parameters. Spatial and temporal subsetting based on the input parameters happens by default when subset=True, but additional subsetting options are available. Spatial subsetting returns all data that are within the area of interest (but not complete @@ -1048,8 +1048,8 @@ def order_granules( . Retry request status is: complete """ - # breakpoint() - # raise RefactoringException + if not subset: + raise NotImplementedError # This call ensures that `self._cmr_reqparams` is set. self.cmr_reqparams @@ -1097,20 +1097,16 @@ def order_granules( ) for gran in gran_name_list: tempCMRparams["readable_granule_name[]"] = gran - self.granules.place_order( + self.granules.place_subset_order( tempCMRparams, self.subsetparams(granule_name=gran, **kwargs), - verbose, - subset, geom_filepath=self._spatial._geom_file, ) else: - self.granules.place_order( + self.granules.place_subset_order( cmr_params, self.subsetparams(**kwargs), - verbose, - subset, geom_filepath=self._spatial._geom_file, ) From 6e6fa9280a4df23c1842519c31763e16cacc5d10 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:31:23 -0700 Subject: [PATCH 11/13] WIP Remove `email` option from `query.place_order` Email is not an option with harmony --- icepyx/core/query.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 3b07a695f..e39425372 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1004,7 +1004,6 @@ def order_granules( self, verbose: bool = False, subset: bool = True, - email: bool = False, **kwargs, ) -> None: """ @@ -1021,9 +1020,6 @@ def order_granules( by default when subset=True, but additional subsetting options are available. Spatial subsetting returns all data that are within the area of interest (but not complete granules. This eliminates false-positive granules returned by the metadata-level search) - email : - Have NSIDC auto-send order status email updates to indicate order status as pending/completed. - The emails are sent to the account associated with your Earthdata account. **kwargs : key-value pairs Additional parameters to be passed to the subsetter. By default temporal and spatial subset keys are passed. @@ -1056,14 +1052,6 @@ def order_granules( if self._cmr_reqparams._reqtype == "search": self._cmr_reqparams._reqtype = "download" - if "email" in self._cmr_reqparams.fmted_keys or email is False: - self._cmr_reqparams.build_params(**self._cmr_reqparams.fmted_keys) - elif email is True: - user_profile = self.auth.get_user_profile() # pyright: ignore[reportAttributeAccessIssue] - self._cmr_reqparams.build_params( - **self._cmr_reqparams.fmted_keys, email=user_profile["email_address"] - ) - if subset is False: self._subsetparams = None elif ( From fb2a9a7d73a64e52e35064dbcecf80c4bc1a265d Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 13 Nov 2024 14:51:57 -0700 Subject: [PATCH 12/13] WIP refactor `query` method that orders granules Separate out the subset and non-subset options --- icepyx/core/query.py | 113 ++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 55 deletions(-) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index e39425372..71b6dad59 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -997,6 +997,58 @@ def avail_granules( else: return granules.info(self.granules.avail) + def _order_subset_granules(self, **kwargs): + # This call ensures that `self._cmr_reqparams` is set. + self.cmr_reqparams + if self._cmr_reqparams._reqtype == "search": + self._cmr_reqparams._reqtype = "download" + + # TODO: is it necessary to delete the `self._subsetparams` attr? We are + # performing a subset so this seems the opposite of what is expected. + if hasattr(self, "_subsetparams") and self._subsetparams is None: + del self._subsetparams + + # TODO: this shouldn't be necessary: + cmr_params = {**self.CMRparams, **self.cmr_reqparams} + + # REFACTOR: add checks here to see if the granules object has been created, + # and also if it already has a list of avail granules (if not, need to create one and add session) + + # Place multiple orders, one per granule, if readable_granule_name is used. + # TODO/Question: is "readable granule name" a thing in harmony? This may + # only be relevant for non-subset requests that will be handled by e.g., + # `earthaccess`. + # Answer: there appears to be a `granuleName` parameter that harmony can + # take for its own internal queries to CMR, but it is unclear how that + # works. See https://harmony.earthdata.nasa.gov/docs#query-parameters + # `harmony-py` accepts `granule_name` as an input. + if "readable_granule_name[]" in cmr_params: + gran_name_list = cmr_params["readable_granule_name[]"] + tempCMRparams = cmr_params.copy() + if len(gran_name_list) > 1: + print( + "Harmony only allows ordering of one granule by name at a time;" + " your orders will be placed accordingly." + ) + for gran in gran_name_list: + tempCMRparams["readable_granule_name[]"] = gran + self.granules.place_harmony_subset_order( + tempCMRparams, + self.subsetparams(granule_name=gran, **kwargs), + geom_filepath=self._spatial._geom_file, + ) + + else: + self.granules.place_harmony_subset_order( + cmr_params, + self.subsetparams(**kwargs), + geom_filepath=self._spatial._geom_file, + ) + + def _order_whole_granules(self): + raise NotImplementedError + self._subsetparams = None + # DevGoal: display output to indicate number of granules successfully ordered (and number of errors) # DevGoal: deal with subset=True for variables now, and make sure that if a variable subset # Coverage kwarg is input it's successfully passed through all other functions even if this is the only one run. @@ -1023,9 +1075,7 @@ def order_granules( **kwargs : key-value pairs Additional parameters to be passed to the subsetter. By default temporal and spatial subset keys are passed. - Acceptable key values are ['format','projection','projection_parameters','Coverage']. - The variable 'Coverage' list should be constructed using the `order_vars.wanted` attribute of the object. - At this time (2020-05), only variable ('Coverage') parameters will be automatically formatted. + Acceptable key values are [TODO]. See Also -------- @@ -1044,59 +1094,12 @@ def order_granules( . Retry request status is: complete """ - if not subset: - raise NotImplementedError - - # This call ensures that `self._cmr_reqparams` is set. - self.cmr_reqparams - if self._cmr_reqparams._reqtype == "search": - self._cmr_reqparams._reqtype = "download" - - if subset is False: - self._subsetparams = None - elif ( - subset is True - and hasattr(self, "_subsetparams") - and self._subsetparams is None - ): - del self._subsetparams - - # TODO: this shouldn't be necessary: - cmr_params = {**self.CMRparams, **self.cmr_reqparams} - - # REFACTOR: add checks here to see if the granules object has been created, - # and also if it already has a list of avail granules (if not, need to create one and add session) - - # Place multiple orders, one per granule, if readable_granule_name is used. - # TODO/Question: is "readable granule name" a thing in harmony? This may - # only be relevant for non-subset requests that will be handled by e.g., - # `earthaccess`. - # Answer: there appears to be a `granuleName` parameter that harmony can - # take for its own internal queries to CMR, but it is unclear how that - # works. See https://harmony.earthdata.nasa.gov/docs#query-parameters - # `harmony-py` accepts `granule_name` as an input. - if "readable_granule_name[]" in cmr_params: - gran_name_list = cmr_params["readable_granule_name[]"] - tempCMRparams = cmr_params.copy() - if len(gran_name_list) > 1: - print( - "Harmony only allows ordering of one granule by name at a time;" - " your orders will be placed accordingly." - ) - for gran in gran_name_list: - tempCMRparams["readable_granule_name[]"] = gran - self.granules.place_subset_order( - tempCMRparams, - self.subsetparams(granule_name=gran, **kwargs), - geom_filepath=self._spatial._geom_file, - ) - - else: - self.granules.place_subset_order( - cmr_params, - self.subsetparams(**kwargs), - geom_filepath=self._spatial._geom_file, + if subset: + self._order_subset_granules( + **kwargs, ) + else: + self._order_whole_granules() # DevGoal: put back in the kwargs here so that people can just call download granules with subset=False! def download_granules( From b7b76aa4d501ef103b27c7ff11f0d19a8cb42f30 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 14 Nov 2024 09:22:34 -0700 Subject: [PATCH 13/13] WIP init work on `get_cmr_search_params` --- icepyx/core/query.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 71b6dad59..630efac95 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -624,21 +624,31 @@ def cmr_reqparams(self): return self._cmr_reqparams.fmted_keys def get_harmony_subset_order_params(self): - """TODO: this function will return the parameters for a harmony subset order. + """TODO: this method will return the parameters for a harmony subset order. Params are formatted for use with an `HarmonyAPI` instance. This function will return all of the required and optional parameters. """ def get_cmr_search_params(self): - """TODO: this function will return the parameters for a CMR search query. + """TODO: this method will return the parameters for a CMR search query. Params are formatted for a CMR search URL. This function will return all of the required and optional parameters. """ + # This call ensures that `self._cmr_reqparams` is set. + self.cmr_reqparams + + # This combines the CMRparams with the reqparams. + # CMR params are just the user-provided params. The cmr_reqparams + # contains the required CMR parameters that any request needs, + # regardless of user input. + cmr_params = {**self.CMRparams, **self.cmr_reqparams} + + return cmr_params def get_non_subset_order_params(self): - """TODO: this function will return the parameters for a non-subset order via earthaccess + """TODO: this method will return the parameters for a non-subset order via earthaccess Params are formatted for input into `earthaccess`. This function will return all of the required and optional parameters. @@ -1008,8 +1018,7 @@ def _order_subset_granules(self, **kwargs): if hasattr(self, "_subsetparams") and self._subsetparams is None: del self._subsetparams - # TODO: this shouldn't be necessary: - cmr_params = {**self.CMRparams, **self.cmr_reqparams} + cmr_search_params = self.get_cmr_search_params() # REFACTOR: add checks here to see if the granules object has been created, # and also if it already has a list of avail granules (if not, need to create one and add session)