diff --git a/eedl/__init__.py b/eedl/__init__.py index e72be2a..1380b9b 100644 --- a/eedl/__init__.py +++ b/eedl/__init__.py @@ -1 +1 @@ -__version__ = "2023.08.31" +__version__ = "2023.09.12" diff --git a/eedl/google_cloud.py b/eedl/google_cloud.py index 5512692..7add67b 100644 --- a/eedl/google_cloud.py +++ b/eedl/google_cloud.py @@ -29,10 +29,10 @@ def get_public_export_urls(bucket_name: str, prefix: str = "") -> List[str]: # get the content of the bucket (it needs to be public) listing = requests.get(search_url).text - # comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements + # Comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements pattern = re.compile("<Key>(.*?)</Key>") items = pattern.findall(listing) - # make them into full URLs with the bucket URL at the front and check if the files have the prefix specific + # Make them into full URLs with the bucket URL at the front and check if the files have the prefix specified filtered = [f"{request_url}{item}" for item in items if item.startswith(prefix)] return filtered @@ -49,17 +49,17 @@ def download_public_export(bucket_name: str, output_folder: Union[str, Path], pr :type prefix: str :return: None.
""" - # get the urls of items in the bucket with the specified prefix + # Get the urls of items in the bucket with the specified prefix urls = get_public_export_urls(bucket_name, prefix) os.makedirs(output_folder, exist_ok=True) for url in urls: - filename = url.split("/")[-1] # get the filename - output_path = Path(output_folder) / filename # construct the output path - # get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment + filename = url.split("/")[-1] # Get the filename + output_path = Path(output_folder) / filename # Construct the output path + # Get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment response = requests.get(url) - output_path.write_bytes(response.content) # write it to a file + output_path.write_bytes(response.content) # Write it to a file def download_export(bucket_name: str, @@ -68,7 +68,7 @@ def download_export(bucket_name: str, delimiter: str = "/", autodelete: bool = True) -> None: - """Downloads a blob from the bucket. + """Downloads a blob from the specified bucket. 
Modified from Google Cloud sample documentation at https://cloud.google.com/storage/docs/samples/storage-download-file#storage_download_file-python diff --git a/eedl/helpers.py b/eedl/helpers.py index d5ba419..9493a48 100644 --- a/eedl/helpers.py +++ b/eedl/helpers.py @@ -85,7 +85,7 @@ def _single_item_extract(self, image, task_registry, zonal_features, aoi_attr, e export_image.zonal_inject_constants = zonal_inject_constants filename_suffix = f"{aoi_attr}_{image_date}" - if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{filename_description}_{filename_suffix}"): + if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{self.filename_description}_{filename_suffix}"): print(f"Image {filename_suffix} exists and skip_existing=True. Skipping") return diff --git a/eedl/image.py b/eedl/image.py index 56e068a..3de5c67 100644 --- a/eedl/image.py +++ b/eedl/image.py @@ -40,11 +40,11 @@ def download_images_in_folder(source_location: Union[str, Path], download_locati """ Handles pulling data from Google Drive over to a local location, filtering by a filename prefix and folder - :param source_location: Directory to search for files + :param source_location: Directory to search for files. :type source_location: Union[str, Path] - :param download_location: Destination for files with the specified prefix + :param download_location: Destination for files with the specified prefix. :type download_location: Union[str, Path] - :param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved + :param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved. 
:type prefix: str :return: None """ @@ -57,12 +57,12 @@ def download_images_in_folder(source_location: Union[str, Path], download_locati os.makedirs(download_location, exist_ok=True) for filename in files: - shutil.move(os.path.join(folder_search_path, filename), os.path.join(download_location, filename)) + shutil.move(str(os.path.join(folder_search_path, filename)), str(os.path.join(download_location, filename))) class TaskRegistry: """ - The TaskRegistry class makes it convent to manage arbitrarily many Earth Engine images that are in varying states of being downloaded. + The TaskRegistry class makes it convenient to manage arbitrarily many Earth Engine images that are in varying states of being downloaded. """ INCOMPLETE_STATUSES = ("READY", "UNSUBMITTED", "RUNNING") COMPLETE_STATUSES = ["COMPLETED"] @@ -81,8 +81,7 @@ def __init__(self) -> None: def add(self, image: ee.image.Image) -> None: """ - Adds an Earth Engine image to the list of Earth Engine images - + Adds an Earth Engine image to the list of Earth Engine images. :param image: Earth Engine image to be added to the list of images :type image: ee.image.Image :return: None @@ -92,9 +91,8 @@ def add(self, image: ee.image.Image) -> None: @property def incomplete_tasks(self) -> List[ee.image.Image]: """ - List of Earth Engine images that have not been completed yet - - :return: List of Earth Engine images that have not been completed yet + List of Earth Engine images that have not been completed yet. + :return: List of Earth Engine images that have not been completed yet. :rtype: List[ee.image.Image] """ initial_tasks = [image for image in self.images if image.last_task_status['state'] in self.INCOMPLETE_STATUSES] @@ -106,30 +104,32 @@ def incomplete_tasks(self) -> List[ee.image.Image]: @property def complete_tasks(self) -> List[ee.image.Image]: """ - List of Earth Engine images - - :return: List of Earth Engine images + List of Earth Engine images. + :return: List of Earth Engine images. 
:rtype: List[ee.image.Image] """ return [image for image in self.images if image.last_task_status['state'] in self.COMPLETE_STATUSES + self.FAILED_STATUSES] @property def failed_tasks(self) -> List[ee.image.Image]: + """ + List of Earth Engine images that have either been cancelled or that have failed + """ return [image for image in self.images if image.last_task_status['state'] in self.FAILED_STATUSES] @property def downloadable_tasks(self) -> List[ee.image.Image]: """ - List of Earth Engine images that have successfully been downloaded - :return: List of Earth Engine images that have successfully been downloaded + List of Earth Engine images that have not been cancelled or have failed. + :return: List of Earth Engine images that have not been cancelled or have failed. :rtype: List[ee.image.Image] """ return [image for image in self.complete_tasks if image.task_data_downloaded is False and image.last_task_status['state'] not in self.FAILED_STATUSES] def download_ready_images(self, download_location: Union[str, Path]) -> None: """ - - :param download_location: Destination for downloaded files + Downloads all images that are ready to be downloaded. + :param download_location: Destination for downloaded files. :type download_location: Union[str, Path] :return: None """ @@ -178,7 +178,6 @@ def wait_for_images(self, on_failure: str = "log") -> None: """ Blocker until there are no more incomplete or downloadable tasks left. - :param download_location: Destination for downloaded files. :type download_location: Union[str, Path] :param sleep_time: Time between checking if the disk is full in seconds. Defaults to 10 seconds. 
@@ -235,9 +234,9 @@ class EEDLImage: :param crs: Coordinate Reference System to use for exports in a format Earth Engine understands, such as "EPSG:3310" :type crs: Optional[str] - :param tile_size: the number of pixels per side of tiles to export + :param tile_size: The number of pixels per side of tiles to export :type tile_size: Optional[int] - :param export_folder: the name of the folder in the chosen export location that will be created for the export + :param export_folder: The name of the folder in the chosen export location that will be created for the export :type export_folder: Optional[Union[str, Path]] This docstring needs to be checked to ensure it's in a standard format that Sphinx will render @@ -273,24 +272,24 @@ def __init__(self, **kwargs) -> None: self.zonal_inject_constants: dict = dict() self.zonal_nodata_value: int = -9999 - # set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here + # Set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here. for key in DEFAULTS: setattr(self, key.lower(), DEFAULTS[key]) - for key in kwargs: # now apply any provided keyword arguments over the top of the defaults. + for key in kwargs: # Now apply any provided keyword arguments over the top of the defaults. setattr(self, key, kwargs[key]) self._last_task_status = {"state": "UNSUBMITTED"} - # this will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything - # from the server. "None" would work too, but then we couldn't just check the status + # This will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything + # from the server. "None" would work too, but then we couldn't just check the status.
self.task_data_downloaded = False - self.export_type = "Drive" # other option is "Cloud" + self.export_type = "Drive" # The other option is "Cloud". def _set_names(self, filename_suffix: str = "") -> None: """ :param filename_suffix: Suffix used to later identify files. - :type filename_suffix: Str + :type filename_suffix: str :return: None """ self.description = filename_suffix @@ -300,15 +299,14 @@ def _set_names(self, filename_suffix: str = "") -> None: def _initialize() -> None: """ Handles the initialization and potentially the authentication of Earth Engine - :return: None """ - try: # try just a basic discardable operation used in their docs so that we don't initialize if we don't need to + try: # Try just a basic discard-able operation used in their docs so that we don't initialize if we don't need to. _ = ee.Image("NASA/NASADEM_HGT/001") - except EEException: # if it fails, try just running initialize + except EEException: # If it fails, try just running initialize. try: ee.Initialize() - except EEException: # if that still fails, try authenticating first + except EEException: # If that still fails, try authenticating first. ee.Authenticate() ee.Initialize() @@ -327,7 +325,7 @@ def last_task_status(self, new_status: Dict[str, str]) -> None: Sets the value of the private variable "_last_task_status" to a specified value. Realistically, this shouldn't be used as the value should only be set from within the object, but it's here in case it's needed. - :param new_status: Updated status + :param new_status: Status to update the _last_task_status to. :type new_status: Dict[str, str] :return: None """ @@ -343,7 +341,6 @@ def export(self, **export_kwargs: Unpack[EEExportDict]) -> None: """ Handles the exporting of an image - :param image: Image for export :type image: ee.image.Image :param filename_suffix: The unique identifier used internally to identify images. 
@@ -363,6 +360,7 @@ def export(self, """ if not isinstance(image, ee.image.Image): + raise ValueError("Invalid image provided for export - please provide a single image (not a collection or another object) of class ee.image.Image for export") if export_type.lower() == "drive" and \ @@ -372,7 +370,7 @@ def export(self, raise NotADirectoryError("The provided path for the Google Drive export folder is not a valid directory but" " Drive export was specified. Either change the export type to use Google Cloud" " and set that up properly (with a bucket, etc), or set the drive_root_folder" - " to a valid folder") + " to a valid folder.") elif export_type.lower() == "drive": if drive_root_folder: self.drive_root_folder = drive_root_folder @@ -449,15 +447,15 @@ def check_mosaic_exists(download_location: Union[str, Path], export_folder: Unio def download_results(self, download_location: Union[str, Path], callback: Optional[str] = None, drive_wait: int = 15) -> None: """ - - :param download_location: The directory where the results should be downloaded to + :param download_location: The directory where the results should be downloaded to. Expects a string path or a Pathlib Path object. :type download_location: Union[str, Path] - :param callback: The callback function called once the image is downloaded + :param callback: The callback function is called once the image has been downloaded. :type callback: Optional[str] + :param drive_wait: The amount of time in seconds to wait to allow for files that Earth Engine reports have been exported to actually populate. Default is 15 seconds. + :type drive_wait: int :return: None """ - # need an event loop that checks self.task.status(), which - # will get the current state of the task + # Need an event loop that checks self.task.status(), which will get the current state of the task. # state options # == "CANCELLED", "CANCEL_REQUESTED", "COMPLETED", @@ -536,14 +534,13 @@ def zonal_stats(self, :param stats: :type stats: Tuple[str, ...] 
:param report_threshold: After how many iterations should it print out the feature number it's on. Defaults to 1000. - Set to None to disable + Set to None to disable. :type report_threshold: int - :param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000 + :param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000. :type write_batch_size: int :param use_points: :type use_points: bool :return: None - """ self.zonal_output_filepath = zonal.zonal_stats( @@ -564,7 +561,7 @@ def _check_task_status(self) -> Dict[str, Union[Dict[str, str], bool]]: """ Updates the status is it needs to be changed - :return: Returns a dictionary of the most up-to-date status and whether it was changed + :return: Returns a dictionary of the most up-to-date status and whether that status was changed :rtype: Dict[str, Union[Dict[str, str], bool]] """ diff --git a/eedl/merge.py b/eedl/merge.py index f91fc69..ca8d10c 100644 --- a/eedl/merge.py +++ b/eedl/merge.py @@ -16,7 +16,6 @@ def merge_outputs(file_mapping, """ Makes output zonal stats files into a data frame and adds a datetime field. Merges all inputs into one DF, and can optionally insert into a sqlite database. - :param file_mapping: A set of tuples with a path to a file and a time value (string or datetime) to associate with it. :type file_mapping: :param date_field: Defaults to "et_date". @@ -38,7 +37,7 @@ df.loc[:, date_field] = time_value dfs.append(df) - # merge all the data frames together + # Merge all the data frames together final_df = pandas.concat(dfs) final_df.reset_index(inplace=True) @@ -51,16 +50,16 @@ def plot_merged(df: pandas.DataFrame, et_field: str, date_field: str = "et_date", uniqueid: str = "UniqueID") -> so.Plot: """ - - :param df: Data source for the plot + Creates a seaborn plot of the merged data. + :param df: Data source for the plot.
:type df: pandas.DataFrame - :param et_field: Name of the variable on the x-axis + :param et_field: Name of the variable on the x-axis. :type et_field: str - :param date_field: Name of the variable on the y-axis. Default is "et_date" + :param date_field: Name of the variable on the y-axis. Default is "et_date". :type date_field: str - :param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID" + :param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID". :type uniqueid: str - :return: Returns a seaborn object plot + :return: Returns a seaborn object plot. :rtype: so.Plot """ return ( diff --git a/eedl/mosaic_rasters.py b/eedl/mosaic_rasters.py index 24d9d5d..51f86c5 100644 --- a/eedl/mosaic_rasters.py +++ b/eedl/mosaic_rasters.py @@ -10,9 +10,9 @@ def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: str = "") -> None: """ - :param folder_path: Location of the folder + :param folder_path: Location of the folder. :type folder_path: Union[str, Path] - :param output_path: Output destination + :param output_path: Output destination. :type output_path: Union[str, Path] :param prefix: Used to find the files of interest. 
:type prefix: Str @@ -49,11 +49,11 @@ vrt_options = gdal.BuildVRTOptions(resampleAlg='nearest', resolution="highest") my_vrt = gdal.BuildVRT(vrt_path, raster_paths, options=vrt_options) # my_vrt = None - my_vrt.FlushCache() # write the VRT out + my_vrt.FlushCache() # Write the VRT out print(f"VRT at {vrt_path}") - # now let's export it to the output_path as a geotiff - driver = gdal.GetDriverByName("GTIFF") # we'll use VRT driver.CreateCopy + # Now let's export it to the output_path as a geotiff + driver = gdal.GetDriverByName("GTIFF") # We'll use VRT driver.CreateCopy vrt_data = gdal.Open(vrt_path) output = driver.CreateCopy(output_path, vrt_data, 0, ["COMPRESS=DEFLATE", ]) output.FlushCache() diff --git a/eedl/zonal.py b/eedl/zonal.py index e8275f1..5c55d58 100644 --- a/eedl/zonal.py +++ b/eedl/zonal.py @@ -1,7 +1,8 @@ import csv import os from pathlib import Path -from typing import Iterable, Union +from typing import Iterable, Optional, Union + import fiona import rasterstats @@ -25,20 +26,20 @@ def zonal_stats(features: Union[str, Path, fiona.Collection], # automatically align them and we just get bad output. """ - - :param features: Location to the features + If the raster and the polygons are not in the same CRS, this function will produce bad output. + :param features: Location to the features. :type features: Union[str, Path] - :param raster: Location of the raster + :param raster: Location of the raster. :type raster: Union[str, Path, None] - :param output_folder: Output destination + :param output_folder: Output destination. :type output_folder: Union[str, Path, None] - :param filename: Name of the file + :param filename: Name of the file. :type filename: Str - :param keep_fields: Fields that will be used + :param keep_fields: Fields that will be used. :type keep_fields: Iterable[str] :param stats: The various statistical measurements to be computed.
:type stats: Iterable[str] - :param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable + :param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable. :type report_threshold: Int :param write_batch_size: The number of zones that should be stored up before writing to disk. :type write_batch_size: Int @@ -62,7 +63,7 @@ def zonal_stats(features: Union[str, Path, fiona.Collection], # next line, each item isn't evaluated, which should prevent us from needing to store a geojson representation of # all the polygons at one time since we'll strip it off (it'd be bad to try to keep all of it - output_filepath: Union[str, None] = None + output_filepath: Optional[str] = None if not (isinstance(features, fiona.Collection) or hasattr(features, "__iter__")): # if features isn't already a fiona collection instance or something else we can iterate over # A silly hack to get fiona to open GDB data by splitting it only if the input is a gdb data item, then providing @@ -130,7 +131,7 @@ def zonal_stats(features: Union[str, Path, fiona.Collection], if report_threshold and i % report_threshold == 0: print(i) - if len(results) > 0: # Clear out any remaining items at the end + if results: # Clear out any remaining items at the end writer.writerows(results) print(i) finally: diff --git a/lint.bat b/lint.bat new file mode 100644 index 0000000..6869b81 --- /dev/null +++ b/lint.bat @@ -0,0 +1,4 @@ +flake8 eedl --ignore="W191,E501,E128,E124,E126,E127" +flake8 tests --ignore="W191,E501,E128,E124,E126,E127" +flake8 examples --ignore="W191,E501,E128,E124,E126,E127" +mypy eedl --enable-incomplete-feature=Unpack \ No newline at end of file diff --git a/tests/data/_ee_export_test_image.tif.aux.xml b/tests/data/_ee_export_test_image.tif.aux.xml new file mode 100644 index 0000000..6fbb784 --- /dev/null +++ 
b/tests/data/_ee_export_test_image.tif.aux.xml @@ -0,0 +1,26 @@ + + + B8 + + + 0 + 9560 + 256 + 1 + 0 + 37336|0|0|1|4|0|5|15|21|32|47|73|86|99|126|115|152|115|147|174|170|174|217|195|233|242|239|221|255|253|248|272|273|352|327|314|342|317|364|388|387|390|434|383|421|413|487|454|459|528|546|548|588|651|654|635|687|715|692|752|721|793|844|852|776|869|805|866|828|943|878|869|912|923|889|957|880|944|911|906|876|907|773|850|818|810|741|788|680|663|619|632|644|577|568|556|551|516|501|460|492|419|418|401|357|396|307|308|304|257|303|265|288|219|228|249|242|227|180|205|188|172|186|158|175|156|156|188|150|141|161|151|126|142|126|102|119|123|107|125|140|91|132|97|122|89|102|98|109|86|91|84|89|93|81|82|78|59|73|77|82|62|71|63|59|54|61|58|57|70|47|56|48|69|48|46|52|50|52|48|39|46|38|25|32|26|38|28|26|33|12|24|23|19|27|27|25|23|24|20|12|23|13|13|10|8|5|7|4|7|5|3|3|1|1|0|0|3|2|1|1|0|0|0|2|1|0|1|0|1|1|0|0|1|0|0|0|0|0|0|0|0|0|1|0|2|1|1|1|1|0|0|1|0|0|1 + + + + 3063315.745456948 + 9560 + 1833.781199187 + 1912.000000 + 0 + 1 + 1 + 1750.2241611289 + 100 + + +