diff --git a/eedl/__init__.py b/eedl/__init__.py
index e72be2a..1380b9b 100644
--- a/eedl/__init__.py
+++ b/eedl/__init__.py
@@ -1 +1 @@
-__version__ = "2023.08.31"
+__version__ = "2023.09.12"
diff --git a/eedl/google_cloud.py b/eedl/google_cloud.py
index 5512692..7add67b 100644
--- a/eedl/google_cloud.py
+++ b/eedl/google_cloud.py
@@ -29,10 +29,10 @@ def get_public_export_urls(bucket_name: str, prefix: str = "") -> List[str]:
# get the content of the bucket (it needs to be public)
listing = requests.get(search_url).text
- # comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements
+ # Comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements
pattern = re.compile("(.*?)")
items = pattern.findall(listing)
- # make them into full URLs with the bucket URL at the front and check if the files have the prefix specific
+	# Make them into full URLs with the bucket URL at the front and check if the files have the specified prefix
filtered = [f"{request_url}{item}" for item in items if item.startswith(prefix)]
return filtered
@@ -49,17 +49,17 @@ def download_public_export(bucket_name: str, output_folder: Union[str, Path], pr
:type prefix: str
:return: None.
"""
- # get the urls of items in the bucket with the specified prefix
+ # Get the urls of items in the bucket with the specified prefix
urls = get_public_export_urls(bucket_name, prefix)
os.makedirs(output_folder, exist_ok=True)
for url in urls:
- filename = url.split("/")[-1] # get the filename
- output_path = Path(output_folder) / filename # construct the output path
- # get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment
+ filename = url.split("/")[-1] # Get the filename
+ output_path = Path(output_folder) / filename # Construct the output path
+ # Get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment
response = requests.get(url)
- output_path.write_bytes(response.content) # write it to a file
+ output_path.write_bytes(response.content) # Write it to a file
def download_export(bucket_name: str,
@@ -68,7 +68,7 @@ def download_export(bucket_name: str,
delimiter: str = "/",
autodelete: bool = True) -> None:
- """Downloads a blob from the bucket.
+ """Downloads a blob from the specified bucket.
Modified from Google Cloud sample documentation at
https://cloud.google.com/storage/docs/samples/storage-download-file#storage_download_file-python
diff --git a/eedl/helpers.py b/eedl/helpers.py
index d5ba419..9493a48 100644
--- a/eedl/helpers.py
+++ b/eedl/helpers.py
@@ -85,7 +85,7 @@ def _single_item_extract(self, image, task_registry, zonal_features, aoi_attr, e
export_image.zonal_inject_constants = zonal_inject_constants
filename_suffix = f"{aoi_attr}_{image_date}"
- if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{filename_description}_{filename_suffix}"):
+ if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{self.filename_description}_{filename_suffix}"):
print(f"Image {filename_suffix} exists and skip_existing=True. Skipping")
return
diff --git a/eedl/image.py b/eedl/image.py
index 56e068a..3de5c67 100644
--- a/eedl/image.py
+++ b/eedl/image.py
@@ -40,11 +40,11 @@ def download_images_in_folder(source_location: Union[str, Path], download_locati
"""
Handles pulling data from Google Drive over to a local location, filtering by a filename prefix and folder
- :param source_location: Directory to search for files
+ :param source_location: Directory to search for files.
:type source_location: Union[str, Path]
- :param download_location: Destination for files with the specified prefix
+ :param download_location: Destination for files with the specified prefix.
:type download_location: Union[str, Path]
- :param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved
+ :param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved.
:type prefix: str
:return: None
"""
@@ -57,12 +57,12 @@ def download_images_in_folder(source_location: Union[str, Path], download_locati
os.makedirs(download_location, exist_ok=True)
for filename in files:
- shutil.move(os.path.join(folder_search_path, filename), os.path.join(download_location, filename))
+ shutil.move(str(os.path.join(folder_search_path, filename)), str(os.path.join(download_location, filename)))
class TaskRegistry:
"""
- The TaskRegistry class makes it convent to manage arbitrarily many Earth Engine images that are in varying states of being downloaded.
+ The TaskRegistry class makes it convenient to manage arbitrarily many Earth Engine images that are in varying states of being downloaded.
"""
INCOMPLETE_STATUSES = ("READY", "UNSUBMITTED", "RUNNING")
COMPLETE_STATUSES = ["COMPLETED"]
@@ -81,8 +81,7 @@ def __init__(self) -> None:
def add(self, image: ee.image.Image) -> None:
"""
- Adds an Earth Engine image to the list of Earth Engine images
-
+ Adds an Earth Engine image to the list of Earth Engine images.
:param image: Earth Engine image to be added to the list of images
:type image: ee.image.Image
:return: None
@@ -92,9 +91,8 @@ def add(self, image: ee.image.Image) -> None:
@property
def incomplete_tasks(self) -> List[ee.image.Image]:
"""
- List of Earth Engine images that have not been completed yet
-
- :return: List of Earth Engine images that have not been completed yet
+ List of Earth Engine images that have not been completed yet.
+ :return: List of Earth Engine images that have not been completed yet.
:rtype: List[ee.image.Image]
"""
initial_tasks = [image for image in self.images if image.last_task_status['state'] in self.INCOMPLETE_STATUSES]
@@ -106,30 +104,32 @@ def incomplete_tasks(self) -> List[ee.image.Image]:
@property
def complete_tasks(self) -> List[ee.image.Image]:
"""
- List of Earth Engine images
-
- :return: List of Earth Engine images
+ List of Earth Engine images.
+ :return: List of Earth Engine images.
:rtype: List[ee.image.Image]
"""
return [image for image in self.images if image.last_task_status['state'] in self.COMPLETE_STATUSES + self.FAILED_STATUSES]
@property
def failed_tasks(self) -> List[ee.image.Image]:
+ """
+	List of Earth Engine images that have either been cancelled or failed
+ """
return [image for image in self.images if image.last_task_status['state'] in self.FAILED_STATUSES]
@property
def downloadable_tasks(self) -> List[ee.image.Image]:
"""
- List of Earth Engine images that have successfully been downloaded
- :return: List of Earth Engine images that have successfully been downloaded
+	List of Earth Engine images that have neither been cancelled nor failed.
+	:return: List of Earth Engine images that have neither been cancelled nor failed.
:rtype: List[ee.image.Image]
"""
return [image for image in self.complete_tasks if image.task_data_downloaded is False and image.last_task_status['state'] not in self.FAILED_STATUSES]
def download_ready_images(self, download_location: Union[str, Path]) -> None:
"""
-
- :param download_location: Destination for downloaded files
+ Downloads all images that are ready to be downloaded.
+ :param download_location: Destination for downloaded files.
:type download_location: Union[str, Path]
:return: None
"""
@@ -178,7 +178,6 @@ def wait_for_images(self,
on_failure: str = "log") -> None:
"""
Blocker until there are no more incomplete or downloadable tasks left.
-
:param download_location: Destination for downloaded files.
:type download_location: Union[str, Path]
:param sleep_time: Time between checking if the disk is full in seconds. Defaults to 10 seconds.
@@ -235,9 +234,9 @@ class EEDLImage:
:param crs: Coordinate Reference System to use for exports in a format Earth Engine understands, such as "EPSG:3310"
:type crs: Optional[str]
- :param tile_size: the number of pixels per side of tiles to export
+ :param tile_size: The number of pixels per side of tiles to export
:type tile_size: Optional[int]
- :param export_folder: the name of the folder in the chosen export location that will be created for the export
+ :param export_folder: The name of the folder in the chosen export location that will be created for the export
:type export_folder: Optional[Union[str, Path]]
This docstring needs to be checked to ensure it's in a standard format that Sphinx will render
@@ -273,24 +272,24 @@ def __init__(self, **kwargs) -> None:
self.zonal_inject_constants: dict = dict()
self.zonal_nodata_value: int = -9999
- # set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here
+ # Set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here.
for key in DEFAULTS:
setattr(self, key.lower(), DEFAULTS[key])
- for key in kwargs: # now apply any provided keyword arguments over the top of the defaults.
+ for key in kwargs: # Now apply any provided keyword arguments over the top of the defaults.
setattr(self, key, kwargs[key])
self._last_task_status = {"state": "UNSUBMITTED"}
- # this will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything
- # from the server. "None" would work too, but then we couldn't just check the status
+	# This will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything
+	# from the server. "None" would work too, but then we couldn't just check the status.
self.task_data_downloaded = False
- self.export_type = "Drive" # other option is "Cloud"
+ self.export_type = "Drive" # The other option is "Cloud".
def _set_names(self, filename_suffix: str = "") -> None:
"""
:param filename_suffix: Suffix used to later identify files.
- :type filename_suffix: Str
+ :type filename_suffix: str
:return: None
"""
self.description = filename_suffix
@@ -300,15 +299,14 @@ def _set_names(self, filename_suffix: str = "") -> None:
def _initialize() -> None:
"""
Handles the initialization and potentially the authentication of Earth Engine
-
:return: None
"""
- try: # try just a basic discardable operation used in their docs so that we don't initialize if we don't need to
+	try:  # Try just a basic discardable operation used in their docs so that we don't initialize if we don't need to.
_ = ee.Image("NASA/NASADEM_HGT/001")
- except EEException: # if it fails, try just running initialize
+ except EEException: # If it fails, try just running initialize.
try:
ee.Initialize()
- except EEException: # if that still fails, try authenticating first
+ except EEException: # If that still fails, try authenticating first.
ee.Authenticate()
ee.Initialize()
@@ -327,7 +325,7 @@ def last_task_status(self, new_status: Dict[str, str]) -> None:
Sets the value of the private variable "_last_task_status" to a specified value. Realistically, this shouldn't
be used as the value should only be set from within the object, but it's here in case it's needed.
- :param new_status: Updated status
+ :param new_status: Status to update the _last_task_status to.
:type new_status: Dict[str, str]
:return: None
"""
@@ -343,7 +341,6 @@ def export(self,
**export_kwargs: Unpack[EEExportDict]) -> None:
"""
Handles the exporting of an image
-
:param image: Image for export
:type image: ee.image.Image
:param filename_suffix: The unique identifier used internally to identify images.
@@ -363,6 +360,7 @@ def export(self,
"""
if not isinstance(image, ee.image.Image):
+
raise ValueError("Invalid image provided for export - please provide a single image (not a collection or another object) of class ee.image.Image for export")
if export_type.lower() == "drive" and \
@@ -372,7 +370,7 @@ def export(self,
raise NotADirectoryError("The provided path for the Google Drive export folder is not a valid directory but"
" Drive export was specified. Either change the export type to use Google Cloud"
" and set that up properly (with a bucket, etc), or set the drive_root_folder"
- " to a valid folder")
+ " to a valid folder.")
elif export_type.lower() == "drive":
if drive_root_folder:
self.drive_root_folder = drive_root_folder
@@ -449,15 +447,15 @@ def check_mosaic_exists(download_location: Union[str, Path], export_folder: Unio
def download_results(self, download_location: Union[str, Path], callback: Optional[str] = None, drive_wait: int = 15) -> None:
"""
-
- :param download_location: The directory where the results should be downloaded to
+ :param download_location: The directory where the results should be downloaded to. Expects a string path or a Pathlib Path object.
:type download_location: Union[str, Path]
- :param callback: The callback function called once the image is downloaded
+ :param callback: The callback function is called once the image has been downloaded.
:type callback: Optional[str]
+ :param drive_wait: The amount of time in seconds to wait to allow for files that Earth Engine reports have been exported to actually populate. Default is 15 seconds.
+ :type drive_wait: int
:return: None
"""
- # need an event loop that checks self.task.status(), which
- # will get the current state of the task
+ # Need an event loop that checks self.task.status(), which will get the current state of the task.
# state options
# == "CANCELLED", "CANCEL_REQUESTED", "COMPLETED",
@@ -536,14 +534,13 @@ def zonal_stats(self,
:param stats:
:type stats: Tuple[str, ...]
:param report_threshold: After how many iterations should it print out the feature number it's on. Defaults to 1000.
- Set to None to disable
+ Set to None to disable.
:type report_threshold: int
- :param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000
+ :param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000.
:type write_batch_size: int
:param use_points:
:type use_points: bool
:return: None
-
"""
self.zonal_output_filepath = zonal.zonal_stats(
@@ -564,7 +561,7 @@ def _check_task_status(self) -> Dict[str, Union[Dict[str, str], bool]]:
"""
Updates the status is it needs to be changed
- :return: Returns a dictionary of the most up-to-date status and whether it was changed
+ :return: Returns a dictionary of the most up-to-date status and whether that status was changed
:rtype: Dict[str, Union[Dict[str, str], bool]]
"""
diff --git a/eedl/merge.py b/eedl/merge.py
index f91fc69..ca8d10c 100644
--- a/eedl/merge.py
+++ b/eedl/merge.py
@@ -16,7 +16,6 @@ def merge_outputs(file_mapping,
"""
Makes output zonal stats files into a data frame and adds a datetime field. Merges all inputs into one DF, and
can optionally insert into a sqlite database.
-
:param file_mapping: A set of tuples with a path to a file and a time value (string or datetime) to associate with it.
:type file_mapping:
:param date_field: Defaults to "et_date".
@@ -38,7 +37,7 @@ def merge_outputs(file_mapping,
df.loc[:, date_field] = time_value
dfs.append(df)
- # merge all the data frames together
+ # Merge all the data frames together
final_df = pandas.concat(dfs)
final_df.reset_index(inplace=True)
@@ -51,16 +50,16 @@ def merge_outputs(file_mapping,
def plot_merged(df: pandas.DataFrame, et_field: str, date_field: str = "et_date", uniqueid: str = "UniqueID") -> so.Plot:
"""
-
- :param df: Data source for the plot
+	Creates a seaborn object plot of the merged data frame.
+ :param df: Data source for the plot.
:type df: pandas.DataFrame
- :param et_field: Name of the variable on the x-axis
+ :param et_field: Name of the variable on the x-axis.
:type et_field: str
- :param date_field: Name of the variable on the y-axis. Default is "et_date"
+ :param date_field: Name of the variable on the y-axis. Default is "et_date".
:type date_field: str
- :param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID"
+ :param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID".
:type uniqueid: str
- :return: Returns a seaborn object plot
+ :return: Returns a seaborn object plot.
:rtype: so.Plot
"""
return (
diff --git a/eedl/mosaic_rasters.py b/eedl/mosaic_rasters.py
index 24d9d5d..51f86c5 100644
--- a/eedl/mosaic_rasters.py
+++ b/eedl/mosaic_rasters.py
@@ -10,9 +10,9 @@
def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: str = "") -> None:
"""
- :param folder_path: Location of the folder
+ :param folder_path: Location of the folder.
:type folder_path: Union[str, Path]
- :param output_path: Output destination
+ :param output_path: Output destination.
:type output_path: Union[str, Path]
:param prefix: Used to find the files of interest.
:type prefix: Str
@@ -49,11 +49,11 @@ def mosaic_rasters(raster_paths: Sequence[Union[str, Path]],
vrt_options = gdal.BuildVRTOptions(resampleAlg='nearest', resolution="highest")
my_vrt = gdal.BuildVRT(vrt_path, raster_paths, options=vrt_options)
# my_vrt = None
- my_vrt.FlushCache() # write the VRT out
+ my_vrt.FlushCache() # Write the VRT out
print(f"VRT at {vrt_path}")
- # now let's export it to the output_path as a geotiff
- driver = gdal.GetDriverByName("GTIFF") # we'll use VRT driver.CreateCopy
+ # Now let's export it to the output_path as a geotiff
+ driver = gdal.GetDriverByName("GTIFF") # We'll use VRT driver.CreateCopy
vrt_data = gdal.Open(vrt_path)
output = driver.CreateCopy(output_path, vrt_data, 0, ["COMPRESS=DEFLATE", ])
output.FlushCache()
diff --git a/eedl/zonal.py b/eedl/zonal.py
index e8275f1..5c55d58 100644
--- a/eedl/zonal.py
+++ b/eedl/zonal.py
@@ -1,7 +1,8 @@
import csv
import os
from pathlib import Path
-from typing import Iterable, Union
+from typing import Iterable, Optional, Union
+
import fiona
import rasterstats
@@ -25,20 +26,20 @@ def zonal_stats(features: Union[str, Path, fiona.Collection],
# automatically align them and we just get bad output.
"""
-
- :param features: Location to the features
+	If the raster and the polygons are not in the same CRS, this function will produce bad output.
+ :param features: Location to the features.
:type features: Union[str, Path]
- :param raster: Location of the raster
+ :param raster: Location of the raster.
:type raster: Union[str, Path, None]
- :param output_folder: Output destination
+ :param output_folder: Output destination.
:type output_folder: Union[str, Path, None]
- :param filename: Name of the file
+ :param filename: Name of the file.
:type filename: Str
- :param keep_fields: Fields that will be used
+ :param keep_fields: Fields that will be used.
:type keep_fields: Iterable[str]
:param stats: The various statistical measurements to be computed.
:type stats: Iterable[str]
- :param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable
+ :param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable.
:type report_threshold: Int
:param write_batch_size: The number of zones that should be stored up before writing to disk.
:type write_batch_size: Int
@@ -62,7 +63,7 @@ def zonal_stats(features: Union[str, Path, fiona.Collection],
# next line, each item isn't evaluated, which should prevent us from needing to store a geojson representation of
# all the polygons at one time since we'll strip it off (it'd be bad to try to keep all of it
- output_filepath: Union[str, None] = None
+ output_filepath: Optional[str] = None
if not (isinstance(features, fiona.Collection) or hasattr(features, "__iter__")): # if features isn't already a fiona collection instance or something else we can iterate over
# A silly hack to get fiona to open GDB data by splitting it only if the input is a gdb data item, then providing
@@ -130,7 +131,7 @@ def zonal_stats(features: Union[str, Path, fiona.Collection],
if report_threshold and i % report_threshold == 0:
print(i)
- if len(results) > 0: # Clear out any remaining items at the end
+ if results: # Clear out any remaining items at the end
writer.writerows(results)
print(i)
finally:
diff --git a/lint.bat b/lint.bat
new file mode 100644
index 0000000..6869b81
--- /dev/null
+++ b/lint.bat
@@ -0,0 +1,4 @@
+flake8 eedl --ignore="W191,E501,E128,E124,E126,E127"
+flake8 tests --ignore="W191,E501,E128,E124,E126,E127"
+flake8 examples --ignore="W191,E501,E128,E124,E126,E127"
+mypy eedl --enable-incomplete-feature=Unpack
\ No newline at end of file
diff --git a/tests/data/_ee_export_test_image.tif.aux.xml b/tests/data/_ee_export_test_image.tif.aux.xml
new file mode 100644
index 0000000..6fbb784
--- /dev/null
+++ b/tests/data/_ee_export_test_image.tif.aux.xml
@@ -0,0 +1,26 @@
+
+
+ B8
+
+
+ 0
+ 9560
+ 256
+ 1
+ 0
+ 37336|0|0|1|4|0|5|15|21|32|47|73|86|99|126|115|152|115|147|174|170|174|217|195|233|242|239|221|255|253|248|272|273|352|327|314|342|317|364|388|387|390|434|383|421|413|487|454|459|528|546|548|588|651|654|635|687|715|692|752|721|793|844|852|776|869|805|866|828|943|878|869|912|923|889|957|880|944|911|906|876|907|773|850|818|810|741|788|680|663|619|632|644|577|568|556|551|516|501|460|492|419|418|401|357|396|307|308|304|257|303|265|288|219|228|249|242|227|180|205|188|172|186|158|175|156|156|188|150|141|161|151|126|142|126|102|119|123|107|125|140|91|132|97|122|89|102|98|109|86|91|84|89|93|81|82|78|59|73|77|82|62|71|63|59|54|61|58|57|70|47|56|48|69|48|46|52|50|52|48|39|46|38|25|32|26|38|28|26|33|12|24|23|19|27|27|25|23|24|20|12|23|13|13|10|8|5|7|4|7|5|3|3|1|1|0|0|3|2|1|1|0|0|0|2|1|0|1|0|1|1|0|0|1|0|0|0|0|0|0|0|0|0|1|0|2|1|1|1|1|0|0|1|0|0|1
+
+
+
+ 3063315.745456948
+ 9560
+ 1833.781199187
+ 1912.000000
+ 0
+ 1
+ 1
+ 1750.2241611289
+ 100
+
+
+