From 6dbfb56277ddde7054f88af2ef3ce9e7fdec48be Mon Sep 17 00:00:00 2001
From: Adam Crawford <adamcrawford253@gmail.com>
Date: Wed, 30 Aug 2023 13:08:46 -0700
Subject: [PATCH 1/3] Small changes to comments and docstrings to be consistent
 as well as small cleanups.

---
 eedl/__init__.py       |  2 +-
 eedl/google_cloud.py   | 18 ++++-----
 eedl/image.py          | 90 ++++++++++++++++++++----------------------
 eedl/merge.py          | 15 ++++---
 eedl/mosaic_rasters.py | 14 +++----
 eedl/zonal.py          | 64 ++++++------------------------
 6 files changed, 79 insertions(+), 124 deletions(-)
diff --git a/eedl/__init__.py b/eedl/__init__.py
index c232bef..7709ed8 100644
--- a/eedl/__init__.py
+++ b/eedl/__init__.py
@@ -1 +1 @@
-__version__ = "2023.08.22"
+__version__ = "2023.08.30"
diff --git a/eedl/google_cloud.py b/eedl/google_cloud.py
index bb98db1..fd8cd93 100644
--- a/eedl/google_cloud.py
+++ b/eedl/google_cloud.py
@@ -25,13 +25,13 @@ def get_public_export_urls(bucket_name: str, prefix: str = "") -> List[str]:
 	base_url = "https://storage.googleapis.com/"
 	request_url = f"{base_url}{bucket_name}/"
 
-	# get the content of the bucket (it needs to be public
+	# Get the content of the bucket (it needs to be public)
 	listing = requests.get(request_url).text
 
-	# comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements
+	# Comes back as an XML listing - don't need to parse the XML, just need the values of the Key elements
 	pattern = re.compile("<Key>(.*?)</Key>")
 	items = pattern.findall(listing)
-	# make them into full URLs with the bucket URL at the front and check if the files have the prefix specific
+	# Make them into full URLs with the bucket URL at the front and keep only the files that start with the specified prefix
 	filtered = [f"{request_url}{item}" for item in items if item.startswith(prefix)]
 
 	return filtered
@@ -48,15 +48,15 @@ def download_public_export(bucket_name: str, output_folder: Union[str, Path], pr
 	:type prefix: str
 	:return: None.
 	"""
-	# get the urls of items in the bucket with the specified prefix
+	# Get the urls of items in the bucket with the specified prefix
 	urls = get_public_export_urls(bucket_name, prefix)
 
 	for url in urls:
-		filename = url.split("/")[-1]  # get the filename
-		output_path = Path(output_folder) / filename  # construct the output path
-		# get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment
+		filename = url.split("/")[-1]  # Get the filename
+		output_path = Path(output_folder) / filename  # Construct the output path
+		# Get the data - this could be a problem if it's larger than fits in RAM - I believe requests has a way to operate as a streambuffer - not looking into that at this moment
 		response = requests.get(url)
-		output_path.write_bytes(response.content)  # write it to a file
+		output_path.write_bytes(response.content)  # Write it to a file
 
 
 def download_export(bucket_name: str,
@@ -65,7 +65,7 @@ def download_export(bucket_name: str,
 					delimiter: str = "/",
 					autodelete: bool = True) -> None:
 
-	"""Downloads a blob from the bucket.
+	"""Downloads a blob from the specified bucket.
 
 	Modified from Google Cloud sample documentation at
 		https://cloud.google.com/storage/docs/samples/storage-download-file#storage_download_file-python
diff --git a/eedl/image.py b/eedl/image.py
index 6e58fc1..60854ed 100644
--- a/eedl/image.py
+++ b/eedl/image.py
@@ -31,22 +31,22 @@ def _get_fiona_args(polygon_path: Union[str, Path]) -> Dict[str, Union[str, Path
 	"""
 
 	parts = os.path.split(polygon_path)
-	# if the folder name ends with .gdb and the "filename" doesn't have an extension, assume it's an FGDB
+	# If the folder name ends with .gdb and the "filename" doesn't have an extension, assume it's an FGDB.
 	if (parts[0].endswith(".gdb") or parts[0].endswith(".gpkg")) and "." not in parts[1]:
 		return {'fp': parts[0], 'layer': parts[1]}
 	else:
 		return {'fp': polygon_path}
 
 
-def download_images_in_folder(source_location: Union[str, Path], download_location: Union[str, Path], prefix: str) -> None:
+def download_images_in_folder(source_location: Union[str, Path], download_location: Union[str, Path], prefix: Union[str, bytes]) -> None:
 	"""
 	Handles pulling data from Google Drive over to a local location, filtering by a filename prefix and folder
 
-	:param source_location: Directory to search for files
+	:param source_location: Directory to search for files.
 	:type source_location: Union[str, Path]
-	:param download_location: Destination for files with the specified prefix
+	:param download_location: Destination for files with the specified prefix.
 	:type download_location: Union[str, Path]
-	:param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved
+	:param prefix: A prefix to use to filter items in the folder - only files where the name matches this prefix will be moved.
 	:type prefix: str
 	:return: None
 	"""
@@ -56,12 +56,12 @@ def download_images_in_folder(source_location: Union[str, Path], download_locati
 	os.makedirs(download_location, exist_ok=True)
 
 	for filename in files:
-		shutil.move(os.path.join(folder_search_path, filename), os.path.join(download_location, filename))
+		shutil.move(str(os.path.join(folder_search_path, filename)), str(os.path.join(download_location, filename)))
 
 
 class TaskRegistry:
 	"""
-	The TaskRegistry class makes it convent to manage arbitrarily many Earth Engine images that are in varying states of being downloaded.
+	The TaskRegistry class makes it convenient to manage arbitrarily many Earth Engine images that are in varying states of being downloaded.
 	"""
 	INCOMPLETE_STATUSES = ("READY", "UNSUBMITTED", "RUNNING")
 	COMPLETE_STATUSES = ["COMPLETED"]
@@ -77,8 +77,7 @@ def __init__(self) -> None:
 
 	def add(self, image: ee.image.Image) -> None:
 		"""
-		Adds an Earth Engine image to the list of Earth Engine images
-
+		Adds an Earth Engine image to the list of Earth Engine images.
 		:param image: Earth Engine image to be added to the list of images
 		:type image: ee.image.Image
 		:return: None
@@ -88,9 +87,8 @@ def add(self, image: ee.image.Image) -> None:
 	@property
 	def incomplete_tasks(self) -> List[ee.image.Image]:
 		"""
-		List of Earth Engine images that have not been completed yet
-
-		:return: List of Earth Engine images that have not been completed yet
+		List of Earth Engine images that have not been completed yet.
+		:return: List of Earth Engine images that have not been completed yet.
 		:rtype: List[ee.image.Image]
 		"""
 		initial_tasks = [image for image in self.images if image.last_task_status['state'] in self.INCOMPLETE_STATUSES]
@@ -102,30 +100,32 @@ def incomplete_tasks(self) -> List[ee.image.Image]:
 	@property
 	def complete_tasks(self) -> List[ee.image.Image]:
 		"""
-		List of Earth Engine images
-
-		:return: List of Earth Engine images
+		List of Earth Engine images.
+		:return: List of Earth Engine images.
 		:rtype: List[ee.image.Image]
 		"""
 		return [image for image in self.images if image.last_task_status['state'] in self.COMPLETE_STATUSES + self.FAILED_STATUSES]
 
 	@property
 	def failed_tasks(self) -> List[ee.image.Image]:
+		"""
+		List of Earth Engine images that have either been cancelled or that have failed
+		"""
 		return [image for image in self.images if image.last_task_status['state'] in self.FAILED_STATUSES]
 
 	@property
 	def downloadable_tasks(self) -> List[ee.image.Image]:
 		"""
-		List of Earth Engine images that have successfully been downloaded
-		:return: List of Earth Engine images that have successfully been downloaded
+		List of completed Earth Engine images that have not yet been downloaded and are not in a failed status.
+		:return: List of completed Earth Engine images that have not yet been downloaded and are not in a failed status.
 		:rtype: List[ee.image.Image]
 		"""
 		return [image for image in self.complete_tasks if image.task_data_downloaded is False and image.last_task_status['state'] not in self.FAILED_STATUSES]
 
 	def download_ready_images(self, download_location: Union[str, Path]) -> None:
 		"""
-
-		:param download_location: Destination for downloaded files
+		Downloads all images that are ready to be downloaded.
+		:param download_location: Destination for downloaded files.
 		:type download_location: Union[str, Path]
 		:return: None
 		"""
@@ -141,7 +141,6 @@ def wait_for_images(self,
 						on_failure="raise") -> None:
 		"""
 		Blocker until there are no more incomplete or downloadable tasks left.
-
 		:param download_location: Destination for downloaded files.
 		:type download_location: Union[str, Path]
 		:param sleep_time: Time between checking if the disk is full in seconds. Defaults to 10 seconds.
@@ -189,9 +188,9 @@ class Image:
 
 	:param crs: Coordinate Reference System to use for exports in a format Earth Engine understands, such as "EPSG:3310"
 	:type crs: Optional[str]
-	:param tile_size: the number of pixels per side of tiles to export
+	:param tile_size: The number of pixels per side of tiles to export
 	:type tile_size: Optional[int]
-	:param export_folder: the name of the folder in the chosen export location that will be created for the export
+	:param export_folder: The name of the folder in the chosen export location that will be created for the export
 	:type export_folder: Optional[Union[str, Path]]
 
 	This docstring needs to be checked to ensure it's in a standard format that Sphinx will render
@@ -213,18 +212,18 @@ def __init__(self, **kwargs) -> None:
 		self._ee_image: Optional[ee.image.Image] = None
 		self.output_folder: Optional[Union[str, Path]] = None
 
-		# set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here
+		# Set the defaults here - this is a nice strategy where we get to define constants near the top that aren't buried in code, then apply them here.
 		for key in DEFAULTS:
 			setattr(self, key.lower(), DEFAULTS[key])
 
-		for key in kwargs:  # now apply any provided keyword arguments over the top of the defaults.
+		for key in kwargs:  # Now apply any provided keyword arguments over the top of the defaults.
 			setattr(self, key, kwargs[key])
 
 		self._last_task_status = {"state": "UNSUBMITTED"}
-		# this will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything
-		# from the server. "None" would work too, but then we couldn't just check the status
+		# This will be the default status initially, so always assume it's UNSUBMITTED if we haven't gotten anything
+		# from the server. "None" would work too, but then we couldn't just check the status.
 		self.task_data_downloaded = False
-		self.export_type = "Drive"  # other option is "Cloud"
+		self.export_type = "Drive"  # The other option is "Cloud".
 
 		self.filename_description = ""
 
@@ -232,7 +231,7 @@ def _set_names(self, filename_suffix: str = "") -> None:
 		"""
 
 		:param filename_suffix: Suffix used to later identify files.
-		:type filename_suffix: Str
+		:type filename_suffix: str
 		:return: None
 		"""
 		self.description = filename_suffix
@@ -242,15 +241,14 @@ def _set_names(self, filename_suffix: str = "") -> None:
 	def _initialize() -> None:
 		"""
 		Handles the initialization and potentially the authentication of Earth Engine
-
 		:return: None
 		"""
-		try:  # try just a basic discardable operation used in their docs so that we don't initialize if we don't need to
+		try:  # Try just a basic discardable operation used in their docs so that we don't initialize if we don't need to.
 			_ = ee.Image("NASA/NASADEM_HGT/001")
-		except EEException:  # if it fails, try just running initialize
+		except EEException:  # If it fails, try just running initialize.
 			try:
 				ee.Initialize()
-			except EEException:  # if that still fails, try authenticating first
+			except EEException:  # If that still fails, try authenticating first.
 				ee.Authenticate()
 				ee.Initialize()
 
@@ -269,7 +267,7 @@ def last_task_status(self, new_status: Dict[str, str]) -> None:
 		Sets the value of the private variable "_last_task_status" to a specified value. Realistically, this shouldn't
 		be used as the value should only be set from within the object, but it's here in case it's needed.
 
-		:param new_status: Updated status
+		:param new_status: Status to update the _last_task_status to.
 		:type new_status: Dict[str, str]
 		:return:  None
 		"""
@@ -284,7 +282,6 @@ def export(self,
 				**export_kwargs) -> None:
 		"""
 		Handles the exporting of an image
-
 		:param image: Image for export
 		:type image: ee.image.Image
 		:param filename_suffix: The unique identifier used internally to identify images.
@@ -300,13 +297,13 @@ def export(self,
 
 		# If "image" does not have a clip attribute, the error message is not very helpful. This allows for a custom error message:
 		if not isinstance(image, ee.image.Image):
-			raise ValueError("Invalid image provided for export")
+			raise ValueError("Invalid image provided for export.")
 
 		if export_type.lower() == "drive" and (drive_root_folder is None or not os.path.exists(drive_root_folder)):
 			raise NotADirectoryError("The provided path for the Google Drive export folder is not a valid directory but"
 										" Drive export was specified. Either change the export type to use Google Cloud"
 										" and set that up properly (with a bucket, etc), or set the drive_root_folder"
-										" to a valid folder")
+										" to a valid folder.")
 		elif export_type.lower() == "drive":
 			self.drive_root_folder = drive_root_folder
 
@@ -330,7 +327,7 @@ def export(self,
 		if isinstance(clip, ee.geometry.Geometry):
 			ee_kwargs["region"] = clip
 		elif clip:
-			raise ValueError("Invalid geometry provided for export")
+			raise ValueError("Invalid geometry provided for export.")
 
 		# override any of these defaults with anything else provided
 		ee_kwargs.update(export_kwargs)
@@ -347,7 +344,7 @@ def export(self,
 
 		# export_type is not valid
 		else:
-			raise ValueError("Invalid value for export_type. Did you mean drive or cloud?")
+			raise ValueError("Invalid value for export_type. Did you mean 'drive' or 'cloud'?")
 
 		self.task.start()
 
@@ -357,15 +354,15 @@ def export(self,
 
 	def download_results(self, download_location: Union[str, Path], callback: Optional[str] = None, drive_wait: int = 15) -> None:
 		"""
-
-		:param download_location: The directory where the results should be downloaded to
+		:param download_location: The directory where the results should be downloaded to. Expects a string path or a Pathlib Path object.
 		:type download_location: Union[str, Path]
-		:param callback: The callback function called once the image is downloaded
+		:param callback: The callback function is called once the image has been downloaded.
 		:type callback: Optional[str]
+		:param drive_wait: The amount of time in seconds to wait to allow for files that Earth Engine reports have been exported to actually populate. Default is 15 seconds.
+		:type drive_wait: int
 		:return: None
 		"""
-		# need an event loop that checks self.task.status(), which
-		# will get the current state of the task
+		# Need an event loop that checks self.task.status(), which will get the current state of the task.
 
 		# state options
 		# == "CANCELLED", "CANCEL_REQUESTED", "COMPLETED",
@@ -416,14 +413,13 @@ def zonal_stats(self,
 		:param stats:
 		:type stats: Tuple[str, ...]
 		:param report_threshold: After how many iterations should it print out the feature number it's on. Defaults to 1000.
-			Set to None to disable
+			Set to None to disable.
 		:type report_threshold: int
-		:param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000
+		:param write_batch_size: How many zones should we store up before writing to the disk? Defaults to 2000.
 		:type write_batch_size: int
 		:param use_points:
 		:type use_points: bool
 		:return: None
-
 		"""
 
 		zonal.zonal_stats(polygons,
@@ -440,7 +436,7 @@ def _check_task_status(self) -> Dict[str, Union[Dict[str, str], bool]]:
 		"""
 		Updates the status is it needs to be changed
 
-		:return: Returns a dictionary of the most up-to-date status and whether it was changed
+		:return: Returns a dictionary of the most up-to-date status and whether that status was changed
 		:rtype: Dict[str, Union[Dict[str, str], bool]]
 		"""
 
diff --git a/eedl/merge.py b/eedl/merge.py
index 6870473..c6c4582 100644
--- a/eedl/merge.py
+++ b/eedl/merge.py
@@ -15,7 +15,6 @@ def merge_outputs(file_mapping,
 	"""
 	Makes output zonal stats files into a data frame and adds a datetime field. Merges all inputs into one DF, and
 	can optionally insert into a sqlite database.
-
 	:param file_mapping: A set of tuples with a path to a file and a time value (string or datetime) to associate with it.
 	:type file_mapping:
 	:param date_field: Defaults to "et_date".
@@ -37,7 +36,7 @@ def merge_outputs(file_mapping,
 		df.loc[:, date_field] = time_value
 		dfs.append(df)
 
-	# merge all the data frames together
+	# Merge all the data frames together
 	final_df = pandas.concat(dfs)
 	final_df.reset_index(inplace=True)
 
@@ -50,16 +49,16 @@ def merge_outputs(file_mapping,
 
 def plot_merged(df: pandas.DataFrame, et_field: str, date_field: str = "et_date", uniqueid: str = "UniqueID") -> so.Plot:
 	"""
-
-	:param df: Data source for the plot
+	Creates a seaborn plot of the data in the provided data frame.
+	:param df: Data source for the plot.
 	:type df: pandas.DataFrame
-	:param et_field: Name of the variable on the x-axis
+	:param et_field: Name of the variable on the x-axis.
 	:type et_field: str
-	:param date_field: Name of the variable on the y-axis. Default is "et_date"
+	:param date_field: Name of the variable on the y-axis. Default is "et_date".
 	:type date_field: str
-	:param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID"
+	:param uniqueid: Defines additional data subsets that transforms should operate on independently. Default is "UniqueID".
 	:type uniqueid: str
-	:return: Returns a seaborn object plot
+	:return: Returns a seaborn object plot.
 	:rtype: so.Plot
 	"""
 	return (
diff --git a/eedl/mosaic_rasters.py b/eedl/mosaic_rasters.py
index 7cb7f46..170c2f7 100644
--- a/eedl/mosaic_rasters.py
+++ b/eedl/mosaic_rasters.py
@@ -6,18 +6,18 @@
 from osgeo import gdal
 
 
-def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: str = "") -> None:
+def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: Union[str, bytes] = "") -> None:
 	"""
 
-	:param folder_path: Location of the folder
+	:param folder_path: Location of the folder.
 	:type folder_path: Union[str, Path]
-	:param output_path: Output destination
+	:param output_path: Output destination.
 	:type output_path: Union[str, Path]
 	:param prefix: Used to find the files of interest.
 	:type prefix: Str
 	:return: None
 	"""
-	tifs = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.endswith(".tif") and filename.startswith(prefix)]
+	tifs: Sequence[str] = [str(os.path.join(folder_path, filename)) for filename in os.listdir(folder_path) if filename.endswith(bytes(".tif")) and filename.startswith(prefix)]
 	mosaic_rasters(tifs, output_path)
 
 
@@ -43,11 +43,11 @@ def mosaic_rasters(raster_paths: Sequence[Union[str, Path]],
 	vrt_options = gdal.BuildVRTOptions(resampleAlg='nearest', resolution="highest")
 	my_vrt = gdal.BuildVRT(vrt_path, raster_paths, options=vrt_options)
 	# my_vrt = None
-	my_vrt.FlushCache()  # write the VRT out
+	my_vrt.FlushCache()  # Write the VRT out
 	print(f"VRT at {vrt_path}")
 
-	# now let's export it to the output_path as a geotiff
-	driver = gdal.GetDriverByName("GTIFF")  # we'll use VRT driver.CreateCopy
+	# Now let's export it to the output_path as a geotiff
+	driver = gdal.GetDriverByName("GTIFF")  # We'll use VRT driver.CreateCopy
 	vrt_data = gdal.Open(vrt_path)
 	output = driver.CreateCopy(output_path, vrt_data, 0, ["COMPRESS=DEFLATE", ])
 	output.FlushCache()
diff --git a/eedl/zonal.py b/eedl/zonal.py
index a910343..c74e824 100644
--- a/eedl/zonal.py
+++ b/eedl/zonal.py
@@ -1,7 +1,7 @@
 import csv
 import os
 from pathlib import Path
-from typing import Dict, Iterable, Union
+from typing import Dict, Iterable, Optional, Union
 
 import fiona
 import rasterstats
@@ -19,7 +19,7 @@ def _get_fiona_args(polygon_path: Union[str, Path]) -> Dict[str, Union[str, Path
 	"""
 
 	parts = os.path.split(polygon_path)
-	# if the folder name ends with .gdb and the "filename" doesn't have an extension, assume it's an FGDB
+	# If the folder name ends with .gdb and the "filename" doesn't have an extension, assume it's an FGDB
 	if (parts[0].endswith(".gdb") or parts[0].endswith(".gpkg")) and "." not in parts[1]:
 		return {'fp': parts[0], 'layer': parts[1]}
 	else:
@@ -40,20 +40,20 @@ def zonal_stats(features: Union[str, Path],
 	#  automatically align them and we just get bad output.
 
 	"""
-
-	:param features: Location to the features
+	If the raster and the polygons are not in the same CRS, this function will produce bad output.
+	:param features: Location to the features.
 	:type features: Union[str, Path]
-	:param raster: Location of the raster
+	:param raster: Location of the raster.
 	:type raster: Union[str, Path, None]
-	:param output_folder: Output destination
+	:param output_folder: Output destination.
 	:type output_folder: Union[str, Path, None]
-	:param filename: Name of the file
+	:param filename: Name of the file.
 	:type filename: Str
-	:param keep_fields: Fields that will be used
+	:param keep_fields: Fields that will be used.
 	:type keep_fields: Iterable[str]
 	:param stats: The various statistical measurements to be computed.
 	:type stats: Iterable[str]
-	:param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable
+	:param report_threshold: The number of iterations before it prints out the feature number it's on. Default is 1000. Set to None to disable.
 	:type report_threshold: Int
 	:param write_batch_size: The number of zones that should be stored up before writing to disk.
 	:type write_batch_size: Int
@@ -65,7 +65,7 @@ def zonal_stats(features: Union[str, Path],
 		when use_points is True. Additionally, when this is True, the `stats` argument to this function is ignored
 		as only a single value will be extracted as the attribute `value` in the output CSV. Default is False.
 	:type use_points: Bool
-	:param kwargs: Passed through to rasterstats
+	:param kwargs: Passed through to rasterstats.
 	:return:
 	:rtype: Union[str, Path, None]
 	"""
@@ -79,7 +79,7 @@ def zonal_stats(features: Union[str, Path],
 	main_file_path = kwargs['fp']
 	del kwargs['fp']
 
-	output_filepath: Union[str, None] = None
+	output_filepath: Optional[str] = None
 
 	with fiona.open(main_file_path, **kwargs) as feats_open:
 
@@ -129,48 +129,8 @@ def zonal_stats(features: Union[str, Path],
 				if report_threshold and i % report_threshold == 0:
 					print(i)
 
-			if len(results) > 0:  # Clear out any remaining items at the end
+			if results:  # Clear out any remaining items at the end
 				writer.writerows(results)
 				print(i)
 
 	return output_filepath
-
-
-def run_data_2018_baseline() -> None:
-	"""
-
-
-	:return: None
-	"""
-	datasets = [
-		# dict(
-		# name="cv_water_balance",
-		# raster_folder=r"D:\ET_Summers\ee_exports_water_balance\et_exports_sseboper",
-		# liq=r"C:\Users\dsx\Downloads\drought_liq_2018.gdb\liq_cv_2018_3310",
-		# output_folder=r"D:\ET_Summers\ee_exports_water_balance\et_exports_sseboper\2018_baseline"
-		# ),
-		dict(
-			name="non_cv_water_balance",
-			raster_folder=r"D:\ET_Summers\ee_exports_water_balance_non_cv\et_exports_sseboper",
-			liq=r"C:\Users\dsx\Downloads\drought_liq_2018.gdb\liq_non_cv_2018_3310",
-			output_folder=r"D:\ET_Summers\ee_exports_water_balance_non_cv\et_exports_sseboper\2018_baseline"
-		)
-
-	]
-
-	skips = [r"mean_et_2022-2022-05-01--2022-08-31__water_balance_may_aug_mean_mosaic.tif"]
-
-	for dataset in datasets:
-		liq = dataset["liq"]
-		raster_folder = dataset["raster_folder"]
-		output_folder = dataset["output_folder"]
-		# was going to do this differently, but leaving it alone
-		rasters = [item for item in os.listdir(raster_folder) if item.endswith(".tif") and item not in skips]
-		rasters_processing = [os.path.join(raster_folder, item) for item in rasters]
-
-		print(liq)
-		print(rasters_processing)
-		for raster in rasters_processing:
-			print(raster)
-			output_name = os.path.splitext(os.path.split(raster)[1])[0]
-			zonal_stats(liq, raster, output_folder, output_name)

From 7a51f7c84a7a50300a7e7cd1a8f4c1634440f2c5 Mon Sep 17 00:00:00 2001
From: Adam Crawford <adamcrawford253@gmail.com>
Date: Wed, 30 Aug 2023 13:21:37 -0700
Subject: [PATCH 2/3] Fixed mypy issues.

---
 eedl/image.py          | 2 +-
 eedl/mosaic_rasters.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/eedl/image.py b/eedl/image.py
index 60854ed..91134c8 100644
--- a/eedl/image.py
+++ b/eedl/image.py
@@ -38,7 +38,7 @@ def _get_fiona_args(polygon_path: Union[str, Path]) -> Dict[str, Union[str, Path
 		return {'fp': polygon_path}
 
 
-def download_images_in_folder(source_location: Union[str, Path], download_location: Union[str, Path], prefix: Union[str, bytes]) -> None:
+def download_images_in_folder(source_location: Union[str, Path], download_location: Union[str, Path], prefix: str) -> None:
 	"""
 	Handles pulling data from Google Drive over to a local location, filtering by a filename prefix and folder
 
diff --git a/eedl/mosaic_rasters.py b/eedl/mosaic_rasters.py
index 170c2f7..21ac238 100644
--- a/eedl/mosaic_rasters.py
+++ b/eedl/mosaic_rasters.py
@@ -6,7 +6,7 @@
 from osgeo import gdal
 
 
-def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: Union[str, bytes] = "") -> None:
+def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path], prefix: str = "") -> None:
 	"""
 
 	:param folder_path: Location of the folder.
@@ -17,7 +17,7 @@ def mosaic_folder(folder_path: Union[str, Path], output_path: Union[str, Path],
 	:type prefix: Str
 	:return: None
 	"""
-	tifs: Sequence[str] = [str(os.path.join(folder_path, filename)) for filename in os.listdir(folder_path) if filename.endswith(bytes(".tif")) and filename.startswith(prefix)]
+	tifs = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.endswith(".tif") and filename.startswith(prefix)]
 	mosaic_rasters(tifs, output_path)
 
 

From d538bc2fd6748377609c611d1d06b0571d80b775 Mon Sep 17 00:00:00 2001
From: Nick Santos <nickrsan@users.noreply.github.com>
Date: Tue, 12 Sep 2023 15:51:12 -0700
Subject: [PATCH 3/3] Fixing linting errors in PR

---
 eedl/helpers.py                              |  2 +-
 eedl/image.py                                |  2 +-
 eedl/zonal.py                                |  4 +--
 lint.bat                                     |  4 +++
 tests/data/_ee_export_test_image.tif.aux.xml | 26 ++++++++++++++++++++
 5 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 lint.bat
 create mode 100644 tests/data/_ee_export_test_image.tif.aux.xml

diff --git a/eedl/helpers.py b/eedl/helpers.py
index d5ba419..9493a48 100644
--- a/eedl/helpers.py
+++ b/eedl/helpers.py
@@ -85,7 +85,7 @@ def _single_item_extract(self, image, task_registry, zonal_features, aoi_attr, e
 		export_image.zonal_inject_constants = zonal_inject_constants
 
 		filename_suffix = f"{aoi_attr}_{image_date}"
-		if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{filename_description}_{filename_suffix}"):
+		if self.skip_existing and export_image.check_mosaic_exists(aoi_download_folder, self.export_folder, f"{self.filename_description}_{filename_suffix}"):
 			print(f"Image {filename_suffix} exists and skip_existing=True. Skipping")
 			return
 
diff --git a/eedl/image.py b/eedl/image.py
index 2e6226d..3de5c67 100644
--- a/eedl/image.py
+++ b/eedl/image.py
@@ -35,6 +35,7 @@ class EEExportDict(TypedDict):
 
 )
 
+
 def download_images_in_folder(source_location: Union[str, Path], download_location: Union[str, Path], prefix: str) -> None:
 	"""
 	Handles pulling data from Google Drive over to a local location, filtering by a filename prefix and folder
@@ -426,7 +427,6 @@ def export(self,
 		else:
 			raise ValueError("Invalid value for export_type. Did you mean \"drive\" or \"cloud\"?")
 
-
 		self.task.start()
 
 		self.export_type = export_type
diff --git a/eedl/zonal.py b/eedl/zonal.py
index 25f6b9c..5c55d58 100644
--- a/eedl/zonal.py
+++ b/eedl/zonal.py
@@ -1,7 +1,7 @@
 import csv
 import os
 from pathlib import Path
-from typing import Dict, Iterable, Optional, Union
+from typing import Iterable, Optional, Union
 
 
 import fiona
@@ -62,7 +62,7 @@ def zonal_stats(features: Union[str, Path, fiona.Collection],
 	# Note the use of gen_zonal_stats, which uses a generator. That should mean that until we coerce it to a list on the
 	# next line, each item isn't evaluated, which should prevent us from needing to store a geojson representation of
 	# all the polygons at one time since we'll strip it off (it'd be bad to try to keep all of it
-  
+
 	output_filepath: Optional[str] = None
 
 	if not (isinstance(features, fiona.Collection) or hasattr(features, "__iter__")):  # if features isn't already a fiona collection instance or something else we can iterate over
diff --git a/lint.bat b/lint.bat
new file mode 100644
index 0000000..6869b81
--- /dev/null
+++ b/lint.bat
@@ -0,0 +1,4 @@
+flake8 eedl --ignore="W191,E501,E128,E124,E126,E127"
+flake8 tests --ignore="W191,E501,E128,E124,E126,E127"
+flake8 examples --ignore="W191,E501,E128,E124,E126,E127"
+mypy eedl --enable-incomplete-feature=Unpack
\ No newline at end of file
diff --git a/tests/data/_ee_export_test_image.tif.aux.xml b/tests/data/_ee_export_test_image.tif.aux.xml
new file mode 100644
index 0000000..6fbb784
--- /dev/null
+++ b/tests/data/_ee_export_test_image.tif.aux.xml
@@ -0,0 +1,26 @@
+<PAMDataset>
+  <PAMRasterBand band="1">
+    <Description>B8</Description>
+    <Histograms>
+      <HistItem>
+        <HistMin>0</HistMin>
+        <HistMax>9560</HistMax>
+        <BucketCount>256</BucketCount>
+        <IncludeOutOfRange>1</IncludeOutOfRange>
+        <Approximate>0</Approximate>
+        <HistCounts>37336|0|0|1|4|0|5|15|21|32|47|73|86|99|126|115|152|115|147|174|170|174|217|195|233|242|239|221|255|253|248|272|273|352|327|314|342|317|364|388|387|390|434|383|421|413|487|454|459|528|546|548|588|651|654|635|687|715|692|752|721|793|844|852|776|869|805|866|828|943|878|869|912|923|889|957|880|944|911|906|876|907|773|850|818|810|741|788|680|663|619|632|644|577|568|556|551|516|501|460|492|419|418|401|357|396|307|308|304|257|303|265|288|219|228|249|242|227|180|205|188|172|186|158|175|156|156|188|150|141|161|151|126|142|126|102|119|123|107|125|140|91|132|97|122|89|102|98|109|86|91|84|89|93|81|82|78|59|73|77|82|62|71|63|59|54|61|58|57|70|47|56|48|69|48|46|52|50|52|48|39|46|38|25|32|26|38|28|26|33|12|24|23|19|27|27|25|23|24|20|12|23|13|13|10|8|5|7|4|7|5|3|3|1|1|0|0|3|2|1|1|0|0|0|2|1|0|1|0|1|1|0|0|1|0|0|0|0|0|0|0|0|0|1|0|2|1|1|1|1|0|0|1|0|0|1</HistCounts>
+      </HistItem>
+    </Histograms>
+    <Metadata>
+      <MDI key="STATISTICS_COVARIANCES">3063315.745456948</MDI>
+      <MDI key="STATISTICS_MAXIMUM">9560</MDI>
+      <MDI key="STATISTICS_MEAN">1833.781199187</MDI>
+      <MDI key="STATISTICS_MEDIAN">1912.000000</MDI>
+      <MDI key="STATISTICS_MINIMUM">0</MDI>
+      <MDI key="STATISTICS_SKIPFACTORX">1</MDI>
+      <MDI key="STATISTICS_SKIPFACTORY">1</MDI>
+      <MDI key="STATISTICS_STDDEV">1750.2241611289</MDI>
+      <MDI key="STATISTICS_VALID_PERCENT">100</MDI>
+    </Metadata>
+  </PAMRasterBand>
+</PAMDataset>