From f9e88132c213f336adc8e44b18005382b34fdce1 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 26 Sep 2024 13:35:16 -0700
Subject: [PATCH 1/5] Add macOS files to gitignore

---
 .gitignore | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/.gitignore b/.gitignore
index eff8c02..4ebdf40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+# === PYTHON ===
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -156,3 +157,32 @@ cython_debug/
 
 # PyCharm
 .idea/
+
+# === MACOS ===
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk

From 2a520055906f01ad9e6eb5df895d1e267bf98ca2 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 26 Sep 2024 13:36:39 -0700
Subject: [PATCH 2/5] Move duplicated code to static methods; don't reread
 CSVs; use Pythonic names

---
 src/nmdc_geoloc_tools/geotools.py | 179 ++++++++++++++----------------
 1 file changed, 86 insertions(+), 93 deletions(-)

diff --git a/src/nmdc_geoloc_tools/geotools.py b/src/nmdc_geoloc_tools/geotools.py
index 8ae4ec7..8c6f7aa 100644
--- a/src/nmdc_geoloc_tools/geotools.py
+++ b/src/nmdc_geoloc_tools/geotools.py
@@ -1,9 +1,9 @@
 import csv
 import json
-import os
 import xml.etree.ElementTree as ET
-from dataclasses import dataclass
 from datetime import datetime
+from functools import cache
+from pathlib import Path
 from typing import Tuple
 
 import requests
@@ -11,30 +11,59 @@
 LATLON = Tuple[float, float]
 
 
-@dataclass
+@cache
+def read_data_csv(filename: str) -> list:
+    with open(Path(__file__).parent / "data" / filename) as file:
+        mapping = csv.reader(file)
+        return list(mapping)
+
+
 class GeoEngine:
     """
     Wraps ORNL Identify to retrieve elevation data in meters, soil types, and land use.
     """
 
-    def get_elevation(self, latlon: LATLON) -> float:
-        """
-        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns the elevation value in meters as a float.
-        """
+    @property
+    def zobler_soil_type_lookup(self) -> list:
+        return read_data_csv("zobler_540_MixS_lookup.csv")
+
+    @property
+    def envo_landuse_systems_lookup(self) -> list:
+        return read_data_csv("ENVO_Landuse_Systems_lookup.csv")
+
+    @property
+    def envo_landuse_lookup(self) -> list:
+        return read_data_csv("ENVO_Landuse_lookup.csv")
+
+    @staticmethod
+    def _validate_latlon(latlon: LATLON):
         lat = latlon[0]
         lon = latlon[1]
         if not -90 <= lat <= 90:
-            raise ValueError("Invalid Latitude: " + str(lat))
+            raise ValueError(f"Invalid Latitude: {lat}")
         if not -180 <= lon <= 180:
-            raise ValueError("Invalid Longitude: " + str(lon))
-        # Generate bounding box used in query from lat & lon. 0.008333333333333 comes from maplayer resolution provided by ORNL
-        remX = (lon + 180) % 0.008333333333333
-        remY = (lat + 90) % 0.008333333333333
-        minX = lon - remX
-        maxX = lon - remX + 0.008333333333333
-        minY = lat - remY
-        maxY = lat - remY + 0.008333333333333
-        BBOX = str(minX) + "," + str(minY) + "," + str(maxX) + "," + str(maxY)
+            raise ValueError(f"Invalid Longitude: {lon}")
+        return lat, lon
+
+    @staticmethod
+    def _bbox(lat: float, lon: float, resolution: float) -> str:
+        rem_x = (lon + 180) % resolution
+        rem_y = (lat + 90) % resolution
+        min_x = lon - rem_x
+        max_x = lon - rem_x + resolution
+        min_y = lat - rem_y
+        max_y = lat - rem_y + resolution
+        return f"{min_x},{min_y},{max_x},{max_y}"
+
+    def get_elevation(self, latlon: LATLON) -> float:
+        """
+        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and
+        returns the elevation value in meters as a float.
+        """
+        lat, lon = self._validate_latlon(latlon)
+        # Generate bounding box used in query from lat & lon. 0.008333333333333 comes from maplayer
+        # resolution provided by ORNL
+        bbox = self._bbox(lat, lon, 0.008333333333333)
         elevparams = {
             "originator": "QAQCIdentify",
             "SERVICE": "WMS",
@@ -48,7 +77,7 @@ def get_elevation(self, latlon: LATLON) -> float:
             "X": "2",
             "Y": "2",
             "INFO_FORMAT": "text/xml",
-            "BBOX": BBOX,
+            "BBOX": bbox,
         }
         response = requests.get(
             "https://webmap.ornl.gov/ogcbroker/wms", params=elevparams
@@ -65,32 +94,15 @@ def get_elevation(self, latlon: LATLON) -> float:
 
     def get_fao_soil_type(self, latlon: LATLON) -> str:
         """
-        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns the soil type as a string.
+        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and
+        returns the soil type as a string.
         """
-        lat = latlon[0]
-        lon = latlon[1]
-        if not -90 <= lat <= 90:
-            raise ValueError("Invalid Latitude: " + str(lat))
-        if not -180 <= lon <= 180:
-            raise ValueError("Invalid Longitude: " + str(lon))
-        # Generate bounding box used in query from lat & lon. 0.5 comes from maplayer resolution provided by ORNL
-        remX = (lon + 180) % 0.5
-        remY = (lat + 90) % 0.5
-        minX = lon - remX
-        maxX = lon - remX + 0.5
-        minY = lat - remY
-        maxY = lat - remY + 0.5
-
-        # Read in the mapping file note need to get this path right
-        with open(
-            os.path.dirname(__file__) + "/data/zobler_540_MixS_lookup.csv"
-        ) as mapper:
-            mapping = csv.reader(mapper)
-            map = list(mapping)
+        lat, lon = self._validate_latlon(latlon)
+        # Generate bounding box used in query from lat & lon. 0.5 comes from maplayer resolution
+        # provided by ORNL
+        bbox = self._bbox(lat, lon, 0.5)
 
-        BBoxstring = str(minX) + "," + str(minY) + "," + str(maxX) + "," + str(maxY)
-
-        faosoilparams = {
+        fao_soil_params = {
             "INFO_FORMAT": "text/xml",
             "WIDTH": "5",
             "originator": "QAQCIdentify",
@@ -98,7 +110,7 @@ def get_fao_soil_type(self, latlon: LATLON) -> str:
             "LAYERS": "540_1_band1",
             "REQUEST": "GetFeatureInfo",
             "SRS": "EPSG:4326",
-            "BBOX": BBoxstring,
+            "BBOX": bbox,
             "VERSION": "1.1.1",
             "X": "2",
             "Y": "2",
@@ -107,17 +119,17 @@ def get_fao_soil_type(self, latlon: LATLON) -> str:
             "map": "/sdat/config/mapfile//540/540_1_wms.map",
         }
         response = requests.get(
-            "https://webmap.ornl.gov/cgi-bin/mapserv", params=faosoilparams
+            "https://webmap.ornl.gov/cgi-bin/mapserv", params=fao_soil_params
         )
         if response.status_code == 200:
-            faosoilxml = response.content.decode("utf-8")
-            if faosoilxml == "":
+            fao_soil_xml = response.content.decode("utf-8")
+            if fao_soil_xml == "":
                 raise ValueError("Empty string returned")
-            root = ET.fromstring(faosoilxml)
+            root = ET.fromstring(fao_soil_xml)
             results = root[5].text
             results = results.split(":")
             results = results[1].strip()
-            for res in map:
+            for res in self.zobler_soil_type_lookup:
                 if res[0] == results:
                     results = res[1]
                     return results
@@ -127,73 +139,54 @@ def get_fao_soil_type(self, latlon: LATLON) -> str:
 
     def get_landuse_dates(self, latlon: LATLON) -> []:
         """
-        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns as array of valid dates (YYYY-MM-DD format) for the landuse requests.
+        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and
+        returns as array of valid dates (YYYY-MM-DD format) for the landuse requests.
         """
-        lat = latlon[0]
-        lon = latlon[1]
-        if not -90 <= lat <= 90:
-            raise ValueError("Invalid Latitude: " + str(lat))
-        if not -180 <= lon <= 180:
-            raise ValueError("Invalid Longitude: " + str(lon))
-        landuseparams = {"latitude": lat, "longitude": lon}
+        lat, lon = self._validate_latlon(latlon)
+        landuse_params = {"latitude": lat, "longitude": lon}
         response = requests.get(
-            "https://modis.ornl.gov/rst/api/v1/MCD12Q1/dates", params=landuseparams
+            "https://modis.ornl.gov/rst/api/v1/MCD12Q1/dates", params=landuse_params
         )
         if response.status_code == 200:
-            landuseDates = response.content.decode("utf-8")
-            if landuseDates == "":
+            landuse_dates = response.content.decode("utf-8")
+            if landuse_dates == "":
                 raise ValueError("No valid Landuse dates returned")
-            data = json.loads(landuseDates)
-            validDates = []
+            data = json.loads(landuse_dates)
+            valid_dates = []
             for date in data["dates"]:
-                validDates.append(date["calendar_date"])
-            return validDates
+                valid_dates.append(date["calendar_date"])
+            return valid_dates
         else:
             raise ApiException(response.status_code)
 
-    def get_landuse(self, latlon: LATLON, startDate, endDate) -> {}:
+    def get_landuse(self, latlon: LATLON, start_date, end_date) -> {}:
         """
-        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]), the start date (YYYY-MM-DD), and end date (YYYY-MM-DD) and returns a dictionary containing the land use values for the classification systems for the dates requested.
+        Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]), the
+        start date (YYYY-MM-DD), and end date (YYYY-MM-DD) and returns a dictionary containing the
+        land use values for the classification systems for the dates requested.
         """
-        lat = latlon[0]
-        lon = latlon[1]
-        if not -90 <= lat <= 90:
-            raise ValueError("Invalid Latitude: " + str(lat))
-        if not -180 <= lon <= 180:
-            raise ValueError("Invalid Longitude: " + str(lon))
+        lat, lon = self._validate_latlon(latlon)
         # function accepts dates in YYYY-MM-DD format, but API requires a unique format (AYYYYDOY)
         date_format = "%Y-%m-%d"
-        start_date_obj = datetime.strptime(startDate, date_format)
-        end_date_obj = datetime.strptime(endDate, date_format)
+        start_date_obj = datetime.strptime(start_date, date_format)
+        end_date_obj = datetime.strptime(end_date, date_format)
 
-        apiStartDate = (
+        api_start_date = (
             "A" + str(start_date_obj.year) + str(start_date_obj.strftime("%j"))
         )
-        apiEndDate = "A" + str(end_date_obj.year) + str(end_date_obj.strftime("%j"))
+        api_end_date = "A" + str(end_date_obj.year) + str(end_date_obj.strftime("%j"))
 
-        landuseparams = {
+        landuse_params = {
             "latitude": lat,
             "longitude": lon,
-            "startDate": apiStartDate,
-            "endDate": apiEndDate,
+            "startDate": api_start_date,
+            "endDate": api_end_date,
             "kmAboveBelow": 0,
             "kmLeftRight": 0,
         }
         response = requests.get(
-            "https://modis.ornl.gov/rst/api/v1/MCD12Q1/subset", params=landuseparams
+            "https://modis.ornl.gov/rst/api/v1/MCD12Q1/subset", params=landuse_params
         )
-        # Retrieve Classification System mapping
-        with open(
-            os.path.dirname(__file__) + "/data/ENVO_Landuse_Systems_lookup.csv"
-        ) as mapper:
-            mapping = csv.reader(mapper)
-            sytems_map = list(mapping)
-        # Retrieve Classification System to ENVO mapping
-        with open(
-            os.path.dirname(__file__) + "/data/ENVO_Landuse_lookup.csv"
-        ) as mapper:
-            mapping = csv.reader(mapper)
-            landuse_map = list(mapping)
         if response.status_code == 200:
             landuse = response.content.decode("utf-8")
             results = {}
@@ -203,10 +196,10 @@ def get_landuse(self, latlon: LATLON, startDate, endDate) -> {}:
             for band in data["subset"]:
                 system = "NONE"
                 band["data"] = list(map(int, band["data"]))
-                for res in sytems_map:
+                for res in self.envo_landuse_systems_lookup:
                     if res[1] == band["band"]:
                         system = res[0]
-                for res in landuse_map:
+                for res in self.envo_landuse_lookup:
                     if res[8] == system and int(res[1]) in band["data"]:
                         envo_term = res[2]
                         if envo_term == "":

From 94dcfbf8d228d44eedc634d2b5053ea614a64db1 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 26 Sep 2024 13:40:09 -0700
Subject: [PATCH 3/5] Use function-based test style

---
 tests/test_geotools.py | 71 ++++++++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/tests/test_geotools.py b/tests/test_geotools.py
index 417585a..975655c 100644
--- a/tests/test_geotools.py
+++ b/tests/test_geotools.py
@@ -3,43 +3,54 @@
 from nmdc_geoloc_tools.geotools import GeoEngine
 
 
-class TestGeotools:
-    ge = GeoEngine()
+@pytest.fixture()
+def engine():
+    return GeoEngine()
 
-    def test_elevation_mount_everest(self):
-        assert int(self.ge.get_elevation((27.9881, 86.9250))) == 8752
 
-    def test_elevation_death_valley(self):
-        assert int(self.ge.get_elevation((36.5322649, -116.9325408))) == -66
+def test_elevation_mount_everest(engine):
+    assert int(engine.get_elevation((27.9881, 86.9250))) == 8752
 
-    def test_elevation_ocean(self):
-        with pytest.raises(ValueError):
-            self.ge.get_elevation((0, 0))
 
-    def test_elevation_bad_coordinates(self):
-        with pytest.raises(ValueError):
-            self.ge.get_elevation((-200, 200))
+def test_elevation_death_valley(engine):
+    assert int(engine.get_elevation((36.5322649, -116.9325408))) == -66
 
-    def test_soil_type_cambisols(self):
-        assert self.ge.get_fao_soil_type((32.95047, -87.393259)) == "Cambisols"
 
-    def test_soil_type_water(self):
-        assert self.ge.get_fao_soil_type((0, 0)) == "Water"
+def test_elevation_ocean(engine):
+    with pytest.raises(ValueError):
+        engine.get_elevation((0, 0))
 
-    def test_soil_type_bad_coordinates(self):
-        with pytest.raises(ValueError):
-            self.ge.get_fao_soil_type((-200, 200))
 
-    def test_landuse_bad_coordinates(self):
-        with pytest.raises(ValueError):
-            self.ge.get_landuse((-200, 200), "2001-01-01", "2002-01-01")
+def test_elevation_bad_coordinates(engine):
+    with pytest.raises(ValueError):
+        engine.get_elevation((-200, 200))
 
-    def test_landuse_date_death_valley(self):
-        dates = self.ge.get_landuse_dates((36.5322649, -116.9325408))
-        assert dates[0] == "2001-01-01"
 
-    def test_landuse_death_valley(self):
-        data = self.ge.get_landuse(
-            (36.5322649, -116.9325408), "2001-01-01", "2002-01-01"
-        )
-        assert data["LCCS1"][0]["envo_term"] == "area of barren land"
+def test_soil_type_cambisols(engine):
+    assert engine.get_fao_soil_type((32.95047, -87.393259)) == "Cambisols"
+
+
+def test_soil_type_water(engine):
+    assert engine.get_fao_soil_type((0, 0)) == "Water"
+
+
+def test_soil_type_bad_coordinates(engine):
+    with pytest.raises(ValueError):
+        engine.get_fao_soil_type((-200, 200))
+
+
+def test_landuse_bad_coordinates(engine):
+    with pytest.raises(ValueError):
+        engine.get_landuse((-200, 200), "2001-01-01", "2002-01-01")
+
+
+def test_landuse_date_death_valley(engine):
+    dates = engine.get_landuse_dates((36.5322649, -116.9325408))
+    assert dates[0] == "2001-01-01"
+
+
+def test_landuse_death_valley(engine):
+    data = engine.get_landuse(
+        (36.5322649, -116.9325408), "2001-01-01", "2002-01-01"
+    )
+    assert data["LCCS1"][0]["envo_term"] == "area of barren land"

From 6911f4ad963e11e6da62b1fc0eba262d82f8dfb6 Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 26 Sep 2024 13:41:33 -0700
Subject: [PATCH 4/5] Allow importing GeoEngine from top-level package

---
 src/nmdc_geoloc_tools/__init__.py | 1 +
 tests/test_geotools.py            | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nmdc_geoloc_tools/__init__.py b/src/nmdc_geoloc_tools/__init__.py
index e69de29..64570b6 100644
--- a/src/nmdc_geoloc_tools/__init__.py
+++ b/src/nmdc_geoloc_tools/__init__.py
@@ -0,0 +1 @@
+from nmdc_geoloc_tools.geotools import GeoEngine as GeoEngine
diff --git a/tests/test_geotools.py b/tests/test_geotools.py
index 975655c..a2c1b5f 100644
--- a/tests/test_geotools.py
+++ b/tests/test_geotools.py
@@ -1,6 +1,6 @@
 import pytest
 
-from nmdc_geoloc_tools.geotools import GeoEngine
+from nmdc_geoloc_tools import GeoEngine
 
 
 @pytest.fixture()

From 3b96e0b1be27559ef1f33072262a5b2e6bd3e72d Mon Sep 17 00:00:00 2001
From: Patrick Kalita <pkalita@lbl.gov>
Date: Thu, 26 Sep 2024 14:05:41 -0700
Subject: [PATCH 5/5] Fix formatting

---
 tests/test_geotools.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_geotools.py b/tests/test_geotools.py
index a2c1b5f..4debd3c 100644
--- a/tests/test_geotools.py
+++ b/tests/test_geotools.py
@@ -50,7 +50,5 @@ def test_landuse_date_death_valley(engine):
 
 
 def test_landuse_death_valley(engine):
-    data = engine.get_landuse(
-        (36.5322649, -116.9325408), "2001-01-01", "2002-01-01"
-    )
+    data = engine.get_landuse((36.5322649, -116.9325408), "2001-01-01", "2002-01-01")
     assert data["LCCS1"][0]["envo_term"] == "area of barren land"