Merge pull request #10 from samgdotson/save_local

Save data locally on first execution
samgdotson · Sep 11, 2022 · f98cc73 · f98cc73
2 parents 1780635 + f577e11
commit f98cc73
Show file tree

Hide file tree

Showing 12 changed files with 328 additions and 38 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,9 @@
+[run]
+branch = True
+source = nrelpy
+omit =
+    nrelpy/tests/*
+    nrelpy/__init__.py
+    nrelpy/**/__init__.py
+    setup.py
+    nrelpy/version.py
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
 
 # Distribution / packaging
 .Python
+data/
 build/
 develop-eggs/
 dist/

diff --git a/README.md b/README.md
@@ -46,6 +46,14 @@ df = REP.as_dataframe()
 
 From the top-level `nrelpy` directory, run `pytest`.  
 
+You can also check the testing coverage with
+
+```bash
+pytest --cov-config=.coveragerc --cov=nrelpy
+coverage html
+```
+`coverage html` creates a nicely formatted HTML page with
+the entire coverage report. Simply open the `htmlcov/index.html` file in your browser.
 
 ### Contributing
 

diff --git a/nrelpy/atb.py b/nrelpy/atb.py
@@ -1,5 +1,6 @@
 from urllib.error import HTTPError
 import pandas as pd
+from nrelpy.utils.data_io import check_stored_data, save_local
 
 
 def as_dataframe(year, database, verbose=False, **kwargs):
@@ -14,38 +15,45 @@ def as_dataframe(year, database, verbose=False, **kwargs):
         * ATB Transportation (ATBt) accepts: [2020]
     database : string
         The desired ATB dataset. Accepts: 'electricity', 'transportation'.
-    
+
     Returns
     -------
     df : pandas.DataFrame
         The ATB data as a pandas dataframe.
     """
 
-    atb_urls = {'electricity': f'https://oedi-data-lake.s3.amazonaws.com/ATB/electricity/csv/{year}/ATBe.csv',
-                'transportation':f"https://atb-archive.nrel.gov/transportation/{year}/files/{year}_ATB_Data_VehFuels_Download.xlsx"}
+    try:
+        df = check_stored_data(database=database, year=year)
+    except FileNotFoundError:
+        atb_urls = {
+            'electricity': f'https://oedi-data-lake.s3.amazonaws.com/ATB/electricity/csv/{year}/ATBe.csv',
+            'transportation': f"https://atb-archive.nrel.gov/transportation/{year}/files/{year}_ATB_Data_VehFuels_Download.xlsx"}
 
-    url = atb_urls[database]
+        url = atb_urls[database]
 
-    try:
-        print(f'Downloading NREL ATB {database} from {year}')
-        if database == 'electricity':
-            df = pd.read_csv(url, low_memory=False)
-        elif database == 'transportation':
-            df = pd.read_excel(url, sheet_name='Joined Data for Levelized Calc')
-        print('Download Successful.')
-        drop_col = ['Unnamed: 0']
-        if verbose:
-            print(f"Dropping column {drop_col}")
         try:
-            df.drop(columns=drop_col, inplace=True)
-        except KeyError as err:
+            print(f'Downloading NREL ATB {database} from {year}')
+            if database == 'electricity':
+                df = pd.read_csv(url, low_memory=False)
+            elif database == 'transportation':
+                df = pd.read_excel(
+                    url, sheet_name='Joined Data for Levelized Calc')
+            print('Download Successful.')
+            drop_col = ['Unnamed: 0']
             if verbose:
-                print(f'No column {drop_col}.')
-            else:
-                pass
-    except HTTPError as err:
-        fail_str = (f'Failed to download from URL: {url}.')
-        print(err.code, fail_str)
-        raise
+                print(f"Dropping column {drop_col}")
+            try:
+                df.drop(columns=drop_col, inplace=True)
+            except KeyError as err:
+                if verbose:
+                    print(f'No column {drop_col}.')
+                else:
+                    pass
+        except HTTPError as err:
+            fail_str = (f'Failed to download from URL: {url}.')
+            print(err.code, fail_str)
+            raise
+
+        save_local(df, database=database, year=year)
 
     return df
diff --git a/nrelpy/re_potential.py b/nrelpy/re_potential.py
@@ -1,34 +1,40 @@
 from urllib.error import HTTPError
 import pandas as pd
 import warnings
-# warnings.filterwarnings("ignore")
 
 
-def as_dataframe(verbose=False, **kwargs):
+def as_dataframe(url=None, verbose=False, **kwargs):
     """
     This function downloads the Renewable Energy Technical Potential dataset.
     If this data is used in a research publication, users should cite:
 
-    CITE: Lopez, A. et al. (2012). "U.S. Renewable Energy Technical Potentials: 
-    A GIS-Based Analysis." NREL/TP-6A20-51946. Golden, CO: National Renewable 
+    CITE: Lopez, A. et al. (2012). "U.S. Renewable Energy Technical Potentials:
+    A GIS-Based Analysis." NREL/TP-6A20-51946. Golden, CO: National Renewable
     Energy Laboratory.
-    
+
     Returns
     -------
     df : pandas.DataFrame
         The United States Renewable Energy Technical Potential dataset as a pandas dataframe.
     """
-    url = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential.xlsx"
+    if url:
+        URL = url
+    else:
+        URL = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential.xlsx"
 
     try:
         print(f'Downloading Renewable Energy Technical Potential')
         if not verbose:
             warnings.simplefilter(action='ignore', category=UserWarning)
-        df = pd.read_excel(url, sheet_name='Data', skiprows=1, index_col='State')
+        df = pd.read_excel(
+            URL,
+            sheet_name='Data',
+            skiprows=1,
+            index_col='State')
         print('Download Successful.')
 
     except HTTPError as err:
-        fail_str = (f'Failed to download from URL: {url}.')
+        fail_str = (f'Failed to download from URL: {URL}.')
         print(err.code, fail_str)
         raise
 

diff --git a/nrelpy/tests/test_data_io.py b/nrelpy/tests/test_data_io.py
@@ -0,0 +1,103 @@
+from nrelpy.utils.data_io import save_local, check_stored_data, DATA_PATH
+from pathlib import Path
+import sys
+import os
+import glob
+import pandas as pd
+
+# set up test data
+data = {'tech': ['nuclear', 'solar', 'wind', 'naturalgas'],
+            'variable_cost': [20, 0, 0, 180],  # $/GWh
+            'fixed_cost': [92, 4, 11, 21],
+            'capital_cost': [5.9, 0.8, 1.4, 1.0],
+            'capacity_GW': [12, 3, 7, 5],
+            'capacity_factor': [0.93, 0.17, 0.33, 0.45],
+            'resentment': [100, 20, 50, 70]
+            }
+tech_df = pd.DataFrame(data)
+
+db = 'electricity'
+yr = 1882
+
+data_path_exists = DATA_PATH.exists()
+
+user_path = DATA_PATH / 'tmp_path'
+user_path.mkdir(exist_ok=True, parents=True)
+
+
+def test_save_local_case1():
+    """
+    This tests the standard use case of `save_local`
+    where a dataframe, database type, and year are passed.
+    """
+    save_local(tech_df, database=db, year=yr)
+    file_name = str(DATA_PATH / f'ATBe_{yr}.pkl')
+    files = glob.glob(file_name)
+    os.remove(file_name)
+    assert len(files) == 1
+    return
+
+def test_save_local_case2():
+    """
+    This tests a standard use case of `save_local`
+    where a dataframe, database type, and year are passed.
+    The dataframe is saved as a CSV instead of pickled.
+    """
+    save_local(tech_df, database=db, year=yr, pickle=False)
+    file_name = str(DATA_PATH / f'ATBe_{yr}.csv')
+    files = glob.glob(file_name)
+    os.remove(file_name)
+    assert len(files) == 1
+    return
+
+
+def test_save_local_case3():
+    """
+    This tests a standard use case of `save_local`
+    where a dataframe, database type, and year are passed.
+    The dataframe is saved to a user specified directory.
+    """
+    save_local(tech_df, database=db, year=yr, pickle=True, path=user_path)
+    file_name = str(user_path / f'ATBe_{yr}.pkl')
+    files = glob.glob(file_name)
+    os.remove(file_name)
+    assert len(files) == 1
+    return
+
+
+def test_check_stored_data_case1():
+    """
+    This tests a standard use case of `check_stored_data`
+    where a database type and a year are passed.
+    """
+    save_local(tech_df, database=db, year=yr)
+    df = check_stored_data(database=db, year=yr)
+    file_name_yr = str(DATA_PATH / f'ATBe_{yr}.pkl')
+    os.remove(file_name_yr)
+    assert df.equals(tech_df)
+    return
+
+
+def test_check_stored_data_case2():
+    """
+    This tests the use case of `check_stored_data`
+    where only a database type is passed (no year).
+    """
+    save_local(tech_df, database=db)
+    df = check_stored_data(database=db)
+    file_name_no_yr = str(DATA_PATH / f'ATBe.pkl')
+    os.remove(file_name_no_yr)
+    assert df.equals(tech_df)
+    return
+
+
+def test_check_stored_data_case3():
+    """
+    This tests saving and reading the data in a csv format.
+    """
+    save_local(tech_df, database=db, pickle=False)
+    df = check_stored_data(database=db, pickled=False)
+    file_name_no_yr = str(DATA_PATH / f'ATBe.csv')
+    os.remove(file_name_no_yr)
+    assert df.equals(tech_df)
+    return
diff --git a/nrelpy/tests/test_re_potential.py b/nrelpy/tests/test_re_potential.py
@@ -1,8 +1,42 @@
 from nrelpy.re_potential import *
+import pytest
+import pandas as pd
 
 
-def test_as_dataframe():
+def test_as_dataframe_standard():
+    """
+    This tests the base case where no
+    arguments are passed.
+    """
 
     df = as_dataframe()
 
+    assert type(df) == pd.DataFrame
+
+    return
+
+
+def test_as_dataframe_bad_url():
+    """
+    This tests the error case where a bad
+    url is passed and an HTTPError is raised.
+    """
+
+    bad_url = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential"
+    with pytest.raises(HTTPError) as e:
+        df = as_dataframe(url=bad_url)
+
+    return
+
+
+@pytest.mark.filterwarnings("ignore")
+def test_as_dataframe_verbose():
+    """
+    This tests the verbosity setting of `as_dataframe`.
+    """
+
+    df = as_dataframe(verbose=True)
+
+    assert type(df) == pd.DataFrame
+
     return
diff --git a/nrelpy/utils/__init__.py b/nrelpy/utils/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,7 @@ __pycache__/ @@
     # Distribution / packaging
     .Python
+    data/
     build/
     develop-eggs/
     dist/
@@ Expand Down @@