Skip to content

Commit

Permalink
Merge pull request #10 from samgdotson/save_local
Browse files Browse the repository at this point in the history
Save data locally on first execution
  • Loading branch information
samgdotson authored Sep 11, 2022
2 parents 1780635 + f577e11 commit f98cc73
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 38 deletions.
9 changes: 9 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[run]
branch = True
source = nrelpy
omit =
nrelpy/tests/*
nrelpy/__init__.py
nrelpy/**/__init__.py
setup.py
nrelpy/version.py
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__/

# Distribution / packaging
.Python
data/
build/
develop-eggs/
dist/
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ df = REP.as_dataframe()

From the top-level `nrelpy` directory, run `pytest`.

You can also check the test coverage with:

```bash
pytest --cov-config=.coveragerc --cov=nrelpy
coverage html
```
`coverage html` creates a nicely formatted HTML page containing
the entire coverage report. To view it, open the `htmlcov/index.html` file in your browser.

### Contributing

Expand Down
54 changes: 31 additions & 23 deletions nrelpy/atb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from urllib.error import HTTPError
import pandas as pd
from nrelpy.utils.data_io import check_stored_data, save_local


def as_dataframe(year, database, verbose=False, **kwargs):
Expand All @@ -14,38 +15,45 @@ def as_dataframe(year, database, verbose=False, **kwargs):
* ATB Transportation (ATBt) accepts: [2020]
database : string
The desired ATB dataset. Accepts: 'electricity', 'transportation'.
Returns
-------
df : pandas.DataFrame
The ATB data as a pandas dataframe.
"""

atb_urls = {'electricity': f'https://oedi-data-lake.s3.amazonaws.com/ATB/electricity/csv/{year}/ATBe.csv',
'transportation':f"https://atb-archive.nrel.gov/transportation/{year}/files/{year}_ATB_Data_VehFuels_Download.xlsx"}
try:
df = check_stored_data(database=database, year=year)
except FileNotFoundError:
atb_urls = {
'electricity': f'https://oedi-data-lake.s3.amazonaws.com/ATB/electricity/csv/{year}/ATBe.csv',
'transportation': f"https://atb-archive.nrel.gov/transportation/{year}/files/{year}_ATB_Data_VehFuels_Download.xlsx"}

url = atb_urls[database]
url = atb_urls[database]

try:
print(f'Downloading NREL ATB {database} from {year}')
if database == 'electricity':
df = pd.read_csv(url, low_memory=False)
elif database == 'transportation':
df = pd.read_excel(url, sheet_name='Joined Data for Levelized Calc')
print('Download Successful.')
drop_col = ['Unnamed: 0']
if verbose:
print(f"Dropping column {drop_col}")
try:
df.drop(columns=drop_col, inplace=True)
except KeyError as err:
print(f'Downloading NREL ATB {database} from {year}')
if database == 'electricity':
df = pd.read_csv(url, low_memory=False)
elif database == 'transportation':
df = pd.read_excel(
url, sheet_name='Joined Data for Levelized Calc')
print('Download Successful.')
drop_col = ['Unnamed: 0']
if verbose:
print(f'No column {drop_col}.')
else:
pass
except HTTPError as err:
fail_str = (f'Failed to download from URL: {url}.')
print(err.code, fail_str)
raise
print(f"Dropping column {drop_col}")
try:
df.drop(columns=drop_col, inplace=True)
except KeyError as err:
if verbose:
print(f'No column {drop_col}.')
else:
pass
except HTTPError as err:
fail_str = (f'Failed to download from URL: {url}.')
print(err.code, fail_str)
raise

save_local(df, database=database, year=year)

return df
22 changes: 14 additions & 8 deletions nrelpy/re_potential.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,40 @@
from urllib.error import HTTPError
import pandas as pd
import warnings
# warnings.filterwarnings("ignore")


def as_dataframe(verbose=False, **kwargs):
def as_dataframe(url=None, verbose=False, **kwargs):
"""
This function downloads the United States Renewable Energy Technical Potential dataset.
If this data is used in a research publication, users should cite:
CITE: Lopez, A. et al. (2012). "U.S. Renewable Energy Technical Potentials:
A GIS-Based Analysis." NREL/TP-6A20-51946. Golden, CO: National Renewable
CITE: Lopez, A. et al. (2012). "U.S. Renewable Energy Technical Potentials:
A GIS-Based Analysis." NREL/TP-6A20-51946. Golden, CO: National Renewable
Energy Laboratory.
Returns
-------
df : pandas.DataFrame
The United States Renewable Energy Technical Potential dataset as a pandas dataframe.
"""
url = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential.xlsx"
if url:
URL = url
else:
URL = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential.xlsx"

try:
print(f'Downloading Renewable Energy Technical Potential')
if not verbose:
warnings.simplefilter(action='ignore', category=UserWarning)
df = pd.read_excel(url, sheet_name='Data', skiprows=1, index_col='State')
df = pd.read_excel(
URL,
sheet_name='Data',
skiprows=1,
index_col='State')
print('Download Successful.')

except HTTPError as err:
fail_str = (f'Failed to download from URL: {url}.')
fail_str = (f'Failed to download from URL: {URL}.')
print(err.code, fail_str)
raise

Expand Down
103 changes: 103 additions & 0 deletions nrelpy/tests/test_data_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from nrelpy.utils.data_io import save_local, check_stored_data, DATA_PATH
from pathlib import Path
import sys
import os
import glob
import pandas as pd

# set up test data
# Small hand-written technology table; it is the payload that the tests in
# this module write with `save_local` and read back with `check_stored_data`.
data = {'tech': ['nuclear', 'solar', 'wind', 'naturalgas'],
'variable_cost': [20, 0, 0, 180], # $/GWh
'fixed_cost': [92, 4, 11, 21],
'capital_cost': [5.9, 0.8, 1.4, 1.0],
'capacity_GW': [12, 3, 7, 5],
'capacity_factor': [0.93, 0.17, 0.33, 0.45],
'resentment': [100, 20, 50, 70]
}
tech_df = pd.DataFrame(data)

# Database name and year handed to `save_local` / `check_stored_data`.
# The tests expect the resulting file stem to be `ATBe_{yr}` (or `ATBe`
# when no year is given).
# NOTE(review): 1882 is presumably chosen so the test file cannot collide
# with a real ATB year -- confirm.
db = 'electricity'
yr = 1882

# Records whether DATA_PATH already existed before the scratch directory
# below is created. It is not read by any test in this module.
data_path_exists = DATA_PATH.exists()

# Scratch directory used by the "user specified directory" test.
# It is created at import time, together with any missing parents.
user_path = DATA_PATH / 'tmp_path'
user_path.mkdir(exist_ok=True, parents=True)


def test_save_local_case1():
    """
    Test the standard use case of `save_local`.

    A dataframe, database type, and year are passed. The test expects a
    pickle file named `ATBe_{yr}.pkl` to exist in `DATA_PATH` afterwards.
    """
    target = DATA_PATH / f'ATBe_{yr}.pkl'
    try:
        save_local(tech_df, database=db, year=yr)
        # A missing file is reported as an assertion failure rather than
        # as a FileNotFoundError raised by the cleanup step.
        assert target.exists()
    finally:
        # Always remove the artifact so a failing run cannot leave test
        # data behind in the package data directory.
        if target.exists():
            target.unlink()

def test_save_local_case2():
    """
    Test `save_local` with pickling disabled.

    A dataframe, database type, and year are passed with `pickle=False`.
    The test expects a CSV file named `ATBe_{yr}.csv` to exist in
    `DATA_PATH` afterwards.
    """
    target = DATA_PATH / f'ATBe_{yr}.csv'
    try:
        save_local(tech_df, database=db, year=yr, pickle=False)
        # A missing file is reported as an assertion failure rather than
        # as a FileNotFoundError raised by the cleanup step.
        assert target.exists()
    finally:
        # Always remove the artifact so a failing run cannot leave test
        # data behind in the package data directory.
        if target.exists():
            target.unlink()


def test_save_local_case3():
    """
    Test `save_local` with a user specified directory.

    A dataframe, database type, and year are passed together with
    `path=user_path`. The test expects a pickle file named
    `ATBe_{yr}.pkl` to exist in `user_path` afterwards.
    """
    target = user_path / f'ATBe_{yr}.pkl'
    try:
        save_local(tech_df, database=db, year=yr, pickle=True, path=user_path)
        # A missing file is reported as an assertion failure rather than
        # as a FileNotFoundError raised by the cleanup step.
        assert target.exists()
    finally:
        # Always remove the artifact so a failing run cannot leave test
        # data behind in the scratch directory.
        if target.exists():
            target.unlink()


def test_check_stored_data_case1():
    """
    Test the standard use case of `check_stored_data`.

    The test dataframe is saved with a database type and a year, then read
    back with the same arguments. The loaded dataframe must equal the one
    that was saved.
    """
    target = DATA_PATH / f'ATBe_{yr}.pkl'
    try:
        save_local(tech_df, database=db, year=yr)
        df = check_stored_data(database=db, year=yr)
        assert df.equals(tech_df)
    finally:
        # Clean up even when loading or the comparison fails, so that a
        # broken run does not leave a stored file behind.
        if target.exists():
            target.unlink()


def test_check_stored_data_case2():
    """
    Test `check_stored_data` when no year is passed.

    The test dataframe is saved with only a database type, which the test
    expects to produce `ATBe.pkl` in `DATA_PATH`. It is then read back the
    same way and must equal the dataframe that was saved.
    """
    target = DATA_PATH / 'ATBe.pkl'
    try:
        save_local(tech_df, database=db)
        df = check_stored_data(database=db)
        assert df.equals(tech_df)
    finally:
        # Clean up even when loading or the comparison fails, so that a
        # broken run does not leave a stored file behind.
        if target.exists():
            target.unlink()


def test_check_stored_data_case3():
    """
    Test saving and reading the data in a csv format.

    The test dataframe is saved with `pickle=False`, which the test expects
    to produce `ATBe.csv` in `DATA_PATH`. It is then read back with
    `pickled=False` and must equal the dataframe that was saved.
    """
    target = DATA_PATH / 'ATBe.csv'
    try:
        save_local(tech_df, database=db, pickle=False)
        df = check_stored_data(database=db, pickled=False)
        assert df.equals(tech_df)
    finally:
        # Clean up even when loading or the comparison fails, so that a
        # broken run does not leave a stored file behind.
        if target.exists():
            target.unlink()
36 changes: 35 additions & 1 deletion nrelpy/tests/test_re_potential.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,42 @@
from nrelpy.re_potential import *
import pytest
import pandas as pd


def test_as_dataframe():
def test_as_dataframe_standard():
    """
    Test the base case where no arguments are passed.

    The call must return a pandas dataframe.
    """
    df = as_dataframe()

    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(df, pd.DataFrame)


def test_as_dataframe_bad_url():
    """
    Test the failure case where a bad url is passed.

    The url below is the default download address without its `.xlsx`
    extension. The call is expected to raise an `HTTPError`.
    """
    bad_url = "https://www.nrel.gov/gis/assets/docs/us-re-technical-potential"

    # Only the raised exception type matters, so neither the exception
    # info nor the (never produced) return value is bound to a name.
    with pytest.raises(HTTPError):
        as_dataframe(url=bad_url)


@pytest.mark.filterwarnings("ignore")
def test_as_dataframe_verbose():
    """
    Test the verbosity setting of `as_dataframe`.

    The function is called with `verbose=True` and must still return a
    pandas dataframe. Warnings raised during the call are ignored for this
    test through the `filterwarnings` mark.
    """
    df = as_dataframe(verbose=True)

    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(df, pd.DataFrame)
Empty file added nrelpy/utils/__init__.py
Empty file.
Loading

0 comments on commit f98cc73

Please sign in to comment.