diff --git a/scripts/world_bank/.gitignore b/scripts/world_bank/.gitignore index eafbb11c23..984034d9f1 100644 --- a/scripts/world_bank/.gitignore +++ b/scripts/world_bank/.gitignore @@ -1 +1,2 @@ preprocessed_source_csv +wdi/download_indicators/*.csv diff --git a/scripts/world_bank/wdi/download_indicators/wdi_download_indicators.py b/scripts/world_bank/wdi/download_indicators/wdi_download_indicators.py index 24776559a5..cfd8f9aeec 100644 --- a/scripts/world_bank/wdi/download_indicators/wdi_download_indicators.py +++ b/scripts/world_bank/wdi/download_indicators/wdi_download_indicators.py @@ -9,7 +9,10 @@ import numpy as np import pandas as pd -_OUT_PATH = flags.DEFINE_string('out_path', None, 'CNS path to write output.') +# The output path should have a default filename. +_OUT_DEFAULT_NAME = 'cleaned_wdi.csv' +_OUT_PATH = flags.DEFINE_string('out_path', _OUT_DEFAULT_NAME, + 'CNS path to write output.') indicators = [ 'SP.POP.TOTL', @@ -72,6 +75,7 @@ def DownloadAndParseCsvs() -> None: """ dat = [] for indicator in indicators: + print(f'DOWNLOADING: {indicator}....') resp = urllib.request.urlopen( f'http://api.worldbank.org/v2/country/all/indicator/{indicator}?source=2&downloadformat=csv' ) @@ -121,6 +125,8 @@ def DownloadAndParseCsvs() -> None: 'unit', ], ) + # Write to the _OUT_PATH which defaults to the output filename + # if no path is provided. with open(_OUT_PATH.value, 'w+') as f_out: out_df.to_csv(f_out, index=False)