Skip to content
This repository has been archived by the owner on Mar 29, 2022. It is now read-only.

Commit

Permalink
Merge pull request #51 from zelacerda/global-data-from-JHU
Browse files Browse the repository at this point in the history
Add functions to load global COVID-19 data from JHU
  • Loading branch information
dsevero authored Mar 21, 2020
2 parents f5faa82 + 03570ee commit 80e99d4
Showing 1 changed file with 60 additions and 0 deletions.
60 changes: 60 additions & 0 deletions data/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,66 @@ def _load_uf_pop():
df.to_csv(output_path, index=False)
print(f'{filename} data exported to {output_path}')

def load_jh_df(csv, base_path=None):
    '''
    Loads one Johns Hopkins (JH) time-series CSV and reshapes it into
    long format, aggregated per country.

    Parameters
    ----------
    csv : sequence
        ``(value_name, filename)``: ``csv[0]`` is the name given to the
        value column of the result, ``csv[1]`` is the CSV file name that
        is appended to ``base_path``.
    base_path : str, optional
        Prefix (URL or local directory, including the trailing
        separator) that ``csv[1]`` is appended to. Defaults to the
        ``csse_covid_19_time_series`` folder of the JH GitHub repository.

    Returns
    -------
    pandas.DataFrame
        One row per (country, date) with the columns ``country``,
        ``date`` (parsed with the ``%m/%d/%y`` format) and ``csv[0]``
        (values summed over all rows of the same country).
    '''
    if base_path is None:
        base_path = (
            'https://raw.githubusercontent.com/'
            'CSSEGISandData/COVID-19/master/'
            'csse_covid_19_data/csse_covid_19_time_series/'
        )
    value_name, filename = csv[0], csv[1]

    wide = (
        pd.read_csv(base_path + filename)
        # Every non-date column except the grouping key must go before
        # the sum: a leftover 'Province/State' column would otherwise be
        # melted as if it were a date and break the date parsing below.
        .drop(
            columns=['Province/State', 'Lat', 'Long'],
            errors='ignore'
        )
        .groupby('Country/Region')
        .sum()
        .reset_index()
        .rename(columns={'Country/Region': 'country'})
    )

    return (
        wide
        .melt(
            id_vars=['country'],
            var_name='date',
            value_name=value_name
        )
        .assign(
            date=lambda x: pd.to_datetime(
                x['date'],
                format='%m/%d/%y'
            )
        )
    )

def load_jh_data():
    '''
    Loads the latest COVID-19 global data from the
    Johns Hopkins University repository.

    Downloads the confirmed-cases, deaths and recoveries time series
    through ``load_jh_df`` and inner-joins them on country and date.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``date``, ``cases``, ``deaths``,
        ``recoveries``, ``country``. Only (country, date) pairs present
        in all three series are kept.
    '''
    # NOTE(review): JHU retired the 'time_series_19-covid-*.csv' files in
    # late March 2020 in favour of the '*_global.csv' names used here;
    # confirm against the upstream repository README.
    sources = (
        ('cases', 'time_series_covid19_confirmed_global.csv'),
        ('deaths', 'time_series_covid19_deaths_global.csv'),
        ('recoveries', 'time_series_covid19_recovered_global.csv'),
    )
    # Explicit join keys instead of relying on pandas to infer the
    # columns shared by the frames.
    join_keys = ['country', 'date']

    frames = [load_jh_df(source) for source in sources]
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, on=join_keys)

    return merged.reindex(
        columns=['date',
                 'cases',
                 'deaths',
                 'recoveries',
                 'country']
    )

if __name__ == '__main__':
try:
Expand Down

0 comments on commit 80e99d4

Please sign in to comment.