Skip to content

Commit

Permalink
add footnotes to main script
Browse files Browse the repository at this point in the history
  • Loading branch information
n-h-diaz committed Sep 25, 2023
1 parent ac32a40 commit 7e284cc
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 59 deletions.
44 changes: 0 additions & 44 deletions scripts/un/sdg/footnote.py

This file was deleted.

25 changes: 14 additions & 11 deletions scripts/un/sdg/geographies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
import csv
import json

# MCF node template with placeholders {dcid}, {type} and {containment}.
# {containment} is substituted immediately after the type with no separator,
# so the caller supplies any leading newline itself.
# NOTE(review): presumably used for the nodes written to
# geography/un_containment.mcf - confirm against the writer loop.
CONTAINMENT_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcid:{type}{containment}
'''
# MCF node template for a base place, with placeholders {dcid}, {type},
# {name}, {code} (emitted as unDataCode) and {label} (emitted as unDataLabel).
# Values for name/code/label are wrapped in double quotes by the template.
PLACE_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcs:{type}
name: "{name}"
unDataCode: "{code}"
unDataLabel: "{label}"
'''

# Curated map of SDG GEOGRAPHY_CODE to UN data code.
FIXED = {
'africa': '2',
'undata-geo/G99999999': '952',
Expand Down Expand Up @@ -52,13 +65,6 @@ def should_include_containment(s, s_dcid, o, o_dcid):
un2dc[row['unDataCode']] = (row['dcid'], type, row['dc_name'])

# write base place mcf
PLACE_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcs:{type}
name: "{name}"
unDataCode: "{code}"
unDataLabel: "{label}"
'''
un2dc2 = {}
subjects = set()
with open('geography/geographies.csv') as f_in:
Expand Down Expand Up @@ -118,10 +124,7 @@ def should_include_containment(s, s_dcid, o, o_dcid):
if should_include_containment(s_type, s_dcid, o_type, o_dcid):
containment[(s_dcid, s_type)].append(o_dcid)

CONTAINMENT_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcid:{type}{containment}
'''

with open('geography/un_containment.mcf', 'w') as f:
for s in sorted(containment):
c = ''
Expand Down
35 changes: 32 additions & 3 deletions scripts/un/sdg/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,18 @@ def get_measurement_method(row):
mmethod += '_' + str(row['REPORTING_TYPE'])
return 'SDG' + mmethod


def drop_null(value, series, footnote):
'''Returns value or '' if it should be dropped.
Args:
value: Input value.
series: Series code.
footnote: Footnote for observation.
Returns:
value or ''.
'''
if series not in util.ZERO_NULL:
return value
if footnote != util.ZERO_NULL_TEXT:
Expand All @@ -89,6 +100,13 @@ def drop_special(value, variable, series):
return value


def fix(s):
    '''Repairs text whose UTF-8 bytes were mis-decoded as Latin-1.

    Args:
      s: Input string.
    Returns:
      The string obtained by re-encoding s as Latin-1 and decoding the bytes
      as UTF-8, or s unchanged if that round trip is not possible.
    '''
    try:
        return s.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Either s has characters outside Latin-1, or its Latin-1 bytes are
        # not valid UTF-8; in both cases s was not mis-decoded, so keep it.
        return s


def process(input_dir, schema_dir, csv_dir):
'''Generates mcf, csv/tmcf artifacts.
Expand Down Expand Up @@ -210,8 +228,11 @@ def process(input_dir, schema_dir, csv_dir):
'SG_SCP_PROCN_LS.LEVEL_STATUS--DEG_MLOW__GOVERNMENT_NAME--CITY_OF_WROCLAW'
)

#sv_frames.append(df.loc[:,
# ['VARIABLE_CODE', 'VARIABLE_DESCRIPTION'] +
# properties].drop_duplicates())
sv_frames.append(df.loc[:,
['VARIABLE_CODE', 'VARIABLE_DESCRIPTION'] +
['VARIABLE_CODE', 'VARIABLE_DESCRIPTION', 'SOURCE'] +
properties].drop_duplicates())
measurement_method_frames.append(
df.loc[:, ['NATURE', 'OBS_STATUS', 'REPORTING_TYPE']].
Expand All @@ -238,9 +259,15 @@ def process(input_dir, schema_dir, csv_dir):

with open(os.path.join(schema_dir, 'sv.mcf'), 'w') as f:
for df in sv_frames:
for _, row in df.iterrows():
main = df.drop(['SOURCE'], axis=1).drop_duplicates()
for _, row in main.iterrows():
sources = df.loc[df['VARIABLE_CODE'] == row['VARIABLE_CODE']]
sources = sources.loc[:, ['SOURCE']].drop_duplicates()['SOURCE']
footnote = ''
if not sources.empty:
footnote = '\nfootnote: "Includes data from the following sources: ' + '; '.join(sorted([fix(str(s)).removesuffix('.').strip().replace('"', "'").replace('\n', '').replace('\t', '').replace('__', '_') for s in sources])) + '"'
cprops = ''
for dimension in sorted(df.columns[2:]):
for dimension in sorted(main.columns[2:]):
# Skip totals.
if row[dimension] == util.TOTAL:
continue
Expand Down Expand Up @@ -272,6 +299,8 @@ def process(input_dir, schema_dir, csv_dir):
'"' + row['VARIABLE_DESCRIPTION'] + '"',
'cprops':
cprops,
'footnote':
footnote,
}))

with open(os.path.join(schema_dir, 'schema.mcf'), 'w') as f:
Expand Down
2 changes: 2 additions & 0 deletions scripts/un/sdg/testdata/test_schema/sv.mcf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ measuredProperty: dcs:value
name: "Food waste"
populationType: dcs:SDG_AG_FOOD_WST
statType: dcs:measuredValue
footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR"

Node: dcid:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS
typeOf: dcs:StatisticalVariable
Expand All @@ -13,3 +14,4 @@ name: "Food waste [Households]"
populationType: dcs:SDG_AG_FOOD_WST
statType: dcs:measuredValue
sdg_foodWasteSector: dcs:SDG_FoodWasteSectorEnum_FWS_HHS
footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR"
2 changes: 1 addition & 1 deletion scripts/un/sdg/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
measuredProperty: dcs:value
name: {name}
populationType: dcs:{popType}
statType: dcs:measuredValue{cprops}
statType: dcs:measuredValue{cprops}{footnote}
'''
MMETHOD_TEMPLATE = '''
Node: dcid:{dcid}
Expand Down

0 comments on commit 7e284cc

Please sign in to comment.