Skip to content

Commit

Permalink
more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
n-h-diaz committed Sep 29, 2023
1 parent 4481b2d commit 98c0cf1
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 33 deletions.
2 changes: 1 addition & 1 deletion scripts/un/sdg/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def write_place_mappings(output, sdg2un, un2dc, un2dc2):
with open(output, 'w') as f:
writer = csv.DictWriter(f, fieldnames=['sdg', 'dcid'])
writer.writeheader()
for code in sdg2un:
for code in sorted(sdg2un):
un = sdg2un[code]
if un in un2dc:
dcid = un2dc[un][0]
Expand Down
2 changes: 1 addition & 1 deletion scripts/un/sdg/geography/place_mappings.csv
Git LFS file not shown
35 changes: 28 additions & 7 deletions scripts/un/sdg/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Usage: python3 process.py
'''
import collections
import csv
import math
import os
import pandas as pd
Expand All @@ -39,18 +40,35 @@
from un.sdg import util


def get_geography(code):
def get_place_mappings(file):
    '''Produces map of SDG code -> dcid.

    Args:
        file: Input file path of a CSV with 'sdg' and 'dcid' header columns.

    Returns:
        Map of SDG code -> dcid. If a code appears in several rows, the
        last row wins.
    '''
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields are read back unchanged.
    with open(file, newline='') as f:
        return {
            str(row['sdg']): str(row['dcid']) for row in csv.DictReader(f)
        }


def get_geography(code, place_mappings):
'''Returns dcid of geography.
Args:
code: Geography code.
type: Geography type.
place_mappings: Map of SDG code -> dcid.
Returns:
Geography dcid.
'''
if str(code) in util.PLACE_MAPPINGS:
return 'dcs:' + util.PLACE_MAPPINGS[str(code)]
if str(code) in place_mappings:
return 'dcs:' + place_mappings[str(code)]
return ''


Expand Down Expand Up @@ -126,7 +144,7 @@ def fix_encoding(s):
return s.encode('utf8').decode('utf8')


def process(input_dir, schema_dir, csv_dir):
def process(input_dir, schema_dir, csv_dir, place_mappings):
'''Generates mcf, csv/tmcf artifacts.
Produces:
Expand All @@ -144,6 +162,7 @@ def process(input_dir, schema_dir, csv_dir):
input_dir: Path to input xlsx files.
schema_dir: Path to output schema files.
csv_dir: Path to output csv files.
place_mappings: Map of SDG code -> dcid.
'''
with open(os.path.join(schema_dir, 'series.mcf'), 'w') as f_series:
with open(os.path.join(schema_dir, 'sdg.textproto'), 'w') as f_vertical:
Expand Down Expand Up @@ -232,7 +251,8 @@ def process(input_dir, schema_dir, csv_dir):

# Format places.
df['GEOGRAPHY_CODE'] = df.apply(
lambda x: get_geography(x['GEOGRAPHY_CODE']), axis=1)
lambda x: get_geography(x['GEOGRAPHY_CODE'], place_mappings),
axis=1)
df = df[df['GEOGRAPHY_CODE'] != '']
if df.empty:
continue
Expand Down Expand Up @@ -398,4 +418,5 @@ def process(input_dir, schema_dir, csv_dir):
if os.path.exists('csv'):
shutil.rmtree('csv')
os.makedirs('csv')
process('sdg-dataset/output', 'schema', 'csv')
place_mappings = get_place_mappings('geography/place_mappings.csv')
process('sdg-dataset/output', 'schema', 'csv', place_mappings)
27 changes: 22 additions & 5 deletions scripts/un/sdg/process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,21 @@

module_dir_ = os.path.dirname(__file__)

PLACE_MAPPINGS = {
'1': 'Earth',
'2': 'africa',
'4': 'country/AFG',
'5': 'southamerica',
'8': 'country/ALB',
'9': 'oceania',
'11': 'WesternAfrica',
'12': 'country/DZA',
'13': 'CentralAmerica',
'14': 'EasternAfrica',
'840': 'country/USA',
'AF_MAZAR_E_SHARIF': 'wikidataId/Q130469'
}


def assert_equal_dir(self, result_dir, expected_dir):
for root, _, files in os.walk(result_dir):
Expand All @@ -40,10 +55,12 @@ def assert_equal_dir(self, result_dir, expected_dir):
class ProcessTest(unittest.TestCase):

def test_get_geography(self):
self.assertEqual(process.get_geography(840), 'dcs:country/USA')
self.assertEqual(process.get_geography('AF_MAZAR_E_SHARIF'),
'dcs:wikidataId/Q130469')
self.assertEqual(process.get_geography(1), 'dcs:Earth')
self.assertEqual(process.get_geography(840, PLACE_MAPPINGS),
'dcs:country/USA')
self.assertEqual(
process.get_geography('AF_MAZAR_E_SHARIF', PLACE_MAPPINGS),
'dcs:wikidataId/Q130469')
self.assertEqual(process.get_geography(1, PLACE_MAPPINGS), 'dcs:Earth')

def test_get_measurement_method(self):
d = {'NATURE': ['E'], 'OBS_STATUS': ['A'], 'REPORTING_TYPE': ['G']}
Expand Down Expand Up @@ -74,7 +91,7 @@ def test_process(self):
with tempfile.TemporaryDirectory() as tmp_csv:
process.process(
os.path.join(module_dir_, 'testdata/test_input'),
tmp_schema, tmp_csv)
tmp_schema, tmp_csv, PLACE_MAPPINGS)
assert_equal_dir(
self, tmp_schema,
os.path.join(module_dir_, 'testdata/test_schema'))
Expand Down
19 changes: 0 additions & 19 deletions scripts/un/sdg/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#import os
import re
#import sys

'''
sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(
Expand Down Expand Up @@ -179,24 +178,6 @@
'48 to 59 months': '4 to 5 years old'
}

# Map of SDG code -> dcid.
def get_place_mappings(file):
    '''Produces map of SDG code -> dcid.

    Args:
        file: Input file path of a CSV with 'sdg' and 'dcid' header columns.

    Returns:
        Map of SDG code -> dcid. If a code appears in several rows, the
        last row wins.
    '''
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields are read back unchanged.
    with open(file, newline='') as f:
        return {
            str(row['sdg']): str(row['dcid']) for row in csv.DictReader(f)
        }
PLACE_MAPPINGS = get_place_mappings('geography/place_mappings.csv')


def format_description(s):
'''Formats input with curated style.
Expand Down

0 comments on commit 98c0cf1

Please sign in to comment.