diff --git a/.gitmodules b/.gitmodules index e688780163..091f6e0ff4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "scripts/un/sdg/sdg-dataset"] path = scripts/un/sdg/sdg-dataset url = https://code.officialstatistics.org/undata2/data-commons/sdg-dataset.git +[submodule "scripts/un/sdg/sssom-mappings"] + path = scripts/un/sdg/sssom-mappings + url = https://code.officialstatistics.org/undata2/sssom-mappings.git diff --git a/scripts/un/sdg/.gitattributes b/scripts/un/sdg/.gitattributes index ba109e3e5d..1f4eadb1ea 100644 --- a/scripts/un/sdg/.gitattributes +++ b/scripts/un/sdg/.gitattributes @@ -1,3 +1,4 @@ csv/* filter=lfs diff=lfs merge=lfs -text schema/* filter=lfs diff=lfs merge=lfs -text dc_generated/* filter=lfs diff=lfs merge=lfs -text +geography/* filter=lfs diff=lfs merge=lfs -text diff --git a/scripts/un/sdg/README.md b/scripts/un/sdg/README.md index 0afb292ce5..747f6b9712 100644 --- a/scripts/un/sdg/README.md +++ b/scripts/un/sdg/README.md @@ -1,13 +1,19 @@ # UN Stats Sustainable Development Goals -This import includes country, city, and select region-level data from the [UN SDG Global Database](https://unstats.un.org/sdgs/dataportal). Data is read from the submodule `sdg-dataset` which is managed by UN Stats. +This import includes data from the [UN SDG Global Database](https://unstats.un.org/sdgs/dataportal). Data is read from the submodule `sdg-dataset` which is managed by UN Stats. Geography mappings are read from the submodule `sssom-mappings` which is also managed by UN Stats. -To generate city dcids: +To generate place mappings: ``` -python3 cities.py +python3 geography.py ``` -(Note: many of these cities will require manual curation, so this script likely should not be rerun.) +Produces: +* geography/ folder: + * un_places.mcf (place mcf) + * un_containment.mcf (place containment triples) + * place_mappings.csv (map of SDG code -> dcid) + +Note that the `place_mappings.csv` is required before running the `process.py` script. 
To process data and generate artifacts: ``` @@ -23,9 +29,14 @@ Produces: * unit.mcf * csv/ folder: * [CODE].csv -(Note that the `schema/` folder is not included in the repository but can be regenerated by running the script.) +(Note that these folders are not included in the repository but can be regenerated by running the script.) + +When refreshing the data, the `geography`, `schema`, and `csv` folders might all get updated and will need to be resubmitted to g3. The corresponding TMCF file is `sdg.tmcf`. To run unit tests: ``` python3 -m unittest discover -v -s ../ -p "*_test.py" ``` + +Notes: +* We currently drop certain series and variables (refer to `util.py` for the list) which have been identified by UN as potentially containing outliers. \ No newline at end of file diff --git a/scripts/un/sdg/cities.csv b/scripts/un/sdg/cities.csv deleted file mode 100644 index 643571d9be..0000000000 --- a/scripts/un/sdg/cities.csv +++ /dev/null @@ -1,1774 +0,0 @@ -name,dcid -WORLD_ALL, -MAZAR_E_SHARIF,wikidataId/Q130469 -POL_E_KHOMRI,wikidataId/Q477148 -HERAT,wikidataId/Q182844 -LASHKAR_GAH,wikidataId/Q476800 -KHAN_ABAD,wikidataId/Q1247205 -KANDAHAR,wikidataId/Q173808 -KABUL,wikidataId/Q188933 -JALALABAD,wikidataId/Q183303 -FARAH,wikidataId/Q180330 -CHARIKAR,wikidataId/Q944395 -TOLGA,wikidataId/Q2280394 -TIARET,wikidataId/Q565568 -TEBESSA,wikidataId/Q984575 -TAMANRASSET,wikidataId/Q205792 -ORAN,wikidataId/Q131818 -M_SILA,wikidataId/Q402825 -MILA,wikidataId/Q335676 -KHEMIS_MILIANA,wikidataId/Q954285 -ALGIERS,wikidataId/Q3561 -DJELFA,wikidataId/Q2794758 -CHLEF,wikidataId/Q849561 -BLIDA,wikidataId/Q216990 -BATNA,wikidataId/Q338844 -ANNABA,wikidataId/Q45942 -EL_KHROUB,wikidataId/Q391129 -LUANDA,wikidataId/Q3897 -BAKU,wikidataId/Q13662815 -ZARATE,wikidataId/Q246956 -SANTIAGO_DEL_ESTERO,wikidataId/Q44827 -SAN_SALVADOR_DE_JUJUY,wikidataId/Q44217 -SAN_PEDRO_DE_JUJUY,wikidataId/Q1020263 -SAN_MIGUEL_DE_TUCUMAN,wikidataId/Q44255 -SAN_MARTIN,wikidataId/Q781245 
-SAN_JUAN,wikidataId/Q44241 -ROSARIO,wikidataId/Q52535 -RIO_GALLEGOS,wikidataId/Q44242 -RIO_CUARTO,wikidataId/Q983451 -FORMOSA,wikidataId/Q44214 -OBERA,wikidataId/Q55377 -NEUQUEN,wikidataId/Q44239 -MENDOZA,wikidataId/Q44237 -CONCORDIA,wikidataId/Q52578 -CORDOBA,wikidataId/Q44210 -LA_PLATA,wikidataId/Q44059 -COMODORO_RIVADAVIA,wikidataId/Q221921 -CATAMARCA,wikidataId/Q44162 -BUENOSAIRES,wikidataId/Q1486 -BAHIA_BLANCA,wikidataId/Q54108 -PARANA,wikidataId/Q44213 -HOBART, -PERTH,wikidataId/Q3183 -MUSWELLBROOK,wikidataId/Q1008383 -MELBOURNE,wikidataId/Q3141 -LAUCENSTON,wikidataId/Q937765 -KINGAROY,wikidataId/Q605972 -CAIRNS,wikidataId/Q189133 -GOLDCOAST,wikidataId/Q140075 -GEELONG,wikidataId/Q231765 -DARWIN,wikidataId/Q11568 -CESSNOCK,wikidataId/Q606357 -CANBERRA,wikidataId/Q3114 -BUNBURY,wikidataId/Q256711 -BRISBANE,wikidataId/Q34932 -ADELAIDE,wikidataId/Q5112 -WOLLONGONG,wikidataId/Q187861 -SYDNEY,wikidataId/Q1094194 -SHEPPARTON,wikidataId/Q707030 -ALICE_SPRINGS,wikidataId/Q17872 -VIENNA,nuts/AT13 -SALZBURG,wikidataId/Q34713 -LINZ,wikidataId/Q41329 -BREGENZ,wikidataId/Q1737 -KLAGENFURT,nuts/AT211 -INNSBRUCK,nuts/AT332 -GRAZ,wikidataId/Q13298 -MANAMA,wikidataId/Q3882 -SYLHET,wikidataId/Q466221 -RAJSHAHI,wikidataId/Q322155 -MYMENSINGH_NASIRABAD, -KHULNA,wikidataId/Q243325 -JESSORE,wikidataId/Q1688300 -JAMALPUR,wikidataId/Q4159797 -GAZIPUR,wikidataId/Q2460416 -DINAJPUR,wikidataId/Q1985120 -DHAKA,wikidataId/Q1354 -COMILLA,wikidataId/Q1006296 -CHITTAGONG,wikidataId/Q158087 -SAIDPUR,wikidataId/Q3347049 -BARISAL,wikidataId/Q747840 -BOGRA,wikidataId/Q1918837 -AUSTRAILA_AND_NEWZEALAND_ALL,country/NZL -ETTERBEEK,wikidataId/Q192859 -BRUSSELS,wikidataId/Q239 -BERCHEM_SAINTE_AGATHE,wikidataId/Q272272 -AUDERGHEM,wikidataId/Q272228 -SAINT_JOSSE_TEN_NOODE,wikidataId/Q272243 -SAINT_GILLES,wikidataId/Q237674 -MOLENBEEK_SAINT_JEAN,wikidataId/Q180775 -KOEKELBERG,wikidataId/Q219244 -JETTE,wikidataId/Q241918 -IXELLES,wikidataId/Q208713 -GANSHOREN,wikidataId/Q366552 
-FOREST,wikidataId/Q72946 -EVERE,wikidataId/Q321718 -ANDERLECHT,wikidataId/Q12886 -WOLUWE_SAINT_PIERRE,wikidataId/Q242393 -WOLUWE_SAINT_LAMBERT,wikidataId/Q211764 -WATERMAEL_BOITSFORT,wikidataId/Q272262 -UCCLE,wikidataId/Q203312 -SCHAERBEEK,wikidataId/Q12887 -KAPELLEN_EKEREN,wikidataId/Q1797837 -GENT,wikidataId/Q1296 -OOSTENDE,wikidataId/Q12996 -NAMUR,wikidataId/Q134121 -MECHELEN,wikidataId/Q162022 -CHARLEROI,wikidataId/Q81046 -BRUGGE,wikidataId/Q12994 -LIEGE,wikidataId/Q3992 -LEUVEN,wikidataId/Q118958 -LA_LOUVIERE,wikidataId/Q211572 -ANTWERPEN,wikidataId/Q12892 -QUAREGNON_FRAMERIES,wikidataId/Q538735 -VERVIERS_DISON,wikidataId/Q682105 -CENTRAL_AND_SOUTHERN_ASIA_ALL, -THIMPHU,wikidataId/Q9270 -COCHABAMBA,wikidataId/Q183124 -SARAJEVO,wikidataId/Q11194 -BANJA_LUKA,wikidataId/Q131127 -PASSOS,wikidataId/Q985579 -PORTO_ALEGRE,wikidataId/Q40269 -SAOPAULO,wikidataId/Q174 -RIBEIRAO_PRETO,wikidataId/Q188892 -VITORIA_DA_CONQUISTA,wikidataId/Q22062839 -UMUARAMA,wikidataId/Q593948 -TEIXEIRA_DE_FREITAS,wikidataId/Q282335 -TATUI,wikidataId/Q1795613 -SAO_CARLOS,wikidataId/Q1815617 -SANTANA_PORTO_SANTANA,wikidataId/Q2105236 -SANTA_CRUZ_DO_CAPIBARIBE,wikidataId/Q2082237 -SALVADOR,wikidataId/Q36947 -PARAUAPEBAS,wikidataId/Q1806100 -PALMAS,wikidataId/Q178993 -MONTES_CLAROS,wikidataId/Q651906 -MACAPA,wikidataId/Q180215 -LIMEIRA,wikidataId/Q841241 -JEQUIE,wikidataId/Q630203 -ITUIUTABA,wikidataId/Q1756328 -CUIABA,wikidataId/Q170762 -ILHEUS,wikidataId/Q373705 -GUARAPARI,wikidataId/Q267439 -FLORIANOPOLIS, -DIVINOPOLIS,wikidataId/Q926606 -CURITIBA,wikidataId/Q4361 -CRICIUMA,wikidataId/Q1439157 -CAMPINAS,wikidataId/Q171617 -BRASILIA,wikidataId/Q22066977 -BELOHORIZONTE,wikidataId/Q42800 -BELEM,wikidataId/Q12829733 -BARRETOS,wikidataId/Q1637488 -ARAXA,wikidataId/Q586057 -APARECIDA_DE_GOIANIA,wikidataId/Q459711 -RECIFE,wikidataId/Q48344 -INDAIATUBA,wikidataId/Q616650 -HONIARA,wikidataId/Q40921 -RUSE,wikidataId/Q160173 -YAMBOL,wikidataId/Q186569 -VELIKO_TARNOVO,wikidataId/Q173474 
-DOBRIC,wikidataId/Q168668 -BURGAS,wikidataId/Q6509 -SOFIA,nuts/SOF46 -SHUMEN,wikidataId/Q181830 -PLOVDIV,wikidataId/Q459 -PLEVEN,wikidataId/Q165420 -MONTANA,wikidataId/Q220121 -VARNA,wikidataId/Q6506 -STARA_ZAGORA,wikidataId/Q170415 -TAUNGGYI,wikidataId/Q1075566 -TACHILEIK,wikidataId/Q1018494 -PATHEIN,wikidataId/Q870103 -NAY_PYI_TAW,wikidataId/Q37400 -MYITKYINA,wikidataId/Q1062370 -MANDALAY,wikidataId/Q185518 -LOIKAW,wikidataId/Q1017971 -LASHIO,wikidataId/Q1209397 -KALE,wikidataId/Q3129405 -MYEIK,wikidataId/Q1018163 -MYEDE,wikidataId/Q6947282 -MAWLAMYINE,wikidataId/Q685941 -YANGON,wikidataId/Q37995 -MINSK,wikidataId/Q2280 -MAZYR,wikidataId/Q386487 -KOBYRN,wikidataId/Q955992 -HRODNA,wikidataId/Q181376 -BREST-BELARUS,wikidataId/Q140147 -BABRUJSK,wikidataId/Q207294 -SALIHORSK,wikidataId/Q201393 -POLACK,wikidataId/Q200797 -GOMEL,wikidataId/Q2678 -VICIEBSK,wikidataId/Q102217 -HAMILTON_CANADA,wikidataId/Q133116 -GUELPH,wikidataId/Q504114 -GREATER_SUDBURY_GRAND_SUDBURY, -EDMONTON,wikidataId/Q2096 -CALGARY,wikidataId/Q36312 -BRANTFORD,wikidataId/Q34180 -BELLEVILLE,wikidataId/Q34227 -BARRIE,wikidataId/Q34183 -TROIS_RIVIERES,wikidataId/Q44012 -TORONTO,wikidataId/Q172 -THUNDER_BAY,wikidataId/Q34116 -ST_JOHNS,wikidataId/Q2082 -ST_CATHARINES,wikidataId/Q126805 -SHERBROOKE,wikidataId/Q139473 -SASKATOON,wikidataId/Q10566 -SAINT_JOHN,wikidataId/Q203403 -SAGUENAY,wikidataId/Q139229 -MONCTON,wikidataId/Q457334 -LONDON_CANADA,wikidataId/Q92561 -LETHBRIDGE,wikidataId/Q270887 -KITCHENER_CAMBRIDGE_WATERLOO,wikidataId/Q1028279 -ABBOTSFORD,wikidataId/Q271730 -KELOWNA,wikidataId/Q232226 -HALIFAX_CANADA,wikidataId/Q2141 -KINGSTON_CANADA,wikidataId/Q202973 -WINNIPEG,wikidataId/Q2135 -WINDSOR,wikidataId/Q182625 -VICTORIA,wikidataId/Q2132 -VANCOUVER,wikidataId/Q24639 -REGINA,wikidataId/Q2123 -QUEBEC,wikidataId/Q2145 -PETERBOROUGH_CANADA,wikidataId/Q776930 -OTTAWA_GATINEAU_QUEBEC_PART,wikidataId/Q1930 -OTTAWA_GATINEAU_ONTARIO_PART, -OSHAWA,wikidataId/Q211867 -MONTREAL,wikidataId/Q340 
-SAINT_JEAN_SUR_RICHELIEU,wikidataId/Q141977 -ANURADHAPURA,wikidataId/Q5724 -GALLE,wikidataId/Q319366 -PUTTALAM,wikidataId/Q3088741 -COLOMBO,wikidataId/Q35381 -CHILAW_FERRY_STREET, -BATTICALOA,wikidataId/Q810963 -BADULLA,wikidataId/Q390096 -RATNAPURA,wikidataId/Q508753 -MATARA,wikidataId/Q13360574 -KANDY,wikidataId/Q203197 -JAFFNA,wikidataId/Q215277 -HAPUTALE, -HAMBANTOTA,wikidataId/Q1025283 -EMBILIPITIYA,wikidataId/Q4802021 -RANCAGUA,wikidataId/Q4582 -VALPARAISO_VALPARAISO,wikidataId/Q33986 -TEMUCO,wikidataId/Q82128 -TALCA,wikidataId/Q4469 -SANTIAGO_SANTIAGO,wikidataId/Q2887 -ARICA,wikidataId/Q2203 -MAGALLANES_PUNTAARENAS,wikidataId/Q51599 -PENAFLOR,wikidataId/Q51098 -OVALLE,wikidataId/Q51060 -COPIAPO,wikidataId/Q3868 -CONCEPCION,wikidataId/Q1880 -ANTOFAGASTA_ANTOFAGASTA,wikidataId/Q3612 -IQUIQUE,wikidataId/Q14440 -LA_SERENA,wikidataId/Q14467 -LOS_ANGELES,wikidataId/Q16910 -LOSLAGOS_OSORNO,wikidataId/Q51059 -XUCHENG,wikidataId/Q11070407 -XINGPING,wikidataId/Q1201091 -WUHAN,wikidataId/Q11746 -TIANJIN,wikidataId/Q11736 -TANGSHAN,wikidataId/Q58422 -TAIPEI, -HANGZHOU,wikidataId/Q4970 -QINGDAO,wikidataId/Q170322 -LESHAN,wikidataId/Q426868 -KAIPING,wikidataId/Q599514 -JINAN,wikidataId/Q170247 -HONG_KONG,country/HKG -HAIKOU,wikidataId/Q189823 -GUIXI,wikidataId/Q1335331 -GAOYOU,wikidataId/Q1253949 -CHENGGUAN,wikidataId/Q10929428 -CHENGDU,wikidataId/Q30002 -CHANGZHOU,wikidataId/Q57970 -CHANGZHI,wikidataId/Q72945 -BEIJING,wikidataId/Q956 -GUANGZHOU,wikidataId/Q16572 -YIYANG,wikidataId/Q416669 -YUCHENG,wikidataId/Q1198528 -ZHENGZHOU,wikidataId/Q30340 -ZUNYI,wikidataId/Q28994 -ZHUJI,wikidataId/Q198222 -SHENZHEN,wikidataId/Q15174 -YANGGU,wikidataId/Q290059 -BOGOTA, -ZIPAQUIRA,wikidataId/Q205429 -VALLEDUPAR,wikidataId/Q376903 -SINCELEJO,wikidataId/Q1043513 -NEIVA,wikidataId/Q638260 -MEDELLIN,wikidataId/Q48278 -MAICAO,wikidataId/Q1937427 -GIRARDOT,wikidataId/Q186193 -FLORENCIA,wikidataId/Q2308980 -CARTAGO_COLOMBIA,wikidataId/Q2004074 -CARTAGENA_COLOMBIA,wikidataId/Q657461 
-CALI,wikidataId/Q51103 -BUGA,wikidataId/Q934753 -BUCARAMANGA,wikidataId/Q243766 -BARRANQUILLA,wikidataId/Q62823 -TUNJA,wikidataId/Q236744 -KINSHASA,wikidataId/Q3838 -LUBUMBASHI,wikidataId/Q187593 -SANJOSE,wikidataId/Q3070 -PUNTARENAS,wikidataId/Q30687 -PUERTO_LIMON,wikidataId/Q30620 -LIBERIA,wikidataId/Q1387526 -CARTAGO_COSTARICA,wikidataId/Q30617 -ZAGREB,wikidataId/Q1435 -ZADAR,wikidataId/Q3370 -SPLIT,wikidataId/Q1663 -RIJEKA,wikidataId/Q1647 -OSIJEK,wikidataId/Q1640 -BAYAMO,wikidataId/Q115382 -CAMAGUEY,wikidataId/Q115329 -CIENFUEGOS,wikidataId/Q190005 -GUANTANAMO,wikidataId/Q185156 -HOLGUIN,wikidataId/Q244154 -SANTIAGO_DE_CUBA, -LAS_TUNAS, -HAVANA,wikidataId/Q1563 -SANTA_CLARA,wikidataId/Q211590 -LEYMOSUN,wikidataId/Q185632 -SUBSAHARAN_ARFICA_ALL, -USTI_NAD_LABEM,wikidataId/Q156974 -PRAGUE,nuts/CZ01 -PARDUBICE,wikidataId/Q36989 -OSTRAVA,wikidataId/Q8385 -MOST,wikidataId/Q146363 -LIBEREC,wikidataId/Q146351 -KLADNO,wikidataId/Q155993 -HRADEC_KRALOVE,wikidataId/Q180139 -HRABUVKA, -HAVIROV,wikidataId/Q192904 -CHOMUTOV_JIRKOV,wikidataId/Q149097 -CESKE_BUDEJOVICE,wikidataId/Q16506 -PORUBA,wikidataId/Q3500288 -PLZEN,wikidataId/Q43453 -BRNO,wikidataId/Q14960 -OLOMOUC,wikidataId/Q81137 -BOHICON,wikidataId/Q2356092 -DJOUGOU,wikidataId/Q868198 -PARAKOU,wikidataId/Q688324 -NATITINGOU,wikidataId/Q994125 -KANDI,wikidataId/Q845666 -ARHUS,wikidataId/Q25319 -AALBORG,wikidataId/Q25410 -ODENSE,wikidataId/Q25331 -KOBENHAVN,wikidataId/Q1748 -GREVE_ISHOJ,wikidataId/Q3196979 -SAN_FRANCISCO_DE_MACORIS,wikidataId/Q681999 -PUERTA_PLATA,wikidataId/Q988413 -LA_VEGA,wikidataId/Q538953 -LA_ROMANA,wikidataId/Q40508 -HIQUEY,wikidataId/Q1020261 -SANTIAGO_DE_LOS_CABELLEROS,wikidataId/Q42763 -DURAN,wikidataId/Q1120810 -SANTODOMINGODELOSCOLORADOS,wikidataId/Q1015654 -RIOBAMBA,wikidataId/Q478675 -QUITO,wikidataId/Q2900 -QUEVEDO,wikidataId/Q773225 -MILAGRO,wikidataId/Q773238 -MACHALA,wikidataId/Q215080 -LOJA,wikidataId/Q944697 -GUAYAQUIL,wikidataId/Q1991935 -ESMERALDAS,wikidataId/Q149955 
-DAULE,wikidataId/Q1014430 -CUENCA,wikidataId/Q54886 -CHONE,wikidataId/Q2756591 -TULCAN,wikidataId/Q752113 -USULUTAN,wikidataId/Q1018092 -SANTAANA_SANTAANA,wikidataId/Q739664 -SANMIGUEL_SANMIGUEL,wikidataId/Q672458 -APOPA,wikidataId/Q619646 -SANSALVADOR_SANSALVADOR,wikidataId/Q3110 -ADDIS,wikidataId/Q3624 -HARAR,wikidataId/Q190184 -GONDAR,wikidataId/Q218861 -DIRE_DAWA, -BAHIR_DAR,wikidataId/Q464699 -AWASSA,wikidataId/Q755765 -ADAMA,wikidataId/Q351427 -TARTU,wikidataId/Q13972 -TALLINN,wikidataId/Q1770 -NARVA,wikidataId/Q102158 -SUVA,wikidataId/Q38807 -NADI,wikidataId/Q619443 -LAUTOKA,wikidataId/Q856662 -ABO,wikidataId/Q38511 -TAPIOLA_NIITTYKUMPU,wikidataId/Q7034502 -TAMMERFORS,wikidataId/Q40840 -OULU_ULEABORG,wikidataId/Q47048 -JYVASKYLA,wikidataId/Q134620 -HELSINKI_HELSINGFORS,wikidataId/Q1757 -AVIGNON,wikidataId/Q6397 -LIMOGES,wikidataId/Q45656 -LILLE,wikidataId/Q648 -CAEN,wikidataId/Q41185 -BREST-FRANCE,wikidataId/Q12193 -SAINT_BRIEUC,wikidataId/Q29234 -ROUEN,wikidataId/Q30974 -AJACCIO,wikidataId/Q40104 -AIX_EN_PROVENCE,wikidataId/Q47465 -ARRAS,wikidataId/Q131329 -ANTIBES,wikidataId/Q126898 -BELFORT,wikidataId/Q171545 -BESANCON,wikidataId/Q37776 -BEZIERS,wikidataId/Q174019 -MEAUX,wikidataId/Q207620 -MAUREPAS_MONTIGNY_LE_BRETONNEUX, -MARTIGUES_PORT_DE_BOUC,wikidataId/Q675925 -MARSEILLE,wikidataId/Q23482 -MANTES_LA_JOLIE,wikidataId/Q207253 -LYON,wikidataId/Q456 -GRENOBLE,wikidataId/Q1289 -FREJUS,wikidataId/Q215155 -LES_CLAYES_SOUS_BOIS,wikidataId/Q491948 -LE_MANS,wikidataId/Q1476 -DUNKERQUE,wikidataId/Q45797 -DOUAI,wikidataId/Q193826 -DIJON,wikidataId/Q7003 -COLMAR,wikidataId/Q130994 -CLERMONT_FERRAND,wikidataId/Q42168 -CHARTRES,wikidataId/Q130272 -MONTPELLIER,wikidataId/Q6441 -MULHOUSE,wikidataId/Q79815 -BOURGES,wikidataId/Q132404 -BOULOGNE_SUR_MER,wikidataId/Q81997 -BAYONNE,wikidataId/Q134674 -VALENCIENNES,wikidataId/Q163795 -VALENCE,wikidataId/Q8848 -TROYES,wikidataId/Q5489 -TOURS,wikidataId/Q288 -TOULOUSE,wikidataId/Q7880 -PAU, -PARIS,nuts/FR101 
-RENNES,wikidataId/Q647 -REIMS,wikidataId/Q41876 -ORLEANS,wikidataId/Q6548 -NIMES,wikidataId/Q42807 -NICE,wikidataId/Q33959 -NANTES,wikidataId/Q12191 -NANCY,wikidataId/Q40898 -SAINT_ETIENNE,wikidataId/Q42716 -ANNECY,wikidataId/Q50189 -ANGERS,wikidataId/Q38380 -AMIENS,wikidataId/Q41604 -POITIERS,wikidataId/Q6616 -POISSY,wikidataId/Q457 -PERPIGNAN,wikidataId/Q6730 -LE_HAVRE,wikidataId/Q42810 -LA_SEYNE_SUR_MER_SANARY_SUR_MER,wikidataId/Q659484 -LA_ROCHELLE,wikidataId/Q82185 -CHAMBERY,wikidataId/Q165090 -CERGY_PONTOISE,wikidataId/Q961964 -CALAIS,wikidataId/Q6454 -BORDEAUX,wikidataId/Q1479 -MELUN,wikidataId/Q41139 -METZ,wikidataId/Q22690 -TOULON,wikidataId/Q44160 -STRASBOURG,wikidataId/Q6602 -SAINT_QUENTIN,wikidataId/Q182744 -FAAA_PUNAAUIA_PAPEETE_FRENCH_POLYNESIA, -AL_KHALIL,wikidataId/Q168225 -AN_NUSAYRAT, -AL_QUDS, -RAFAH,wikidataId/Q172343 -NABULUS_NABLUS,wikidataId/Q214178 -JENIN, -GHAZZAH,wikidataId/Q47492 -KHAN_YUNIS, -BIELEFELD,nuts/DEA41 -MOERS,wikidataId/Q3132 -FURTH,nuts/DE253 -FREIBURG_IM_BREISGAU,nuts/DE131 -BRAUNSCHWEIG,nuts/DE911 -HAMBURG,nuts/DE6 -KREFELD,nuts/DEA14 -BONN,nuts/DEA22 -MONCHENGLADBACH,nuts/DEA15 -MUNSTER,nuts/DEA33 -REUTLINGEN,wikidataId/Q3085 -REMSCHEID,nuts/DEA18 -BOCHOLT,wikidataId/Q3917 -BERLIN,nuts/DE3 -AACHEN,nuts/DEA21 -HALLE,wikidataId/Q225774 -BENRATH,wikidataId/Q459769 -BAYREUTH,nuts/DE242 -HAGEN,nuts/DEA53 -GUTERSLOH,wikidataId/Q3771 -GROPELINGEN_VEGESACK,wikidataId/Q315005 -GOTTINGEN,wikidataId/Q3033 -GORLITZ,nuts/DED22 -GOPPINGEN,wikidataId/Q4072 -ERFURT,nuts/DEG01 -DUSSELDORF,nuts/DEA11 -FRANKFURT_AM_MAIN,nuts/DE712 -FRANKFURT_ODER,nuts/DE403 -DUREN,wikidataId/Q1884 -DUISBURG,nuts/DEA12 -DRESDEN,nuts/DED21 -DELMENHORST,nuts/DE941 -DARMSTADT,nuts/DE711 -COTTBUS,nuts/DE402 -MAINZ,nuts/DEB35 -MAGDEBURG,nuts/DEE03 -HAMM,nuts/DEA54 -HANAU,wikidataId/Q3802 -LUTTEN_KLEIN,wikidataId/Q1880458 -LUNEN,wikidataId/Q3822 -LUNEBURG,wikidataId/Q3920 -LUDWIGSBURG,wikidataId/Q622 -LUDENSCHEID,wikidataId/Q3893 -LUBECK,nuts/DEF03 
-KAISERSLAUTERN,nuts/DEB32 -JENA,nuts/DEG03 -KOLN,nuts/DEA23 -KIEL,nuts/DEF02 -ISERLOHN,wikidataId/Q1130 -INGOLSTADT,nuts/DE211 -HILDESHEIM,wikidataId/Q3185 -HERZOGENRATH,wikidataId/Q6916 -HEILBRONN,nuts/DE117 -HEIDELBERG,nuts/DE125 -TUBINGEN,wikidataId/Q3806 -TRIER,nuts/DEB21 -WURZBURG,nuts/DE263 -WOLFSBURG,nuts/DE913 -STUTTGART,nuts/DE111 -SOLINGEN_WUPPERTAL,nuts/DEA19 -SINDELFINGEN,wikidataId/Q4019 -SCHWEINFURT,nuts/DE262 -SAARBRUCKEN,wikidataId/Q1724 -RUSSELSHEIM_AM_MAIN,wikidataId/Q4031 -PADERBORN,wikidataId/Q2971 -OSNABRUCK,nuts/DE944 -REGENSBURG,nuts/DE232 -POTSDAM,nuts/DE404 -OLDENBURG_OLDENBURG,nuts/DE943 -NEUSS,wikidataId/Q2948 -NEUMUNSTER,nuts/DEF04 -MUNCHEN,nuts/DE212 -BAMBERG,nuts/DE241 -BAD_SODEN_AM_TAUNUS_KELKHEIM,wikidataId/Q39785 -AUGSBURG,nuts/DE271 -GIESSEN,wikidataId/Q3874 -GEVELSBERG_SCHWELM,wikidataId/Q11293 -GERA,nuts/DEG02 -FLENSBURG,nuts/DEF01 -ESSLINGEN_AM_NECKAR,wikidataId/Q3786 -ERLANGEN,nuts/DE252 -CHEMNITZ,nuts/DED41 -BREMERHAVEN,nuts/DE502 -BREMEN,nuts/DE501 -HANNOVER,nuts/DE921 -MARL,wikidataId/Q3813 -MANNHEIM,nuts/DE126 -LEVERKUSEN,nuts/DEA24 -LEIPZIG,nuts/DED51 -LANDSHUT,nuts/DE221 -KEMPTEN_ALLGAU,nuts/DE273 -KASSEL,nuts/DE731 -KARLSRUHE,nuts/DE122 -HAUNSTETTEN,wikidataId/Q760477 -HARBURG,wikidataId/Q503226 -KOBLENZ,nuts/DEB11 -WIESBADEN,nuts/DE714 -VAIHINGEN_MOHRINGEN,wikidataId/Q1957900 -ULM,nuts/DE144 -RUHRGEBIET, -ROSTOCK,nuts/DE803 -ROSENHEIM,nuts/DE213 -PORZ,wikidataId/Q458242 -PLAUEN,nuts/DED12 -PFORZHEIM,nuts/DE129 -ACCRA,wikidataId/Q3761 -THESSALONIKI,wikidataId/Q17151 -KAVALA,wikidataId/Q187352 -ATHINA,wikidataId/Q1524 -CHANIA,wikidataId/Q182299 -IOANNINA,wikidataId/Q183199 -IRAKLEIO, -HUEHUETENANGO,wikidataId/Q984863 -SANTA_LUCIA_COTZUMALGUAPA,wikidataId/Q404550 -SAN_JUAN_SACATEPEQUEZ,wikidataId/Q127622 -QUETZALTENANGO,wikidataId/Q334577 -PUERTO_BARRIOS,wikidataId/Q991400 -MAZATENANGO,wikidataId/Q1023983 -GUATEMALA_CITY,wikidataId/Q1555 -ESCUINTLA,wikidataId/Q780771 -COBAN,wikidataId/Q867077 -SAINTMARC,wikidataId/Q132189 
-PORT-AU-PRINCE,wikidataId/Q34261 -CAP_HAITIEN,wikidataId/Q216835 -EL_PROGRESO,wikidataId/Q2277161 -COMAYAGUA,wikidataId/Q679169 -TEGUCIGALPA,wikidataId/Q3238 -SAN_PEDRO_SULA,wikidataId/Q274584 -LA_CEIBA,wikidataId/Q916536 -VESZPREM,wikidataId/Q146317 -RAKOSLIGET,wikidataId/Q714696 -PECS,wikidataId/Q45779 -TATABANYA,wikidataId/Q187821 -SZOMBATHELY,wikidataId/Q42007 -SZEKESFEHERVAR,wikidataId/Q130212 -SZEGED,wikidataId/Q81581 -NYIREGYHAZA,wikidataId/Q171223 -MISKOLC,wikidataId/Q102397 -KECSKEMET,wikidataId/Q171357 -GYOR,wikidataId/Q134494 -ERD,wikidataId/Q193050 -DEBRECEN,wikidataId/Q79880 -BUDAPEST,nuts/HU101 -REYKJAVIK,wikidataId/Q1764 -PARBHANI,wikidataId/Q1797389 -SURAT,wikidataId/Q1797317 -SITAPUR,wikidataId/Q1812539 -SINGRAULI,wikidataId/Q2668638 -PUNE,wikidataId/Q1538 -PATNA,wikidataId/Q100077 -PALI,wikidataId/Q46925 -NELLORE,wikidataId/Q61434 -MUMBAI,wikidataId/Q1156 -MORENA,wikidataId/Q2341467 -MALEGAON,wikidataId/Q580965 -KOZHIKODE,wikidataId/Q1142979 -KOLKATA,wikidataId/Q1348 -KANPUR,wikidataId/Q66568 -KANCHIPURAM,wikidataId/Q15157 -JALNA,wikidataId/Q1804863 -JAIPUR,wikidataId/Q1134781 -BELGAUM,wikidataId/Q270176 -HINDUPUR,wikidataId/Q760056 -COIMBATORE,wikidataId/Q15136 -CHANDIGARH,wikidataId/Q43433 -BHIWANDI,wikidataId/Q645725 -BENGALURU,wikidataId/Q1355 -BAHARAMPUR,wikidataId/Q633530 -AHMEDABAD,wikidataId/Q1070 -THOOTHUKKUDI,wikidataId/Q41562 -HYDERABAD_INDIA,wikidataId/Q1361 -VIJAYAWADA,wikidataId/Q200017 -TUMKUR,wikidataId/Q911310 -PEKALONGAN,wikidataId/Q10623 -BANDA_ACEH,wikidataId/Q5779 -BANDUNG,wikidataId/Q10332 -CIREBON,wikidataId/Q10368 -SURABAYA,wikidataId/Q11462 -SEMARANG,wikidataId/Q10687 -SAMARINDA,wikidataId/Q14499 -PURWAKARTA,wikidataId/Q10378 -PEMATANGSIANTAR,wikidataId/Q5979 -PEMALANG,wikidataId/Q10624 -JOMBANG,wikidataId/Q11082 -PAREPARE,wikidataId/Q14636 -PALEMBANG,wikidataId/Q8131 -MEDAN,wikidataId/Q5972 -MAKASSAR,wikidataId/Q14634 -KENDARI,wikidataId/Q15379 -JAMBI,wikidataId/Q2051 -GARUT_KOTA,wikidataId/Q833632 
-BENGKULU,wikidataId/Q1890 -MASHAD,wikidataId/Q121157 -NISHABUR,wikidataId/Q131350 -PIRANSHAHR,wikidataId/Q3015863 -QOM,wikidataId/Q170573 -KHORAM_ABAD,wikidataId/Q502212 -KERMAN,wikidataId/Q171714 -KASHMAR,wikidataId/Q720403 -BANDAR_ABBAS,wikidataId/Q154814 -KARAJ, -GORGAN,wikidataId/Q188155 -ESFAHAN,wikidataId/Q42053 -DEHDASHT,wikidataId/Q888365 -BOJNURD,wikidataId/Q317946 -BABOL,wikidataId/Q605157 -ARAK,wikidataId/Q212628 -ARADABIL,wikidataId/Q12727756 -AHVAZ,wikidataId/Q170366 -VARAMIN,wikidataId/Q518985 -TEHRAN,wikidataId/Q3616 -TABRIZ,wikidataId/Q80053 -SHIRAZ,wikidataId/Q6397066 -SHAHIN_SHAHR,wikidataId/Q862105 -SARI,wikidataId/Q168843 -SANANDAJ,wikidataId/Q272093 -SALMAS,wikidataId/Q765806 -KASAN, -SHADEGAN,wikidataId/Q778162 -BAGHDAD,wikidataId/Q1530 -SWORDS_MALAHIDE,wikidataId/Q987748 -CORK,wikidataId/Q36647 -BLANCHARDSTOWN,wikidataId/Q496387 -LIMERICK,wikidataId/Q133315 -DUBLIN,wikidataId/Q1761 -TEL_AVIV,wikidataId/Q33935 -VERONA,wikidataId/Q2028 -CATANIA,wikidataId/Q1903 -CAGLIARI,wikidataId/Q3622022 -BRINDISI,wikidataId/Q13496 -BRESCIA,wikidataId/Q6221 -BOLZANO,wikidataId/Q6526 -BOLOGNA,wikidataId/Q18288145 -CERIGNOLA,wikidataId/Q19277 -COMO,wikidataId/Q1308 -BARI,wikidataId/Q18241854 -ASTI,wikidataId/Q6122 -FERRARA,wikidataId/Q13362 -FIRENZE,nuts/ITE14 -VENEZIA,wikidataId/Q641 -UDINE,wikidataId/Q2790 -TRENTO,wikidataId/Q3376 -TORINO,wikidataId/Q495 -VICENZA,wikidataId/Q6537 -PERUGIA,wikidataId/Q3437 -PRATO,wikidataId/Q13377 -POZZUOLI_BACOLI,wikidataId/Q71832 -PALERMO,wikidataId/Q20795016 -PADOVA,wikidataId/Q617 -OSTIA,wikidataId/Q11171297 -NOVARA,wikidataId/Q6046 -MODENA,wikidataId/Q279 -MILAN,wikidataId/Q490 -MESTRE,wikidataId/Q541405 -MESSINA,wikidataId/Q13666 -MATERA,wikidataId/Q13616 -GROSSETO,wikidataId/Q6716 -GENOVA,wikidataId/Q1449 -GELA,wikidataId/Q39971 -PAVIA,wikidataId/Q6259 -BERGAMO,wikidataId/Q628 -BARLETTA,wikidataId/Q13470 -ROME,wikidataId/Q18288160 -REGGIO_DI_CALABRIA,wikidataId/Q8471 -LIVORNO,wikidataId/Q6761 -LECCE,wikidataId/Q13386 
-AREZZO,wikidataId/Q13378 -ANCONA,wikidataId/Q3415 -ALESSANDRIA,wikidataId/Q6088 -SIRACUSA,wikidataId/Q13670 -SASSUOLO,wikidataId/Q8598 -SASSARI,wikidataId/Q13629 -PORDENONE,wikidataId/Q6606 -PISA,wikidataId/Q13375 -PESARO,wikidataId/Q13134 -PORTMORE,wikidataId/Q644663 -KINGSTON_JAMAICA,wikidataId/Q34692 -YAMAGUCHI,wikidataId/Q207876 -TOKYO,wikidataId/Q1490 -OSAKA,wikidataId/Q122723 -OKAYAMA,wikidataId/Q200078 -FUKUOKA,wikidataId/Q26600 -TURKISTAN,wikidataId/Q848638 -TEMIRTAY,wikidataId/Q1001104 -TARAZ,wikidataId/Q486545 -QARAGANDY,wikidataId/Q484706 -SHYMKENT,wikidataId/Q485496 -SEMEY,wikidataId/Q61944 -RUDNY,wikidataId/Q1008853 -QULSARY, -QASKELEN,wikidataId/Q2121478 -PAVLODAR,wikidataId/Q486282 -OSKEMEN,wikidataId/Q162548 -ORAL,wikidataId/Q487439 -BALQASH,wikidataId/Q805515 -NUR_SULTAN,wikidataId/Q1520 -AQTAY,wikidataId/Q198989 -ALMATY,wikidataId/Q35493 -AKTOBE,wikidataId/Q477232 -TALDYQORGAN,wikidataId/Q491102 -IRBID,wikidataId/Q194165 -AMMAN,wikidataId/Q3805 -MERU,wikidataId/Q934149 -MALABA,wikidataId/Q6740728 -ELDORET, -NAIROBI,wikidataId/Q3870 -NAKURU, -NYERI, -KISUMU, -HAMHUNG,wikidataId/Q109356 -SONGNIM,wikidataId/Q505930 -SINUIJU, -RASON, -PYONGYANG, -KANGGYE,wikidataId/Q489825 -CHONGJU,wikidataId/Q213169 -CHONGJIN,wikidataId/Q109336 -CHEONAN,wikidataId/Q42146 -SEOUL,wikidataId/Q8684 -GWANGJU,wikidataId/Q41283 -JINJU,wikidataId/Q42144 -BUSAN, -KUWAIT,wikidataId/Q35178 -TOKMOK,wikidataId/Q854333 -TALAS,wikidataId/Q643955 -OSH,wikidataId/Q47282 -NARYN, -KYZYL_KYJA,wikidataId/Q2347569 -KARAKOL,wikidataId/Q194452 -KARA_BALTA, -JALAL_ABAD,wikidataId/Q487689 -BIKEK, -BALYKCHY,wikidataId/Q805876 -UZGEN,wikidataId/Q765887 -LATINAMERICA_AND_CARIBBEAN_ALL, -TRIPOLI,wikidataId/Q168954 -BEIRUT,wikidataId/Q3820 -BAALBEK, -ZAHLE,wikidataId/Q26569 -TYRE,wikidataId/Q82070 -SIDON,wikidataId/Q163490 -DAUGAVPILS,wikidataId/Q80021 -RIGA,nuts/LV006 -LIEPAJA, -VILNIUS,wikidataId/Q216 -SIAULIAI,wikidataId/Q134712 -PANEVEZYS,wikidataId/Q1719466 -KLAIPEDA,wikidataId/Q776965 
-KAUNAS,wikidataId/Q4115712 -LUXEMBOURG,wikidataId/Q1842 -FIANARANTSOA,wikidataId/Q527441 -ANTSIRANANA,wikidataId/Q218928 -ANTSIRABE,wikidataId/Q583184 -MAROVOAY,wikidataId/Q1902425 -AMPARAFARAVOLA,wikidataId/Q474396 -MAHAJANGA,wikidataId/Q218753 -TAOLANARO,wikidataId/Q526568 -TOLIARA,wikidataId/Q824043 -TOAMASINA,wikidataId/Q178067 -ANTANANARIVO,wikidataId/Q3915 -MZUZU,wikidataId/Q502216 -BLANTYRE,wikidataId/Q188693 -RAWANG,wikidataId/Q2072124 -IPOH,wikidataId/Q271619 -MALE,wikidataId/Q9347 -BAMAKO, -VALLETTA,wikidataId/Q23800 -TIJUANA,wikidataId/Q124739 -VERACRUZ,wikidataId/Q173270 -VILLAHERMOSA_CENTRO_TABASCO,wikidataId/Q210886 -XALAPA_VERACRUZ,wikidataId/Q221051 -MONTERREY,wikidataId/Q81033 -TEHUACAN, -SAN_JUAN_DEL_RIO,wikidataId/Q3849473 -REYNOSA,wikidataId/Q738303 -PUERTO_VALLARTA,wikidataId/Q853258 -PUEBLA,wikidataId/Q7258412 -MEXICOCITY,wikidataId/Q1489 -LEON_MEXICO,wikidataId/Q189128 -IRAPUATO_GUANAJUATO,wikidataId/Q816845 -GUANAJUATO_GUANAJUATO,wikidataId/Q46475 -GUADALAJARA,wikidataId/Q9022 -ENSENADA,wikidataId/Q1548691 -CULIACAN_ROSALES,wikidataId/Q211760 -COMITANDEDOMINGUEZ_CHIAPAS,wikidataId/Q1961680 -CIUDAD_RIO_BRAVO,wikidataId/Q1963086 -CIUDAD_JUAREZ,wikidataId/Q26590 -CAMPECHE_CAMPECHE,wikidataId/Q61301 -TEPIC,wikidataId/Q207972 -APATZINGAN_MICHOACAN,wikidataId/Q94758 -ACAPULCO_GUERRERO,wikidataId/Q81398 -MONACO_ALL,country/MCO -ULAANBAATAR,wikidataId/Q23430 -TANGER, -TEMARA,wikidataId/Q1635606 -SEFROU,wikidataId/Q1009308 -SAFI,wikidataId/Q183405 -OULAD_TEIMA,wikidataId/Q785945 -OUJDA,wikidataId/Q193802 -MIDELT,wikidataId/Q2501809 -MARRAKESH,wikidataId/Q101625 -MAKNES,wikidataId/Q178663 -FKIH_BEN_SALAH,wikidataId/Q2895854 -FES,wikidataId/Q80985 -CASABLANCA,wikidataId/Q7903 -AZROU,wikidataId/Q794128 -SIDI_SLIMANE,wikidataId/Q2359952 -MANHICA,wikidataId/Q10323671 -NAMPULA, -NACALA, -MOCUBA,wikidataId/Q520687 -MAXIXE, -MAPUTO,wikidataId/Q3889 -GURUE, -BEIRA, -ALTO_MOLOCUE, -PEMBA, -AL_BURAYMI,wikidataId/Q2829307 -MUSCAT,wikidataId/Q3826 
-SALALAH,wikidataId/Q1294439 -SOHAR,wikidataId/Q943270 -EUROPE_AND_NORTHERNAMERICA_ALL,country/USA -KATHMANDU,wikidataId/Q3037 -POKHARA,wikidataId/Q6640 -JANAKPUR,wikidataId/Q378649 -ITAHARI,wikidataId/Q1675026 -DHARAN,wikidataId/Q8254 -DAMAK,wikidataId/Q1458001 -BUTWAL,wikidataId/Q11283 -BIRGANJ,wikidataId/Q8252 -BIRENDRANAGAR,wikidataId/Q865146 -BIRATNAGAR,wikidataId/Q8247 -BHARATPUR,wikidataId/Q250220 -SOEST_BAARN,wikidataId/Q10027 -SITTARD_GELEEN,wikidataId/Q9781 -VENLO,wikidataId/Q9777 -VEENENDAAL,wikidataId/Q1840 -S_HERTOGENBOSCH,wikidataId/Q2766547 -S_GRAVENHAGE_THE_HAGUE,wikidataId/Q36600 -ROTTERDAM,wikidataId/Q34370 -ROOSENDAAL,wikidataId/Q6903267 -PURMEREND,wikidataId/Q9954 -PIJNACKER_BERKEL_EN_RODENRIJS,wikidataId/Q820839 -OSS,wikidataId/Q13971034 -NIJMEGEN, -HENGELO,wikidataId/Q10006 -HELMOND,wikidataId/Q9844 -LELYSTAD,wikidataId/Q166065 -LEIDEN,wikidataId/Q43631 -HEERLEN,wikidataId/Q9799 -HAARLEMMERMEER,wikidataId/Q9924 -HAARLEM,nuts/NL324 -GRONINGEN,wikidataId/Q749 -GOUDA,wikidataId/Q84125 -EINDHOVEN,wikidataId/Q9832 -DEVENTER,wikidataId/Q10001 -BREDA,wikidataId/Q40844 -ALPHEN_AAN_DEN_RIJN,wikidataId/Q213246 -ALMERE,wikidataId/Q83178 -ASSEN,wikidataId/Q798 -ARNHEM,wikidataId/Q1310 -ALMELO,wikidataId/Q988 -ALKMAAR,wikidataId/Q972 -ZOETERMEER,wikidataId/Q26432 -ZWOLLE,wikidataId/Q793 -EDE,wikidataId/Q26555 -ENSCHEDE,wikidataId/Q10002 -NIEUWEGEIN_IJSSELSTEIN,wikidataId/Q10050 -MAASTRICHT,wikidataId/Q1309 -BEVERWIJK_HEEMSKERK,wikidataId/Q9905 -BERGEN_OP_ZOOM,wikidataId/Q192508 -DORDRECHT,wikidataId/Q26421 -VLEUTEN_DE_MEERN,wikidataId/Q1329720 -UTRECHT,nuts/NL310 -TILBURG,wikidataId/Q9871 -SPIJKENISSE,wikidataId/Q488545 -LEEUWARDEN,wikidataId/Q25390 -HOORN,wikidataId/Q9938 -HILVERSUM,wikidataId/Q9934 -APELDOORN,wikidataId/Q101918 -AMSTERDAM, -AMERSFOORT,wikidataId/Q992 -NAPIER,wikidataId/Q203380 -AUCKLAND,wikidataId/Q37100 -WELLINGTON,wikidataId/Q23661 -TAURANGA,wikidataId/Q207756 -PALMERSTON,wikidataId/Q909220 -LOWER_HUTT,wikidataId/Q1015681 
-HAMILTON_NEWZEALAND,wikidataId/Q200028 -DUNEDIN,wikidataId/Q133073 -CHRISTCHURCH,wikidataId/Q79990 -TIPITAPA,wikidataId/Q1365146 -MANAGUA,wikidataId/Q3274 -LEON_NICARAGUA,wikidataId/Q208151 -GRANADA_NICARAGUA,wikidataId/Q205648 -CIUDAD_SANDINO,wikidataId/Q2720849 -CHINANDEGA,wikidataId/Q974354 -MASAYA,wikidataId/Q1001914 -ZINDER,wikidataId/Q204365 -NIAMEY,wikidataId/Q3674 -MARADI,wikidataId/Q664550 -DOSSO,wikidataId/Q985620 -AGADEZ, -OYO,wikidataId/Q1023703 -IBADAN,wikidataId/Q183298 -GOMBE,wikidataId/Q591598 -LAGOS, -OSLO,nuts/NO011 -LORENSKOG_LILLESTROM,wikidataId/Q60806432 -STAVANGER,wikidataId/Q25416 -BERGEN,wikidataId/Q26793 -GARAPAN,wikidataId/Q3272415 -LAYYAH,wikidataId/Q631087 -LAHORE_PAKISTAN,wikidataId/Q3308170 -KOHAT,wikidataId/Q1195983 -KHANPUR,wikidataId/Q1250097 -KARACHI,wikidataId/Q8660 -DASKA,wikidataId/Q2374990 -ISLAMABAD,wikidataId/Q1362 -HYDERABAD_PAKISTAN,wikidataId/Q1640079 -HAFIZABAD,wikidataId/Q1253663 -GUJRANWALA,wikidataId/Q243322 -FAISALABAD,wikidataId/Q173985 -CHISHTIAN,wikidataId/Q1250229 -BHAKKAR,wikidataId/Q2428259 -ATTOCK,wikidataId/Q1973073 -LARKANA,wikidataId/Q696605 -TURBAT,wikidataId/Q2977093 -SIALKOT,wikidataId/Q643883 -SHIKARPUR,wikidataId/Q1250069 -SHEIKHUPURA,wikidataId/Q972756 -SARGODHA,wikidataId/Q855997 -QUETTA,wikidataId/Q185458 -PESHAWAR,wikidataId/Q1113311 -MULTAN,wikidataId/Q185453 -MINGAWARA, -MARDAN,wikidataId/Q993859 -JHANG,wikidataId/Q1026616 -NAWABSHAH,wikidataId/Q1017637 -LA_CHORRERA, -PANAMACITY,wikidataId/Q3306 -ARRAIJAN,wikidataId/Q1841186 -PORT_MORESBY,wikidataId/Q36526 -KIMBE,wikidataId/Q59718 -WEWAK,wikidataId/Q1017763 -LAE,wikidataId/Q612250 -CIUDAD_DEL_ESTE,wikidataId/Q192235 -ASUNCION,wikidataId/Q2933 -TRUJILLO,wikidataId/Q214173 -PIURA,wikidataId/Q208183 -MOQUEGUA,wikidataId/Q1000346 -LIMA,wikidataId/Q211795 -JULIACA,wikidataId/Q696074 -CHICLAYO,wikidataId/Q260911 -IQUITOS,wikidataId/Q193289 -HUARAL,wikidataId/Q723677 -HUANCAYO,wikidataId/Q468782 -HUACHO,wikidataId/Q1002052 -CUSCO,wikidataId/Q205057 
-CAJAMARCA,wikidataId/Q205078 -AYACUCHO,wikidataId/Q205112 -AREQUIPA,wikidataId/Q159273 -JAEN,wikidataId/Q1229885 -MANILA,wikidataId/Q13580 -CEBU,wikidataId/Q1467 -BACOLOD,wikidataId/Q5217 -LOMZA,wikidataId/Q215633 -LEGNICA,wikidataId/Q106274 -LEGIONOWO,wikidataId/Q730944 -KRAKOW,nuts/PL213 -KOSZALIN,wikidataId/Q62868 -GRUDZIADZ,wikidataId/Q123511 -GORZOW_WIELKOPOLSKI,wikidataId/Q104731 -KASZTELANKA, -KALISZ,wikidataId/Q52842 -GORNOSLASKI_ZWIAZEK_METROPOLITALNY, -LODZ,wikidataId/Q580 -GDANSK,wikidataId/Q1792 -ELK,wikidataId/Q61480 -ELBLAG,wikidataId/Q104712 -CZESTOCHOWA,wikidataId/Q103217 -POZNAN,wikidataId/Q268 -PLOCK,wikidataId/Q104725 -GDYNIA,wikidataId/Q385 -GLIWICE,wikidataId/Q105084 -PIOTRKOW_TRYBUNALSKI,wikidataId/Q158004 -PILA,wikidataId/Q556200 -PABIANICE,wikidataId/Q158589 -OSTROWIEC_SWIETOKRZYSKI,wikidataId/Q730929 -OSTROW_WIELKOPOLSKI,wikidataId/Q52895 -OPOLE,wikidataId/Q92212 -SZCZECIN,wikidataId/Q393 -STALOWA_WOLA,wikidataId/Q751140 -RYBNIK,wikidataId/Q107094 -RADOM,wikidataId/Q104740 -RZESZOW,wikidataId/Q598 -STARGARD_SZCZECINSKI,wikidataId/Q106268 -TYCHY,wikidataId/Q11977 -TCZEW,wikidataId/Q474697 -SUWALKI,wikidataId/Q236083 -TORUN,wikidataId/Q47554 -TOMASZOW_MAZOWIECKI,wikidataId/Q157994 -WALBRZYCH,wikidataId/Q110732 -SLUPSK,wikidataId/Q105048 -ZIELONA_GORA,wikidataId/Q104720 -ZGIERZ,wikidataId/Q104407 -ZAMOSC,wikidataId/Q145972 -WIDNICA,wikidataId/Q687963 -WARSAW,nuts/PL127 -KIELCE,wikidataId/Q102317 -KATOWICE,wikidataId/Q588 -BIALYSTOK,wikidataId/Q761 -BELCHATOW,wikidataId/Q157992 -MIELEC,wikidataId/Q658861 -LUBLIN,wikidataId/Q37333 -WROCLAW,nuts/PL514 -WLOCLAWEK,wikidataId/Q106681 -JELENIA_GORA,wikidataId/Q147934 -JASTRZEBIE_ZDROJ,wikidataId/Q107106 -INOWROCLAW,wikidataId/Q378821 -CHELM,wikidataId/Q234184 -BYDGOSZCZ,wikidataId/Q41252 -BIELSKO_BIALA,wikidataId/Q106583 -OLSZTYN,wikidataId/Q82765 -NOWY_SACZ,wikidataId/Q802 -MYSLOWICE,wikidataId/Q161595 -GLOGOW,wikidataId/Q66013 -GNIEZNO,wikidataId/Q51432 -PRZEMYSL,wikidataId/Q208473 
-BARREIRO,wikidataId/Q217394 -AMORA,wikidataId/Q474039 -COIMBRA, -BRAGA, -ALMADA, -SETUBAL, -FUNCHAL, -AL_DAAYEN, -AL_KHOR_ALTHAKIRA, -ALKHOR,wikidataId/Q311717 -MESAIEED,wikidataId/Q1069900 -UMM_SLAL,wikidataId/Q990414 -QATAR_ALL,wikidataId/Q935886 -DOHA,wikidataId/Q3861 -AL_SHEEHANIYA, -AL_SHAMAL,wikidataId/Q22948561 -AL_RAYYAN,wikidataId/Q311272 -AL_WAKRA,wikidataId/Q310893 -AL_SHAHANIYA,wikidataId/Q12188815 -ARAD,wikidataId/Q173591 -BUCHAREST,wikidataId/Q19660 -CONSTANTA,wikidataId/Q79808 -FALTICENI,wikidataId/Q303015 -IASI,wikidataId/Q46852 -CLUJ_NAPOCA,wikidataId/Q100188 -CRAIOVA,wikidataId/Q168057 -CAMPIATURZII,wikidataId/Q458422 -BRASOV,wikidataId/Q82174 -NAVODARI,wikidataId/Q837960 -TARGU_JIU,wikidataId/Q202357 -MANGALIA, -SIBIU,wikidataId/Q83324 -TIMISOARA,wikidataId/Q83404 -REGHIN,wikidataId/Q572478 -ASTRAHAN,wikidataId/Q3927 -BEREZNIKI,wikidataId/Q105002 -DZERZINSK,wikidataId/Q76493 -MOSCOW,wikidataId/Q649 -SAINT_PETERSBURG,wikidataId/Q4407742 -TYUMEN,wikidataId/Q5815 -BUTARE,wikidataId/Q509739 -RUHENGERI,wikidataId/Q246145 -KIGALI,wikidataId/Q3859 -NYANZA,wikidataId/Q528679 -GITARAMA, -CYANGUGU,wikidataId/Q782857 -KAYONZA, -GISENYI,wikidataId/Q830631 -ARAR,wikidataId/Q626199 -AL_KHAFJI,wikidataId/Q1771721 -AL_MADINAH,wikidataId/Q35484 -RAFHA,wikidataId/Q27219 -MAKKAH,wikidataId/Q5806 -AR_RASS,wikidataId/Q1878991 -RIYADH,wikidataId/Q3692 -TABUK,wikidataId/Q244232 -TAIF,wikidataId/Q182640 -THIES,wikidataId/Q600693 -SAINT_LOUIS,wikidataId/Q178872 -DIOURBEL,wikidataId/Q910973 -DAKAR,wikidataId/Q3718 -KAOLACK,wikidataId/Q2082914 -MBOUR,wikidataId/Q1019431 -TOUBA,wikidataId/Q657072 -LOUGA,wikidataId/Q738061 -ZIGUINCHOR,wikidataId/Q202776 -BOKHTAR-SERBIA, -KIKINDA,wikidataId/Q309355 -ZRENJANIN,wikidataId/Q201125 -POZAREVAC,wikidataId/Q199942 -NOVI_SAD,wikidataId/Q55630 -NOVI_PAZAR,wikidataId/Q202453 -NIS,wikidataId/Q129259 -KRAGUJEVAC,wikidataId/Q167394 -BELGRADE,wikidataId/Q3711 -BORCA,wikidataId/Q712322 -SINGAPORE,country/SGP -NITRA,wikidataId/Q26397 
-TRNAVA,wikidataId/Q26175 -ZILINA,wikidataId/Q25797 -BRATISLAVA,wikidataId/Q1780 -MARTIN,wikidataId/Q27001 -BANSKA_BYSTRICA,wikidataId/Q144983 -KOSICE,wikidataId/Q25409 -VINH_LONG,wikidataId/Q34789 -HO_CHI_MINH_CITY,wikidataId/Q1854 -LJUBLJANA,wikidataId/Q437 -JOHANNESBURG,wikidataId/Q2346838 -PORT_ELIZABETH,wikidataId/Q125434 -PAMPLONA,wikidataId/Q10282 -ALBACETE,wikidataId/Q15095 -ALICANTE,wikidataId/Q11959 -ALCOI,wikidataId/Q494777 -BARCELONA_SPAIN,wikidataId/Q1492 -AVILES,wikidataId/Q14649 -ALMERIA,wikidataId/Q10400 -LA_CORUNA,wikidataId/Q8757 -BENIDORM,wikidataId/Q487981 -MALAGA, -MADRID,wikidataId/Q2807 -IGUALADA,wikidataId/Q15950 -MURCIA,wikidataId/Q12225 -ZARAGOZA,wikidataId/Q10305 -TALAVERA_DE_LA_REINA,wikidataId/Q181359 -SEVILLA,wikidataId/Q8717 -SANTIAGO_DE_COMPOSTELA,wikidataId/Q14314 -SANTANDER,wikidataId/Q12233 -SANTA_MONICA, -VIGO,wikidataId/Q8745 -PARLA,wikidataId/Q824651 -PUERTO_DE_LA_CRUZ,wikidataId/Q623549 -TARRAGONA,wikidataId/Q15088 -VILANOVA_I_LA_GELTRU,wikidataId/Q15553 -VILADECANS,wikidataId/Q15652 -VALENCIA_SPAIN,wikidataId/Q8818 -VALDEMORO, -GIJON,wikidataId/Q12273 -FERROL,wikidataId/Q485329 -GRANADA_SPAIN,wikidataId/Q8810 -PALMA,wikidataId/Q8826 -CARTAGENA_SPAIN,wikidataId/Q162615 -CADIZ,wikidataId/Q15682 -PALENCIA,wikidataId/Q8378 -OVIEDO,wikidataId/Q14317 -TOLEDO_SPAIN,wikidataId/Q5836 -VALLADOLID,wikidataId/Q8356 -TORREVIEJA,wikidataId/Q221749 -TORREMOLINOS,wikidataId/Q492737 -SANT_BOI_DE_LLOBREGAT,wikidataId/Q15635 -LOGRONO,wikidataId/Q14325 -IRUN,wikidataId/Q200201 -SABADELL,wikidataId/Q12258 -REUS,wikidataId/Q487096 -COSLADA,wikidataId/Q164197 -CASTELLDEFELS,wikidataId/Q15597 -CACERES,wikidataId/Q15678 -BURGOS,wikidataId/Q9580 -BILBAO,wikidataId/Q8692 -MOLLET_DEL_VALLES,wikidataId/Q23994797 -MELILLA,nuts/ES64 -MANRESA,wikidataId/Q16697 -SANTA_CRUZ_DE_TENERIFE,wikidataId/Q14328 -SAN_SEBASTIAN,geoId/7277500 -SALAMANCA,wikidataId/Q15695 -TERRASSA,wikidataId/Q13939 -ZAMORA,wikidataId/Q15696 -VITORIA_GASTEIZ,wikidataId/Q14318 
-ELCHE,wikidataId/Q10509 -EL_PRAT_DE_LLOBREGAT,wikidataId/Q15619 -EL_MASNOU,wikidataId/Q12039 -KASSALA,wikidataId/Q686813 -WAD_MADANI,wikidataId/Q852528 -SINJAH,wikidataId/Q130364 -BUR_SUDAN,wikidataId/Q208718 -ATBARA,wikidataId/Q753906 -AL_QADARIF,wikidataId/Q311199 -KHARTOUM,wikidataId/Q1963 -SANNAR,wikidataId/Q611867 -NORTHERN_AFRICA_AND_WESTERN_ASIA_ALL,wikidataId/Q24899010 -UMEA,wikidataId/Q25579 -TABY, -SODERTALJE, -HELSINGBORG,wikidataId/Q25411 -OREBRO,wikidataId/Q25732 -BORAS, -NORRKOPING, -HANINGE,wikidataId/Q113692 -GOTEBORG,wikidataId/Q25287 -STOCKHOLM,wikidataId/Q1754 -VASTERAS,wikidataId/Q25412 -UPPSALA,wikidataId/Q25286 -MALMO,wikidataId/Q2211 -LUND, -LINKOPING,wikidataId/Q25413 -EASTERN_AND_SOUTHERN_ASIA_ALL, -LUZERN,wikidataId/Q4191 -ZURICH,wikidataId/Q72 -WINTERTHUR,wikidataId/Q9125 -WETZIKON,wikidataId/Q68305 -THUN,wikidataId/Q68978 -ST_GALLEN,wikidataId/Q25607 -NEUCHATEL,wikidataId/Q69345 -BASEL,wikidataId/Q78 -EMMEN_LUCERNE,wikidataId/Q4225 -LAUSANNE,wikidataId/Q807 -GENEVA,wikidataId/Q71 -FRIBOURG,wikidataId/Q36378 -BERNE,wikidataId/Q70 -BIEL,wikidataId/Q1034 -LUGANO,wikidataId/Q7024 -PANJAKENT,wikidataId/Q630805 -KONIBODOM,wikidataId/Q1015752 -CHKALOVSK_BUSTON,wikidataId/Q2997217 -ISFARA,wikidataId/Q695330 -DUSHANBE,wikidataId/Q9365 -KHUJAND,wikidataId/Q373808 -KHOROG,wikidataId/Q467501 -ISTARAVSHAN,wikidataId/Q936510 -NURAK,wikidataId/Q1018201 -BOKHTAR,wikidataId/Q648567 -VAHDAT,wikidataId/Q1018025 -TURSUNZODA,wikidataId/Q648709 -KULOB,wikidataId/Q132043 -CHIANG_MAI,wikidataId/Q233588 -CHA_AM,wikidataId/Q927810 -BANGKOK,wikidataId/Q1861 -KHON_KAEN,wikidataId/Q327533 -CHUMPHON,wikidataId/Q244695 -CHIANG_RAI,wikidataId/Q236419 -PHATTHALUNG,wikidataId/Q179539 -AL_AIN,wikidataId/Q234600 -DUBAI, -ABU_DHABI, -AL_QAYRAWAN,wikidataId/Q179570 -TUNIS,wikidataId/Q3572 -TOZEUR,wikidataId/Q504661 -SUSAH, -SFAX,wikidataId/Q46325 -QABIS,wikidataId/Q215661 -MONASTIR,wikidataId/Q208715 -BANZART,wikidataId/Q189546 -ANTALYA,wikidataId/Q6487 
-ADIYAMAN,wikidataId/Q168197 -KAYSERI,wikidataId/Q48338 -BALIKESIR,wikidataId/Q199723 -GAZIANTEP,wikidataId/Q93338 -DENIZLI,wikidataId/Q170967 -CORUM,wikidataId/Q206900 -KONYA,wikidataId/Q79857 -IZMIR,wikidataId/Q35997 -ISTANBUL, -CERKEZKOY,wikidataId/Q272800 -CARASAMBA, -BURSA,wikidataId/Q40738 -VIRANSEHIR,wikidataId/Q497731 -UZUNKOPRU,wikidataId/Q775225 -TARSUS,wikidataId/Q134287 -SIVAS,wikidataId/Q107401 -SANLIURFA,wikidataId/Q133118 -SAMSUN,wikidataId/Q160450 -MALATYA,wikidataId/Q165995 -ANKARA,wikidataId/Q3640 -AGRI,nuts/TRA21 -ELBISTAN, -ADAPAZARI,wikidataId/Q175323 -NIGDE,wikidataId/Q194146 -TURKMENBASY,wikidataId/Q488987 -TURKMENABAT,wikidataId/Q487684 -MARY,wikidataId/Q5713 -DASOGUZ,wikidataId/Q487672 -BAYRAMALY_BAJRAM_ALI, -BALKANABAT,wikidataId/Q199762 -ASHGABAT,wikidataId/Q23438 -GYZYLARBAT,wikidataId/Q1015618 -TEJEN,wikidataId/Q1983850 -MBARARA,wikidataId/Q731039 -MBALE,wikidataId/Q1015727 -MASAKA,wikidataId/Q1032233 -KASESE,wikidataId/Q1232016 -KAMPALA,wikidataId/Q3894 -JINJA,wikidataId/Q501709 -GULU,wikidataId/Q581379 -LIRA,wikidataId/Q1015699 -ROVNO,wikidataId/Q156739 -NIKOLAEV,wikidataId/Q41572 -DIYARB_NAJM, -CAIRO,wikidataId/Q85 -ALEXANDRIA,wikidataId/Q87 -AL_ZAQAZIQ,wikidataId/Q140304 -AL_QHURDAQAH, -AL_MANSHAH, -PORT_SAID,wikidataId/Q134509 -ASYUT,wikidataId/Q29962 -SEFTON,wikidataId/Q991747 -SCUNTHORPE,wikidataId/Q913386 -ABERDEEN,wikidataId/Q36405 -BLANTYRE_HAMILTON,wikidataId/Q881708 -BLACKPOOL,wikidataId/Q170377 -BLACKBURN,wikidataId/Q188313 -BARNSLEY,wikidataId/Q1857382 -BOURNEMOUTH,wikidataId/Q170478 -AYLESBURY,wikidataId/Q213474 -ASHFORD,wikidataId/Q725261 -ALDWICK_FELPHAM,wikidataId/Q2227185 -BANGOR,wikidataId/Q234178 -CHESTER,wikidataId/Q170263 -CHELTENHAM,wikidataId/Q206988 -BELFAST,wikidataId/Q10686 -BEDFORD,nuts/UKH24 -BATH,wikidataId/Q22889 -BASINGSTOKE_AND_DEANE,wikidataId/Q810185 -CREWE,wikidataId/Q648810 -DACORUM,wikidataId/Q931180 -HARLOW,wikidataId/Q852729 -HALTON, -HALIFAX_UK,wikidataId/Q826561 -GUILDFORD,wikidataId/Q213465 
-HUDDERSFIELD,wikidataId/Q201812 -HARROGATE,wikidataId/Q215829 -HARTLEPOOL,wikidataId/Q215752 -HASTINGS,wikidataId/Q29245 -CHESTERFIELD,wikidataId/Q823600 -COLCHESTER,wikidataId/Q184163 -COATBRIDGE,wikidataId/Q1018723 -CHELMSFORD,wikidataId/Q210985 -BRACKNELL,wikidataId/Q783210 -CANNOCK_HEDNESFORD, -CORBY,wikidataId/Q994925 -BURNLEY,wikidataId/Q209096 -BRISTOL,nuts/UKK11 -BRIGHTON,wikidataId/Q131491 -CAMBRIDGE,wikidataId/Q350 -BURTON_ON_TRENT,wikidataId/Q1000597 -CARLISLE,wikidataId/Q192896 -CARDIFF,wikidataId/Q24342199 -WARRINGTON,wikidataId/Q894095 -LIVERPOOL,nuts/UKD72 -LIVINGSTON,wikidataId/Q848287 -TAMWORTH,wikidataId/Q704864 -TYNESIDE_CONURBATION,wikidataId/Q1120443 -WASHINGTON,wikidataId/Q1018448 -WARWICK,wikidataId/Q549761 -TELFORD,wikidataId/Q576938 -TAUNTON,wikidataId/Q845619 -WAKEFIELD,nuts/UKE45 -WORCESTER,wikidataId/Q1646181 -WOKING,wikidataId/Q646225 -WIGAN_SKELMERSDALE, -TORBAY,wikidataId/Q209055 -THURROCK,nuts/UKH32 -THATCHAM,wikidataId/Q1026304 -NORWICH,wikidataId/Q130191 -NORTHAMPTON,wikidataId/Q192240 -TUNBRIDGE_WELLS,wikidataId/Q665489 -WYCOMBE,wikidataId/Q548974 -SWINDON,wikidataId/Q894093 -SWANSEA,nuts/UKL18 -NEWPORT,wikidataId/Q11294004 -NEWCASTLE_UNDER_LYME,wikidataId/Q868642 -YORK,wikidataId/Q20986421 -LUTON,nuts/UKH21 -LOUGHBOROUGH,wikidataId/Q537323 -LONDON_UK,nuts/UKI -PLYMOUTH,wikidataId/Q21674890 -MAIDSTONE,wikidataId/Q213180 -MAIDENHEAD,wikidataId/Q1368496 -OXFORD,wikidataId/Q34217 -NUNEATON,wikidataId/Q175632 -MANSFIELD,wikidataId/Q841979 -MANCHESTER_UK,nuts/UKD33 -MEDWAY,nuts/UKJ41 -MILTON_KEYNES,wikidataId/Q894090 -PORTSMOUTH,wikidataId/Q21683233 -SLOUGH,wikidataId/Q211907 -RUSTINGTON,wikidataId/Q846403 -REDHILL_REIGATE,wikidataId/Q1851095 -ROTHERHAM,wikidataId/Q1878732 -SUTTON_IN_ASHFIELD,wikidataId/Q2119187 -STOCKTON_ON_TEES,wikidataId/Q894094 -READING,wikidataId/Q161491 -PRESTON,wikidataId/Q184090 -SHEFFIELD,nuts/UKE32 -SHREWSBURY,wikidataId/Q201970 -STEVENAGE,wikidataId/Q19795 -STAFFORD,wikidataId/Q826782 
-ST_ALBANS,wikidataId/Q6226 -SOUTHEND_ON_SEA,nuts/UKH31 -SOUTHAMPTON,nuts/UKJ32 -DERRY,wikidataId/Q163584 -DERBY,nuts/UKF11 -EXETER,wikidataId/Q134672 -GLASGOW,nuts/UKM34 -ELLESMERE_PORT,wikidataId/Q1011600 -GREAT_YARMOUTH,wikidataId/Q237253 -GRAVESHAM,wikidataId/Q1459781 -FALKIRK,wikidataId/Q623687 -FRIMLEY_FARNBOROUGH,wikidataId/Q1424548 -GLOUCESTER,wikidataId/Q170497 -DUNDEE,wikidataId/Q123709 -DONCASTER,wikidataId/Q1925846 -EASTBOURNE,wikidataId/Q208262 -EAST_KILBRIDE,wikidataId/Q654226 -EDINBURGH,nuts/UKM25 -KETTERING,wikidataId/Q984613 -KEIGHLEY,wikidataId/Q990168 -GRIMSBY_CLEETHORPES,wikidataId/Q587765 -IPSWICH,wikidataId/Q184775 -KINGSTON_UPON_HULL,nuts/UKE11 -KIDDERMINSTER,wikidataId/Q844062 -LEICESTER,nuts/UKF21 -LEEDS_BRADFORD, -HYNDBURN,wikidataId/Q1640713 -LINCOLN,wikidataId/Q180057 -BIRMINGHAM,nuts/UKG31 -AYR_PRESTWICK, -BASILDON,wikidataId/Q216649 -HEREFORD,wikidataId/Q204720 -CRAWLEY,wikidataId/Q844908 -COVENTRY,wikidataId/Q20986417 -WESTON_SUPER_MARE,wikidataId/Q845623 -WAVENEY,wikidataId/Q642424 -THANET,wikidataId/Q1752642 -WORTHING,wikidataId/Q671348 -MOTHERWELL_WISHAW,wikidataId/Q737547 -PETERBOROUGH_UK,wikidataId/Q12956645 -RUGBY,wikidataId/Q623765 -REDDITCH,wikidataId/Q865716 -DARLINGTON,wikidataId/Q213181 -GREATER_MANCHESTER,nuts/UKD3 -NOTTINGHAM,nuts/UKF14 -EASTLEIGH,wikidataId/Q731069 -ARUSHA,wikidataId/Q4301 -MODESTO,geoId/0648354 -MONTGOMERY,geoId/0151000 -NEWYORK,geoId/3651000 -CHICAGO,geoId/1714000 -GAINESVILLE_FL,geoId/1225175 -KILLEEN_TX,geoId/4839148 -MANCHESTER_US,geoId/3345140 -MINNEAPOLIS,geoId/2743000 -RALEIGH,geoId/3755000 -PORTLAND,geoId/4159000 -PHILADELPHIA,geoId/4260000 -TOLEDO_US,geoId/3977000 -TALLAHASEE,geoId/1270600 -SPRINGFIELD,geoId/2970000 -SAVANNAH,geoId/1369000 -VISALIA,geoId/0682954 -WACO,geoId/4876000 -CLOVIS,geoId/0614218 -OUAGADOUGOU,wikidataId/Q3777 -MELO,wikidataId/Q738250 -MERCEDES,wikidataId/Q844914 -MONTEVIDEO,wikidataId/Q1335 -RIVERA,wikidataId/Q646498 -PAYSANDU,wikidataId/Q1020814 
-MALDONALDO,wikidataId/Q16258 -LAS_PIEDRAS,wikidataId/Q615470 -SALTO, -TACUAREMBO,wikidataId/Q833016 -BESHARYK,wikidataId/Q891824 -NAVOIY,wikidataId/Q763015 -ANDIJAN,wikidataId/Q487656 -URGANCH,wikidataId/Q472925 -TURTKUL,wikidataId/Q1027623 -TERMIZ,wikidataId/Q491879 -QARSHI,wikidataId/Q644008 -TASHKENT,wikidataId/Q269 -NUKUS,wikidataId/Q489898 -BEKOBOD,wikidataId/Q815053 -NAMANGAN,wikidataId/Q492552 -BUKHARA,wikidataId/Q5764 -DENAU,wikidataId/Q1189758 -KOKAND,wikidataId/Q489890 -YARITAGUA,wikidataId/Q1023493 -MARIARA,wikidataId/Q1816684 -CIUDAD_GUAYANA,wikidataId/Q507080 -CIUDAD_OJEDA,wikidataId/Q953490 -CABUDARE,wikidataId/Q580968 -SAN_CRISTOBAL, -CABIMAS,wikidataId/Q723588 -SAN_JUAN_DE_LOS_MORROS,wikidataId/Q999117 -CUMANA, -GUARENAS_AND_GUATIRE,wikidataId/Q1026987 -LOS_TEQUES,wikidataId/Q695631 -CORO,wikidataId/Q1134454 -CARACAS,wikidataId/Q1533 -MARACAIBO,wikidataId/Q10324879 -MARACAY,wikidataId/Q333928 -CUA,wikidataId/Q770309 -BARCELONA_VENEZUELA, -ACARIGUA,wikidataId/Q338059 -ALTAGRACIA_DE_ORITUCO,wikidataId/Q433758 -VALERA,wikidataId/Q1010367 -VALENCIA_VENEZUELA,wikidataId/Q54880 -BARQUISIMETO,wikidataId/Q4709 -TUCUPITA,wikidataId/Q179007 -BARINAS,wikidataId/Q695623 -CIUDAD_BOLIVAR,wikidataId/Q26906 -SACARLOS_DEL_ZULIA,wikidataId/Q1005022 -APIA,wikidataId/Q36260 -ADAN,wikidataId/Q131694 -YARIM,wikidataId/Q568905 -DHAMAR,wikidataId/Q955523 -AMRAN,wikidataId/Q275720 -AL_HUDAYDAH,wikidataId/Q270041 -RADA_A,wikidataId/Q2125362 -TAIZZ,wikidataId/Q466216 -SANAA,wikidataId/Q2471 -TARIM,wikidataId/Q1014010 -NDOLA,wikidataId/Q219671 -TIRANA,wikidataId/Q19689 -ANDORRA_ALL,country/AND -WINDHOEK, -ANTIGUA, -BAHRAIN_ALL,country/BHR -ARMENIA_ALL,country/ARM -BERMUDA,country/BMU -LAPAZ,wikidataId/Q1491 -BOSNIA_AND_HERZEGOVINA_ALL,country/BIH -GABORONE,wikidataId/Q3919 -CIGRES, -BELMOPAN,wikidataId/Q3043 -BELARUS_ALL,country/BLR -YAOUNDE,wikidataId/Q3808 -PRAIA,wikidataId/Q3751 -DEHIWALA-MOUNTLAVINIA, -MORATUWA,wikidataId/Q867213 -SANTIAGO_PROVIDENCIA,wikidataId/Q51587 
-LOSLAGOS_PUERTOMONTT,wikidataId/Q36214 -COQUIMBO_COQUIMBO,wikidataId/Q3871 -TARAPACA_ARICA, -CANET, -KUNMING,wikidataId/Q182852 -SUZHOU,wikidataId/Q360643 -LANZHOU,wikidataId/Q183584 -BELLO,wikidataId/Q816024 -BUENAVENTURA,wikidataId/Q996581 -PEREIRA,wikidataId/Q51111 -ITAGUI,wikidataId/Q1527934 -DOSQUEBRADAS,wikidataId/Q1093584 -CUCUTA,wikidataId/Q216847 -CIEGODEAVILA,wikidataId/Q115465 -COTONOU,wikidataId/Q43595 -COPENHAGEN,wikidataId/Q1748 -SANTADOMINGO,wikidataId/Q34820 -LALIBERTAD_NUEVASANSALVADOR, -SANSALVADOR_SOYAPANGO,wikidataId/Q956031 -SANSALVADOR_MEJICANOS,wikidataId/Q723411 -SANSALVADOR_ILOPANGO,wikidataId/Q1659066 -SANSALVADOR_APOPA,wikidataId/Q619646 -HELSINKI,wikidataId/Q1757 -KUTAISI,wikidataId/Q172415 -BATUMI,wikidataId/Q25475 -TBILISI,wikidataId/Q994 -ATHENSGAA,wikidataId/Q1524 -CONAKRY,wikidataId/Q3733 -GEORGETOWN,wikidataId/Q10717 -LESCAYES,wikidataId/Q984786 -PETIONVILLE,wikidataId/Q1001440 -CHINA_HONGKONG_SAR_ALL,country/HKG -DELHI,wikidataId/Q987 -BANGALORE,wikidataId/Q1355 -WARANGAL,wikidataId/Q28169759 -JAKARTA,wikidataId/Q3630 -ABIDJAN,wikidataId/Q19830972 -KOSOVO_ALL,wikidataId/Q786124 -BISHEKEK,wikidataId/Q9361 -MASERU,wikidataId/Q3909 -MONROVIA,wikidataId/Q3748 -CHINA_MACAO_SAR_ALL,country/MAC -MALDIVES,country/MDV -NOUAKCHOTT,wikidataId/Q3688 -CUREPIPE,wikidataId/Q1002525 -PORTLOUIS,wikidataId/Q3929 -CHETUMAL_OTHONP_BLANCO_QUINTANAROO,wikidataId/Q459553 -CHILPANCINGO_GUERRERO,wikidataId/Q207935 -CUAUTLA_MORELOS,wikidataId/Q939959 -CUAUHTEMOC_CHIHUAHUA,wikidataId/Q645293 -CORDOBA_VERACRUZ,wikidataId/Q989492 -COLIMA,wikidataId/Q61309 -COATZACOALCOS_VERACRUZ,wikidataId/Q502023 -CELAYA_GUANAJUATO,wikidataId/Q580649 -CARMEN_CAMPECHE,wikidataId/Q991435 -IGUALA_GUERRERO,wikidataId/Q607167 -ZACATECAS_ZACATECAS,wikidataId/Q139242 -VICTORIA_TAMAULIPAS,wikidataId/Q331661 -ZAMORA_MICHOACAN,wikidataId/Q145865 -ZAPOPAN_JALISCO,wikidataId/Q147402 -ZITACUARO_MICHOACAN,wikidataId/Q1962272 -HIDALGO_MICHOACAN, -HERMOSILLO_SONORA,wikidataId/Q189138 
-GUAYMAS_SONORA,wikidataId/Q985521 -GUASAVE_SINALOA,wikidataId/Q588515 -CARDENAST_TABASCO,wikidataId/Q5795966 -CANCUN,wikidataId/Q8969 -ALTAMIRA_TAMAULIPAS,wikidataId/Q434225 -AGUASCALIENTES,wikidataId/Q79952 -CHALCOMEX,wikidataId/Q1962104 -ACUNACOAHUILA,wikidataId/Q179841 -HIDALGODELPARRAL_CHIHUAHUA,wikidataId/Q771214 -GUADALUPE_ZACATECAS,wikidataId/Q1961298 -GUADALUPE_NUEVOLEON,wikidataId/Q938835 -HUIXQUILUCAN_MEXICO,wikidataId/Q20146689 -CHIHUAHUA_CHIHUAHUA,wikidataId/Q61302 -CIUDADOBREGON,wikidataId/Q681340 -CORREGIDORA,wikidataId/Q1962054 -MONACO_ALL,country/MCO -MONTENEGRO_ALL,country/MNE -GHORAI,wikidataId/Q1447290 -AMSTERDAM-UTRECHT,wikidataId/Q727 -MARSHALLISLANDS,country/MHL -LIMA_CARABAYLLO, -UCAYALI_CALLERIA,wikidataId/Q2486015 -LIMA_PUENTEPIEDRA,wikidataId/Q3303771 -LIMA_LURIGANCHO,wikidataId/Q2718534 -JUNIN_ELTAMBO,wikidataId/Q5352223 -LIMA_LOSOLIVOS,wikidataId/Q2100996 -LIMA_CHORRILLOS,wikidataId/Q2321093 -LIMA_SANJUANDELURIGANCHO,wikidataId/Q2501691 -LIMA_COMAS,wikidataId/Q1113426 -LIMA_ATE,wikidataId/Q3769630 -CALLAO_CALLAOCERCADO, -LIMA_SANBORJA,wikidataId/Q2566267 -LIMA_VILLAMARIADELTRIUNFO,wikidataId/Q2038962 -PIURA_CASTILLA,wikidataId/Q3826215 -LAHORE_PHILIPPINES, -LISBON,wikidataId/Q597 -ANGUILLA,geoId/2801500 -CASTRIES,wikidataId/Q41699 -CAPETOWN,wikidataId/Q1185115 -DURBAN,wikidataId/Q5468 -STEVE_TSHWETE,wikidataId/Q2292410 -HARARE,wikidataId/Q3921 -DAMASCUS,wikidataId/Q3766 -LOME,wikidataId/Q3792 -SOUSSE, -NORTH_MACEDONIA_ALL, -MOSHI,wikidataId/Q271733 -DARESSALAAM,wikidataId/Q1960 -TOMPKINS_COUNTY,geoId/36109 -SANFRANSCISCO,geoId/0667000 -LUSAKA,wikidataId/Q3881 -NORTHERN_GOVERNORATE,wikidataId/Q840445 -CAPITAL_GOVERNORATE,wikidataId/Q528953 -MUHARRAQ_GOVERNORATE,wikidataId/Q375630 -SOUTHERN_GOVERNORATE,wikidataId/Q838532 -FRANCISTOWN,wikidataId/Q165422 -INDAIATUBA_CORREGO, -PORTO_ALEGRE_AND_NOVO_HAMBURGO,wikidataId/Q40269 -BELEM_ANANINDEUA, -CAMPINAS_PAULINIA, -SUINING,wikidataId/Q426644 -YULIN,wikidataId/Q571918 
-ARAUCA,wikidataId/Q626543 -VILLAVICENCIO,wikidataId/Q749224 -BABAHOYO,wikidataId/Q797568 -TOULOUSSE,wikidataId/Q7880 -CHOLOMA,wikidataId/Q998218 -ICELAND_ALL,wikidataId/Q1764 -BAREILLY,wikidataId/Q1797378 -PATI,wikidataId/Q10622 -SUBANG, -SAN_LUIS_RIO_COLORADO,wikidataId/Q995380 -SALTILLO,wikidataId/Q53077 -OCEANIA_EXCL_AUSTRALIA_AND_NEWZEALAND_ALL, -AL_KHOR,wikidataId/Q1156471 -HAIL,wikidataId/Q675568 -AT_TAIF,wikidataId/Q182640 -JIDDAH,wikidataId/Q374365 -KHAMIS_ABHA, -AD_DAMMAM,wikidataId/Q160320 -AL_HUFUF,wikidataId/Q27136 -AL_JUBAYL,wikidataId/Q27430 -AL_KHARJ,wikidataId/Q2162128 -ALFASHIR,wikidataId/Q311204 -WINTERHUR,wikidataId/Q9125 -AL_FUJAYRAH,wikidataId/Q4091 -RAS_AL_KHAIMAH, -HOUSTON,geoId/4835000 -CLEVELAND,geoId/3916000 -NAVOI_KARMANA, -BARCELONA_AND_PUERTO_LA_CRUZ,wikidataId/Q379509 diff --git a/scripts/un/sdg/cities.py b/scripts/un/sdg/cities.py deleted file mode 100644 index d9a7b13ad5..0000000000 --- a/scripts/un/sdg/cities.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -'''Finds dcids for cities. - -Produces: -* cities.csv: dcid for each city code - -There are a few city codes that are still missing. -These can be manually filled in and verified. -**This script ideally shouldn't need to be run again.** - -Usage: python3 cities.py -''' -import csv -import requests -import pandas as pd -import sys - - -def get_cities(json, api_key): - '''Applies find entities API for given json. 
- - Args: - json: Input json. - api_key: API key. - - Returns: - API response. - ''' - return requests.post('https://api.datacommons.org/v1/bulk/find/entities', - headers={ - 'X-API-Key': api_key - }, - json=json).json() - - -def write_cities(file, cities, api_key): - '''Writes city codes and names to file. - - Args: - file: Output file path. - cities: Map of city names to codes. - api_key: API key. - ''' - with open(file, 'w') as f: - writer = csv.DictWriter(f, fieldnames=['name', 'dcid']) - writer.writeheader() - for city in list(cities.keys()): - json = {'entities': [{'description': city}]} - response = get_cities(json, api_key) - try: - for entity in response['entities']: - dcid = entity['dcids'][0] if 'dcids' in entity else '' - writer.writerow({ - 'name': cities[entity['description']], - 'dcid': dcid - }) - except KeyError: - writer.writerow({'name': cities[city], 'dcid': ''}) - - -if __name__ == '__main__': - df = pd.read_excel(f'sdg-dataset/output/SDG_cities_enumeration.xlsx') - cities = {} - for _, row in df.iterrows(): - cities[row['CITY_NAME'] + ', ' + row['GEO_AREA_NAME'].replace( - '_', ' ').title()] = row['CITY_CODE'] - write_cities('cities_test.csv', cities, sys.argv[1]) diff --git a/scripts/un/sdg/cities_test.py b/scripts/un/sdg/cities_test.py deleted file mode 100644 index aafee5f17e..0000000000 --- a/scripts/un/sdg/cities_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-'''Tests for cities.py. - -Usage: python3 -m unittest discover -v -s ../ -p "cities_test.py" -''' -import os -import sys -import tempfile -import unittest -from unittest import mock - -sys.path.append( - os.path.dirname(os.path.dirname(os.path.dirname( - os.path.abspath(__file__))))) -from un.sdg import cities - -module_dir_ = os.path.dirname(__file__) - -CITIES = { - 'Mazār-e Sharīf, Afghanistan': 'AF_MAZAR_E_SHARIF', -} -RESPONSE = { - 'entities': [{ - 'description': 'Mazār-e Sharīf, Afghanistan', - 'dcids': ['wikidataId/Q130469'] - }] -} - - -class CitiesTest(unittest.TestCase): - - def test_write_cities(self): - with tempfile.TemporaryDirectory() as tmp_dir: - cities.get_cities = mock.Mock(return_value=RESPONSE) - output = os.path.join(tmp_dir, 'output.csv') - cities.write_cities(output, CITIES, '') - with open(output) as result: - with open( - os.path.join( - module_dir_, - 'testdata/expected_cities.csv')) as expected: - self.assertEqual(result.read(), expected.read()) - - -if __name__ == '__main__': - unittest.main() diff --git a/scripts/un/sdg/geography.py b/scripts/un/sdg/geography.py new file mode 100644 index 0000000000..ce551bc0e9 --- /dev/null +++ b/scripts/un/sdg/geography.py @@ -0,0 +1,362 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +'''Generates geographies for UN places. 
+ +Produces: +* un_places.mcf (place definitions) +* un_containment.mcf (place containment triples) +* place_mappings.csv (SDG code -> dcid) + +Usage: python3 geography.py +''' +import collections +import csv +import json +import os + +# Output folder. +FOLDER = 'geography' + +PLACE_TEMPLATE = ''' +Node: dcid:{dcid} +typeOf: dcs:{type} +name: "{name}" +unDataCode: "{code}" +unDataLabel: "{label}" +''' +CONTAINMENT_TEMPLATE = ''' +Node: dcid:{dcid} +typeOf: dcs:{type}{containment} +''' + +# Curated map of dcid to SDG code to avoid duplicates. +FIXED = { + # Africa. + 'africa': '2', + # Source geographies without a corresponding geography in UNdata. + 'undata-geo/G99999999': '952', +} + +# Geography types. +CITY = 'City' +CONTINENT = 'Continent' +COUNTRY = 'Country' +GEO_REGION = 'GeoRegion' +SAMPLING_STATION = 'SamplingStation' +UN_GEO_REGION = 'UNGeoRegion' + +# UN geography prefix. +UN_PREFIX = 'undata-geo' + + +# Simplified representation of DC MCF Node. +class Node: + + def __init__(self, dcid, type, name): + self.dcid = dcid + self.type = type + self.name = name + + def __eq__(self, other): + if not isinstance(other, Node): + return NotImplemented + + return self.dcid == other.dcid and self.type == other.type and self.name == other.name + + def __str__(self): + return self.dcid + self.type + self.name + + def __hash__(self): + return (hash(str(self))) + + def __lt__(self, other): + if not isinstance(other, Node): + return NotImplemented + + return str(self) < str(other) + + +def get_sdg2type(file): + '''Produces map of SDG code -> SDG type. + + Args: + file: Input file path. + + Returns: + Map of SDG code -> SDG type. + ''' + sdg2type = {} + with open(file) as f: + reader = csv.DictReader(f) + for row in reader: + sdg2type[row['GEOGRAPHY_CODE']] = row['GEOGRAPHY_TYPE'] + return sdg2type + + +def get_sdg_un_maps(file): + '''Produces maps of UN code -> SDG code & SDG code -> UN code. + + Args: + file: Input file path. + + Returns: + - Map of UN code -> SDG code. 
+ - Map of SDG code -> UN code. + ''' + un2sdg = {} # Map of UN code -> SDG code. + sdg2un = {} # Map of SDG code -> UN code. + + # Use special encoding to parse UN input file. + with open(file, encoding='utf-8-sig') as f: + reader = csv.DictReader(f) + for row in reader: + subject = row['subject_id'] # UN code. + object = row['object_id'].removeprefix('sdg-geo:') # SDG code. + if not subject or not object: + continue + un2sdg[subject] = object + sdg2un[object] = subject + return un2sdg, sdg2un + + +def get_un2dc_curated(file): + '''Produces map of UN code -> curated Node. + + Args: + file: Input file path. + + Returns: + Map of UN code -> curated Node. + ''' + un2dc_curated = {} + with open(file) as f: + reader = csv.DictReader(f) + for row in reader: + + # Skip unmapped places. + if row['unDataCode'] == 'x': + continue + + # Add missing type for NorthernEurope. + if row['dcid'] == 'NorthernEurope': + type = UN_GEO_REGION + + else: + type = json.loads(row['typeOf'].replace("'", '"'))[0]['dcid'] + un2dc_curated[row['unDataCode']] = Node(row['dcid'], type, + row['dc_name']) + return un2dc_curated + + +def should_include_containment(s, o): + '''Returns whether triple should be included in containment. + + Args: + s: Subject node. + o: Object node. + + Returns: + Whether triple should be included in containment. 
+ ''' + if (s.type == GEO_REGION or s.type == UN_GEO_REGION) and o.dcid == 'Earth': + return True + elif (s.type == GEO_REGION or + s.type == UN_GEO_REGION) and o.type == CONTINENT: + return True + elif (s.type == GEO_REGION or + s.type == UN_GEO_REGION) and (o.type == GEO_REGION or + o.type == UN_GEO_REGION): + return True + elif s.type == COUNTRY and (o.type == GEO_REGION or + o.type == UN_GEO_REGION): + return True + elif s.type == SAMPLING_STATION and o.type == COUNTRY: + return True + elif s.type == CITY and s.dcid.startswith(UN_PREFIX) and o.type == COUNTRY: + return True + return False + + +def write_un_places(input_geos, output, sdg2type, un2sdg, un2dc_curated): + '''Writes UN places to output and computes new places. + + Args: + input_geos: Path to input UN geography file. + output: Path to output file. + sdg2type: Map of SDG code -> SDG type. + un2sdg: Map of UN code -> SDG code. + un2dc_curated: Map of UN code -> curated Node. + + Returns: + - Map of UN code -> generated Node. + - List of (dcid, type) for new places. + ''' + un2dc_generated = {} + new_subjects = [] + with open(input_geos) as f_in: + with open(output, 'w') as f_out: + reader = csv.DictReader(f_in) + for row in reader: + subject = row['subject_id'] + if subject in un2dc_curated: + dcid = un2dc_curated[subject].dcid + type = un2dc_curated[subject].type + name = un2dc_curated[subject].name + else: + dcid = row['subject_id'].replace(':', '/') + if row['subject_id'] in un2sdg and un2sdg[ + row['subject_id']] in sdg2type: + sdg_type = sdg2type[un2sdg[row['subject_id']]] + if sdg_type == SAMPLING_STATION or sdg_type == CITY: + type = sdg_type + else: + type = GEO_REGION + else: + type = GEO_REGION + name = row['subject_label'].split('_')[-1] + un2dc_generated[subject] = Node(dcid, type, name) + + # Add non-UN-specific places to new_subjects. 
+ if type == GEO_REGION or type == UN_GEO_REGION or type == SAMPLING_STATION or ( + type == CITY and dcid.startswith(UN_PREFIX)): + new_subjects.append(Node(dcid, type, name)) + + f_out.write( + PLACE_TEMPLATE.format_map({ + 'dcid': dcid, + 'type': type, + 'name': name, + 'code': row['subject_id'], + 'label': row['subject_label'] + })) + return un2dc_generated, new_subjects + + +def process_containment(input_containment, un2dc_curated, un2dc_generated): + '''Filters UN geography containment triples. + + Args: + input_containment: Path to input containment file. + un2dc_curated: Map of UN code -> curated Node. + un2dc_generated: Map of UN code -> generated Node. + + Returns: + - Map of child Node -> list of containing object dcids. + ''' + containment = collections.defaultdict(list) + + # Use special encoding to parse UN input file. + with open(input_containment, encoding='utf-8-sig') as f: + reader = csv.DictReader(f) + for row in reader: + subject = UN_PREFIX + ':' + row['subject_id'] + if subject in un2dc_curated: + s = un2dc_curated[subject] + elif subject in un2dc_generated: + s = un2dc_generated[subject] + else: + print('Missing subject: ', subject) + object = UN_PREFIX + ':' + row['object_id'] + if object in un2dc_curated: + o = un2dc_curated[object] + elif object in un2dc_generated: + o = un2dc_generated[object] + else: + print('Missing object: ', object) + if should_include_containment(s, o): + containment[s].append(o.dcid) + return containment + + +def write_un_containment(output, containment, new_subjects): + '''Writes containment triples to output. + + Args: + output: Path to output file. + containment: Map of child Node -> list of containing object dcids. + new_subjects: List of Nodes for new places. 
+ + ''' + with open(output, 'w') as f: + for s in sorted(containment): + c = '' + for o in containment[s]: + c += '\ncontainedInPlace: dcid:' + o + f.write( + CONTAINMENT_TEMPLATE.format_map({ + 'dcid': s.dcid, + 'type': s.type, + 'containment': c + })) + + # For new places with no specified containment, add containment in + # Earth. + for s in sorted(new_subjects): + if s in containment: + continue + c = '\ncontainedInPlace: dcid:Earth' + f.write( + CONTAINMENT_TEMPLATE.format_map({ + 'dcid': s.dcid, + 'type': s.type, + 'containment': c + })) + + +def write_place_mappings(output, sdg2un, un2dc_curated, un2dc_generated): + '''Writes SDG code -> dcid mappings to output. + + Args: + output: Path to output file. + sdg2un: Map of SDG code -> UN code. + un2dc_curated: Map of UN code -> curated Node. + un2dc_generated: Map of UN code -> generated Node. + ''' + with open(output, 'w') as f: + writer = csv.DictWriter(f, fieldnames=['sdg', 'dcid']) + writer.writeheader() + for code in sorted(sdg2un): + un = sdg2un[code] + if un in un2dc_curated: + dcid = un2dc_curated[un].dcid + elif un in un2dc_generated: + dcid = un2dc_generated[un].dcid + else: + continue + + # Filter duplicates. + if dcid in FIXED and code != FIXED[dcid]: + continue + + writer.writerow({'sdg': code, 'dcid': dcid}) + + +if __name__ == '__main__': + + # Read input geography mappings. 
+ sdg2type = get_sdg2type('sdg-dataset/output/SDG_geographies.csv') + un2sdg, sdg2un = get_sdg_un_maps( + 'sssom-mappings/output_mappings/undata-geo__sdg-geo.csv') + un2dc_curated = get_un2dc_curated(os.path.join(FOLDER, 'places.csv')) + + un2dc_generated, new_subjects = write_un_places( + os.path.join(FOLDER, 'geographies.csv'), + os.path.join(FOLDER, 'un_places.mcf'), sdg2type, un2sdg, un2dc_curated) + containment = process_containment( + 'sssom-mappings/data/enumerations/undata/geography_hierarchy.csv', + un2dc_curated, un2dc_generated) + write_un_containment(os.path.join(FOLDER, 'un_containment.mcf'), + containment, new_subjects) + write_place_mappings(os.path.join(FOLDER, 'place_mappings.csv'), sdg2un, + un2dc_curated, un2dc_generated) diff --git a/scripts/un/sdg/geography/geographies.csv b/scripts/un/sdg/geography/geographies.csv new file mode 100644 index 0000000000..043f711775 --- /dev/null +++ b/scripts/un/sdg/geography/geographies.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07516b4b9647c700124b0b6056d832a267ff9e3539dcb95633462fcc119f9f2 +size 340006 diff --git a/scripts/un/sdg/geography/place_mappings.csv b/scripts/un/sdg/geography/place_mappings.csv new file mode 100644 index 0000000000..25fb9a8b1a --- /dev/null +++ b/scripts/un/sdg/geography/place_mappings.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148d9b119025d471f8bb20b8e1f42ebe4a5de5ed000e8e21e8662b3d70eec241 +size 251752 diff --git a/scripts/un/sdg/geography/places.csv b/scripts/un/sdg/geography/places.csv new file mode 100644 index 0000000000..1f5fa06dab --- /dev/null +++ b/scripts/un/sdg/geography/places.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6ac35a09404d989e58ff9e48c7e56e59b035aca412587e1e60dc9f9f99f8eaa +size 1036036 diff --git a/scripts/un/sdg/geography/un_containment.mcf b/scripts/un/sdg/geography/un_containment.mcf new file mode 100644 index 0000000000..a807fad634 --- /dev/null +++ 
b/scripts/un/sdg/geography/un_containment.mcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e56924da8334c67fafc9ee56b8c750d20bd753e47ec01669e7686d278305455 +size 367508 diff --git a/scripts/un/sdg/geography/un_places.mcf b/scripts/un/sdg/geography/un_places.mcf new file mode 100644 index 0000000000..97c436963d --- /dev/null +++ b/scripts/un/sdg/geography/un_places.mcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24496e7cd2bf4c1472cd091a1f59b2e7d3e5c2bb6bb8f648ed2804dda193f21 +size 1181069 diff --git a/scripts/un/sdg/geography_test.py b/scripts/un/sdg/geography_test.py new file mode 100644 index 0000000000..284cde5274 --- /dev/null +++ b/scripts/un/sdg/geography_test.py @@ -0,0 +1,137 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +'''Tests for geography.py. + +Usage: python3 -m unittest discover -v -s ../ -p "geography_test.py" +''' +import os +import sys +import tempfile +import unittest + +sys.path.append( + os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__))))) +from un.sdg import geography + +module_dir_ = os.path.dirname(__file__) + +FOLDER = os.path.join(module_dir_, 'testdata/test_geography') + +# Mock input data. 
SDG2TYPE = {'4': 'Country'}
UN2SDG = {
    'undata-geo:G00000020': '4',
}
SDG2UN = {'4': 'undata-geo:G00000020'}
UN2DC_CURATED = {
    'undata-geo:G00000020':
        geography.Node('country/AFG', 'Country', 'Afghanistan'),
    'undata-geo:G00003250':
        geography.Node('country/ARE', 'Country', 'United Arab Emirates'),
    'undata-geo:G00100000':
        geography.Node('Earth', 'Place', 'World'),
    'undata-geo:G00114000':
        geography.Node('asia', 'Continent', 'Asia'),
    'undata-geo:G00119000':
        geography.Node('SouthernAsia', 'UNGeoRegion', 'Southern Asia'),
    'undata-geo:G00120000':
        geography.Node('WesternAsia', 'UNGeoRegion', 'Western Asia'),
    'undata-geo:G00403000':
        geography.Node('undata-geo/G00403000', 'GeoRegion',
                       'Landlocked developing countries (LLDCs)'),
    'undata-geo:G00404000':
        geography.Node('undata-geo/G00404000', 'GeoRegion',
                       'Least developed countries (LDCs)'),
}
UN2DC_GENERATED = {
    'undata-geo:G00000030':
        geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman')
}

# Add additional referenced objects that aren't defined in test_geographies.csv.
UN2DC_GENERATED_FULL = {
    **UN2DC_GENERATED,
    **{
        'undata-geo:G00403300':
            geography.Node('undata-geo/G00403300', 'GeoRegion',
                           'Landlocked developing countries (LLDCs): Asia'),
        'undata-geo:G00404300':
            geography.Node('undata-geo/G00404300', 'GeoRegion',
                           'Least developed countries (LDCs): Asia'),
    }
}

# Test intermediate output data.
NEW_SUBJECTS = [geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman')]
CONTAINMENT = {
    geography.Node('country/AFG', 'Country', 'Afghanistan'): [
        'SouthernAsia', 'undata-geo/G00403000', 'undata-geo/G00403300',
        'undata-geo/G00404000', 'undata-geo/G00404300'
    ],
    geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman'): [
        'Earth', 'asia', 'WesternAsia'
    ],
}


class GeographyTest(unittest.TestCase):
    '''Tests for the place, containment and mapping writers in geography.py.'''

    def _assert_matches_expected(self, result_path, expected_name):
        '''Asserts that a generated file equals the golden file in FOLDER.

        Args:
            result_path: Path of the file produced by the code under test.
            expected_name: File name of the golden file inside FOLDER.
        '''
        with open(result_path) as result, open(
                os.path.join(FOLDER, expected_name)) as expected:
            self.assertEqual(result.read(), expected.read())

    def test_should_include_containment(self):
        '''Checks one included and one excluded containment pair.'''
        self.assertTrue(
            geography.should_include_containment(
                geography.Node('AustrailiaAndNewZealand', 'UNGeoRegion',
                               'Australia and New Zealand'),
                geography.Node('oceania', 'Continent', 'Oceania')))
        self.assertFalse(
            geography.should_include_containment(
                geography.Node('geoId/3502000', 'City', 'Albuquerque'),
                geography.Node('country/USA', 'Country',
                               'United States of America')))

    def test_write_un_places(self):
        '''Checks the place mcf and both values returned by the writer.'''
        with tempfile.TemporaryDirectory() as tmp_dir:
            output = os.path.join(tmp_dir, 'un_places.mcf')
            un2dc_generated, new_subjects = geography.write_un_places(
                os.path.join(FOLDER, 'test_geographies.csv'), output, SDG2TYPE,
                UN2SDG, UN2DC_CURATED)
            self._assert_matches_expected(output, 'expected_un_places.mcf')
            self.assertEqual(un2dc_generated, UN2DC_GENERATED)
            self.assertEqual(new_subjects, NEW_SUBJECTS)

    def test_process_containment(self):
        '''Checks the containment map built from the hierarchy csv.'''
        containment = geography.process_containment(
            os.path.join(FOLDER, 'test_geography_hierarchy.csv'), UN2DC_CURATED,
            UN2DC_GENERATED_FULL)
        self.assertEqual(containment, CONTAINMENT)

    def test_write_un_containment(self):
        '''Checks the containment mcf against the golden file.'''
        with tempfile.TemporaryDirectory() as tmp_dir:
            output = os.path.join(tmp_dir, 'un_containment.mcf')
            geography.write_un_containment(output, CONTAINMENT, NEW_SUBJECTS)
            self._assert_matches_expected(output,
                                          'expected_un_containment.mcf')

    def test_write_place_mappings(self):
        '''Checks the SDG code -> dcid csv against the golden file.'''
        with tempfile.TemporaryDirectory() as tmp_dir:
            output = os.path.join(tmp_dir, 'place_mappings.csv')
            # Write straight to the temp path. Joining it onto FOLDER only
            # worked because os.path.join discards everything before an
            # absolute component.
            geography.write_place_mappings(output, SDG2UN, UN2DC_CURATED,
                                           UN2DC_GENERATED_FULL)
            self._assert_matches_expected(output,
                                          'expected_place_mappings.csv')
-Cyprus 196 CYP -Czechia 203 CZE -Democratic People's Republic of Korea 408 PRK -Democratic Republic of the Congo 180 COD -Denmark 208 DNK -Djibouti 262 DJI -Dominica 212 DMA -Dominican Republic 214 DOM -Ecuador 218 ECU -Egypt 818 EGY -El Salvador 222 SLV -Equatorial Guinea 226 GNQ -Eritrea 232 ERI -Estonia 233 EST -Eswatini 748 SWZ -Ethiopia 231 ETH -Falkland Islands (Malvinas) 238 FLK -Faroe Islands 234 FRO -Fiji 242 FJI -Finland 246 FIN -France 250 FRA -French Guiana 254 GUF -French Polynesia 258 PYF -French Southern Territories 260 ATF -Gabon 266 GAB -Gambia 270 GMB -Georgia 268 GEO -Germany 276 DEU -Ghana 288 GHA -Gibraltar 292 GIB -Greece 300 GRC -Greenland 304 GRL -Grenada 308 GRD -Guadeloupe 312 GLP -Guam 316 GUM -Guatemala 320 GTM -Guernsey 831 GGY -Guinea 324 GIN -Guinea-Bissau 624 GNB -Guyana 328 GUY -Haiti 332 HTI -Heard Island and McDonald Islands 334 HMD -Holy See 336 VAT -Honduras 340 HND -Hungary 348 HUN -Iceland 352 ISL -India 356 IND -Indonesia 360 IDN -Iran (Islamic Republic of) 364 IRN -Iraq 368 IRQ -Ireland 372 IRL -Isle of Man 833 IMN -Israel 376 ISR -Italy 380 ITA -Jamaica 388 JAM -Japan 392 JPN -Jersey 832 JEY -Jordan 400 JOR -Kazakhstan 398 KAZ -Kenya 404 KEN -Kiribati 296 KIR -Kuwait 414 KWT -Kyrgyzstan 417 KGZ -Lao People's Democratic Republic 418 LAO -Latvia 428 LVA -Lebanon 422 LBN -Lesotho 426 LSO -Liberia 430 LBR -Libya 434 LBY -Liechtenstein 438 LIE -Lithuania 440 LTU -Luxembourg 442 LUX -Madagascar 450 MDG -Malawi 454 MWI -Malaysia 458 MYS -Maldives 462 MDV -Mali 466 MLI -Malta 470 MLT -Marshall Islands 584 MHL -Martinique 474 MTQ -Mauritania 478 MRT -Mauritius 480 MUS -Mayotte 175 MYT -Mexico 484 MEX -Micronesia (Federated States of) 583 FSM -Monaco 492 MCO -Mongolia 496 MNG -Montenegro 499 MNE -Montserrat 500 MSR -Morocco 504 MAR -Mozambique 508 MOZ -Myanmar 104 MMR -Namibia 516 NAM -Nauru 520 NRU -Nepal 524 NPL -Netherlands 528 NLD -New Caledonia 540 NCL -New Zealand 554 NZL -Nicaragua 558 NIC -Niger 562 NER -Nigeria 566 NGA 
-Niue 570 NIU -Norfolk Island 574 NFK -North Macedonia 807 MKD -Northern Mariana Islands 580 MNP -Norway 578 NOR -Oman 512 OMN -Pakistan 586 PAK -Palau 585 PLW -Panama 591 PAN -Papua New Guinea 598 PNG -Paraguay 600 PRY -Peru 604 PER -Philippines 608 PHL -Pitcairn 612 PCN -Poland 616 POL -Portugal 620 PRT -Puerto Rico 630 PRI -Qatar 634 QAT -Republic of Korea 410 KOR -Republic of Moldova 498 MDA -Réunion 638 REU -Romania 642 ROU -Russian Federation 643 RUS -Rwanda 646 RWA -Saint Barthélemy 652 BLM -Saint Helena 654 SHN -Saint Kitts and Nevis 659 KNA -Saint Lucia 662 LCA -Saint Martin (French Part) 663 MAF -Saint Pierre and Miquelon 666 SPM -Saint Vincent and the Grenadines 670 VCT -Samoa 882 WSM -San Marino 674 SMR -Sao Tome and Principe 678 STP -Sark 680 -Saudi Arabia 682 SAU -Senegal 686 SEN -Serbia 688 SRB -Seychelles 690 SYC -Sierra Leone 694 SLE -Singapore 702 SGP -Sint Maarten (Dutch part) 534 SXM -Slovakia 703 SVK -Slovenia 705 SVN -Solomon Islands 090 SLB -Somalia 706 SOM -South Africa 710 ZAF -South Georgia and the South Sandwich Islands 239 SGS -South Sudan 728 SSD -Spain 724 ESP -Sri Lanka 144 LKA -State of Palestine 275 PSE -Sudan 729 SDN -Suriname 740 SUR -Svalbard and Jan Mayen Islands 744 SJM -Sweden 752 SWE -Switzerland 756 CHE -Syrian Arab Republic 760 SYR -Tajikistan 762 TJK -Thailand 764 THA -Timor-Leste 626 TLS -Togo 768 TGO -Tokelau 772 TKL -Tonga 776 TON -Trinidad and Tobago 780 TTO -Tunisia 788 TUN -Türkiye 792 TUR -Turkmenistan 795 TKM -Turks and Caicos Islands 796 TCA -Tuvalu 798 TUV -Uganda 800 UGA -Ukraine 804 UKR -United Arab Emirates 784 ARE -United Kingdom of Great Britain and Northern Ireland 826 GBR -United Republic of Tanzania 834 TZA -United States Minor Outlying Islands 581 UMI -United States of America 840 USA -United States Virgin Islands 850 VIR -Uruguay 858 URY -Uzbekistan 860 UZB -Vanuatu 548 VUT -Venezuela (Bolivarian Republic of) 862 VEN -Viet Nam 704 VNM -Wallis and Futuna Islands 876 WLF -Western Sahara 732 ESH -Yemen 887 
def get_place_mappings(file):
    '''Produces map of SDG code -> dcid.

    Args:
        file: Path to an input csv with 'sdg' and 'dcid' columns.

    Returns:
        Map of SDG code -> dcid (both as strings).
    '''
    # newline='' lets the csv module do its own line-ending handling.
    with open(file, newline='', encoding='utf-8') as f:
        return {
            str(row['sdg']): str(row['dcid']) for row in csv.DictReader(f)
        }


def get_geography(code, place_mappings):
    '''Returns dcid of geography.

    Args:
        code: Geography code (converted to a string for the lookup).
        place_mappings: Map of SDG code -> dcid.

    Returns:
        'dcid:'-prefixed geography dcid, or '' if the code is not mapped.
    '''
    key = str(code)
    if key not in place_mappings:
        return ''
    return 'dcid:' + place_mappings[key]


def drop_null(value, series, footnote):
    '''Returns value or '' if it should be dropped for being null.

    A value is dropped only when all three conditions hold: the series is
    listed in util.ZERO_NULL, the footnote equals util.ZERO_NULL_TEXT, and
    the value is zero.

    Args:
        value: Input value.
        series: Series code.
        footnote: Footnote for observation.

    Returns:
        value or ''.
    '''
    if series not in util.ZERO_NULL:
        return value
    if footnote != util.ZERO_NULL_TEXT:
        return value
    # With the default tolerances isclose(x, 0) is true only for an exact zero.
    if math.isclose(float(value), 0):
        return ''
    return value


def drop_special(value, variable):
    '''Returns value or '' if it should be dropped based on special curation.

    Args:
        value: Input value.
        variable: Input variable code; the series code is the part before
          util.SDG_CODE_SEPARATOR.

    Returns:
        value or ''.
    '''
    if variable in util.DROP_VARIABLE:
        return ''
    series = variable.split(util.SDG_CODE_SEPARATOR)[0]
    if series in util.DROP_SERIES:
        return ''
    return value


def fix_encoding(s):
    '''Fixes input encoding to decode special characters.

    Repairs text whose UTF-8 bytes were decoded as Latin-1 by reversing that
    round trip. Strings that are not such mojibake are returned unchanged.

    Args:
        s: Input string.

    Returns:
        String with special characters decoded.
    '''
    try:
        return s.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Either s has characters outside Latin-1 or its Latin-1 bytes are
        # not valid UTF-8, so it was not double-encoded. Keep it as is.
        return s
''' with open(os.path.join(schema_dir, 'series.mcf'), 'w') as f_series: with open(os.path.join(schema_dir, 'sdg.textproto'), 'w') as f_vertical: @@ -147,7 +198,8 @@ def process(input_dir, schema_dir, csv_dir): for _, row in df.iterrows(): if str(row['Enumeration_Code_SDMX']) != 'CUST_BREAKDOWN' and str( - row['Enumeration_Code_SDMX']) != 'COMPOSITE_BREAKDOWN': + row['Enumeration_Code_SDMX']) != 'COMPOSITE_BREAKDOWN' and str( + row['Enumeration_Code_SDMX']) != 'UNIT_MEASURE': dimensions[str(row['Enumeration_Code_SDMX'])][str( row['EnumerationValue_Code_SDMX'])] = str( row['EnumerationValue_Name']) @@ -183,10 +235,26 @@ def process(input_dir, schema_dir, csv_dir): if df.empty: continue + # Drop known null values. + df['OBS_VALUE'] = df.apply(lambda x: drop_null( + x['OBS_VALUE'], x['SERIES_CODE'], x['FOOT_NOTE']), + axis=1) + df = df[df['OBS_VALUE'] != ''] + if df.empty: + continue + + # Drop curated. + df['OBS_VALUE'] = df.apply( + lambda x: drop_special(x['OBS_VALUE'], x['VARIABLE_CODE']), + axis=1) + df = df[df['OBS_VALUE'] != ''] + if df.empty: + continue + # Format places. - df['GEOGRAPHY_CODE'] = df.apply(lambda x: get_geography( - x['GEOGRAPHY_CODE'], x['GEOGRAPHY_TYPE']), - axis=1) + df['GEOGRAPHY_CODE'] = df.apply( + lambda x: get_geography(x['GEOGRAPHY_CODE'], place_mappings), + axis=1) df = df[df['GEOGRAPHY_CODE'] != ''] if df.empty: continue @@ -205,9 +273,9 @@ def process(input_dir, schema_dir, csv_dir): 'SG_SCP_PROCN_LS.LEVEL_STATUS--DEG_MLOW__GOVERNMENT_NAME--CITY_OF_WROCLAW' ) - sv_frames.append(df.loc[:, - ['VARIABLE_CODE', 'VARIABLE_DESCRIPTION'] + - properties].drop_duplicates()) + sv_frames.append( + df.loc[:, ['VARIABLE_CODE', 'VARIABLE_DESCRIPTION', 'SOURCE'] + + properties].drop_duplicates()) measurement_method_frames.append( df.loc[:, ['NATURE', 'OBS_STATUS', 'REPORTING_TYPE']]. 
drop_duplicates()) @@ -215,8 +283,8 @@ def process(input_dir, schema_dir, csv_dir): df['VARIABLE_CODE'] = df['VARIABLE_CODE'].apply( lambda x: 'dcs:sdg/' + x) - df['UNIT_MEASURE'] = df.apply( - lambda x: get_unit(x['UNIT_MEASURE'], x['BASE_PERIOD']), axis=1) + df['UNIT_MEASURE'] = df['UNIT_MEASURE'].apply( + lambda x: 'dcs:SDG_' + x) df['MEASUREMENT_METHOD'] = df.apply( lambda x: 'dcs:' + get_measurement_method(x), axis=1) @@ -232,9 +300,10 @@ def process(input_dir, schema_dir, csv_dir): with open(os.path.join(schema_dir, 'sv.mcf'), 'w') as f: for df in sv_frames: - for _, row in df.iterrows(): + main = df.drop(['SOURCE'], axis=1).drop_duplicates() + for _, row in main.iterrows(): cprops = '' - for dimension in sorted(df.columns[2:]): + for dimension in sorted(main.columns[2:]): # Skip totals. if row[dimension] == util.TOTAL: continue @@ -255,6 +324,22 @@ def process(input_dir, schema_dir, csv_dir): val = 'SDG_' + enum + 'Enum_' + val cprops += f'\n{prop}: dcs:{val}' + + # Add list of observation sources to 'footnote' property on SV. 
+ sources = df.loc[df['VARIABLE_CODE'] == row['VARIABLE_CODE']] + sources = sources.dropna(subset=['SOURCE']) + sources = sources.loc[:, ['SOURCE']].drop_duplicates()['SOURCE'] + footnote = '' + if not sources.empty: + footnote = '\nfootnote: "Includes data from the following sources: ' + '; '.join( + sorted([ + fix_encoding( + str(s)).rstrip('.,;:!?').strip().replace( + '"', "'").replace('\n', '').replace( + '\t', '').replace('__', '_') + for s in sources + ])) + '"' + f.write( util.SV_TEMPLATE.format_map({ 'dcid': @@ -266,6 +351,8 @@ def process(input_dir, schema_dir, csv_dir): '"' + row['VARIABLE_DESCRIPTION'] + '"', 'cprops': cprops, + 'footnote': + footnote, })) with open(os.path.join(schema_dir, 'schema.mcf'), 'w') as f: @@ -337,4 +424,5 @@ def process(input_dir, schema_dir, csv_dir): if os.path.exists('csv'): shutil.rmtree('csv') os.makedirs('csv') - process('sdg-dataset/output', 'schema', 'csv') + place_mappings = get_place_mappings('geography/place_mappings.csv') + process('sdg-dataset/output', 'schema', 'csv', place_mappings) diff --git a/scripts/un/sdg/process_test.py b/scripts/un/sdg/process_test.py index e8117b5c93..9efe38ed03 100644 --- a/scripts/un/sdg/process_test.py +++ b/scripts/un/sdg/process_test.py @@ -28,6 +28,21 @@ module_dir_ = os.path.dirname(__file__) +PLACE_MAPPINGS = { + '1': 'Earth', + '2': 'africa', + '4': 'country/AFG', + '5': 'southamerica', + '8': 'country/ALB', + '9': 'oceania', + '11': 'WesternAfrica', + '12': 'country/DZA', + '13': 'CentralAmerica', + '14': 'EasternAfrica', + '840': 'country/USA', + 'AF_MAZAR_E_SHARIF': 'wikidataId/Q130469' +} + def assert_equal_dir(self, result_dir, expected_dir): for root, _, files in os.walk(result_dir): @@ -40,16 +55,12 @@ def assert_equal_dir(self, result_dir, expected_dir): class ProcessTest(unittest.TestCase): def test_get_geography(self): - self.assertEqual(process.get_geography(840, 'Country'), - 'dcs:country/USA') - self.assertEqual(process.get_geography('AF_MAZAR_E_SHARIF', 'City'), - 
'dcs:wikidataId/Q130469') - self.assertEqual(process.get_geography(1, 'Region'), 'dcs:Earth') - - def test_get_unit(self): - self.assertEqual(process.get_unit('CON_USD', 2021), '[CON_USD 2021]') - self.assertEqual(process.get_unit('CON_USD', float('nan')), - 'dcs:SDG_CON_USD') + self.assertEqual(process.get_geography(840, PLACE_MAPPINGS), + 'dcid:country/USA') + self.assertEqual( + process.get_geography('AF_MAZAR_E_SHARIF', PLACE_MAPPINGS), + 'dcid:wikidataId/Q130469') + self.assertEqual(process.get_geography(1, PLACE_MAPPINGS), 'dcid:Earth') def test_get_measurement_method(self): d = {'NATURE': ['E'], 'OBS_STATUS': ['A'], 'REPORTING_TYPE': ['G']} @@ -57,12 +68,30 @@ def test_get_measurement_method(self): for _, row in df.iterrows(): self.assertEqual(process.get_measurement_method(row), 'SDG_E_A_G') + def test_drop_null(self): + self.assertEqual( + process.drop_null( + 0, 'SE_ACS_CMPTR', + 'This data point is NIL for the submitting nation.'), '') + self.assertEqual(process.drop_null(1, 'SE_ACS_CMPTR', ''), 1) + + def test_drop_special(self): + self.assertEqual(process.drop_special(0, 'SH_SAN_SAFE@URBANISATION--R'), + '') + self.assertEqual( + process.drop_special(0, 'AG_FOOD_WST@FOOD_WASTE_SECTOR--FWS_OOHC'), + 0) + + def test_fix_encoding(self): + source = 'Instituto Nacional das Comunicaçőes de Moçambique' + self.assertEqual(process.fix_encoding(source), source) + def test_process(self): with tempfile.TemporaryDirectory() as tmp_schema: with tempfile.TemporaryDirectory() as tmp_csv: process.process( os.path.join(module_dir_, 'testdata/test_input'), - tmp_schema, tmp_csv) + tmp_schema, tmp_csv, PLACE_MAPPINGS) assert_equal_dir( self, tmp_schema, os.path.join(module_dir_, 'testdata/test_schema')) diff --git a/scripts/un/sdg/sdg-dataset b/scripts/un/sdg/sdg-dataset index 410719252c..bdb619c04a 160000 --- a/scripts/un/sdg/sdg-dataset +++ b/scripts/un/sdg/sdg-dataset @@ -1 +1 @@ -Subproject commit 410719252cbd6e75f91834d5788d4e19c210abbe +Subproject commit 
bdb619c04a0d8d6121190477b7fbd791562f99ac diff --git a/scripts/un/sdg/sssom-mappings b/scripts/un/sdg/sssom-mappings new file mode 160000 index 0000000000..aae4a7a944 --- /dev/null +++ b/scripts/un/sdg/sssom-mappings @@ -0,0 +1 @@ +Subproject commit aae4a7a94479ab44751eb2e54b5ab5054d84b748 diff --git a/scripts/un/sdg/testdata/expected_cities.csv b/scripts/un/sdg/testdata/expected_cities.csv deleted file mode 100644 index 3b4ac4931e..0000000000 --- a/scripts/un/sdg/testdata/expected_cities.csv +++ /dev/null @@ -1,2 +0,0 @@ -name,dcid -AF_MAZAR_E_SHARIF,wikidataId/Q130469 diff --git a/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv b/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv index baea70e159..2095831b27 100644 --- a/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv +++ b/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv @@ -1,15 +1,21 @@ VARIABLE_CODE,GEOGRAPHY_CODE,TIME_PERIOD,OBS_VALUE,UNIT_MEASURE,UNIT_MULT,MEASUREMENT_METHOD -dcs:sdg/AG_FOOD_WST,dcs:Earth,2019,930863853.95716,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:africa,2019,200407208.7491,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:country/AFG,2019,4755917.64224,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST,dcs:southamerica,2019,46603996.09298,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:country/ALB,2019,363201.04528,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST,dcs:oceania,2019,5368899.62572,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:country/DZA,2019,5782224.32483,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:Earth,2019,569007855.9533,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:africa,2019,143927911.95641,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/AFG,2019,3109152.67104,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:southamerica,2019,28233968.89021,dcs:SDG_T,,dcs:SDG_N_A_G 
-dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/ALB,2019,238491.7727,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:oceania,2019,3926866.98184,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/DZA,2019,3918528.68,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:Earth,2019,930863853.95716,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:africa,2019,200407208.7491,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/AFG,2019,4755917.64224,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:southamerica,2019,46603996.09298,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/ALB,2019,363201.04528,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:oceania,2019,5368899.62572,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:WesternAfrica,2019,73695078.26203,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/DZA,2019,5782224.32483,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:CentralAmerica,2019,23441006.87753,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:EasternAfrica,2019,62963781.41023,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:Earth,2019,569007855.9533,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:africa,2019,143927911.95641,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/AFG,2019,3109152.67104,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:southamerica,2019,28233968.89021,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/ALB,2019,238491.7727,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:oceania,2019,3926866.98184,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:WesternAfrica,2019,56750560.27358,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/DZA,2019,3918528.68,dcs:SDG_T,,dcs:SDG_E_A_G 
+dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:CentralAmerica,2019,15774419.58244,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:EasternAfrica,2019,44299890.89842,dcs:SDG_T,,dcs:SDG_N_A_G diff --git a/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv b/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv new file mode 100644 index 0000000000..8a14126c63 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv @@ -0,0 +1,2 @@ +sdg,dcid +4,country/AFG diff --git a/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf b/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf new file mode 100644 index 0000000000..0b6aac13b9 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf @@ -0,0 +1,14 @@ + +Node: dcid:country/AFG +typeOf: dcs:Country +containedInPlace: dcid:SouthernAsia +containedInPlace: dcid:undata-geo/G00403000 +containedInPlace: dcid:undata-geo/G00403300 +containedInPlace: dcid:undata-geo/G00404000 +containedInPlace: dcid:undata-geo/G00404300 + +Node: dcid:undata-geo/G00000030 +typeOf: dcs:GeoRegion +containedInPlace: dcid:Earth +containedInPlace: dcid:asia +containedInPlace: dcid:WesternAsia diff --git a/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf b/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf new file mode 100644 index 0000000000..d127faa09c --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf @@ -0,0 +1,12 @@ + +Node: dcid:country/AFG +typeOf: dcs:Country +name: "Afghanistan" +unDataCode: "undata-geo:G00000020" +unDataLabel: "Afghanistan" + +Node: dcid:undata-geo/G00000030 +typeOf: dcs:GeoRegion +name: "Ajman" +unDataCode: "undata-geo:G00000030" +unDataLabel: "Ajman" diff --git a/scripts/un/sdg/testdata/test_geography/test_geographies.csv b/scripts/un/sdg/testdata/test_geography/test_geographies.csv new file mode 100644 index 
0000000000..7ca5143fcb --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/test_geographies.csv @@ -0,0 +1,3 @@ +subject_id,subject_label +undata-geo:G00000020,Afghanistan +undata-geo:G00000030,Ajman diff --git a/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv b/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv new file mode 100644 index 0000000000..a882b21075 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv @@ -0,0 +1,12 @@ +subject_id,subject_label,predicate,object_id,object_label +G00000020,Afghanistan,isPartOf,G00100000,World +G00000020,Afghanistan,isPartOf,G00114000,Asia +G00000020,Afghanistan,isPartOf,G00119000,Southern Asia +G00000020,Afghanistan,isPartOf,G00403000,Landlocked developing countries (LLDCs) +G00000020,Afghanistan,isPartOf,G00403300,Landlocked developing countries (LLDCs): Asia +G00000020,Afghanistan,isPartOf,G00404000,Least developed countries (LDCs) +G00000020,Afghanistan,isPartOf,G00404300,Least developed countries (LDCs): Asia +G00000030,Ajman,isPartOf,G00003250,United Arab Emirates +G00000030,Ajman,isPartOf,G00100000,World +G00000030,Ajman,isPartOf,G00114000,Asia +G00000030,Ajman,isPartOf,G00120000,Western Asia diff --git a/scripts/un/sdg/testdata/test_schema/series.mcf b/scripts/un/sdg/testdata/test_schema/series.mcf index 1d279ab42b..f0045f3f82 100644 --- a/scripts/un/sdg/testdata/test_schema/series.mcf +++ b/scripts/un/sdg/testdata/test_schema/series.mcf @@ -28,7 +28,7 @@ name: "Proportion of population below international poverty line" typeOf: dcs:SDG_Series Node: dcid:SDG_SI_POV_EMP1 -name: "Employed population below international poverty line" +name: "Employed population below international poverty line, by sex and age" typeOf: dcs:SDG_Series Node: dcid:SDG_SI_POV_NAHC diff --git a/scripts/un/sdg/testdata/test_schema/sv.mcf b/scripts/un/sdg/testdata/test_schema/sv.mcf index 3a11701a59..1d973b73c7 100644 --- a/scripts/un/sdg/testdata/test_schema/sv.mcf 
+++ b/scripts/un/sdg/testdata/test_schema/sv.mcf @@ -5,11 +5,13 @@ measuredProperty: dcs:value name: "Food waste" populationType: dcs:SDG_AG_FOOD_WST statType: dcs:measuredValue +footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR" Node: dcid:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS typeOf: dcs:StatisticalVariable measuredProperty: dcs:value -name: "Food waste [Food Waste Sector = Households]" +name: "Food waste [Households]" populationType: dcs:SDG_AG_FOOD_WST statType: dcs:measuredValue sdg_foodWasteSector: dcs:SDG_FoodWasteSectorEnum_FWS_HHS +footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR" diff --git a/scripts/un/sdg/util.py b/scripts/un/sdg/util.py index f3e35aa61b..ed63058fcb 100644 --- a/scripts/un/sdg/util.py +++ b/scripts/un/sdg/util.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. '''Shared util functions and constants.''' -import csv import math -import os import re -import sys - -module_dir_ = os.path.dirname(__file__) -sys.path.append(os.path.join(module_dir_)) # SDMX indicator for 'total' value in dimension. TOTAL = '_T' +# Splits the series code from constraint properties in SDG variable codes. +SDG_CODE_SEPARATOR = '@' + # Used to split the series code from constraint properties in stat var dcids. SV_CODE_SEPARATOR = '.' @@ -59,7 +56,7 @@ measuredProperty: dcs:value name: {name} populationType: dcs:{popType} -statType: dcs:measuredValue{cprops} +statType: dcs:measuredValue{cprops}{footnote} ''' MMETHOD_TEMPLATE = ''' Node: dcid:{dcid} @@ -80,7 +77,6 @@ 'CAUSE_OF_DEATH': 'causeOfDeath', 'DISABILITY_STATUS': 'disabilityStatus', 'EDUCATION_LEV': 'educationalAttainment', - 'SEX': 'gender' } # Shared dimensions across all input csv files. @@ -95,57 +91,82 @@ 'OBS_STATUS', 'RELEASE_STATUS', 'RELEASE_NAME' } -# Supported Regions. -# TODO: Add other regions. 
-REGIONS = { - 1: 'Earth', - 2: 'africa', - 5: 'southamerica', - 9: 'oceania', - 10: 'antarctica', - 21: 'northamerica', - 142: 'asia', - 150: 'europe', +# Series where zero should be treated as null and dropped (curated by UN). +ZERO_NULL = { + 'SE_ACS_CMPTR', + 'SE_ACS_H2O', + 'SE_AGP_CPRA', + 'SE_ALP_CPLR', + 'SE_AWP_CPRA', + 'SE_ACC_HNDWSH', + 'SE_INF_DSBL', + 'SE_TOT_CPLR', + 'SE_TRA_GRDL', + 'SE_ACS_INTNT', } +# Footnote text indicated that a zero point should be treated as null and dropped. +ZERO_NULL_TEXT = 'This data point is NIL for the submitting nation.' + +# Variables that should be dropped due to outlier values (curated by UN). +# TODO: Follow up with UN. +DROP_VARIABLE = {'VC_DTH_TOTPT'} + +# Series that should be dropped due to outlier values (curated by UN). +# TODO: Follow up with UN. +DROP_SERIES = { + 'TX_IMP_GBMRCH', + 'TX_EXP_GBMRCH', + 'TX_IMP_GBSVR', + 'TX_EXP_GBSVR', + 'SH_SAN_SAFE', + 'AG_PRD_XSUBDY', +} -def get_country_map(file): - ''' Creates map of M49 -> ISO-alpha3 for countries. - - Args: - file: Path to input file. - - Returns: - Country map. - ''' - with open(file) as f: - places = {} - reader = csv.DictReader(f, delimiter='\t') - for row in reader: - if not row['ISO-alpha3 code']: # Only countries for now. - continue - places[int(row['M49 code'])] = row['ISO-alpha3 code'] - return places - - -PLACES = get_country_map(os.path.join(module_dir_, 'm49.csv')) - - -def get_city_map(file): - ''' Creates map of name -> dcid for supported cities. - - Args: - file: Path to input file. - - Returns: - City map. - ''' - with open(file) as f: - reader = csv.DictReader(f) - return {row['name']: row['dcid'] for row in reader} - +# Map of input title text to output formatted text. 
# Property name -> short noun that is appended to the property's value when a
# '[Property = Value]' constraint is rewritten (e.g. 'Deviation Level = Extreme'
# becomes 'Extreme deviation').
TITLE_MAPPINGS = {
    'Education level': 'education',
    'Frequency of Chlorophyll-a concentration': 'frequency',
    'Report Ordinal': 'ordinal',
    'Grounds of discrimination': 'discrimination',
    'Deviation Level': 'deviation',
}

# List of substrings to be deleted from titles.
# Order matters: longer prefixes must precede the shorter prefixes they contain
# (e.g. 'Type of skill = Skill: ' before 'Type of skill = ').
TITLE_DELETIONS = [
    'Age = ',
    'Name of non-communicable disease = ',
    'Substance use disorders = ',
    'Quantile = ',
    'Type of skill = Skill: ',
    'Type of skill = ',
    'Sex = ',
    'Land cover = ',
    'Level/Status = ',
    'Policy instruments = ',
    'Type of product = ',
    'Type of waste treatment = ',
    'Activity = ',
    'Type of renewable technology = ',
    'Location = ',
    'Level_of_government = ',
    'Fiscal intervention stage = ',
    'Name of international institution = ',
    'Policy Domains = ',
    'Mode of transportation = ',
    'Food Waste Sector = ',
]

# Map of input title text to output replacement text.
# Each key appears exactly once; a repeated '36 to 47 months old' entry was
# removed (it carried the same value, so lookups are unchanged).
# NOTE(review): some keys end in 'months' while their values end in 'years old'.
# If an input title reads '... months old', substring replacement would yield
# '... years old old' -- confirm against the source titles.
TITLE_REPLACEMENTS = {
    '24 to 59 months old': '2 to 5 years old',
    '36 to 47 months old': '3 to 4 years old',
    '36 to 59 months old': '3 to 5 years old',
    '12 to 23 months': '1 to 2 years old',
    '24 to 35 months': '2 to 3 years old',
    '48 to 59 months': '4 to 5 years old',
}
def curate_pvs(text, mappings):
    '''Curates PVs based on custom mappings.

    Example: '[Deviation Level = Extreme (75-100%)]'
             -> '[Extreme deviation (75-100%)]'

    Pairs are separated by '|'. A pair whose property is a key of `mappings`
    is rewritten as '<value> <mapped text>', keeping any parenthetical on the
    right; every other pair (including one without '=') is kept as is.

    Args:
      text: Input text, normally enclosed in '[' and ']'.
      mappings: Custom mappings of property name -> text appended to the value.

    Returns:
      Formatted text, always enclosed in '[' and ']'.
    '''
    inner = text.strip()
    # Only strip the brackets that are really there, so unbracketed input does
    # not lose its first and last characters.
    if inner.startswith('['):
        inner = inner[1:]
    if inner.endswith(']'):
        inner = inner[:-1]

    curated = []
    for raw_pair in inner.split('|'):
        pair = raw_pair.strip()
        if not pair:
            continue

        # Split on the first '=' only, so values containing '=' stay intact.
        prop, sep, value = pair.partition('=')
        prop, value = prop.strip(), value.strip()
        if not sep or prop not in mappings:
            curated.append(pair)
            continue

        # Split on the first '(' only, so every parenthetical is preserved.
        main, paren, rest = value.partition('(')
        main = main.strip()

        # Don't repeat 'education'.
        if prop == 'Education level' and 'education' in main:
            new_pair = main
        else:
            new_pair = main + ' ' + mappings[prop]

        # Keep () on the right.
        if paren:
            new_pair += ' (' + rest.strip()

        curated.append(new_pair)
    return '[' + ', '.join(curated) + ']'


def format_variable_description(variable, series):
    '''Curates variable descriptions.

    The series description is formatted with format_description(); whatever
    follows it in the variable description (the bracketed constraint text) is
    cleaned up and appended.

    Args:
      variable: Full variable description, starting with the series description.
      series: Series description.

    Returns:
      Formatted description.
    '''
    head = format_description(series)

    # Everything after the first occurrence of the series description.
    pvs = variable.partition(series)[2].strip()
    if not pvs:
        return head

    # Remove ISIC code.
    pvs = re.sub(r'\(ISIC[^)]*\)', '', pvs)

    # Remove isco code.
    pvs = re.sub(r'\(isco[^)]*\)', '', pvs)

    # Custom text formatting.
    pvs = curate_pvs(pvs, TITLE_MAPPINGS)

    # Custom replacements.
    for deletion in TITLE_DELETIONS:
        pvs = pvs.replace(deletion, '')
    for old, new in TITLE_REPLACEMENTS.items():
        pvs = pvs.replace(old, new)

    return head + ' ' + pvs