diff --git a/.gitmodules b/.gitmodules index e688780163..091f6e0ff4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "scripts/un/sdg/sdg-dataset"] path = scripts/un/sdg/sdg-dataset url = https://code.officialstatistics.org/undata2/data-commons/sdg-dataset.git +[submodule "scripts/un/sdg/sssom-mappings"] + path = scripts/un/sdg/sssom-mappings + url = https://code.officialstatistics.org/undata2/sssom-mappings.git diff --git a/requirements.txt b/requirements.txt index 663ced4214..8f52b0b021 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,10 +20,10 @@ google-cloud-scheduler==2.10.0 gspread lxml==4.9.1 matplotlib==3.3.0 -netCDF4 +netCDF4==1.6.4 numpy openpyxl==3.0.7 -pandas==1.3.5 +pandas pylint pytest rasterio @@ -39,3 +39,4 @@ xlrd==1.2.0 yapf zipp beautifulsoup4 +ratelimit diff --git a/scripts/un/sdg/.gitattributes b/scripts/un/sdg/.gitattributes index ba109e3e5d..1f4eadb1ea 100644 --- a/scripts/un/sdg/.gitattributes +++ b/scripts/un/sdg/.gitattributes @@ -1,3 +1,4 @@ csv/* filter=lfs diff=lfs merge=lfs -text schema/* filter=lfs diff=lfs merge=lfs -text dc_generated/* filter=lfs diff=lfs merge=lfs -text +geography/* filter=lfs diff=lfs merge=lfs -text diff --git a/scripts/un/sdg/README.md b/scripts/un/sdg/README.md index 0afb292ce5..d2d4f24722 100644 --- a/scripts/un/sdg/README.md +++ b/scripts/un/sdg/README.md @@ -1,15 +1,36 @@ # UN Stats Sustainable Development Goals -This import includes country, city, and select region-level data from the [UN SDG Global Database](https://unstats.un.org/sdgs/dataportal). Data is read from the submodule `sdg-dataset` which is managed by UN Stats. +This import includes data from the [UN SDG Global Database](https://unstats.un.org/sdgs/dataportal). Data is read from the submodule `sdg-dataset` which is managed by UN Stats. Geography mappings are read from the submodule `sssom-mappings` which is also managed by UN Stats. Please ensure the submodules stay up to date. 
+## One-time Setup -To generate city dcids: +Initialize submodules: ``` -python3 cities.py +git submodule update --init --remote sdg-dataset +git submodule update --init --remote sssom-mappings ``` -(Note: many of these cities will require manual curation, so this script likely should not be rerun.) -To process data and generate artifacts: +## Data Refresh + +Update submodules: +``` +git submodule update --remote sdg-dataset +git submodule update --remote sssom-mappings +``` + +Generate place mappings: +``` +python3 geography.py +``` +Produces: +* geography/ folder: + * un_places.mcf (place mcf) + * un_containment.mcf (place containment triples) + * place_mappings.csv (map of SDG code -> dcid) + +Note that the `place_mappings.csv` is required before running the `process.py` script. + +Process data and generate artifacts: ``` python3 process.py ``` @@ -23,9 +44,26 @@ Produces: * unit.mcf * csv/ folder: * [CODE].csv -(Note that the `schema/` folder is not included in the repository but can be regenerated by running the script.) + +(Note that these folders are not included in the repository but can be regenerated by running the script.) + +When refreshing the data, the `geography`, `schema`, and `csv` folders might all get updated and will need to be resubmitted to g3. The corresponding TMCF file is `sdg.tmcf`. To run unit tests: ``` python3 -m unittest discover -v -s ../ -p "*_test.py" ``` + +Notes: +* We currently drop certain series and variables (refer to `util.py` for the list) which have been identified by the UN as potentially containing outliers. + +## SDMX + +For reference, we provide an earlier version of the import scripts that utilized the UN API (which uses SDMX) in the `sdmx/` folder. Please note that these scripts may have errors and do not use the most up-to-date schema format, so they should only be used as an illustration of the SDMX -> MCF mapping and **should not actually be run**. 
+ +As a quick overview: +* `preprocess.py` downloads all the raw input CSVs to an `input/` folder as well as adds all dimensions and attributes to a `preprocessed/` folder. +* `cities.py` reads the input CSVs and matches cities with dcids. +* `process.py` reads the input CSVs and concepts and generates a cleaned CSV and schema. +* `util.py` has various shared util functions and constants. +* `m49.csv` has country code mappings. diff --git a/scripts/un/sdg/cities.csv b/scripts/un/sdg/cities.csv deleted file mode 100644 index 643571d9be..0000000000 --- a/scripts/un/sdg/cities.csv +++ /dev/null @@ -1,1774 +0,0 @@ -name,dcid -WORLD_ALL, -MAZAR_E_SHARIF,wikidataId/Q130469 -POL_E_KHOMRI,wikidataId/Q477148 -HERAT,wikidataId/Q182844 -LASHKAR_GAH,wikidataId/Q476800 -KHAN_ABAD,wikidataId/Q1247205 -KANDAHAR,wikidataId/Q173808 -KABUL,wikidataId/Q188933 -JALALABAD,wikidataId/Q183303 -FARAH,wikidataId/Q180330 -CHARIKAR,wikidataId/Q944395 -TOLGA,wikidataId/Q2280394 -TIARET,wikidataId/Q565568 -TEBESSA,wikidataId/Q984575 -TAMANRASSET,wikidataId/Q205792 -ORAN,wikidataId/Q131818 -M_SILA,wikidataId/Q402825 -MILA,wikidataId/Q335676 -KHEMIS_MILIANA,wikidataId/Q954285 -ALGIERS,wikidataId/Q3561 -DJELFA,wikidataId/Q2794758 -CHLEF,wikidataId/Q849561 -BLIDA,wikidataId/Q216990 -BATNA,wikidataId/Q338844 -ANNABA,wikidataId/Q45942 -EL_KHROUB,wikidataId/Q391129 -LUANDA,wikidataId/Q3897 -BAKU,wikidataId/Q13662815 -ZARATE,wikidataId/Q246956 -SANTIAGO_DEL_ESTERO,wikidataId/Q44827 -SAN_SALVADOR_DE_JUJUY,wikidataId/Q44217 -SAN_PEDRO_DE_JUJUY,wikidataId/Q1020263 -SAN_MIGUEL_DE_TUCUMAN,wikidataId/Q44255 -SAN_MARTIN,wikidataId/Q781245 -SAN_JUAN,wikidataId/Q44241 -ROSARIO,wikidataId/Q52535 -RIO_GALLEGOS,wikidataId/Q44242 -RIO_CUARTO,wikidataId/Q983451 -FORMOSA,wikidataId/Q44214 -OBERA,wikidataId/Q55377 -NEUQUEN,wikidataId/Q44239 -MENDOZA,wikidataId/Q44237 -CONCORDIA,wikidataId/Q52578 -CORDOBA,wikidataId/Q44210 -LA_PLATA,wikidataId/Q44059 -COMODORO_RIVADAVIA,wikidataId/Q221921 -CATAMARCA,wikidataId/Q44162 
-BUENOSAIRES,wikidataId/Q1486 -BAHIA_BLANCA,wikidataId/Q54108 -PARANA,wikidataId/Q44213 -HOBART, -PERTH,wikidataId/Q3183 -MUSWELLBROOK,wikidataId/Q1008383 -MELBOURNE,wikidataId/Q3141 -LAUCENSTON,wikidataId/Q937765 -KINGAROY,wikidataId/Q605972 -CAIRNS,wikidataId/Q189133 -GOLDCOAST,wikidataId/Q140075 -GEELONG,wikidataId/Q231765 -DARWIN,wikidataId/Q11568 -CESSNOCK,wikidataId/Q606357 -CANBERRA,wikidataId/Q3114 -BUNBURY,wikidataId/Q256711 -BRISBANE,wikidataId/Q34932 -ADELAIDE,wikidataId/Q5112 -WOLLONGONG,wikidataId/Q187861 -SYDNEY,wikidataId/Q1094194 -SHEPPARTON,wikidataId/Q707030 -ALICE_SPRINGS,wikidataId/Q17872 -VIENNA,nuts/AT13 -SALZBURG,wikidataId/Q34713 -LINZ,wikidataId/Q41329 -BREGENZ,wikidataId/Q1737 -KLAGENFURT,nuts/AT211 -INNSBRUCK,nuts/AT332 -GRAZ,wikidataId/Q13298 -MANAMA,wikidataId/Q3882 -SYLHET,wikidataId/Q466221 -RAJSHAHI,wikidataId/Q322155 -MYMENSINGH_NASIRABAD, -KHULNA,wikidataId/Q243325 -JESSORE,wikidataId/Q1688300 -JAMALPUR,wikidataId/Q4159797 -GAZIPUR,wikidataId/Q2460416 -DINAJPUR,wikidataId/Q1985120 -DHAKA,wikidataId/Q1354 -COMILLA,wikidataId/Q1006296 -CHITTAGONG,wikidataId/Q158087 -SAIDPUR,wikidataId/Q3347049 -BARISAL,wikidataId/Q747840 -BOGRA,wikidataId/Q1918837 -AUSTRAILA_AND_NEWZEALAND_ALL,country/NZL -ETTERBEEK,wikidataId/Q192859 -BRUSSELS,wikidataId/Q239 -BERCHEM_SAINTE_AGATHE,wikidataId/Q272272 -AUDERGHEM,wikidataId/Q272228 -SAINT_JOSSE_TEN_NOODE,wikidataId/Q272243 -SAINT_GILLES,wikidataId/Q237674 -MOLENBEEK_SAINT_JEAN,wikidataId/Q180775 -KOEKELBERG,wikidataId/Q219244 -JETTE,wikidataId/Q241918 -IXELLES,wikidataId/Q208713 -GANSHOREN,wikidataId/Q366552 -FOREST,wikidataId/Q72946 -EVERE,wikidataId/Q321718 -ANDERLECHT,wikidataId/Q12886 -WOLUWE_SAINT_PIERRE,wikidataId/Q242393 -WOLUWE_SAINT_LAMBERT,wikidataId/Q211764 -WATERMAEL_BOITSFORT,wikidataId/Q272262 -UCCLE,wikidataId/Q203312 -SCHAERBEEK,wikidataId/Q12887 -KAPELLEN_EKEREN,wikidataId/Q1797837 -GENT,wikidataId/Q1296 -OOSTENDE,wikidataId/Q12996 -NAMUR,wikidataId/Q134121 
-MECHELEN,wikidataId/Q162022 -CHARLEROI,wikidataId/Q81046 -BRUGGE,wikidataId/Q12994 -LIEGE,wikidataId/Q3992 -LEUVEN,wikidataId/Q118958 -LA_LOUVIERE,wikidataId/Q211572 -ANTWERPEN,wikidataId/Q12892 -QUAREGNON_FRAMERIES,wikidataId/Q538735 -VERVIERS_DISON,wikidataId/Q682105 -CENTRAL_AND_SOUTHERN_ASIA_ALL, -THIMPHU,wikidataId/Q9270 -COCHABAMBA,wikidataId/Q183124 -SARAJEVO,wikidataId/Q11194 -BANJA_LUKA,wikidataId/Q131127 -PASSOS,wikidataId/Q985579 -PORTO_ALEGRE,wikidataId/Q40269 -SAOPAULO,wikidataId/Q174 -RIBEIRAO_PRETO,wikidataId/Q188892 -VITORIA_DA_CONQUISTA,wikidataId/Q22062839 -UMUARAMA,wikidataId/Q593948 -TEIXEIRA_DE_FREITAS,wikidataId/Q282335 -TATUI,wikidataId/Q1795613 -SAO_CARLOS,wikidataId/Q1815617 -SANTANA_PORTO_SANTANA,wikidataId/Q2105236 -SANTA_CRUZ_DO_CAPIBARIBE,wikidataId/Q2082237 -SALVADOR,wikidataId/Q36947 -PARAUAPEBAS,wikidataId/Q1806100 -PALMAS,wikidataId/Q178993 -MONTES_CLAROS,wikidataId/Q651906 -MACAPA,wikidataId/Q180215 -LIMEIRA,wikidataId/Q841241 -JEQUIE,wikidataId/Q630203 -ITUIUTABA,wikidataId/Q1756328 -CUIABA,wikidataId/Q170762 -ILHEUS,wikidataId/Q373705 -GUARAPARI,wikidataId/Q267439 -FLORIANOPOLIS, -DIVINOPOLIS,wikidataId/Q926606 -CURITIBA,wikidataId/Q4361 -CRICIUMA,wikidataId/Q1439157 -CAMPINAS,wikidataId/Q171617 -BRASILIA,wikidataId/Q22066977 -BELOHORIZONTE,wikidataId/Q42800 -BELEM,wikidataId/Q12829733 -BARRETOS,wikidataId/Q1637488 -ARAXA,wikidataId/Q586057 -APARECIDA_DE_GOIANIA,wikidataId/Q459711 -RECIFE,wikidataId/Q48344 -INDAIATUBA,wikidataId/Q616650 -HONIARA,wikidataId/Q40921 -RUSE,wikidataId/Q160173 -YAMBOL,wikidataId/Q186569 -VELIKO_TARNOVO,wikidataId/Q173474 -DOBRIC,wikidataId/Q168668 -BURGAS,wikidataId/Q6509 -SOFIA,nuts/SOF46 -SHUMEN,wikidataId/Q181830 -PLOVDIV,wikidataId/Q459 -PLEVEN,wikidataId/Q165420 -MONTANA,wikidataId/Q220121 -VARNA,wikidataId/Q6506 -STARA_ZAGORA,wikidataId/Q170415 -TAUNGGYI,wikidataId/Q1075566 -TACHILEIK,wikidataId/Q1018494 -PATHEIN,wikidataId/Q870103 -NAY_PYI_TAW,wikidataId/Q37400 -MYITKYINA,wikidataId/Q1062370 
-MANDALAY,wikidataId/Q185518 -LOIKAW,wikidataId/Q1017971 -LASHIO,wikidataId/Q1209397 -KALE,wikidataId/Q3129405 -MYEIK,wikidataId/Q1018163 -MYEDE,wikidataId/Q6947282 -MAWLAMYINE,wikidataId/Q685941 -YANGON,wikidataId/Q37995 -MINSK,wikidataId/Q2280 -MAZYR,wikidataId/Q386487 -KOBYRN,wikidataId/Q955992 -HRODNA,wikidataId/Q181376 -BREST-BELARUS,wikidataId/Q140147 -BABRUJSK,wikidataId/Q207294 -SALIHORSK,wikidataId/Q201393 -POLACK,wikidataId/Q200797 -GOMEL,wikidataId/Q2678 -VICIEBSK,wikidataId/Q102217 -HAMILTON_CANADA,wikidataId/Q133116 -GUELPH,wikidataId/Q504114 -GREATER_SUDBURY_GRAND_SUDBURY, -EDMONTON,wikidataId/Q2096 -CALGARY,wikidataId/Q36312 -BRANTFORD,wikidataId/Q34180 -BELLEVILLE,wikidataId/Q34227 -BARRIE,wikidataId/Q34183 -TROIS_RIVIERES,wikidataId/Q44012 -TORONTO,wikidataId/Q172 -THUNDER_BAY,wikidataId/Q34116 -ST_JOHNS,wikidataId/Q2082 -ST_CATHARINES,wikidataId/Q126805 -SHERBROOKE,wikidataId/Q139473 -SASKATOON,wikidataId/Q10566 -SAINT_JOHN,wikidataId/Q203403 -SAGUENAY,wikidataId/Q139229 -MONCTON,wikidataId/Q457334 -LONDON_CANADA,wikidataId/Q92561 -LETHBRIDGE,wikidataId/Q270887 -KITCHENER_CAMBRIDGE_WATERLOO,wikidataId/Q1028279 -ABBOTSFORD,wikidataId/Q271730 -KELOWNA,wikidataId/Q232226 -HALIFAX_CANADA,wikidataId/Q2141 -KINGSTON_CANADA,wikidataId/Q202973 -WINNIPEG,wikidataId/Q2135 -WINDSOR,wikidataId/Q182625 -VICTORIA,wikidataId/Q2132 -VANCOUVER,wikidataId/Q24639 -REGINA,wikidataId/Q2123 -QUEBEC,wikidataId/Q2145 -PETERBOROUGH_CANADA,wikidataId/Q776930 -OTTAWA_GATINEAU_QUEBEC_PART,wikidataId/Q1930 -OTTAWA_GATINEAU_ONTARIO_PART, -OSHAWA,wikidataId/Q211867 -MONTREAL,wikidataId/Q340 -SAINT_JEAN_SUR_RICHELIEU,wikidataId/Q141977 -ANURADHAPURA,wikidataId/Q5724 -GALLE,wikidataId/Q319366 -PUTTALAM,wikidataId/Q3088741 -COLOMBO,wikidataId/Q35381 -CHILAW_FERRY_STREET, -BATTICALOA,wikidataId/Q810963 -BADULLA,wikidataId/Q390096 -RATNAPURA,wikidataId/Q508753 -MATARA,wikidataId/Q13360574 -KANDY,wikidataId/Q203197 -JAFFNA,wikidataId/Q215277 -HAPUTALE, -HAMBANTOTA,wikidataId/Q1025283 
-EMBILIPITIYA,wikidataId/Q4802021 -RANCAGUA,wikidataId/Q4582 -VALPARAISO_VALPARAISO,wikidataId/Q33986 -TEMUCO,wikidataId/Q82128 -TALCA,wikidataId/Q4469 -SANTIAGO_SANTIAGO,wikidataId/Q2887 -ARICA,wikidataId/Q2203 -MAGALLANES_PUNTAARENAS,wikidataId/Q51599 -PENAFLOR,wikidataId/Q51098 -OVALLE,wikidataId/Q51060 -COPIAPO,wikidataId/Q3868 -CONCEPCION,wikidataId/Q1880 -ANTOFAGASTA_ANTOFAGASTA,wikidataId/Q3612 -IQUIQUE,wikidataId/Q14440 -LA_SERENA,wikidataId/Q14467 -LOS_ANGELES,wikidataId/Q16910 -LOSLAGOS_OSORNO,wikidataId/Q51059 -XUCHENG,wikidataId/Q11070407 -XINGPING,wikidataId/Q1201091 -WUHAN,wikidataId/Q11746 -TIANJIN,wikidataId/Q11736 -TANGSHAN,wikidataId/Q58422 -TAIPEI, -HANGZHOU,wikidataId/Q4970 -QINGDAO,wikidataId/Q170322 -LESHAN,wikidataId/Q426868 -KAIPING,wikidataId/Q599514 -JINAN,wikidataId/Q170247 -HONG_KONG,country/HKG -HAIKOU,wikidataId/Q189823 -GUIXI,wikidataId/Q1335331 -GAOYOU,wikidataId/Q1253949 -CHENGGUAN,wikidataId/Q10929428 -CHENGDU,wikidataId/Q30002 -CHANGZHOU,wikidataId/Q57970 -CHANGZHI,wikidataId/Q72945 -BEIJING,wikidataId/Q956 -GUANGZHOU,wikidataId/Q16572 -YIYANG,wikidataId/Q416669 -YUCHENG,wikidataId/Q1198528 -ZHENGZHOU,wikidataId/Q30340 -ZUNYI,wikidataId/Q28994 -ZHUJI,wikidataId/Q198222 -SHENZHEN,wikidataId/Q15174 -YANGGU,wikidataId/Q290059 -BOGOTA, -ZIPAQUIRA,wikidataId/Q205429 -VALLEDUPAR,wikidataId/Q376903 -SINCELEJO,wikidataId/Q1043513 -NEIVA,wikidataId/Q638260 -MEDELLIN,wikidataId/Q48278 -MAICAO,wikidataId/Q1937427 -GIRARDOT,wikidataId/Q186193 -FLORENCIA,wikidataId/Q2308980 -CARTAGO_COLOMBIA,wikidataId/Q2004074 -CARTAGENA_COLOMBIA,wikidataId/Q657461 -CALI,wikidataId/Q51103 -BUGA,wikidataId/Q934753 -BUCARAMANGA,wikidataId/Q243766 -BARRANQUILLA,wikidataId/Q62823 -TUNJA,wikidataId/Q236744 -KINSHASA,wikidataId/Q3838 -LUBUMBASHI,wikidataId/Q187593 -SANJOSE,wikidataId/Q3070 -PUNTARENAS,wikidataId/Q30687 -PUERTO_LIMON,wikidataId/Q30620 -LIBERIA,wikidataId/Q1387526 -CARTAGO_COSTARICA,wikidataId/Q30617 -ZAGREB,wikidataId/Q1435 -ZADAR,wikidataId/Q3370 
-SPLIT,wikidataId/Q1663 -RIJEKA,wikidataId/Q1647 -OSIJEK,wikidataId/Q1640 -BAYAMO,wikidataId/Q115382 -CAMAGUEY,wikidataId/Q115329 -CIENFUEGOS,wikidataId/Q190005 -GUANTANAMO,wikidataId/Q185156 -HOLGUIN,wikidataId/Q244154 -SANTIAGO_DE_CUBA, -LAS_TUNAS, -HAVANA,wikidataId/Q1563 -SANTA_CLARA,wikidataId/Q211590 -LEYMOSUN,wikidataId/Q185632 -SUBSAHARAN_ARFICA_ALL, -USTI_NAD_LABEM,wikidataId/Q156974 -PRAGUE,nuts/CZ01 -PARDUBICE,wikidataId/Q36989 -OSTRAVA,wikidataId/Q8385 -MOST,wikidataId/Q146363 -LIBEREC,wikidataId/Q146351 -KLADNO,wikidataId/Q155993 -HRADEC_KRALOVE,wikidataId/Q180139 -HRABUVKA, -HAVIROV,wikidataId/Q192904 -CHOMUTOV_JIRKOV,wikidataId/Q149097 -CESKE_BUDEJOVICE,wikidataId/Q16506 -PORUBA,wikidataId/Q3500288 -PLZEN,wikidataId/Q43453 -BRNO,wikidataId/Q14960 -OLOMOUC,wikidataId/Q81137 -BOHICON,wikidataId/Q2356092 -DJOUGOU,wikidataId/Q868198 -PARAKOU,wikidataId/Q688324 -NATITINGOU,wikidataId/Q994125 -KANDI,wikidataId/Q845666 -ARHUS,wikidataId/Q25319 -AALBORG,wikidataId/Q25410 -ODENSE,wikidataId/Q25331 -KOBENHAVN,wikidataId/Q1748 -GREVE_ISHOJ,wikidataId/Q3196979 -SAN_FRANCISCO_DE_MACORIS,wikidataId/Q681999 -PUERTA_PLATA,wikidataId/Q988413 -LA_VEGA,wikidataId/Q538953 -LA_ROMANA,wikidataId/Q40508 -HIQUEY,wikidataId/Q1020261 -SANTIAGO_DE_LOS_CABELLEROS,wikidataId/Q42763 -DURAN,wikidataId/Q1120810 -SANTODOMINGODELOSCOLORADOS,wikidataId/Q1015654 -RIOBAMBA,wikidataId/Q478675 -QUITO,wikidataId/Q2900 -QUEVEDO,wikidataId/Q773225 -MILAGRO,wikidataId/Q773238 -MACHALA,wikidataId/Q215080 -LOJA,wikidataId/Q944697 -GUAYAQUIL,wikidataId/Q1991935 -ESMERALDAS,wikidataId/Q149955 -DAULE,wikidataId/Q1014430 -CUENCA,wikidataId/Q54886 -CHONE,wikidataId/Q2756591 -TULCAN,wikidataId/Q752113 -USULUTAN,wikidataId/Q1018092 -SANTAANA_SANTAANA,wikidataId/Q739664 -SANMIGUEL_SANMIGUEL,wikidataId/Q672458 -APOPA,wikidataId/Q619646 -SANSALVADOR_SANSALVADOR,wikidataId/Q3110 -ADDIS,wikidataId/Q3624 -HARAR,wikidataId/Q190184 -GONDAR,wikidataId/Q218861 -DIRE_DAWA, -BAHIR_DAR,wikidataId/Q464699 
-AWASSA,wikidataId/Q755765 -ADAMA,wikidataId/Q351427 -TARTU,wikidataId/Q13972 -TALLINN,wikidataId/Q1770 -NARVA,wikidataId/Q102158 -SUVA,wikidataId/Q38807 -NADI,wikidataId/Q619443 -LAUTOKA,wikidataId/Q856662 -ABO,wikidataId/Q38511 -TAPIOLA_NIITTYKUMPU,wikidataId/Q7034502 -TAMMERFORS,wikidataId/Q40840 -OULU_ULEABORG,wikidataId/Q47048 -JYVASKYLA,wikidataId/Q134620 -HELSINKI_HELSINGFORS,wikidataId/Q1757 -AVIGNON,wikidataId/Q6397 -LIMOGES,wikidataId/Q45656 -LILLE,wikidataId/Q648 -CAEN,wikidataId/Q41185 -BREST-FRANCE,wikidataId/Q12193 -SAINT_BRIEUC,wikidataId/Q29234 -ROUEN,wikidataId/Q30974 -AJACCIO,wikidataId/Q40104 -AIX_EN_PROVENCE,wikidataId/Q47465 -ARRAS,wikidataId/Q131329 -ANTIBES,wikidataId/Q126898 -BELFORT,wikidataId/Q171545 -BESANCON,wikidataId/Q37776 -BEZIERS,wikidataId/Q174019 -MEAUX,wikidataId/Q207620 -MAUREPAS_MONTIGNY_LE_BRETONNEUX, -MARTIGUES_PORT_DE_BOUC,wikidataId/Q675925 -MARSEILLE,wikidataId/Q23482 -MANTES_LA_JOLIE,wikidataId/Q207253 -LYON,wikidataId/Q456 -GRENOBLE,wikidataId/Q1289 -FREJUS,wikidataId/Q215155 -LES_CLAYES_SOUS_BOIS,wikidataId/Q491948 -LE_MANS,wikidataId/Q1476 -DUNKERQUE,wikidataId/Q45797 -DOUAI,wikidataId/Q193826 -DIJON,wikidataId/Q7003 -COLMAR,wikidataId/Q130994 -CLERMONT_FERRAND,wikidataId/Q42168 -CHARTRES,wikidataId/Q130272 -MONTPELLIER,wikidataId/Q6441 -MULHOUSE,wikidataId/Q79815 -BOURGES,wikidataId/Q132404 -BOULOGNE_SUR_MER,wikidataId/Q81997 -BAYONNE,wikidataId/Q134674 -VALENCIENNES,wikidataId/Q163795 -VALENCE,wikidataId/Q8848 -TROYES,wikidataId/Q5489 -TOURS,wikidataId/Q288 -TOULOUSE,wikidataId/Q7880 -PAU, -PARIS,nuts/FR101 -RENNES,wikidataId/Q647 -REIMS,wikidataId/Q41876 -ORLEANS,wikidataId/Q6548 -NIMES,wikidataId/Q42807 -NICE,wikidataId/Q33959 -NANTES,wikidataId/Q12191 -NANCY,wikidataId/Q40898 -SAINT_ETIENNE,wikidataId/Q42716 -ANNECY,wikidataId/Q50189 -ANGERS,wikidataId/Q38380 -AMIENS,wikidataId/Q41604 -POITIERS,wikidataId/Q6616 -POISSY,wikidataId/Q457 -PERPIGNAN,wikidataId/Q6730 -LE_HAVRE,wikidataId/Q42810 
-LA_SEYNE_SUR_MER_SANARY_SUR_MER,wikidataId/Q659484 -LA_ROCHELLE,wikidataId/Q82185 -CHAMBERY,wikidataId/Q165090 -CERGY_PONTOISE,wikidataId/Q961964 -CALAIS,wikidataId/Q6454 -BORDEAUX,wikidataId/Q1479 -MELUN,wikidataId/Q41139 -METZ,wikidataId/Q22690 -TOULON,wikidataId/Q44160 -STRASBOURG,wikidataId/Q6602 -SAINT_QUENTIN,wikidataId/Q182744 -FAAA_PUNAAUIA_PAPEETE_FRENCH_POLYNESIA, -AL_KHALIL,wikidataId/Q168225 -AN_NUSAYRAT, -AL_QUDS, -RAFAH,wikidataId/Q172343 -NABULUS_NABLUS,wikidataId/Q214178 -JENIN, -GHAZZAH,wikidataId/Q47492 -KHAN_YUNIS, -BIELEFELD,nuts/DEA41 -MOERS,wikidataId/Q3132 -FURTH,nuts/DE253 -FREIBURG_IM_BREISGAU,nuts/DE131 -BRAUNSCHWEIG,nuts/DE911 -HAMBURG,nuts/DE6 -KREFELD,nuts/DEA14 -BONN,nuts/DEA22 -MONCHENGLADBACH,nuts/DEA15 -MUNSTER,nuts/DEA33 -REUTLINGEN,wikidataId/Q3085 -REMSCHEID,nuts/DEA18 -BOCHOLT,wikidataId/Q3917 -BERLIN,nuts/DE3 -AACHEN,nuts/DEA21 -HALLE,wikidataId/Q225774 -BENRATH,wikidataId/Q459769 -BAYREUTH,nuts/DE242 -HAGEN,nuts/DEA53 -GUTERSLOH,wikidataId/Q3771 -GROPELINGEN_VEGESACK,wikidataId/Q315005 -GOTTINGEN,wikidataId/Q3033 -GORLITZ,nuts/DED22 -GOPPINGEN,wikidataId/Q4072 -ERFURT,nuts/DEG01 -DUSSELDORF,nuts/DEA11 -FRANKFURT_AM_MAIN,nuts/DE712 -FRANKFURT_ODER,nuts/DE403 -DUREN,wikidataId/Q1884 -DUISBURG,nuts/DEA12 -DRESDEN,nuts/DED21 -DELMENHORST,nuts/DE941 -DARMSTADT,nuts/DE711 -COTTBUS,nuts/DE402 -MAINZ,nuts/DEB35 -MAGDEBURG,nuts/DEE03 -HAMM,nuts/DEA54 -HANAU,wikidataId/Q3802 -LUTTEN_KLEIN,wikidataId/Q1880458 -LUNEN,wikidataId/Q3822 -LUNEBURG,wikidataId/Q3920 -LUDWIGSBURG,wikidataId/Q622 -LUDENSCHEID,wikidataId/Q3893 -LUBECK,nuts/DEF03 -KAISERSLAUTERN,nuts/DEB32 -JENA,nuts/DEG03 -KOLN,nuts/DEA23 -KIEL,nuts/DEF02 -ISERLOHN,wikidataId/Q1130 -INGOLSTADT,nuts/DE211 -HILDESHEIM,wikidataId/Q3185 -HERZOGENRATH,wikidataId/Q6916 -HEILBRONN,nuts/DE117 -HEIDELBERG,nuts/DE125 -TUBINGEN,wikidataId/Q3806 -TRIER,nuts/DEB21 -WURZBURG,nuts/DE263 -WOLFSBURG,nuts/DE913 -STUTTGART,nuts/DE111 -SOLINGEN_WUPPERTAL,nuts/DEA19 -SINDELFINGEN,wikidataId/Q4019 
-SCHWEINFURT,nuts/DE262 -SAARBRUCKEN,wikidataId/Q1724 -RUSSELSHEIM_AM_MAIN,wikidataId/Q4031 -PADERBORN,wikidataId/Q2971 -OSNABRUCK,nuts/DE944 -REGENSBURG,nuts/DE232 -POTSDAM,nuts/DE404 -OLDENBURG_OLDENBURG,nuts/DE943 -NEUSS,wikidataId/Q2948 -NEUMUNSTER,nuts/DEF04 -MUNCHEN,nuts/DE212 -BAMBERG,nuts/DE241 -BAD_SODEN_AM_TAUNUS_KELKHEIM,wikidataId/Q39785 -AUGSBURG,nuts/DE271 -GIESSEN,wikidataId/Q3874 -GEVELSBERG_SCHWELM,wikidataId/Q11293 -GERA,nuts/DEG02 -FLENSBURG,nuts/DEF01 -ESSLINGEN_AM_NECKAR,wikidataId/Q3786 -ERLANGEN,nuts/DE252 -CHEMNITZ,nuts/DED41 -BREMERHAVEN,nuts/DE502 -BREMEN,nuts/DE501 -HANNOVER,nuts/DE921 -MARL,wikidataId/Q3813 -MANNHEIM,nuts/DE126 -LEVERKUSEN,nuts/DEA24 -LEIPZIG,nuts/DED51 -LANDSHUT,nuts/DE221 -KEMPTEN_ALLGAU,nuts/DE273 -KASSEL,nuts/DE731 -KARLSRUHE,nuts/DE122 -HAUNSTETTEN,wikidataId/Q760477 -HARBURG,wikidataId/Q503226 -KOBLENZ,nuts/DEB11 -WIESBADEN,nuts/DE714 -VAIHINGEN_MOHRINGEN,wikidataId/Q1957900 -ULM,nuts/DE144 -RUHRGEBIET, -ROSTOCK,nuts/DE803 -ROSENHEIM,nuts/DE213 -PORZ,wikidataId/Q458242 -PLAUEN,nuts/DED12 -PFORZHEIM,nuts/DE129 -ACCRA,wikidataId/Q3761 -THESSALONIKI,wikidataId/Q17151 -KAVALA,wikidataId/Q187352 -ATHINA,wikidataId/Q1524 -CHANIA,wikidataId/Q182299 -IOANNINA,wikidataId/Q183199 -IRAKLEIO, -HUEHUETENANGO,wikidataId/Q984863 -SANTA_LUCIA_COTZUMALGUAPA,wikidataId/Q404550 -SAN_JUAN_SACATEPEQUEZ,wikidataId/Q127622 -QUETZALTENANGO,wikidataId/Q334577 -PUERTO_BARRIOS,wikidataId/Q991400 -MAZATENANGO,wikidataId/Q1023983 -GUATEMALA_CITY,wikidataId/Q1555 -ESCUINTLA,wikidataId/Q780771 -COBAN,wikidataId/Q867077 -SAINTMARC,wikidataId/Q132189 -PORT-AU-PRINCE,wikidataId/Q34261 -CAP_HAITIEN,wikidataId/Q216835 -EL_PROGRESO,wikidataId/Q2277161 -COMAYAGUA,wikidataId/Q679169 -TEGUCIGALPA,wikidataId/Q3238 -SAN_PEDRO_SULA,wikidataId/Q274584 -LA_CEIBA,wikidataId/Q916536 -VESZPREM,wikidataId/Q146317 -RAKOSLIGET,wikidataId/Q714696 -PECS,wikidataId/Q45779 -TATABANYA,wikidataId/Q187821 -SZOMBATHELY,wikidataId/Q42007 -SZEKESFEHERVAR,wikidataId/Q130212 
-SZEGED,wikidataId/Q81581 -NYIREGYHAZA,wikidataId/Q171223 -MISKOLC,wikidataId/Q102397 -KECSKEMET,wikidataId/Q171357 -GYOR,wikidataId/Q134494 -ERD,wikidataId/Q193050 -DEBRECEN,wikidataId/Q79880 -BUDAPEST,nuts/HU101 -REYKJAVIK,wikidataId/Q1764 -PARBHANI,wikidataId/Q1797389 -SURAT,wikidataId/Q1797317 -SITAPUR,wikidataId/Q1812539 -SINGRAULI,wikidataId/Q2668638 -PUNE,wikidataId/Q1538 -PATNA,wikidataId/Q100077 -PALI,wikidataId/Q46925 -NELLORE,wikidataId/Q61434 -MUMBAI,wikidataId/Q1156 -MORENA,wikidataId/Q2341467 -MALEGAON,wikidataId/Q580965 -KOZHIKODE,wikidataId/Q1142979 -KOLKATA,wikidataId/Q1348 -KANPUR,wikidataId/Q66568 -KANCHIPURAM,wikidataId/Q15157 -JALNA,wikidataId/Q1804863 -JAIPUR,wikidataId/Q1134781 -BELGAUM,wikidataId/Q270176 -HINDUPUR,wikidataId/Q760056 -COIMBATORE,wikidataId/Q15136 -CHANDIGARH,wikidataId/Q43433 -BHIWANDI,wikidataId/Q645725 -BENGALURU,wikidataId/Q1355 -BAHARAMPUR,wikidataId/Q633530 -AHMEDABAD,wikidataId/Q1070 -THOOTHUKKUDI,wikidataId/Q41562 -HYDERABAD_INDIA,wikidataId/Q1361 -VIJAYAWADA,wikidataId/Q200017 -TUMKUR,wikidataId/Q911310 -PEKALONGAN,wikidataId/Q10623 -BANDA_ACEH,wikidataId/Q5779 -BANDUNG,wikidataId/Q10332 -CIREBON,wikidataId/Q10368 -SURABAYA,wikidataId/Q11462 -SEMARANG,wikidataId/Q10687 -SAMARINDA,wikidataId/Q14499 -PURWAKARTA,wikidataId/Q10378 -PEMATANGSIANTAR,wikidataId/Q5979 -PEMALANG,wikidataId/Q10624 -JOMBANG,wikidataId/Q11082 -PAREPARE,wikidataId/Q14636 -PALEMBANG,wikidataId/Q8131 -MEDAN,wikidataId/Q5972 -MAKASSAR,wikidataId/Q14634 -KENDARI,wikidataId/Q15379 -JAMBI,wikidataId/Q2051 -GARUT_KOTA,wikidataId/Q833632 -BENGKULU,wikidataId/Q1890 -MASHAD,wikidataId/Q121157 -NISHABUR,wikidataId/Q131350 -PIRANSHAHR,wikidataId/Q3015863 -QOM,wikidataId/Q170573 -KHORAM_ABAD,wikidataId/Q502212 -KERMAN,wikidataId/Q171714 -KASHMAR,wikidataId/Q720403 -BANDAR_ABBAS,wikidataId/Q154814 -KARAJ, -GORGAN,wikidataId/Q188155 -ESFAHAN,wikidataId/Q42053 -DEHDASHT,wikidataId/Q888365 -BOJNURD,wikidataId/Q317946 -BABOL,wikidataId/Q605157 
-ARAK,wikidataId/Q212628 -ARADABIL,wikidataId/Q12727756 -AHVAZ,wikidataId/Q170366 -VARAMIN,wikidataId/Q518985 -TEHRAN,wikidataId/Q3616 -TABRIZ,wikidataId/Q80053 -SHIRAZ,wikidataId/Q6397066 -SHAHIN_SHAHR,wikidataId/Q862105 -SARI,wikidataId/Q168843 -SANANDAJ,wikidataId/Q272093 -SALMAS,wikidataId/Q765806 -KASAN, -SHADEGAN,wikidataId/Q778162 -BAGHDAD,wikidataId/Q1530 -SWORDS_MALAHIDE,wikidataId/Q987748 -CORK,wikidataId/Q36647 -BLANCHARDSTOWN,wikidataId/Q496387 -LIMERICK,wikidataId/Q133315 -DUBLIN,wikidataId/Q1761 -TEL_AVIV,wikidataId/Q33935 -VERONA,wikidataId/Q2028 -CATANIA,wikidataId/Q1903 -CAGLIARI,wikidataId/Q3622022 -BRINDISI,wikidataId/Q13496 -BRESCIA,wikidataId/Q6221 -BOLZANO,wikidataId/Q6526 -BOLOGNA,wikidataId/Q18288145 -CERIGNOLA,wikidataId/Q19277 -COMO,wikidataId/Q1308 -BARI,wikidataId/Q18241854 -ASTI,wikidataId/Q6122 -FERRARA,wikidataId/Q13362 -FIRENZE,nuts/ITE14 -VENEZIA,wikidataId/Q641 -UDINE,wikidataId/Q2790 -TRENTO,wikidataId/Q3376 -TORINO,wikidataId/Q495 -VICENZA,wikidataId/Q6537 -PERUGIA,wikidataId/Q3437 -PRATO,wikidataId/Q13377 -POZZUOLI_BACOLI,wikidataId/Q71832 -PALERMO,wikidataId/Q20795016 -PADOVA,wikidataId/Q617 -OSTIA,wikidataId/Q11171297 -NOVARA,wikidataId/Q6046 -MODENA,wikidataId/Q279 -MILAN,wikidataId/Q490 -MESTRE,wikidataId/Q541405 -MESSINA,wikidataId/Q13666 -MATERA,wikidataId/Q13616 -GROSSETO,wikidataId/Q6716 -GENOVA,wikidataId/Q1449 -GELA,wikidataId/Q39971 -PAVIA,wikidataId/Q6259 -BERGAMO,wikidataId/Q628 -BARLETTA,wikidataId/Q13470 -ROME,wikidataId/Q18288160 -REGGIO_DI_CALABRIA,wikidataId/Q8471 -LIVORNO,wikidataId/Q6761 -LECCE,wikidataId/Q13386 -AREZZO,wikidataId/Q13378 -ANCONA,wikidataId/Q3415 -ALESSANDRIA,wikidataId/Q6088 -SIRACUSA,wikidataId/Q13670 -SASSUOLO,wikidataId/Q8598 -SASSARI,wikidataId/Q13629 -PORDENONE,wikidataId/Q6606 -PISA,wikidataId/Q13375 -PESARO,wikidataId/Q13134 -PORTMORE,wikidataId/Q644663 -KINGSTON_JAMAICA,wikidataId/Q34692 -YAMAGUCHI,wikidataId/Q207876 -TOKYO,wikidataId/Q1490 -OSAKA,wikidataId/Q122723 
-OKAYAMA,wikidataId/Q200078 -FUKUOKA,wikidataId/Q26600 -TURKISTAN,wikidataId/Q848638 -TEMIRTAY,wikidataId/Q1001104 -TARAZ,wikidataId/Q486545 -QARAGANDY,wikidataId/Q484706 -SHYMKENT,wikidataId/Q485496 -SEMEY,wikidataId/Q61944 -RUDNY,wikidataId/Q1008853 -QULSARY, -QASKELEN,wikidataId/Q2121478 -PAVLODAR,wikidataId/Q486282 -OSKEMEN,wikidataId/Q162548 -ORAL,wikidataId/Q487439 -BALQASH,wikidataId/Q805515 -NUR_SULTAN,wikidataId/Q1520 -AQTAY,wikidataId/Q198989 -ALMATY,wikidataId/Q35493 -AKTOBE,wikidataId/Q477232 -TALDYQORGAN,wikidataId/Q491102 -IRBID,wikidataId/Q194165 -AMMAN,wikidataId/Q3805 -MERU,wikidataId/Q934149 -MALABA,wikidataId/Q6740728 -ELDORET, -NAIROBI,wikidataId/Q3870 -NAKURU, -NYERI, -KISUMU, -HAMHUNG,wikidataId/Q109356 -SONGNIM,wikidataId/Q505930 -SINUIJU, -RASON, -PYONGYANG, -KANGGYE,wikidataId/Q489825 -CHONGJU,wikidataId/Q213169 -CHONGJIN,wikidataId/Q109336 -CHEONAN,wikidataId/Q42146 -SEOUL,wikidataId/Q8684 -GWANGJU,wikidataId/Q41283 -JINJU,wikidataId/Q42144 -BUSAN, -KUWAIT,wikidataId/Q35178 -TOKMOK,wikidataId/Q854333 -TALAS,wikidataId/Q643955 -OSH,wikidataId/Q47282 -NARYN, -KYZYL_KYJA,wikidataId/Q2347569 -KARAKOL,wikidataId/Q194452 -KARA_BALTA, -JALAL_ABAD,wikidataId/Q487689 -BIKEK, -BALYKCHY,wikidataId/Q805876 -UZGEN,wikidataId/Q765887 -LATINAMERICA_AND_CARIBBEAN_ALL, -TRIPOLI,wikidataId/Q168954 -BEIRUT,wikidataId/Q3820 -BAALBEK, -ZAHLE,wikidataId/Q26569 -TYRE,wikidataId/Q82070 -SIDON,wikidataId/Q163490 -DAUGAVPILS,wikidataId/Q80021 -RIGA,nuts/LV006 -LIEPAJA, -VILNIUS,wikidataId/Q216 -SIAULIAI,wikidataId/Q134712 -PANEVEZYS,wikidataId/Q1719466 -KLAIPEDA,wikidataId/Q776965 -KAUNAS,wikidataId/Q4115712 -LUXEMBOURG,wikidataId/Q1842 -FIANARANTSOA,wikidataId/Q527441 -ANTSIRANANA,wikidataId/Q218928 -ANTSIRABE,wikidataId/Q583184 -MAROVOAY,wikidataId/Q1902425 -AMPARAFARAVOLA,wikidataId/Q474396 -MAHAJANGA,wikidataId/Q218753 -TAOLANARO,wikidataId/Q526568 -TOLIARA,wikidataId/Q824043 -TOAMASINA,wikidataId/Q178067 -ANTANANARIVO,wikidataId/Q3915 -MZUZU,wikidataId/Q502216 
-BLANTYRE,wikidataId/Q188693 -RAWANG,wikidataId/Q2072124 -IPOH,wikidataId/Q271619 -MALE,wikidataId/Q9347 -BAMAKO, -VALLETTA,wikidataId/Q23800 -TIJUANA,wikidataId/Q124739 -VERACRUZ,wikidataId/Q173270 -VILLAHERMOSA_CENTRO_TABASCO,wikidataId/Q210886 -XALAPA_VERACRUZ,wikidataId/Q221051 -MONTERREY,wikidataId/Q81033 -TEHUACAN, -SAN_JUAN_DEL_RIO,wikidataId/Q3849473 -REYNOSA,wikidataId/Q738303 -PUERTO_VALLARTA,wikidataId/Q853258 -PUEBLA,wikidataId/Q7258412 -MEXICOCITY,wikidataId/Q1489 -LEON_MEXICO,wikidataId/Q189128 -IRAPUATO_GUANAJUATO,wikidataId/Q816845 -GUANAJUATO_GUANAJUATO,wikidataId/Q46475 -GUADALAJARA,wikidataId/Q9022 -ENSENADA,wikidataId/Q1548691 -CULIACAN_ROSALES,wikidataId/Q211760 -COMITANDEDOMINGUEZ_CHIAPAS,wikidataId/Q1961680 -CIUDAD_RIO_BRAVO,wikidataId/Q1963086 -CIUDAD_JUAREZ,wikidataId/Q26590 -CAMPECHE_CAMPECHE,wikidataId/Q61301 -TEPIC,wikidataId/Q207972 -APATZINGAN_MICHOACAN,wikidataId/Q94758 -ACAPULCO_GUERRERO,wikidataId/Q81398 -MONACO_ALL,country/MCO -ULAANBAATAR,wikidataId/Q23430 -TANGER, -TEMARA,wikidataId/Q1635606 -SEFROU,wikidataId/Q1009308 -SAFI,wikidataId/Q183405 -OULAD_TEIMA,wikidataId/Q785945 -OUJDA,wikidataId/Q193802 -MIDELT,wikidataId/Q2501809 -MARRAKESH,wikidataId/Q101625 -MAKNES,wikidataId/Q178663 -FKIH_BEN_SALAH,wikidataId/Q2895854 -FES,wikidataId/Q80985 -CASABLANCA,wikidataId/Q7903 -AZROU,wikidataId/Q794128 -SIDI_SLIMANE,wikidataId/Q2359952 -MANHICA,wikidataId/Q10323671 -NAMPULA, -NACALA, -MOCUBA,wikidataId/Q520687 -MAXIXE, -MAPUTO,wikidataId/Q3889 -GURUE, -BEIRA, -ALTO_MOLOCUE, -PEMBA, -AL_BURAYMI,wikidataId/Q2829307 -MUSCAT,wikidataId/Q3826 -SALALAH,wikidataId/Q1294439 -SOHAR,wikidataId/Q943270 -EUROPE_AND_NORTHERNAMERICA_ALL,country/USA -KATHMANDU,wikidataId/Q3037 -POKHARA,wikidataId/Q6640 -JANAKPUR,wikidataId/Q378649 -ITAHARI,wikidataId/Q1675026 -DHARAN,wikidataId/Q8254 -DAMAK,wikidataId/Q1458001 -BUTWAL,wikidataId/Q11283 -BIRGANJ,wikidataId/Q8252 -BIRENDRANAGAR,wikidataId/Q865146 -BIRATNAGAR,wikidataId/Q8247 
-BHARATPUR,wikidataId/Q250220 -SOEST_BAARN,wikidataId/Q10027 -SITTARD_GELEEN,wikidataId/Q9781 -VENLO,wikidataId/Q9777 -VEENENDAAL,wikidataId/Q1840 -S_HERTOGENBOSCH,wikidataId/Q2766547 -S_GRAVENHAGE_THE_HAGUE,wikidataId/Q36600 -ROTTERDAM,wikidataId/Q34370 -ROOSENDAAL,wikidataId/Q6903267 -PURMEREND,wikidataId/Q9954 -PIJNACKER_BERKEL_EN_RODENRIJS,wikidataId/Q820839 -OSS,wikidataId/Q13971034 -NIJMEGEN, -HENGELO,wikidataId/Q10006 -HELMOND,wikidataId/Q9844 -LELYSTAD,wikidataId/Q166065 -LEIDEN,wikidataId/Q43631 -HEERLEN,wikidataId/Q9799 -HAARLEMMERMEER,wikidataId/Q9924 -HAARLEM,nuts/NL324 -GRONINGEN,wikidataId/Q749 -GOUDA,wikidataId/Q84125 -EINDHOVEN,wikidataId/Q9832 -DEVENTER,wikidataId/Q10001 -BREDA,wikidataId/Q40844 -ALPHEN_AAN_DEN_RIJN,wikidataId/Q213246 -ALMERE,wikidataId/Q83178 -ASSEN,wikidataId/Q798 -ARNHEM,wikidataId/Q1310 -ALMELO,wikidataId/Q988 -ALKMAAR,wikidataId/Q972 -ZOETERMEER,wikidataId/Q26432 -ZWOLLE,wikidataId/Q793 -EDE,wikidataId/Q26555 -ENSCHEDE,wikidataId/Q10002 -NIEUWEGEIN_IJSSELSTEIN,wikidataId/Q10050 -MAASTRICHT,wikidataId/Q1309 -BEVERWIJK_HEEMSKERK,wikidataId/Q9905 -BERGEN_OP_ZOOM,wikidataId/Q192508 -DORDRECHT,wikidataId/Q26421 -VLEUTEN_DE_MEERN,wikidataId/Q1329720 -UTRECHT,nuts/NL310 -TILBURG,wikidataId/Q9871 -SPIJKENISSE,wikidataId/Q488545 -LEEUWARDEN,wikidataId/Q25390 -HOORN,wikidataId/Q9938 -HILVERSUM,wikidataId/Q9934 -APELDOORN,wikidataId/Q101918 -AMSTERDAM, -AMERSFOORT,wikidataId/Q992 -NAPIER,wikidataId/Q203380 -AUCKLAND,wikidataId/Q37100 -WELLINGTON,wikidataId/Q23661 -TAURANGA,wikidataId/Q207756 -PALMERSTON,wikidataId/Q909220 -LOWER_HUTT,wikidataId/Q1015681 -HAMILTON_NEWZEALAND,wikidataId/Q200028 -DUNEDIN,wikidataId/Q133073 -CHRISTCHURCH,wikidataId/Q79990 -TIPITAPA,wikidataId/Q1365146 -MANAGUA,wikidataId/Q3274 -LEON_NICARAGUA,wikidataId/Q208151 -GRANADA_NICARAGUA,wikidataId/Q205648 -CIUDAD_SANDINO,wikidataId/Q2720849 -CHINANDEGA,wikidataId/Q974354 -MASAYA,wikidataId/Q1001914 -ZINDER,wikidataId/Q204365 -NIAMEY,wikidataId/Q3674 
-MARADI,wikidataId/Q664550 -DOSSO,wikidataId/Q985620 -AGADEZ, -OYO,wikidataId/Q1023703 -IBADAN,wikidataId/Q183298 -GOMBE,wikidataId/Q591598 -LAGOS, -OSLO,nuts/NO011 -LORENSKOG_LILLESTROM,wikidataId/Q60806432 -STAVANGER,wikidataId/Q25416 -BERGEN,wikidataId/Q26793 -GARAPAN,wikidataId/Q3272415 -LAYYAH,wikidataId/Q631087 -LAHORE_PAKISTAN,wikidataId/Q3308170 -KOHAT,wikidataId/Q1195983 -KHANPUR,wikidataId/Q1250097 -KARACHI,wikidataId/Q8660 -DASKA,wikidataId/Q2374990 -ISLAMABAD,wikidataId/Q1362 -HYDERABAD_PAKISTAN,wikidataId/Q1640079 -HAFIZABAD,wikidataId/Q1253663 -GUJRANWALA,wikidataId/Q243322 -FAISALABAD,wikidataId/Q173985 -CHISHTIAN,wikidataId/Q1250229 -BHAKKAR,wikidataId/Q2428259 -ATTOCK,wikidataId/Q1973073 -LARKANA,wikidataId/Q696605 -TURBAT,wikidataId/Q2977093 -SIALKOT,wikidataId/Q643883 -SHIKARPUR,wikidataId/Q1250069 -SHEIKHUPURA,wikidataId/Q972756 -SARGODHA,wikidataId/Q855997 -QUETTA,wikidataId/Q185458 -PESHAWAR,wikidataId/Q1113311 -MULTAN,wikidataId/Q185453 -MINGAWARA, -MARDAN,wikidataId/Q993859 -JHANG,wikidataId/Q1026616 -NAWABSHAH,wikidataId/Q1017637 -LA_CHORRERA, -PANAMACITY,wikidataId/Q3306 -ARRAIJAN,wikidataId/Q1841186 -PORT_MORESBY,wikidataId/Q36526 -KIMBE,wikidataId/Q59718 -WEWAK,wikidataId/Q1017763 -LAE,wikidataId/Q612250 -CIUDAD_DEL_ESTE,wikidataId/Q192235 -ASUNCION,wikidataId/Q2933 -TRUJILLO,wikidataId/Q214173 -PIURA,wikidataId/Q208183 -MOQUEGUA,wikidataId/Q1000346 -LIMA,wikidataId/Q211795 -JULIACA,wikidataId/Q696074 -CHICLAYO,wikidataId/Q260911 -IQUITOS,wikidataId/Q193289 -HUARAL,wikidataId/Q723677 -HUANCAYO,wikidataId/Q468782 -HUACHO,wikidataId/Q1002052 -CUSCO,wikidataId/Q205057 -CAJAMARCA,wikidataId/Q205078 -AYACUCHO,wikidataId/Q205112 -AREQUIPA,wikidataId/Q159273 -JAEN,wikidataId/Q1229885 -MANILA,wikidataId/Q13580 -CEBU,wikidataId/Q1467 -BACOLOD,wikidataId/Q5217 -LOMZA,wikidataId/Q215633 -LEGNICA,wikidataId/Q106274 -LEGIONOWO,wikidataId/Q730944 -KRAKOW,nuts/PL213 -KOSZALIN,wikidataId/Q62868 -GRUDZIADZ,wikidataId/Q123511 
-GORZOW_WIELKOPOLSKI,wikidataId/Q104731 -KASZTELANKA, -KALISZ,wikidataId/Q52842 -GORNOSLASKI_ZWIAZEK_METROPOLITALNY, -LODZ,wikidataId/Q580 -GDANSK,wikidataId/Q1792 -ELK,wikidataId/Q61480 -ELBLAG,wikidataId/Q104712 -CZESTOCHOWA,wikidataId/Q103217 -POZNAN,wikidataId/Q268 -PLOCK,wikidataId/Q104725 -GDYNIA,wikidataId/Q385 -GLIWICE,wikidataId/Q105084 -PIOTRKOW_TRYBUNALSKI,wikidataId/Q158004 -PILA,wikidataId/Q556200 -PABIANICE,wikidataId/Q158589 -OSTROWIEC_SWIETOKRZYSKI,wikidataId/Q730929 -OSTROW_WIELKOPOLSKI,wikidataId/Q52895 -OPOLE,wikidataId/Q92212 -SZCZECIN,wikidataId/Q393 -STALOWA_WOLA,wikidataId/Q751140 -RYBNIK,wikidataId/Q107094 -RADOM,wikidataId/Q104740 -RZESZOW,wikidataId/Q598 -STARGARD_SZCZECINSKI,wikidataId/Q106268 -TYCHY,wikidataId/Q11977 -TCZEW,wikidataId/Q474697 -SUWALKI,wikidataId/Q236083 -TORUN,wikidataId/Q47554 -TOMASZOW_MAZOWIECKI,wikidataId/Q157994 -WALBRZYCH,wikidataId/Q110732 -SLUPSK,wikidataId/Q105048 -ZIELONA_GORA,wikidataId/Q104720 -ZGIERZ,wikidataId/Q104407 -ZAMOSC,wikidataId/Q145972 -WIDNICA,wikidataId/Q687963 -WARSAW,nuts/PL127 -KIELCE,wikidataId/Q102317 -KATOWICE,wikidataId/Q588 -BIALYSTOK,wikidataId/Q761 -BELCHATOW,wikidataId/Q157992 -MIELEC,wikidataId/Q658861 -LUBLIN,wikidataId/Q37333 -WROCLAW,nuts/PL514 -WLOCLAWEK,wikidataId/Q106681 -JELENIA_GORA,wikidataId/Q147934 -JASTRZEBIE_ZDROJ,wikidataId/Q107106 -INOWROCLAW,wikidataId/Q378821 -CHELM,wikidataId/Q234184 -BYDGOSZCZ,wikidataId/Q41252 -BIELSKO_BIALA,wikidataId/Q106583 -OLSZTYN,wikidataId/Q82765 -NOWY_SACZ,wikidataId/Q802 -MYSLOWICE,wikidataId/Q161595 -GLOGOW,wikidataId/Q66013 -GNIEZNO,wikidataId/Q51432 -PRZEMYSL,wikidataId/Q208473 -BARREIRO,wikidataId/Q217394 -AMORA,wikidataId/Q474039 -COIMBRA, -BRAGA, -ALMADA, -SETUBAL, -FUNCHAL, -AL_DAAYEN, -AL_KHOR_ALTHAKIRA, -ALKHOR,wikidataId/Q311717 -MESAIEED,wikidataId/Q1069900 -UMM_SLAL,wikidataId/Q990414 -QATAR_ALL,wikidataId/Q935886 -DOHA,wikidataId/Q3861 -AL_SHEEHANIYA, -AL_SHAMAL,wikidataId/Q22948561 -AL_RAYYAN,wikidataId/Q311272 
-AL_WAKRA,wikidataId/Q310893 -AL_SHAHANIYA,wikidataId/Q12188815 -ARAD,wikidataId/Q173591 -BUCHAREST,wikidataId/Q19660 -CONSTANTA,wikidataId/Q79808 -FALTICENI,wikidataId/Q303015 -IASI,wikidataId/Q46852 -CLUJ_NAPOCA,wikidataId/Q100188 -CRAIOVA,wikidataId/Q168057 -CAMPIATURZII,wikidataId/Q458422 -BRASOV,wikidataId/Q82174 -NAVODARI,wikidataId/Q837960 -TARGU_JIU,wikidataId/Q202357 -MANGALIA, -SIBIU,wikidataId/Q83324 -TIMISOARA,wikidataId/Q83404 -REGHIN,wikidataId/Q572478 -ASTRAHAN,wikidataId/Q3927 -BEREZNIKI,wikidataId/Q105002 -DZERZINSK,wikidataId/Q76493 -MOSCOW,wikidataId/Q649 -SAINT_PETERSBURG,wikidataId/Q4407742 -TYUMEN,wikidataId/Q5815 -BUTARE,wikidataId/Q509739 -RUHENGERI,wikidataId/Q246145 -KIGALI,wikidataId/Q3859 -NYANZA,wikidataId/Q528679 -GITARAMA, -CYANGUGU,wikidataId/Q782857 -KAYONZA, -GISENYI,wikidataId/Q830631 -ARAR,wikidataId/Q626199 -AL_KHAFJI,wikidataId/Q1771721 -AL_MADINAH,wikidataId/Q35484 -RAFHA,wikidataId/Q27219 -MAKKAH,wikidataId/Q5806 -AR_RASS,wikidataId/Q1878991 -RIYADH,wikidataId/Q3692 -TABUK,wikidataId/Q244232 -TAIF,wikidataId/Q182640 -THIES,wikidataId/Q600693 -SAINT_LOUIS,wikidataId/Q178872 -DIOURBEL,wikidataId/Q910973 -DAKAR,wikidataId/Q3718 -KAOLACK,wikidataId/Q2082914 -MBOUR,wikidataId/Q1019431 -TOUBA,wikidataId/Q657072 -LOUGA,wikidataId/Q738061 -ZIGUINCHOR,wikidataId/Q202776 -BOKHTAR-SERBIA, -KIKINDA,wikidataId/Q309355 -ZRENJANIN,wikidataId/Q201125 -POZAREVAC,wikidataId/Q199942 -NOVI_SAD,wikidataId/Q55630 -NOVI_PAZAR,wikidataId/Q202453 -NIS,wikidataId/Q129259 -KRAGUJEVAC,wikidataId/Q167394 -BELGRADE,wikidataId/Q3711 -BORCA,wikidataId/Q712322 -SINGAPORE,country/SGP -NITRA,wikidataId/Q26397 -TRNAVA,wikidataId/Q26175 -ZILINA,wikidataId/Q25797 -BRATISLAVA,wikidataId/Q1780 -MARTIN,wikidataId/Q27001 -BANSKA_BYSTRICA,wikidataId/Q144983 -KOSICE,wikidataId/Q25409 -VINH_LONG,wikidataId/Q34789 -HO_CHI_MINH_CITY,wikidataId/Q1854 -LJUBLJANA,wikidataId/Q437 -JOHANNESBURG,wikidataId/Q2346838 -PORT_ELIZABETH,wikidataId/Q125434 -PAMPLONA,wikidataId/Q10282 
-ALBACETE,wikidataId/Q15095 -ALICANTE,wikidataId/Q11959 -ALCOI,wikidataId/Q494777 -BARCELONA_SPAIN,wikidataId/Q1492 -AVILES,wikidataId/Q14649 -ALMERIA,wikidataId/Q10400 -LA_CORUNA,wikidataId/Q8757 -BENIDORM,wikidataId/Q487981 -MALAGA, -MADRID,wikidataId/Q2807 -IGUALADA,wikidataId/Q15950 -MURCIA,wikidataId/Q12225 -ZARAGOZA,wikidataId/Q10305 -TALAVERA_DE_LA_REINA,wikidataId/Q181359 -SEVILLA,wikidataId/Q8717 -SANTIAGO_DE_COMPOSTELA,wikidataId/Q14314 -SANTANDER,wikidataId/Q12233 -SANTA_MONICA, -VIGO,wikidataId/Q8745 -PARLA,wikidataId/Q824651 -PUERTO_DE_LA_CRUZ,wikidataId/Q623549 -TARRAGONA,wikidataId/Q15088 -VILANOVA_I_LA_GELTRU,wikidataId/Q15553 -VILADECANS,wikidataId/Q15652 -VALENCIA_SPAIN,wikidataId/Q8818 -VALDEMORO, -GIJON,wikidataId/Q12273 -FERROL,wikidataId/Q485329 -GRANADA_SPAIN,wikidataId/Q8810 -PALMA,wikidataId/Q8826 -CARTAGENA_SPAIN,wikidataId/Q162615 -CADIZ,wikidataId/Q15682 -PALENCIA,wikidataId/Q8378 -OVIEDO,wikidataId/Q14317 -TOLEDO_SPAIN,wikidataId/Q5836 -VALLADOLID,wikidataId/Q8356 -TORREVIEJA,wikidataId/Q221749 -TORREMOLINOS,wikidataId/Q492737 -SANT_BOI_DE_LLOBREGAT,wikidataId/Q15635 -LOGRONO,wikidataId/Q14325 -IRUN,wikidataId/Q200201 -SABADELL,wikidataId/Q12258 -REUS,wikidataId/Q487096 -COSLADA,wikidataId/Q164197 -CASTELLDEFELS,wikidataId/Q15597 -CACERES,wikidataId/Q15678 -BURGOS,wikidataId/Q9580 -BILBAO,wikidataId/Q8692 -MOLLET_DEL_VALLES,wikidataId/Q23994797 -MELILLA,nuts/ES64 -MANRESA,wikidataId/Q16697 -SANTA_CRUZ_DE_TENERIFE,wikidataId/Q14328 -SAN_SEBASTIAN,geoId/7277500 -SALAMANCA,wikidataId/Q15695 -TERRASSA,wikidataId/Q13939 -ZAMORA,wikidataId/Q15696 -VITORIA_GASTEIZ,wikidataId/Q14318 -ELCHE,wikidataId/Q10509 -EL_PRAT_DE_LLOBREGAT,wikidataId/Q15619 -EL_MASNOU,wikidataId/Q12039 -KASSALA,wikidataId/Q686813 -WAD_MADANI,wikidataId/Q852528 -SINJAH,wikidataId/Q130364 -BUR_SUDAN,wikidataId/Q208718 -ATBARA,wikidataId/Q753906 -AL_QADARIF,wikidataId/Q311199 -KHARTOUM,wikidataId/Q1963 -SANNAR,wikidataId/Q611867 
-NORTHERN_AFRICA_AND_WESTERN_ASIA_ALL,wikidataId/Q24899010 -UMEA,wikidataId/Q25579 -TABY, -SODERTALJE, -HELSINGBORG,wikidataId/Q25411 -OREBRO,wikidataId/Q25732 -BORAS, -NORRKOPING, -HANINGE,wikidataId/Q113692 -GOTEBORG,wikidataId/Q25287 -STOCKHOLM,wikidataId/Q1754 -VASTERAS,wikidataId/Q25412 -UPPSALA,wikidataId/Q25286 -MALMO,wikidataId/Q2211 -LUND, -LINKOPING,wikidataId/Q25413 -EASTERN_AND_SOUTHERN_ASIA_ALL, -LUZERN,wikidataId/Q4191 -ZURICH,wikidataId/Q72 -WINTERTHUR,wikidataId/Q9125 -WETZIKON,wikidataId/Q68305 -THUN,wikidataId/Q68978 -ST_GALLEN,wikidataId/Q25607 -NEUCHATEL,wikidataId/Q69345 -BASEL,wikidataId/Q78 -EMMEN_LUCERNE,wikidataId/Q4225 -LAUSANNE,wikidataId/Q807 -GENEVA,wikidataId/Q71 -FRIBOURG,wikidataId/Q36378 -BERNE,wikidataId/Q70 -BIEL,wikidataId/Q1034 -LUGANO,wikidataId/Q7024 -PANJAKENT,wikidataId/Q630805 -KONIBODOM,wikidataId/Q1015752 -CHKALOVSK_BUSTON,wikidataId/Q2997217 -ISFARA,wikidataId/Q695330 -DUSHANBE,wikidataId/Q9365 -KHUJAND,wikidataId/Q373808 -KHOROG,wikidataId/Q467501 -ISTARAVSHAN,wikidataId/Q936510 -NURAK,wikidataId/Q1018201 -BOKHTAR,wikidataId/Q648567 -VAHDAT,wikidataId/Q1018025 -TURSUNZODA,wikidataId/Q648709 -KULOB,wikidataId/Q132043 -CHIANG_MAI,wikidataId/Q233588 -CHA_AM,wikidataId/Q927810 -BANGKOK,wikidataId/Q1861 -KHON_KAEN,wikidataId/Q327533 -CHUMPHON,wikidataId/Q244695 -CHIANG_RAI,wikidataId/Q236419 -PHATTHALUNG,wikidataId/Q179539 -AL_AIN,wikidataId/Q234600 -DUBAI, -ABU_DHABI, -AL_QAYRAWAN,wikidataId/Q179570 -TUNIS,wikidataId/Q3572 -TOZEUR,wikidataId/Q504661 -SUSAH, -SFAX,wikidataId/Q46325 -QABIS,wikidataId/Q215661 -MONASTIR,wikidataId/Q208715 -BANZART,wikidataId/Q189546 -ANTALYA,wikidataId/Q6487 -ADIYAMAN,wikidataId/Q168197 -KAYSERI,wikidataId/Q48338 -BALIKESIR,wikidataId/Q199723 -GAZIANTEP,wikidataId/Q93338 -DENIZLI,wikidataId/Q170967 -CORUM,wikidataId/Q206900 -KONYA,wikidataId/Q79857 -IZMIR,wikidataId/Q35997 -ISTANBUL, -CERKEZKOY,wikidataId/Q272800 -CARASAMBA, -BURSA,wikidataId/Q40738 -VIRANSEHIR,wikidataId/Q497731 
-UZUNKOPRU,wikidataId/Q775225 -TARSUS,wikidataId/Q134287 -SIVAS,wikidataId/Q107401 -SANLIURFA,wikidataId/Q133118 -SAMSUN,wikidataId/Q160450 -MALATYA,wikidataId/Q165995 -ANKARA,wikidataId/Q3640 -AGRI,nuts/TRA21 -ELBISTAN, -ADAPAZARI,wikidataId/Q175323 -NIGDE,wikidataId/Q194146 -TURKMENBASY,wikidataId/Q488987 -TURKMENABAT,wikidataId/Q487684 -MARY,wikidataId/Q5713 -DASOGUZ,wikidataId/Q487672 -BAYRAMALY_BAJRAM_ALI, -BALKANABAT,wikidataId/Q199762 -ASHGABAT,wikidataId/Q23438 -GYZYLARBAT,wikidataId/Q1015618 -TEJEN,wikidataId/Q1983850 -MBARARA,wikidataId/Q731039 -MBALE,wikidataId/Q1015727 -MASAKA,wikidataId/Q1032233 -KASESE,wikidataId/Q1232016 -KAMPALA,wikidataId/Q3894 -JINJA,wikidataId/Q501709 -GULU,wikidataId/Q581379 -LIRA,wikidataId/Q1015699 -ROVNO,wikidataId/Q156739 -NIKOLAEV,wikidataId/Q41572 -DIYARB_NAJM, -CAIRO,wikidataId/Q85 -ALEXANDRIA,wikidataId/Q87 -AL_ZAQAZIQ,wikidataId/Q140304 -AL_QHURDAQAH, -AL_MANSHAH, -PORT_SAID,wikidataId/Q134509 -ASYUT,wikidataId/Q29962 -SEFTON,wikidataId/Q991747 -SCUNTHORPE,wikidataId/Q913386 -ABERDEEN,wikidataId/Q36405 -BLANTYRE_HAMILTON,wikidataId/Q881708 -BLACKPOOL,wikidataId/Q170377 -BLACKBURN,wikidataId/Q188313 -BARNSLEY,wikidataId/Q1857382 -BOURNEMOUTH,wikidataId/Q170478 -AYLESBURY,wikidataId/Q213474 -ASHFORD,wikidataId/Q725261 -ALDWICK_FELPHAM,wikidataId/Q2227185 -BANGOR,wikidataId/Q234178 -CHESTER,wikidataId/Q170263 -CHELTENHAM,wikidataId/Q206988 -BELFAST,wikidataId/Q10686 -BEDFORD,nuts/UKH24 -BATH,wikidataId/Q22889 -BASINGSTOKE_AND_DEANE,wikidataId/Q810185 -CREWE,wikidataId/Q648810 -DACORUM,wikidataId/Q931180 -HARLOW,wikidataId/Q852729 -HALTON, -HALIFAX_UK,wikidataId/Q826561 -GUILDFORD,wikidataId/Q213465 -HUDDERSFIELD,wikidataId/Q201812 -HARROGATE,wikidataId/Q215829 -HARTLEPOOL,wikidataId/Q215752 -HASTINGS,wikidataId/Q29245 -CHESTERFIELD,wikidataId/Q823600 -COLCHESTER,wikidataId/Q184163 -COATBRIDGE,wikidataId/Q1018723 -CHELMSFORD,wikidataId/Q210985 -BRACKNELL,wikidataId/Q783210 -CANNOCK_HEDNESFORD, -CORBY,wikidataId/Q994925 
-BURNLEY,wikidataId/Q209096 -BRISTOL,nuts/UKK11 -BRIGHTON,wikidataId/Q131491 -CAMBRIDGE,wikidataId/Q350 -BURTON_ON_TRENT,wikidataId/Q1000597 -CARLISLE,wikidataId/Q192896 -CARDIFF,wikidataId/Q24342199 -WARRINGTON,wikidataId/Q894095 -LIVERPOOL,nuts/UKD72 -LIVINGSTON,wikidataId/Q848287 -TAMWORTH,wikidataId/Q704864 -TYNESIDE_CONURBATION,wikidataId/Q1120443 -WASHINGTON,wikidataId/Q1018448 -WARWICK,wikidataId/Q549761 -TELFORD,wikidataId/Q576938 -TAUNTON,wikidataId/Q845619 -WAKEFIELD,nuts/UKE45 -WORCESTER,wikidataId/Q1646181 -WOKING,wikidataId/Q646225 -WIGAN_SKELMERSDALE, -TORBAY,wikidataId/Q209055 -THURROCK,nuts/UKH32 -THATCHAM,wikidataId/Q1026304 -NORWICH,wikidataId/Q130191 -NORTHAMPTON,wikidataId/Q192240 -TUNBRIDGE_WELLS,wikidataId/Q665489 -WYCOMBE,wikidataId/Q548974 -SWINDON,wikidataId/Q894093 -SWANSEA,nuts/UKL18 -NEWPORT,wikidataId/Q11294004 -NEWCASTLE_UNDER_LYME,wikidataId/Q868642 -YORK,wikidataId/Q20986421 -LUTON,nuts/UKH21 -LOUGHBOROUGH,wikidataId/Q537323 -LONDON_UK,nuts/UKI -PLYMOUTH,wikidataId/Q21674890 -MAIDSTONE,wikidataId/Q213180 -MAIDENHEAD,wikidataId/Q1368496 -OXFORD,wikidataId/Q34217 -NUNEATON,wikidataId/Q175632 -MANSFIELD,wikidataId/Q841979 -MANCHESTER_UK,nuts/UKD33 -MEDWAY,nuts/UKJ41 -MILTON_KEYNES,wikidataId/Q894090 -PORTSMOUTH,wikidataId/Q21683233 -SLOUGH,wikidataId/Q211907 -RUSTINGTON,wikidataId/Q846403 -REDHILL_REIGATE,wikidataId/Q1851095 -ROTHERHAM,wikidataId/Q1878732 -SUTTON_IN_ASHFIELD,wikidataId/Q2119187 -STOCKTON_ON_TEES,wikidataId/Q894094 -READING,wikidataId/Q161491 -PRESTON,wikidataId/Q184090 -SHEFFIELD,nuts/UKE32 -SHREWSBURY,wikidataId/Q201970 -STEVENAGE,wikidataId/Q19795 -STAFFORD,wikidataId/Q826782 -ST_ALBANS,wikidataId/Q6226 -SOUTHEND_ON_SEA,nuts/UKH31 -SOUTHAMPTON,nuts/UKJ32 -DERRY,wikidataId/Q163584 -DERBY,nuts/UKF11 -EXETER,wikidataId/Q134672 -GLASGOW,nuts/UKM34 -ELLESMERE_PORT,wikidataId/Q1011600 -GREAT_YARMOUTH,wikidataId/Q237253 -GRAVESHAM,wikidataId/Q1459781 -FALKIRK,wikidataId/Q623687 -FRIMLEY_FARNBOROUGH,wikidataId/Q1424548 
-GLOUCESTER,wikidataId/Q170497 -DUNDEE,wikidataId/Q123709 -DONCASTER,wikidataId/Q1925846 -EASTBOURNE,wikidataId/Q208262 -EAST_KILBRIDE,wikidataId/Q654226 -EDINBURGH,nuts/UKM25 -KETTERING,wikidataId/Q984613 -KEIGHLEY,wikidataId/Q990168 -GRIMSBY_CLEETHORPES,wikidataId/Q587765 -IPSWICH,wikidataId/Q184775 -KINGSTON_UPON_HULL,nuts/UKE11 -KIDDERMINSTER,wikidataId/Q844062 -LEICESTER,nuts/UKF21 -LEEDS_BRADFORD, -HYNDBURN,wikidataId/Q1640713 -LINCOLN,wikidataId/Q180057 -BIRMINGHAM,nuts/UKG31 -AYR_PRESTWICK, -BASILDON,wikidataId/Q216649 -HEREFORD,wikidataId/Q204720 -CRAWLEY,wikidataId/Q844908 -COVENTRY,wikidataId/Q20986417 -WESTON_SUPER_MARE,wikidataId/Q845623 -WAVENEY,wikidataId/Q642424 -THANET,wikidataId/Q1752642 -WORTHING,wikidataId/Q671348 -MOTHERWELL_WISHAW,wikidataId/Q737547 -PETERBOROUGH_UK,wikidataId/Q12956645 -RUGBY,wikidataId/Q623765 -REDDITCH,wikidataId/Q865716 -DARLINGTON,wikidataId/Q213181 -GREATER_MANCHESTER,nuts/UKD3 -NOTTINGHAM,nuts/UKF14 -EASTLEIGH,wikidataId/Q731069 -ARUSHA,wikidataId/Q4301 -MODESTO,geoId/0648354 -MONTGOMERY,geoId/0151000 -NEWYORK,geoId/3651000 -CHICAGO,geoId/1714000 -GAINESVILLE_FL,geoId/1225175 -KILLEEN_TX,geoId/4839148 -MANCHESTER_US,geoId/3345140 -MINNEAPOLIS,geoId/2743000 -RALEIGH,geoId/3755000 -PORTLAND,geoId/4159000 -PHILADELPHIA,geoId/4260000 -TOLEDO_US,geoId/3977000 -TALLAHASEE,geoId/1270600 -SPRINGFIELD,geoId/2970000 -SAVANNAH,geoId/1369000 -VISALIA,geoId/0682954 -WACO,geoId/4876000 -CLOVIS,geoId/0614218 -OUAGADOUGOU,wikidataId/Q3777 -MELO,wikidataId/Q738250 -MERCEDES,wikidataId/Q844914 -MONTEVIDEO,wikidataId/Q1335 -RIVERA,wikidataId/Q646498 -PAYSANDU,wikidataId/Q1020814 -MALDONALDO,wikidataId/Q16258 -LAS_PIEDRAS,wikidataId/Q615470 -SALTO, -TACUAREMBO,wikidataId/Q833016 -BESHARYK,wikidataId/Q891824 -NAVOIY,wikidataId/Q763015 -ANDIJAN,wikidataId/Q487656 -URGANCH,wikidataId/Q472925 -TURTKUL,wikidataId/Q1027623 -TERMIZ,wikidataId/Q491879 -QARSHI,wikidataId/Q644008 -TASHKENT,wikidataId/Q269 -NUKUS,wikidataId/Q489898 
-BEKOBOD,wikidataId/Q815053 -NAMANGAN,wikidataId/Q492552 -BUKHARA,wikidataId/Q5764 -DENAU,wikidataId/Q1189758 -KOKAND,wikidataId/Q489890 -YARITAGUA,wikidataId/Q1023493 -MARIARA,wikidataId/Q1816684 -CIUDAD_GUAYANA,wikidataId/Q507080 -CIUDAD_OJEDA,wikidataId/Q953490 -CABUDARE,wikidataId/Q580968 -SAN_CRISTOBAL, -CABIMAS,wikidataId/Q723588 -SAN_JUAN_DE_LOS_MORROS,wikidataId/Q999117 -CUMANA, -GUARENAS_AND_GUATIRE,wikidataId/Q1026987 -LOS_TEQUES,wikidataId/Q695631 -CORO,wikidataId/Q1134454 -CARACAS,wikidataId/Q1533 -MARACAIBO,wikidataId/Q10324879 -MARACAY,wikidataId/Q333928 -CUA,wikidataId/Q770309 -BARCELONA_VENEZUELA, -ACARIGUA,wikidataId/Q338059 -ALTAGRACIA_DE_ORITUCO,wikidataId/Q433758 -VALERA,wikidataId/Q1010367 -VALENCIA_VENEZUELA,wikidataId/Q54880 -BARQUISIMETO,wikidataId/Q4709 -TUCUPITA,wikidataId/Q179007 -BARINAS,wikidataId/Q695623 -CIUDAD_BOLIVAR,wikidataId/Q26906 -SACARLOS_DEL_ZULIA,wikidataId/Q1005022 -APIA,wikidataId/Q36260 -ADAN,wikidataId/Q131694 -YARIM,wikidataId/Q568905 -DHAMAR,wikidataId/Q955523 -AMRAN,wikidataId/Q275720 -AL_HUDAYDAH,wikidataId/Q270041 -RADA_A,wikidataId/Q2125362 -TAIZZ,wikidataId/Q466216 -SANAA,wikidataId/Q2471 -TARIM,wikidataId/Q1014010 -NDOLA,wikidataId/Q219671 -TIRANA,wikidataId/Q19689 -ANDORRA_ALL,country/AND -WINDHOEK, -ANTIGUA, -BAHRAIN_ALL,country/BHR -ARMENIA_ALL,country/ARM -BERMUDA,country/BMU -LAPAZ,wikidataId/Q1491 -BOSNIA_AND_HERZEGOVINA_ALL,country/BIH -GABORONE,wikidataId/Q3919 -CIGRES, -BELMOPAN,wikidataId/Q3043 -BELARUS_ALL,country/BLR -YAOUNDE,wikidataId/Q3808 -PRAIA,wikidataId/Q3751 -DEHIWALA-MOUNTLAVINIA, -MORATUWA,wikidataId/Q867213 -SANTIAGO_PROVIDENCIA,wikidataId/Q51587 -LOSLAGOS_PUERTOMONTT,wikidataId/Q36214 -COQUIMBO_COQUIMBO,wikidataId/Q3871 -TARAPACA_ARICA, -CANET, -KUNMING,wikidataId/Q182852 -SUZHOU,wikidataId/Q360643 -LANZHOU,wikidataId/Q183584 -BELLO,wikidataId/Q816024 -BUENAVENTURA,wikidataId/Q996581 -PEREIRA,wikidataId/Q51111 -ITAGUI,wikidataId/Q1527934 -DOSQUEBRADAS,wikidataId/Q1093584 
-CUCUTA,wikidataId/Q216847 -CIEGODEAVILA,wikidataId/Q115465 -COTONOU,wikidataId/Q43595 -COPENHAGEN,wikidataId/Q1748 -SANTADOMINGO,wikidataId/Q34820 -LALIBERTAD_NUEVASANSALVADOR, -SANSALVADOR_SOYAPANGO,wikidataId/Q956031 -SANSALVADOR_MEJICANOS,wikidataId/Q723411 -SANSALVADOR_ILOPANGO,wikidataId/Q1659066 -SANSALVADOR_APOPA,wikidataId/Q619646 -HELSINKI,wikidataId/Q1757 -KUTAISI,wikidataId/Q172415 -BATUMI,wikidataId/Q25475 -TBILISI,wikidataId/Q994 -ATHENSGAA,wikidataId/Q1524 -CONAKRY,wikidataId/Q3733 -GEORGETOWN,wikidataId/Q10717 -LESCAYES,wikidataId/Q984786 -PETIONVILLE,wikidataId/Q1001440 -CHINA_HONGKONG_SAR_ALL,country/HKG -DELHI,wikidataId/Q987 -BANGALORE,wikidataId/Q1355 -WARANGAL,wikidataId/Q28169759 -JAKARTA,wikidataId/Q3630 -ABIDJAN,wikidataId/Q19830972 -KOSOVO_ALL,wikidataId/Q786124 -BISHEKEK,wikidataId/Q9361 -MASERU,wikidataId/Q3909 -MONROVIA,wikidataId/Q3748 -CHINA_MACAO_SAR_ALL,country/MAC -MALDIVES,country/MDV -NOUAKCHOTT,wikidataId/Q3688 -CUREPIPE,wikidataId/Q1002525 -PORTLOUIS,wikidataId/Q3929 -CHETUMAL_OTHONP_BLANCO_QUINTANAROO,wikidataId/Q459553 -CHILPANCINGO_GUERRERO,wikidataId/Q207935 -CUAUTLA_MORELOS,wikidataId/Q939959 -CUAUHTEMOC_CHIHUAHUA,wikidataId/Q645293 -CORDOBA_VERACRUZ,wikidataId/Q989492 -COLIMA,wikidataId/Q61309 -COATZACOALCOS_VERACRUZ,wikidataId/Q502023 -CELAYA_GUANAJUATO,wikidataId/Q580649 -CARMEN_CAMPECHE,wikidataId/Q991435 -IGUALA_GUERRERO,wikidataId/Q607167 -ZACATECAS_ZACATECAS,wikidataId/Q139242 -VICTORIA_TAMAULIPAS,wikidataId/Q331661 -ZAMORA_MICHOACAN,wikidataId/Q145865 -ZAPOPAN_JALISCO,wikidataId/Q147402 -ZITACUARO_MICHOACAN,wikidataId/Q1962272 -HIDALGO_MICHOACAN, -HERMOSILLO_SONORA,wikidataId/Q189138 -GUAYMAS_SONORA,wikidataId/Q985521 -GUASAVE_SINALOA,wikidataId/Q588515 -CARDENAST_TABASCO,wikidataId/Q5795966 -CANCUN,wikidataId/Q8969 -ALTAMIRA_TAMAULIPAS,wikidataId/Q434225 -AGUASCALIENTES,wikidataId/Q79952 -CHALCOMEX,wikidataId/Q1962104 -ACUNACOAHUILA,wikidataId/Q179841 -HIDALGODELPARRAL_CHIHUAHUA,wikidataId/Q771214 
-GUADALUPE_ZACATECAS,wikidataId/Q1961298 -GUADALUPE_NUEVOLEON,wikidataId/Q938835 -HUIXQUILUCAN_MEXICO,wikidataId/Q20146689 -CHIHUAHUA_CHIHUAHUA,wikidataId/Q61302 -CIUDADOBREGON,wikidataId/Q681340 -CORREGIDORA,wikidataId/Q1962054 -MONACO_ALL,country/MCO -MONTENEGRO_ALL,country/MNE -GHORAI,wikidataId/Q1447290 -AMSTERDAM-UTRECHT,wikidataId/Q727 -MARSHALLISLANDS,country/MHL -LIMA_CARABAYLLO, -UCAYALI_CALLERIA,wikidataId/Q2486015 -LIMA_PUENTEPIEDRA,wikidataId/Q3303771 -LIMA_LURIGANCHO,wikidataId/Q2718534 -JUNIN_ELTAMBO,wikidataId/Q5352223 -LIMA_LOSOLIVOS,wikidataId/Q2100996 -LIMA_CHORRILLOS,wikidataId/Q2321093 -LIMA_SANJUANDELURIGANCHO,wikidataId/Q2501691 -LIMA_COMAS,wikidataId/Q1113426 -LIMA_ATE,wikidataId/Q3769630 -CALLAO_CALLAOCERCADO, -LIMA_SANBORJA,wikidataId/Q2566267 -LIMA_VILLAMARIADELTRIUNFO,wikidataId/Q2038962 -PIURA_CASTILLA,wikidataId/Q3826215 -LAHORE_PHILIPPINES, -LISBON,wikidataId/Q597 -ANGUILLA,geoId/2801500 -CASTRIES,wikidataId/Q41699 -CAPETOWN,wikidataId/Q1185115 -DURBAN,wikidataId/Q5468 -STEVE_TSHWETE,wikidataId/Q2292410 -HARARE,wikidataId/Q3921 -DAMASCUS,wikidataId/Q3766 -LOME,wikidataId/Q3792 -SOUSSE, -NORTH_MACEDONIA_ALL, -MOSHI,wikidataId/Q271733 -DARESSALAAM,wikidataId/Q1960 -TOMPKINS_COUNTY,geoId/36109 -SANFRANSCISCO,geoId/0667000 -LUSAKA,wikidataId/Q3881 -NORTHERN_GOVERNORATE,wikidataId/Q840445 -CAPITAL_GOVERNORATE,wikidataId/Q528953 -MUHARRAQ_GOVERNORATE,wikidataId/Q375630 -SOUTHERN_GOVERNORATE,wikidataId/Q838532 -FRANCISTOWN,wikidataId/Q165422 -INDAIATUBA_CORREGO, -PORTO_ALEGRE_AND_NOVO_HAMBURGO,wikidataId/Q40269 -BELEM_ANANINDEUA, -CAMPINAS_PAULINIA, -SUINING,wikidataId/Q426644 -YULIN,wikidataId/Q571918 -ARAUCA,wikidataId/Q626543 -VILLAVICENCIO,wikidataId/Q749224 -BABAHOYO,wikidataId/Q797568 -TOULOUSSE,wikidataId/Q7880 -CHOLOMA,wikidataId/Q998218 -ICELAND_ALL,wikidataId/Q1764 -BAREILLY,wikidataId/Q1797378 -PATI,wikidataId/Q10622 -SUBANG, -SAN_LUIS_RIO_COLORADO,wikidataId/Q995380 -SALTILLO,wikidataId/Q53077 
-OCEANIA_EXCL_AUSTRALIA_AND_NEWZEALAND_ALL, -AL_KHOR,wikidataId/Q1156471 -HAIL,wikidataId/Q675568 -AT_TAIF,wikidataId/Q182640 -JIDDAH,wikidataId/Q374365 -KHAMIS_ABHA, -AD_DAMMAM,wikidataId/Q160320 -AL_HUFUF,wikidataId/Q27136 -AL_JUBAYL,wikidataId/Q27430 -AL_KHARJ,wikidataId/Q2162128 -ALFASHIR,wikidataId/Q311204 -WINTERHUR,wikidataId/Q9125 -AL_FUJAYRAH,wikidataId/Q4091 -RAS_AL_KHAIMAH, -HOUSTON,geoId/4835000 -CLEVELAND,geoId/3916000 -NAVOI_KARMANA, -BARCELONA_AND_PUERTO_LA_CRUZ,wikidataId/Q379509 diff --git a/scripts/un/sdg/cities_test.py b/scripts/un/sdg/cities_test.py deleted file mode 100644 index aafee5f17e..0000000000 --- a/scripts/un/sdg/cities_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -'''Tests for cities.py. 
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''Generates geographies for UN places.

Produces:
* un_places.mcf (place definitions)
* un_containment.mcf (place containment triples)
* place_mappings.csv (SDG code -> dcid)

Usage: python3 geography.py
'''
import collections
import csv
import json
import os

# Output folder.
FOLDER = 'geography'

PLACE_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcs:{type}
name: "{name}"
unDataCode: "{code}"
unDataLabel: "{label}"
'''
CONTAINMENT_TEMPLATE = '''
Node: dcid:{dcid}
typeOf: dcs:{type}{containment}
'''

# Curated map of dcid to SDG code to avoid duplicates.
FIXED = {
    # Africa.
    'africa': '2',
    # Source geographies without a corresponding geography in UNdata.
    'undata-geo/G99999999': '952',
}

# Geography types.
CITY = 'City'
CONTINENT = 'Continent'
COUNTRY = 'Country'
GEO_REGION = 'GeoRegion'
SAMPLING_STATION = 'SamplingStation'
UN_GEO_REGION = 'UNGeoRegion'

# UN geography prefix.
UN_PREFIX = 'undata-geo'

# Types that are treated as (UN) regions by the containment rules.
_REGION_TYPES = frozenset((GEO_REGION, UN_GEO_REGION))


class Node:
    '''Simplified representation of a DC MCF Node.

    Nodes compare equal when dcid, type and name all match, and are hashable so
    they can be used as dictionary keys.
    '''

    def __init__(self, dcid, type, name):
        self.dcid = dcid
        self.type = type
        self.name = name

    def _key(self):
        '''Returns the tuple of fields that defines node identity.'''
        return (self.dcid, self.type, self.name)

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Hash the field tuple (not the concatenated string) so the hash is
        # derived from exactly the fields __eq__ compares.
        return hash(self._key())

    def __str__(self):
        return self.dcid + self.type + self.name

    def __repr__(self):
        return 'Node({!r}, {!r}, {!r})'.format(self.dcid, self.type, self.name)

    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        # Ordering deliberately stays on the concatenated string so that the
        # order of nodes in generated files does not change.
        return str(self) < str(other)


def get_sdg2type(file):
    '''Produces map of SDG code -> SDG type.

    Args:
        file: Input file path (CSV with GEOGRAPHY_CODE and GEOGRAPHY_TYPE
          columns).

    Returns:
        Map of SDG code -> SDG type.
    '''
    with open(file, encoding='utf-8') as f:
        return {
            row['GEOGRAPHY_CODE']: row['GEOGRAPHY_TYPE']
            for row in csv.DictReader(f)
        }


def get_sdg_un_maps(file):
    '''Produces maps of UN code -> SDG code & SDG code -> UN code.

    Rows with an empty UN code or an empty SDG code are ignored.

    Args:
        file: Input file path (CSV with subject_id and object_id columns).

    Returns:
        - Map of UN code -> SDG code.
        - Map of SDG code -> UN code.
    '''
    un2sdg = {}  # Map of UN code -> SDG code.
    sdg2un = {}  # Map of SDG code -> UN code.

    # 'utf-8-sig' strips the byte order mark at the start of the UN input file.
    with open(file, encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            un_code = row['subject_id']
            sdg_code = row['object_id'].removeprefix('sdg-geo:')
            if not un_code or not sdg_code:
                continue
            un2sdg[un_code] = sdg_code
            sdg2un[sdg_code] = un_code
    return un2sdg, sdg2un


def get_un2dc_curated(file):
    '''Produces map of UN code -> curated Node.

    Args:
        file: Input file path (CSV with unDataCode, dcid, typeOf and dc_name
          columns).

    Returns:
        Map of UN code -> curated Node.
    '''
    un2dc_curated = {}
    with open(file, encoding='utf-8') as f:
        for row in csv.DictReader(f):

            # Skip unmapped places.
            if row['unDataCode'] == 'x':
                continue

            # Add missing type for NorthernEurope.
            if row['dcid'] == 'NorthernEurope':
                place_type = UN_GEO_REGION
            else:
                # typeOf is a single-quoted list of objects; the first entry's
                # dcid is used as the place type.
                place_type = json.loads(row['typeOf'].replace(
                    "'", '"'))[0]['dcid']
            un2dc_curated[row['unDataCode']] = Node(row['dcid'], place_type,
                                                    row['dc_name'])
    return un2dc_curated


def should_include_containment(s, o):
    '''Returns whether triple should be included in containment.

    A triple is kept when:
    * a region is contained in Earth, a continent or another region;
    * a country is contained in a region;
    * a sampling station or a UN-defined city is contained in a country.

    Args:
        s: Subject node.
        o: Object node.

    Returns:
        Whether triple should be included in containment.
    '''
    if s.type in _REGION_TYPES:
        return (o.dcid == 'Earth' or o.type == CONTINENT or
                o.type in _REGION_TYPES)
    if s.type == COUNTRY:
        return o.type in _REGION_TYPES
    if s.type == SAMPLING_STATION or (s.type == CITY and
                                      s.dcid.startswith(UN_PREFIX)):
        return o.type == COUNTRY
    return False


def _is_new_place(node):
    '''Returns whether node is a place that needs a containment definition.'''
    return (node.type in _REGION_TYPES or node.type == SAMPLING_STATION or
            (node.type == CITY and node.dcid.startswith(UN_PREFIX)))


def write_un_places(input_geos, output, sdg2type, un2sdg, un2dc_curated):
    '''Writes UN places to output and computes new places.

    Args:
        input_geos: Path to input UN geography file.
        output: Path to output file.
        sdg2type: Map of SDG code -> SDG type.
        un2sdg: Map of UN code -> SDG code.
        un2dc_curated: Map of UN code -> curated Node.

    Returns:
        - Map of UN code -> generated Node.
        - List of Nodes for new places.
    '''
    un2dc_generated = {}
    new_subjects = []
    with open(input_geos, encoding='utf-8') as f_in, \
            open(output, 'w', encoding='utf-8') as f_out:
        for row in csv.DictReader(f_in):
            subject = row['subject_id']
            node = un2dc_curated.get(subject)
            if node is None:
                # No curated mapping: derive a node from the UN code. Only
                # sampling stations and cities keep their SDG type; everything
                # else becomes a generic GeoRegion.
                sdg_type = sdg2type.get(un2sdg.get(subject))
                if sdg_type in (SAMPLING_STATION, CITY):
                    place_type = sdg_type
                else:
                    place_type = GEO_REGION
                node = Node(subject.replace(':', '/'), place_type,
                            row['subject_label'].split('_')[-1])
                un2dc_generated[subject] = node

            # Add non-UN-specific places to new_subjects.
            if _is_new_place(node):
                new_subjects.append(node)

            f_out.write(
                PLACE_TEMPLATE.format(dcid=node.dcid,
                                      type=node.type,
                                      name=node.name,
                                      code=subject,
                                      label=row['subject_label']))
    return un2dc_generated, new_subjects


def _resolve_node(code, un2dc_curated, un2dc_generated):
    '''Returns the Node for a UN code (curated first), or None if unknown.'''
    if code in un2dc_curated:
        return un2dc_curated[code]
    return un2dc_generated.get(code)


def process_containment(input_containment, un2dc_curated, un2dc_generated):
    '''Filters UN geography containment triples.

    Rows whose subject or object cannot be resolved to a Node are reported on
    stdout and skipped.

    Args:
        input_containment: Path to input containment file.
        un2dc_curated: Map of UN code -> curated Node.
        un2dc_generated: Map of UN code -> generated Node.

    Returns:
        - Map of child Node -> list of containing object dcids.
    '''
    containment = collections.defaultdict(list)

    # 'utf-8-sig' strips the byte order mark at the start of the UN input file.
    with open(input_containment, encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            subject = UN_PREFIX + ':' + row['subject_id']
            s = _resolve_node(subject, un2dc_curated, un2dc_generated)
            if s is None:
                print('Missing subject: ', subject)
                # Skip the row: without a subject there is no triple to emit,
                # and falling through would reuse the previous row's node.
                continue
            obj = UN_PREFIX + ':' + row['object_id']
            o = _resolve_node(obj, un2dc_curated, un2dc_generated)
            if o is None:
                print('Missing object: ', obj)
                continue
            if should_include_containment(s, o):
                containment[s].append(o.dcid)
    return containment


def write_un_containment(output, containment, new_subjects):
    '''Writes containment triples to output.

    Args:
        output: Path to output file.
        containment: Map of child Node -> list of containing object dcids.
        new_subjects: List of Nodes for new places.
    '''
    with open(output, 'w', encoding='utf-8') as f:
        for s in sorted(containment):
            lines = ''.join(
                '\ncontainedInPlace: dcid:' + o for o in containment[s])
            f.write(
                CONTAINMENT_TEMPLATE.format(dcid=s.dcid,
                                            type=s.type,
                                            containment=lines))

        # For new places with no specified containment, add containment in
        # Earth.
        for s in sorted(new_subjects):
            if s in containment:
                continue
            f.write(
                CONTAINMENT_TEMPLATE.format(
                    dcid=s.dcid,
                    type=s.type,
                    containment='\ncontainedInPlace: dcid:Earth'))


def write_place_mappings(output, sdg2un, un2dc_curated, un2dc_generated):
    '''Writes SDG code -> dcid mappings to output.

    SDG codes without a known Node are omitted, as are codes that map to a
    dcid listed in FIXED under a different SDG code.

    Args:
        output: Path to output file.
        sdg2un: Map of SDG code -> UN code.
        un2dc_curated: Map of UN code -> curated Node.
        un2dc_generated: Map of UN code -> generated Node.
    '''
    # newline='' lets the csv module control line endings (avoids '\r\r\n' on
    # platforms that translate '\n').
    with open(output, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['sdg', 'dcid'])
        writer.writeheader()
        for code in sorted(sdg2un):
            node = _resolve_node(sdg2un[code], un2dc_curated, un2dc_generated)
            if node is None:
                continue

            # Filter duplicates.
            if node.dcid in FIXED and code != FIXED[node.dcid]:
                continue

            writer.writerow({'sdg': code, 'dcid': node.dcid})


def main():
    '''Reads the input mappings and writes all geography artifacts.'''
    # Read input geography mappings.
    sdg2type = get_sdg2type('sdg-dataset/output/SDG_geographies.csv')
    un2sdg, sdg2un = get_sdg_un_maps(
        'sssom-mappings/output_mappings/undata-geo__sdg-geo.csv')
    un2dc_curated = get_un2dc_curated(os.path.join(FOLDER, 'places.csv'))

    un2dc_generated, new_subjects = write_un_places(
        os.path.join(FOLDER, 'geographies.csv'),
        os.path.join(FOLDER, 'un_places.mcf'), sdg2type, un2sdg, un2dc_curated)
    containment = process_containment(
        'sssom-mappings/data/enumerations/undata/geography_hierarchy.csv',
        un2dc_curated, un2dc_generated)
    write_un_containment(os.path.join(FOLDER, 'un_containment.mcf'),
                         containment, new_subjects)
    write_place_mappings(os.path.join(FOLDER, 'place_mappings.csv'), sdg2un,
                         un2dc_curated, un2dc_generated)


if __name__ == '__main__':
    main()
b/scripts/un/sdg/geography/un_containment.mcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e56924da8334c67fafc9ee56b8c750d20bd753e47ec01669e7686d278305455 +size 367508 diff --git a/scripts/un/sdg/geography/un_places.mcf b/scripts/un/sdg/geography/un_places.mcf new file mode 100644 index 0000000000..97c436963d --- /dev/null +++ b/scripts/un/sdg/geography/un_places.mcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24496e7cd2bf4c1472cd091a1f59b2e7d3e5c2bb6bb8f648ed2804dda193f21 +size 1181069 diff --git a/scripts/un/sdg/geography_test.py b/scripts/un/sdg/geography_test.py new file mode 100644 index 0000000000..284cde5274 --- /dev/null +++ b/scripts/un/sdg/geography_test.py @@ -0,0 +1,137 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +'''Tests for geography.py. + +Usage: python3 -m unittest discover -v -s ../ -p "geography_test.py" +''' +import os +import sys +import tempfile +import unittest + +sys.path.append( + os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__))))) +from un.sdg import geography + +module_dir_ = os.path.dirname(__file__) + +FOLDER = os.path.join(module_dir_, 'testdata/test_geography') + +# Mock input data. 
+SDG2TYPE = {'4': 'Country'} +UN2SDG = { + 'undata-geo:G00000020': '4', +} +SDG2UN = {'4': 'undata-geo:G00000020'} +UN2DC_CURATED = { + 'undata-geo:G00000020': + geography.Node('country/AFG', 'Country', 'Afghanistan'), + 'undata-geo:G00003250': + geography.Node('country/ARE', 'Country', 'United Arab Emirates'), + 'undata-geo:G00100000': + geography.Node('Earth', 'Place', 'World'), + 'undata-geo:G00114000': + geography.Node('asia', 'Continent', 'Asia'), + 'undata-geo:G00119000': + geography.Node('SouthernAsia', 'UNGeoRegion', 'Southern Asia'), + 'undata-geo:G00120000': + geography.Node('WesternAsia', 'UNGeoRegion', 'Western Asia'), + 'undata-geo:G00403000': + geography.Node('undata-geo/G00403000', 'GeoRegion', + 'Landlocked developing countries (LLDCs)'), + 'undata-geo:G00404000': + geography.Node('undata-geo/G00404000', 'GeoRegion', + 'Least developed countries (LDCs)'), +} +UN2DC_GENERATED = { + 'undata-geo:G00000030': + geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman') +} + +# Add additional referenced objects that aren't defined in test_geographies.csv. +UN2DC_GENERATED_FULL = { + **UN2DC_GENERATED, + **{ + 'undata-geo:G00403300': + geography.Node('undata-geo/G00403300', 'GeoRegion', 'Landlocked developing countries (LLDCs): Asia'), + 'undata-geo:G00404300': + geography.Node('undata-geo/G00404300', 'GeoRegion', 'Least developed countries (LDCs): Asia'), + } +} + +# Test intermediate output data. 
+NEW_SUBJECTS = [geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman')] +CONTAINMENT = { + geography.Node('country/AFG', 'Country', 'Afghanistan'): [ + 'SouthernAsia', 'undata-geo/G00403000', 'undata-geo/G00403300', + 'undata-geo/G00404000', 'undata-geo/G00404300' + ], + geography.Node('undata-geo/G00000030', 'GeoRegion', 'Ajman'): [ + 'Earth', 'asia', 'WesternAsia' + ], +} + + +class GeographyTest(unittest.TestCase): + + def test_should_include_containment(self): + self.assertTrue( + geography.should_include_containment( + geography.Node('AustrailiaAndNewZealand', 'UNGeoRegion', + 'Australia and New Zealand'), + geography.Node('oceania', 'Continent', 'Oceania'))) + self.assertFalse( + geography.should_include_containment( + geography.Node('geoId/3502000', 'City', 'Albuquerque'), + geography.Node('country/USA', 'Country', + 'United States of America'))) + + def test_write_un_places(self): + with tempfile.TemporaryDirectory() as tmp_dir: + output = os.path.join(tmp_dir, 'un_places.mcf') + un2dc_generated, new_subjects = geography.write_un_places( + os.path.join(FOLDER, 'test_geographies.csv'), output, SDG2TYPE, + UN2SDG, UN2DC_CURATED) + with open(output) as result: + with open(os.path.join(FOLDER, + 'expected_un_places.mcf')) as expected: + self.assertEqual(result.read(), expected.read()) + self.assertEqual(un2dc_generated, UN2DC_GENERATED) + self.assertEqual(new_subjects, NEW_SUBJECTS) + + def test_process_containment(self): + containment = geography.process_containment( + os.path.join(FOLDER, 'test_geography_hierarchy.csv'), UN2DC_CURATED, + UN2DC_GENERATED_FULL) + self.assertEqual(containment, CONTAINMENT) + + def test_write_un_containment(self): + with tempfile.TemporaryDirectory() as tmp_dir: + output = os.path.join(tmp_dir, 'un_containment.mcf') + geography.write_un_containment(output, CONTAINMENT, NEW_SUBJECTS) + with open(output) as result: + with open(os.path.join( + FOLDER, 'expected_un_containment.mcf')) as expected: + 
def get_place_mappings(file):
    '''Produces map of SDG code -> dcid.

    Reads a CSV with the header columns 'sdg' and 'dcid'. Rows where either
    cell is missing or empty are skipped, so an incomplete row can never
    produce a bogus mapping such as 'None' or ''.

    Args:
        file: Input file path.

    Returns:
        Map of SDG code -> dcid (both strings).

    Raises:
        KeyError: If a data row is read but the header lacks 'sdg' or 'dcid'.
    '''
    place_mappings = {}
    # Read as UTF-8 explicitly rather than relying on the locale default;
    # newline='' is what the csv module expects for its input files.
    with open(file, encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            code = row['sdg']
            dcid = row['dcid']
            # DictReader fills cells missing from a short row with None.
            if not code or not dcid:
                continue
            place_mappings[code] = dcid
    return place_mappings
def fix_encoding(s):
    '''Fixes input encoding to decode special characters.

    Repairs text whose UTF-8 bytes were mis-read as Latin-1 by re-encoding
    the string as Latin-1 and decoding the bytes as UTF-8. When that round
    trip is not possible the input is returned unchanged.

    Args:
        s: Input string.

    Returns:
        String with special characters decoded, or s itself if it cannot be
        reinterpreted.
    '''
    try:
        return s.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # UnicodeEncodeError: s holds characters outside Latin-1, so it was
        # not mis-decoded that way. UnicodeDecodeError: the Latin-1 bytes are
        # not valid UTF-8. In both cases the text is kept as-is.
        return s
''' with open(os.path.join(schema_dir, 'series.mcf'), 'w') as f_series: with open(os.path.join(schema_dir, 'sdg.textproto'), 'w') as f_vertical: @@ -147,7 +198,8 @@ def process(input_dir, schema_dir, csv_dir): for _, row in df.iterrows(): if str(row['Enumeration_Code_SDMX']) != 'CUST_BREAKDOWN' and str( - row['Enumeration_Code_SDMX']) != 'COMPOSITE_BREAKDOWN': + row['Enumeration_Code_SDMX']) != 'COMPOSITE_BREAKDOWN' and str( + row['Enumeration_Code_SDMX']) != 'UNIT_MEASURE': dimensions[str(row['Enumeration_Code_SDMX'])][str( row['EnumerationValue_Code_SDMX'])] = str( row['EnumerationValue_Name']) @@ -183,10 +235,26 @@ def process(input_dir, schema_dir, csv_dir): if df.empty: continue + # Drop known null values. + df['OBS_VALUE'] = df.apply(lambda x: drop_null( + x['OBS_VALUE'], x['SERIES_CODE'], x['FOOT_NOTE']), + axis=1) + df = df[df['OBS_VALUE'] != ''] + if df.empty: + continue + + # Drop curated. + df['OBS_VALUE'] = df.apply( + lambda x: drop_special(x['OBS_VALUE'], x['VARIABLE_CODE']), + axis=1) + df = df[df['OBS_VALUE'] != ''] + if df.empty: + continue + # Format places. - df['GEOGRAPHY_CODE'] = df.apply(lambda x: get_geography( - x['GEOGRAPHY_CODE'], x['GEOGRAPHY_TYPE']), - axis=1) + df['GEOGRAPHY_CODE'] = df.apply( + lambda x: get_geography(x['GEOGRAPHY_CODE'], place_mappings), + axis=1) df = df[df['GEOGRAPHY_CODE'] != ''] if df.empty: continue @@ -205,9 +273,9 @@ def process(input_dir, schema_dir, csv_dir): 'SG_SCP_PROCN_LS.LEVEL_STATUS--DEG_MLOW__GOVERNMENT_NAME--CITY_OF_WROCLAW' ) - sv_frames.append(df.loc[:, - ['VARIABLE_CODE', 'VARIABLE_DESCRIPTION'] + - properties].drop_duplicates()) + sv_frames.append( + df.loc[:, ['VARIABLE_CODE', 'VARIABLE_DESCRIPTION', 'SOURCE'] + + properties].drop_duplicates()) measurement_method_frames.append( df.loc[:, ['NATURE', 'OBS_STATUS', 'REPORTING_TYPE']]. 
drop_duplicates()) @@ -215,8 +283,8 @@ def process(input_dir, schema_dir, csv_dir): df['VARIABLE_CODE'] = df['VARIABLE_CODE'].apply( lambda x: 'dcs:sdg/' + x) - df['UNIT_MEASURE'] = df.apply( - lambda x: get_unit(x['UNIT_MEASURE'], x['BASE_PERIOD']), axis=1) + df['UNIT_MEASURE'] = df['UNIT_MEASURE'].apply( + lambda x: 'dcs:SDG_' + x) df['MEASUREMENT_METHOD'] = df.apply( lambda x: 'dcs:' + get_measurement_method(x), axis=1) @@ -232,9 +300,10 @@ def process(input_dir, schema_dir, csv_dir): with open(os.path.join(schema_dir, 'sv.mcf'), 'w') as f: for df in sv_frames: - for _, row in df.iterrows(): + main = df.drop(['SOURCE'], axis=1).drop_duplicates() + for _, row in main.iterrows(): cprops = '' - for dimension in sorted(df.columns[2:]): + for dimension in sorted(main.columns[2:]): # Skip totals. if row[dimension] == util.TOTAL: continue @@ -255,6 +324,22 @@ def process(input_dir, schema_dir, csv_dir): val = 'SDG_' + enum + 'Enum_' + val cprops += f'\n{prop}: dcs:{val}' + + # Add list of observation sources to 'footnote' property on SV. 
+ sources = df.loc[df['VARIABLE_CODE'] == row['VARIABLE_CODE']] + sources = sources.dropna(subset=['SOURCE']) + sources = sources.loc[:, ['SOURCE']].drop_duplicates()['SOURCE'] + footnote = '' + if not sources.empty: + footnote = '\nfootnote: "Includes data from the following sources: ' + '; '.join( + sorted([ + fix_encoding( + str(s)).rstrip('.,;:!?').strip().replace( + '"', "'").replace('\n', '').replace( + '\t', '').replace('__', '_') + for s in sources + ])) + '"' + f.write( util.SV_TEMPLATE.format_map({ 'dcid': @@ -266,6 +351,8 @@ def process(input_dir, schema_dir, csv_dir): '"' + row['VARIABLE_DESCRIPTION'] + '"', 'cprops': cprops, + 'footnote': + footnote, })) with open(os.path.join(schema_dir, 'schema.mcf'), 'w') as f: @@ -337,4 +424,5 @@ def process(input_dir, schema_dir, csv_dir): if os.path.exists('csv'): shutil.rmtree('csv') os.makedirs('csv') - process('sdg-dataset/output', 'schema', 'csv') + place_mappings = get_place_mappings('geography/place_mappings.csv') + process('sdg-dataset/output', 'schema', 'csv', place_mappings) diff --git a/scripts/un/sdg/process_test.py b/scripts/un/sdg/process_test.py index e8117b5c93..9efe38ed03 100644 --- a/scripts/un/sdg/process_test.py +++ b/scripts/un/sdg/process_test.py @@ -28,6 +28,21 @@ module_dir_ = os.path.dirname(__file__) +PLACE_MAPPINGS = { + '1': 'Earth', + '2': 'africa', + '4': 'country/AFG', + '5': 'southamerica', + '8': 'country/ALB', + '9': 'oceania', + '11': 'WesternAfrica', + '12': 'country/DZA', + '13': 'CentralAmerica', + '14': 'EasternAfrica', + '840': 'country/USA', + 'AF_MAZAR_E_SHARIF': 'wikidataId/Q130469' +} + def assert_equal_dir(self, result_dir, expected_dir): for root, _, files in os.walk(result_dir): @@ -40,16 +55,12 @@ def assert_equal_dir(self, result_dir, expected_dir): class ProcessTest(unittest.TestCase): def test_get_geography(self): - self.assertEqual(process.get_geography(840, 'Country'), - 'dcs:country/USA') - self.assertEqual(process.get_geography('AF_MAZAR_E_SHARIF', 'City'), - 
'dcs:wikidataId/Q130469') - self.assertEqual(process.get_geography(1, 'Region'), 'dcs:Earth') - - def test_get_unit(self): - self.assertEqual(process.get_unit('CON_USD', 2021), '[CON_USD 2021]') - self.assertEqual(process.get_unit('CON_USD', float('nan')), - 'dcs:SDG_CON_USD') + self.assertEqual(process.get_geography(840, PLACE_MAPPINGS), + 'dcid:country/USA') + self.assertEqual( + process.get_geography('AF_MAZAR_E_SHARIF', PLACE_MAPPINGS), + 'dcid:wikidataId/Q130469') + self.assertEqual(process.get_geography(1, PLACE_MAPPINGS), 'dcid:Earth') def test_get_measurement_method(self): d = {'NATURE': ['E'], 'OBS_STATUS': ['A'], 'REPORTING_TYPE': ['G']} @@ -57,12 +68,30 @@ def test_get_measurement_method(self): for _, row in df.iterrows(): self.assertEqual(process.get_measurement_method(row), 'SDG_E_A_G') + def test_drop_null(self): + self.assertEqual( + process.drop_null( + 0, 'SE_ACS_CMPTR', + 'This data point is NIL for the submitting nation.'), '') + self.assertEqual(process.drop_null(1, 'SE_ACS_CMPTR', ''), 1) + + def test_drop_special(self): + self.assertEqual(process.drop_special(0, 'SH_SAN_SAFE@URBANISATION--R'), + '') + self.assertEqual( + process.drop_special(0, 'AG_FOOD_WST@FOOD_WASTE_SECTOR--FWS_OOHC'), + 0) + + def test_fix_encoding(self): + source = 'Instituto Nacional das Comunicaçőes de Moçambique' + self.assertEqual(process.fix_encoding(source), source) + def test_process(self): with tempfile.TemporaryDirectory() as tmp_schema: with tempfile.TemporaryDirectory() as tmp_csv: process.process( os.path.join(module_dir_, 'testdata/test_input'), - tmp_schema, tmp_csv) + tmp_schema, tmp_csv, PLACE_MAPPINGS) assert_equal_dir( self, tmp_schema, os.path.join(module_dir_, 'testdata/test_schema')) diff --git a/scripts/un/sdg/sdg-dataset b/scripts/un/sdg/sdg-dataset index 410719252c..bdb619c04a 160000 --- a/scripts/un/sdg/sdg-dataset +++ b/scripts/un/sdg/sdg-dataset @@ -1 +1 @@ -Subproject commit 410719252cbd6e75f91834d5788d4e19c210abbe +Subproject commit 
bdb619c04a0d8d6121190477b7fbd791562f99ac diff --git a/scripts/un/sdg/cities.py b/scripts/un/sdg/sdmx/cities.py similarity index 54% rename from scripts/un/sdg/cities.py rename to scripts/un/sdg/sdmx/cities.py index d9a7b13ad5..3c5e0c9f1a 100644 --- a/scripts/un/sdg/cities.py +++ b/scripts/un/sdg/sdmx/cities.py @@ -11,30 +11,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -'''Finds dcids for cities. +'''This script does not use the most up-to-date schema format. +It should only be used as an illustration of the SDMX -> MCF mapping. +Do not actually run! + +Finds dcids for cities in input files. Produces: -* cities.csv: dcid for each city code +* preprocessed/cities.csv: dcid for each city name -There are a few city codes that are still missing. -These can be manually filled in and verified. +Note: For cities where the find entities API did not return a dcid, +we tried manually searching for the dcid and filled these into the file. +There are a few city names that are still missing - these are left blank. **This script ideally shouldn't need to be run again.** - -Usage: python3 cities.py +Usage: python3 cities.py ''' import csv import requests -import pandas as pd +import os import sys +BATCH = 1 + def get_cities(json, api_key): '''Applies find entities API for given json. - Args: json: Input json. api_key: API key. - Returns: API response. ''' @@ -46,19 +50,24 @@ def get_cities(json, api_key): def write_cities(file, cities, api_key): - '''Writes city codes and names to file. - + '''Writes city dcids and names to file. Args: file: Output file path. - cities: Map of city names to codes. + cities: List of city dcids to process. api_key: API key. 
''' with open(file, 'w') as f: writer = csv.DictWriter(f, fieldnames=['name', 'dcid']) writer.writeheader() - for city in list(cities.keys()): - json = {'entities': [{'description': city}]} + city_list = list(cities.keys()) + for i in range(0, len(city_list), BATCH): + json = { + 'entities': [{ + 'description': city + } for city in city_list[i:i + BATCH]] + } response = get_cities(json, api_key) + print(response) try: for entity in response['entities']: dcid = entity['dcids'][0] if 'dcids' in entity else '' @@ -67,13 +76,19 @@ def write_cities(file, cities, api_key): 'dcid': dcid }) except KeyError: - writer.writerow({'name': cities[city], 'dcid': ''}) + writer.writerow({'name': cities[city_list[i]], 'dcid': ''}) if __name__ == '__main__': - df = pd.read_excel(f'sdg-dataset/output/SDG_cities_enumeration.xlsx') - cities = {} - for _, row in df.iterrows(): - cities[row['CITY_NAME'] + ', ' + row['GEO_AREA_NAME'].replace( - '_', ' ').title()] = row['CITY_CODE'] - write_cities('cities_test.csv', cities, sys.argv[1]) + cities = set() + for file in sorted(os.listdir('input')): + code = file.removesuffix('.csv') + with open('input/' + file) as f: + reader = csv.DictReader(f) + if '[Cities]' in reader.fieldnames: + for row in reader: + cities.add(row['[Cities]'].replace('_', ' ').title() + + ', ' + row['GeoAreaName']) + cities = sorted(cities) + + write_cities('preprocessed/cities2.csv', cities, sys.argv[1]) diff --git a/scripts/un/sdg/m49.csv b/scripts/un/sdg/sdmx/m49.tsv similarity index 100% rename from scripts/un/sdg/m49.csv rename to scripts/un/sdg/sdmx/m49.tsv diff --git a/scripts/un/sdg/sdmx/preprocess.py b/scripts/un/sdg/sdmx/preprocess.py new file mode 100644 index 0000000000..ff4f3067bf --- /dev/null +++ b/scripts/un/sdg/sdmx/preprocess.py @@ -0,0 +1,99 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def write_concepts(file, concept_set):
    '''Writes concepts from concept_set to file as CSV rows.

    Rows are written in sorted order, one per concept, with no header.

    Args:
        file: File path to write to.
        concept_set: Current set of concepts (an iterable of tuples; each
            tuple becomes one CSV row).
    '''
    # newline='' hands line-ending control to the csv module (otherwise rows
    # end in '\r\r\n' on Windows); UTF-8 keeps the output independent of the
    # locale default encoding.
    with open(file, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(sorted(concept_set))
def write_templates(file, templates):
    '''Writes templates to file in sorted order.

    Each template string is written verbatim; no separator is added.

    Args:
        file: Output file path.
        templates: Template strings.
    '''
    # UTF-8 keeps the output independent of the locale default encoding, so
    # non-ASCII names in templates cannot fail to encode.
    with open(file, 'w', encoding='utf-8') as f:
        f.writelines(sorted(templates))
def get_variable_measured(row, properties, concepts):
    '''Returns templated string for variable_measured.

    For every property column with a value known to concepts, the value's
    formatted code is appended to the variable dcid, its name is appended to
    the description, and a constraint property line is emitted.

    Args:
        row: Input csv dict row.
        properties: List of properties for row (bracketed column names).
        concepts: Dictionary of concepts: concept -> code ->
            (name, formatted code).

    Returns:
        Templated string.
    '''
    ids = []
    labels = []
    constraint_lines = []
    for column in properties:
        # Column names look like '[Concept]'; drop the surrounding brackets.
        concept = column[1:-1]
        code = row[column]
        if not code:
            continue
        if concept not in concepts:
            continue
        if code not in concepts[concept]:
            continue

        entry = concepts[concept][code]
        ids.append(entry[1])
        labels.append(entry[0])

        enum = make_property(concept)
        # Reuse an existing property when one is mapped, else derive one.
        prop = (MAPPED_CONCEPTS[concept] if concept in MAPPED_CONCEPTS else
                'sdg_' + enum[0].lower() + enum[1:])
        val = enum + 'Enum_' + entry[1]
        constraint_lines.append(f'\n{prop}: dcs:SDG_{val}')

    sv = 'sdg/' + '_'.join([row['SeriesCode']] + ids)
    description = format_description(row['SeriesDescription'])
    pvs = ', '.join(labels)
    if pvs:
        description += ': ' + pvs
    return SV_TEMPLATE.format_map({
        'dcid': sv,
        'popType': 'SDG_' + row['SeriesCode'],
        'name': '"' + description + '"',
        'cprops': ''.join(constraint_lines)
    })
def process_input_file(file, writer, concepts, svs, measurement_methods, units):
    '''Processes one input file and writes csv rows.

    A row is skipped when its GeoAreaCode is not in PLACES, when its Value is
    not a float (or is 'NaN'/'Nan'), or when no place dcid is found for it.
    If a row cannot be processed at all, the error is reported on stderr and
    the remaining rows of this file are skipped; the caller can still go on
    to the next file.

    Args:
        file: Input file path.
        writer: Csv DictWriter object.
        concepts: Dictionary of concepts.
        svs: Set of statistical variables (updated in place).
        measurement_methods: Set of measurement methods (updated in place).
        units: Set of units (updated in place).
    '''
    print(f'Starting {file}')
    with open(file, newline='') as f_in:
        reader = csv.DictReader(f_in)
        # fieldnames is None for an empty file.
        fieldnames = reader.fieldnames or []
        properties = sorted(
            field for field in fieldnames
            if field.startswith('[') and field[1:-1] not in SKIPPED_CONCEPTS)
        has_cities = '[Cities]' in fieldnames
        has_multiplier = '[UnitMultiplier]' in fieldnames
        try:
            for row in reader:
                geo_code = int(row['GeoAreaCode'])
                if geo_code not in PLACES:
                    continue
                value = row['Value']
                if not is_float(value) or value in ('NaN', 'Nan'):
                    continue
                observation_about = get_observation_about(
                    geo_code, row['GeoAreaName'],
                    row['[Cities]'] if has_cities else '')
                if not observation_about:
                    continue
                sv = get_variable_measured(row, properties, concepts)
                svs.add(sv)
                measurement_method = get_measurement_method(row, concepts)
                if measurement_method:
                    measurement_methods.add(measurement_method)
                unit = get_unit(row)
                if unit:
                    units.add(unit)
                writer.writerow({
                    'variable_measured':
                        'dcid:' + get_dcid(sv),
                    'observation_about':
                        observation_about,
                    'observation_date':
                        row['TimePeriod'],
                    'value':
                        value,
                    'measurement_method':
                        'dcs:' + get_dcid(measurement_method)
                        if measurement_method else '',
                    'unit':
                        'dcs:' + get_dcid(unit) if unit else '',
                    'scaling_factor':
                        row['[UnitMultiplier]'] if has_multiplier else '',
                })
        except Exception as e:
            # Best effort: give up on this file only, but say so. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit stop the run.
            print(
                f'Error processing {file} near line {reader.line_num}; '
                f'remaining rows skipped: {e!r}',
                file=sys.stderr)
    print(f'Finished processing {file}')
write_templates('output/sv.mcf', svs) + write_templates('output/unit.mcf', units) diff --git a/scripts/un/sdg/sdmx/util.py b/scripts/un/sdg/sdmx/util.py new file mode 100644 index 0000000000..061dc14974 --- /dev/null +++ b/scripts/un/sdg/sdmx/util.py @@ -0,0 +1,197 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +'''This script does not use the most up-to-date schema format. +It should only be used as an illustration of the SDMX -> MCF mapping. +Do not actually run! + +Shared util functions and constants. 
+''' +import re + +FIELDNAMES = [ + 'variable_measured', 'observation_about', 'observation_date', 'value', + 'measurement_method', 'unit', 'scaling_factor' +] + +DCID_PREFIX = 'Node: dcid:' +TOTAL = '_T' + +SERIES_TEMPLATE = ''' +Node: dcid:{dcid} +name: "{description}" +typeOf: dcs:SDG_Series +''' +PROPERTY_TEMPLATE = ''' +Node: dcid:sdg_{dcid} +typeOf: schema:Property +domainIncludes: dcs:Thing +rangeIncludes: dcs:SDG_{enum} +name: "{name}" +isProvisional: dcs:True +''' +ENUM_TEMPLATE = ''' +Node: dcid:SDG_{enum} +typeOf: schema:Class +subClassOf: schema:Enumeration +name: "{enum}" +isProvisional: dcs:True +''' +VALUE_TEMPLATE = ''' +Node: dcid:SDG_{enum}_{dcid} +typeOf: dcs:SDG_{enum} +name: "{name}" +isProvisional: dcs:True +''' +SV_TEMPLATE = ''' +Node: dcid:{dcid} +typeOf: dcs:StatisticalVariable +measuredProperty: dcs:value +name: {name} +populationType: dcs:{popType} +statType: dcs:measuredValue{cprops} +''' +MMETHOD_TEMPLATE = ''' +Node: dcid:{dcid} +typeOf: dcs:SDG_MeasurementMethodEnum +name: "{dcid}" +description: "{description}" +''' +UNIT_TEMPLATE = ''' +Node: dcid:{dcid} +typeOf: dcs:UnitOfMeasure +name: "{name}" +description: "SDG Unit: {dcid}" +''' + +# Select concepts will be modeled differently. +SKIPPED_CONCEPTS = { + 'Cities', 'Freq', 'Nature', 'Observation Status', 'Report Ordinal', + 'Reporting Type', 'UnitMultiplier', 'Units' +} + +# Use existing properties when they exist. +# TODO: Also map enums to existing nodes. 
+MAPPED_CONCEPTS = { + 'Age': 'age', + 'Cause of death': 'causeOfDeath', + 'Disability status': 'disabilityStatus', + 'Education level': 'educationalAttainment', + 'Sex': 'gender', + 'AGE': 'age', + 'CAUSE_OF_DEATH': 'causeOfDeath', + 'DISABILITY_STATUS': 'disabilityStatus', + 'EDUCATION_LEVEL': 'educationalAttainment', + 'SEX': 'gender' +} + +FORMATTED_UNITS = { + 'INDEX': 'idx', + 'NUM_M': '#m', + 'NUMBER': '#', + 'PERCENT': '%', + 'PH': 'pH', + 'TONNES': 't', + 'TONNES_M': 'Metric Tonnes' +} + + +def format_description(s): + '''Formats input with curated style. + Args: + s: Input string. + Returns: + Curated string. + ''' + # Remove <=2 levels of (). + formatted = re.sub('\((?:[^)(]|\([^)(]*\))*\)', '', s) + # Remove <=2 levels of []. + formatted = re.sub('\[(?:[^)(]|\[[^)(]*\])*\]', '', formatted) + # Remove attributes indicated with 'by'. + formatted = formatted.split(', by')[0] + # Remove references indicated by 'million USD'. + formatted = formatted.split(', million USD')[0] + # Remove extra spaces + formatted = formatted.replace(' , ', ', ').replace(' ', ' ').strip() + # Remove trailing commas + if formatted[-1] == ',': + formatted = formatted[:-1] + # Replace 100,000 with 100K + formatted = formatted.replace('100,000', '100K') + # Make ascii + return formatted.replace('Â', + '').replace('’', '\'').replace('₂', '2').replace( + '\xa0', ' ').replace('−', '-') + + +def is_float(element): + '''Checks if value can be interpreted as float. + Args: + element: Input. + Returns: + Whether the value can be cast as a float. + ''' + if element is None: + return False + try: + float(element) + return True + except ValueError: + return False + + +def make_property(s): + '''Formats property string. + Args: + s: Input string. + Returns: + Formatted string. + ''' + return s.title().replace(' ', '').replace('-', + '').replace('_', + '').replace('/', '') + + +def make_value(s): + '''Formats value string. + Args: + s: Input string. + Returns: + Formatted string. 
+ ''' + return s.replace('<=', 'LEQ').replace('<', + 'LT').replace('+', 'GEQ').replace( + ' ', '').replace('_', '') + + +def format_unit_name(dcid): + '''Formats unit name stirng. + Args: + dcid: Input dcid. + Retuns: + Formatted string. + ''' + if dcid in FORMATTED_UNITS: + return FORMATTED_UNITS[dcid] + return dcid.lower().replace('_', ' ').replace('1000000', '1M').replace( + '100000', '100K').replace('10000', '10k') + + +def get_dcid(template): + '''Gets dcid from template. + Args: + template: Input templated string. + Returns: + Dcid. + ''' + return template.split(DCID_PREFIX)[1].split('\n')[0] diff --git a/scripts/un/sdg/sssom-mappings b/scripts/un/sdg/sssom-mappings new file mode 160000 index 0000000000..aae4a7a944 --- /dev/null +++ b/scripts/un/sdg/sssom-mappings @@ -0,0 +1 @@ +Subproject commit aae4a7a94479ab44751eb2e54b5ab5054d84b748 diff --git a/scripts/un/sdg/testdata/expected_cities.csv b/scripts/un/sdg/testdata/expected_cities.csv deleted file mode 100644 index 3b4ac4931e..0000000000 --- a/scripts/un/sdg/testdata/expected_cities.csv +++ /dev/null @@ -1,2 +0,0 @@ -name,dcid -AF_MAZAR_E_SHARIF,wikidataId/Q130469 diff --git a/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv b/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv index baea70e159..2095831b27 100644 --- a/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv +++ b/scripts/un/sdg/testdata/test_csv/AG_FOOD_WST.csv @@ -1,15 +1,21 @@ VARIABLE_CODE,GEOGRAPHY_CODE,TIME_PERIOD,OBS_VALUE,UNIT_MEASURE,UNIT_MULT,MEASUREMENT_METHOD -dcs:sdg/AG_FOOD_WST,dcs:Earth,2019,930863853.95716,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:africa,2019,200407208.7491,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:country/AFG,2019,4755917.64224,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST,dcs:southamerica,2019,46603996.09298,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST,dcs:country/ALB,2019,363201.04528,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST,dcs:oceania,2019,5368899.62572,dcs:SDG_T,,dcs:SDG_N_A_G 
-dcs:sdg/AG_FOOD_WST,dcs:country/DZA,2019,5782224.32483,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:Earth,2019,569007855.9533,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:africa,2019,143927911.95641,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/AFG,2019,3109152.67104,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:southamerica,2019,28233968.89021,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/ALB,2019,238491.7727,dcs:SDG_T,,dcs:SDG_E_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:oceania,2019,3926866.98184,dcs:SDG_T,,dcs:SDG_N_A_G -dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcs:country/DZA,2019,3918528.68,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:Earth,2019,930863853.95716,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:africa,2019,200407208.7491,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/AFG,2019,4755917.64224,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:southamerica,2019,46603996.09298,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/ALB,2019,363201.04528,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:oceania,2019,5368899.62572,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:WesternAfrica,2019,73695078.26203,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:country/DZA,2019,5782224.32483,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST,dcid:CentralAmerica,2019,23441006.87753,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST,dcid:EasternAfrica,2019,62963781.41023,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:Earth,2019,569007855.9533,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:africa,2019,143927911.95641,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/AFG,2019,3109152.67104,dcs:SDG_T,,dcs:SDG_E_A_G 
+dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:southamerica,2019,28233968.89021,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/ALB,2019,238491.7727,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:oceania,2019,3926866.98184,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:WesternAfrica,2019,56750560.27358,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:country/DZA,2019,3918528.68,dcs:SDG_T,,dcs:SDG_E_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:CentralAmerica,2019,15774419.58244,dcs:SDG_T,,dcs:SDG_N_A_G +dcs:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS,dcid:EasternAfrica,2019,44299890.89842,dcs:SDG_T,,dcs:SDG_N_A_G diff --git a/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv b/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv new file mode 100644 index 0000000000..8a14126c63 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_place_mappings.csv @@ -0,0 +1,2 @@ +sdg,dcid +4,country/AFG diff --git a/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf b/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf new file mode 100644 index 0000000000..0b6aac13b9 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_un_containment.mcf @@ -0,0 +1,14 @@ + +Node: dcid:country/AFG +typeOf: dcs:Country +containedInPlace: dcid:SouthernAsia +containedInPlace: dcid:undata-geo/G00403000 +containedInPlace: dcid:undata-geo/G00403300 +containedInPlace: dcid:undata-geo/G00404000 +containedInPlace: dcid:undata-geo/G00404300 + +Node: dcid:undata-geo/G00000030 +typeOf: dcs:GeoRegion +containedInPlace: dcid:Earth +containedInPlace: dcid:asia +containedInPlace: dcid:WesternAsia diff --git a/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf b/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf new file mode 100644 index 
0000000000..d127faa09c --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/expected_un_places.mcf @@ -0,0 +1,12 @@ + +Node: dcid:country/AFG +typeOf: dcs:Country +name: "Afghanistan" +unDataCode: "undata-geo:G00000020" +unDataLabel: "Afghanistan" + +Node: dcid:undata-geo/G00000030 +typeOf: dcs:GeoRegion +name: "Ajman" +unDataCode: "undata-geo:G00000030" +unDataLabel: "Ajman" diff --git a/scripts/un/sdg/testdata/test_geography/test_geographies.csv b/scripts/un/sdg/testdata/test_geography/test_geographies.csv new file mode 100644 index 0000000000..7ca5143fcb --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/test_geographies.csv @@ -0,0 +1,3 @@ +subject_id,subject_label +undata-geo:G00000020,Afghanistan +undata-geo:G00000030,Ajman diff --git a/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv b/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv new file mode 100644 index 0000000000..a882b21075 --- /dev/null +++ b/scripts/un/sdg/testdata/test_geography/test_geography_hierarchy.csv @@ -0,0 +1,12 @@ +subject_id,subject_label,predicate,object_id,object_label +G00000020,Afghanistan,isPartOf,G00100000,World +G00000020,Afghanistan,isPartOf,G00114000,Asia +G00000020,Afghanistan,isPartOf,G00119000,Southern Asia +G00000020,Afghanistan,isPartOf,G00403000,Landlocked developing countries (LLDCs) +G00000020,Afghanistan,isPartOf,G00403300,Landlocked developing countries (LLDCs): Asia +G00000020,Afghanistan,isPartOf,G00404000,Least developed countries (LDCs) +G00000020,Afghanistan,isPartOf,G00404300,Least developed countries (LDCs): Asia +G00000030,Ajman,isPartOf,G00003250,United Arab Emirates +G00000030,Ajman,isPartOf,G00100000,World +G00000030,Ajman,isPartOf,G00114000,Asia +G00000030,Ajman,isPartOf,G00120000,Western Asia diff --git a/scripts/un/sdg/testdata/test_schema/series.mcf b/scripts/un/sdg/testdata/test_schema/series.mcf index 1d279ab42b..f0045f3f82 100644 --- a/scripts/un/sdg/testdata/test_schema/series.mcf +++ 
b/scripts/un/sdg/testdata/test_schema/series.mcf @@ -28,7 +28,7 @@ name: "Proportion of population below international poverty line" typeOf: dcs:SDG_Series Node: dcid:SDG_SI_POV_EMP1 -name: "Employed population below international poverty line" +name: "Employed population below international poverty line, by sex and age" typeOf: dcs:SDG_Series Node: dcid:SDG_SI_POV_NAHC diff --git a/scripts/un/sdg/testdata/test_schema/sv.mcf b/scripts/un/sdg/testdata/test_schema/sv.mcf index 3a11701a59..1d973b73c7 100644 --- a/scripts/un/sdg/testdata/test_schema/sv.mcf +++ b/scripts/un/sdg/testdata/test_schema/sv.mcf @@ -5,11 +5,13 @@ measuredProperty: dcs:value name: "Food waste" populationType: dcs:SDG_AG_FOOD_WST statType: dcs:measuredValue +footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR" Node: dcid:sdg/AG_FOOD_WST.FOOD_WASTE_SECTOR--FWS_HHS typeOf: dcs:StatisticalVariable measuredProperty: dcs:value -name: "Food waste [Food Waste Sector = Households]" +name: "Food waste [Households]" populationType: dcs:SDG_AG_FOOD_WST statType: dcs:measuredValue sdg_foodWasteSector: dcs:SDG_FoodWasteSectorEnum_FWS_HHS +footnote: "Includes data from the following sources: Food Waste Index Report 2021 / WESR" diff --git a/scripts/un/sdg/util.py b/scripts/un/sdg/util.py index f3e35aa61b..ed63058fcb 100644 --- a/scripts/un/sdg/util.py +++ b/scripts/un/sdg/util.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. '''Shared util functions and constants.''' -import csv import math -import os import re -import sys - -module_dir_ = os.path.dirname(__file__) -sys.path.append(os.path.join(module_dir_)) # SDMX indicator for 'total' value in dimension. TOTAL = '_T' +# Splits the series code from constraint properties in SDG variable codes. +SDG_CODE_SEPARATOR = '@' + # Used to split the series code from constraint properties in stat var dcids. SV_CODE_SEPARATOR = '.' 
@@ -59,7 +56,7 @@ measuredProperty: dcs:value name: {name} populationType: dcs:{popType} -statType: dcs:measuredValue{cprops} +statType: dcs:measuredValue{cprops}{footnote} ''' MMETHOD_TEMPLATE = ''' Node: dcid:{dcid} @@ -80,7 +77,6 @@ 'CAUSE_OF_DEATH': 'causeOfDeath', 'DISABILITY_STATUS': 'disabilityStatus', 'EDUCATION_LEV': 'educationalAttainment', - 'SEX': 'gender' } # Shared dimensions across all input csv files. @@ -95,57 +91,82 @@ 'OBS_STATUS', 'RELEASE_STATUS', 'RELEASE_NAME' } -# Supported Regions. -# TODO: Add other regions. -REGIONS = { - 1: 'Earth', - 2: 'africa', - 5: 'southamerica', - 9: 'oceania', - 10: 'antarctica', - 21: 'northamerica', - 142: 'asia', - 150: 'europe', +# Series where zero should be treated as null and dropped (curated by UN). +ZERO_NULL = { + 'SE_ACS_CMPTR', + 'SE_ACS_H2O', + 'SE_AGP_CPRA', + 'SE_ALP_CPLR', + 'SE_AWP_CPRA', + 'SE_ACC_HNDWSH', + 'SE_INF_DSBL', + 'SE_TOT_CPLR', + 'SE_TRA_GRDL', + 'SE_ACS_INTNT', } +# Footnote text indicated that a zero point should be treated as null and dropped. +ZERO_NULL_TEXT = 'This data point is NIL for the submitting nation.' + +# Variables that should be dropped due to outlier values (curated by UN). +# TODO: Follow up with UN. +DROP_VARIABLE = {'VC_DTH_TOTPT'} + +# Series that should be dropped due to outlier values (curated by UN). +# TODO: Follow up with UN. +DROP_SERIES = { + 'TX_IMP_GBMRCH', + 'TX_EXP_GBMRCH', + 'TX_IMP_GBSVR', + 'TX_EXP_GBSVR', + 'SH_SAN_SAFE', + 'AG_PRD_XSUBDY', +} -def get_country_map(file): - ''' Creates map of M49 -> ISO-alpha3 for countries. - - Args: - file: Path to input file. - - Returns: - Country map. - ''' - with open(file) as f: - places = {} - reader = csv.DictReader(f, delimiter='\t') - for row in reader: - if not row['ISO-alpha3 code']: # Only countries for now. 
- continue - places[int(row['M49 code'])] = row['ISO-alpha3 code'] - return places - - -PLACES = get_country_map(os.path.join(module_dir_, 'm49.csv')) - - -def get_city_map(file): - ''' Creates map of name -> dcid for supported cities. - - Args: - file: Path to input file. - - Returns: - City map. - ''' - with open(file) as f: - reader = csv.DictReader(f) - return {row['name']: row['dcid'] for row in reader} - +# Map of input title text to output formatted text. +TITLE_MAPPINGS = { + 'Education level': 'education', + 'Frequency of Chlorophyll-a concentration': 'frequency', + 'Report Ordinal': 'ordinal', + 'Grounds of discrimination': 'discrimination', + 'Deviation Level': 'deviation' +} -CITIES = get_city_map(os.path.join(module_dir_, 'cities.csv')) +# List of substrings to be deleted from titles. +TITLE_DELETIONS = [ + 'Age = ', + 'Name of non-communicable disease = ', + 'Substance use disorders = ', + 'Quantile = ', + 'Type of skill = Skill: ', + 'Type of skill = ', + 'Sex = ', + 'Land cover = ', + 'Level/Status = ', + 'Policy instruments = ', + 'Type of product = ', + 'Type of waste treatment = ', + 'Activity = ', + 'Type of renewable technology = ', + 'Location = ', + 'Level_of_government = ', + 'Fiscal intervention stage = ', + 'Name of international institution = ', + 'Policy Domains = ', + 'Mode of transportation = ', + 'Food Waste Sector = ', +] + +# Map of input title text to output replacement text. +TITLE_REPLACEMENTS = { + '24 to 59 months old': '2 to 5 years old', + '36 to 47 months old': '3 to 4 years old', + '36 to 59 months old': '3 to 5 years old', + '12 to 23 months': '1 to 2 years old', + '24 to 35 months': '2 to 3 years old', + '36 to 47 months old': '3 to 4 years old', + '48 to 59 months': '4 to 5 years old' +} def format_description(s): @@ -161,8 +182,6 @@ def format_description(s): formatted = re.sub('\((?:[^)(]|\([^)(]*\))*\)', '', s) # Remove <=2 levels of []. 
formatted = re.sub('\[(?:[^)(]|\[[^)(]*\])*\]', '', formatted) - # Remove attributes indicated with 'by'. - formatted = formatted.split(', by')[0] # Remove references indicated by 'million USD'. formatted = formatted.split(', million USD')[0] # Remove extra spaces. @@ -172,6 +191,11 @@ def format_description(s): formatted = formatted[:-1] # Replace 100,000 with 100K. formatted = formatted.replace('100,000', '100K') + # Remove some apostrophe. + formatted = formatted.replace("Developing countries’", + 'Developing countries') + # Replace DRR with Disaster Risk Reduction. + formatted = formatted.replace('DRR', 'Disaster Risk Reduction') # Make ascii. return formatted.replace('Â', '').replace('’', '\'').replace('₂', '2').replace( @@ -211,6 +235,46 @@ def is_valid(v): return v and not v == 'nan' +def curate_pvs(text, mappings): + '''Curates PVs based on custom mappings. + + Example: '[Deviation Level = Extreme (75-100%)]' + -> '[Extreme deviation (75-100%)]' + + Args: + text: Input text. + mappings: Custom mappings. + + Returns: + Formatted text. + ''' + pairs = text[1:-1].split('|') + new_pairs = [] + for pair in pairs: + new_pair = '' + pv = pair.split('=') + p, v = pv[0].strip(), pv[1].strip() + if p in mappings: + v_components = v.split('(') + v_main = v_components[0].strip() + + # Don't repeat 'education'. + if p == 'Education level' and 'education' in v_main: + new_pair = v_main + + else: + new_pair = v_main + ' ' + mappings[p] + + # Keep () on the right. + if len(v_components) > 1: + new_pair += ' (' + v_components[1].strip() + + new_pairs.append(new_pair) + else: + new_pairs.append(pair.strip()) + return '[' + ', '.join(new_pairs) + ']' + + def format_variable_description(variable, series): '''Curates variable descriptions. @@ -221,9 +285,27 @@ def format_variable_description(variable, series): Returns: Formatted description. 
''' - parts = variable.split(series) - return format_description(series) + parts[1] if len( - parts) > 1 else format_description(series) + head = format_description(series) + pvs = series.join(variable.split(series)[1:]).strip() + if not pvs: + return head + + # Remove ISIC code. + pvs = re.sub(r'\(ISIC[^)]*\)', '', pvs) + + # Remove isco code. + pvs = re.sub(r'\(isco[^)]*\)', '', pvs) + + # Custom text formatting. + pvs = curate_pvs(pvs, TITLE_MAPPINGS) + + # Custom replacements. + for s in TITLE_DELETIONS: + pvs = pvs.replace(s, '') + for s in TITLE_REPLACEMENTS: + pvs = pvs.replace(s, TITLE_REPLACEMENTS[s]) + + return head + ' ' + pvs def format_variable_code(code): @@ -235,7 +317,7 @@ def format_variable_code(code): Returns: Formatted code. ''' - return code.replace('@', SV_CODE_SEPARATOR).replace(' ', '') + return code.replace(SDG_CODE_SEPARATOR, SV_CODE_SEPARATOR).replace(' ', '') def format_title(s): diff --git a/scripts/un/sdg/util_test.py b/scripts/un/sdg/util_test.py index 4ba3482fde..712342f359 100644 --- a/scripts/un/sdg/util_test.py +++ b/scripts/un/sdg/util_test.py @@ -31,7 +31,8 @@ def test_format_description(self): self.assertEqual( util.format_description( 'Indicator of Food Price Anomalies (IFPA), by Consumer Food Price Index' - ), 'Indicator of Food Price Anomalies') + ), + 'Indicator of Food Price Anomalies, by Consumer Food Price Index') def test_is_float(self): self.assertTrue(util.is_float(7.28)) @@ -41,12 +42,22 @@ def test_is_valid(self): self.assertFalse(util.is_valid(float('nan'))) self.assertFalse(util.is_valid('')) + def test_curate_pvs(self): + self.assertEqual( + util.curate_pvs( + '[Age = 15 years old and over | Education level = Primary education or less]', + util.TITLE_MAPPINGS), + '[Age = 15 years old and over, Primary education or less]') + self.assertEqual( + util.curate_pvs('[Deviation Level = Extreme (75-100%)]', + util.TITLE_MAPPINGS), + '[Extreme deviation (75-100%)]') + def test_format_variable_description(self): 
self.assertEqual( util.format_variable_description( 'Food waste (Tonnes) [Food Waste Sector = Households]', - 'Food waste (Tonnes)'), - 'Food waste [Food Waste Sector = Households]') + 'Food waste (Tonnes)'), 'Food waste [Households]') def test_format_variable_code(self): self.assertEqual( diff --git a/scripts/us_usda/quickstats/process.py b/scripts/us_usda/quickstats/process.py index b0db6fb150..90768e668c 100644 --- a/scripts/us_usda/quickstats/process.py +++ b/scripts/us_usda/quickstats/process.py @@ -21,21 +21,21 @@ If the key is not specified as above, it falls back to using a key specified in a GCS config file. However, that file is available to DC team members only. - """ -import json - -import requests -import sys import csv -import multiprocessing +from datetime import datetime from itertools import repeat +import json +import multiprocessing import os -from datetime import datetime -from google.cloud import storage +import sys + from absl import app from absl import flags +from google.cloud import storage +from ratelimit import limits +import requests API_BASE = 'https://quickstats.nass.usda.gov/api' @@ -53,15 +53,25 @@ '998', # "OTHER" county code ]) -_GCS_PROJECT_ID = "datcom-204919" -_GCS_BUCKET = "datcom-csv" -_GCS_FILE_PATH = "usda/agriculture_survey/config.json" +_GCS_PROJECT_ID = 'datcom-204919' +_GCS_BUCKET = 'datcom-csv' +_GCS_FILE_PATH = 'usda/agriculture_survey/config.json' _USDA_API_KEY = 'usda_api_key' _FLAGS = flags.FLAGS flags.DEFINE_string(_USDA_API_KEY, None, 'USDA quickstats API key.') +flags.DEFINE_integer( + 'start_year', + os.getenv('start_year', 2000), + 'Year from whihc data is processed.', +) +flags.DEFINE_integer( + 'num_counties', + os.getenv('num_counties', 5000), + 'number of counties for which data is processed.', +) def process_survey_data(year, svs, out_dir): @@ -75,6 +85,7 @@ def process_survey_data(year, svs, out_dir): print('Getting county names') county_names = get_param_values('county_name') + county_names = 
county_names[:_FLAGS.num_counties] print('# counties =', len(county_names)) pool_size = max(2, multiprocessing.cpu_count() - 1) @@ -82,7 +93,8 @@ def process_survey_data(year, svs, out_dir): with multiprocessing.Pool(pool_size) as pool: pool.starmap( fetch_and_write, - zip(county_names, repeat(year), repeat(svs), repeat(out_dir))) + zip(county_names, repeat(year), repeat(svs), repeat(out_dir)), + ) write_aggregate_csv(year, out_dir) @@ -96,15 +108,15 @@ def get_parts_dir(out_dir, year): def get_response_dir(out_dir, year): - return f"{out_dir}/response/{year}" + return f'{out_dir}/response/{year}' def get_response_file_path(out_dir, year, county): - return f"{get_response_dir(out_dir, year)}/{county}.json" + return f'{get_response_dir(out_dir, year)}/{county}.json' def get_year_csv_file_path(out_dir, year): - return f"{out_dir}/ag-{year}.csv" + return f'{out_dir}/ag-{year}.csv' def write_aggregate_csv(year, out_dir): @@ -120,13 +132,13 @@ def write_aggregate_csv(year, out_dir): lineterminator='\n') csv_writer.writeheader() for part_file in part_files: - if part_file.endswith(".csv"): - with open(f"{parts_dir}/{part_file}", 'r') as part: + if part_file.endswith('.csv'): + with open(f'{parts_dir}/{part_file}', 'r') as part: csv_writer.writerows(csv.DictReader(part)) def write_consolidated_csv(years, out_dir): - out_file = f"{out_dir}/consolidated.csv" + out_file = f'{out_dir}/consolidated.csv' print('Writing consolidated CSV', out_file) @@ -141,11 +153,19 @@ def write_consolidated_csv(years, out_dir): def fetch_and_write(county_name, year, svs, out_dir): - out_file = f"{get_parts_dir(out_dir, year)}/{county_name.replace('[^a-zA-Z0-9]', '')}.csv" + out_file = ( + f"{get_parts_dir(out_dir, year)}/{county_name.replace('[^a-zA-Z0-9]', '')}.csv" + ) api_data = get_survey_county_data(year, county_name, out_dir) county_csv_rows = to_csv_rows(api_data, svs) - print('Writing', len(county_csv_rows), 'rows for county', county_name, - 'to file', out_file) + print( + 'Writing', + 
len(county_csv_rows), + 'rows for county', + county_name, + 'to file', + out_file, + ) with open(out_file, 'w', newline='') as out: write_csv(out, county_csv_rows) @@ -161,9 +181,9 @@ def get_survey_county_data(year, county, out_dir): else: params = { 'key': get_usda_api_key(), - 'source_desc': "SURVEY", + 'source_desc': 'SURVEY', 'year': year, - 'county_name': county + 'county_name': county, } response = get_data(params) with open(response_file, 'w') as f: @@ -178,6 +198,7 @@ def get_survey_county_data(year, county, out_dir): return response +@limits(calls=10, period=60) def get_data(params): return requests.get(f'{API_BASE}/api_GET', params=params).json() @@ -189,19 +210,19 @@ def get_param_values(param): return [] if param not in response else response[param] -'''Converts a quickstats data row to a DC CSV row. +"""Converts a quickstats data row to a DC CSV row. data = quickstats data row svs = {name: {name: ..., sv: ..., unit: ...}} returns = {variableMeasured: ..., observationAbout: ..., value: ..., unit: ...} -''' +""" def to_csv_row(data_row, svs): name = data_row['short_desc'] - if data_row['domaincat_desc'] and data_row[ - 'domaincat_desc'] != 'NOT SPECIFIED': + if (data_row['domaincat_desc'] and + data_row['domaincat_desc'] != 'NOT SPECIFIED'): name = f"{name}%%{data_row['domaincat_desc']}" if name not in svs: @@ -213,16 +234,16 @@ def to_csv_row(data_row, svs): eprint('SKIPPED, Unsupported county code', county_code) return None - value = (data_row['value'] if 'value' in data_row else - data_row['Value']).strip().replace(',', '') + value = ((data_row['value'] if 'value' in data_row else + data_row['Value']).strip().replace(',', '')) if value in SKIPPED_VALUES: eprint('SKIPPED, Invalid value', f"'{value}'", 'for', name) return None value = int(value) - observation_about = f"dcid:geoId/{data_row['state_fips_code']}{county_code}" if \ - data_row[ - 'state_fips_code'] else 'dcid:country/USA' + observation_about = ( + 
f"dcid:geoId/{data_row['state_fips_code']}{county_code}" + if data_row['state_fips_code'] else 'dcid:country/USA') sv = svs[name] @@ -248,7 +269,7 @@ def to_csv_rows(api_data, svs): def load_svs(): svs = {} - with open("sv.csv", newline='') as csvfile: + with open('sv.csv', newline='') as csvfile: reader = csv.DictReader(csvfile) for row in reader: svs[row['name']] = row @@ -267,16 +288,16 @@ def eprint(*args, **kwargs): def get_all_counties(): svs = load_svs() - process_survey_data(2023, svs, "output") + process_survey_data(2023, svs, 'output') def get_multiple_years(): start = datetime.now() print('Start', start) - out_dir = "output" + out_dir = 'output' svs = load_svs() - years = range(2000, datetime.now().year + 1) + years = range(_FLAGS.start_year, datetime.now().year + 1) for year in years: process_survey_data(year, svs, out_dir)