diff --git a/scripts/world_bank/wdi/README.md b/scripts/world_bank/wdi/README.md index 7cda4df1b4..73e9154c2f 100644 --- a/scripts/world_bank/wdi/README.md +++ b/scripts/world_bank/wdi/README.md @@ -127,6 +127,25 @@ To generate `output/WorldBank_StatisticalVariables.mcf`, python3 worldbank.py --indicatorSchemaFile= --fetchFromSource= ``` +#### Processing Steps for Refreshing Data + +To generate `output/WorldBank_StatisticalVariables.mcf`, +`output/WorldBank.tmcf`, and `output/WorldBank.csv`, run: + +```bash +python3 worldbank.py +``` + +To perform only the processing step, run the following command: +```bash +python3 preprocess.py --mode=process +``` + +To perform only the download step, run the following command: +```bash +python3 preprocess.py --mode=download +``` + We highly recommend the use of the import validation tool for this import which you can find in https://github.com/datacommonsorg/tools/tree/master/import-validation-helper. diff --git a/scripts/world_bank/wdi/manifest.json b/scripts/world_bank/wdi/manifest.json new file mode 100644 index 0000000000..0cb6090c93 --- /dev/null +++ b/scripts/world_bank/wdi/manifest.json @@ -0,0 +1,22 @@ +{ + "import_specifications": [ + { + "import_name": "WorldDevelopmentIndicators", + "curator_emails": [ + "sanikap@google.com" + ], + "provenance_url": "https://datacatalog.worldbank.org/dataset/world-development-indicators/", + "provenance_description": "Variables related to demographics, energy, health, labor, etc. 
from the World Bank", + "scripts": [ + "worldbank.py" + ], + "import_inputs": [ + { + "template_mcf": "output/WorldBank.tmcf", + "cleaned_csv": "output/WorldBank.csv" + } + ], + "cron_schedule": "0 11 * * 2" + } + ] +} \ No newline at end of file diff --git a/scripts/world_bank/wdi/output/WorldBank_StatisticalVariables.mcf b/scripts/world_bank/wdi/output/WorldBank_StatisticalVariables.mcf index 034f736164..8fde765741 100644 --- a/scripts/world_bank/wdi/output/WorldBank_StatisticalVariables.mcf +++ b/scripts/world_bank/wdi/output/WorldBank_StatisticalVariables.mcf @@ -634,3 +634,174 @@ statType: dcs:measuredValue measuredProperty: dcs:amount transferType: dcs:OutwardRemittance + +Node: dcid:WorldBank/VC_IHR_PSRC_P5 +name: "Intentional homicides (per 100,000 people)" +description: "Intentional homicides are estimates of unlawful homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded. UN Office on Drugs and Crime's International Homicide Statistics database." +typeOf: dcs:StatisticalVariable +populationType: dcs:CriminalActivities +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person +crimeType: dcs:MurderAndNonNegligentManslaughter + + +Node: dcid:WorldBank/SH_DYN_MORT +name: "Mortality rate, under-5 (per 1,000 live births)" +description: "Under-five mortality rate is the probability per 1,000 that a newborn baby will die before reaching age five, if subject to age-specific mortality rates of the specified year. 
Estimates Developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:mortalityRate +measurementDenominator: dcs:Count_BirthEvent_LiveBirth +age: dcs:YearsUpto4 + + +Node: dcid:WorldBank/SH_PRV_SMOK +name: "Smoking prevalence, total (ages 15+)" +description: "Prevalence of smoking is the percentage of men and women ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. The rates are age-standardized. World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/)." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person_15OrMoreYears +healthBehavior: dcs:Smoking +age: dcs:Years15Onwards + + +Node: dcid:WorldBank/SH_PRV_SMOK_FE +name: "Smoking prevalence, females (% of adults)" +description: "Prevalence of smoking, female is the percentage of women ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. The rates are age-standardized. World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/)." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person_15OrMoreYears_Female +healthBehavior: dcs:Smoking +age: dcs:Years15Onwards +gender: dcs:Female + + +Node: dcid:WorldBank/SH_PRV_SMOK_MA +name: "Smoking prevalence, males (% of adults)" +description: "Prevalence of smoking, male is the percentage of men ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. The rates are age-standardized. 
World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/)." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person_15OrMoreYears_Male +healthBehavior: dcs:Smoking +age: dcs:Years15Onwards +gender: dcs:Male + + +Node: dcid:WorldBank/SH_STA_DIAB_ZS +name: "Diabetes prevalence (% of population ages 20 to 79)" +description: "Diabetes prevalence refers to the percentage of people ages 20-79 who have type 1 or type 2 diabetes. International Diabetes Federation, Diabetes Atlas." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person_20To79Years +healthOutcome: dcs:Diabetes +age: dcs:Years20To79 + + +Node: dcid:WorldBank/SP_DYN_CBRT_IN +name: "Birth rate, crude (per 1,000 people)" +description: "Crude birth rate indicates the number of live births occurring during the year, per 1,000 population estimated at midyear. Subtracting the crude death rate from the crude birth rate provides the rate of natural increase, which is equal to the rate of population change in the absence of migration. (1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme." 
+typeOf: dcs:StatisticalVariable +populationType: dcs:BirthEvent +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person +medicalStatus: dcs:LiveBirth + + +Node: dcid:WorldBank/SP_DYN_LE00_FE_IN +name: "Life expectancy at birth, female (years)" +description: "Life expectancy at birth indicates the number of years a newborn infant would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life. (1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:lifeExpectancy +gender: dcs:Female + + +Node: dcid:WorldBank/SP_DYN_LE00_MA_IN +name: "Life expectancy at birth, male (years)" +description: "Life expectancy at birth indicates the number of years a newborn infant would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life. (1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme." 
+typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:lifeExpectancy +gender: dcs:Male + + +Node: dcid:WorldBank/EG_ELC_FOSL_ZS +name: "Electricity production from oil, gas and coal sources (% of total)" +description: "Sources of electricity refer to the inputs used to generate electricity. Oil refers to crude oil and petroleum products. Gas refers to natural gas but excludes natural gas liquids. Coal refers to all coal and brown coal, both primary (including hard coal and lignite-brown coal) and derived fuels (including patent fuel, coke oven coke, gas coke, coke oven gas, and blast furnace gas). Peat is also included in this category. IEA Statistics OECD/IEA 2014 (http://www.iea.org/stats/index.asp), subject to https://www.iea.org/t&c/termsandconditions/" +typeOf: dcs:StatisticalVariable +populationType: dcs:Production +statType: dcs:measuredValue +measuredProperty: dcs:amount +measurementDenominator: dcs:Amount_Production_Energy +producedThing: dcs:ElectricityFromOilGasOrCoalSources + + +Node: dcid:WorldBank/EG_ELC_NUCL_ZS +name: "Electricity production from nuclear sources (% of total)" +description: "Sources of electricity refer to the inputs used to generate electricity. Nuclear power refers to electricity produced by nuclear power plants. IEA Statistics OECD/IEA 2014 (http://www.iea.org/stats/index.asp), subject to https://www.iea.org/t&c/termsandconditions/" +typeOf: dcs:StatisticalVariable +populationType: dcs:Production +statType: dcs:measuredValue +measuredProperty: dcs:amount +measurementDenominator: dcs:Amount_Production_Energy +producedThing: dcs:ElectricityFromNuclearSources + + +Node: dcid:WorldBank/EG_FEC_RNEW_ZS +name: "Renewable energy consumption (% of total final energy consumption)" +description: "Renewable energy consumption is the share of renewables energy in total final energy consumption. 
World Bank, Sustainable Energy for All (SE4ALL) database from the SE4ALL Global Tracking Framework led jointly by the World Bank, International Energy Agency, and the Energy Sector Management Assistance Program." +typeOf: dcs:StatisticalVariable +populationType: dcs:Consumption +statType: dcs:measuredValue +measuredProperty: dcs:amount +measurementDenominator: dcs:Amount_Consumption_Energy +consumedThing: dcs:RenewableEnergy + + +Node: dcid:WorldBank/EN_POP_EL5M_ZS +name: "Population living in areas where elevation is below 5 meters (% of total population)" +description: "Population below 5m is the percentage of the total population living in areas where the elevation is 5 meters or less. Center for International Earth Science Information Network (CIESIN)/Columbia University. 2013. Urban-Rural Population and Land Area Estimates Version 2. Palisades, NY: NASA Socioeconomic Data and Applications Center (SEDAC). http://sedac.ciesin.columbia.edu/data/set/lecz-urban-rural-population-land-area-estimates-v2." +typeOf: dcs:StatisticalVariable +populationType: dcs:Person +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person +residenceCharacteristic: dcs:LessThan5MetersAboveSeaLevel + + +Node: dcid:WorldBank/IT_CEL_SETS_P2 +name: "Mobile cellular subscriptions (per 100 people)" +description: "Mobile cellular telephone subscriptions are subscriptions to a public mobile telephone service that provide access to the PSTN using cellular technology. The indicator includes (and is split into) the number of postpaid subscriptions, and the number of active prepaid accounts (i.e. that have been used during the last three months). The indicator applies to all mobile cellular subscriptions that offer voice communications. It excludes subscriptions via data cards or USB modems, subscriptions to public mobile data services, private trunked mobile radio, telepoint, radio paging and telemetry services. 
International Telecommunication Union, World Telecommunication/ICT Development Report and database." +typeOf: dcs:StatisticalVariable +populationType: dcs:Product +statType: dcs:measuredValue +measuredProperty: dcs:count +measurementDenominator: dcs:Count_Person +productType: dcs:MobileCellularSubscription + + +Node: dcid:WorldBank/SE_XPD_TERT_ZS +name: "Expenditure on tertiary education (% of government expenditure on education)" +description: "Expenditure on tertiary education is expressed as a percentage of total general government expenditure on education. General government usually refers to local, regional and central governments. UNESCO Institute for Statistics (http://uis.unesco.org/)" +typeOf: dcs:StatisticalVariable +populationType: dcs:EconomicActivity +statType: dcs:measuredValue +measuredProperty: dcs:amount +measurementDenominator: dcs:Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government +activitySource: dcs:ExpenditureActivity +expenditureType: dcs:TertiaryEducationExpenditure +remunerator: dcs:Government + diff --git a/scripts/world_bank/wdi/schema_csvs/WorldBankIndicators_prod.csv b/scripts/world_bank/wdi/schema_csvs/WorldBankIndicators_prod.csv index 5012e41ec1..1b87010458 100644 --- a/scripts/world_bank/wdi/schema_csvs/WorldBankIndicators_prod.csv +++ b/scripts/world_bank/wdi/schema_csvs/WorldBankIndicators_prod.csv @@ -14,53 +14,53 @@ SE.TER.CUAT.ST.ZS,,,"Educational attainment, at least completed short-cycle tert SH.STA.OWGH.FE.ZS,,,"Prevalence of overweight, weight for height, female (% of children under 5)","Prevalence of overweight, female, is the percentage of girls under age 5 whose weight for height is more than two standard deviations above the median for the international reference population of the corresponding age as established by the WHO's new child growth standards released in 2006.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). 
Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,healthBehavior,Overweight,age,YearsUpto4,gender,Female,Count_Person_Upto4Years_Female,,100,, SH.STA.OWGH.MA.ZS,,,"Prevalence of overweight, weight for height, male (% of children under 5)","Prevalence of overweight, male, is the percentage of boys under age 5 whose weight for height is more than two standard deviations above the median for the international reference population of the corresponding age as established by the WHO's new child growth standards released in 2006.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,healthBehavior,Overweight,age,YearsUpto4,gender,Male,Count_Person_Upto4Years_Male,,100,, SH.STA.OWGH.ZS,,,"Prevalence of overweight, weight for height (% of children under 5)",Prevalence of overweight children is the percentage of children under age 5 whose weight for height is more than two standard deviations above the median for the international reference population of the corresponding age as established by the WHO's new child growth standards released in 2006.,"UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,healthBehavior,Overweight,age,YearsUpto4,,,Count_Person_Upto4Years,,100,, -SH.STA.SUIC.FE.P5,,,"Suicide mortality rate, female (per 100,000 female population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. 
Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,gender,Female,,,Count_Person_Female,,100000,, -SH.STA.SUIC.MA.P5,,,"Suicide mortality rate, male (per 100,000 male population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,gender,Male,,,Count_Person_Male,,100000,, -SH.STA.SUIC.P5,,,"Suicide mortality rate (per 100,000 population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,,,,,Count_Person,,100000,, +SH.STA.SUIC.FE.P5,,,"Suicide mortality rate, female (per 100,000 female population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,gender,Female,,,Count_Person_Female,,,,Per100000Females +SH.STA.SUIC.MA.P5,,,"Suicide mortality rate, male (per 100,000 male population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. 
Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,gender,Male,,,Count_Person_Male,,,,Per100000Males +SH.STA.SUIC.P5,,,"Suicide mortality rate (per 100,000 population)","Suicide mortality rate is the number of suicide deaths in a year per 100,000 population. Crude suicide rate (not age-adjusted).","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",MortalityEvent,measuredValue,count,causeOfDeath,ICD10/X60-X84,,,,,Count_Person,,,,Per100000Persons SL.TLF.ACTI.FE.ZS,,,"Labor force participation rate, female (% of female population ages 15-64) (modeled ILO estimate)",Labor force participation rate is the proportion of the population ages 15-64 that is economically active: all people who supply labor for the production of goods and services during a specified period.,"International Labour Organization, ILOSTAT database. Data retrieved in March 1, 2020.",Person,measuredValue,count,age,Years15To64,employmentStatus,BLS_InLaborForce,gender,Female,Count_Person_15To64Years_Female,,100,, SL.TLF.ACTI.MA.ZS,,,"Labor force participation rate, male (% of male population ages 15-64) (modeled ILO estimate)",Labor force participation rate is the proportion of the population ages 15-64 that is economically active: all people who supply labor for the production of goods and services during a specified period.,"International Labour Organization, ILOSTAT database. 
Data retrieved in March 1, 2020.",Person,measuredValue,count,age,Years15To64,employmentStatus,BLS_InLaborForce,gender,Male,Count_Person_15To64Years_Male,,100,, SL.TLF.ACTI.ZS,,,"Labor force participation rate, total (% of total population ages 15-64) (modeled ILO estimate)",Labor force participation rate is the proportion of the population ages 15-64 that is economically active: all people who supply labor for the production of goods and services during a specified period.,"International Labour Organization, ILOSTAT database. Data retrieved in March 1, 2020.",Person,measuredValue,count,age,Years15To64,employmentStatus,BLS_InLaborForce,,,Count_Person_15To64Years,,100,, SL.TLF.TOTL.FE.ZS,,,"Labor force, female (% of total labor force)",Female labor force as a percentage of the total show the extent to which women are active in the labor force. Labor force comprises people ages 15 and older who supply labor for the production of goods and services during a specified period.,"Derived using data from International Labour Organization, ILOSTAT database. The data retrieved in March 1, 2020.",Person,measuredValue,count,age,Years15Onwards,gender,Female,employmentStatus,BLS_InLaborForce,Count_Person_InLaborForce,,100,, SL.TLF.TOTL.IN,TRUE,Count_Person_InLaborForce,"Labor force, total","Labor force comprises people ages 15 and older who supply labor for the production of goods and services during a specified period. It includes people who are currently employed and people who are unemployed but seeking work as well as first-time job-seekers. Not everyone who works is included, however. Unpaid workers, family workers, and students are often omitted, and some countries do not count members of the armed forces. Labor force size tends to vary during the year as seasonal workers enter and leave.","Derived using data from International Labour Organization, ILOSTAT database. 
The data retrieved in March 1, 2020.",Person,measuredValue,count,age,Years15Onwards,employmentStatus,BLS_InLaborForce,,,,,,dcs:InternationalLaborOrganization, -VC.IHR.PSRC.FE.P5,,,"Intentional homicides, female (per 100,000 female)","Intentional homicides, female are estimates of unlawful female homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,gender,Female,,,Count_Person_Female,,100000,, -VC.IHR.PSRC.MA.P5,,,"Intentional homicides, male (per 100,000 male)","Intentional homicides, male are estimates of unlawful male homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. 
Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,gender,Male,,,Count_Person_Male,,100000,, -VC.IHR.PSRC.P5,,,"Intentional homicides (per 100,000 people)","Intentional homicides are estimates of unlawful homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,,,,,Count_Person,,100000,, +VC.IHR.PSRC.FE.P5,,,"Intentional homicides, female (per 100,000 female)","Intentional homicides, female are estimates of unlawful female homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. 
Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,gender,Female,,,Count_Person_Female,,,,Per100000Females +VC.IHR.PSRC.MA.P5,,,"Intentional homicides, male (per 100,000 male)","Intentional homicides, male are estimates of unlawful male homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,gender,Male,,,Count_Person_Male,,,,Per100000Males +VC.IHR.PSRC.P5,,,"Intentional homicides (per 100,000 people)","Intentional homicides are estimates of unlawful homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. 
Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,,,,,Count_Person,,,,Per100000Persons SP.RUR.TOTL,,,Rural population,Rural population refers to people living in rural areas as defined by national statistical offices. It is calculated as the difference between total population and urban population. Aggregation of urban and rural population may not add up to total population because of different country coverages.,World Bank staff estimates based on the United Nations Population Division's World Urbanization Prospects: 2018 Revision.,Person,measuredValue,count,placeOfResidenceClassification,Rural,,,,,,,,WorldBankEstimate, SP.URB.TOTL,,,Urban population,Urban population refers to people living in urban areas as defined by national statistical offices. It is calculated using World Bank population estimates and urban ratios from the United Nations World Urbanization Prospects. 
Aggregation of urban and rural population may not add up to total population because of different country coverages.,World Bank staff estimates based on the United Nations Population Division's World Urbanization Prospects: 2018 Revision.,Person,measuredValue,count,placeOfResidenceClassification,Urban,,,,,,,,WorldBankEstimate, -SP.DYN.IMRT.IN,,,"Mortality rate, infant (per 1,000 live births)","Infant mortality rate is the number of infants dying before reaching one year of age, per 1,000 live births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,,,,,Count_BirthEvent_LiveBirth,1000,,UnitedNationsIGMEEstimate, -SP.DYN.IMRT.MA.IN,,,"Mortality rate, infant, male (per 1,000 live births)","Infant mortality rate, male is the number of male infants dying before reaching one year of age, per 1,000 male live births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,gender,Male,,,Count_BirthEvent_LiveBirth_Male,1000,,UnitedNationsIGMEEstimate, -SP.DYN.IMRT.FE.IN,,,"Mortality rate, infant, female (per 1,000 live births)","Infant mortality rate, female is the number of female infants dying before reaching one year of age, per 1,000 female live births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,gender,Female,,,Count_BirthEvent_LiveBirth_Female,1000,,UnitedNationsIGMEEstimate, +SP.DYN.IMRT.IN,,,"Mortality rate, infant (per 1,000 live births)","Infant mortality rate is the number of infants dying before reaching one year of age, per 1,000 live 
births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,,,,,Count_BirthEvent_LiveBirth,,,UnitedNationsIGMEEstimate,Per1000LiveBirths +SP.DYN.IMRT.MA.IN,,,"Mortality rate, infant, male (per 1,000 live births)","Infant mortality rate, male is the number of male infants dying before reaching one year of age, per 1,000 male live births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,gender,Male,,,Count_BirthEvent_LiveBirth_Male,,,UnitedNationsIGMEEstimate,Per1000MaleLiveBirths +SP.DYN.IMRT.FE.IN,,,"Mortality rate, infant, female (per 1,000 live births)","Infant mortality rate, female is the number of female infants dying before reaching one year of age, per 1,000 female live births in a given year.","Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,gender,Female,,,Count_BirthEvent_LiveBirth_Female,,,UnitedNationsIGMEEstimate,Per1000FemaleLiveBirths SH.DTH.IMRT,,,Number of infant deaths,Number of infants dying before reaching one year of age.,"Estimates developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",MortalityEvent,measuredValue,count,age,Years0,,,,,,,,UnitedNationsIGMEEstimate, -SL.TLF.0714.ZS,,,"Children in employment, total (% of children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from ILO, UNICEF and the 
World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,,,Count_Person_7To14Years,100,,, -SL.TLF.0714.MA.ZS,,,"Children in employment, male (% of male children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from ILO, UNICEF and the World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,gender,Male,Count_Person_7To14Years_Male,100,,, -SL.TLF.0714.FE.ZS,,,"Children in employment, female (% of female children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from ILO, UNICEF and the World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,gender,Female,Count_Person_7To14Years_Female,100,,, -SH.SVR.WAST.ZS,,,"Prevalence of severe wasting, weight for height (% of children under 5)",Prevalence of severe wasting is the proportion of children under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.,"UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,,,Count_Person_Upto4Years,100,,JointChildMalnutritionEstimate, -SH.SVR.WAST.MA.ZS,,,"Prevalence of severe wasting, weight for height, male (% of children under 5)","Prevalence of severe wasting, male, is the proportion of boys under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). 
Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,gender,Male,Count_Person_Upto4Years_Male,100,,JointChildMalnutritionEstimate, -SH.SVR.WAST.FE.ZS,,,"Prevalence of severe wasting, weight for height, female (% of children under 5)","Prevalence of severe wasting, female, is the proportion of girls under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,gender,Female,Count_Person_Upto4Years_Female,100,,JointChildMalnutritionEstimate, -SH.STA.WAST.ZS,,,"Prevalence of wasting, weight for height (% of children under 5)",Prevalence of wasting is the proportion of children under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.,"UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,,,Count_Person_Upto4Years,100,,JointChildMalnutritionEstimate, -SH.STA.WAST.MA.ZS,,,"Prevalence of wasting, weight for height, male (% of children under 5)","Prevalence of wasting, male,is the proportion of boys under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). 
Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,gender,Male,Count_Person_Upto4Years_Male,100,,JointChildMalnutritionEstimate, -SH.STA.WAST.FE.ZS,,,"Prevalence of wasting, weight for height, female (% of children under 5)","Prevalence of wasting, female, is the proportion of girls under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,gender,Female,Count_Person_Upto4Years_Female,100,,JointChildMalnutritionEstimate, +SL.TLF.0714.ZS,,,"Children in employment, total (% of children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from ILO, UNICEF and the World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,,,Count_Person_7To14Years,100,,,Percent +SL.TLF.0714.MA.ZS,,,"Children in employment, male (% of male children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from ILO, UNICEF and the World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,gender,Male,Count_Person_7To14Years_Male,100,,,Percent +SL.TLF.0714.FE.ZS,,,"Children in employment, female (% of female children ages 7-14)",Children in employment refer to children involved in economic activity for at least one hour in the reference week of the survey.,"Understanding Children's Work project based on data from 
ILO, UNICEF and the World Bank.",Person,measuredValue,count,age,Years7To14,employment,Employed,gender,Female,Count_Person_7To14Years_Female,100,,,Percent +SH.SVR.WAST.ZS,,,"Prevalence of severe wasting, weight for height (% of children under 5)",Prevalence of severe wasting is the proportion of children under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.,"UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,,,Count_Person_Upto4Years,100,,JointChildMalnutritionEstimate,Percent +SH.SVR.WAST.MA.ZS,,,"Prevalence of severe wasting, weight for height, male (% of children under 5)","Prevalence of severe wasting, male, is the proportion of boys under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,gender,Male,Count_Person_Upto4Years_Male,100,,JointChildMalnutritionEstimate,Percent +SH.SVR.WAST.FE.ZS,,,"Prevalence of severe wasting, weight for height, female (% of children under 5)","Prevalence of severe wasting, female, is the proportion of girls under age 5 whose weight for height is more than three standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). 
Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,SevereWasting,gender,Female,Count_Person_Upto4Years_Female,100,,JointChildMalnutritionEstimate,Percent +SH.STA.WAST.ZS,,,"Prevalence of wasting, weight for height (% of children under 5)",Prevalence of wasting is the proportion of children under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.,"UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,,,Count_Person_Upto4Years,100,,JointChildMalnutritionEstimate,Percent +SH.STA.WAST.MA.ZS,,,"Prevalence of wasting, weight for height, male (% of children under 5)","Prevalence of wasting, male,is the proportion of boys under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,gender,Male,Count_Person_Upto4Years_Male,100,,JointChildMalnutritionEstimate,Percent +SH.STA.WAST.FE.ZS,,,"Prevalence of wasting, weight for height, female (% of children under 5)","Prevalence of wasting, female, is the proportion of girls under age 5 whose weight for height is more than two standard deviations below the median for the international reference population ages 0-59.","UNICEF, WHO, World Bank: Joint child malnutrition estimates (JME). 
Aggregation is based on UNICEF, WHO, and the World Bank harmonized dataset (adjusted, comparable data) and methodology.",Person,measuredValue,count,age,YearsUpto4,medicalCondition,Wasting,gender,Female,Count_Person_Upto4Years_Female,100,,JointChildMalnutritionEstimate,Percent SH.XPD.CHEX.PP.CD,,,"Current health expenditure per capita, PPP (current international $)",Current expenditures on health per capita expressed in international dollars at purchasing power parity (PPP time series based on ICP2011 PPP).,World Health Organization Global Health Expenditure database (http://apps.who.int/nha/database).,EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,HealthcareExpenditure,,,Count_Person,,,,InternationalDollar SH.XPD.CHEX.PC.CD,,,Current health expenditure per capita (current US$),Current expenditures on health per capita in current US dollars. Estimates of current health expenditures include healthcare goods and services consumed during each year.,World Health Organization Global Health Expenditure database (http://apps.who.int/nha/database).,EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,HealthcareExpenditure,,,Count_Person,,,,USDollar SH.ALC.PCAP.LI,,,"Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)","Total alcohol per capita consumption is defined as the total (sum of recorded and unrecorded alcohol) amount of alcohol consumed per person (15 years of age or older) over a calendar year, in litres of pure alcohol, adjusted for tourist consumption.","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",Consumption,measuredValue,amount,consumedThing,Alcohol,consumerAge,Years15Onwards,,,Count_Person_15OrMoreYears,,,WorldHealthOrganizationEstimates,Liter SI.POV.GINI,,,GINI index (World Bank estimate),"Gini index measures the extent to which the distribution of income (or, in some cases, 
consumption expenditure) among individuals or households within an economy deviates from a perfectly equal distribution. A Lorenz curve plots the cumulative percentages of total income received against the cumulative number of recipients, starting with the poorest individual or household. The Gini index measures the area between the Lorenz curve and a hypothetical line of absolute equality, expressed as a percentage of the maximum area under the line. Thus a Gini index of 0 represents perfect equality, while an index of 100 implies perfect inequality.","World Bank, Development Research Group. Data are based on primary household survey data obtained from government statistical agencies and World Bank country departments. For more information and methodology, please see PovcalNet (http://iresearch.worldbank.org/PovcalNet/index.htm).",EconomicActivity,measuredValue,giniIndex,,,,,,,,,,WorldBankEstimate, -SE.XPD.TOTL.GB.ZS,,,"Government expenditure on education, total (% of government expenditure)","General government expenditure on education (current, capital, and transfers) is expressed as a percentage of total general government expenditure on all sectors (including health, education, social services, etc.). It includes expenditure funded by transfers from international sources to government. General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,EducationExpenditure,expensor,Government,Amount_EconomicActivity_ExpenditureActivity_Government,100,,, -SE.XPD.TOTL.GD.ZS,,,"Government expenditure on education, total (% of GDP)","General government expenditure on education (current, capital, and transfers) is expressed as a percentage of GDP. It includes expenditure funded by transfers from international sources to government. 
General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,EducationExpenditure,expensor,Government,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,, +SE.XPD.TOTL.GB.ZS,,,"Government expenditure on education, total (% of government expenditure)","General government expenditure on education (current, capital, and transfers) is expressed as a percentage of total general government expenditure on all sectors (including health, education, social services, etc.). It includes expenditure funded by transfers from international sources to government. General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,EducationExpenditure,expensor,Government,Amount_EconomicActivity_ExpenditureActivity_Government,100,,,Percent +SE.XPD.TOTL.GD.ZS,,,"Government expenditure on education, total (% of GDP)","General government expenditure on education (current, capital, and transfers) is expressed as a percentage of GDP. It includes expenditure funded by transfers from international sources to government. 
General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,EducationExpenditure,expensor,Government,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,,Percent MS.MIL.XPND.CD,,,Military expenditure (current USD),"Military expenditures data from SIPRI are derived from the NATO definition, which includes all current and capital expenditures on the armed forces, including peacekeeping forces; defense ministries and other government agencies engaged in defense projects; paramilitary forces, if these are judged to be trained and equipped for military operations; and military space activities. Such expenditures include military and civil personnel, including retirement pensions of military personnel and social services for personnel; operation and maintenance; procurement; military research and development; and military aid (in the military expenditures of the donor country). Excluded are civil defense and current expenditures for previous military activities, such as for veterans' benefits, demobilization, conversion, and destruction of weapons. This definition cannot be applied for all countries, however, since that would require much more detailed information than is available about what is included in military budgets and off-budget military expenditure items. 
(For example, military budgets might or might not cover civil defense, reserves and auxiliary forces, police and paramilitary forces, dual-purpose forces such as military and civilian police, military grants in kind, pensions for military personnel, and social security contributions paid by one part of government to another).","Stockholm International Peace Research Institute (SIPRI), Yearbook: Armaments, Disarmament and International Security.",EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,MilitaryExpenditure,expensor,Government,,,,,USDollar -MS.MIL.XPND.GD.ZS,,,Military expenditure (% of GDP),"Military expenditures data from SIPRI are derived from the NATO definition, which includes all current and capital expenditures on the armed forces, including peacekeeping forces; defense ministries and other government agencies engaged in defense projects; paramilitary forces, if these are judged to be trained and equipped for military operations; and military space activities. Such expenditures include military and civil personnel, including retirement pensions of military personnel and social services for personnel; operation and maintenance; procurement; military research and development; and military aid (in the military expenditures of the donor country). Excluded are civil defense and current expenditures for previous military activities, such as for veterans' benefits, demobilization, conversion, and destruction of weapons. This definition cannot be applied for all countries, however, since that would require much more detailed information than is available about what is included in military budgets and off-budget military expenditure items. 
(For example, military budgets might or might not cover civil defense, reserves and auxiliary forces, police and paramilitary forces, dual-purpose forces such as military and civilian police, military grants in kind, pensions for military personnel, and social security contributions paid by one part of government to another.)","Stockholm International Peace Research Institute (SIPRI), Yearbook: Armaments, Disarmament and International Security.",EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,MilitaryExpenditure,expensor,Government,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,, -CM.MKT.LCAP.GD.ZS,,,Market capitalization of listed domestic companies (% of GDP),"Market capitalization (also known as market value) is the share price times the number of shares outstanding (including their several classes) for listed domestic companies. Investment funds, unit trusts, and companies whose only business goal is to hold shares of other listed companies are excluded. Data are end of year values.",World Federation of Exchanges database.,Stock,measuredValue,amount,,,,,,,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,, +MS.MIL.XPND.GD.ZS,,,Military expenditure (% of GDP),"Military expenditures data from SIPRI are derived from the NATO definition, which includes all current and capital expenditures on the armed forces, including peacekeeping forces; defense ministries and other government agencies engaged in defense projects; paramilitary forces, if these are judged to be trained and equipped for military operations; and military space activities. Such expenditures include military and civil personnel, including retirement pensions of military personnel and social services for personnel; operation and maintenance; procurement; military research and development; and military aid (in the military expenditures of the donor country). 
Excluded are civil defense and current expenditures for previous military activities, such as for veterans' benefits, demobilization, conversion, and destruction of weapons. This definition cannot be applied for all countries, however, since that would require much more detailed information than is available about what is included in military budgets and off-budget military expenditure items. (For example, military budgets might or might not cover civil defense, reserves and auxiliary forces, police and paramilitary forces, dual-purpose forces such as military and civilian police, military grants in kind, pensions for military personnel, and social security contributions paid by one part of government to another.)","Stockholm International Peace Research Institute (SIPRI), Yearbook: Armaments, Disarmament and International Security.",EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,MilitaryExpenditure,expensor,Government,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,,Percent +CM.MKT.LCAP.GD.ZS,,,Market capitalization of listed domestic companies (% of GDP),"Market capitalization (also known as market value) is the share price times the number of shares outstanding (including their several classes) for listed domestic companies. Investment funds, unit trusts, and companies whose only business goal is to hold shares of other listed companies are excluded. Data are end of year values.",World Federation of Exchanges database.,Stock,measuredValue,amount,,,,,,,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,,Percent CM.MKT.LCAP.CD,,,Market capitalization of listed domestic companies (current US$),"Market capitalization (also known as market value) is the share price times the number of shares outstanding (including their several classes) for listed domestic companies. Investment funds, unit trusts, and companies whose only business goal is to hold shares of other listed companies are excluded. 
Data are end of year values converted to U.S. dollars using corresponding year-end foreign exchange rates.",World Federation of Exchanges database.,Stock,measuredValue,amount,,,,,,,,,,,USDollar -BX.TRF.PWKR.DT.GD.ZS,,,"Personal remittances, received (% of GDP)","Personal remittances comprise personal transfers and compensation of employees. Personal transfers consist of all current transfers in cash or in kind made or received by resident households to or from nonresident households. Personal transfers thus include all current transfers between resident and nonresident individuals. Compensation of employees refers to the income of border, seasonal, and other short-term workers who are employed in an economy where they are not resident and of residents employed by nonresident entities. Data are the sum of two items defined in the sixth edition of the IMF's Balance of Payments Manual: personal transfers and compensation of employees.","World Bank staff estimates based on IMF balance of payments data, and World Bank and OECD GDP estimates.",Remittance,measuredValue,amount,transferType,InwardRemittance,,,,,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,WorldBankEstimate, +BX.TRF.PWKR.DT.GD.ZS,,,"Personal remittances, received (% of GDP)","Personal remittances comprise personal transfers and compensation of employees. Personal transfers consist of all current transfers in cash or in kind made or received by resident households to or from nonresident households. Personal transfers thus include all current transfers between resident and nonresident individuals. Compensation of employees refers to the income of border, seasonal, and other short-term workers who are employed in an economy where they are not resident and of residents employed by nonresident entities. 
Data are the sum of two items defined in the sixth edition of the IMF's Balance of Payments Manual: personal transfers and compensation of employees.","World Bank staff estimates based on IMF balance of payments data, and World Bank and OECD GDP estimates.",Remittance,measuredValue,amount,transferType,InwardRemittance,,,,,Amount_EconomicActivity_GrossDomesticProduction_Nominal,100,,WorldBankEstimate,Percent BX.TRF.PWKR.CD.DT,,,"Personal remittances, received (current US$)","Personal remittances comprise personal transfers and compensation of employees. Personal transfers consist of all current transfers in cash or in kind made or received by resident households to or from nonresident households. Personal transfers thus include all current transfers between resident and nonresident individuals. Compensation of employees refers to the income of border, seasonal, and other short-term workers who are employed in an economy where they are not resident and of residents employed by nonresident entities. Data are the sum of two items defined in the sixth edition of the IMF's Balance of Payments Manual: personal transfers and compensation of employees. Data are in current U.S. dollars.",World Bank staff estimates based on IMF balance of payments data.,Remittance,measuredValue,amount,transferType,InwardRemittance,,,,,,,,WorldBankEstimate,USDollar BM.TRF.PWKR.CD.DT,,,"Personal remittances, paid (current US$)","Personal remittances comprise personal transfers and compensation of employees. Personal transfers consist of all current transfers in cash or in kind made or received by resident households to or from nonresident households. Personal transfers thus include all current transfers between resident and nonresident individuals. Compensation of employees refers to the income of border, seasonal, and other short-term workers who are employed in an economy where they are not resident and of residents employed by nonresident entities. 
Data are the sum of two items defined in the sixth edition of the IMF's Balance of Payments Manual: personal transfers and compensation of employees. Data are in current U.S. dollars.","World Bank staff estimates based on IMF balance of payments data, and World Bank and OECD GDP estimates.",Remittance,measuredValue,amount,transferType,OutwardRemittance,,,,,,,,WorldBankEstimate,USDollar -VC.IHR.PSRC.P5,,,"Intentional homicides (per 100,000 people)","Intentional homicides are estimates of unlawful homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,,,,,Count_Person,,100000,, -SH.DYN.MORT,,,"Mortality rate, under-5 (per 1,000 live births)","Under-five mortality rate is the probability per 1,000 that a newborn baby will die before reaching age five, if subject to age-specific mortality rates of the specified year.","Estimates Developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",Person,measuredValue,mortalityRate,age,YearsUpto4,,,,,Count_BirthEvent_LiveBirth,,1000,, +VC.IHR.PSRC.P5,,,"Intentional homicides (per 100,000 people)","Intentional homicides are estimates of unlawful homicides purposely inflicted as a result of domestic disputes, interpersonal violence, violent conflicts over land resources, intergang violence over turf or 
control, and predatory violence and killing by armed groups. Intentional homicide does not include all intentional killing; the difference is usually in the organization of the killing. Individuals or small groups usually commit homicide, whereas killing in armed conflict is usually committed by fairly cohesive groups of up to several hundred members and is thus usually excluded.",UN Office on Drugs and Crime's International Homicide Statistics database.,CriminalActivities,measuredValue,count,crimeType,MurderAndNonNegligentManslaughter,,,,,Count_Person,,,,Per100000Persons +SH.DYN.MORT,,,"Mortality rate, under-5 (per 1,000 live births)","Under-five mortality rate is the probability per 1,000 that a newborn baby will die before reaching age five, if subject to age-specific mortality rates of the specified year.","Estimates Developed by the UN Inter-agency Group for Child Mortality Estimation (UNICEF, WHO, World Bank, UN DESA Population Division) at www.childmortality.org.",Person,measuredValue,mortalityRate,age,YearsUpto4,,,,,Count_BirthEvent_LiveBirth,,,,Per1000LiveBirths SH.PRV.SMOK,,,"Smoking prevalence, total (ages 15+)",Prevalence of smoking is the percentage of men and women ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. The rates are age-standardized.,"World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",Person,measuredValue,count,healthBehavior,Smoking,age,Years15Onwards,,,Count_Person_15OrMoreYears,,100,dcs:AgeAdjustedPrevalence, SH.PRV.SMOK.FE,,,"Smoking prevalence, females (% of adults)","Prevalence of smoking, female is the percentage of women ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. 
The rates are age-standardized.","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",Person,measuredValue,count,healthBehavior,Smoking,age,Years15Onwards,gender,Female,Count_Person_15OrMoreYears_Female,,100,dcs:AgeAdjustedPrevalence, SH.PRV.SMOK.MA,,,"Smoking prevalence, males (% of adults)","Prevalence of smoking, male is the percentage of men ages 15 and over who currently smoke any tobacco product on a daily or non-daily basis. It excludes smokeless tobacco use. The rates are age-standardized.","World Health Organization, Global Health Observatory Data Repository (http://apps.who.int/ghodata/).",Person,measuredValue,count,healthBehavior,Smoking,age,Years15Onwards,gender,Male,Count_Person_15OrMoreYears_Male,,100,dcs:AgeAdjustedPrevalence, SH.STA.DIAB.ZS,,,Diabetes prevalence (% of population ages 20 to 79),Diabetes prevalence refers to the percentage of people ages 20-79 who have type 1 or type 2 diabetes.,"International Diabetes Federation, Diabetes Atlas.",Person,measuredValue,count,healthOutcome,Diabetes,age,Years20To79,,,Count_Person_20To79Years,,100,, -SP.DYN.CBRT.IN,,,"Birth rate, crude (per 1,000 people)","Crude birth rate indicates the number of live births occurring during the year, per 1,000 population estimated at midyear. Subtracting the crude death rate from the crude birth rate provides the rate of natural increase, which is equal to the rate of population change in the absence of migration.","(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. 
Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",BirthEvent,measuredValue,count,medicalStatus,LiveBirth,,,,,Count_Person,,1000,, -SP.DYN.CDRT.IN,,Count_Death_AsAFractionOfCount_Person,"Death rate, crude (per 1,000 people)","Crude death rate indicates the number of deaths occurring during the year, per 1,000 population estimated at midyear. Subtracting the crude death rate from the crude birth rate provides the rate of natural increase, which is equal to the rate of population change in the absence of migration.","(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",MortalityEvent,measuredValue,count,,,,,,,Count_Person,,1000,dcs:WorldBankWeightedAverage, +SP.DYN.CBRT.IN,,,"Birth rate, crude (per 1,000 people)","Crude birth rate indicates the number of live births occurring during the year, per 1,000 population estimated at midyear. Subtracting the crude death rate from the crude birth rate provides the rate of natural increase, which is equal to the rate of population change in the absence of migration.","(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. 
Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",BirthEvent,measuredValue,count,medicalStatus,LiveBirth,,,,,Count_Person,,,,Per1000Persons +SP.DYN.CDRT.IN,,Count_Death_AsAFractionOfCount_Person,"Death rate, crude (per 1,000 people)","Crude death rate indicates the number of deaths occurring during the year, per 1,000 population estimated at midyear. Subtracting the crude death rate from the crude birth rate provides the rate of natural increase, which is equal to the rate of population change in the absence of migration.","(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",MortalityEvent,measuredValue,count,,,,,,,Count_Person,,,dcs:WorldBankWeightedAverage,Per1000Persons SP.DYN.LE00.FE.IN,,,"Life expectancy at birth, female (years)",Life expectancy at birth indicates the number of years a newborn infant would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life.,"(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. 
Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",Person,measuredValue,lifeExpectancy,gender,Female,,,,,,,,,Year SP.DYN.LE00.MA.IN,,,"Life expectancy at birth, male (years)",Life expectancy at birth indicates the number of years a newborn infant would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life.,"(1) United Nations Population Division. World Population Prospects: 2019 Revision. (2) Census reports and other statistical publications from national statistical offices, (3) Eurostat: Demographic Statistics, (4) United Nations Statistical Division. Population and Vital Statistics Report (various years), (5) U.S. Census Bureau: International Database, and (6) Secretariat of the Pacific Community: Statistics and Demography Programme.",Person,measuredValue,lifeExpectancy,gender,Male,,,,,,,,,Year EG.ELC.FOSL.ZS,,,"Electricity production from oil, gas and coal sources (% of total)","Sources of electricity refer to the inputs used to generate electricity. Oil refers to crude oil and petroleum products. Gas refers to natural gas but excludes natural gas liquids. Coal refers to all coal and brown coal, both primary (including hard coal and lignite-brown coal) and derived fuels (including patent fuel, coke oven coke, gas coke, coke oven gas, and blast furnace gas). 
Peat is also included in this category.","IEA Statistics OECD/IEA 2014 (http://www.iea.org/stats/index.asp), subject to https://www.iea.org/t&c/termsandconditions/",Production,measuredValue,amount,producedThing,ElectricityFromOilGasOrCoalSources,,,,,Amount_Production_Energy,,100,, @@ -68,4 +68,17 @@ EG.ELC.NUCL.ZS,,,Electricity production from nuclear sources (% of total),Source EG.FEC.RNEW.ZS,,,Renewable energy consumption (% of total final energy consumption),Renewable energy consumption is the share of renewables energy in total final energy consumption.,"World Bank, Sustainable Energy for All (SE4ALL) database from the SE4ALL Global Tracking Framework led jointly by the World Bank, International Energy Agency, and the Energy Sector Management Assistance Program.",Consumption,measuredValue,amount,consumedThing,RenewableEnergy,,,,,Amount_Consumption_Energy,,100,, EN.POP.EL5M.ZS,,,Population living in areas where elevation is below 5 meters (% of total population),Population below 5m is the percentage of the total population living in areas where the elevation is 5 meters or less.,"Center for International Earth Science Information Network (CIESIN)/Columbia University. 2013. Urban-Rural Population and Land Area Estimates Version 2. Palisades, NY: NASA Socioeconomic Data and Applications Center (SEDAC). http://sedac.ciesin.columbia.edu/data/set/lecz-urban-rural-population-land-area-estimates-v2.",Person,measuredValue,count,residenceCharacteristic,LessThan5MetersAboveSeaLevel,,,,,Count_Person,,100,, IT.CEL.SETS.P2,,,Mobile cellular subscriptions (per 100 people),"Mobile cellular telephone subscriptions are subscriptions to a public mobile telephone service that provide access to the PSTN using cellular technology. The indicator includes (and is split into) the number of postpaid subscriptions, and the number of active prepaid accounts (i.e. that have been used during the last three months). 
The indicator applies to all mobile cellular subscriptions that offer voice communications. It excludes subscriptions via data cards or USB modems, subscriptions to public mobile data services, private trunked mobile radio, telepoint, radio paging and telemetry services.","International Telecommunication Union, World Telecommunication/ICT Development Report and database.",Product,measuredValue,count,productType,MobileCellularSubscription,,,,,Count_Person,,100,, -SE.XPD.TERT.ZS,,,Expenditure on tertiary education (% of government expenditure on education),"Expenditure on tertiary education is expressed as a percentage of total general government expenditure on education. General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,TertiaryEducationExpenditure,remunerator,Government,Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government,,100,, \ No newline at end of file +SE.XPD.TERT.ZS,,,Expenditure on tertiary education (% of government expenditure on education),"Expenditure on tertiary education is expressed as a percentage of total general government expenditure on education. 
General government usually refers to local, regional and central governments.",UNESCO Institute for Statistics (http://uis.unesco.org/),EconomicActivity,measuredValue,amount,activitySource,ExpenditureActivity,expenditureType,TertiaryEducationExpenditure,remunerator,Government,Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government,,100,, +EG.USE.ELEC.KH.PC,,Amount_Consumption_Electricity_PerCapita,Electric power consumption (kWh per capita),,,Consumption,measuredValue,amount,consumedThing,Electricity,,,,,PerCapita,,,,KilowattHour +EG.USE.PCAP.KG.OE,,Amount_Consumption_Energy_PerCapita,Energy use (kg of oil equivalent per capita),,,Consumption,measuredValue,amount,consumedThing,Energy,,,,,PerCapita,,,,KilogramOfOilEquivalent +SP.DYN.TFRT.IN,,FertilityRate_Person_Female,"Fertility rate, total (births per woman)",,,Person,measuredValue,fertilityRate,gender,Female,,,,,,,,, +NY.GDP.MKTP.CD,,Amount_EconomicActivity_GrossDomesticProduction_Nominal,GDP (current US$),,,EconomicActivity,measuredValue,amount,activitySource,GrossDomesticProduction,measurementQualifier,Nominal,,,,,,,USDollar +NY.GDP.MKTP.KD.ZG,,GrowthRate_Amount_EconomicActivity_GrossDomesticProduction,GDP growth (annual %),,,EconomicActivity,growthRate,amount,activitySource,GrossDomesticProduction,,,,,,,,, +NY.GDP.PCAP.CD,,Amount_EconomicActivity_GrossDomesticProduction_Nominal_PerCapita,GDP per capita (current US$),,,EconomicActivity,measuredValue,amount,activitySource,GrossDomesticProduction,measurementQualifier,Nominal,,,PerCapita,,,,USDollar +NY.GNP.PCAP.PP.CD,,Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity_PerCapita,"GNI per capita, PPP (current international $)",,,EconomicActivity,measuredValue,amount,activitySource,GrossNationalIncome,measurementQualifier,PurchasingPowerParity,,,PerCapita,,,,InternationalDollar +NY.GNP.MKTP.PP.CD,,Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity,"GNI, PPP (current international 
$)",,,EconomicActivity,measuredValue,amount,activitySource,GrossNationalIncome,measurementQualifier,PurchasingPowerParity,,,,,,,InternationalDollar +IT.NET.USER.ZS,,Count_Person_IsInternetUser_PerCapita,Individuals using the Internet (% of population),,,Person,measuredValue,count,isInternetUser,TRUE,,,,,PerCapita,100,,, +SP.DYN.LE00.IN,,LifeExpectancy_Person,"Life expectancy at birth, total (years)",,,Person,measuredValue,lifeExpectancy,,,,,,,,,,,Year +SP.POP.GROW,,GrowthRate_Count_Person,Population growth (annual %),,,Person,growthRate,amount,,,,,,,,,,, +SP.POP.TOTL,,Count_Person,"Population, total",,,Person,measuredValue,count,,,,,,,,,,, +EN.GHG.CO2.PC.CE.AR5,,Amount_Emissions_CarbonDioxide_PerCapita,Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita),,,Emissions,measuredValue,count,emittedThing,CarbonDioxide,,,,,PerCapita,,,,MetricTon diff --git a/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.csv b/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.csv new file mode 100644 index 0000000000..ec6035086d --- /dev/null +++ b/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.csv @@ -0,0 +1,5 @@ +,StatisticalVariable,IndicatorCode,ISO3166Alpha3,Year,observationPeriod,Value0,Value1,Value2,Value3,unit,measurementMethod,scalingFactor +0,Amount_Consumption_RenewableEnergy_AsFractionOf_Amount_Consumption_Energy,EG.FEC.RNEW.ZS,dcid:country/MKD,2001,P1Y,0.152,,,,,, +1,LifeExpectancy_Person_Male,SP.DYN.LE00.MA.IN,dcid:country/UZB,1973,P1Y,58.621,,,,Year,, +2,Count_CriminalActivities_MurderAndNonNegligentManslaughter_AsFractionOf_Count_Person,VC.IHR.PSRC.P5,dcid:country/AFG,2009,P1Y,4.0715263102,,,,Per100000Persons,, +3,Count_CriminalActivities_MurderAndNonNegligentManslaughter_AsFractionOf_Count_Person,VC.IHR.PSRC.P5,dcid:country/AFG,2009,P1Y,4.0715263102,,,,Per100000Persons,, diff --git a/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.tmcf 
b/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.tmcf new file mode 100644 index 0000000000..ddecc67e34 --- /dev/null +++ b/scripts/world_bank/wdi/test_data/expected_ouput/expected_output.tmcf @@ -0,0 +1,40 @@ +Node: E:WorldBank->E0 +typeOf: dcs:StatVarObservation +variableMeasured: C:WorldBank->StatisticalVariable +observationDate: C:WorldBank->Year +observationPeriod: C:WorldBank->observationPeriod +observationAbout: C:WorldBank->ISO3166Alpha3 +value: C:WorldBank->Value0 +unit: C:WorldBank->unit + +Node: E:WorldBank->E1 +typeOf: dcs:StatVarObservation +variableMeasured: C:WorldBank->StatisticalVariable +observationDate: C:WorldBank->Year +observationPeriod: C:WorldBank->observationPeriod +observationAbout: C:WorldBank->ISO3166Alpha3 +value: C:WorldBank->Value1 +unit: C:WorldBank->unit +scalingFactor: C:WorldBank->scalingFactor + +Node: E:WorldBank->E2 +typeOf: dcs:StatVarObservation +variableMeasured: C:WorldBank->StatisticalVariable +observationDate: C:WorldBank->Year +observationPeriod: C:WorldBank->observationPeriod +observationAbout: C:WorldBank->ISO3166Alpha3 +value: C:WorldBank->Value2 +unit: C:WorldBank->unit +measurementMethod: C:WorldBank->measurementMethod + +Node: E:WorldBank->E3 +typeOf: dcs:StatVarObservation +variableMeasured: C:WorldBank->StatisticalVariable +observationDate: C:WorldBank->Year +observationPeriod: C:WorldBank->observationPeriod +observationAbout: C:WorldBank->ISO3166Alpha3 +value: C:WorldBank->Value3 +unit: C:WorldBank->unit +measurementMethod: C:WorldBank->measurementMethod +scalingFactor: C:WorldBank->scalingFactor + diff --git a/scripts/world_bank/wdi/worldbank.py b/scripts/world_bank/wdi/worldbank.py index ec85bf8150..ccef05d584 100644 --- a/scripts/world_bank/wdi/worldbank.py +++ b/scripts/world_bank/wdi/worldbank.py @@ -15,11 +15,6 @@ indicator codes provided by the indicatorSchemaFile flag for all years and for all countries provided in WorldBankCountries.csv. 
""" -from absl import app -from absl import flags -import pandas as pd -from retry.api import retry_call - import logging import itertools import requests @@ -27,12 +22,23 @@ import io import time import re +import os +import sys + +from absl import app +from absl import flags +from absl import logging +import pandas as pd +from retry.api import retry_call -FLAGS = flags.FLAGS -flags.DEFINE_boolean("fetchFromSource", False, +_MODULE_DIR = os.path.dirname(os.path.abspath(__file__)) +_FLAGS = flags.FLAGS +flags.DEFINE_boolean("fetchFromSource", True, "Whether to bypass cached CSVs and fetch from source.") -flags.DEFINE_string("indicatorSchemaFile", None, - "Path to indicator schema CSV file.") +flags.DEFINE_string( + "indicatorSchemaFile", + os.path.join(_MODULE_DIR, "schema_csvs/WorldBankIndicators_prod.csv"), "") +flags.DEFINE_string('mode', '', 'Options: download or process') # Remaps the columns provided by World Bank API. WORLDBANK_COL_REMAP = { @@ -46,9 +52,10 @@ typeOf: dcs:StatVarObservation variableMeasured: C:WorldBank->StatisticalVariable observationDate: C:WorldBank->Year -observationPeriod: "P1Y" +observationPeriod: C:WorldBank->observationPeriod observationAbout: C:WorldBank->ISO3166Alpha3 value: C:WorldBank->Value{idx} +unit: C:WorldBank->unit """ TEMPLATE_STAT_VAR = """ @@ -63,8 +70,147 @@ {CONSTRAINTS} """ +RESOLUTION_TO_EXISTING_DCID = { + 'dcs:WorldBank/SE_TER_CUAT_BA_FE_ZS': + 'Count_Person_25OrMoreYears_Female_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female', + 'dcs:WorldBank/SE_TER_CUAT_BA_MA_ZS': + 'Count_Person_25OrMoreYears_Male_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male', + 'dcs:WorldBank/SE_TER_CUAT_BA_ZS': + 'Count_Person_25OrMoreYears_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears', + 'dcs:WorldBank/SE_TER_CUAT_DO_FE_ZS': + 'Count_Person_25OrMoreYears_Female_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Female', + 'dcs:WorldBank/SE_TER_CUAT_DO_MA_ZS': + 
'Count_Person_25OrMoreYears_Male_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Male', + 'dcs:WorldBank/SE_TER_CUAT_DO_ZS': + 'Count_Person_25OrMoreYears_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears', + 'dcs:WorldBank/SE_TER_CUAT_MS_FE_ZS': + 'Count_Person_25OrMoreYears_Female_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female', + 'dcs:WorldBank/SE_TER_CUAT_MS_MA_ZS': + 'Count_Person_25OrMoreYears_Male_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male', + 'dcs:WorldBank/SE_TER_CUAT_MS_ZS': + 'Count_Person_25OrMoreYears_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears', + 'dcs:WorldBank/SE_TER_CUAT_ST_FE_ZS': + 'Count_Person_25OrMoreYears_Female_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Female', + 'dcs:WorldBank/SE_TER_CUAT_ST_MA_ZS': + 'Count_Person_25OrMoreYears_Male_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Male', + 'dcs:WorldBank/SE_TER_CUAT_ST_ZS': + 'Count_Person_25OrMoreYears_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears', + 'dcs:WorldBank/SH_STA_OWGH_FE_ZS': + 'Count_Person_Upto4Years_Female_Overweight_AsFractionOf_Count_Person_Upto4Years_Female', + 'dcs:WorldBank/SH_STA_OWGH_MA_ZS': + 'Count_Person_Upto4Years_Male_Overweight_AsFractionOf_Count_Person_Upto4Years_Male', + 'dcs:WorldBank/SH_STA_OWGH_ZS': + 'Count_Person_Upto4Years_Overweight_AsFractionOf_Count_Person_Upto4Years', + 'dcs:WorldBank/SH_STA_SUIC_FE_P5': + 'Count_Death_IntentionalSelfHarm_Female_AsFractionOf_Count_Person_Female', + 'dcs:WorldBank/SH_STA_SUIC_MA_P5': + 'Count_Death_IntentionalSelfHarm_Male_AsFractionOf_Count_Person_Male', + 'dcs:WorldBank/SH_STA_SUIC_P5': + 'Count_Death_IntentionalSelfHarm_AsFractionOf_Count_Person', + 'dcs:WorldBank/SL_TLF_ACTI_FE_ZS': + 'Count_Person_15To64Years_Female_InLaborForce_AsFractionOf_Count_Person_15To64Years_Female', + 'dcs:WorldBank/SL_TLF_ACTI_MA_ZS': + 'Count_Person_15To64Years_Male_InLaborForce_AsFractionOf_Count_Person_15To64Years_Male', 
+ 'dcs:WorldBank/SL_TLF_ACTI_ZS': + 'Count_Person_15To64Years_InLaborForce_AsFractionOf_Count_Person_15To64Years', + 'dcs:WorldBank/SL_TLF_TOTL_FE_ZS': + 'Count_Person_15OrMoreYears_InLaborForce_Female_AsFractionOf_Count_Person_InLaborForce', + 'dcs:WorldBank/VC_IHR_PSRC_FE_P5': + 'Count_CriminalActivities_MurderAndNonNegligentManslaughter_Female_AsFractionOf_Count_Person_Female', + 'dcs:WorldBank/VC_IHR_PSRC_MA_P5': + 'Count_CriminalActivities_MurderAndNonNegligentManslaughter_Male_AsFractionOf_Count_Person_Male', + 'dcs:WorldBank/VC_IHR_PSRC_P5': + 'Count_CriminalActivities_MurderAndNonNegligentManslaughter_AsFractionOf_Count_Person', + 'dcs:WorldBank/SP_RUR_TOTL': + 'Count_Person_Rural', + 'dcs:WorldBank/SP_URB_TOTL': + 'Count_Person_Urban', + 'dcs:WorldBank/SP_DYN_IMRT_IN': + 'Count_Death_0Years_AsFractionOf_Count_BirthEvent_LiveBirth', + 'dcs:WorldBank/SP_DYN_IMRT_MA_IN': + 'Count_Death_0Years_Male_AsFractionOf_Count_BirthEvent_LiveBirth_Male', + 'dcs:WorldBank/SP_DYN_IMRT_FE_IN': + 'Count_Death_0Years_Female_AsFractionOf_Count_BirthEvent_LiveBirth_Female', + 'dcs:WorldBank/SH_DTH_IMRT': + 'Count_Death_0Years', + 'dcs:WorldBank/SL_TLF_0714_ZS': + 'Count_Person_7To14Years_Employed_AsFractionOf_Count_Person_7To14Years', + 'dcs:WorldBank/SL_TLF_0714_MA_ZS': + 'Count_Person_7To14Years_Male_Employed_AsFractionOf_Count_Person_7To14Years_Male', + 'dcs:WorldBank/SL_TLF_0714_FE_ZS': + 'Count_Person_7To14Years_Female_Employed_AsFractionOf_Count_Person_7To14Years_Female', + 'dcs:WorldBank/SH_SVR_WAST_ZS': + 'Count_Person_Upto4Years_SevereWasting_AsFractionOf_Count_Person_Upto4Years', + 'dcs:WorldBank/SH_SVR_WAST_MA_ZS': + 'Count_Person_Upto4Years_Male_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Male', + 'dcs:WorldBank/SH_SVR_WAST_FE_ZS': + 'Count_Person_Upto4Years_Female_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Female', + 'dcs:WorldBank/SH_STA_WAST_ZS': + 'Count_Person_Upto4Years_Wasting_AsFractionOf_Count_Person_Upto4Years', + 
'dcs:WorldBank/SH_STA_WAST_MA_ZS': + 'Count_Person_Upto4Years_Male_Wasting_AsFractionOf_Count_Person_Upto4Years_Male', + 'dcs:WorldBank/SH_STA_WAST_FE_ZS': + 'Count_Person_Upto4Years_Female_Wasting_AsFractionOf_Count_Person_Upto4Years_Female', + 'dcs:WorldBank/SH_XPD_CHEX_PC_CD': + 'Amount_EconomicActivity_ExpenditureActivity_HealthcareExpenditure_AsFractionOf_Count_Person', + 'dcs:WorldBank/SH_ALC_PCAP_LI': + 'Amount_Consumption_Alcohol_15OrMoreYears_AsFractionOf_Count_Person_15OrMoreYears', + 'dcs:WorldBank/SI_POV_GINI': + 'GiniIndex_EconomicActivity', + 'dcs:WorldBank/SE_XPD_TOTL_GB_ZS': + 'Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_Government', + 'dcs:WorldBank/SE_XPD_TOTL_GD_ZS': + 'Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal', + 'dcs:WorldBank/MS_MIL_XPND_CD': + 'Amount_EconomicActivity_ExpenditureActivity_MilitaryExpenditure_Government', + 'dcs:WorldBank/MS_MIL_XPND_GD_ZS': + 'Amount_EconomicActivity_ExpenditureActivity_MilitaryExpenditure_Government_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal', + 'dcs:WorldBank/CM_MKT_LCAP_GD_ZS': + 'Amount_Stock_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal', + 'dcs:WorldBank/CM_MKT_LCAP_CD': + 'Amount_Stock', + 'dcs:WorldBank/BX_TRF_PWKR_DT_GD_ZS': + 'Amount_Remittance_InwardRemittance_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal', + 'dcs:WorldBank/BX_TRF_PWKR_CD_DT': + 'Amount_Remittance_InwardRemittance', + 'dcs:WorldBank/BM_TRF_PWKR_CD_DT': + 'Amount_Remittance_OutwardRemittance', + 'dcs:WorldBank/SH_DYN_MORT': + 'MortalityRate_Person_Upto4Years_AsFractionOf_Count_BirthEvent_LiveBirth', + 'dcs:WorldBank/SH_PRV_SMOK': + 'Count_Person_15OrMoreYears_Smoking_AsFractionOf_Count_Person_15OrMoreYears', + 'dcs:WorldBank/SH_PRV_SMOK_FE': + 
'Count_Person_15OrMoreYears_Female_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Female', + 'dcs:WorldBank/SH_PRV_SMOK_MA': + 'Count_Person_15OrMoreYears_Male_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Male', + 'dcs:WorldBank/SH_STA_DIAB_ZS': + 'Count_Person_20To79Years_Diabetes_AsFractionOf_Count_Person_20To79Years', + 'dcs:WorldBank/SP_DYN_CBRT_IN': + 'Count_BirthEvent_LiveBirth_AsFractionOf_Count_Person', + 'dcs:WorldBank/SP_DYN_LE00_FE_IN': + 'LifeExpectancy_Person_Female', + 'dcs:WorldBank/SP_DYN_LE00_MA_IN': + 'LifeExpectancy_Person_Male', + 'dcs:WorldBank/EG_ELC_FOSL_ZS': + 'Amount_Production_ElectricityFromOilGasOrCoalSources_AsFractionOf_Amount_Production_Energy', + 'dcs:WorldBank/EG_ELC_NUCL_ZS': + 'Amount_Production_ElectricityFromNuclearSources_AsFractionOf_Amount_Production_Energy', + 'dcs:WorldBank/EG_FEC_RNEW_ZS': + 'Amount_Consumption_RenewableEnergy_AsFractionOf_Amount_Consumption_Energy', + 'dcs:WorldBank/EN_POP_EL5M_ZS': + 'Count_Person_ResidingLessThan5MetersAboveSeaLevel_AsFractionOf_Count_Person', + 'dcs:WorldBank/IT_CEL_SETS_P2': + 'Count_Product_MobileCellularSubscription_AsFractionOf_Count_Person', + 'dcs:WorldBank/SE_XPD_TERT_ZS': + 'Amount_EconomicActivity_ExpenditureActivity_TertiaryEducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government', + 'dcs:WorldBank/SH_XPD_CHEX_PP_CD': + 'Amount_EconomicActivity_ExpenditureActivity_HealthcareExpenditure_AsFractionOf_Count_Person', + 'dcs:WorldBank/SH_XPD_CHEX_PC_CD': + 'Amount_EconomicActivity_ExpenditureActivity_HealthcareExpenditure_AsFractionOf_Count_Person' +} + -def read_worldbank(iso3166alpha3, fetchFromSource): +def read_worldbank(iso3166alpha3, mode): """ Fetches and tidies all ~1500 World Bank indicators for a given ISO 3166 alpha 3 code. @@ -85,8 +231,8 @@ def read_worldbank(iso3166alpha3, fetchFromSource): Takes approximately 10 seconds to download and tidy one country in a Jupyter notebook. 
""" - if fetchFromSource: - logging.info('Downloading %s', iso3166alpha3) + if mode in ["download", '']: + logging.info('Downloading input file for country %s', iso3166alpha3) country_zip = ("http://api.worldbank.org/v2/en/country/" + iso3166alpha3 + "?downloadformat=csv") r = retry_call(requests.get, @@ -95,7 +241,13 @@ def read_worldbank(iso3166alpha3, fetchFromSource): delay=20, backoff=1.5) if r.status_code != 200: - logging.info('Failed to retrieve %s', iso3166alpha3) + logging.fatal('Failed to retrieve %s', iso3166alpha3) + if not os.path.exists(os.path.join(_MODULE_DIR, 'source_data')): + os.mkdir(os.path.join(_MODULE_DIR, 'source_data')) + with open( + os.path.join(_MODULE_DIR, 'source_data', + iso3166alpha3 + '.zip'), 'wb') as f: + f.write(r.content) filebytes = io.BytesIO(r.content) myzipfile = zipfile.ZipFile(filebytes) @@ -124,8 +276,9 @@ def read_worldbank(iso3166alpha3, fetchFromSource): if df is None: df = pd.DataFrame(columns=cols) else: - df = df.append(pd.DataFrame([cols], columns=df.columns), - ignore_index=True) + df = pd.concat( + [df, pd.DataFrame([cols], columns=df.columns)], + ignore_index=True) df = df.rename(columns=WORLDBANK_COL_REMAP) @@ -140,6 +293,9 @@ def read_worldbank(iso3166alpha3, fetchFromSource): # Convert to numeric and drop empty values. df['Value'] = pd.to_numeric(df['Value']) df = df.dropna() + if not os.path.exists( + os.path.join(_MODULE_DIR, 'preprocessed_source_csv')): + os.mkdir(os.path.join(_MODULE_DIR, 'preprocessed_source_csv')) df.to_csv('preprocessed_source_csv/' + iso3166alpha3 + '.csv', index=False) else: @@ -207,8 +363,7 @@ def group_stat_vars_by_observation_properties(indicator_codes): """ # All the statistical observation properties that we included. properties_of_stat_var_observation = ([ - 'measurementMethod', 'measurementDenominator', 'scalingFactor', - 'sourceScalingFactor', 'unit' + 'measurementMethod', 'scalingFactor' ]) # List of tuples to return. 
tmcfs_for_stat_vars = [] @@ -221,7 +376,7 @@ def group_stat_vars_by_observation_properties(indicator_codes): repeat=len(properties_of_stat_var_observation))): codes_that_match = null_status.copy() base_template_mcf = TEMPLATE_TMCF - cols_to_include_in_csv = ['IndicatorCode'] + cols_to_include_in_csv = ['IndicatorCode', 'unit'] # Loop over each obs column and whether to include it. for include_col, column in (zip(permutation, @@ -241,8 +396,7 @@ def group_stat_vars_by_observation_properties(indicator_codes): return tmcfs_for_stat_vars -def download_indicator_data(worldbank_countries, indicator_codes, - fetchFromSource): +def download_indicator_data(worldbank_countries, indicator_codes, mode): """ Downloads World Bank country data for all countries and indicators provided. @@ -261,8 +415,9 @@ def download_indicator_data(worldbank_countries, indicator_codes, worldbank_dataframe = pd.DataFrame() indicators_to_keep = list(indicator_codes['IndicatorCode'].unique()) + country_df_list = [] for index, country_code in enumerate(worldbank_countries['ISO3166Alpha3']): - country_df = read_worldbank(country_code, fetchFromSource) + country_df = read_worldbank(country_code, mode) # Remove unneccessary indicators. country_df = country_df[country_df['IndicatorCode'].isin( @@ -272,8 +427,9 @@ def download_indicator_data(worldbank_countries, indicator_codes, country_df['ISO3166Alpha3'] = country_code # Add new row to main datframe. - worldbank_dataframe = worldbank_dataframe.append(country_df) + country_df_list.append(country_df) + worldbank_dataframe = pd.concat(country_df_list) # Map indicator codes to unique Statistical Variable. 
worldbank_dataframe['StatisticalVariable'] = ( worldbank_dataframe['IndicatorCode'].apply( @@ -281,8 +437,10 @@ def download_indicator_data(worldbank_countries, indicator_codes, return worldbank_dataframe.rename({'year': 'Year'}, axis=1) -def output_csv_and_tmcf_by_grouping(worldbank_dataframe, tmcfs_for_stat_vars, - indicator_codes): +def output_csv_and_tmcf_by_grouping(worldbank_dataframe, + tmcfs_for_stat_vars, + indicator_codes, + saveOutput=True): """ Outputs TMCFs and CSVs for each grouping of stat vars. Args: @@ -294,44 +452,59 @@ def output_csv_and_tmcf_by_grouping(worldbank_dataframe, tmcfs_for_stat_vars, indicator_codes -> Dataframe with INDICATOR_CODES to include. """ # Only include a subset of columns in the final csv - output_csv = worldbank_dataframe[[ - 'StatisticalVariable', 'IndicatorCode', 'ISO3166Alpha3', 'Year', 'Value' - ]] - - # Output tmcf and csv for each unique World Bank grouping. - df = pd.DataFrame(columns=[ - 'StatisticalVariable', - 'IndicatorCode', - 'ISO3166Alpha3', - 'Year', - ]) - with open('output/WorldBank.tmcf', 'w', newline='') as f_out: - for index, enum in enumerate(tmcfs_for_stat_vars): - tmcf, stat_var_obs_cols, stat_vars_in_group = enum - if len(stat_vars_in_group) == 0: - continue - f_out.write(tmcf.format_map({'idx': index}) + '\n') - - # Get only the indicator codes in that grouping. - matching_csv = output_csv[output_csv['IndicatorCode'].isin( - stat_vars_in_group)] - - # Format to decimals. - matching_csv = matching_csv.round(10) - df = df.merge( - matching_csv.rename(columns={'Value': f"Value{index}"}), - how='outer', - on=[ - 'StatisticalVariable', - 'IndicatorCode', - 'ISO3166Alpha3', - 'Year', - ]) - # Include the Stat Observation columns in the output CSV. 
- df = df.merge(indicator_codes[stat_var_obs_cols], on='IndicatorCode') - df.drop('IndicatorCode', axis=1).to_csv('output/WorldBank.csv', - float_format='%.10f', - index=False) + try: + output_csv = worldbank_dataframe[[ + 'StatisticalVariable', 'IndicatorCode', 'ISO3166Alpha3', 'Year', + 'Value', 'observationPeriod' + ]] + + # Output tmcf and csv for each unique World Bank grouping. + df = pd.DataFrame(columns=[ + 'StatisticalVariable', 'IndicatorCode', 'ISO3166Alpha3', 'Year', + 'observationPeriod' + ]) + if saveOutput: + TMCF_PATH = 'output/WorldBank.tmcf' + else: + TMCF_PATH = 'test_data/output/output_generated.tmcf' + with open(TMCF_PATH, 'w', newline='') as f_out: + for index, enum in enumerate(tmcfs_for_stat_vars): + tmcf, stat_var_obs_cols, stat_vars_in_group = enum + if len(stat_vars_in_group) == 0: + continue + f_out.write(tmcf.format_map({'idx': index}) + '\n') + + # Get only the indicator codes in that grouping. + matching_csv = output_csv[output_csv['IndicatorCode'].isin( + stat_vars_in_group)] + + # Format to decimals. + matching_csv = matching_csv.round(10) + df = df.merge( + matching_csv.rename(columns={'Value': f"Value{index}"}), + how='outer', + on=[ + 'StatisticalVariable', + 'IndicatorCode', + 'ISO3166Alpha3', + 'Year', + 'observationPeriod', + ]) + # Include the Stat Observation columns in the output CSV. 
+ df = df.merge(indicator_codes[stat_var_obs_cols], on='IndicatorCode') + + # Coverting dcid to existing dcid + df['StatisticalVariable'] = df['StatisticalVariable'].astype(str) + df = df.replace({'StatisticalVariable': RESOLUTION_TO_EXISTING_DCID}) + if saveOutput: + logging.info("Writing output csv") + df.drop('IndicatorCode', axis=1).to_csv('output/WorldBank.csv', + float_format='%.10f', + index=False) + else: + return df + except Exception as e: + logging.fatal(f"Error generating output {e}") def source_scaling_remap(row, scaling_factor_lookup, existing_stat_var_lookup): @@ -361,72 +534,85 @@ def source_scaling_remap(row, scaling_factor_lookup, existing_stat_var_lookup): return row -def main(_): - # Load statistical variable configuration file. - indicator_codes = pd.read_csv(FLAGS.indicatorSchemaFile) +def process(indicator_codes, worldbank_dataframe, saveOutput=True): + logging.info("Processing the input files") + try: + # Add source description to note. + def add_source_to_description(row): + if not pd.isna(row['Source']): + return row['SourceNote'] + " " + str(row['Source']) + else: + return row['SourceNote'] + + indicator_codes['SourceNote'] = indicator_codes.apply( + add_source_to_description, axis=1) + + # Generate stat vars + with open("output/WorldBank_StatisticalVariables.mcf", "w+") as f_out: + # Generate StatVars for fields that don't exist. Some fields such as + # Count_Person_Unemployed are already statistical variables so we do + # not need to recreate them. + for _, row in indicator_codes[ + indicator_codes['ExistingStatVar'].isna()].iterrows(): + f_out.write(build_stat_vars_from_indicator_list(row)) + + # Create template MCFs for each grouping of stat vars. + tmcfs_for_stat_vars = ( + group_stat_vars_by_observation_properties(indicator_codes)) + + # Remap columns to match expected format. 
+ worldbank_dataframe['Value'] = pd.to_numeric( + worldbank_dataframe['Value']) + worldbank_dataframe['ISO3166Alpha3'] = ( + worldbank_dataframe['ISO3166Alpha3'].apply( + lambda code: "dcid:Earth" + if code == "WLD" else "dcid:country/" + code)) + worldbank_dataframe['StatisticalVariable'] = \ + worldbank_dataframe['StatisticalVariable'].apply( + lambda code: "dcs:" + code) + + # Scale values by scaling factor and replace exisiting StatVars. + scaling_factor_lookup = (indicator_codes.set_index('IndicatorCode') + ['sourceScalingFactor'].dropna().to_dict()) + existing_stat_var_lookup = (indicator_codes.set_index('IndicatorCode') + ['ExistingStatVar'].dropna().to_dict()) + worldbank_dataframe = worldbank_dataframe.apply( + lambda row: source_scaling_remap(row, scaling_factor_lookup, + existing_stat_var_lookup), + axis=1) + + # Convert integer columns. + int_cols = (list(indicator_codes[indicator_codes['ConvertToInt'] == + True]['IndicatorCode'].unique())) + worldbank_subset = worldbank_dataframe[ + worldbank_dataframe['IndicatorCode'].isin(int_cols)].index + worldbank_dataframe.loc[worldbank_subset, "Value"] = (pd.to_numeric( + worldbank_dataframe.loc[worldbank_subset, "Value"], + downcast="integer")) + worldbank_dataframe['observationPeriod'] = worldbank_dataframe[ + 'StatisticalVariable'].apply(lambda x: '' if x in [ + 'dcid:FertilityRate_Person_Female', 'dcid:LifeExpectancy_Person' + ] else 'P1Y') + # Output final CSVs and variables. + df = output_csv_and_tmcf_by_grouping(worldbank_dataframe, + tmcfs_for_stat_vars, + indicator_codes, saveOutput) + if not saveOutput: + return df + except Exception as e: + logging.fatal(f"Error processing input file {e}") - # Add source description to note. 
def main(_):
    """Entry point invoked by `app.run`.

    Reads the `mode` and `indicatorSchemaFile` flags, loads the indicator
    schema and the country list, hands both (plus the mode) to
    `download_indicator_data`, and then runs `process` on the result when
    the mode is empty or "process".

    Args:
        _: Positional argument supplied by `app.run`; unused.
    """
    run_mode = _FLAGS.mode

    # Statistical variable configuration; every column is read as a string.
    indicator_codes = pd.read_csv(_FLAGS.indicatorSchemaFile, dtype=str)
    countries = pd.read_csv("WorldBankCountries.csv")

    # The mode is forwarded so the helper can decide how to obtain the data.
    indicator_data = download_indicator_data(countries, indicator_codes,
                                             run_mode)

    # Any other mode value stops after the download step.
    if run_mode in ("", "process"):
        process(indicator_codes, indicator_data)


if __name__ == '__main__':
    app.run(main)
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Golden-file test for the World Bank WDI `process` step."""

import os
import unittest

import numpy as np
import pandas as pd

from worldbank import process

_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
OUTPUT_PATH = "test_data/output"
# Create the output directory if it is missing. `exist_ok=True` keeps this
# safe when the directory is already present but the generated CSV is not.
os.makedirs(os.path.join(_MODULE_DIR, OUTPUT_PATH), exist_ok=True)
GENERATED_CSV_PATH = os.path.join(_MODULE_DIR, OUTPUT_PATH,
                                  "output_generated.csv")
GENERATED_TMCF_PATH = os.path.join(_MODULE_DIR, OUTPUT_PATH,
                                   "output_generated.tmcf")
# NOTE(review): the "expected_ouput" spelling is kept as-is because it is a
# path on disk -- confirm against the checked-in test_data directory.
EXPECTED_CSV_PATH = os.path.join(
    _MODULE_DIR, 'test_data/expected_ouput/expected_output.csv')
EXPECTED_TMCF_PATH = os.path.join(
    _MODULE_DIR, 'test_data/expected_ouput/expected_output.tmcf')

# Three sample observations. Because the rows mix strings and numbers,
# np.array stores every cell as a string.
INPUT_ROWS = np.array([
    [
        0, 'Afghanistan', 'AFG', "Intentional homicides (per 100,000 people)",
        'VC.IHR.PSRC.P5', 2009, 4.0715263102304, 'AFG',
        'WorldBank/VC_IHR_PSRC_P5'
    ],
    [
        26621, 'North Macedonia', 'MKD',
        'Renewable energy consumption (% of total final energy consumption)',
        'EG.FEC.RNEW.ZS', 2001, 15.2, 'MKD', 'WorldBank/EG_FEC_RNEW_ZS'
    ],
    [
        1632, 'Uzbekistan', 'UZB', "Life expectancy at birth, male (years)",
        'SP.DYN.LE00.MA.IN', 1973, 58.621, 'UZB', 'WorldBank/SP_DYN_LE00_MA_IN'
    ],
])

EXPECTED_OUTPUT_CSV = pd.read_csv(EXPECTED_CSV_PATH)
worldbank_dataframe = pd.DataFrame(INPUT_ROWS,
                                   columns=[
                                       '', 'CountryName', 'CountryCode',
                                       'IndicatorName', 'IndicatorCode', 'Year',
                                       'Value', 'ISO3166Alpha3',
                                       'StatisticalVariable'
                                   ])


class WDITest(unittest.TestCase):
    """Compares the output of `process` against checked-in golden files."""

    def test_WDI(self):
        """Runs `process` on the sample rows and checks the CSV and TMCF."""
        indicator_codes = pd.read_csv(os.path.join(
            _MODULE_DIR, "schema_csvs", "WorldBankIndicators_prod.csv"),
                                      dtype=str)
        # Pass a copy so the module-level fixture is not modified if
        # `process` assigns columns on the frame it receives.
        outputGenerated = process(indicator_codes,
                                  worldbank_dataframe.copy(),
                                  saveOutput=False)

        # Round-trip through CSV so both frames are parsed the same way.
        outputGenerated.to_csv(GENERATED_CSV_PATH)
        GENERATED_OUTPUT_CSV = pd.read_csv(GENERATED_CSV_PATH)
        self.assertTrue(
            GENERATED_OUTPUT_CSV.equals(EXPECTED_OUTPUT_CSV),
            msg=f"{GENERATED_CSV_PATH} differs from {EXPECTED_CSV_PATH}")

        with open(EXPECTED_TMCF_PATH, encoding="UTF-8") as expected_tmcf_file:
            expected_tmcf_data = expected_tmcf_file.read()
        with open(GENERATED_TMCF_PATH,
                  encoding="UTF-8") as generated_tmcf_file:
            generated_tmcf_data = generated_tmcf_file.read()
        # Leading/trailing whitespace differences are not significant.
        self.assertEqual(expected_tmcf_data.strip(),
                         generated_tmcf_data.strip())


if __name__ == '__main__':
    unittest.main()