Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Final 2.1 pull request with changes #28

Merged
merged 27 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3e6fbbf
designed first two graphs ideas. About to refactor code so want a log…
Mattie-P Apr 10, 2024
58c226c
refactoring to remove unesseccary code
Mattie-P Apr 10, 2024
63be97e
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 10, 2024
262fed4
trying scatter plots
Mattie-P Apr 11, 2024
039b383
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 11, 2024
3c8b845
Update 2_1_negative_net_flows.py
Mattie-P Apr 11, 2024
2230c7d
Changes
Mattie-P Apr 11, 2024
176a35b
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 11, 2024
c97266d
Final chart (old branch)
Mattie-P Apr 12, 2024
0dd973b
Refactoring code for just beeswarm
Mattie-P Apr 12, 2024
0e2bd86
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 12, 2024
042e94e
Finalise scripts
Mattie-P Apr 12, 2024
c9e7278
Final commit
Mattie-P Apr 12, 2024
97aeea1
final final
Mattie-P Apr 12, 2024
9acbb48
Update 2_1_negative_net_flows.py
Mattie-P Apr 12, 2024
f9e48c9
changing to 2024
Mattie-P Apr 12, 2024
c5001b1
renaming chart chart_2_1
Mattie-P Apr 12, 2024
7e4d5ba
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 15, 2024
57e3b82
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 15, 2024
09dea40
updating for new projections data
Mattie-P Apr 15, 2024
988248e
adding latest year of gdp data available to prevent miscounting
Mattie-P Apr 15, 2024
a20f80c
Update README.md
Mattie-P Apr 15, 2024
bb155a9
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 15, 2024
c8de059
removing Syria
Mattie-P Apr 16, 2024
7e358bc
Merge branch 'main' into 2.1-negative-net-flows
Mattie-P Apr 17, 2024
92ee985
adding new weo_2024_1 file and functionality to add gdp data using bb…
Mattie-P Apr 17, 2024
945c1ea
Incorporating suggested changes
Mattie-P Apr 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion output/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,5 @@ It is created by the script [net_flow_projections.py](../scripts/analysis/net_fl
- [`avg_repayments_china.csv`](avg_repayments.csv): Average yearly repayments, highlighting China.
Structured for a flourish bar chart. It is created by the script [debt_service.py](../scripts/analysis/debt_service.py).
- [`chart_1_1](chart_1_1.csv): Data for chart 1.1. Net flows for countries and groups
- [`chart_1_2](chart_1_2.csv): Data for chart 1.2. Inflows for countries and groups
- [`chart_1_2](chart_1_2.csv): Data for chart 1.2. Inflows for countries and groups
- [`chart_2_1`](chart_2_1.csv): Data for chart 2.1. Net negative transfer countries (2022 and 2025).
237 changes: 237 additions & 0 deletions output/chart_2_1.csv

Large diffs are not rendered by default.

255 changes: 255 additions & 0 deletions output/number_of_countries_with_nnt_2022_2025.csv

Large diffs are not rendered by default.

9,397 changes: 9,397 additions & 0 deletions output/weo_data.csv

Large diffs are not rendered by default.

Binary file added raw_data/weo2023_1.csv
Binary file not shown.
Binary file added raw_data/weo_2024_1.feather
Binary file not shown.
259 changes: 259 additions & 0 deletions scripts/analysis/2_1_negative_net_flows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import pandas as pd
from bblocks import set_bblocks_data_path
from bblocks.dataframe_tools.add import add_iso_codes_column
from bblocks.import_tools.imf_weo import WEO
import numpy as np

from scripts.config import Paths

set_bblocks_data_path(Paths.raw_data)


def get_parquet(file_name: str) -> pd.DataFrame:
"""
Loads parquet file from output folder based.

Args:
doc (str): Name of parquet file.
"""
return pd.read_parquet(Paths.output / file_name)


def calculate_net_transfers(df: pd.DataFrame, as_billion: bool = True) -> pd.DataFrame:
"""
Creates net_flow column, which is the sum of inflows (+ve) and outflows (-ve).
Removes inflow and outflow columns for output.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional column for 'net_flow' and without inflow and
outflow columns. net_flow is presented in billions.
"""
df["net_flow"] = df["inflow"] + df["outflow"]

# If units required in billions, convert to billions
if as_billion:
df["net_flow"] = df["net_flow"] / 1e9

return df.filter(
items=["year", "country", "continent", "income_level", "net_flow"], axis=1
)


def add_projections_data(df: pd.DataFrame, as_billion: bool = True) -> pd.DataFrame:
"""
Adds projections data from the net_flow_projections_country.parquet file.
Projections are represented in billions to match actual data.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional rows for 2023-2025 data.
"""
# Load projections data
projection = get_parquet(file_name="net_flow_projections_country.parquet").rename(
{"value": "net_flow"}, axis=1
)

# if billions required, put projections into billions
if as_billion:
projection["net_flow"] = projection["net_flow"] / 1e9

# Merge projections data into actual data.
df_merged = pd.concat([df, projection])

return df_merged


def calculate_net_flow_as_share_gdp(df: pd.DataFrame) -> pd.DataFrame:
"""
Adds new column for net flows as a share of gdp.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional column called 'net_flows_over_gdp_percent'.
"""

# add gdp data
df = merge_gdp_data(df)

# calculate net transfers share of gdp
df = calculate_share_gdp(df)

return df


def merge_gdp_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Merges IMF WEO gdp data (in current prices, US$), scraped using bblocks (see
download_gdp_data function. Merges data on year and iso_code.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional column for gdp.
"""

gdp = _download_gdp_data()

df_merged = pd.merge(df, gdp, on=["year", "iso_3"], how="left")

df_filled_empty = _add_latest_year_for_missing_gdp_data(df=df_merged, gdp=gdp)

return df_filled_empty


def _download_gdp_data() -> pd.DataFrame:
"""
Download GDP data from IMF using bblocks. Takes latest version of IMF WEO database.

returns: pd.DataFrame containing IMF WEO GDP data by country and year from 1990 to
2029.
"""

weo = WEO(version="latest")

weo = weo.load_data(indicators="NGDPD")

gdp = weo.get_data()

return (
gdp.pipe(
add_iso_codes_column,
id_column="ref_area",
id_type="regex",
target_column="iso_3",
)
.filter(items=["time_period", "obs_value", "iso_3"], axis=1)
.rename(columns={"time_period": "year", "obs_value": "gdp"})
)


def _add_latest_year_for_missing_gdp_data(
df: pd.DataFrame, gdp: pd.DataFrame
) -> pd.DataFrame:
"""
Fills gaps in GDP data where the year does not currently have data. Fills gaps with
latest available year.

Args:
df (pd.DataFrame) with missing GDP data
gdp (pd.DataFrame) with all gdp data

Returns: pd.DataFrame with gdp data for the latest year available, where neccessary.
"""

# Make sure data is sorted by year
df = df.sort_values(by=["iso_3", "year"])

# Forward fill the missing gdp values
df["gdp"] = df.groupby(
["country", "continent", "income_level", "iso_3"], dropna=False
)["gdp"].ffill()

return df


def calculate_share_gdp(df: pd.DataFrame) -> pd.DataFrame:
"""
Creates net_flow_over_gdp_percent column, which divides net_flow by gdp.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional column for 'net_flow_over_gdp_percent'
"""

df["net_flows_over_gdp_percent"] = df["net_flow"] * 100 / df["gdp"]

return df


def add_nnt_column(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
"""
Adds new column nnt which is filled with 'net negative transfers' when net_transfers
value is below 0.

Args:
df (pd.DataFrame)

Returns: pd.DataFrame with additional column 'nnt'.
"""
df["nnt"] = np.where(df[target_col] < 0, "net negative transfers", "")
return df


def filter_for_specified_years(df: pd.DataFrame, years: list[int]) -> pd.DataFrame:
"""
Filters the dataframe to keep only the rows for specified years.

Args:
df (pd.DataFrame)
years (list[int]): list of required years for filtered DataFrame.
"""
return df.loc[lambda d: d.year.isin(years)]


def flourish_1_beeswarm_pipeline(df: pd.DataFrame) -> pd.DataFrame:
"""
Pipeline function to produce flourish chart 2.1. Output takes data from
'full_flows_country.parquet', 'net_flow_projections_country.parquet' and
'weo_data.csv' files to calculate net_flows as a share of gdp for 2022 and 2025.

Args:
df (pd.DataFrame): DataFrame containing the 'full_flows_country.parquet' data

Returns: pd.DataFrame of 2022 and 2025 data ready for export as CSV for Flourish.
"""

# filter for current prices
df = df.loc[lambda d: d.prices == "current"]

# Groupby country
df = (
df.groupby(
by=["year", "country", "continent", "income_level", "indicator_type"]
)["value"]
.sum()
.reset_index()
)

# Pivot to have inflow and outflow columns
df = df.pivot_table(
index=["year", "country", "continent", "income_level"],
columns="indicator_type",
values="value",
fill_value=0,
).reset_index()

# Calculate net transfers
df = calculate_net_transfers(df)

# add projections data
df = add_projections_data(df, as_billion=True)

# add iso columns (needed to merge in GDP data in next step)
df = add_iso_codes_column(
df=df, id_column="country", id_type="regex", target_column="iso_3"
)

# Add new column for net flows as a share of GDP (using WEO GDP data)
df = calculate_net_flow_as_share_gdp(df)

# Add negative net transfers column (states 'nnt' when outflow > inflow)
df = add_nnt_column(df, target_col="net_flow")

# filter for specified years
df = filter_for_specified_years(df=df, years=[2022, 2025])

return df


if __name__ == "__main__":

data = get_parquet(file_name="full_flows_country.parquet")

flourish_chart_1 = flourish_1_beeswarm_pipeline(df=data)
flourish_chart_1.to_csv(Paths.output / "chart_2_1.csv", index=False)
Loading