diff --git a/output/README.md b/output/README.md index 2637de0..d196ed2 100644 --- a/output/README.md +++ b/output/README.md @@ -33,6 +33,14 @@ It is created by the script [net_flows.py](../scripts/analysis/net_flows.py). All of the files above have an additional version ending in `_china_as_counterpart_type`, which separates inflows and outflows from China as a counterpart type, from the rest of the data. +### Net negative flows +- [`net_negative_flows_country.parquet`](net_negative_flows_country.parquet): Data on net +negative flows (inflows - outflows < 0) by country (for all counterparts total). Presented yearly, including continent and income level. +It is created by the script [negative_net_flows.py](../scripts/analysis/negative_net_flows.py). +- [`net_negative_flows_group.parquet`](net_negative_flows_group.parquet): Data on net + negative flows (inflows - outflows < 0) by country groupings (for all counterparts total). Presented yearly. + It is created by the script [negative_net_flows.py](../scripts/analysis/negative_net_flows.py). 
+ ### Projections (forthcoming) diff --git a/output/net_negative_flows_country.parquet b/output/net_negative_flows_country.parquet new file mode 100644 index 0000000..a8c80d2 Binary files /dev/null and b/output/net_negative_flows_country.parquet differ diff --git a/output/net_negative_flows_group.parquet b/output/net_negative_flows_group.parquet new file mode 100644 index 0000000..7e3b8a0 Binary files /dev/null and b/output/net_negative_flows_group.parquet differ diff --git a/scripts/analysis/common.py b/scripts/analysis/common.py index be4ee88..82532d2 100644 --- a/scripts/analysis/common.py +++ b/scripts/analysis/common.py @@ -143,3 +143,33 @@ def create_groupings(data: pd.DataFrame) -> pd.DataFrame: data_grouped = data_grouped.loc[lambda d: d.country.isin(GROUPS)] return data_grouped + + +def reorder_countries(df: pd.DataFrame, counterpart_type: bool = False) -> pd.DataFrame: + """Reorder countries by continent and income level""" + + df["order"] = df["country"].map(GROUPS).fillna(99) + + counterpart_order = { + "Bilateral": 1, + "Multilateral": 2, + "Private": 3, + "China": 4, + } + + if counterpart_type: + df["order_counterpart"] = ( + df["counterpart_type"].map(counterpart_order).fillna(99) + ) + + df = ( + df.sort_values( + ["order", "country", "year", "order_counterpart"] + if counterpart_type + else ["order", "country", "year"] + ) + .drop(columns=["order", "order_counterpart"] if counterpart_type else ["order"]) + .reset_index(drop=True) + ) + + return df diff --git a/scripts/analysis/debt_service.py b/scripts/analysis/debt_service.py index 6b7c275..c34cb72 100644 --- a/scripts/analysis/debt_service.py +++ b/scripts/analysis/debt_service.py @@ -6,8 +6,8 @@ exclude_outlier_countries, create_grouping_totals, create_world_total, - GROUPS, add_china_as_counterpart_type, + reorder_countries, ) from scripts.config import Paths from scripts.data.outflows import get_debt_service_data @@ -47,36 +47,6 @@ def add_africa_total(df: pd.DataFrame) -> pd.DataFrame: 
return pd.concat([df, africa], ignore_index=True) -def reorder_countries(df: pd.DataFrame, counterpart_type: bool = False) -> pd.DataFrame: - """Reorder countries by continent and income level""" - - df["order"] = df["country"].map(GROUPS).fillna(99) - - counterpart_order = { - "Bilateral": 1, - "Multilateral": 2, - "Private": 3, - "China": 4, - } - - if counterpart_type: - df["order_counterpart"] = ( - df["counterpart_type"].map(counterpart_order).fillna(99) - ) - - df = ( - df.sort_values( - ["order", "country", "year", "order_counterpart"] - if counterpart_type - else ["order", "country", "year"] - ) - .drop(columns=["order", "order_counterpart"] if counterpart_type else ["order"]) - .reset_index(drop=True) - ) - - return df - - def pivot_flourish_columns(df: pd.DataFrame) -> pd.DataFrame: """Pivot the DataFrame to have the counterpart_type as columns""" return df.pivot( diff --git a/scripts/analysis/negative_net_flows.py b/scripts/analysis/negative_net_flows.py index 8080ecf..aeb30cf 100644 --- a/scripts/analysis/negative_net_flows.py +++ b/scripts/analysis/negative_net_flows.py @@ -4,6 +4,7 @@ convert_to_net_flows, summarise_by_country, create_groupings, + reorder_countries, ) from scripts.analysis.net_flows import get_all_flows, exclude_outlier_countries from scripts.analysis.population_tools import add_population_under18 @@ -46,20 +47,10 @@ def count_negative_flows_by_year(data: pd.DataFrame) -> pd.DataFrame: return data -def negative_flows_list(data: pd.DataFrame, latest_only: bool = True) -> pd.DataFrame: +def negative_flows_only(data: pd.DataFrame) -> pd.DataFrame: """produce a list of countries with negative flows""" - data = data.query("value < 0") - - if latest_only: - data = data.query("year == 2022") - - data = data.drop_duplicates(subset=["year", "country"]).reset_index(drop=True) - - return data.sort_values( - ["year", "income_level", "continent", "value"], - ascending=[False, True, True, True], - ) + return data.query("value < 0") def 
output_pipeline(constant: bool = False, limit_to_2022: bool = True) -> None: @@ -68,12 +59,20 @@ def output_pipeline(constant: bool = False, limit_to_2022: bool = True) -> None: .pipe(exclude_outlier_countries) .pipe(convert_to_net_flows) .pipe(summarise_by_country) + .pipe(negative_flows_only) ) - df_grouped = create_groupings(df) + df_grouped = create_groupings(df).pipe(reorder_countries) - return df_grouped + # Save data + df.reset_index(drop=True).to_parquet( + Paths.output / "net_negative_flows_country.parquet" + ) + + df_grouped.reset_index(drop=True).to_parquet( + Paths.output / "net_negative_flows_group.parquet" + ) if __name__ == "__main__": - df = output_pipeline() + output_pipeline() diff --git a/scripts/analysis/net_flow_projections.py b/scripts/analysis/net_flow_projections.py index 4c27dfd..20c249e 100644 --- a/scripts/analysis/net_flow_projections.py +++ b/scripts/analysis/net_flow_projections.py @@ -76,6 +76,9 @@ def projected_negative_list(net_negative_df: pd.DataFrame) -> pd.DataFrame: return data.filter(["year", "country", "continent", "income_level", "net_flows"]) +def projections_pipline() -> pd.DataFrame: ... + + if __name__ == "__main__": df = get_all_flows().pipe(exclude_outlier_countries) inflows_df = calculate_average_inflows(df).query("year == 2022")