From dd9eeb645fa57db74a98ebfd95d23bda75518d0e Mon Sep 17 00:00:00 2001 From: Jorge Rivera Date: Sun, 10 Nov 2024 00:03:09 +0100 Subject: [PATCH] implementing specific tools --- poetry.lock | 21 +-- pydeflate/__init__.py | 27 +++- pydeflate/core/api.py | 69 +++++++-- pydeflate/deflate/deflators.py | 230 +++++++++++++++++++++++++++- pydeflate/deflate/legacy_deflate.py | 53 +++++++ pydeflate/exchange/__init__.py | 0 pydeflate/exchange/exchangers.py | 147 ++++++++++++++++++ pydeflate/pydeflate_config.py | 3 + pydeflate/sources/common.py | 5 +- pydeflate/sources/imf.py | 3 +- pydeflate/sources/world_bank.py | 5 +- pydeflate/utils.py | 28 +--- pyproject.toml | 1 - 13 files changed, 514 insertions(+), 78 deletions(-) create mode 100644 pydeflate/exchange/__init__.py create mode 100644 pydeflate/exchange/exchangers.py diff --git a/poetry.lock b/poetry.lock index 55dfa75..3d0c7b7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -269,25 +269,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "country-converter" -version = "1.2" -description = "The country converter (coco) - a Python package for converting country names between different classifications schemes" -optional = false -python-versions = ">=3.7" -files = [ - {file = "country_converter-1.2-py3-none-any.whl", hash = "sha256:5a5e54f9542cc8e109d53a39fa9e9179e6a97826e72594dd16e948611cb278fa"}, - {file = "country_converter-1.2.tar.gz", hash = "sha256:74e87db54b91eda56c5eecc9e36e1b5ca79ca2bca58012ecc4f55ef8a13a6631"}, -] - -[package.dependencies] -pandas = ">=1.0" - -[package.extras] -dev = ["country-converter[lint,test]"] -lint = ["black (>=22.3.0)", "isort (>=5.5.2)"] -test = ["coveralls", "pytest (>=5.4.0)", "pytest-black", "pytest-cov (>=2.7.0)", "pytest-datadir", "pytest-mypy"] - [[package]] name = "et-xmlfile" version = "2.0.0" @@ -1979,4 +1960,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10, 
<4.0" -content-hash = "b97cf8c88b2bbcf52624fd83999ff20bd49641fa258dc09c4ef0a068c1ac41e5" +content-hash = "d41a903b12dad71ecc132e963b888fe05cfc7190dc919acb5e74729c946ab162" diff --git a/pydeflate/__init__.py b/pydeflate/__init__.py index 8b8ccd6..31047d8 100644 --- a/pydeflate/__init__.py +++ b/pydeflate/__init__.py @@ -1,6 +1,18 @@ __author__ = """Jorge Rivera""" __version__ = "1.3.10" +from pydeflate.deflate.deflators import ( + oecd_dac_deflate, + wb_cpi_deflate, + wb_gdp_deflate, + wb_gdp_linked_deflate, + imf_cpi_deflate, + imf_gdp_deflate, + imf_cpi_e_deflate, +) + +from pydeflate.deflate.legacy_deflate import deflate +from pydeflate.exchange.exchangers import oecd_dac_exchange, wb_exchange, imf_exchange from pydeflate.pydeflate_config import setup_logger @@ -14,10 +26,17 @@ def set_pydeflate_path(path): PYDEFLATE_PATHS.data = Path(path).resolve() -logger = setup_logger("pydeflate") - - __all__ = [ "set_pydeflate_path", - "logger", + "oecd_dac_deflate", + "oecd_dac_exchange", + "wb_cpi_deflate", + "wb_gdp_deflate", + "wb_gdp_linked_deflate", + "wb_exchange", + "imf_cpi_deflate", + "imf_gdp_deflate", + "imf_cpi_e_deflate", + "imf_exchange", + "deflate", ] diff --git a/pydeflate/core/api.py b/pydeflate/core/api.py index be1c4fc..60d9345 100644 --- a/pydeflate/core/api.py +++ b/pydeflate/core/api.py @@ -13,6 +13,21 @@ ) +def resolve_common_currencies(currency: str, source: str) -> str: + mapping = { + "USD": "USA", + "EUR": "EMU", + "GBP": "GBR", + "JPY": "JPN", + "CAD": "CAN", + } + + if source == "DAC": + mapping["EUR"] = "EUI" + + return mapping.get(currency, currency) + + def _base_operation( base_obj, data: pd.DataFrame, @@ -20,8 +35,9 @@ def _base_operation( year_column: str, value_column: str, target_value_column: str | None = None, - year_format: str = "%Y", + year_format: str | None = None, exchange: bool = False, + reversed_: bool = False, ): """Perform deflation or exchange rate adjustment on input data using pydeflate data. 
@@ -35,6 +51,7 @@ def _base_operation( year_format (str, optional): Format of the year. Defaults to "%Y". exchange (bool, optional): Whether to perform an exchange rate adjustment (True) or deflation (False). + reversed_ (bool, optional): If True, perform the operation in reverse. Returns: pd.DataFrame: DataFrame with adjusted values and original columns preserved. @@ -61,18 +78,16 @@ def _base_operation( # Flag missing data flag_missing_pydeflate_data(base_obj._unmatched_data) - - # Calculate deflated values - if exchange: - base_obj._merged_data[target_value_column] = ( - base_obj._merged_data[value_column] - * base_obj._merged_data["pydeflate_EXCHANGE"] - ).round(6) + x = base_obj._merged_data[value_column] + y = base_obj._merged_data[ + "pydeflate_EXCHANGE" if exchange else "pydeflate_deflator" + ] + + # Apply the correct operation based on `exchange` and `reversed` + if (exchange and not reversed_) or (not exchange and reversed_): + base_obj._merged_data[target_value_column] = (x * y).round(6) else: - base_obj._merged_data[target_value_column] = ( - base_obj._merged_data[value_column] - / base_obj._merged_data["pydeflate_deflator"] - ).round(6) + base_obj._merged_data[target_value_column] = (x / y).round(6) return base_obj._merged_data[cols] @@ -101,6 +116,16 @@ def __init__( use_source_codes (bool, optional): Use source-specific entity codes. Defaults to False. """ + + # Try to accept common currencies by their country codes + source_currency = resolve_common_currencies( + source_currency, exchange_source.name + ) + + target_currency = resolve_common_currencies( + target_currency, exchange_source.name + ) + self.exchange_rates = Exchange( source=exchange_source, source_currency=source_currency, @@ -127,7 +152,7 @@ def _merge_pydeflate_data( data: pd.DataFrame, entity_column: str, year_column: str, - year_format: str = "%Y", + year_format: str | None = None, ) -> None: """Merge pydeflate exchange rate data into input data by year and entity. 
@@ -164,7 +189,8 @@ def exchange( year_column: str, value_column: str, target_value_column: str | None = None, - year_format: str = "%Y", + year_format: str | None = None, + reversed_: bool = False, ): """Apply exchange rate conversion to input data. @@ -175,6 +201,7 @@ def exchange( value_column (str): Column with values to adjust. target_value_column (str | None, optional): Column for adjusted values. Defaults to `value_column`. year_format (str, optional): Format of the year. Defaults to "%Y". + reversed_ (bool, optional): If True, perform the operation in reverse. defaults to False. Returns: pd.DataFrame: DataFrame with exchange rate-adjusted values. @@ -188,6 +215,7 @@ def exchange( value_column=value_column, target_value_column=target_value_column, year_format=year_format, + reversed_=reversed_, ) @@ -223,6 +251,15 @@ def __init__( to_current (bool, optional): If True, adjust to current year values. Defaults to False. """ + # Try to accept common currencies by their country codes + source_currency = resolve_common_currencies( + source_currency, deflator_source.name + ) + + target_currency = resolve_common_currencies( + target_currency, deflator_source.name + ) + self.exchange_rates = Exchange( source=exchange_source, source_currency=source_currency, @@ -305,7 +342,7 @@ def _merge_pydeflate_data( data: pd.DataFrame, entity_column: str, year_column: str, - year_format: str = "%Y", + year_format: str | None = None, ) -> None: """Merge pydeflate deflator data into the input data by year and entity. @@ -342,7 +379,7 @@ def deflate( year_column: str, value_column: str, target_value_column: str | None = None, - year_format: str = "%Y", + year_format: str | None = None, ): """Apply deflation adjustment to input data using pydeflate deflator rates. 
diff --git a/pydeflate/deflate/deflators.py b/pydeflate/deflate/deflators.py index 099756a..238246b 100644 --- a/pydeflate/deflate/deflators.py +++ b/pydeflate/deflate/deflators.py @@ -1,10 +1,228 @@ -def dac_deflate(): - return NotImplemented +from functools import wraps +import pandas as pd -def wb_deflate(): - return NotImplemented +from pydeflate.core.api import BaseDeflate +from pydeflate.core.source import DAC, WorldBank, IMF -def imf_deflate(): - return NotImplemented +def _generate_docstring(source_name: str, price_kind: str) -> str: + """Generate docstring for each decorated deflation function.""" + return ( + f"Deflate a DataFrame using the {source_name} deflator source ({price_kind}).\n\n" + f"This function applies deflation adjustments to a DataFrame using the {source_name} {price_kind} deflator.\n\n" + "Args:\n" + " data (pd.DataFrame): The input DataFrame containing data to deflate.\n" + " base_year (int): The base year for calculating deflation adjustments.\n" + " source_currency (str, optional): The source currency code. Defaults to 'USA'.\n" + " target_currency (str, optional): The target currency code. Defaults to 'USA'.\n" + " id_column (str, optional): Column with entity identifiers. Defaults to 'iso_code'.\n" + " year_column (str, optional): Column with year information. Defaults to 'year'.\n" + " use_source_codes (bool, optional): Use source-specific entity codes. Defaults to False.\n" + " value_column (str, optional): Column with values to deflate. Defaults to 'value'.\n" + " target_value_column (str, optional): Column to store deflated values. Defaults to 'value'.\n" + " to_current (bool, optional): Adjust values to current-year values if True. Defaults to False.\n" + " year_format (str | None, optional): Format of the year in `year_column`. Defaults to None.\n" + " update_deflators (bool, optional): Update the deflator data before deflation. 
Defaults to False.\n\n" + "Returns:\n" + " pd.DataFrame: DataFrame with deflated values in the `target_value_column`.\n" + ) + + +def _deflator(deflator_source_cls, price_kind): + """Decorator to create deflate wrappers with specific deflator source and price kind.""" + + def decorator(func): + @wraps(func) + def wrapper( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, + ): + # Validate input parameters + if not isinstance(data, pd.DataFrame): + raise ValueError("The 'data' parameter must be a pandas DataFrame.") + if not isinstance(base_year, int): + raise ValueError("The 'base_year' parameter must be an integer.") + if id_column not in data.columns: + raise ValueError( + f"The id_column '{id_column}' is not in the DataFrame." + ) + if year_column not in data.columns: + raise ValueError( + f"The year_column '{year_column}' is not in the DataFrame." + ) + if value_column not in data.columns: + raise ValueError( + f"The value_column '{value_column}' is not in the DataFrame." 
+ ) + + # Copy the data to avoid modifying the original + to_deflate = data.copy() + + # Initialize the deflator source + source = deflator_source_cls(update=update_deflators) + + # Create a deflator object + deflator = BaseDeflate( + base_year=base_year, + deflator_source=source, + exchange_source=source, + source_currency=source_currency, + target_currency=target_currency, + price_kind=price_kind, + use_source_codes=use_source_codes, + to_current=to_current, + ) + + # Deflate the data + return deflator.deflate( + data=to_deflate, + entity_column=id_column, + year_column=year_column, + value_column=value_column, + target_value_column=target_value_column, + year_format=year_format, + ) + + # Add the deflator source and price kind to the function + wrapper.__doc__ = _generate_docstring(deflator_source_cls.__name__, price_kind) + return wrapper + + return decorator + + +@_deflator(DAC, "NGDP_D") +def oecd_dac_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +) -> pd.DataFrame: ... + + +@_deflator(WorldBank, "NGDP_D") +def wb_gdp_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ... 
+ + +@_deflator(WorldBank, "NGDP_DL")  # "NGDP_DL", not "NGDP_D": the latter made this identical to wb_gdp_deflate +def wb_gdp_linked_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ... + + +@_deflator(WorldBank, "CPI") +def wb_cpi_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ... + + +@_deflator(IMF, "NGDP_D") +def imf_gdp_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ... + + +@_deflator(IMF, "PCPI") +def imf_cpi_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ...
+ + +@_deflator(IMF, "PCPIE") +def imf_cpi_e_deflate( + data: pd.DataFrame, + *, + base_year: int, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + to_current: bool = False, + year_format: str | None = None, + update_deflators: bool = False, +): ... diff --git a/pydeflate/deflate/legacy_deflate.py b/pydeflate/deflate/legacy_deflate.py index bee9fde..e53a2b2 100644 --- a/pydeflate/deflate/legacy_deflate.py +++ b/pydeflate/deflate/legacy_deflate.py @@ -3,6 +3,9 @@ import pandas as pd from pandas.util._decorators import deprecate_kwarg +from pydeflate.core.api import BaseDeflate +from pydeflate.core.source import DAC, WorldBank, IMF + @deprecate_kwarg(old_arg_name="method", new_arg_name="deflator_method") @deprecate_kwarg(old_arg_name="source", new_arg_name="deflator_source") @@ -54,3 +57,53 @@ def deflate( "Please check the latest documentation for updated methods.", DeprecationWarning, ) + + if id_type != "ISO3": + raise ValueError( + "Only ISO3 ID classification is supported in this version.\n" + "You can use bblocks to convert to ISO3." 
+ ) + + price_kind = { + "oecd_dac": "NGDP_D", + "dac_deflator": "NGDP_D", + "gdp": "NGDP_D", + "cpi": "CPI", + } + + # Mapping of string identifiers to classes and price kinds + deflator_source_map = { + "oecd_dac": DAC, + "dac": DAC, + "wb": WorldBank, + "world_bank": WorldBank, + "imf": IMF, + } + + deflator_source = deflator_source_map[deflator_source.lower()]() + exchange_source = deflator_source_map[exchange_source.lower()]() + deflator_method = price_kind.get(deflator_method.lower(), deflator_method).upper() + + # Copy the data to avoid modifying the original + to_deflate = df.copy() + + # Create a deflator object + deflator = BaseDeflate( + base_year=base_year, + deflator_source=deflator_source, + exchange_source=exchange_source, + source_currency=source_currency, + target_currency=target_currency, + price_kind=deflator_method, + to_current=to_current, + ) + + # Deflate the data + return deflator.deflate( + data=to_deflate, + entity_column=id_column, + year_column=date_column, + value_column=source_column, + target_value_column=target_column, + year_format=None, + ) diff --git a/pydeflate/exchange/__init__.py b/pydeflate/exchange/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pydeflate/exchange/exchangers.py b/pydeflate/exchange/exchangers.py new file mode 100644 index 0000000..eba0326 --- /dev/null +++ b/pydeflate/exchange/exchangers.py @@ -0,0 +1,147 @@ +from functools import wraps + +import pandas as pd + +from pydeflate.core.api import BaseExchange +from pydeflate.core.source import DAC, WorldBank, IMF + + +def _generate_docstring(source_name: str) -> str: + """Generate docstring for each decorated exchange function.""" + return ( + f"Exchange a DataFrame using the {source_name} rates source.\n\n" + f"This function applies exchange rates to a DataFrame using the {source_name} rates.\n\n" + "Args:\n" + " data (pd.DataFrame): The input DataFrame containing data to convert.\n" + " source_currency (str, optional): The source currency 
code. Defaults to 'USA'.\n" + " target_currency (str, optional): The target currency code. Defaults to 'USA'.\n" + " id_column (str, optional): Column with entity identifiers. Defaults to 'iso_code'.\n" + " year_column (str, optional): Column with year information. Defaults to 'year'.\n" + " use_source_codes (bool, optional): Use source-specific entity codes. Defaults to False.\n" + " value_column (str, optional): Column with values to convert. Defaults to 'value'.\n" + " target_value_column (str, optional): Column to store converted values. Defaults to 'value'.\n" + " reversed_ (bool, optional): If True, apply the conversion in reverse. Defaults to False.\n" + " year_format (str | None, optional): Format of the year in `year_column`. Defaults to None.\n" + " update_rates (bool, optional): Update the exchange rate data. Defaults to False.\n\n" + "Returns:\n" + " pd.DataFrame: DataFrame with converted values in the `target_value_column`.\n" + ) + + +def _exchange(exchange_source_cls): + """Decorator to create exchange wrappers with specific source""" + + def decorator(func): + @wraps(func) + def wrapper( + data: pd.DataFrame, + *, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + reversed_: bool = False, + year_format: str | None = None, + update_rates: bool = False, + ): + # Validate input parameters + if not isinstance(data, pd.DataFrame): + raise ValueError("The 'data' parameter must be a pandas DataFrame.") + + if id_column not in data.columns: + raise ValueError( + f"The id_column '{id_column}' is not in the DataFrame." + ) + if year_column not in data.columns: + raise ValueError( + f"The year_column '{year_column}' is not in the DataFrame." + ) + if value_column not in data.columns: + raise ValueError( + f"The value_column '{value_column}' is not in the DataFrame."
+ ) + + # Copy the data to avoid modifying the original + to_exchange = data.copy() + + # Initialize the exchange-rate source + source = exchange_source_cls(update=update_rates) + + # Create an exchange object + exchange = BaseExchange( + exchange_source=source, + source_currency=source_currency, + target_currency=target_currency, + use_source_codes=use_source_codes, + ) + + # Apply the exchange rates to the data + return exchange.exchange( + data=to_exchange, + entity_column=id_column, + year_column=year_column, + value_column=value_column, + target_value_column=target_value_column, + year_format=year_format, + reversed_=reversed_, + ) + + # Replace the stub's docstring with one generated for this source + wrapper.__doc__ = _generate_docstring(exchange_source_cls.__name__) + return wrapper + + return decorator + + +@_exchange(DAC) +def oecd_dac_exchange( + data: pd.DataFrame, + *, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + reversed_: bool = False, + year_format: str | None = None, + update_rates: bool = False, +) -> pd.DataFrame: ... + + +@_exchange(WorldBank) +def wb_exchange( + data: pd.DataFrame, + *, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + reversed_: bool = False, + year_format: str | None = None, + update_rates: bool = False, +) -> pd.DataFrame: ...
+ + +@_exchange(IMF) +def imf_exchange( + data: pd.DataFrame, + *, + source_currency: str = "USA", + target_currency: str = "USA", + id_column: str = "iso_code", + year_column: str = "year", + use_source_codes: bool = False, + value_column: str = "value", + target_value_column: str = "value", + reversed_: bool = False, + year_format: str | None = None, + update_rates: bool = False, +) -> pd.DataFrame: ... diff --git a/pydeflate/pydeflate_config.py b/pydeflate/pydeflate_config.py index 447898d..0a430ce 100644 --- a/pydeflate/pydeflate_config.py +++ b/pydeflate/pydeflate_config.py @@ -38,3 +38,6 @@ def setup_logger(name) -> logging.Logger: logger_.propagate = False return logger_ + + +logger = setup_logger("pydeflate") diff --git a/pydeflate/sources/common.py b/pydeflate/sources/common.py index bdeff55..78fdffc 100644 --- a/pydeflate/sources/common.py +++ b/pydeflate/sources/common.py @@ -5,10 +5,9 @@ import pandas as pd from hdx.location.country import Country -from pydeflate import logger -from pydeflate.pydeflate_config import PYDEFLATE_PATHS +from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger -AvailableDeflators = Literal["NGDP_D", "NGDP_DL", "PCPI", "PCPIE"] +AvailableDeflators = Literal["NGDP_D", "NGDP_DL", "CPI", "PCPI", "PCPIE"] def check_file_age(file: Path) -> int: diff --git a/pydeflate/sources/imf.py b/pydeflate/sources/imf.py index cf31ffe..7a654f9 100644 --- a/pydeflate/sources/imf.py +++ b/pydeflate/sources/imf.py @@ -3,8 +3,7 @@ import pandas as pd from imf_reader import weo -from pydeflate import logger -from pydeflate.pydeflate_config import PYDEFLATE_PATHS +from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger from pydeflate.sources.common import ( today, add_pydeflate_iso3, diff --git a/pydeflate/sources/world_bank.py b/pydeflate/sources/world_bank.py index 7005cdd..8ba9d51 100644 --- a/pydeflate/sources/world_bank.py +++ b/pydeflate/sources/world_bank.py @@ -4,8 +4,7 @@ import pandas as pd import wbgapi as wb -from pydeflate 
import logger -from pydeflate.pydeflate_config import PYDEFLATE_PATHS +from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger from pydeflate.sources.common import ( enforce_pyarrow_types, today, @@ -184,4 +183,4 @@ def read_wb(update: bool = False) -> pd.DataFrame: if __name__ == "__main__": - df = read_wb(False) + df = read_wb(True) diff --git a/pydeflate/utils.py b/pydeflate/utils.py index 0855fa2..300c5c3 100644 --- a/pydeflate/utils.py +++ b/pydeflate/utils.py @@ -1,11 +1,9 @@ import json -import country_converter as coco import numpy as np import pandas as pd -from pydeflate import logger -from pydeflate.pydeflate_config import PYDEFLATE_PATHS +from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger from pydeflate.sources.common import enforce_pyarrow_types @@ -51,27 +49,12 @@ def check_year_as_number(df: pd.DataFrame, date_column: str) -> (pd.DataFrame, b return df, year_as_number -def to_iso3( - df: pd.DataFrame, - codes_col: str, - target_col: str, - src_classification: str | None = None, - not_found: str | None = None, -) -> pd.DataFrame: - """Convert a column of country codes to iso3""" - - cc = coco.CountryConverter() - - df[target_col] = cc.pandas_convert( - df[codes_col], src=src_classification, to="ISO3", not_found=not_found - ) - - return df - - def create_pydeflate_year( - data: pd.DataFrame, year_column: str, year_format: str = "%Y" + data: pd.DataFrame, year_column: str, year_format: str | None = None ) -> pd.DataFrame: + if year_format is None: + year_format = "ISO8601" + data = data.copy() data["pydeflate_year"] = pd.to_datetime( @@ -87,7 +70,6 @@ def merge_user_and_pydeflate_data( entity_column: str, ix: list[str], ) -> pd.DataFrame: - return data.merge( pydeflate_data, how="outer", diff --git a/pyproject.toml b/pyproject.toml index a5d4e40..2c7a284 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ imf-reader = "^1.1.0" hdx-python-country = "^3.8.1" oda-reader = "^1.0.0" wbgapi = "^1.0.12" -country-converter = 
"^1.2" [tool.poetry.dev-dependencies]