diff --git a/notebooks/00_vault_cleanup.ipynb b/notebooks/00_vault_cleanup.ipynb index 89468ef..628425a 100644 --- a/notebooks/00_vault_cleanup.ipynb +++ b/notebooks/00_vault_cleanup.ipynb @@ -27,10 +27,6 @@ } ], "source": [ - "import os\n", - "import pathlib\n", - "\n", - "import trino\n", "import osc_ingest_trino as osc\n", "\n", "ingest_catalog = \"osc_datacommons_dev\"\n", @@ -88,7 +84,9 @@ } ], "source": [ - "engine_dev = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)" + "engine_dev = osc.attach_trino_engine(\n", + " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n", + ")" ] }, {
diff --git a/notebooks/01a_vault_ingest.ipynb b/notebooks/01a_vault_ingest.ipynb index d82a680..8e7545d 100644 --- a/notebooks/01a_vault_ingest.ipynb +++ b/notebooks/01a_vault_ingest.ipynb @@ -30,7 +30,9 @@ "# Load some standard environment variables from a dot-env file, if it exists.\n", "# If no such file can be found, does not fail, and so allows these environment vars to\n", "# be populated in some other way\n", - "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n", + "dotenv_dir = os.environ.get(\n", + " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n", + ")\n", "dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n", "if os.path.exists(dotenv_path):\n", " load_dotenv(dotenv_path=dotenv_path, override=True)" @@ -57,7 +59,9 @@ "print(\"Initializing Dev tables\")\n", "\n", "sqlstring = \"trino://{user}@{host}:{port}/\".format(\n", - " user=os.environ[\"TRINO_USER_USER1\"], host=os.environ[\"TRINO_HOST\"], port=os.environ[\"TRINO_PORT\"]\n", + " user=os.environ[\"TRINO_USER_USER1\"],\n", + " host=os.environ[\"TRINO_HOST\"],\n", + " port=os.environ[\"TRINO_PORT\"],\n", ")\n", "\n", "ingest_catalog = \"osc_datacommons_dev\"\n", @@ -161,7 +165,11 @@ "source": [ "# loading excel doesn't work from 'get_object' mode\n", "\n", - "s3.download_file(os.environ[\"DEMO1_S3_BUCKET\"], \"itr-demo-data/ITR_company_data_minimum_required.xlsx\", \"/tmp/t.xlsx\")\n", + "s3.download_file(\n", + " os.environ[\"DEMO1_S3_BUCKET\"],\n", + " \"itr-demo-data/ITR_company_data_minimum_required.xlsx\",\n", + " \"/tmp/t.xlsx\",\n", + ")\n", "\n", "# load the raw file from the bucket\n", "xls = pd.ExcelFile(\"/tmp/t.xlsx\")\n", @@ -187,7 +195,7 @@ "tablename = f\"{itr_prefix}fundamental_data\"\n", "\n", "# get this sheet, and assess data types\n", - "df = pd.read_excel(xls, f\"fundamental_data\").convert_dtypes()\n", + "df = pd.read_excel(xls, \"fundamental_data\").convert_dtypes()\n", "\n", "# rename columns to forms that sql will handle\n", "osc.enforce_sql_column_names(df, inplace=True)\n", @@ -267,7 +275,11 @@ "# in live data platform there will need to be policies and mechanisms for either\n", "# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n", "# this is a data governance topic\n", - "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "osc._do_sql(\n", + " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, { @@ -322,7 +334,11 @@ ], "source": [ "# test that we can get data\n", - "qres = osc._do_sql(f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "qres = osc._do_sql(\n", + " f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, { @@ -439,7 +455,11 @@ "# in live data platform there will need to be policies and mechanisms for either\n", "# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n", "# this is a data governance topic\n", - "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "osc._do_sql(\n", + " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, { @@ -494,7 +514,11 @@ ], "source": [ "# test that we can get data\n", - "qres = osc._do_sql(f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "qres = osc._do_sql(\n", + " f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, { @@ -543,7 +567,11 @@ "# in live data platform there will need to be policies and mechanisms for either\n", "# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n", "# this is a data governance topic\n", - "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "osc._do_sql(\n", + " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, { @@ -627,7 +655,11 @@ "# in live data platform there will need to be policies and mechanisms for either\n", "# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n", "# this is a data governance topic\n", - "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)" + "osc._do_sql(\n", + " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n", + " engine,\n", + " verbose=True,\n", + ")" ] }, {
diff --git a/notebooks/01b_vault_ingest_v2.ipynb b/notebooks/01b_vault_ingest_v2.ipynb index 3dcf6c3..b15dff7 100644 --- a/notebooks/01b_vault_ingest_v2.ipynb +++ b/notebooks/01b_vault_ingest_v2.ipynb @@ -15,16 +15,16 @@ "import pandas as pd\n", "import numpy as np\n", "import osc_ingest_trino as osc\n", - "import trino\n", "\n", - "import ITR\n", "from ITR import data_dir as json_data_dir\n", "from ITR_examples import data_dir as xlsx_data_dir\n", "\n", - "from ITR.configs import ITR_median, ITR_mean, LoggingConfig\n", - "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n", + "from ITR.data.base_providers import (\n", + " BaseProviderProductionBenchmark,\n", + " BaseProviderIntensityBenchmark,\n", + ")\n", "from ITR.data.data_warehouse import DataWarehouse\n", - "from ITR.data.osc_units import ureg, Q_, PA_, asPintSeries\n", + "from ITR.data.osc_units import Q_\n", "from ITR.data.template import TemplateProviderCompany\n", "from ITR.data.vault_providers import (\n", " VaultCompanyDataProvider,\n", @@ -32,14 +32,10 @@ ")\n", "from ITR.interfaces import (\n", " EScope,\n", - " ETimeFrames,\n", - " EScoreResultType,\n", " IEIBenchmarkScopes,\n", " IProductionBenchmarkScopes,\n", " ProjectionControls,\n", ")\n", - "from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", - "from ITR.temperature_score import TemperatureScore\n", "\n", "# isort: split\n", "\n", @@ -71,7 +67,9 @@ "logger = logging.getLogger(__name__)\n", "logger.setLevel(logging.INFO)\n", "\n", - "formatter = logging.Formatter(\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\") # LoggingConfig.FORMAT\n",
+ "formatter = logging.Formatter(\n", + " \"%(asctime)s - %(name)s - %(levelname)s - %(message)s\"\n", + ") # LoggingConfig.FORMAT\n", "stream_handler = logging.StreamHandler()\n", "stream_handler.setFormatter(formatter)\n", "logger.addHandler(stream_handler)\n", @@ -97,8 +95,12 @@ "benchmark_EI_TPI_15_file = \"benchmark_EI_TPI_1_5_degrees.json\"\n", "benchmark_EI_TPI_file = \"benchmark_EI_TPI_2_degrees.json\"\n", "benchmark_EI_TPI_below_2_file = \"benchmark_EI_TPI_below_2_degrees.json\"\n", - "benchmark_EI_TPI_2deg_high_efficiency_file = \"benchmark_EI_TPI_2_degrees_high_efficiency.json\"\n", - "benchmark_EI_TPI_2deg_shift_improve_file = \"benchmark_EI_TPI_2_degrees_shift_improve.json\"" + "benchmark_EI_TPI_2deg_high_efficiency_file = (\n", + " \"benchmark_EI_TPI_2_degrees_high_efficiency.json\"\n", + ")\n", + "benchmark_EI_TPI_2deg_shift_improve_file = (\n", + " \"benchmark_EI_TPI_2_degrees_shift_improve.json\"\n", + ")" ] }, { @@ -115,7 +117,9 @@ " if col.startswith(\"investment_value\"):\n", " if match := re.match(r\".*\\[([A-Z]{3})\\]\", col, re.I):\n", " df_portfolio.rename(columns={col: \"investment_value\"}, inplace=True)\n", - " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(f\"pint[{match.group(1)}]\")" + " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(\n", + " f\"pint[{match.group(1)}]\"\n", + " )" ] }, { @@ -137,7 +141,9 @@ " benchmark_file = benchmark_EI_TPI_15_file\n", "elif eibm == \"OECM\":\n", " benchmark_file = benchmark_EI_OECM_file\n", - " logger.info(\"OECM benchmark is for backward compatibility only. Use OECM_PC instead.\")\n", + " logger.info(\n", + " \"OECM benchmark is for backward compatibility only. Use OECM_PC instead.\"\n", + " )\n", "else:\n", " benchmark_file = benchmark_EI_TPI_below_2_file\n", "# load intensity benchmarks\n", @@ -160,8 +166,12 @@ " if scope_name not in parsed_json:\n", " parsed_json[scope_name] = extra_json[scope_name]\n", " else:\n", - " parsed_json[scope_name][\"benchmarks\"] += extra_json[scope_name][\"benchmarks\"]\n", - "EI_bm = BaseProviderIntensityBenchmark(EI_benchmarks=IEIBenchmarkScopes.model_validate(parsed_json))" + " parsed_json[scope_name][\"benchmarks\"] += extra_json[scope_name][\n", + " \"benchmarks\"\n", + " ]\n", + "EI_bm = BaseProviderIntensityBenchmark(\n", + " EI_benchmarks=IEIBenchmarkScopes.model_validate(parsed_json)\n", + ")" ] }, { @@ -282,7 +292,9 @@ "# We fill a conventional DataWarehouse with template data\n", "# After the ingestion process is complete, downstream users can access DataVaultWarehouse\n", "\n", - "template_company_data = TemplateProviderCompany(company_data_path, projection_controls=ProjectionControls())\n", + "template_company_data = TemplateProviderCompany(\n", + " company_data_path, projection_controls=ProjectionControls()\n", + ")\n", "Warehouse = DataWarehouse(\n", " template_company_data,\n", " benchmark_projected_production=base_production_bm,\n", @@ -330,7 +342,9 @@ ], "source": [ "# This will have identity of main notebook user, not OS-Climate-User1\n", - "ingest_engine = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)" + "ingest_engine = osc.attach_trino_engine(\n", + " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n", + ")" ] }, { @@ -478,7 +492,9 @@ "\n", "for col in [\"cash\", \"debt\"]:\n", " if col not in template_company_data.df_fundamentals.columns:\n", - " template_company_data.df_fundamentals[col] = np.nan * 
template_company_data.df_fundamentals[\"company_revenue\"]\n", + " template_company_data.df_fundamentals[col] = (\n", + " np.nan * template_company_data.df_fundamentals[\"company_revenue\"]\n", + " )\n", "\n", "df = template_company_data.df_fundamentals[fundamental_cols].copy()\n", "\n", @@ -498,7 +514,14 @@ "\n", "# ingest company data\n", "create_table_from_df(\n", - " df, ingest_schema, company_tablename, ingest_engine, hive_bucket, hive_catalog, hive_schema, verbose=True\n", + " df,\n", + " ingest_schema,\n", + " company_tablename,\n", + " ingest_engine,\n", + " hive_bucket,\n", + " hive_catalog,\n", + " hive_schema,\n", + " verbose=True,\n", ")" ] }, @@ -509,21 +532,24 @@ "metadata": {}, "outputs": [], "source": [ - "company_info_at_base_year = template_company_data.get_company_intensity_and_production_at_base_year(\n", - " [\n", - " company_id\n", - " for company_id in template_company_data.df_fundamentals.company_id.values\n", - " if company_id != \"US6745991058-chem\"\n", - " ]\n", + "company_info_at_base_year = (\n", + " template_company_data.get_company_intensity_and_production_at_base_year(\n", + " [\n", + " company_id\n", + " for company_id in template_company_data.df_fundamentals.company_id.values\n", + " if company_id != \"US6745991058-chem\"\n", + " ]\n", + " )\n", ")\n", "\n", - "import warnings\n", "\n", "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", " # See https://github.com/hgrecco/pint-pandas/issues/128\n", - " projected_production = Warehouse.benchmark_projected_production.get_company_projected_production(\n", - " company_info_at_base_year\n", + " projected_production = (\n", + " Warehouse.benchmark_projected_production.get_company_projected_production(\n", + " company_info_at_base_year\n", + " )\n", " )" ] }, @@ -666,7 +692,8 @@ "df.columns.set_names(\"year\", inplace=True)\n", "df2 = df.unstack(level=0).to_frame(\"production_by_year\").reset_index(\"year\")\n", "df3 = pd.read_sql(\n", - " f\"select distinct company_id, company_name, company_lei, sector from {company_data._company_table}\", ingest_engine\n", + " f\"select distinct company_id, company_name, company_lei, sector from {company_data._company_table}\",\n", + " ingest_engine,\n", ")\n", "df4 = df2.merge(df3, on=\"company_id\").reset_index()\n", "production_df = df4[\n", @@ -948,10 +975,17 @@ " ei_dict[scope] = getattr(company, projection)[scope].projections\n", " else:\n", " ei_dict[scope] = pd.Series(dtype=\"object\")\n", - " ei_data = pd.concat([ei_dict[scope] for scope in EScope.get_scopes()], axis=1).reset_index()\n", - " ei_data.columns = [\"year\"] + [f\"ei_{scope.lower()}_by_year\" for scope in EScope.get_scopes()]\n", + " ei_data = pd.concat(\n", + " [ei_dict[scope] for scope in EScope.get_scopes()], axis=1\n", + " ).reset_index()\n", + " ei_data.columns = [\"year\"] + [\n", + " f\"ei_{scope.lower()}_by_year\" for scope in EScope.get_scopes()\n", + " ]\n", " df = pd.DataFrame(\n", - " data=[[company.company_name, \"\", company.company_id, company.sector] for i in ei_data.index],\n", + " data=[\n", + " [company.company_name, \"\", company.company_id, company.sector]\n", + " for i in ei_data.index\n", + " ],\n", " columns=[\"company_name\", \"company_lei\", \"company_id\", \"sector\"],\n", " )\n", " projection_dfs.append(pd.concat([df, ei_data], axis=1))\n", @@ -959,7 +993,14 @@ " if projection_tablename[i] == target_tablename:\n", " target_df = df2\n", " create_table_from_df(\n", - " df2, ingest_schema, projection_tablename[i], ingest_engine, hive_bucket, hive_catalog, 
hive_schema, verbose=True\n", + " df2,\n", + " ingest_schema,\n", + " projection_tablename[i],\n", + " ingest_engine,\n", + " hive_bucket,\n", + " hive_catalog,\n", + " hive_schema,\n", + " verbose=True,\n", " )" ] }, @@ -992,7 +1033,9 @@ ], "source": [ "osc._do_sql(f\"select count (*) from {target_tablename}\", ingest_engine, verbose=True)\n", - "osc._do_sql(f\"select count (*) from {trajectory_tablename}\", ingest_engine, verbose=True)" + "osc._do_sql(\n", + " f\"select count (*) from {trajectory_tablename}\", ingest_engine, verbose=True\n", + ")" ] }, { @@ -1112,21 +1155,31 @@ "\n", "# Create emissions_data table using production_df and math\n", "emissions_df = production_df.merge(\n", - " target_df.drop(columns=[\"company_name\", \"company_lei\", \"sector\"]), on=[\"company_id\", \"year\"]\n", + " target_df.drop(columns=[\"company_name\", \"company_lei\", \"sector\"]),\n", + " on=[\"company_id\", \"year\"],\n", ")\n", "emissions_df = emissions_df[\n", " ~emissions_df.company_id.isin(\n", - " [\"DE000SYM9999\", \"NO0010657505\", \"GB0000961622\", \"DE000BASF111\", \"IE00BZ12WP82\", \"FR0004024222\"]\n", + " [\n", + " \"DE000SYM9999\",\n", + " \"NO0010657505\",\n", + " \"GB0000961622\",\n", + " \"DE000BASF111\",\n", + " \"IE00BZ12WP82\",\n", + " \"FR0004024222\",\n", + " ]\n", " )\n", "]\n", "print(emissions_df.index.names)\n", "for scope in EScope.get_scopes():\n", " mask = emissions_df[f\"ei_{scope.lower()}_by_year\"].isna()\n", - " emissions_df.loc[mask, f\"ei_{scope.lower()}_by_year\"] = emissions_df[\"production_by_year\"].map(\n", - " lambda x: Q_(np.nan, f\"Mt CO2 / ({str(x.u)})\")\n", - " )\n", + " emissions_df.loc[mask, f\"ei_{scope.lower()}_by_year\"] = emissions_df[\n", + " \"production_by_year\"\n", + " ].map(lambda x: Q_(np.nan, f\"Mt CO2 / ({str(x.u)})\"))\n", " emissions_df[f\"co2_{scope.lower()}_by_year\"] = (\n", - " emissions_df[\"production_by_year\"].mul(emissions_df[f\"ei_{scope.lower()}_by_year\"]).astype(\"pint[Mt CO2e]\")\n", + " emissions_df[\"production_by_year\"]\n", + " .mul(emissions_df[f\"ei_{scope.lower()}_by_year\"])\n", + " .astype(\"pint[Mt CO2e]\")\n", " )\n", " emissions_df = emissions_df.drop(columns=f\"ei_{scope.lower()}_by_year\")\n", "emissions_df = emissions_df.drop(columns=\"production_by_year\")\n", diff --git a/notebooks/02a_vault_dev.ipynb b/notebooks/02a_vault_dev.ipynb index e6af8bc..8634c54 100644 --- a/notebooks/02a_vault_dev.ipynb +++ b/notebooks/02a_vault_dev.ipynb @@ -59,9 +59,6 @@ "\n", "import json\n", "import pandas as pd\n", - "import numpy as np\n", - "from numpy.testing import assert_array_equal\n", - "import ITR\n", "\n", "# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", "# from ITR.temperature_score import TemperatureScore\n", @@ -77,7 +74,7 @@ "\n", "# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n", "# IProductionBenchmarkScopes\n", - "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n", + "from ITR.interfaces import IProductionBenchmarkScopes, IEIBenchmarkScopes\n", "\n", "from ITR.data.osc_units import ureg" ] @@ -92,7 +89,9 @@ "# Load some standard environment variables from a dot-env file, if it exists.\n", "# If no such file can be found, does not fail, and so allows these environment vars to\n", "# be populated in some other way\n", - "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n", + "dotenv_dir = os.environ.get(\n", + " \"CREDENTIAL_DOTENV_DIR\", 
os.environ.get(\"PWD\", \"/opt/app-root/src\")\n", + ")\n", "dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n", "if os.path.exists(dotenv_path):\n", " load_dotenv(dotenv_path=dotenv_path, override=True)" @@ -145,9 +144,20 @@ "qres = osc._do_sql(f\"show tables in {ingest_schema}\", engine_dev, verbose=True)\n", "\n", "# Check that we have the tables we need\n", - "required_tables = [\"company_data\", \"target_data\", \"trajectory_data\", \"emissions_data\", \"benchmark_prod\", \"benchmark_ei\"]\n", + "required_tables = [\n", + " \"company_data\",\n", + " \"target_data\",\n", + " \"trajectory_data\",\n", + " \"emissions_data\",\n", + " \"benchmark_prod\",\n", + " \"benchmark_ei\",\n", + "]\n", "existing_tables = [q[0] for q in qres]\n", - "missing_tables = [rtable for rtable in required_tables if f\"{itr_prefix}{rtable}\" not in existing_tables]\n", + "missing_tables = [\n", + " rtable\n", + " for rtable in required_tables\n", + " if f\"{itr_prefix}{rtable}\" not in existing_tables\n", + "]\n", "if missing_tables:\n", " print(f\"Missing tables (itr_prefix = {itr_prefix}): {missing_tables}\")\n", " assert False" @@ -254,7 +264,9 @@ ], "source": [ "root = root = os.path.dirname(os.getcwd() + \"/../test/\")\n", - "benchmark_prod_json = os.path.join(root, \"inputs\", \"json\", \"benchmark_production_OECM.json\")\n", + "benchmark_prod_json = os.path.join(\n", + " root, \"inputs\", \"json\", \"benchmark_production_OECM.json\"\n", + ")\n", "benchmark_EI_json = os.path.join(root, \"inputs\", \"json\", \"benchmark_EI_OECM_S3.json\")\n", "\n", "# load production benchmarks\n", @@ -262,7 +274,9 @@ " parsed_json = json.load(json_file)\n", "prod_bms = IProductionBenchmarkScopes.parse_obj(parsed_json)\n", "vault_production_bm = VaultProviderProductionBenchmark(\n", - " engine=engine_dev, benchmark_name=f\"{itr_prefix}benchmark_prod\", production_benchmarks=prod_bms\n", + " engine=engine_dev,\n", + " benchmark_name=f\"{itr_prefix}benchmark_prod\",\n", + " production_benchmarks=prod_bms,\n", ")\n", "\n", "# load intensity benchmarks\n", @@ -275,10 +289,16 @@ "\n", "# load company data\n", "# TODO: Pandas reads null data mixed with integers as float64 (np.nan). 
This can be fixed post hoc with astype('Int16')\n", - "vault_company_data = VaultCompanyDataProvider(engine=engine_dev, company_table=f\"{itr_prefix}company_data\")\n", + "vault_company_data = VaultCompanyDataProvider(\n", + " engine=engine_dev, company_table=f\"{itr_prefix}company_data\"\n", + ")\n", "\n", "vault_warehouse = DataVaultWarehouse(\n", - " engine_dev, vault_company_data, vault_production_bm, vault_EI_bm, itr_prefix=itr_prefix\n", + " engine_dev,\n", + " vault_company_data,\n", + " vault_production_bm,\n", + " vault_EI_bm,\n", + " itr_prefix=itr_prefix,\n", ")" ] }, @@ -797,7 +817,10 @@ } ], "source": [ - "df = requantify_df(sql_df.dropna(), typemap={\"co2_s1_by_year\": \"pint[t CO2]\", \"co2_s2_by_year\": \"pint[t CO2]\"})\n", + "df = requantify_df(\n", + " sql_df.dropna(),\n", + " typemap={\"co2_s1_by_year\": \"pint[t CO2]\", \"co2_s2_by_year\": \"pint[t CO2]\"},\n", + ")\n", "df" ] }, @@ -872,10 +895,14 @@ "source": [ "%matplotlib inline\n", "ureg.setup_matplotlib()\n", - "plottable_df = df.pivot(index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\").reset_index()\n", + "plottable_df = df.pivot(\n", + " index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\"\n", + ").reset_index()\n", "\n", "# Must plot the first few columns, but then plot 1/3rd of the companies so as not to over-clutter the graph\n", - "plottable_df.iloc[:, [x for x in list(range(0, 3)) + list(range(3, 37))]].plot(x=\"year\", kind=\"line\", figsize=(24, 10))" + "plottable_df.iloc[:, [x for x in list(range(0, 3)) + list(range(3, 37))]].plot(\n", + " x=\"year\", kind=\"line\", figsize=(24, 10)\n", + ")" ] }, { diff --git a/notebooks/02b_vault_dev.ipynb b/notebooks/02b_vault_dev.ipynb index 4cc7fd2..285a55a 100644 --- a/notebooks/02b_vault_dev.ipynb +++ b/notebooks/02b_vault_dev.ipynb @@ -37,15 +37,10 @@ "outputs": [], "source": [ "import os\n", - "import pathlib\n", "import osc_ingest_trino as osc\n", - "import trino\n", - "from sqlalchemy.engine import create_engine\n", "\n", "import json\n", "import pandas as pd\n", - "import numpy as np\n", - "import ITR\n", "\n", "from ITR.data.osc_units import ureg\n", "from ITR.data.vault_providers import (\n", @@ -58,7 +53,7 @@ "\n", "# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n", "# IProductionBenchmarkScopes\n", - "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n", + "from ITR.interfaces import IProductionBenchmarkScopes, IEIBenchmarkScopes\n", "\n", "ingest_catalog = \"osc_datacommons_dev\"\n", "ingest_schema = \"demo_dv\"\n", @@ -128,13 +123,19 @@ } ], "source": [ - "engine_dev = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)\n", + "engine_dev = osc.attach_trino_engine(\n", + " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n", + ")\n", "qres = osc._do_sql(f\"show tables in {ingest_schema}\", engine_dev, verbose=True)\n", "\n", "# Check that we have the tables we need\n", "required_tables = [\"company_data\", \"target_data\", \"trajectory_data\", \"emissions_data\"]\n", "existing_tables = [q[0] for q in qres]\n", - "missing_tables = [rtable for rtable in required_tables if f\"{itr_prefix}{rtable}\" not in existing_tables]\n", + "missing_tables = [\n", + " rtable\n", + " for rtable in required_tables\n", + " if f\"{itr_prefix}{rtable}\" not in existing_tables\n", + "]\n", "if missing_tables:\n", " print(f\"Missing tables (itr_prefix = {itr_prefix}): {missing_tables}\")\n", " assert False" @@ 
-315,10 +316,16 @@ "\n", "# load company data\n", "# TODO: Pandas reads null data mixed with integers as float64 (np.nan). This can be fixed post hoc with astype('Int16')\n", - "vault_company_data = VaultCompanyDataProvider(engine=engine_dev, company_table=f\"{itr_prefix}company_data\")\n", + "vault_company_data = VaultCompanyDataProvider(\n", + " engine=engine_dev, company_table=f\"{itr_prefix}company_data\"\n", + ")\n", "\n", "vault_warehouse = DataVaultWarehouse(\n", - " engine_dev, vault_company_data, vault_production_bm, vault_EI_bm, itr_prefix=itr_prefix\n", + " engine_dev,\n", + " vault_company_data,\n", + " vault_production_bm,\n", + " vault_EI_bm,\n", + " itr_prefix=itr_prefix,\n", ")" ] }, @@ -693,9 +700,17 @@ "outputs": [], "source": [ "df = requantify_df(\n", - " sql_df.dropna(), typemap={\"co2_s1_by_year\": \"Mt CO2\", \"co2_s2_by_year\": \"Mt CO2\", \"co2_s3_by_year\": \"Mt CO2\"}\n", + " sql_df.dropna(),\n", + " typemap={\n", + " \"co2_s1_by_year\": \"Mt CO2\",\n", + " \"co2_s2_by_year\": \"Mt CO2\",\n", + " \"co2_s3_by_year\": \"Mt CO2\",\n", + " },\n", ").convert_dtypes()\n", - "df = df[df.company_id.ne(\"US6362744095+Gas Utilities\") & df.company_id.ne(\"US0236081024+Gas Utilities\")]\n", + "df = df[\n", + " df.company_id.ne(\"US6362744095+Gas Utilities\")\n", + " & df.company_id.ne(\"US0236081024+Gas Utilities\")\n", + "]\n", "df = df[df.co2_s1_by_year.gt(ureg(\"10.0 Mt CO2e\"))]" ] }, @@ -1009,7 +1024,9 @@ ], "source": [ "ureg.setup_matplotlib(True)\n", - "plottable_df = df.pivot(index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\").reset_index()\n", + "plottable_df = df.pivot(\n", + " index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\"\n", + ").reset_index()\n", "# Must plot the first few columns, but then plot 1/3rd of the companies so as not to over-clutter the graph\n", "plottable_df.plot(x=\"year\", kind=\"line\", figsize=(24, 10))" ] diff --git a/notebooks/03_vault_quant.ipynb b/notebooks/03_vault_quant.ipynb index 20c145b..86cbf16 100644 --- a/notebooks/03_vault_quant.ipynb +++ b/notebooks/03_vault_quant.ipynb @@ -35,16 +35,11 @@ "outputs": [], "source": [ "import os\n", - "import pathlib\n", "import osc_ingest_trino as osc\n", "import re\n", - "import trino\n", - "from sqlalchemy.engine import create_engine\n", "\n", - "import json\n", "import numpy as np\n", "import pandas as pd\n", - "import ITR\n", "from ITR_examples import data_dir as xlsx_data_dir\n", "\n", "# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", @@ -53,16 +48,14 @@ "# from ITR.data.data_warehouse import DataWarehouse\n", "from ITR.data.vault_providers import (\n", " VaultCompanyDataProvider,\n", - " VaultProviderProductionBenchmark,\n", - " VaultProviderIntensityBenchmark,\n", " DataVaultWarehouse,\n", " requantify_df,\n", ")\n", "\n", "# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n", "# IProductionBenchmarkScopes\n", - "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n", - "from ITR.data.osc_units import ureg, Q_, PA_\n", + "from ITR.interfaces import EScope\n", + "from ITR.data.osc_units import ureg\n", "\n", "ingest_catalog = \"osc_datacommons_dev\"\n", "ingest_schema = \"demo_dv\"\n", @@ -151,7 +144,9 @@ } ], "source": [ - "engine_quant = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)\n", + "engine_quant = osc.attach_trino_engine(\n", + " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n", + ")\n", 
"print(\"connecting with engine \" + str(engine_quant))\n", "osc._do_sql(f\"show tables in {ingest_schema}\", engine_quant, verbose=True)" ] @@ -259,7 +254,10 @@ ")\n", "\n", "vault_warehouse.quant_init(\n", - " engine_quant, company_data=vault_company_data, ingest_schema=\"demo_dv\", itr_prefix=itr_prefix\n", + " engine_quant,\n", + " company_data=vault_company_data,\n", + " ingest_schema=\"demo_dv\",\n", + " itr_prefix=itr_prefix,\n", ")" ] }, @@ -306,9 +304,9 @@ } ], "source": [ - "sql_temp_score_df = pd.read_sql_table(f\"{itr_prefix}temperature_scores\", engine_quant).replace(\n", - " [np.inf, -np.inf], np.nan\n", - ")" + "sql_temp_score_df = pd.read_sql_table(\n", + " f\"{itr_prefix}temperature_scores\", engine_quant\n", + ").replace([np.inf, -np.inf], np.nan)" ] }, { @@ -500,7 +498,11 @@ ], "source": [ "temp_score_df = requantify_df(\n", - " sql_temp_score_df, typemap={\"trajectory_temperature_score\": \"delta_degC\", \"target_temperature_score\": \"delta_degC\"}\n", + " sql_temp_score_df,\n", + " typemap={\n", + " \"trajectory_temperature_score\": \"delta_degC\",\n", + " \"target_temperature_score\": \"delta_degC\",\n", + " },\n", ")\n", "temp_score_df = temp_score_df[\n", " ~temp_score_df.company_id.isin(\n", @@ -526,7 +528,12 @@ "plottable_df = (\n", " temp_score_df.loc[\n", " temp_score_df.year == 2050,\n", - " [\"company_name\", \"scope\", \"trajectory_temperature_score\", \"target_temperature_score\"],\n", + " [\n", + " \"company_name\",\n", + " \"scope\",\n", + " \"trajectory_temperature_score\",\n", + " \"target_temperature_score\",\n", + " ],\n", " ]\n", " .sort_values(by=\"company_name\")\n", " .set_index([\"company_name\", \"scope\"])\n", @@ -541,7 +548,8 @@ "outputs": [], "source": [ "plottable_df[\"average_score\"] = (\n", - " plottable_df[\"trajectory_temperature_score\"] + plottable_df[\"target_temperature_score\"]\n", + " plottable_df[\"trajectory_temperature_score\"]\n", + " + plottable_df[\"target_temperature_score\"]\n", ") / 2.0" ] }, @@ -730,13 +738,17 @@ "outputs": [], "source": [ "# loading sample portfolio\n", - "df_portfolio = pd.read_excel(company_data_path, sheet_name=\"Portfolio\", index_col=\"company_id\")\n", + "df_portfolio = pd.read_excel(\n", + " company_data_path, sheet_name=\"Portfolio\", index_col=\"company_id\"\n", + ")\n", "\n", "for i, col in enumerate(df_portfolio.columns):\n", " if col.startswith(\"investment_value\"):\n", " if match := re.match(r\".*\\[([A-Z]{3})\\]\", col, re.I):\n", " df_portfolio.rename(columns={col: \"investment_value\"}, inplace=True)\n", - " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(f\"pint[{match.group(1)}]\")\n", + " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(\n", + " f\"pint[{match.group(1)}]\"\n", + " )\n", "df_portfolio = df_portfolio.assign(scope=\"S1S2\").set_index(\"scope\", append=True)" ] }, @@ -816,7 +828,9 @@ "metadata": {}, "outputs": [], "source": [ - "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (portfolio_df[\"investment_value\"] / weight_for_WATS)\n", + "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (\n", + " portfolio_df[\"investment_value\"] / weight_for_WATS\n", + ")\n", "portfolio_df.head()" ] }, @@ -827,7 +841,9 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\")" + "print(\n", + " f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\"\n", + ")" ] }, { @@ -860,7 +876,9 @@ 
"metadata": {}, "outputs": [], "source": [ - "print(f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\")" + "print(\n", + " f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\"\n", + ")" ] }, { @@ -907,7 +925,9 @@ " portfolio_df[weight_column] = vault_company_data.compute_portfolio_weights(\n", " portfolio_df[\"pa_score\"], 2019, v, EScope.S1S2\n", " )\n", - " print(f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\")\n", + " print(\n", + " f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\"\n", + " )\n", "\n", "portfolio_df" ] @@ -937,7 +957,11 @@ "metadata": {}, "outputs": [], "source": [ - "osc._do_sql(f\"select * from {ingest_schema}.{itr_prefix}company_data\", engine_quant, verbose=False)" + "osc._do_sql(\n", + " f\"select * from {ingest_schema}.{itr_prefix}company_data\",\n", + " engine_quant,\n", + " verbose=False,\n", + ")" ] }, { diff --git a/notebooks/04_vault_user.ipynb b/notebooks/04_vault_user.ipynb index b4eacc7..5b2ace6 100644 --- a/notebooks/04_vault_user.ipynb +++ b/notebooks/04_vault_user.ipynb @@ -44,7 +44,9 @@ "# Load some standard environment variables from a dot-env file, if it exists.\n", "# If no such file can be found, does not fail, and so allows these environment vars to\n", "# be populated in some other way\n", - "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n", + "dotenv_dir = os.environ.get(\n", + " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n", + ")\n", "dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n", "if os.path.exists(dotenv_path):\n", " load_dotenv(dotenv_path=dotenv_path, override=True)" @@ -83,10 +85,7 @@ } ], "source": [ - "import json\n", "import pandas as pd\n", - "from numpy.testing import assert_array_equal\n", - "import ITR\n", "\n", "# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", "# from ITR.temperature_score import TemperatureScore\n", @@ -157,7 +156,9 @@ ], "source": [ "sqlstring = \"trino://{user}@{host}:{port}/\".format(\n", - " user=os.environ[\"TRINO_USER_USER3\"], host=os.environ[\"TRINO_HOST\"], port=os.environ[\"TRINO_PORT\"]\n", + " user=os.environ[\"TRINO_USER_USER3\"],\n", + " host=os.environ[\"TRINO_HOST\"],\n", + " port=os.environ[\"TRINO_PORT\"],\n", ")\n", "\n", "ingest_catalog = \"osc_datacommons_dev\"\n", @@ -400,7 +401,12 @@ } ], "source": [ - "portfolio_df = pd.read_csv(\"data/mdt-20220116-portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\", index_col=\"company_id\")\n", + "portfolio_df = pd.read_csv(\n", + " \"data/mdt-20220116-portfolio.csv\",\n", + " encoding=\"iso-8859-1\",\n", + " sep=\";\",\n", + " index_col=\"company_id\",\n", + ")\n", "# portfolio_df = pd.read_csv(\"data/rmi_all.csv\", encoding=\"iso-8859-1\", sep=',', index_col='company_id')\n", "portfolio_df" ] @@ -969,7 +975,9 @@ } ], "source": [ - "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (portfolio_df[\"investment_value\"] / weight_for_WATS)\n", + "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (\n", + " portfolio_df[\"investment_value\"] / weight_for_WATS\n", + ")\n", "portfolio_df.head()" ] }, @@ -988,7 +996,9 @@ } ], "source": [ - "print(f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\")" + "print(\n", + " f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\"\n", + ")" ] }, { @@ -1148,7 +1158,9 @@ } ], 
"source": [ - "print(f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\")" + "print(\n", + " f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\"\n", + ")" ] }, { @@ -1993,7 +2005,9 @@ " portfolio_df[weight_column] = vault_company_data.compute_portfolio_weights(\n", " portfolio_df[\"pa_score\"], 2019, v, EScope.S1S2\n", " )\n", - " print(f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\")\n", + " print(\n", + " f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\"\n", + " )\n", "\n", "portfolio_df" ] @@ -2146,7 +2160,11 @@ } ], "source": [ - "osc._do_sql(f\"select * from {ingest_schema}.{itr_prefix}company_data\", engine_user, verbose=False)" + "osc._do_sql(\n", + " f\"select * from {ingest_schema}.{itr_prefix}company_data\",\n", + " engine_user,\n", + " verbose=False,\n", + ")" ] }, { diff --git a/notebooks/co2budget.ipynb b/notebooks/co2budget.ipynb index 8078735..507cfe4 100644 --- a/notebooks/co2budget.ipynb +++ b/notebooks/co2budget.ipynb @@ -24,12 +24,9 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "import json\n", - "import argparse\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go" + "import plotly.express as px" ] }, { @@ -41,8 +38,16 @@ "source": [ "import ITR\n", "from ITR import data_dir\n", - "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n", - "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes, DF_ICompanyEIProjections\n", + "from ITR.data.base_providers import (\n", + " BaseProviderProductionBenchmark,\n", + " BaseProviderIntensityBenchmark,\n", + ")\n", + "from ITR.interfaces import (\n", + " EScope,\n", + " IProductionBenchmarkScopes,\n", + " IEIBenchmarkScopes,\n", + " DF_ICompanyEIProjections,\n", + ")\n", "from ITR.data.template import TemplateProviderCompany\n", "from ITR.data.data_warehouse import DataWarehouse\n", "from ITR.data.osc_units import PA_" @@ -118,7 +123,9 @@ "metadata": {}, "outputs": [], "source": [ - "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20230106 ITR V2 Sample Data.xlsx\")\n", + "template_data_path = os.path.abspath(\n", + " \"../src/ITR_examples/data/20230106 ITR V2 Sample Data.xlsx\"\n", + ")\n", "# template_data_path = \"data/20220927 ITR Tool Sample Data.xlsx\"\n", "\n", "# Remove the # and space on the next line to point the template_data_path variable at your own data\n", @@ -135,7 +142,10 @@ "outputs": [], "source": [ "template_provider = DataWarehouse(\n", - " template_company_data, production_bm, intensity_bm, estimate_missing_data=DataWarehouse.estimate_missing_s3_data\n", + " template_company_data,\n", + " production_bm,\n", + " intensity_bm,\n", + " estimate_missing_data=DataWarehouse.estimate_missing_s3_data,\n", ")\n", "\n", "# Fills in template_company_data._companies[0].projected_targets.S1S2\n", @@ -155,7 +165,12 @@ "outputs": [], "source": [ "data, idx = zip(\n", - " *[(i, (bm.sector, bm.region)) for i, bm in enumerate(production_bm._productions_benchmarks.AnyScope.benchmarks)]\n", + " *[\n", + " (i, (bm.sector, bm.region))\n", + " for i, bm in enumerate(\n", + " production_bm._productions_benchmarks.AnyScope.benchmarks\n", + " )\n", + " ]\n", ")\n", "production_bm_mapper = pd.Series(data, idx)\n", "\n", @@ -171,7 +186,9 @@ " data, idx = zip(\n", " *[\n", " (i, (bm.sector, bm.region))\n", - " for i, bm in 
enumerate(intensity_bm._EI_benchmarks[scope.name].benchmarks)\n", + " for i, bm in enumerate(\n", + " intensity_bm._EI_benchmarks[scope.name].benchmarks\n", + " )\n", " ]\n", " )\n", " mapper_dict[scope.name] = pd.Series(data, idx)\n", @@ -283,8 +300,12 @@ " models_dict[sector_region_idx] = f\"{model.sector} in {region}\"\n", "\n", "for k, v in models_dict.items():\n", - " sector_prod_baseline = production_bm._productions_benchmarks.AnyScope.benchmarks[k].base_year_production\n", - " print(f\"setting sector_prod_baseline (total units of output) for {v} to {sector_prod_baseline}\")" + " sector_prod_baseline = production_bm._productions_benchmarks.AnyScope.benchmarks[\n", + " k\n", + " ].base_year_production\n", + " print(\n", + " f\"setting sector_prod_baseline (total units of output) for {v} to {sector_prod_baseline}\"\n", + " )" ] }, { @@ -303,10 +324,17 @@ " elif (model.sector, \"Global\") in bm_mapper.index:\n", " ei_sector_region_idx = bm_mapper.loc[sector, \"Global\"]\n", " ei_data, ei_idx = zip(\n", - " *[(ei.value, ei.year) for ei in scoped_bm.benchmarks[ei_sector_region_idx].projections_nounits]\n", + " *[\n", + " (ei.value, ei.year)\n", + " for ei in scoped_bm.benchmarks[ei_sector_region_idx].projections_nounits\n", + " ]\n", " )\n", " sector_ei = pd.Series(\n", - " PA_(ei_data, dtype=scoped_bm.benchmarks[ei_sector_region_idx].benchmark_metric), index=ei_idx\n", + " PA_(\n", + " ei_data,\n", + " dtype=scoped_bm.benchmarks[ei_sector_region_idx].benchmark_metric,\n", + " ),\n", + " index=ei_idx,\n", " )\n", " else:\n", " sector_ei = None\n", @@ -351,7 +379,7 @@ " return model_ei.S3.projections\n", " else:\n", " # No sense trying to print `model_ei` as the __str__ method will make it an empty DataFrame (aggregation of empty Series)\n", - " raise ValueError(f\"get_ei_projections: no valid scope found\")\n", + " raise ValueError(\"get_ei_projections: no valid scope found\")\n", " return get_ei_projections_from_ICompanyEIProjections(model_ei)" ] }, @@ -373,41 +401,60 @@ " prod_data, prod_idx = zip(\n", " *[\n", " (p.value, p.year)\n", - " for p in production_bm._productions_benchmarks.AnyScope.benchmarks[sector_region_idx].projections_nounits\n", + " for p in production_bm._productions_benchmarks.AnyScope.benchmarks[\n", + " sector_region_idx\n", + " ].projections_nounits\n", " ]\n", " )\n", " sector_production = pd.Series(prod_data, prod_idx)\n", "\n", " if ei_s1_bm_mapper is not None:\n", - " sector_ei_s1 = get_ei_scope_by_sector_region(\"S1\", model.sector, region, ei_s1_bm_mapper)\n", + " sector_ei_s1 = get_ei_scope_by_sector_region(\n", + " \"S1\", model.sector, region, ei_s1_bm_mapper\n", + " )\n", " else:\n", " sector_ei_s1 = None\n", " if ei_s1s2_bm_mapper is not None:\n", - " sector_ei_s1s2 = get_ei_scope_by_sector_region(\"S1S2\", model.sector, region, ei_s1s2_bm_mapper)\n", + " sector_ei_s1s2 = get_ei_scope_by_sector_region(\n", + " \"S1S2\", model.sector, region, ei_s1s2_bm_mapper\n", + " )\n", " else:\n", " sector_ei_s1s2 = None\n", " if ei_s3_bm_mapper is not None:\n", - " sector_ei_s3 = get_ei_scope_by_sector_region(\"S3\", model.sector, region, ei_s3_bm_mapper)\n", + " sector_ei_s3 = get_ei_scope_by_sector_region(\n", + " \"S3\", model.sector, region, ei_s3_bm_mapper\n", + " )\n", " else:\n", " sector_ei_s3 = None\n", " if ei_s1s2s3_bm_mapper is not None:\n", - " sector_ei_s1s2s3 = get_ei_scope_by_sector_region(\"S1S2S3\", model.sector, region, ei_s1s2s3_bm_mapper)\n", + " sector_ei_s1s2s3 = get_ei_scope_by_sector_region(\n", + " \"S1S2S3\", model.sector, region, 
ei_s1s2s3_bm_mapper\n", + " )\n", " else:\n", " sector_ei_s1s2s3 = None\n", " sector_growth_partial = sector_production.add(1).cumprod()\n", - " data, idx = zip(*[(p.value.m, p.year) for p in model.historic_data.productions if p.year in [2019, 2020]])\n", + " data, idx = zip(\n", + " *[\n", + " (p.value.m, p.year)\n", + " for p in model.historic_data.productions\n", + " if p.year in [2019, 2020]\n", + " ]\n", + " )\n", " co_historic_productions = pd.Series(data, idx)\n", "\n", - " co_projected_productions = co_historic_productions[2020] * sector_growth_partial[sector_growth_partial.index > 2020]\n", - "\n", - " co_productions = pd.concat([co_historic_productions, co_projected_productions]).astype(\n", - " f\"pint[{model.production_metric}]\"\n", + " co_projected_productions = (\n", + " co_historic_productions[2020]\n", + " * sector_growth_partial[sector_growth_partial.index > 2020]\n", " )\n", "\n", + " co_productions = pd.concat(\n", + " [co_historic_productions, co_projected_productions]\n", + " ).astype(f\"pint[{model.production_metric}]\")\n", + "\n", " co_ei_trajectory = get_ei_projections(model.projected_intensities)\n", " try:\n", " co_ei_target = get_ei_projections(model.projected_targets)\n", - " except ValueError as e:\n", + " except ValueError:\n", " # print(e)\n", " print(\n", " f\"model.projected_targets is empty for company {model.company_name}; company_id = {model.company_id}; index = {i}\"\n", @@ -415,7 +462,9 @@ " continue\n", "\n", " plot_dict = {\n", - " \"Trajectory\": (co_productions * co_ei_trajectory).pint.to(\"t CO2e\").pint.m.cumsum(),\n", + " \"Trajectory\": (co_productions * co_ei_trajectory)\n", + " .pint.to(\"t CO2e\")\n", + " .pint.m.cumsum(),\n", " \"Target\": (co_productions * co_ei_target).pint.to(\"t CO2e\").pint.m.cumsum(),\n", " }\n", " if model.scope == EScope.S1:\n", @@ -444,13 +493,22 @@ " continue\n", " else:\n", " continue\n", - " plot_dict[bm_key] = (sector_growth_partial * sector_ei).mul(co_productions[2019]).pint.to(\"t CO2e\").pint.m.cumsum()\n", + " plot_dict[bm_key] = (\n", + " (sector_growth_partial * sector_ei)\n", + " .mul(co_productions[2019])\n", + " .pint.to(\"t CO2e\")\n", + " .pint.m.cumsum()\n", + " )\n", " sector_df = pd.DataFrame(plot_dict)\n", " fig = px.line(\n", " sector_df.apply(ITR.nominal_values),\n", " y=[k for k in plot_dict.keys()],\n", " title=fig_title,\n", - " labels={\"index\": \"Year\", \"value\": \"t CO2\", \"variable\": f\"{model.company_name}
{model.company_id}\"},\n", + " labels={\n", + " \"index\": \"Year\",\n", + " \"value\": \"t CO2\",\n", + " \"variable\": f\"{model.company_name}
{model.company_id}\",\n", + " },\n", " )\n", " fig.write_image(f\"{fig_title}-images/co2_bm_{i}.jpeg\")\n", " fig.show" diff --git a/notebooks/quick_temp_score_calculation.ipynb b/notebooks/quick_temp_score_calculation.ipynb index 3775800..3da1bb3 100644 --- a/notebooks/quick_temp_score_calculation.ipynb +++ b/notebooks/quick_temp_score_calculation.ipynb @@ -78,19 +78,21 @@ "import pandas as pd\n", "\n", "import ITR\n", - "from ITR.data.excel import ExcelProviderCompany, ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n", + "from ITR.data.excel import (\n", + " ExcelProviderCompany,\n", + " ExcelProviderProductionBenchmark,\n", + " ExcelProviderIntensityBenchmark,\n", + ")\n", "from ITR.data.data_warehouse import DataWarehouse\n", "from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", "from ITR.temperature_score import TemperatureScore\n", "from ITR.interfaces import ETimeFrames, EScope\n", - "from ITR.data.osc_units import ureg, Q_, PA_, asPintDataFrame, requantify_df_from_columns\n", + "from ITR.data.osc_units import ureg, Q_, asPintDataFrame, requantify_df_from_columns\n", "\n", "from ITR_examples.utils import (\n", " collect_company_contributions,\n", " plot_grouped_statistics,\n", - " anonymize,\n", " plot_grouped_heatmap,\n", - " print_grouped_scores,\n", " get_contributions_per_group,\n", ")" ] @@ -220,7 +222,9 @@ "metadata": {}, "outputs": [], "source": [ - "excel_production_bm = ExcelProviderProductionBenchmark(excel_path=\"data/benchmark_OECM_S3.xlsx\")" + "excel_production_bm = ExcelProviderProductionBenchmark(\n", + " excel_path=\"data/benchmark_OECM_S3.xlsx\"\n", + ")" ] }, { @@ -274,7 +278,9 @@ "metadata": {}, "outputs": [], "source": [ - "df_portfolio = requantify_df_from_columns(pd.read_csv(\"data/example_portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\"))" + "df_portfolio = requantify_df_from_columns(\n", + " pd.read_csv(\"data/example_portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\")\n", + ")" ] }, { @@ -413,7 +419,9 @@ " scopes=[EScope.S1S2],\n", " aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n", ")\n", - "amended_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)" + "amended_portfolio = temperature_score.calculate(\n", + " data_warehouse=excel_provider, portfolio=companies\n", + ")" ] }, { @@ -631,7 +639,9 @@ "source": [ "grouping = [\"sector\", \"region\"]\n", "temperature_score.grouping = grouping\n", - "grouped_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)\n", + "grouped_portfolio = temperature_score.calculate(\n", + " data_warehouse=excel_provider, portfolio=companies\n", + ")\n", "grouped_aggregations = temperature_score.aggregate_scores(grouped_portfolio)" ] }, @@ -833,7 +843,9 @@ "region = \"Asia\"\n", "sector = \"Steel\"\n", "group = sector + \"-\" + region\n", - "group_contributions = get_contributions_per_group(grouped_aggregations, analysis_parameters, group)\n", + "group_contributions = get_contributions_per_group(\n", + " grouped_aggregations, analysis_parameters, group\n", + ")\n", "group_contributions.round(2)" ] }, @@ -880,10 +892,16 @@ "grouping = [\"sector\"]\n", "analysis_parameters = (time_frames, scopes, grouping)\n", "\n", - "temperature_score = TemperatureScore(time_frames=time_frames, scopes=scopes, grouping=grouping)\n", - "amended_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)\n", + 
"temperature_score = TemperatureScore(\n", + " time_frames=time_frames, scopes=scopes, grouping=grouping\n", + ")\n", + "amended_portfolio = temperature_score.calculate(\n", + " data_warehouse=excel_provider, portfolio=companies\n", + ")\n", "aggregated_portfolio = temperature_score.aggregate_scores(amended_portfolio)\n", - "company_contributions = collect_company_contributions(aggregated_portfolio, amended_portfolio, analysis_parameters)" + "company_contributions = collect_company_contributions(\n", + " aggregated_portfolio, amended_portfolio, analysis_parameters\n", + ")" ] }, { @@ -904,7 +922,9 @@ ], "source": [ "plot_grouped_statistics(\n", - " aggregated_portfolio, company_contributions.pint.dequantify().droplevel(level=1, axis=1), analysis_parameters\n", + " aggregated_portfolio,\n", + " company_contributions.pint.dequantify().droplevel(level=1, axis=1),\n", + " analysis_parameters,\n", ")" ] }, @@ -1090,9 +1110,18 @@ ], "source": [ "sector_contributions = company_contributions[\n", - " [\"company_name\", \"sector\", \"contribution\", \"temperature_score\", \"ownership_percentage\", \"portfolio_percentage\"]\n", + " [\n", + " \"company_name\",\n", + " \"sector\",\n", + " \"contribution\",\n", + " \"temperature_score\",\n", + " \"ownership_percentage\",\n", + " \"portfolio_percentage\",\n", + " ]\n", "]\n", - "asPintDataFrame(sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]).pint.dequantify().round(2)" + "asPintDataFrame(\n", + " sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]\n", + ").pint.dequantify().round(2)" ] }, { @@ -1132,7 +1161,9 @@ "outputs": [], "source": [ "data_dump_filename = \"data_dump_dequantified.xlsx\"\n", - "amended_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(data_dump_filename)" + "amended_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(\n", + " data_dump_filename\n", + ")" ] }, { diff --git a/notebooks/quick_template_score_calc.ipynb b/notebooks/quick_template_score_calc.ipynb index 9cc041f..a14c578 100644 --- a/notebooks/quick_template_score_calc.ipynb +++ b/notebooks/quick_template_score_calc.ipynb @@ -45,11 +45,9 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "import warnings\n", "\n", "import json\n", - "import numpy as np\n", "import pandas as pd" ] }, @@ -65,15 +63,22 @@ "\n", "import ITR\n", "from ITR import data_dir\n", - "from ITR.data.excel import ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n", "from ITR.data.template import TemplateProviderCompany\n", - "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n", + "from ITR.data.base_providers import (\n", + " BaseProviderProductionBenchmark,\n", + " BaseProviderIntensityBenchmark,\n", + ")\n", "from ITR.data.data_warehouse import DataWarehouse\n", "from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", "from ITR.temperature_score import TemperatureScore\n", - "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n", + "from ITR.interfaces import (\n", + " ETimeFrames,\n", + " EScope,\n", + " IProductionBenchmarkScopes,\n", + " IEIBenchmarkScopes,\n", + ")\n", "\n", - "from ITR.data.osc_units import ureg, Q_, PA_, asPintDataFrame, requantify_df_from_columns\n", + "from ITR.data.osc_units import ureg, Q_, asPintDataFrame, requantify_df_from_columns\n", "\n", "from ITR_examples.utils import (\n", " plot_grouped_heatmap,\n", @@ -136,7 +141,9 @@ 
"benchmark_EI_OECM_PC = os.path.join(data_dir, \"benchmark_EI_OECM_PC.json\")\n", "benchmark_EI_OECM_S3 = os.path.join(data_dir, \"benchmark_EI_OECM_S3.json\")\n", "benchmark_EI_TPI = os.path.join(data_dir, \"benchmark_EI_TPI_2_degrees.json\")\n", - "benchmark_EI_TPI_below_2 = os.path.join(data_dir, \"benchmark_EI_TPI_below_2_degrees.json\")\n", + "benchmark_EI_TPI_below_2 = os.path.join(\n", + " data_dir, \"benchmark_EI_TPI_below_2_degrees.json\"\n", + ")\n", "\n", "# load production benchmarks\n", "with open(benchmark_prod_json) as json_file:\n", @@ -193,7 +200,9 @@ "metadata": {}, "outputs": [], "source": [ - "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\")" + "template_data_path = os.path.abspath(\n", + " \"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\"\n", + ")" ] }, { @@ -562,7 +571,9 @@ } ], "source": [ - "df_portfolio = requantify_df_from_columns(pd.read_excel(template_data_path, sheet_name=\"Portfolio\"))\n", + "df_portfolio = requantify_df_from_columns(\n", + " pd.read_excel(template_data_path, sheet_name=\"Portfolio\")\n", + ")\n", "display(df_portfolio.tail())" ] }, @@ -611,7 +622,9 @@ " scopes=[EScope.S1S2S3],\n", " aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n", ")\n", - "enhanced_portfolio = temperature_score_s1s2s3.calculate(data_warehouse=template_provider, portfolio=companies)" + "enhanced_portfolio = temperature_score_s1s2s3.calculate(\n", + " data_warehouse=template_provider, portfolio=companies\n", + ")" ] }, { @@ -782,7 +795,9 @@ "source": [ "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " display(enhanced_portfolio[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])" + " display(\n", + " enhanced_portfolio[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]]\n", + " )" ] }, { @@ -1635,7 +1650,8 @@ ], "source": [ "data = enhanced_portfolio[\n", - " (enhanced_portfolio.scope == EScope.S1S2S3) & (enhanced_portfolio.temperature_score > Q_(3, \"delta_degC\"))\n", + " (enhanced_portfolio.scope == EScope.S1S2S3)\n", + " & (enhanced_portfolio.temperature_score > Q_(3, \"delta_degC\"))\n", "]\n", "data" ] @@ -1665,7 +1681,9 @@ ], "source": [ "aggregated_scores = temperature_score_s1s2s3.aggregate_scores(enhanced_portfolio)\n", - "print(f\"Temperature Score aggregation method = {temperature_score_s1s2s3.aggregation_method}\")" + "print(\n", + " f\"Temperature Score aggregation method = {temperature_score_s1s2s3.aggregation_method}\"\n", + ")" ] }, { @@ -1735,7 +1753,9 @@ "source": [ "grouping = [\"sector\", \"region\"]\n", "temperature_score_s1s2s3.grouping = grouping\n", - "grouped_portfolio = temperature_score_s1s2s3.calculate(data_warehouse=template_provider, portfolio=companies)\n", + "grouped_portfolio = temperature_score_s1s2s3.calculate(\n", + " data_warehouse=template_provider, portfolio=companies\n", + ")\n", "grouped_aggregations = temperature_score_s1s2s3.aggregate_scores(grouped_portfolio)" ] }, @@ -1848,7 +1868,9 @@ "sector = \"Steel\"\n", "group = sector + \"-\" + region\n", "analysis_parameters = ([ETimeFrames.LONG], [EScope.S1S2S3], grouping)\n", - "group_contributions = get_contributions_per_group(grouped_aggregations, analysis_parameters, group)\n", + "group_contributions = get_contributions_per_group(\n", + " grouped_aggregations, analysis_parameters, group\n", + ")\n", "group_contributions.round(2)" ] }, @@ -1893,13 +1915,20 @@ 
"analysis_parameters = (time_frames, scopes, grouping)\n", "\n", "temperature_score_s1s2_s3 = TemperatureScore(\n", - " time_frames=time_frames, scopes=scopes, grouping=grouping, aggregation_method=PortfolioAggregationMethod.WATS\n", + " time_frames=time_frames,\n", + " scopes=scopes,\n", + " grouping=grouping,\n", + " aggregation_method=PortfolioAggregationMethod.WATS,\n", + ")\n", + "enhanced_portfolio = temperature_score_s1s2_s3.calculate(\n", + " data_warehouse=template_provider, portfolio=companies\n", ")\n", - "enhanced_portfolio = temperature_score_s1s2_s3.calculate(data_warehouse=template_provider, portfolio=companies)\n", "aggregated_portfolio = temperature_score_s1s2_s3.aggregate_scores(enhanced_portfolio)\n", "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " company_contributions = collect_company_contributions(aggregated_portfolio, enhanced_portfolio, analysis_parameters)" + " company_contributions = collect_company_contributions(\n", + " aggregated_portfolio, enhanced_portfolio, analysis_parameters\n", + " )" ] }, { @@ -1920,7 +1949,9 @@ ], "source": [ "plot_grouped_statistics(\n", - " aggregated_portfolio, company_contributions.pint.dequantify().droplevel(level=1, axis=1), analysis_parameters\n", + " aggregated_portfolio,\n", + " company_contributions.pint.dequantify().droplevel(level=1, axis=1),\n", + " analysis_parameters,\n", ")" ] }, @@ -2128,7 +2159,9 @@ " \"portfolio_percentage\",\n", " ]\n", "]\n", - "asPintDataFrame(sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]).pint.dequantify().round(2)" + "asPintDataFrame(\n", + " sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]\n", + ").pint.dequantify().round(2)" ] }, { @@ -2168,7 +2201,9 @@ "outputs": [], "source": [ "data_dump_filename = \"data_dump_dequantified.xlsx\"\n", - "enhanced_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(data_dump_filename)" + "enhanced_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(\n", + " data_dump_filename\n", + ")" ] }, { diff --git a/notebooks/s1s2_s3_calc.ipynb b/notebooks/s1s2_s3_calc.ipynb index d3610e9..4c1b8b3 100644 --- a/notebooks/s1s2_s3_calc.ipynb +++ b/notebooks/s1s2_s3_calc.ipynb @@ -30,7 +30,6 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "import warnings" ] }, @@ -55,25 +54,23 @@ "\n", "import ITR\n", "from ITR import data_dir\n", - "from ITR.data.excel import ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n", "from ITR.data.template import TemplateProviderCompany\n", - "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n", + "from ITR.data.base_providers import (\n", + " BaseProviderProductionBenchmark,\n", + " BaseProviderIntensityBenchmark,\n", + ")\n", "from ITR.data.data_warehouse import DataWarehouse\n", "from ITR.portfolio_aggregation import PortfolioAggregationMethod\n", "from ITR.temperature_score import TemperatureScore\n", - "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n", + "from ITR.interfaces import (\n", + " ETimeFrames,\n", + " EScope,\n", + " IProductionBenchmarkScopes,\n", + " IEIBenchmarkScopes,\n", + ")\n", "import pandas as pd\n", "\n", - "from ITR.data.osc_units import ureg, Q_, PA_, requantify_df_from_columns\n", - "\n", - "from ITR_examples.utils import (\n", - " collect_company_contributions,\n", - " plot_grouped_statistics,\n", - " anonymize,\n", - " plot_grouped_heatmap,\n", - 
" print_grouped_scores,\n", - " get_contributions_per_group,\n", - ")" + "from ITR.data.osc_units import ureg, requantify_df_from_columns" ] }, { @@ -171,7 +168,9 @@ "# Remove the # and space on the next line to point the template_data_path variable at your own data\n", "# template_data_path = \"data/your_template_here.xlsx\"\n", "\n", - "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\")\n", + "template_data_path = os.path.abspath(\n", + " \"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\"\n", + ")\n", "template_company_data = TemplateProviderCompany(excel_path=template_data_path)" ] }, @@ -203,7 +202,9 @@ "AFOLU included = {base_intensity_bm_s1s2.is_AFOLU_included}\"\n", ")\n", "\n", - "template_provider_s3 = DataWarehouse(template_company_data, base_production_bm, base_intensity_bm_s3)\n", + "template_provider_s3 = DataWarehouse(\n", + " template_company_data, base_production_bm, base_intensity_bm_s3\n", + ")\n", "print(\n", " f\"S3:\\nBenchmark Temperature = {base_intensity_bm_s3.benchmark_temperature}\\n\\\n", "Benchmark Global Budget = {base_intensity_bm_s3.benchmark_global_budget}\\n\\\n", @@ -228,7 +229,9 @@ "metadata": {}, "outputs": [], "source": [ - "df_portfolio = requantify_df_from_columns(pd.read_excel(template_data_path, sheet_name=\"Portfolio\"))\n", + "df_portfolio = requantify_df_from_columns(\n", + " pd.read_excel(template_data_path, sheet_name=\"Portfolio\")\n", + ")\n", "display(df_portfolio.tail())" ] }, @@ -262,13 +265,17 @@ "metadata": {}, "outputs": [], "source": [ - "template_provider = DataWarehouse(template_company_data, base_production_bm, base_intensity_bm_s1s2)\n", + "template_provider = DataWarehouse(\n", + " template_company_data, base_production_bm, base_intensity_bm_s1s2\n", + ")\n", "temperature_score_s1s2 = TemperatureScore(\n", " time_frames=[ETimeFrames.LONG],\n", " scopes=[EScope.S1S2],\n", " aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n", ")\n", - "enhanced_portfolio_s1s2 = temperature_score_s1s2.calculate(data_warehouse=template_provider, portfolio=companies)\n", + "enhanced_portfolio_s1s2 = temperature_score_s1s2.calculate(\n", + " data_warehouse=template_provider, portfolio=companies\n", + ")\n", "\n", "temperature_score_s3 = TemperatureScore(\n", " time_frames=[ETimeFrames.LONG],\n", @@ -276,7 +283,9 @@ " aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n", ")\n", "template_provider.update_benchmarks(base_production_bm, base_intensity_bm_s3)\n", - "enhanced_portfolio_s3 = temperature_score_s3.calculate(data_warehouse=template_provider, portfolio=companies)" + "enhanced_portfolio_s3 = temperature_score_s3.calculate(\n", + " data_warehouse=template_provider, portfolio=companies\n", + ")" ] }, { @@ -294,7 +303,11 @@ "source": [ "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " display(enhanced_portfolio_s1s2[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])" + " display(\n", + " enhanced_portfolio_s1s2[\n", + " [\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]\n", + " ]\n", + " )" ] }, { @@ -305,7 +318,11 @@ "source": [ "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " display(enhanced_portfolio_s3[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])" + " display(\n", + " 
enhanced_portfolio_s3[\n", + " [\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]\n", + " ]\n", + " )" ] }, { diff --git a/notebooks/template_update.ipynb b/notebooks/template_update.ipynb index 0b27028..2903c9a 100644 --- a/notebooks/template_update.ipynb +++ b/notebooks/template_update.ipynb @@ -43,24 +43,17 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import sys\n", "import shutil\n", - "import warnings\n", "\n", "import pandas as pd\n", "import numpy as np\n", "from math import log10\n", "\n", - "import openpyxl\n", - "from openpyxl.workbook import Workbook\n", "from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder\n", - "from openpyxl.styles import Alignment, Border, Font, PatternFill, Side\n", + "from openpyxl.styles import Alignment, Border, PatternFill, Side\n", "from openpyxl.styles.colors import Color\n", - "from openpyxl.cell import Cell\n", "from openpyxl.utils import get_column_letter\n", "\n", - "from itertools import chain\n", "from datetime import date, datetime" ] }, @@ -602,7 +595,10 @@ "index_cols = [\"company_name\", \"company_lei\", \"company_id\"]\n", "itr_sheet = wb_data[\"ITR input data\"] # .set_index(index_cols)\n", "itr_sheet.report_date = itr_sheet.apply(\n", - " lambda x: x.report_date if isinstance(x.report_date, datetime) else date(int(x.report_date), 12, 31), axis=1\n", + " lambda x: x.report_date\n", + " if isinstance(x.report_date, datetime)\n", + " else date(int(x.report_date), 12, 31),\n", + " axis=1,\n", ").copy()\n", "all_cols = itr_sheet.columns\n", "scopes = [\"s1\", \"s2\", \"s1s2\", \"s3\"]\n", @@ -971,19 +967,32 @@ "df.insert(\n", " df.columns.get_loc(\"metric\"),\n", " \"sub_metric\",\n", - " df.apply(lambda x: \"location\" if \"s2\" in x.metric else \"combined\" if x.metric == \"s3\" else \"\", axis=1),\n", + " df.apply(\n", + " lambda x: \"location\"\n", + " if \"s2\" in x.metric\n", + " else \"combined\"\n", + " if x.metric == \"s3\"\n", + " else \"\",\n", + " axis=1,\n", + " ),\n", ")\n", "df.insert(\n", " df.columns.get_loc(\"metric\") + 1,\n", " \"unit\",\n", " df.apply(\n", - " lambda x: x.production_metric if x.metric == \"production\" else \"\" if x.metric == \"pdf\" else x.emissions_metric,\n", + " lambda x: x.production_metric\n", + " if x.metric == \"production\"\n", + " else \"\"\n", + " if x.metric == \"pdf\"\n", + " else x.emissions_metric,\n", " axis=1,\n", " ),\n", ")\n", "df.drop(columns=[\"production_metric\", \"emissions_metric\"], inplace=True)\n", "df.insert(df.columns.get_loc(\"unit\") + 1, \"report_date\", date(2021, 12, 31))\n", - "df.loc[df.metric == \"pdf\", df.columns[df.columns.get_loc(\"unit\") + 1] : df.columns[-1]] = \"\"\n", + "df.loc[\n", + " df.metric == \"pdf\", df.columns[df.columns.get_loc(\"unit\") + 1] : df.columns[-1]\n", + "] = \"\"\n", "df = df.set_index(\"metric\", append=True)\n", "df.columns = df.columns.map(lambda x: int(x) if x[0].isnumeric() else x)\n", "esg_df = df\n", @@ -1055,18 +1064,27 @@ "for i, col in enumerate(df.columns):\n", " if col == \"report_date\":\n", " # We fudge the width with number_format that `str(datetime)` doesn't understand\n", - " dim_holder[get_column_letter(i + 1)] = ColumnDimension(financial_ws, min=i + 1, max=i + 1, width=len(col) + 2)\n", + " dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n", + " financial_ws, min=i + 1, max=i + 1, width=len(col) + 2\n", + " )\n", " else:\n", " dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n", " financial_ws,\n", " min=i + 1,\n", " max=i + 1,\n", - " 
- "            width=max(df.iloc[:, i].map(lambda x: len(str(x))).max() + 2 * (col == \"company_lei\"), len(col)) + 2,\n",
+ "            width=max(\n",
+ "                df.iloc[:, i].map(lambda x: len(str(x))).max()\n",
+ "                + 2 * (col == \"company_lei\"),\n",
+ "                len(col),\n",
+ "            )\n",
+ "            + 2,\n",
"        )\n",
"    if i <= 2:\n",
"        # Format index columns\n",
"        for j in range(1, financial_ws.max_row + 1):\n",
- "            financial_ws.cell(column=i + 1, row=j).alignment = Alignment(horizontal=\"left\", vertical=\"center\")\n",
+ "            financial_ws.cell(column=i + 1, row=j).alignment = Alignment(\n",
+ "                horizontal=\"left\", vertical=\"center\"\n",
+ "            )\n",
"    else:\n",
"        if col == \"report_date\":\n",
"            for j in range(1, financial_ws.max_row + 1):\n",
@@ -1077,7 +1095,9 @@
"\n",
"# Lighten the Region column, which is optional\n",
"region_col_letter = get_column_letter(df.columns.get_loc(\"region\") + 1)\n",
- "for cell in financial_ws[f\"{region_col_letter}1:{region_col_letter}{financial_ws.max_row}\"]:\n",
+ "for cell in financial_ws[\n",
+ "    f\"{region_col_letter}1:{region_col_letter}{financial_ws.max_row}\"\n",
+ "]:\n",
"    cell[0].font = cell[0].font.copy(color=Color(\"FF888888\"))"
]
},
@@ -1119,32 +1139,44 @@
"\n",
"# We have only one cell to color, so it's not in the loop\n",
"esg_ws.cell(column=df.columns.get_loc(\"unit\") + 1, row=1).fill = (\n",
- "    wb_xlsx[old_sheet].cell(column=itr_sheet.columns.get_loc(\"emissions_metric\") + 1, row=1).fill.copy()\n",
+ "    wb_xlsx[old_sheet]\n",
+ "    .cell(column=itr_sheet.columns.get_loc(\"emissions_metric\") + 1, row=1)\n",
+ "    .fill.copy()\n",
")\n",
"\n",
"# Make worksheet pretty\n",
"for i, col in enumerate(df.columns):\n",
"    if col == \"report_date\":\n",
"        # We fudge the width with number_format that `str(datetime)` doesn't understand\n",
- "        dim_holder[get_column_letter(i + 1)] = ColumnDimension(financial_ws, min=i + 1, max=i + 1, width=len(col) + 2)\n",
+ "        dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
+ "            esg_ws, min=i + 1, max=i + 1, width=len(col) + 2\n",
+ "        )\n",
"    else:\n",
"        dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
"            esg_ws,\n",
"            min=i + 1,\n",
"            max=i + 1,\n",
- "            width=max(df.iloc[:, i].map(get_cell_width).max() + 2 * (col == \"company_lei\"), len(str(col))) + 2,\n",
+ "            width=max(\n",
+ "                df.iloc[:, i].map(get_cell_width).max() + 2 * (col == \"company_lei\"),\n",
+ "                len(str(col)),\n",
+ "            )\n",
+ "            + 2,\n",
"        )\n",
"    if i <= 2:\n",
"        # Format index columns\n",
"        for j in range(1, esg_ws.max_row + 1):\n",
- "            esg_ws.cell(column=i + 1, row=j).alignment = Alignment(horizontal=\"left\", vertical=\"center\")\n",
+ "            esg_ws.cell(column=i + 1, row=j).alignment = Alignment(\n",
+ "                horizontal=\"left\", vertical=\"center\"\n",
+ "            )\n",
"    elif col == \"report_date\":\n",
"        for j in range(1, esg_ws.max_row + 1):\n",
"            esg_ws.cell(row=j, column=i + 1).number_format = \"yyyy-mm-dd\"\n",
"    elif col in range(2016, 2323):\n",
"        column_color = \"EEEEEE\" if (col % 2) == 0 else \"FFFFFF\"\n",
"        for j in range(1, esg_ws.max_row + 1):\n",
- "            esg_ws.cell(column=i + 1, row=j).fill = PatternFill(\"solid\", start_color=column_color)\n",
+ "            esg_ws.cell(column=i + 1, row=j).fill = PatternFill(\n",
+ "                \"solid\", start_color=column_color\n",
+ "            )\n",
"            esg_ws.cell(column=i + 1, row=j).border = thin_border\n",
"\n",
"esg_ws.column_dimensions = dim_holder"
]
},
@@ -1489,7 +1521,9 @@
}
],
"source": [
- "x = pd.read_excel(template_data_path_v2, sheet_name=\"ITR V2 esg data\", index_col=[0, 1, 2, 3])\n",
+ "x = pd.read_excel(\n",
+ "    template_data_path_v2, sheet_name=\"ITR V2 esg data\", index_col=[0, 1, 2, 3]\n",
+ ")\n",
"display(x)"
]
},