diff --git a/notebooks/00_vault_cleanup.ipynb b/notebooks/00_vault_cleanup.ipynb
index 89468ef..628425a 100644
--- a/notebooks/00_vault_cleanup.ipynb
+++ b/notebooks/00_vault_cleanup.ipynb
@@ -27,10 +27,6 @@
}
],
"source": [
- "import os\n",
- "import pathlib\n",
- "\n",
- "import trino\n",
"import osc_ingest_trino as osc\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
@@ -88,7 +84,9 @@
}
],
"source": [
- "engine_dev = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)"
+ "engine_dev = osc.attach_trino_engine(\n",
+ " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n",
+ ")"
]
},
{
diff --git a/notebooks/01a_vault_ingest.ipynb b/notebooks/01a_vault_ingest.ipynb
index d82a680..8e7545d 100644
--- a/notebooks/01a_vault_ingest.ipynb
+++ b/notebooks/01a_vault_ingest.ipynb
@@ -30,7 +30,9 @@
"# Load some standard environment variables from a dot-env file, if it exists.\n",
"# If no such file can be found, does not fail, and so allows these environment vars to\n",
"# be populated in some other way\n",
- "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n",
+ "dotenv_dir = os.environ.get(\n",
+ " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n",
+ ")\n",
"dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
"if os.path.exists(dotenv_path):\n",
" load_dotenv(dotenv_path=dotenv_path, override=True)"
@@ -57,7 +59,9 @@
"print(\"Initializing Dev tables\")\n",
"\n",
"sqlstring = \"trino://{user}@{host}:{port}/\".format(\n",
- " user=os.environ[\"TRINO_USER_USER1\"], host=os.environ[\"TRINO_HOST\"], port=os.environ[\"TRINO_PORT\"]\n",
+ " user=os.environ[\"TRINO_USER_USER1\"],\n",
+ " host=os.environ[\"TRINO_HOST\"],\n",
+ " port=os.environ[\"TRINO_PORT\"],\n",
")\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
@@ -161,7 +165,11 @@
"source": [
"# loading excel doesn't work from 'get_object' mode\n",
"\n",
- "s3.download_file(os.environ[\"DEMO1_S3_BUCKET\"], \"itr-demo-data/ITR_company_data_minimum_required.xlsx\", \"/tmp/t.xlsx\")\n",
+ "s3.download_file(\n",
+ " os.environ[\"DEMO1_S3_BUCKET\"],\n",
+ " \"itr-demo-data/ITR_company_data_minimum_required.xlsx\",\n",
+ " \"/tmp/t.xlsx\",\n",
+ ")\n",
"\n",
"# load the raw file from the bucket\n",
"xls = pd.ExcelFile(\"/tmp/t.xlsx\")\n",
@@ -187,7 +195,7 @@
"tablename = f\"{itr_prefix}fundamental_data\"\n",
"\n",
"# get this sheet, and assess data types\n",
- "df = pd.read_excel(xls, f\"fundamental_data\").convert_dtypes()\n",
+ "df = pd.read_excel(xls, \"fundamental_data\").convert_dtypes()\n",
"\n",
"# rename columns to forms that sql will handle\n",
"osc.enforce_sql_column_names(df, inplace=True)\n",
@@ -267,7 +275,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
- "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "osc._do_sql(\n",
+ " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
@@ -322,7 +334,11 @@
],
"source": [
"# test that we can get data\n",
- "qres = osc._do_sql(f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "qres = osc._do_sql(\n",
+ " f\"select country from {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
@@ -439,7 +455,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
- "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "osc._do_sql(\n",
+ " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
@@ -494,7 +514,11 @@
],
"source": [
"# test that we can get data\n",
- "qres = osc._do_sql(f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "qres = osc._do_sql(\n",
+ " f\"select y2020 from {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
@@ -543,7 +567,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
- "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "osc._do_sql(\n",
+ " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
@@ -627,7 +655,11 @@
"# in live data platform there will need to be policies and mechanisms for either\n",
"# appending new data, or overwriting old data, or saving off conditioned by a versioning number\n",
"# this is a data governance topic\n",
- "osc._do_sql(f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\", engine, verbose=True)"
+ "osc._do_sql(\n",
+ " f\"drop table if exists {ingest_catalog}.{ingest_schema}.{tablename}\",\n",
+ " engine,\n",
+ " verbose=True,\n",
+ ")"
]
},
{
diff --git a/notebooks/01b_vault_ingest_v2.ipynb b/notebooks/01b_vault_ingest_v2.ipynb
index 3dcf6c3..b15dff7 100644
--- a/notebooks/01b_vault_ingest_v2.ipynb
+++ b/notebooks/01b_vault_ingest_v2.ipynb
@@ -15,16 +15,16 @@
"import pandas as pd\n",
"import numpy as np\n",
"import osc_ingest_trino as osc\n",
- "import trino\n",
"\n",
- "import ITR\n",
"from ITR import data_dir as json_data_dir\n",
"from ITR_examples import data_dir as xlsx_data_dir\n",
"\n",
- "from ITR.configs import ITR_median, ITR_mean, LoggingConfig\n",
- "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n",
+ "from ITR.data.base_providers import (\n",
+ " BaseProviderProductionBenchmark,\n",
+ " BaseProviderIntensityBenchmark,\n",
+ ")\n",
"from ITR.data.data_warehouse import DataWarehouse\n",
- "from ITR.data.osc_units import ureg, Q_, PA_, asPintSeries\n",
+ "from ITR.data.osc_units import Q_\n",
"from ITR.data.template import TemplateProviderCompany\n",
"from ITR.data.vault_providers import (\n",
" VaultCompanyDataProvider,\n",
@@ -32,14 +32,10 @@
")\n",
"from ITR.interfaces import (\n",
" EScope,\n",
- " ETimeFrames,\n",
- " EScoreResultType,\n",
" IEIBenchmarkScopes,\n",
" IProductionBenchmarkScopes,\n",
" ProjectionControls,\n",
")\n",
- "from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
- "from ITR.temperature_score import TemperatureScore\n",
"\n",
"# isort: split\n",
"\n",
@@ -71,7 +67,9 @@
"logger = logging.getLogger(__name__)\n",
"logger.setLevel(logging.INFO)\n",
"\n",
- "formatter = logging.Formatter(\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\") # LoggingConfig.FORMAT\n",
+ "formatter = logging.Formatter(\n",
+ " \"%(asctime)s - %(name)s - %(levelname)s - %(message)s\"\n",
+ ") # LoggingConfig.FORMAT\n",
"stream_handler = logging.StreamHandler()\n",
"stream_handler.setFormatter(formatter)\n",
"logger.addHandler(stream_handler)\n",
@@ -97,8 +95,12 @@
"benchmark_EI_TPI_15_file = \"benchmark_EI_TPI_1_5_degrees.json\"\n",
"benchmark_EI_TPI_file = \"benchmark_EI_TPI_2_degrees.json\"\n",
"benchmark_EI_TPI_below_2_file = \"benchmark_EI_TPI_below_2_degrees.json\"\n",
- "benchmark_EI_TPI_2deg_high_efficiency_file = \"benchmark_EI_TPI_2_degrees_high_efficiency.json\"\n",
- "benchmark_EI_TPI_2deg_shift_improve_file = \"benchmark_EI_TPI_2_degrees_shift_improve.json\""
+ "benchmark_EI_TPI_2deg_high_efficiency_file = (\n",
+ " \"benchmark_EI_TPI_2_degrees_high_efficiency.json\"\n",
+ ")\n",
+ "benchmark_EI_TPI_2deg_shift_improve_file = (\n",
+ " \"benchmark_EI_TPI_2_degrees_shift_improve.json\"\n",
+ ")"
]
},
{
@@ -115,7 +117,9 @@
" if col.startswith(\"investment_value\"):\n",
" if match := re.match(r\".*\\[([A-Z]{3})\\]\", col, re.I):\n",
" df_portfolio.rename(columns={col: \"investment_value\"}, inplace=True)\n",
- " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(f\"pint[{match.group(1)}]\")"
+ " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(\n",
+ " f\"pint[{match.group(1)}]\"\n",
+ " )"
]
},
{
@@ -137,7 +141,9 @@
" benchmark_file = benchmark_EI_TPI_15_file\n",
"elif eibm == \"OECM\":\n",
" benchmark_file = benchmark_EI_OECM_file\n",
- " logger.info(\"OECM benchmark is for backward compatibility only. Use OECM_PC instead.\")\n",
+ " logger.info(\n",
+ " \"OECM benchmark is for backward compatibility only. Use OECM_PC instead.\"\n",
+ " )\n",
"else:\n",
" benchmark_file = benchmark_EI_TPI_below_2_file\n",
"# load intensity benchmarks\n",
@@ -160,8 +166,12 @@
" if scope_name not in parsed_json:\n",
" parsed_json[scope_name] = extra_json[scope_name]\n",
" else:\n",
- " parsed_json[scope_name][\"benchmarks\"] += extra_json[scope_name][\"benchmarks\"]\n",
- "EI_bm = BaseProviderIntensityBenchmark(EI_benchmarks=IEIBenchmarkScopes.model_validate(parsed_json))"
+ " parsed_json[scope_name][\"benchmarks\"] += extra_json[scope_name][\n",
+ " \"benchmarks\"\n",
+ " ]\n",
+ "EI_bm = BaseProviderIntensityBenchmark(\n",
+ " EI_benchmarks=IEIBenchmarkScopes.model_validate(parsed_json)\n",
+ ")"
]
},
{
@@ -282,7 +292,9 @@
"# We fill a conventional DataWarehouse with template data\n",
"# After the ingestion process is complete, downstream users can access DataVaultWarehouse\n",
"\n",
- "template_company_data = TemplateProviderCompany(company_data_path, projection_controls=ProjectionControls())\n",
+ "template_company_data = TemplateProviderCompany(\n",
+ " company_data_path, projection_controls=ProjectionControls()\n",
+ ")\n",
"Warehouse = DataWarehouse(\n",
" template_company_data,\n",
" benchmark_projected_production=base_production_bm,\n",
@@ -330,7 +342,9 @@
],
"source": [
"# This will have identity of main notebook user, not OS-Climate-User1\n",
- "ingest_engine = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)"
+ "ingest_engine = osc.attach_trino_engine(\n",
+ " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n",
+ ")"
]
},
{
@@ -478,7 +492,9 @@
"\n",
"for col in [\"cash\", \"debt\"]:\n",
" if col not in template_company_data.df_fundamentals.columns:\n",
- " template_company_data.df_fundamentals[col] = np.nan * template_company_data.df_fundamentals[\"company_revenue\"]\n",
+ " template_company_data.df_fundamentals[col] = (\n",
+ " np.nan * template_company_data.df_fundamentals[\"company_revenue\"]\n",
+ " )\n",
"\n",
"df = template_company_data.df_fundamentals[fundamental_cols].copy()\n",
"\n",
@@ -498,7 +514,14 @@
"\n",
"# ingest company data\n",
"create_table_from_df(\n",
- " df, ingest_schema, company_tablename, ingest_engine, hive_bucket, hive_catalog, hive_schema, verbose=True\n",
+ " df,\n",
+ " ingest_schema,\n",
+ " company_tablename,\n",
+ " ingest_engine,\n",
+ " hive_bucket,\n",
+ " hive_catalog,\n",
+ " hive_schema,\n",
+ " verbose=True,\n",
")"
]
},
@@ -509,21 +532,24 @@
"metadata": {},
"outputs": [],
"source": [
- "company_info_at_base_year = template_company_data.get_company_intensity_and_production_at_base_year(\n",
- " [\n",
- " company_id\n",
- " for company_id in template_company_data.df_fundamentals.company_id.values\n",
- " if company_id != \"US6745991058-chem\"\n",
- " ]\n",
+ "company_info_at_base_year = (\n",
+ " template_company_data.get_company_intensity_and_production_at_base_year(\n",
+ " [\n",
+ " company_id\n",
+ " for company_id in template_company_data.df_fundamentals.company_id.values\n",
+ " if company_id != \"US6745991058-chem\"\n",
+ " ]\n",
+ " )\n",
")\n",
"\n",
- "import warnings\n",
"\n",
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
" # See https://github.com/hgrecco/pint-pandas/issues/128\n",
- " projected_production = Warehouse.benchmark_projected_production.get_company_projected_production(\n",
- " company_info_at_base_year\n",
+ " projected_production = (\n",
+ " Warehouse.benchmark_projected_production.get_company_projected_production(\n",
+ " company_info_at_base_year\n",
+ " )\n",
" )"
]
},
@@ -666,7 +692,8 @@
"df.columns.set_names(\"year\", inplace=True)\n",
"df2 = df.unstack(level=0).to_frame(\"production_by_year\").reset_index(\"year\")\n",
"df3 = pd.read_sql(\n",
- " f\"select distinct company_id, company_name, company_lei, sector from {company_data._company_table}\", ingest_engine\n",
+ " f\"select distinct company_id, company_name, company_lei, sector from {company_data._company_table}\",\n",
+ " ingest_engine,\n",
")\n",
"df4 = df2.merge(df3, on=\"company_id\").reset_index()\n",
"production_df = df4[\n",
@@ -948,10 +975,17 @@
" ei_dict[scope] = getattr(company, projection)[scope].projections\n",
" else:\n",
" ei_dict[scope] = pd.Series(dtype=\"object\")\n",
- " ei_data = pd.concat([ei_dict[scope] for scope in EScope.get_scopes()], axis=1).reset_index()\n",
- " ei_data.columns = [\"year\"] + [f\"ei_{scope.lower()}_by_year\" for scope in EScope.get_scopes()]\n",
+ " ei_data = pd.concat(\n",
+ " [ei_dict[scope] for scope in EScope.get_scopes()], axis=1\n",
+ " ).reset_index()\n",
+ " ei_data.columns = [\"year\"] + [\n",
+ " f\"ei_{scope.lower()}_by_year\" for scope in EScope.get_scopes()\n",
+ " ]\n",
" df = pd.DataFrame(\n",
- " data=[[company.company_name, \"\", company.company_id, company.sector] for i in ei_data.index],\n",
+ " data=[\n",
+ " [company.company_name, \"\", company.company_id, company.sector]\n",
+ " for i in ei_data.index\n",
+ " ],\n",
" columns=[\"company_name\", \"company_lei\", \"company_id\", \"sector\"],\n",
" )\n",
" projection_dfs.append(pd.concat([df, ei_data], axis=1))\n",
@@ -959,7 +993,14 @@
" if projection_tablename[i] == target_tablename:\n",
" target_df = df2\n",
" create_table_from_df(\n",
- " df2, ingest_schema, projection_tablename[i], ingest_engine, hive_bucket, hive_catalog, hive_schema, verbose=True\n",
+ " df2,\n",
+ " ingest_schema,\n",
+ " projection_tablename[i],\n",
+ " ingest_engine,\n",
+ " hive_bucket,\n",
+ " hive_catalog,\n",
+ " hive_schema,\n",
+ " verbose=True,\n",
" )"
]
},
@@ -992,7 +1033,9 @@
],
"source": [
"osc._do_sql(f\"select count (*) from {target_tablename}\", ingest_engine, verbose=True)\n",
- "osc._do_sql(f\"select count (*) from {trajectory_tablename}\", ingest_engine, verbose=True)"
+ "osc._do_sql(\n",
+ " f\"select count (*) from {trajectory_tablename}\", ingest_engine, verbose=True\n",
+ ")"
]
},
{
@@ -1112,21 +1155,31 @@
"\n",
"# Create emissions_data table using production_df and math\n",
"emissions_df = production_df.merge(\n",
- " target_df.drop(columns=[\"company_name\", \"company_lei\", \"sector\"]), on=[\"company_id\", \"year\"]\n",
+ " target_df.drop(columns=[\"company_name\", \"company_lei\", \"sector\"]),\n",
+ " on=[\"company_id\", \"year\"],\n",
")\n",
"emissions_df = emissions_df[\n",
" ~emissions_df.company_id.isin(\n",
- " [\"DE000SYM9999\", \"NO0010657505\", \"GB0000961622\", \"DE000BASF111\", \"IE00BZ12WP82\", \"FR0004024222\"]\n",
+ " [\n",
+ " \"DE000SYM9999\",\n",
+ " \"NO0010657505\",\n",
+ " \"GB0000961622\",\n",
+ " \"DE000BASF111\",\n",
+ " \"IE00BZ12WP82\",\n",
+ " \"FR0004024222\",\n",
+ " ]\n",
" )\n",
"]\n",
"print(emissions_df.index.names)\n",
"for scope in EScope.get_scopes():\n",
" mask = emissions_df[f\"ei_{scope.lower()}_by_year\"].isna()\n",
- " emissions_df.loc[mask, f\"ei_{scope.lower()}_by_year\"] = emissions_df[\"production_by_year\"].map(\n",
- " lambda x: Q_(np.nan, f\"Mt CO2 / ({str(x.u)})\")\n",
- " )\n",
+ " emissions_df.loc[mask, f\"ei_{scope.lower()}_by_year\"] = emissions_df[\n",
+ " \"production_by_year\"\n",
+ " ].map(lambda x: Q_(np.nan, f\"Mt CO2 / ({str(x.u)})\"))\n",
" emissions_df[f\"co2_{scope.lower()}_by_year\"] = (\n",
- " emissions_df[\"production_by_year\"].mul(emissions_df[f\"ei_{scope.lower()}_by_year\"]).astype(\"pint[Mt CO2e]\")\n",
+ " emissions_df[\"production_by_year\"]\n",
+ " .mul(emissions_df[f\"ei_{scope.lower()}_by_year\"])\n",
+ " .astype(\"pint[Mt CO2e]\")\n",
" )\n",
" emissions_df = emissions_df.drop(columns=f\"ei_{scope.lower()}_by_year\")\n",
"emissions_df = emissions_df.drop(columns=\"production_by_year\")\n",
diff --git a/notebooks/02a_vault_dev.ipynb b/notebooks/02a_vault_dev.ipynb
index e6af8bc..8634c54 100644
--- a/notebooks/02a_vault_dev.ipynb
+++ b/notebooks/02a_vault_dev.ipynb
@@ -59,9 +59,6 @@
"\n",
"import json\n",
"import pandas as pd\n",
- "import numpy as np\n",
- "from numpy.testing import assert_array_equal\n",
- "import ITR\n",
"\n",
"# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
"# from ITR.temperature_score import TemperatureScore\n",
@@ -77,7 +74,7 @@
"\n",
"# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n",
"# IProductionBenchmarkScopes\n",
- "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
+ "from ITR.interfaces import IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
"\n",
"from ITR.data.osc_units import ureg"
]
@@ -92,7 +89,9 @@
"# Load some standard environment variables from a dot-env file, if it exists.\n",
"# If no such file can be found, does not fail, and so allows these environment vars to\n",
"# be populated in some other way\n",
- "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n",
+ "dotenv_dir = os.environ.get(\n",
+ " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n",
+ ")\n",
"dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
"if os.path.exists(dotenv_path):\n",
" load_dotenv(dotenv_path=dotenv_path, override=True)"
@@ -145,9 +144,20 @@
"qres = osc._do_sql(f\"show tables in {ingest_schema}\", engine_dev, verbose=True)\n",
"\n",
"# Check that we have the tables we need\n",
- "required_tables = [\"company_data\", \"target_data\", \"trajectory_data\", \"emissions_data\", \"benchmark_prod\", \"benchmark_ei\"]\n",
+ "required_tables = [\n",
+ " \"company_data\",\n",
+ " \"target_data\",\n",
+ " \"trajectory_data\",\n",
+ " \"emissions_data\",\n",
+ " \"benchmark_prod\",\n",
+ " \"benchmark_ei\",\n",
+ "]\n",
"existing_tables = [q[0] for q in qres]\n",
- "missing_tables = [rtable for rtable in required_tables if f\"{itr_prefix}{rtable}\" not in existing_tables]\n",
+ "missing_tables = [\n",
+ " rtable\n",
+ " for rtable in required_tables\n",
+ " if f\"{itr_prefix}{rtable}\" not in existing_tables\n",
+ "]\n",
"if missing_tables:\n",
" print(f\"Missing tables (itr_prefix = {itr_prefix}): {missing_tables}\")\n",
" assert False"
@@ -254,7 +264,9 @@
],
"source": [
"root = root = os.path.dirname(os.getcwd() + \"/../test/\")\n",
- "benchmark_prod_json = os.path.join(root, \"inputs\", \"json\", \"benchmark_production_OECM.json\")\n",
+ "benchmark_prod_json = os.path.join(\n",
+ " root, \"inputs\", \"json\", \"benchmark_production_OECM.json\"\n",
+ ")\n",
"benchmark_EI_json = os.path.join(root, \"inputs\", \"json\", \"benchmark_EI_OECM_S3.json\")\n",
"\n",
"# load production benchmarks\n",
@@ -262,7 +274,9 @@
" parsed_json = json.load(json_file)\n",
"prod_bms = IProductionBenchmarkScopes.parse_obj(parsed_json)\n",
"vault_production_bm = VaultProviderProductionBenchmark(\n",
- " engine=engine_dev, benchmark_name=f\"{itr_prefix}benchmark_prod\", production_benchmarks=prod_bms\n",
+ " engine=engine_dev,\n",
+ " benchmark_name=f\"{itr_prefix}benchmark_prod\",\n",
+ " production_benchmarks=prod_bms,\n",
")\n",
"\n",
"# load intensity benchmarks\n",
@@ -275,10 +289,16 @@
"\n",
"# load company data\n",
"# TODO: Pandas reads null data mixed with integers as float64 (np.nan). This can be fixed post hoc with astype('Int16')\n",
- "vault_company_data = VaultCompanyDataProvider(engine=engine_dev, company_table=f\"{itr_prefix}company_data\")\n",
+ "vault_company_data = VaultCompanyDataProvider(\n",
+ " engine=engine_dev, company_table=f\"{itr_prefix}company_data\"\n",
+ ")\n",
"\n",
"vault_warehouse = DataVaultWarehouse(\n",
- " engine_dev, vault_company_data, vault_production_bm, vault_EI_bm, itr_prefix=itr_prefix\n",
+ " engine_dev,\n",
+ " vault_company_data,\n",
+ " vault_production_bm,\n",
+ " vault_EI_bm,\n",
+ " itr_prefix=itr_prefix,\n",
")"
]
},
@@ -797,7 +817,10 @@
}
],
"source": [
- "df = requantify_df(sql_df.dropna(), typemap={\"co2_s1_by_year\": \"pint[t CO2]\", \"co2_s2_by_year\": \"pint[t CO2]\"})\n",
+ "df = requantify_df(\n",
+ " sql_df.dropna(),\n",
+ " typemap={\"co2_s1_by_year\": \"pint[t CO2]\", \"co2_s2_by_year\": \"pint[t CO2]\"},\n",
+ ")\n",
"df"
]
},
@@ -872,10 +895,14 @@
"source": [
"%matplotlib inline\n",
"ureg.setup_matplotlib()\n",
- "plottable_df = df.pivot(index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\").reset_index()\n",
+ "plottable_df = df.pivot(\n",
+ " index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\"\n",
+ ").reset_index()\n",
"\n",
"# Must plot the first few columns, but then plot 1/3rd of the companies so as not to over-clutter the graph\n",
- "plottable_df.iloc[:, [x for x in list(range(0, 3)) + list(range(3, 37))]].plot(x=\"year\", kind=\"line\", figsize=(24, 10))"
+ "plottable_df.iloc[:, [x for x in list(range(0, 3)) + list(range(3, 37))]].plot(\n",
+ " x=\"year\", kind=\"line\", figsize=(24, 10)\n",
+ ")"
]
},
{
diff --git a/notebooks/02b_vault_dev.ipynb b/notebooks/02b_vault_dev.ipynb
index 4cc7fd2..285a55a 100644
--- a/notebooks/02b_vault_dev.ipynb
+++ b/notebooks/02b_vault_dev.ipynb
@@ -37,15 +37,10 @@
"outputs": [],
"source": [
"import os\n",
- "import pathlib\n",
"import osc_ingest_trino as osc\n",
- "import trino\n",
- "from sqlalchemy.engine import create_engine\n",
"\n",
"import json\n",
"import pandas as pd\n",
- "import numpy as np\n",
- "import ITR\n",
"\n",
"from ITR.data.osc_units import ureg\n",
"from ITR.data.vault_providers import (\n",
@@ -58,7 +53,7 @@
"\n",
"# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n",
"# IProductionBenchmarkScopes\n",
- "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
+ "from ITR.interfaces import IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
"ingest_schema = \"demo_dv\"\n",
@@ -128,13 +123,19 @@
}
],
"source": [
- "engine_dev = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)\n",
+ "engine_dev = osc.attach_trino_engine(\n",
+ " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n",
+ ")\n",
"qres = osc._do_sql(f\"show tables in {ingest_schema}\", engine_dev, verbose=True)\n",
"\n",
"# Check that we have the tables we need\n",
"required_tables = [\"company_data\", \"target_data\", \"trajectory_data\", \"emissions_data\"]\n",
"existing_tables = [q[0] for q in qres]\n",
- "missing_tables = [rtable for rtable in required_tables if f\"{itr_prefix}{rtable}\" not in existing_tables]\n",
+ "missing_tables = [\n",
+ " rtable\n",
+ " for rtable in required_tables\n",
+ " if f\"{itr_prefix}{rtable}\" not in existing_tables\n",
+ "]\n",
"if missing_tables:\n",
" print(f\"Missing tables (itr_prefix = {itr_prefix}): {missing_tables}\")\n",
" assert False"
@@ -315,10 +316,16 @@
"\n",
"# load company data\n",
"# TODO: Pandas reads null data mixed with integers as float64 (np.nan). This can be fixed post hoc with astype('Int16')\n",
- "vault_company_data = VaultCompanyDataProvider(engine=engine_dev, company_table=f\"{itr_prefix}company_data\")\n",
+ "vault_company_data = VaultCompanyDataProvider(\n",
+ " engine=engine_dev, company_table=f\"{itr_prefix}company_data\"\n",
+ ")\n",
"\n",
"vault_warehouse = DataVaultWarehouse(\n",
- " engine_dev, vault_company_data, vault_production_bm, vault_EI_bm, itr_prefix=itr_prefix\n",
+ " engine_dev,\n",
+ " vault_company_data,\n",
+ " vault_production_bm,\n",
+ " vault_EI_bm,\n",
+ " itr_prefix=itr_prefix,\n",
")"
]
},
@@ -693,9 +700,17 @@
"outputs": [],
"source": [
"df = requantify_df(\n",
- " sql_df.dropna(), typemap={\"co2_s1_by_year\": \"Mt CO2\", \"co2_s2_by_year\": \"Mt CO2\", \"co2_s3_by_year\": \"Mt CO2\"}\n",
+ " sql_df.dropna(),\n",
+ " typemap={\n",
+ " \"co2_s1_by_year\": \"Mt CO2\",\n",
+ " \"co2_s2_by_year\": \"Mt CO2\",\n",
+ " \"co2_s3_by_year\": \"Mt CO2\",\n",
+ " },\n",
").convert_dtypes()\n",
- "df = df[df.company_id.ne(\"US6362744095+Gas Utilities\") & df.company_id.ne(\"US0236081024+Gas Utilities\")]\n",
+ "df = df[\n",
+ " df.company_id.ne(\"US6362744095+Gas Utilities\")\n",
+ " & df.company_id.ne(\"US0236081024+Gas Utilities\")\n",
+ "]\n",
"df = df[df.co2_s1_by_year.gt(ureg(\"10.0 Mt CO2e\"))]"
]
},
@@ -1009,7 +1024,9 @@
],
"source": [
"ureg.setup_matplotlib(True)\n",
- "plottable_df = df.pivot(index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\").reset_index()\n",
+ "plottable_df = df.pivot(\n",
+ " index=\"year\", columns=\"company_name\", values=\"co2_s1_by_year\"\n",
+ ").reset_index()\n",
"# Must plot the first few columns, but then plot 1/3rd of the companies so as not to over-clutter the graph\n",
"plottable_df.plot(x=\"year\", kind=\"line\", figsize=(24, 10))"
]
diff --git a/notebooks/03_vault_quant.ipynb b/notebooks/03_vault_quant.ipynb
index 20c145b..86cbf16 100644
--- a/notebooks/03_vault_quant.ipynb
+++ b/notebooks/03_vault_quant.ipynb
@@ -35,16 +35,11 @@
"outputs": [],
"source": [
"import os\n",
- "import pathlib\n",
"import osc_ingest_trino as osc\n",
"import re\n",
- "import trino\n",
- "from sqlalchemy.engine import create_engine\n",
"\n",
- "import json\n",
"import numpy as np\n",
"import pandas as pd\n",
- "import ITR\n",
"from ITR_examples import data_dir as xlsx_data_dir\n",
"\n",
"# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
@@ -53,16 +48,14 @@
"# from ITR.data.data_warehouse import DataWarehouse\n",
"from ITR.data.vault_providers import (\n",
" VaultCompanyDataProvider,\n",
- " VaultProviderProductionBenchmark,\n",
- " VaultProviderIntensityBenchmark,\n",
" DataVaultWarehouse,\n",
" requantify_df,\n",
")\n",
"\n",
"# from ITR.interfaces import ICompanyData, EScope, ETimeFrames, PortfolioCompany, IEIBenchmarkScopes, \\\n",
"# IProductionBenchmarkScopes\n",
- "from ITR.interfaces import EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
- "from ITR.data.osc_units import ureg, Q_, PA_\n",
+ "from ITR.interfaces import EScope\n",
+ "from ITR.data.osc_units import ureg\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
"ingest_schema = \"demo_dv\"\n",
@@ -151,7 +144,9 @@
}
],
"source": [
- "engine_quant = osc.attach_trino_engine(verbose=True, catalog=ingest_catalog, schema=ingest_schema)\n",
+ "engine_quant = osc.attach_trino_engine(\n",
+ " verbose=True, catalog=ingest_catalog, schema=ingest_schema\n",
+ ")\n",
"print(\"connecting with engine \" + str(engine_quant))\n",
"osc._do_sql(f\"show tables in {ingest_schema}\", engine_quant, verbose=True)"
]
@@ -259,7 +254,10 @@
")\n",
"\n",
"vault_warehouse.quant_init(\n",
- " engine_quant, company_data=vault_company_data, ingest_schema=\"demo_dv\", itr_prefix=itr_prefix\n",
+ " engine_quant,\n",
+ " company_data=vault_company_data,\n",
+ " ingest_schema=\"demo_dv\",\n",
+ " itr_prefix=itr_prefix,\n",
")"
]
},
@@ -306,9 +304,9 @@
}
],
"source": [
- "sql_temp_score_df = pd.read_sql_table(f\"{itr_prefix}temperature_scores\", engine_quant).replace(\n",
- " [np.inf, -np.inf], np.nan\n",
- ")"
+ "sql_temp_score_df = pd.read_sql_table(\n",
+ " f\"{itr_prefix}temperature_scores\", engine_quant\n",
+ ").replace([np.inf, -np.inf], np.nan)"
]
},
{
@@ -500,7 +498,11 @@
],
"source": [
"temp_score_df = requantify_df(\n",
- " sql_temp_score_df, typemap={\"trajectory_temperature_score\": \"delta_degC\", \"target_temperature_score\": \"delta_degC\"}\n",
+ " sql_temp_score_df,\n",
+ " typemap={\n",
+ " \"trajectory_temperature_score\": \"delta_degC\",\n",
+ " \"target_temperature_score\": \"delta_degC\",\n",
+ " },\n",
")\n",
"temp_score_df = temp_score_df[\n",
" ~temp_score_df.company_id.isin(\n",
@@ -526,7 +528,12 @@
"plottable_df = (\n",
" temp_score_df.loc[\n",
" temp_score_df.year == 2050,\n",
- " [\"company_name\", \"scope\", \"trajectory_temperature_score\", \"target_temperature_score\"],\n",
+ " [\n",
+ " \"company_name\",\n",
+ " \"scope\",\n",
+ " \"trajectory_temperature_score\",\n",
+ " \"target_temperature_score\",\n",
+ " ],\n",
" ]\n",
" .sort_values(by=\"company_name\")\n",
" .set_index([\"company_name\", \"scope\"])\n",
@@ -541,7 +548,8 @@
"outputs": [],
"source": [
"plottable_df[\"average_score\"] = (\n",
- " plottable_df[\"trajectory_temperature_score\"] + plottable_df[\"target_temperature_score\"]\n",
+ " plottable_df[\"trajectory_temperature_score\"]\n",
+ " + plottable_df[\"target_temperature_score\"]\n",
") / 2.0"
]
},
@@ -730,13 +738,17 @@
"outputs": [],
"source": [
"# loading sample portfolio\n",
- "df_portfolio = pd.read_excel(company_data_path, sheet_name=\"Portfolio\", index_col=\"company_id\")\n",
+ "df_portfolio = pd.read_excel(\n",
+ " company_data_path, sheet_name=\"Portfolio\", index_col=\"company_id\"\n",
+ ")\n",
"\n",
"for i, col in enumerate(df_portfolio.columns):\n",
" if col.startswith(\"investment_value\"):\n",
" if match := re.match(r\".*\\[([A-Z]{3})\\]\", col, re.I):\n",
" df_portfolio.rename(columns={col: \"investment_value\"}, inplace=True)\n",
- " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(f\"pint[{match.group(1)}]\")\n",
+ " df_portfolio[\"investment_value\"] = df_portfolio[\"investment_value\"].astype(\n",
+ " f\"pint[{match.group(1)}]\"\n",
+ " )\n",
"df_portfolio = df_portfolio.assign(scope=\"S1S2\").set_index(\"scope\", append=True)"
]
},
@@ -816,7 +828,9 @@
"metadata": {},
"outputs": [],
"source": [
- "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (portfolio_df[\"investment_value\"] / weight_for_WATS)\n",
+ "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (\n",
+ " portfolio_df[\"investment_value\"] / weight_for_WATS\n",
+ ")\n",
"portfolio_df.head()"
]
},
@@ -827,7 +841,9 @@
"metadata": {},
"outputs": [],
"source": [
- "print(f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\")"
+ "print(\n",
+ " f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\"\n",
+ ")"
]
},
{
@@ -860,7 +876,9 @@
"metadata": {},
"outputs": [],
"source": [
- "print(f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\")"
+ "print(\n",
+ " f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\"\n",
+ ")"
]
},
{
@@ -907,7 +925,9 @@
" portfolio_df[weight_column] = vault_company_data.compute_portfolio_weights(\n",
" portfolio_df[\"pa_score\"], 2019, v, EScope.S1S2\n",
" )\n",
- " print(f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\")\n",
+ " print(\n",
+ " f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\"\n",
+ " )\n",
"\n",
"portfolio_df"
]
@@ -937,7 +957,11 @@
"metadata": {},
"outputs": [],
"source": [
- "osc._do_sql(f\"select * from {ingest_schema}.{itr_prefix}company_data\", engine_quant, verbose=False)"
+ "osc._do_sql(\n",
+ " f\"select * from {ingest_schema}.{itr_prefix}company_data\",\n",
+ " engine_quant,\n",
+ " verbose=False,\n",
+ ")"
]
},
{
diff --git a/notebooks/04_vault_user.ipynb b/notebooks/04_vault_user.ipynb
index b4eacc7..5b2ace6 100644
--- a/notebooks/04_vault_user.ipynb
+++ b/notebooks/04_vault_user.ipynb
@@ -44,7 +44,9 @@
"# Load some standard environment variables from a dot-env file, if it exists.\n",
"# If no such file can be found, does not fail, and so allows these environment vars to\n",
"# be populated in some other way\n",
- "dotenv_dir = os.environ.get(\"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\"))\n",
+ "dotenv_dir = os.environ.get(\n",
+ " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n",
+ ")\n",
"dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
"if os.path.exists(dotenv_path):\n",
" load_dotenv(dotenv_path=dotenv_path, override=True)"
@@ -83,10 +85,7 @@
}
],
"source": [
- "import json\n",
"import pandas as pd\n",
- "from numpy.testing import assert_array_equal\n",
- "import ITR\n",
"\n",
"# from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
"# from ITR.temperature_score import TemperatureScore\n",
@@ -157,7 +156,9 @@
],
"source": [
"sqlstring = \"trino://{user}@{host}:{port}/\".format(\n",
- " user=os.environ[\"TRINO_USER_USER3\"], host=os.environ[\"TRINO_HOST\"], port=os.environ[\"TRINO_PORT\"]\n",
+ " user=os.environ[\"TRINO_USER_USER3\"],\n",
+ " host=os.environ[\"TRINO_HOST\"],\n",
+ " port=os.environ[\"TRINO_PORT\"],\n",
")\n",
"\n",
"ingest_catalog = \"osc_datacommons_dev\"\n",
@@ -400,7 +401,12 @@
}
],
"source": [
- "portfolio_df = pd.read_csv(\"data/mdt-20220116-portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\", index_col=\"company_id\")\n",
+ "portfolio_df = pd.read_csv(\n",
+ " \"data/mdt-20220116-portfolio.csv\",\n",
+ " encoding=\"iso-8859-1\",\n",
+ " sep=\";\",\n",
+ " index_col=\"company_id\",\n",
+ ")\n",
"# portfolio_df = pd.read_csv(\"data/rmi_all.csv\", encoding=\"iso-8859-1\", sep=',', index_col='company_id')\n",
"portfolio_df"
]
@@ -969,7 +975,9 @@
}
],
"source": [
- "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (portfolio_df[\"investment_value\"] / weight_for_WATS)\n",
+ "portfolio_df[\"WATS_weight\"] = portfolio_df[\"pa_score\"] * (\n",
+ " portfolio_df[\"investment_value\"] / weight_for_WATS\n",
+ ")\n",
"portfolio_df.head()"
]
},
@@ -988,7 +996,9 @@
}
],
"source": [
- "print(f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\")"
+ "print(\n",
+ " f\"Portfolio temperature score based on WATS = {portfolio_df['WATS_weight'].sum()}\"\n",
+ ")"
]
},
{
@@ -1148,7 +1158,9 @@
}
],
"source": [
- "print(f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\")"
+ "print(\n",
+ " f\"Portfolio temperature score based on TETS = {portfolio_df['TETS_weight'].sum()}\"\n",
+ ")"
]
},
{
@@ -1993,7 +2005,9 @@
" portfolio_df[weight_column] = vault_company_data.compute_portfolio_weights(\n",
" portfolio_df[\"pa_score\"], 2019, v, EScope.S1S2\n",
" )\n",
- " print(f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\")\n",
+ " print(\n",
+ " f\"Portfolio temperature score based on {k} = {portfolio_df[weight_column].sum()}\"\n",
+ " )\n",
"\n",
"portfolio_df"
]
@@ -2146,7 +2160,11 @@
}
],
"source": [
- "osc._do_sql(f\"select * from {ingest_schema}.{itr_prefix}company_data\", engine_user, verbose=False)"
+ "osc._do_sql(\n",
+ " f\"select * from {ingest_schema}.{itr_prefix}company_data\",\n",
+ " engine_user,\n",
+ " verbose=False,\n",
+ ")"
]
},
{
diff --git a/notebooks/co2budget.ipynb b/notebooks/co2budget.ipynb
index 8078735..507cfe4 100644
--- a/notebooks/co2budget.ipynb
+++ b/notebooks/co2budget.ipynb
@@ -24,12 +24,9 @@
"outputs": [],
"source": [
"import os\n",
- "import sys\n",
"import json\n",
- "import argparse\n",
"import pandas as pd\n",
- "import plotly.express as px\n",
- "import plotly.graph_objects as go"
+ "import plotly.express as px"
]
},
{
@@ -41,8 +38,16 @@
"source": [
"import ITR\n",
"from ITR import data_dir\n",
- "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n",
- "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes, DF_ICompanyEIProjections\n",
+ "from ITR.data.base_providers import (\n",
+ " BaseProviderProductionBenchmark,\n",
+ " BaseProviderIntensityBenchmark,\n",
+ ")\n",
+ "from ITR.interfaces import (\n",
+ " EScope,\n",
+ " IProductionBenchmarkScopes,\n",
+ " IEIBenchmarkScopes,\n",
+ " DF_ICompanyEIProjections,\n",
+ ")\n",
"from ITR.data.template import TemplateProviderCompany\n",
"from ITR.data.data_warehouse import DataWarehouse\n",
"from ITR.data.osc_units import PA_"
@@ -118,7 +123,9 @@
"metadata": {},
"outputs": [],
"source": [
- "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20230106 ITR V2 Sample Data.xlsx\")\n",
+ "template_data_path = os.path.abspath(\n",
+ " \"../src/ITR_examples/data/20230106 ITR V2 Sample Data.xlsx\"\n",
+ ")\n",
"# template_data_path = \"data/20220927 ITR Tool Sample Data.xlsx\"\n",
"\n",
"# Remove the # and space on the next line to point the template_data_path variable at your own data\n",
@@ -135,7 +142,10 @@
"outputs": [],
"source": [
"template_provider = DataWarehouse(\n",
- " template_company_data, production_bm, intensity_bm, estimate_missing_data=DataWarehouse.estimate_missing_s3_data\n",
+ " template_company_data,\n",
+ " production_bm,\n",
+ " intensity_bm,\n",
+ " estimate_missing_data=DataWarehouse.estimate_missing_s3_data,\n",
")\n",
"\n",
"# Fills in template_company_data._companies[0].projected_targets.S1S2\n",
@@ -155,7 +165,12 @@
"outputs": [],
"source": [
"data, idx = zip(\n",
- " *[(i, (bm.sector, bm.region)) for i, bm in enumerate(production_bm._productions_benchmarks.AnyScope.benchmarks)]\n",
+ " *[\n",
+ " (i, (bm.sector, bm.region))\n",
+ " for i, bm in enumerate(\n",
+ " production_bm._productions_benchmarks.AnyScope.benchmarks\n",
+ " )\n",
+ " ]\n",
")\n",
"production_bm_mapper = pd.Series(data, idx)\n",
"\n",
@@ -171,7 +186,9 @@
" data, idx = zip(\n",
" *[\n",
" (i, (bm.sector, bm.region))\n",
- " for i, bm in enumerate(intensity_bm._EI_benchmarks[scope.name].benchmarks)\n",
+ " for i, bm in enumerate(\n",
+ " intensity_bm._EI_benchmarks[scope.name].benchmarks\n",
+ " )\n",
" ]\n",
" )\n",
" mapper_dict[scope.name] = pd.Series(data, idx)\n",
@@ -283,8 +300,12 @@
" models_dict[sector_region_idx] = f\"{model.sector} in {region}\"\n",
"\n",
"for k, v in models_dict.items():\n",
- " sector_prod_baseline = production_bm._productions_benchmarks.AnyScope.benchmarks[k].base_year_production\n",
- " print(f\"setting sector_prod_baseline (total units of output) for {v} to {sector_prod_baseline}\")"
+ " sector_prod_baseline = production_bm._productions_benchmarks.AnyScope.benchmarks[\n",
+ " k\n",
+ " ].base_year_production\n",
+ " print(\n",
+ " f\"setting sector_prod_baseline (total units of output) for {v} to {sector_prod_baseline}\"\n",
+ " )"
]
},
{
@@ -303,10 +324,17 @@
" elif (model.sector, \"Global\") in bm_mapper.index:\n",
" ei_sector_region_idx = bm_mapper.loc[sector, \"Global\"]\n",
" ei_data, ei_idx = zip(\n",
- " *[(ei.value, ei.year) for ei in scoped_bm.benchmarks[ei_sector_region_idx].projections_nounits]\n",
+ " *[\n",
+ " (ei.value, ei.year)\n",
+ " for ei in scoped_bm.benchmarks[ei_sector_region_idx].projections_nounits\n",
+ " ]\n",
" )\n",
" sector_ei = pd.Series(\n",
- " PA_(ei_data, dtype=scoped_bm.benchmarks[ei_sector_region_idx].benchmark_metric), index=ei_idx\n",
+ " PA_(\n",
+ " ei_data,\n",
+ " dtype=scoped_bm.benchmarks[ei_sector_region_idx].benchmark_metric,\n",
+ " ),\n",
+ " index=ei_idx,\n",
" )\n",
" else:\n",
" sector_ei = None\n",
@@ -351,7 +379,7 @@
" return model_ei.S3.projections\n",
" else:\n",
" # No sense trying to print `model_ei` as the __str__ method will make it an empty DataFrame (aggregation of empty Series)\n",
- " raise ValueError(f\"get_ei_projections: no valid scope found\")\n",
+ " raise ValueError(\"get_ei_projections: no valid scope found\")\n",
" return get_ei_projections_from_ICompanyEIProjections(model_ei)"
]
},
@@ -373,41 +401,60 @@
" prod_data, prod_idx = zip(\n",
" *[\n",
" (p.value, p.year)\n",
- " for p in production_bm._productions_benchmarks.AnyScope.benchmarks[sector_region_idx].projections_nounits\n",
+ " for p in production_bm._productions_benchmarks.AnyScope.benchmarks[\n",
+ " sector_region_idx\n",
+ " ].projections_nounits\n",
" ]\n",
" )\n",
" sector_production = pd.Series(prod_data, prod_idx)\n",
"\n",
" if ei_s1_bm_mapper is not None:\n",
- " sector_ei_s1 = get_ei_scope_by_sector_region(\"S1\", model.sector, region, ei_s1_bm_mapper)\n",
+ " sector_ei_s1 = get_ei_scope_by_sector_region(\n",
+ " \"S1\", model.sector, region, ei_s1_bm_mapper\n",
+ " )\n",
" else:\n",
" sector_ei_s1 = None\n",
" if ei_s1s2_bm_mapper is not None:\n",
- " sector_ei_s1s2 = get_ei_scope_by_sector_region(\"S1S2\", model.sector, region, ei_s1s2_bm_mapper)\n",
+ " sector_ei_s1s2 = get_ei_scope_by_sector_region(\n",
+ " \"S1S2\", model.sector, region, ei_s1s2_bm_mapper\n",
+ " )\n",
" else:\n",
" sector_ei_s1s2 = None\n",
" if ei_s3_bm_mapper is not None:\n",
- " sector_ei_s3 = get_ei_scope_by_sector_region(\"S3\", model.sector, region, ei_s3_bm_mapper)\n",
+ " sector_ei_s3 = get_ei_scope_by_sector_region(\n",
+ " \"S3\", model.sector, region, ei_s3_bm_mapper\n",
+ " )\n",
" else:\n",
" sector_ei_s3 = None\n",
" if ei_s1s2s3_bm_mapper is not None:\n",
- " sector_ei_s1s2s3 = get_ei_scope_by_sector_region(\"S1S2S3\", model.sector, region, ei_s1s2s3_bm_mapper)\n",
+ " sector_ei_s1s2s3 = get_ei_scope_by_sector_region(\n",
+ " \"S1S2S3\", model.sector, region, ei_s1s2s3_bm_mapper\n",
+ " )\n",
" else:\n",
" sector_ei_s1s2s3 = None\n",
" sector_growth_partial = sector_production.add(1).cumprod()\n",
- " data, idx = zip(*[(p.value.m, p.year) for p in model.historic_data.productions if p.year in [2019, 2020]])\n",
+ " data, idx = zip(\n",
+ " *[\n",
+ " (p.value.m, p.year)\n",
+ " for p in model.historic_data.productions\n",
+ " if p.year in [2019, 2020]\n",
+ " ]\n",
+ " )\n",
" co_historic_productions = pd.Series(data, idx)\n",
"\n",
- " co_projected_productions = co_historic_productions[2020] * sector_growth_partial[sector_growth_partial.index > 2020]\n",
- "\n",
- " co_productions = pd.concat([co_historic_productions, co_projected_productions]).astype(\n",
- " f\"pint[{model.production_metric}]\"\n",
+ " co_projected_productions = (\n",
+ " co_historic_productions[2020]\n",
+ " * sector_growth_partial[sector_growth_partial.index > 2020]\n",
" )\n",
"\n",
+ " co_productions = pd.concat(\n",
+ " [co_historic_productions, co_projected_productions]\n",
+ " ).astype(f\"pint[{model.production_metric}]\")\n",
+ "\n",
" co_ei_trajectory = get_ei_projections(model.projected_intensities)\n",
" try:\n",
" co_ei_target = get_ei_projections(model.projected_targets)\n",
- " except ValueError as e:\n",
+ " except ValueError:\n",
" # print(e)\n",
" print(\n",
" f\"model.projected_targets is empty for company {model.company_name}; company_id = {model.company_id}; index = {i}\"\n",
@@ -415,7 +462,9 @@
" continue\n",
"\n",
" plot_dict = {\n",
- " \"Trajectory\": (co_productions * co_ei_trajectory).pint.to(\"t CO2e\").pint.m.cumsum(),\n",
+ " \"Trajectory\": (co_productions * co_ei_trajectory)\n",
+ " .pint.to(\"t CO2e\")\n",
+ " .pint.m.cumsum(),\n",
" \"Target\": (co_productions * co_ei_target).pint.to(\"t CO2e\").pint.m.cumsum(),\n",
" }\n",
" if model.scope == EScope.S1:\n",
@@ -444,13 +493,22 @@
" continue\n",
" else:\n",
" continue\n",
- " plot_dict[bm_key] = (sector_growth_partial * sector_ei).mul(co_productions[2019]).pint.to(\"t CO2e\").pint.m.cumsum()\n",
+ " plot_dict[bm_key] = (\n",
+ " (sector_growth_partial * sector_ei)\n",
+ " .mul(co_productions[2019])\n",
+ " .pint.to(\"t CO2e\")\n",
+ " .pint.m.cumsum()\n",
+ " )\n",
" sector_df = pd.DataFrame(plot_dict)\n",
" fig = px.line(\n",
" sector_df.apply(ITR.nominal_values),\n",
" y=[k for k in plot_dict.keys()],\n",
" title=fig_title,\n",
-    "    labels={\"index\": \"Year\", \"value\": \"t CO2\", \"variable\": f\"{model.company_name}<br>{model.company_id}\"},\n",
+ " labels={\n",
+ " \"index\": \"Year\",\n",
+ " \"value\": \"t CO2\",\n",
+    "        \"variable\": f\"{model.company_name}<br>{model.company_id}\",\n",
+ " },\n",
" )\n",
" fig.write_image(f\"{fig_title}-images/co2_bm_{i}.jpeg\")\n",
" fig.show"
diff --git a/notebooks/quick_temp_score_calculation.ipynb b/notebooks/quick_temp_score_calculation.ipynb
index 3775800..3da1bb3 100644
--- a/notebooks/quick_temp_score_calculation.ipynb
+++ b/notebooks/quick_temp_score_calculation.ipynb
@@ -78,19 +78,21 @@
"import pandas as pd\n",
"\n",
"import ITR\n",
- "from ITR.data.excel import ExcelProviderCompany, ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n",
+ "from ITR.data.excel import (\n",
+ " ExcelProviderCompany,\n",
+ " ExcelProviderProductionBenchmark,\n",
+ " ExcelProviderIntensityBenchmark,\n",
+ ")\n",
"from ITR.data.data_warehouse import DataWarehouse\n",
"from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
"from ITR.temperature_score import TemperatureScore\n",
"from ITR.interfaces import ETimeFrames, EScope\n",
- "from ITR.data.osc_units import ureg, Q_, PA_, asPintDataFrame, requantify_df_from_columns\n",
+ "from ITR.data.osc_units import ureg, Q_, asPintDataFrame, requantify_df_from_columns\n",
"\n",
"from ITR_examples.utils import (\n",
" collect_company_contributions,\n",
" plot_grouped_statistics,\n",
- " anonymize,\n",
" plot_grouped_heatmap,\n",
- " print_grouped_scores,\n",
" get_contributions_per_group,\n",
")"
]
@@ -220,7 +222,9 @@
"metadata": {},
"outputs": [],
"source": [
- "excel_production_bm = ExcelProviderProductionBenchmark(excel_path=\"data/benchmark_OECM_S3.xlsx\")"
+ "excel_production_bm = ExcelProviderProductionBenchmark(\n",
+ " excel_path=\"data/benchmark_OECM_S3.xlsx\"\n",
+ ")"
]
},
{
@@ -274,7 +278,9 @@
"metadata": {},
"outputs": [],
"source": [
- "df_portfolio = requantify_df_from_columns(pd.read_csv(\"data/example_portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\"))"
+ "df_portfolio = requantify_df_from_columns(\n",
+ " pd.read_csv(\"data/example_portfolio.csv\", encoding=\"iso-8859-1\", sep=\";\")\n",
+ ")"
]
},
{
@@ -413,7 +419,9 @@
" scopes=[EScope.S1S2],\n",
" aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n",
")\n",
- "amended_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)"
+ "amended_portfolio = temperature_score.calculate(\n",
+ " data_warehouse=excel_provider, portfolio=companies\n",
+ ")"
]
},
{
@@ -631,7 +639,9 @@
"source": [
"grouping = [\"sector\", \"region\"]\n",
"temperature_score.grouping = grouping\n",
- "grouped_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)\n",
+ "grouped_portfolio = temperature_score.calculate(\n",
+ " data_warehouse=excel_provider, portfolio=companies\n",
+ ")\n",
"grouped_aggregations = temperature_score.aggregate_scores(grouped_portfolio)"
]
},
@@ -833,7 +843,9 @@
"region = \"Asia\"\n",
"sector = \"Steel\"\n",
"group = sector + \"-\" + region\n",
- "group_contributions = get_contributions_per_group(grouped_aggregations, analysis_parameters, group)\n",
+ "group_contributions = get_contributions_per_group(\n",
+ " grouped_aggregations, analysis_parameters, group\n",
+ ")\n",
"group_contributions.round(2)"
]
},
@@ -880,10 +892,16 @@
"grouping = [\"sector\"]\n",
"analysis_parameters = (time_frames, scopes, grouping)\n",
"\n",
- "temperature_score = TemperatureScore(time_frames=time_frames, scopes=scopes, grouping=grouping)\n",
- "amended_portfolio = temperature_score.calculate(data_warehouse=excel_provider, portfolio=companies)\n",
+ "temperature_score = TemperatureScore(\n",
+ " time_frames=time_frames, scopes=scopes, grouping=grouping\n",
+ ")\n",
+ "amended_portfolio = temperature_score.calculate(\n",
+ " data_warehouse=excel_provider, portfolio=companies\n",
+ ")\n",
"aggregated_portfolio = temperature_score.aggregate_scores(amended_portfolio)\n",
- "company_contributions = collect_company_contributions(aggregated_portfolio, amended_portfolio, analysis_parameters)"
+ "company_contributions = collect_company_contributions(\n",
+ " aggregated_portfolio, amended_portfolio, analysis_parameters\n",
+ ")"
]
},
{
@@ -904,7 +922,9 @@
],
"source": [
"plot_grouped_statistics(\n",
- " aggregated_portfolio, company_contributions.pint.dequantify().droplevel(level=1, axis=1), analysis_parameters\n",
+ " aggregated_portfolio,\n",
+ " company_contributions.pint.dequantify().droplevel(level=1, axis=1),\n",
+ " analysis_parameters,\n",
")"
]
},
@@ -1090,9 +1110,18 @@
],
"source": [
"sector_contributions = company_contributions[\n",
- " [\"company_name\", \"sector\", \"contribution\", \"temperature_score\", \"ownership_percentage\", \"portfolio_percentage\"]\n",
+ " [\n",
+ " \"company_name\",\n",
+ " \"sector\",\n",
+ " \"contribution\",\n",
+ " \"temperature_score\",\n",
+ " \"ownership_percentage\",\n",
+ " \"portfolio_percentage\",\n",
+ " ]\n",
"]\n",
- "asPintDataFrame(sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]).pint.dequantify().round(2)"
+ "asPintDataFrame(\n",
+ " sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]\n",
+ ").pint.dequantify().round(2)"
]
},
{
@@ -1132,7 +1161,9 @@
"outputs": [],
"source": [
"data_dump_filename = \"data_dump_dequantified.xlsx\"\n",
- "amended_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(data_dump_filename)"
+ "amended_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(\n",
+ " data_dump_filename\n",
+ ")"
]
},
{
diff --git a/notebooks/quick_template_score_calc.ipynb b/notebooks/quick_template_score_calc.ipynb
index 9cc041f..a14c578 100644
--- a/notebooks/quick_template_score_calc.ipynb
+++ b/notebooks/quick_template_score_calc.ipynb
@@ -45,11 +45,9 @@
"outputs": [],
"source": [
"import os\n",
- "import sys\n",
"import warnings\n",
"\n",
"import json\n",
- "import numpy as np\n",
"import pandas as pd"
]
},
@@ -65,15 +63,22 @@
"\n",
"import ITR\n",
"from ITR import data_dir\n",
- "from ITR.data.excel import ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n",
"from ITR.data.template import TemplateProviderCompany\n",
- "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n",
+ "from ITR.data.base_providers import (\n",
+ " BaseProviderProductionBenchmark,\n",
+ " BaseProviderIntensityBenchmark,\n",
+ ")\n",
"from ITR.data.data_warehouse import DataWarehouse\n",
"from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
"from ITR.temperature_score import TemperatureScore\n",
- "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
+ "from ITR.interfaces import (\n",
+ " ETimeFrames,\n",
+ " EScope,\n",
+ " IProductionBenchmarkScopes,\n",
+ " IEIBenchmarkScopes,\n",
+ ")\n",
"\n",
- "from ITR.data.osc_units import ureg, Q_, PA_, asPintDataFrame, requantify_df_from_columns\n",
+ "from ITR.data.osc_units import ureg, Q_, asPintDataFrame, requantify_df_from_columns\n",
"\n",
"from ITR_examples.utils import (\n",
" plot_grouped_heatmap,\n",
@@ -136,7 +141,9 @@
"benchmark_EI_OECM_PC = os.path.join(data_dir, \"benchmark_EI_OECM_PC.json\")\n",
"benchmark_EI_OECM_S3 = os.path.join(data_dir, \"benchmark_EI_OECM_S3.json\")\n",
"benchmark_EI_TPI = os.path.join(data_dir, \"benchmark_EI_TPI_2_degrees.json\")\n",
- "benchmark_EI_TPI_below_2 = os.path.join(data_dir, \"benchmark_EI_TPI_below_2_degrees.json\")\n",
+ "benchmark_EI_TPI_below_2 = os.path.join(\n",
+ " data_dir, \"benchmark_EI_TPI_below_2_degrees.json\"\n",
+ ")\n",
"\n",
"# load production benchmarks\n",
"with open(benchmark_prod_json) as json_file:\n",
@@ -193,7 +200,9 @@
"metadata": {},
"outputs": [],
"source": [
- "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\")"
+ "template_data_path = os.path.abspath(\n",
+ " \"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\"\n",
+ ")"
]
},
{
@@ -562,7 +571,9 @@
}
],
"source": [
- "df_portfolio = requantify_df_from_columns(pd.read_excel(template_data_path, sheet_name=\"Portfolio\"))\n",
+ "df_portfolio = requantify_df_from_columns(\n",
+ " pd.read_excel(template_data_path, sheet_name=\"Portfolio\")\n",
+ ")\n",
"display(df_portfolio.tail())"
]
},
@@ -611,7 +622,9 @@
" scopes=[EScope.S1S2S3],\n",
" aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n",
")\n",
- "enhanced_portfolio = temperature_score_s1s2s3.calculate(data_warehouse=template_provider, portfolio=companies)"
+ "enhanced_portfolio = temperature_score_s1s2s3.calculate(\n",
+ " data_warehouse=template_provider, portfolio=companies\n",
+ ")"
]
},
{
@@ -782,7 +795,9 @@
"source": [
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
- " display(enhanced_portfolio[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])"
+ " display(\n",
+ " enhanced_portfolio[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]]\n",
+ " )"
]
},
{
@@ -1635,7 +1650,8 @@
],
"source": [
"data = enhanced_portfolio[\n",
- " (enhanced_portfolio.scope == EScope.S1S2S3) & (enhanced_portfolio.temperature_score > Q_(3, \"delta_degC\"))\n",
+ " (enhanced_portfolio.scope == EScope.S1S2S3)\n",
+ " & (enhanced_portfolio.temperature_score > Q_(3, \"delta_degC\"))\n",
"]\n",
"data"
]
@@ -1665,7 +1681,9 @@
],
"source": [
"aggregated_scores = temperature_score_s1s2s3.aggregate_scores(enhanced_portfolio)\n",
- "print(f\"Temperature Score aggregation method = {temperature_score_s1s2s3.aggregation_method}\")"
+ "print(\n",
+ " f\"Temperature Score aggregation method = {temperature_score_s1s2s3.aggregation_method}\"\n",
+ ")"
]
},
{
@@ -1735,7 +1753,9 @@
"source": [
"grouping = [\"sector\", \"region\"]\n",
"temperature_score_s1s2s3.grouping = grouping\n",
- "grouped_portfolio = temperature_score_s1s2s3.calculate(data_warehouse=template_provider, portfolio=companies)\n",
+ "grouped_portfolio = temperature_score_s1s2s3.calculate(\n",
+ " data_warehouse=template_provider, portfolio=companies\n",
+ ")\n",
"grouped_aggregations = temperature_score_s1s2s3.aggregate_scores(grouped_portfolio)"
]
},
@@ -1848,7 +1868,9 @@
"sector = \"Steel\"\n",
"group = sector + \"-\" + region\n",
"analysis_parameters = ([ETimeFrames.LONG], [EScope.S1S2S3], grouping)\n",
- "group_contributions = get_contributions_per_group(grouped_aggregations, analysis_parameters, group)\n",
+ "group_contributions = get_contributions_per_group(\n",
+ " grouped_aggregations, analysis_parameters, group\n",
+ ")\n",
"group_contributions.round(2)"
]
},
@@ -1893,13 +1915,20 @@
"analysis_parameters = (time_frames, scopes, grouping)\n",
"\n",
"temperature_score_s1s2_s3 = TemperatureScore(\n",
- " time_frames=time_frames, scopes=scopes, grouping=grouping, aggregation_method=PortfolioAggregationMethod.WATS\n",
+ " time_frames=time_frames,\n",
+ " scopes=scopes,\n",
+ " grouping=grouping,\n",
+ " aggregation_method=PortfolioAggregationMethod.WATS,\n",
+ ")\n",
+ "enhanced_portfolio = temperature_score_s1s2_s3.calculate(\n",
+ " data_warehouse=template_provider, portfolio=companies\n",
")\n",
- "enhanced_portfolio = temperature_score_s1s2_s3.calculate(data_warehouse=template_provider, portfolio=companies)\n",
"aggregated_portfolio = temperature_score_s1s2_s3.aggregate_scores(enhanced_portfolio)\n",
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
- " company_contributions = collect_company_contributions(aggregated_portfolio, enhanced_portfolio, analysis_parameters)"
+ " company_contributions = collect_company_contributions(\n",
+ " aggregated_portfolio, enhanced_portfolio, analysis_parameters\n",
+ " )"
]
},
{
@@ -1920,7 +1949,9 @@
],
"source": [
"plot_grouped_statistics(\n",
- " aggregated_portfolio, company_contributions.pint.dequantify().droplevel(level=1, axis=1), analysis_parameters\n",
+ " aggregated_portfolio,\n",
+ " company_contributions.pint.dequantify().droplevel(level=1, axis=1),\n",
+ " analysis_parameters,\n",
")"
]
},
@@ -2128,7 +2159,9 @@
" \"portfolio_percentage\",\n",
" ]\n",
"]\n",
- "asPintDataFrame(sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]).pint.dequantify().round(2)"
+ "asPintDataFrame(\n",
+ " sector_contributions[sector_contributions[\"sector\"] == \"Steel\"][:10]\n",
+ ").pint.dequantify().round(2)"
]
},
{
@@ -2168,7 +2201,9 @@
"outputs": [],
"source": [
"data_dump_filename = \"data_dump_dequantified.xlsx\"\n",
- "enhanced_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(data_dump_filename)"
+ "enhanced_portfolio.set_index([\"company_name\"], append=True).pint.dequantify().to_excel(\n",
+ " data_dump_filename\n",
+ ")"
]
},
{
diff --git a/notebooks/s1s2_s3_calc.ipynb b/notebooks/s1s2_s3_calc.ipynb
index d3610e9..4c1b8b3 100644
--- a/notebooks/s1s2_s3_calc.ipynb
+++ b/notebooks/s1s2_s3_calc.ipynb
@@ -30,7 +30,6 @@
"outputs": [],
"source": [
"import os\n",
- "import sys\n",
"import warnings"
]
},
@@ -55,25 +54,23 @@
"\n",
"import ITR\n",
"from ITR import data_dir\n",
- "from ITR.data.excel import ExcelProviderProductionBenchmark, ExcelProviderIntensityBenchmark\n",
"from ITR.data.template import TemplateProviderCompany\n",
- "from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark\n",
+ "from ITR.data.base_providers import (\n",
+ " BaseProviderProductionBenchmark,\n",
+ " BaseProviderIntensityBenchmark,\n",
+ ")\n",
"from ITR.data.data_warehouse import DataWarehouse\n",
"from ITR.portfolio_aggregation import PortfolioAggregationMethod\n",
"from ITR.temperature_score import TemperatureScore\n",
- "from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes\n",
+ "from ITR.interfaces import (\n",
+ " ETimeFrames,\n",
+ " EScope,\n",
+ " IProductionBenchmarkScopes,\n",
+ " IEIBenchmarkScopes,\n",
+ ")\n",
"import pandas as pd\n",
"\n",
- "from ITR.data.osc_units import ureg, Q_, PA_, requantify_df_from_columns\n",
- "\n",
- "from ITR_examples.utils import (\n",
- " collect_company_contributions,\n",
- " plot_grouped_statistics,\n",
- " anonymize,\n",
- " plot_grouped_heatmap,\n",
- " print_grouped_scores,\n",
- " get_contributions_per_group,\n",
- ")"
+ "from ITR.data.osc_units import ureg, requantify_df_from_columns"
]
},
{
@@ -171,7 +168,9 @@
"# Remove the # and space on the next line to point the template_data_path variable at your own data\n",
"# template_data_path = \"data/your_template_here.xlsx\"\n",
"\n",
- "template_data_path = os.path.abspath(\"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\")\n",
+ "template_data_path = os.path.abspath(\n",
+ " \"../src/ITR_examples/data/20220927 ITR V2 Sample Data.xlsx\"\n",
+ ")\n",
"template_company_data = TemplateProviderCompany(excel_path=template_data_path)"
]
},
@@ -203,7 +202,9 @@
"AFOLU included = {base_intensity_bm_s1s2.is_AFOLU_included}\"\n",
")\n",
"\n",
- "template_provider_s3 = DataWarehouse(template_company_data, base_production_bm, base_intensity_bm_s3)\n",
+ "template_provider_s3 = DataWarehouse(\n",
+ " template_company_data, base_production_bm, base_intensity_bm_s3\n",
+ ")\n",
"print(\n",
" f\"S3:\\nBenchmark Temperature = {base_intensity_bm_s3.benchmark_temperature}\\n\\\n",
"Benchmark Global Budget = {base_intensity_bm_s3.benchmark_global_budget}\\n\\\n",
@@ -228,7 +229,9 @@
"metadata": {},
"outputs": [],
"source": [
- "df_portfolio = requantify_df_from_columns(pd.read_excel(template_data_path, sheet_name=\"Portfolio\"))\n",
+ "df_portfolio = requantify_df_from_columns(\n",
+ " pd.read_excel(template_data_path, sheet_name=\"Portfolio\")\n",
+ ")\n",
"display(df_portfolio.tail())"
]
},
@@ -262,13 +265,17 @@
"metadata": {},
"outputs": [],
"source": [
- "template_provider = DataWarehouse(template_company_data, base_production_bm, base_intensity_bm_s1s2)\n",
+ "template_provider = DataWarehouse(\n",
+ " template_company_data, base_production_bm, base_intensity_bm_s1s2\n",
+ ")\n",
"temperature_score_s1s2 = TemperatureScore(\n",
" time_frames=[ETimeFrames.LONG],\n",
" scopes=[EScope.S1S2],\n",
" aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n",
")\n",
- "enhanced_portfolio_s1s2 = temperature_score_s1s2.calculate(data_warehouse=template_provider, portfolio=companies)\n",
+ "enhanced_portfolio_s1s2 = temperature_score_s1s2.calculate(\n",
+ " data_warehouse=template_provider, portfolio=companies\n",
+ ")\n",
"\n",
"temperature_score_s3 = TemperatureScore(\n",
" time_frames=[ETimeFrames.LONG],\n",
@@ -276,7 +283,9 @@
" aggregation_method=PortfolioAggregationMethod.WATS, # Options for the aggregation method are WATS, TETS, AOTS, MOTS, EOTS, ECOTS, and ROTS.\n",
")\n",
"template_provider.update_benchmarks(base_production_bm, base_intensity_bm_s3)\n",
- "enhanced_portfolio_s3 = temperature_score_s3.calculate(data_warehouse=template_provider, portfolio=companies)"
+ "enhanced_portfolio_s3 = temperature_score_s3.calculate(\n",
+ " data_warehouse=template_provider, portfolio=companies\n",
+ ")"
]
},
{
@@ -294,7 +303,11 @@
"source": [
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
- " display(enhanced_portfolio_s1s2[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])"
+ " display(\n",
+ " enhanced_portfolio_s1s2[\n",
+ " [\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]\n",
+ " ]\n",
+ " )"
]
},
{
@@ -305,7 +318,11 @@
"source": [
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\")\n",
- " display(enhanced_portfolio_s3[[\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]])"
+ " display(\n",
+ " enhanced_portfolio_s3[\n",
+ " [\"company_name\", \"time_frame\", \"scope\", \"temperature_score\"]\n",
+ " ]\n",
+ " )"
]
},
{
diff --git a/notebooks/template_update.ipynb b/notebooks/template_update.ipynb
index 0b27028..2903c9a 100644
--- a/notebooks/template_update.ipynb
+++ b/notebooks/template_update.ipynb
@@ -43,24 +43,17 @@
"metadata": {},
"outputs": [],
"source": [
- "import os\n",
- "import sys\n",
"import shutil\n",
- "import warnings\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from math import log10\n",
"\n",
- "import openpyxl\n",
- "from openpyxl.workbook import Workbook\n",
"from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder\n",
- "from openpyxl.styles import Alignment, Border, Font, PatternFill, Side\n",
+ "from openpyxl.styles import Alignment, Border, PatternFill, Side\n",
"from openpyxl.styles.colors import Color\n",
- "from openpyxl.cell import Cell\n",
"from openpyxl.utils import get_column_letter\n",
"\n",
- "from itertools import chain\n",
"from datetime import date, datetime"
]
},
@@ -602,7 +595,10 @@
"index_cols = [\"company_name\", \"company_lei\", \"company_id\"]\n",
"itr_sheet = wb_data[\"ITR input data\"] # .set_index(index_cols)\n",
"itr_sheet.report_date = itr_sheet.apply(\n",
- " lambda x: x.report_date if isinstance(x.report_date, datetime) else date(int(x.report_date), 12, 31), axis=1\n",
+ " lambda x: x.report_date\n",
+ " if isinstance(x.report_date, datetime)\n",
+ " else date(int(x.report_date), 12, 31),\n",
+ " axis=1,\n",
").copy()\n",
"all_cols = itr_sheet.columns\n",
"scopes = [\"s1\", \"s2\", \"s1s2\", \"s3\"]\n",
@@ -971,19 +967,32 @@
"df.insert(\n",
" df.columns.get_loc(\"metric\"),\n",
" \"sub_metric\",\n",
- " df.apply(lambda x: \"location\" if \"s2\" in x.metric else \"combined\" if x.metric == \"s3\" else \"\", axis=1),\n",
+ " df.apply(\n",
+ " lambda x: \"location\"\n",
+ " if \"s2\" in x.metric\n",
+ " else \"combined\"\n",
+ " if x.metric == \"s3\"\n",
+ " else \"\",\n",
+ " axis=1,\n",
+ " ),\n",
")\n",
"df.insert(\n",
" df.columns.get_loc(\"metric\") + 1,\n",
" \"unit\",\n",
" df.apply(\n",
- " lambda x: x.production_metric if x.metric == \"production\" else \"\" if x.metric == \"pdf\" else x.emissions_metric,\n",
+ " lambda x: x.production_metric\n",
+ " if x.metric == \"production\"\n",
+ " else \"\"\n",
+ " if x.metric == \"pdf\"\n",
+ " else x.emissions_metric,\n",
" axis=1,\n",
" ),\n",
")\n",
"df.drop(columns=[\"production_metric\", \"emissions_metric\"], inplace=True)\n",
"df.insert(df.columns.get_loc(\"unit\") + 1, \"report_date\", date(2021, 12, 31))\n",
- "df.loc[df.metric == \"pdf\", df.columns[df.columns.get_loc(\"unit\") + 1] : df.columns[-1]] = \"\"\n",
+ "df.loc[\n",
+ " df.metric == \"pdf\", df.columns[df.columns.get_loc(\"unit\") + 1] : df.columns[-1]\n",
+ "] = \"\"\n",
"df = df.set_index(\"metric\", append=True)\n",
"df.columns = df.columns.map(lambda x: int(x) if x[0].isnumeric() else x)\n",
"esg_df = df\n",
@@ -1055,18 +1064,27 @@
"for i, col in enumerate(df.columns):\n",
" if col == \"report_date\":\n",
" # We fudge the width with number_format that `str(datetime)` doesn't understand\n",
- " dim_holder[get_column_letter(i + 1)] = ColumnDimension(financial_ws, min=i + 1, max=i + 1, width=len(col) + 2)\n",
+ " dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
+ " financial_ws, min=i + 1, max=i + 1, width=len(col) + 2\n",
+ " )\n",
" else:\n",
" dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
" financial_ws,\n",
" min=i + 1,\n",
" max=i + 1,\n",
- " width=max(df.iloc[:, i].map(lambda x: len(str(x))).max() + 2 * (col == \"company_lei\"), len(col)) + 2,\n",
+ " width=max(\n",
+ " df.iloc[:, i].map(lambda x: len(str(x))).max()\n",
+ " + 2 * (col == \"company_lei\"),\n",
+ " len(col),\n",
+ " )\n",
+ " + 2,\n",
" )\n",
" if i <= 2:\n",
" # Format index columns\n",
" for j in range(1, financial_ws.max_row + 1):\n",
- " financial_ws.cell(column=i + 1, row=j).alignment = Alignment(horizontal=\"left\", vertical=\"center\")\n",
+ " financial_ws.cell(column=i + 1, row=j).alignment = Alignment(\n",
+ " horizontal=\"left\", vertical=\"center\"\n",
+ " )\n",
" else:\n",
" if col == \"report_date\":\n",
" for j in range(1, financial_ws.max_row + 1):\n",
@@ -1077,7 +1095,9 @@
"\n",
"# Lighten the Region column, which is optional\n",
"region_col_letter = get_column_letter(df.columns.get_loc(\"region\") + 1)\n",
- "for cell in financial_ws[f\"{region_col_letter}1:{region_col_letter}{financial_ws.max_row}\"]:\n",
+ "for cell in financial_ws[\n",
+ " f\"{region_col_letter}1:{region_col_letter}{financial_ws.max_row}\"\n",
+ "]:\n",
" cell[0].font = cell[0].font.copy(color=Color(\"FF888888\"))"
]
},
@@ -1119,32 +1139,44 @@
"\n",
"# We have only one cell to color, so it's not in the loop\n",
"esg_ws.cell(column=df.columns.get_loc(\"unit\") + 1, row=1).fill = (\n",
- " wb_xlsx[old_sheet].cell(column=itr_sheet.columns.get_loc(\"emissions_metric\") + 1, row=1).fill.copy()\n",
+ " wb_xlsx[old_sheet]\n",
+ " .cell(column=itr_sheet.columns.get_loc(\"emissions_metric\") + 1, row=1)\n",
+ " .fill.copy()\n",
")\n",
"\n",
"# Make worksheet pretty\n",
"for i, col in enumerate(df.columns):\n",
" if col == \"report_date\":\n",
" # We fudge the width with number_format that `str(datetime)` doesn't understand\n",
- " dim_holder[get_column_letter(i + 1)] = ColumnDimension(financial_ws, min=i + 1, max=i + 1, width=len(col) + 2)\n",
+ " dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
+ " financial_ws, min=i + 1, max=i + 1, width=len(col) + 2\n",
+ " )\n",
" else:\n",
" dim_holder[get_column_letter(i + 1)] = ColumnDimension(\n",
" esg_ws,\n",
" min=i + 1,\n",
" max=i + 1,\n",
- " width=max(df.iloc[:, i].map(get_cell_width).max() + 2 * (col == \"company_lei\"), len(str(col))) + 2,\n",
+ " width=max(\n",
+ " df.iloc[:, i].map(get_cell_width).max() + 2 * (col == \"company_lei\"),\n",
+ " len(str(col)),\n",
+ " )\n",
+ " + 2,\n",
" )\n",
" if i <= 2:\n",
" # Format index columns\n",
" for j in range(1, esg_ws.max_row + 1):\n",
- " esg_ws.cell(column=i + 1, row=j).alignment = Alignment(horizontal=\"left\", vertical=\"center\")\n",
+ " esg_ws.cell(column=i + 1, row=j).alignment = Alignment(\n",
+ " horizontal=\"left\", vertical=\"center\"\n",
+ " )\n",
" elif col == \"report_date\":\n",
" for j in range(1, esg_ws.max_row + 1):\n",
" esg_ws.cell(row=j, column=i + 1).number_format = \"yyyy-mm-dd\"\n",
" elif col in range(2016, 2323):\n",
" column_color = \"EEEEEE\" if (col % 2) == 0 else \"FFFFFF\"\n",
" for j in range(1, esg_ws.max_row + 1):\n",
- " esg_ws.cell(column=i + 1, row=j).fill = PatternFill(\"solid\", start_color=column_color)\n",
+ " esg_ws.cell(column=i + 1, row=j).fill = PatternFill(\n",
+ " \"solid\", start_color=column_color\n",
+ " )\n",
" esg_ws.cell(column=i + 1, row=j).border = thin_border\n",
"\n",
"esg_ws.column_dimensions = dim_holder"
@@ -1489,7 +1521,9 @@
}
],
"source": [
- "x = pd.read_excel(template_data_path_v2, sheet_name=\"ITR V2 esg data\", index_col=[0, 1, 2, 3])\n",
+ "x = pd.read_excel(\n",
+ " template_data_path_v2, sheet_name=\"ITR V2 esg data\", index_col=[0, 1, 2, 3]\n",
+ ")\n",
"display(x)"
]
},