Skip to content

Commit

Permalink
with first draft of spreading to multiple index
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Aug 29, 2024
1 parent 54a8d8d commit 5ad13d9
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 10 deletions.
44 changes: 36 additions & 8 deletions pymrio/tools/ioutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ def convert(
if isinstance(df_orig, pd.Series):
df_orig = pd.DataFrame(df_orig)

# some consitency checks of arguments and restructuring if everything is ok
# some consistency checks of arguments and restructuring if everything is ok
if len(bridge_columns) == 0:
raise ValueError("No columns with '__' in the mapping DataFrame")
for col in bridge_columns:
Expand Down Expand Up @@ -1149,6 +1149,7 @@ def convert(
res_collector = []

# loop over each new impact/characterized value
# and collect entries, multiply and rename
for entry in unique_new_index:
df_cur_map = df_map.loc[[entry]]
collector = []
Expand All @@ -1164,32 +1165,59 @@ def convert(

df_collected = pd.concat(collector, axis=0)

# renaming part, checks if the old name (bridge.orig) is in the current index
# and renames by the new one (bridge.new)

already_renamed = dict()
for bridge in bridges:
# encountering a bridge with the same orig name but which should
# lead to two new index levels
if bridge.orig in already_renamed.keys():
# duplicate the index level
df_collected.reset_index(level=already_renamed[bridge.orig].new, inplace=True)
df_collected[bridge.new] = df_cur_map.index.get_level_values(bridge.raw)[0]

if df_collected.index.name is None:
df_collected.set_index(already_renamed[bridge.orig].new, drop=True, append=False, inplace=True)
else:
df_collected.set_index(already_renamed[bridge.orig].new, drop=True, append=True, inplace=True)
df_collected.set_index(bridge.new, drop=True, append=True, inplace=True)
continue

for idx_old_names in df_collected.index.names:
if bridge.orig in idx_old_names:
# rename the index names
if isinstance(df_collected.index, pd.MultiIndex):
df_collected.index = df_collected.index.set_names( bridge.new, level=idx_old_names)
else:
df_collected.index = df_collected.index.set_names( bridge.new, level=None)

# rename the actual index values
df_collected.reset_index(level=bridge.new, inplace=True)

for row in df_cur_map.reset_index().iterrows():
new_row_name = row[1][bridge.raw]
old_row_name = row[1][bridge.orig]
df_collected.loc[:, bridge.new] = df_collected.loc[
:, bridge.new
].str.replace(pat=old_row_name, repl=new_row_name, regex=True)
df_collected.set_index(
# CONT: Make test cases for renaming/characterization of a df without a multiindex
# CONT: Make a test case/method where a matching line gets extended into more index columns
bridge.new, drop=True, append=True, inplace=True
)
# CONT: Ensure that the spread keeps the order as in the original mapping

# put the index back
if df_collected.index.name is None:
# The case with a single index where the previous reset index
# left only a numerical index
df_collected.set_index(
bridge.new, drop=True, append=False, inplace=True
)
else:
df_collected.set_index(
bridge.new, drop=True, append=True, inplace=True
)
already_renamed[bridge.orig] = bridge

res_collector.append(

df_collected.groupby(by=df_collected.index.names).agg(agg_func)

)

all_result = pd.concat(res_collector, axis=0)
Expand Down
70 changes: 68 additions & 2 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,34 @@ def test_util_regex():
assert len(df_none_match) == 0
assert len(df_none_match_index) == 0

def test_convert_rename_singleindex():
    """Testing the renaming of one table with a single index."""
    # Input table: one (non-multi) index level named "em_type".
    to_char = pd.DataFrame(
        data=99.0,
        index=["em1", "em2", "em3"],
        columns=["r1", "r2", "r3"],
    )
    to_char.index.name = "em_type"
    to_char.columns.name = "reg"

    # Bridge column "stressor__em_type": entries of the index level "em_type"
    # are renamed and the level itself becomes "stressor".
    rename_bridge_simple = pd.DataFrame(
        columns=["em_type", "stressor__em_type"],
        data=[
            ["em1", "emission1"],
            ["em2", "emission2"],
            ["em3", "emission3"],
        ],
    )

    renamed = convert(to_char, rename_bridge_simple)

    # Compare against the input columns; comparing the result with itself
    # would always pass and test nothing.
    assert all(renamed.columns == to_char.columns)
    assert all(renamed.index == rename_bridge_simple["stressor__em_type"])



def test_convert_rename_multiindex():
"""Testing the renaming of one table with a multiindex"""

to_char = pd.DataFrame(
data=99.0,
Expand Down Expand Up @@ -440,6 +465,47 @@ def test_convert_rename():
pdt.assert_frame_equal(char_res_keep_comp_wo_factor, char_res_keep_comp)


def test_convert_rename_spread_index():
    """Testing the renaming of one table from an index to a multiindex.

    This is a specific case for the EXIOBASE to GLAM conversion,
    where one stressor level needs to be spread to multiple flows/classes.
    """
    # Input table: a single index level "stressor" that gets spread.
    to_char = pd.DataFrame(
        data=99.0,
        index=["em1", "em2", "em3"],
        columns=["r1", "r2", "r3"],
    )
    to_char.index.name = "stressor"
    to_char.columns.name = "reg"

    # Two bridge columns share the same original level ("stressor"), so the
    # single level is spread into the two new levels "class" and "class2".
    # TODO: also cover the three-level spread with an additional
    # "flow__stressor" column (emission1/emission2/emission3).
    rename_bridge = pd.DataFrame(
        columns=["stressor", "class__stressor", "class2__stressor"],
        data=[
            ["em1", "to_air", "to_air (unspecified)"],
            ["em2", "to_air", "to_air (specified)"],
            ["em3", "to_water", "to_water (unpecified)"],
        ],
    )

    renamed = convert(to_char, rename_bridge)

    # Compare against the input columns; comparing the result with itself
    # would always pass and test nothing.
    assert all(renamed.columns == to_char.columns)

    # The result must carry exactly the two new index levels ...
    assert set(renamed.index.names) == {"class", "class2"}

    # ... holding the value pairs given in the bridge. The comparison is
    # order-insensitive, as row order of the spread is not guaranteed yet.
    expected_pairs = set(
        zip(rename_bridge["class__stressor"], rename_bridge["class2__stressor"])
    )
    obtained_pairs = set(
        zip(
            renamed.index.get_level_values("class"),
            renamed.index.get_level_values("class2"),
        )
    )
    assert obtained_pairs == expected_pairs




def test_convert_characterize():
"""Testing the characterization of one table"""

Expand Down

0 comments on commit 5ad13d9

Please sign in to comment.