diff --git a/stream/classes/cost_model/scheduler.py b/stream/classes/cost_model/scheduler.py
index eb8505a3..f10acf00 100644
--- a/stream/classes/cost_model/scheduler.py
+++ b/stream/classes/cost_model/scheduler.py
@@ -81,7 +81,7 @@ def get_best_candidate(candidates: list[ComputationNode], scheduling_order: list
     if not candidates:
         raise ValueError("There are no candidates to schedule.")
     preds_ends, cn_candidates = zip(*candidates)
-    idxs = [scheduling_order.index(n.id) for n in cn_candidates]
+    idxs = [scheduling_order.index((n.id, n.sub_id)) for n in cn_candidates]
     best_candidate_idx = idxs.index(min(idxs))
     best_candidate = cn_candidates[best_candidate_idx]
     preds_end = preds_ends[best_candidate_idx]
@@ -264,7 +264,7 @@ def schedule_graph(
     offchip_core_id = accelerator.offchip_core_id
     offchip_core = accelerator.get_core(offchip_core_id)

-    ## Schedule preparation:
+    # Schedule preparation:
     # 1. Initialize the memory instance priorities for each tensor
     initialize_priorities(G, accelerator)
     # 2. Add the constant operand tensors of all nodes to the off-chip initially
@@ -292,9 +292,9 @@ def schedule_graph(
         core = accelerator.get_core(core_id)
         # Earliest start time is when core is available or predecessors finished
         start = max(cores_idle_from[core_id], preds_end)
-        ## Step 0
+        # Step 0
         tensors_this_candidate_needs, tensors_operands = get_tensors_needed_for_node(best_candidate, G)
-        ## Step 1
+        # Step 1
         # There could be operands that are too large to store in the highest memory on the core
         # The tensors stored in these memories should be evicted and potentially written back to off-chip
         # Clear these memories (this might delay the potential start time if things have to written to off-chip)
@@ -312,7 +312,7 @@
         )
         total_eviction_to_offchip_link_energy += clear_link_energy
         total_eviction_to_offchip_memory_energy += clear_memory_energy
-        ## Step 2
+        # Step 2
         # The computation might need tensors that are currently not present in the core's memories
         # We need to fetch these tensors from either off-chip or from the core where they are present
         # Transfer these tensors from wherever they are currently residing to this core
@@ -343,7 +343,7 @@
             total_eviction_to_offchip_link_energy += eviction_link_energy_cost
             total_eviction_to_offchip_memory_energy += eviction_memory_energy_cost

-        ## Step 3
+        # Step 3
         # Check if we had any operands that were too large to store in the core's memory, block the relevant off-chip link for the duration
         # This might again delay the execution if the offchip link was already blocked by another core
         timestep = accelerator.block_offchip_links(
@@ -354,7 +354,7 @@
             best_candidate,
         )

-        ## Step 4
+        # Step 4
         # Make space for the output tensor of this computation node and spawn it when evictions are complete
         # If the output operand is in the too large operands, add it to off-chip, otherwise add it to this core's output memory
         output_layer_operand = best_candidate.output_operand
@@ -387,7 +387,7 @@
             available_timestep=end,
         )

-        ## Step 5
+        # Step 5
         # Update the start and end time of the node
         best_candidate.set_start(start)
         best_candidate.set_end(end)
@@ -400,7 +400,7 @@
         # Add this node to the scheduled nodes
         scheduled_nodes.add(best_candidate)

-        ## Step 6
+        # Step 6
         # Memory usage: When the node ends:
         # Decrease the priority of all the tensors this node used
         decrease_priority(tensors_this_candidate_needs, tensors_operands, accelerator, best_candidate)
@@ -413,7 +413,7 @@
             end,
         )

-        ## Step 7
+        # Step 7
         # Memory usage: When the node ends:
         # If this node is a sink node (node that has no successors and that produces a final output), transfer final outputs to offchip
         if best_candidate in sink_layer_nodes:
@@ -433,7 +433,7 @@
             total_sink_layer_output_offchip_link_energy += link_energy_cost
             total_sink_layer_output_offchip_memory_energy += memory_energy_cost

-        ## Step 8
+        # Step 8
         # For each successor of this node, check if all of its predecessors have been scheduled
         for successor in sorted(G.successors(best_candidate)):
             if all((pred in scheduled_nodes for pred in G.predecessors(successor))):
@@ -448,7 +448,7 @@
         nb_scheduled_nodes += 1
         done = nb_scheduled_nodes == nb_graph_nodes

-    ## Step 9
+    # Step 9
     # The total schedule latency is the max of all CN end times and the link end times
     cns_end_time = max((n.end for n in G.nodes()))
     links_end_time = max([event.end for event in accelerator.communication_manager.events], default=0)
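Note on the scheduler change above: the scheduling order is now keyed on (id, sub_id) pairs instead of bare node ids, so finer CNs that were split from the same original layer (and therefore share an id) each resolve to a distinct position. A minimal sketch of the lookup, using SimpleNamespace as a hypothetical stand-in for ComputationNode:

    # Illustrative sketch only; SimpleNamespace stands in for ComputationNode.
    from types import SimpleNamespace

    cn_candidates = [SimpleNamespace(id=2, sub_id=1), SimpleNamespace(id=2, sub_id=0)]
    # Mirrors get_scheduling_order in GenerateCNWorkloadHybridStage further down.
    scheduling_order = sorted(((n.id, n.sub_id) for n in cn_candidates), reverse=True)
    # With bare ids, both candidates would map to the same index; the pair gives
    # each finer node its own position in the order.
    idxs = [scheduling_order.index((n.id, n.sub_id)) for n in cn_candidates]
    assert idxs == [0, 1]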
diff --git a/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py b/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
index ad189f2c..327ca38a 100644
--- a/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
+++ b/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
@@ -102,7 +102,7 @@ def set_node_core_allocations(self, core_allocations: list[int]):
             offchip_energy = 0
             for too_large_operand in too_large_operands:
                 layer_operand = next(
-                    (k for (k, v) in cme.layer.memory_operand_links.items() if v == too_large_operand)
+                    (k for (k, v) in cme.layer.memory_operand_links.data.items() if v == too_large_operand)
                 )
                 layer_operand_offchip_energy = cme.mem_energy_breakdown[layer_operand][-1]
                 offchip_energy += layer_operand_offchip_energy
diff --git a/stream/classes/stages/GenerateCNWorkloadHybridStage.py b/stream/classes/stages/GenerateCNWorkloadHybridStage.py
index 58497b8e..d1c57408 100644
--- a/stream/classes/stages/GenerateCNWorkloadHybridStage.py
+++ b/stream/classes/stages/GenerateCNWorkloadHybridStage.py
@@ -135,7 +135,7 @@ def run(self):

     @staticmethod
     def get_scheduling_order(workload: Workload):
-        return sorted((n.id for n in workload.nodes()), reverse=True)
+        return sorted(((n.id, n.sub_id) for n in workload.nodes()), reverse=True)

     @staticmethod
     def get_all_node_pairs(G: Workload) -> tuple[tuple[ComputationNode, ComputationNode, bool], ...]:
@@ -367,8 +367,12 @@ def get_finer_nodes(
                 produces_final_output=produces_final_output,
                 group_id=group_id,
             )
-            # Override property
+            # Override loop_ranges property
             finer_node.update_loop_ranges(dim_min_max)
+            # Re-calculate pr loop ranges based on new loop_ranges
+            finer_node.calculate_pr_loop_ranges()
+            # Re-set the operand tensors for the new loop_ranges
+            finer_node.set_operand_tensors()

             # Initialize the priorities (total inter-CN data reuse factor) for the constant operands of this finer_node
             for constant_operand in finer_node.constant_operands:
@@ -631,7 +635,7 @@ def get_inter_edges_numpy(
         all_inter_edges: list[tuple[ComputationNode, ComputationNode, dict[str, Any]]] = []
         for path_between in paths_between_generator:
             dependent_operand = Constants.OUTPUT_LAYER_OP
-            ## FIRST NODE
+            # FIRST NODE
             # First node in the path is a ComputationNode, of which we extract the output operand dependency tensor
             node = path_between[0]
             assert isinstance(node, ComputationNode), "First node in path should be ComputationNode"
@@ -642,12 +646,12 @@ def get_inter_edges_numpy(
             tensor_cns = self.get_tensor_cns(node, finer_nodes)
             numpy_tensors[node] = tensor_cns
             tensor = tensor_cns[Constants.OUTPUT_LAYER_OP]
-            ## INTERMEDIATE NON-COMPUTATION NODES
+            # INTERMEDIATE NON-COMPUTATION NODES
             for _, node in enumerate(path_between[1:-1], start=1):
                 if isinstance(node, ComputationNode):
                     raise ValueError("Intermediate nodes should not be of type ComputationNode.")
                 tensor = self.propagate_cn_production_for_non_cn(node, tensor)
-            ## LAST NODE IN PATH
+            # LAST NODE IN PATH
             last_node: Node = path_between[-1]

             # Find the operand for which this last node connects to its predecessor
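The three calls now made in get_finer_nodes form an ordered re-derivation chain: overriding a finer node's loop_ranges invalidates both the pr (partially relevant) loop ranges derived from them and the operand Tensors that captured loop_ranges at construction time. A hedged sketch of that dependency, assuming only the methods visible in this patch:

    # Sketch of the re-derivation order; finer_node is a ComputationNode and
    # dim_min_max the per-dimension ranges, both as used in the hunk above.
    def refresh_after_range_override(finer_node, dim_min_max):
        finer_node.update_loop_ranges(dim_min_max)  # 1. overwrite loop_ranges
        finer_node.calculate_pr_loop_ranges()       # 2. pr ranges derive from loop_ranges
        finer_node.set_operand_tensors()            # 3. Tensors snapshot loop_ranges when built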
diff --git a/stream/classes/stages/IntraCoreMappingStage.py b/stream/classes/stages/IntraCoreMappingStage.py
index 7759a0d0..0c7bdcd6 100644
--- a/stream/classes/stages/IntraCoreMappingStage.py
+++ b/stream/classes/stages/IntraCoreMappingStage.py
@@ -144,7 +144,7 @@ def run(self):
                 # Compute this (node, core) combination's optimal mapping
                 else:
                     # Set the node's core allocation to the core_id we want to extract hw performance for
-                    node.set_chosen_core_allocation(core_id)
+                    node.set_core_allocation(core_id)
                     # Set the node's spatial mapping to the possible spatial mappings of the current core
                     node.spatial_mapping = core.dataflows if core.dataflows is not None else SpatialMapping.empty()
                     # Initialize the flow that will be followed to extract the optimal HW performance of every unique node-core allocation
@@ -156,7 +156,7 @@ def run(self):
                     answers = main_stage.run()
                     assert len(answers) == 1, "IntraCoreMappingStage's subflow returned more than one CME"
                     cme = answers[0][0]
-                    node.chosen_core_allocation = None  # Reset the node's core allocation
+                    node.core_allocation = None  # Reset the node's core allocation
                     self.node_hw_performances[node][core] = cme
                     self.save_node_hw_performances()  # Save the hw performances dict after every node is finished
                     self.visualize_node_hw_performances()
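For context on the setter swap above: node.py (further down) adds set_core_allocation, which wraps the single core id in a list, whereas set_chosen_core_allocation stores a scalar. A small self-contained sketch of the two shapes, with a hypothetical stripped-down Node:

    # Hypothetical minimal Node; only the two setters from this patch are real.
    class Node:
        def __init__(self):
            self.core_allocation: list[int] = []            # candidate core ids
            self.chosen_core_allocation: int | None = None  # final pick

        def set_core_allocation(self, core_allocation: int):
            self.core_allocation = [core_allocation]

        def set_chosen_core_allocation(self, core_allocation: int):
            self.chosen_core_allocation = core_allocation

    node = Node()
    node.set_core_allocation(3)  # what IntraCoreMappingStage now calls
    assert node.core_allocation == [3]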
diff --git a/stream/classes/workload/computation_node.py b/stream/classes/workload/computation_node.py
index aac3a68a..dbc97889 100644
--- a/stream/classes/workload/computation_node.py
+++ b/stream/classes/workload/computation_node.py
@@ -82,6 +82,13 @@ def __init__(
         # Each ComputationNode will save a tensor for all its defined operands.
         # For example, a conv layer will have an I tensor, W tensor and O tensor.
         self.operand_tensors: dict[LayerOperand, Tensor] = {}
+        self.set_operand_tensors()
+
+        # Will be set by the InterCoreMappingStage or by the FitnessEvaluator
+        self.too_large_operands = None
+        self.nb_real_predecessors = None
+
+    def set_operand_tensors(self):
         for op in self.layer_operands:
             if op == Constants.OUTPUT_LAYER_OP:
                 precision = self.operand_precision.final_output_precision
@@ -99,10 +106,6 @@ def __init__(
                 loop_ranges=ranges,
             )

-        # Will be set by the InterCoreMappingStage or by the FitnessEvaluator
-        self.too_large_operands = None
-        self.nb_real_predecessors = None
-
     def get_operand_tensor_reshape_default(self) -> OperandTensorReshape | None:
         try:
             size_B = self.layer_dim_sizes[LayerDim("B")]
diff --git a/stream/classes/workload/node.py b/stream/classes/workload/node.py
index 2fa406ac..524448fb 100644
--- a/stream/classes/workload/node.py
+++ b/stream/classes/workload/node.py
@@ -123,6 +123,9 @@ def set_end(self, end: int):
         """
         self.end = end

+    def set_core_allocation(self, core_allocation: int):
+        self.core_allocation = [core_allocation]
+
     def set_chosen_core_allocation(self, core_allocation: int):
         self.chosen_core_allocation = core_allocation

diff --git a/stream/classes/workload/tensor.py b/stream/classes/workload/tensor.py
index a90f4820..17dd7e79 100644
--- a/stream/classes/workload/tensor.py
+++ b/stream/classes/workload/tensor.py
@@ -18,7 +18,7 @@ def __init__(
         origin: "ComputationNode",
         layer_operand: LayerOperand,
         loop_dimensions: list[LayerDim],
-        loop_ranges: tuple[int, int],
+        loop_ranges: tuple[tuple[int, int], ...],
     ):
         """Initialize the Tensor instance.

@@ -61,7 +61,7 @@ def __lt__(self, __o: object) -> bool:
         # self.loop_ranges == __o.loop_ranges

     def equality_hash(self):
-        return hash((self.origin.id, self.origin.sub_id, self.layer_operand, self.loop_ranges))
+        return hash((self.origin.id, self.layer_operand, self.loop_ranges))

     def set_base_priorities(self, base_priority):
         self.base_priority = base_priority
diff --git a/stream/utils.py b/stream/utils.py
index 7b594e62..f2e18789 100644
--- a/stream/utils.py
+++ b/stream/utils.py
@@ -22,7 +22,7 @@ def get_too_large_operands(cme: CostModelEvaluation, accelerator: Accelerator, c
     core = accelerator.get_core(core_id)
     core_nb_memory_levels = core.memory_hierarchy.nb_levels
     for layer_operand, l in cme.mapping.data_elem_per_level.items():
-        memory_operand = cme.layer.memory_operand_links[layer_operand]
+        memory_operand = cme.layer.memory_operand_links.layer_to_mem_op(layer_operand)
         if len(l) > core_nb_memory_levels[memory_operand] + 1:  # +1 because of spatial level
             too_large_operands.append(memory_operand)
     return too_large_operands
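The fitness_evaluator.py and utils.py hunks together suggest that memory_operand_links is no longer a plain dict but a small wrapper exposing the underlying mapping via .data and a forward lookup via layer_to_mem_op. A hedged reconstruction of such a wrapper; the real MemoryOperandLinks class may differ:

    # Hypothetical reconstruction implied by `.data.items()` and
    # `.layer_to_mem_op(...)`; strings stand in for LayerOperand and
    # MemoryOperand objects.
    class MemoryOperandLinks:
        def __init__(self, data: dict[str, str]):
            self.data = data  # layer operand -> memory operand

        def layer_to_mem_op(self, layer_operand: str) -> str:
            return self.data[layer_operand]

    links = MemoryOperandLinks({"I": "I1", "W": "I2", "O": "O"})
    assert links.layer_to_mem_op("W") == "I2"  # forward lookup, as in utils.py
    layer_op = next(k for (k, v) in links.data.items() if v == "I2")
    assert layer_op == "W"  # reverse lookup, as in fitness_evaluator.py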