From 4d8f688c0cbb39f0f90ce4f15c8f120c51f232fe Mon Sep 17 00:00:00 2001
From: Gabriele Sarti <gabriele.sarti996@gmail.com>
Date: Tue, 7 Nov 2023 17:03:05 +0100
Subject: [PATCH] Minor fixes (#233)

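* README: state the supported Python versions (3.9 to 3.11) and add
  Wang et al., 2023 to the list of publications using Inseq

* internals_attribution: store `encoder_self_attentions` in full instead
  of only the `[..., -1, :]` slice

* step_functions: pass `return_contrastive_batch=True` in
  `kl_divergence_fn`

* data/attribution: in `from_step_attributions`, index sequence scores
  whose name starts with "encoder" per sequence instead of passing them
  through `get_sequences_from_batched_steps`

* pyproject: raise ruff pylint `max-branches` from 20 to 22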
---
 README.md                                | 3 ++-
 inseq/attr/feat/internals_attribution.py | 4 +---
 inseq/attr/step_functions.py             | 1 +
 inseq/data/attribution.py                | 9 ++++++---
 pyproject.toml                           | 2 +-
 5 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index 1efeefe2..e1d53ebe 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Inseq is a Pytorch-based hackable toolkit to democratize the access to common po
 
 ## Installation
 
-Inseq is available on PyPI and can be installed with `pip`:
+Inseq is available on PyPI and can be installed with `pip` on Python 3.9 through 3.11:
 
 ```bash
 # Install latest stable version
@@ -270,6 +270,7 @@ Inseq has been used in various research projects. A list of known publications t
     <li> <a href="https://aclanthology.org/2023.nlp4convai-1.1/">Response Generation in Longitudinal Dialogues: Which Knowledge Representation Helps?</a> (Mousavi et al., 2023)  </li>
     <li> <a href="https://arxiv.org/abs/2310.01188">Quantifying the Plausibility of Context Reliance in Neural Machine Translation</a> (Sarti et al., 2023)</li>
     <li> <a href="https://arxiv.org/abs/2310.12127">A Tale of Pronouns: Interpretability Informs Gender Bias Mitigation for Fairer Instruction-Tuned Machine Translation</a> (Attanasio et al., 2023)</li>
+    <li> <a href="https://arxiv.org/abs/2310.09820">Assessing the Reliability of Large Language Model Knowledge</a> (Wang et al., 2023)</li>
   </ol>
 
 </details>
diff --git a/inseq/attr/feat/internals_attribution.py b/inseq/attr/feat/internals_attribution.py
index 15458af2..e408a97a 100644
--- a/inseq/attr/feat/internals_attribution.py
+++ b/inseq/attr/feat/internals_attribution.py
@@ -84,9 +84,7 @@ def attribute(
                 else:
                     target_attributions = None
                     sequence_scores["decoder_self_attentions"] = decoder_self_attentions
-                sequence_scores["encoder_self_attentions"] = (
-                    encoder_self_attentions[..., -1, :].clone().permute(0, 3, 1, 2)
-                )
+                sequence_scores["encoder_self_attentions"] = encoder_self_attentions.clone().permute(0, 3, 4, 1, 2)
                 return MultiDimensionalFeatureAttributionStepOutput(
                     source_attributions=cross_attentions[..., -1, :].clone().permute(0, 3, 1, 2),
                     target_attributions=target_attributions,
diff --git a/inseq/attr/step_functions.py b/inseq/attr/step_functions.py
index d85c86a9..82aeb69c 100644
--- a/inseq/attr/step_functions.py
+++ b/inseq/attr/step_functions.py
@@ -231,6 +231,7 @@ def kl_divergence_fn(
         contrast_targets=contrast_targets,
         contrast_targets_alignments=contrast_targets_alignments,
         return_contrastive_target_ids=False,
+        return_contrastive_batch=True,
     )
     c_forward_output = args.attribution_model.get_forward_output(
         contrast_inputs.batch, use_embeddings=args.attribution_model.is_encoder_decoder
diff --git a/inseq/data/attribution.py b/inseq/data/attribution.py
index 5d97b503..50e60c9c 100644
--- a/inseq/data/attribution.py
+++ b/inseq/data/attribution.py
@@ -268,9 +268,12 @@ def from_step_attributions(
                 # that are not source-to-target (default for encoder-decoder) or target-to-target
                 # (default for decoder only).
                 remove_pad_fn = cls.get_remove_pad_fn(attr, seq_score_name)
-                out_seq_scores = get_sequences_from_batched_steps(
-                    [att.sequence_scores[seq_score_name] for att in attributions]
-                )
+                if seq_score_name.startswith("encoder"):
+                    out_seq_scores = [attr.sequence_scores[seq_score_name][i, ...] for i in range(num_sequences)]
+                else:
+                    out_seq_scores = get_sequences_from_batched_steps(
+                        [att.sequence_scores[seq_score_name] for att in attributions]
+                    )
                 for seq_id in range(num_sequences):
                     seq_scores[seq_id][seq_score_name] = remove_pad_fn(out_seq_scores, sources, targets, seq_id)
             for seq_id in range(num_sequences):
diff --git a/pyproject.toml b/pyproject.toml
index 0889cc13..5488eef7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -224,7 +224,7 @@ known-first-party = ["inseq"]
 order-by-type = true
 
 [tool.ruff.pylint]
-max-branches = 20
+max-branches = 22
 
 [tool.ruff.pyupgrade]
 keep-runtime-typing = true