From 4d8f688c0cbb39f0f90ce4f15c8f120c51f232fe Mon Sep 17 00:00:00 2001 From: Gabriele Sarti Date: Tue, 7 Nov 2023 17:03:05 +0100 Subject: [PATCH] Minor fixes (#233) * Minor fixes * Minor fixes --- README.md | 3 ++- inseq/attr/feat/internals_attribution.py | 4 +--- inseq/attr/step_functions.py | 1 + inseq/data/attribution.py | 9 ++++++--- pyproject.toml | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1efeefe2..e1d53ebe 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Inseq is a Pytorch-based hackable toolkit to democratize the access to common po ## Installation -Inseq is available on PyPI and can be installed with `pip`: +Inseq is available on PyPI and can be installed with `pip` for Python >= 3.9, <= 3.11: ```bash # Install latest stable version @@ -270,6 +270,7 @@ Inseq has been used in various research projects. A list of known publications t
  • Response Generation in Longitudinal Dialogues: Which Knowledge Representation Helps? (Mousavi et al., 2023)
  • Quantifying the Plausibility of Context Reliance in Neural Machine Translation (Sarti et al., 2023)
  • A Tale of Pronouns: Interpretability Informs Gender Bias Mitigation for Fairer Instruction-Tuned Machine Translation (Attanasio et al., 2023)
  • +
  • Assessing the Reliability of Large Language Model Knowledge (Wang et al., 2023)
  • diff --git a/inseq/attr/feat/internals_attribution.py b/inseq/attr/feat/internals_attribution.py index 15458af2..e408a97a 100644 --- a/inseq/attr/feat/internals_attribution.py +++ b/inseq/attr/feat/internals_attribution.py @@ -84,9 +84,7 @@ def attribute( else: target_attributions = None sequence_scores["decoder_self_attentions"] = decoder_self_attentions - sequence_scores["encoder_self_attentions"] = ( - encoder_self_attentions[..., -1, :].clone().permute(0, 3, 1, 2) - ) + sequence_scores["encoder_self_attentions"] = encoder_self_attentions.clone().permute(0, 3, 4, 1, 2) return MultiDimensionalFeatureAttributionStepOutput( source_attributions=cross_attentions[..., -1, :].clone().permute(0, 3, 1, 2), target_attributions=target_attributions, diff --git a/inseq/attr/step_functions.py b/inseq/attr/step_functions.py index d85c86a9..82aeb69c 100644 --- a/inseq/attr/step_functions.py +++ b/inseq/attr/step_functions.py @@ -231,6 +231,7 @@ def kl_divergence_fn( contrast_targets=contrast_targets, contrast_targets_alignments=contrast_targets_alignments, return_contrastive_target_ids=False, + return_contrastive_batch=True, ) c_forward_output = args.attribution_model.get_forward_output( contrast_inputs.batch, use_embeddings=args.attribution_model.is_encoder_decoder diff --git a/inseq/data/attribution.py b/inseq/data/attribution.py index 5d97b503..50e60c9c 100644 --- a/inseq/data/attribution.py +++ b/inseq/data/attribution.py @@ -268,9 +268,12 @@ def from_step_attributions( # that are not source-to-target (default for encoder-decoder) or target-to-target # (default for decoder only). remove_pad_fn = cls.get_remove_pad_fn(attr, seq_score_name) - out_seq_scores = get_sequences_from_batched_steps( - [att.sequence_scores[seq_score_name] for att in attributions] - ) + if seq_score_name.startswith("encoder"): + out_seq_scores = [attr.sequence_scores[seq_score_name][i, ...] for i in range(num_sequences)] + else: + out_seq_scores = get_sequences_from_batched_steps( + [att.sequence_scores[seq_score_name] for att in attributions] + ) for seq_id in range(num_sequences): seq_scores[seq_id][seq_score_name] = remove_pad_fn(out_seq_scores, sources, targets, seq_id) for seq_id in range(num_sequences): diff --git a/pyproject.toml b/pyproject.toml index 0889cc13..5488eef7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,7 +224,7 @@ known-first-party = ["inseq"] order-by-type = true [tool.ruff.pylint] -max-branches = 20 +max-branches = 22 [tool.ruff.pyupgrade] keep-runtime-typing = true