diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 8f3699f6ab42a2..22a09dffba779f 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -137,7 +137,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2 + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -vvv -s --tb=short -n 2 env: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py new file mode 100644 index 00000000000000..6823256b3ccfc5 --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py @@ -0,0 +1,94 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +''' +Use this script if you need to regenerate reference diffs for each model +to test SDPAToPA transformation. + +The script will produce sdpa2pa_ref_diff.txt (or sdpa2pa_ref_diff_cache_eviction.txt +if using cache-eviction) containing a map in the +following format with nodes number changes for each model: + +ref_diff_map = { + "hf-internal-testing/tiny-random-LlamaForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CohereForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + . + . + . 
+}
+
+The map has to be pasted into sdpa2pa_ref_diff.py (same directory) for
+includes to test SDPAToPA transformation.
+
+Run the script by using 'python generate_ref_diffs.py' or 'python generate_ref_diffs.py True'
+for generating the same map, but utilizing cache-eviction.
+'''
+
+import os
+import sys
+from pathlib import Path
+import models_hub_common.utils as utils
+from openvino._offline_transformations import paged_attention_transformation
+from openvino._pyopenvino.op import _PagedAttentionExtension, Parameter, Result
+from optimum.intel import OVModelForCausalLM
+
+nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign")
+
+def main():
+    use_cache_eviction = False
+    if len(sys.argv) >= 2:
+        use_cache_eviction = sys.argv[1].lower() == 'true'
+
+    OUTPUT_FILE = Path(os.path.join(os.path.dirname(__file__)), 'sdpa2pa_ref_diff' + ('_cache_eviction.txt' if use_cache_eviction else '.txt'))
+
+    if OUTPUT_FILE.exists() and OUTPUT_FILE.is_file():
+        OUTPUT_FILE.unlink()
+
+    with open(OUTPUT_FILE, 'w') as file:
+        model_list = utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))
+        print(OUTPUT_FILE)
+        print('ref_diff_map_cache_eviction = {' if use_cache_eviction else 'ref_diff_map = {', file=file)
+
+        for model_id, _, _, _ in model_list:
+            # wrapping in try/catch block to continue printing models even if one has failed
+            try:
+                model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
+            except Exception:
+                continue
+
+            before_map = {}
+            for op in model.model.get_ordered_ops():
+                if op.get_type_name() in nodes_to_compare:
+                    before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1
+
+            # wrapping in try/catch block to continue printing models even if one has failed
+            try:
+                paged_attention_transformation(model.model, use_cache_eviction, use_cache_eviction)
+            except Exception:
+                continue
+
+            after_map = {}
+            for op in 
model.model.get_ordered_ops(): + if op.get_type_name() in nodes_to_compare: + after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1 + + print(f'\t"{model_id}" : {{', file=file) + for op in set(after_map.keys()) | set(before_map.keys()): + print(f'\t\t"{op}" : {after_map.get(op, 0) - before_map.get(op, 0)},', file=file) + print('\t},', file=file) + print('}', file=file) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit index c3ec331fcda0bc..7c89c451ea4be5 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit @@ -40,7 +40,4 @@ Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi facebook/opt-125m,https://huggingface.co/facebook/opt-125m facebook/opt-350m,https://huggingface.co/facebook/opt-350m katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 -katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 -hf-internal-testing/tiny-random-BioGptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BioGptForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820 -hf-internal-testing/tiny-random-XGLMForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-XGLMForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820 -katuni4ka/tiny-random-orion,https://huggingface.co/katuni4ka/tiny-random-orion,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820 \ No newline at end of file +katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file diff --git 
a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py new file mode 100644 index 00000000000000..23af913d9d102f --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -0,0 +1,612 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign") + +ref_diff_map = { + "hf-internal-testing/tiny-random-LlamaForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CohereForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTJForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-GPTNeoForCausalLM" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "hf-internal-testing/tiny-random-GPTNeoXForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-MistralForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CodeGenForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/Mixtral-tiny" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, 
+ }, + "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -5, + "Assign" : -5, + }, + "hf-internal-testing/tiny-random-Starcoder2ForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-BloomForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-gpt2" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-BlenderbotForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PegasusForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PhiForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-MptForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-StableLmForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PersimmonForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-FalconForCausalLM" : { + "PagedAttentionExtension" : 2, + 
"ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-tiny-model-private/tiny-random-OPTForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "katuni4ka/tiny-random-xverse" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2-13b" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquilachat" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquila2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen1.5-moe" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-codegen2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-olmo-hf" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-jais" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + 
"katuni4ka/tiny-random-internlm" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm2" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-minicpm" : { + "ReadValue" : -8, + "ScaledDotProductAttention" : -4, + "Assign" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 11, + }, + "katuni4ka/tiny-random-falcon-40b" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-dbrx" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "fxmarty/tiny-random-GemmaForCausalLM" : { + "PagedAttentionExtension" : 1, + "ScaledDotProductAttention" : -1, + "Parameter" : 5, + "ReadValue" : -2, + "Assign" : -2, + }, + "fxmarty/tiny-dummy-qwen2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "fxmarty/really-tiny-falcon-testing" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "Xenova/tiny-random-Phi3ForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "facebook/opt-125m" : { + "PagedAttentionExtension" : 12, + "ScaledDotProductAttention" : -12, + "Parameter" : 28, + "ReadValue" : -24, + "Assign" : -24, + }, + "facebook/opt-350m" : { + "PagedAttentionExtension" : 24, + "ScaledDotProductAttention" : -24, + "Parameter" : 52, + "ReadValue" : -48, + "Assign" : -48, + }, + "katuni4ka/tiny-random-chatglm2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + 
"Assign" : -4, + }, + "katuni4ka/tiny-random-glm4" : { + "PagedAttentionExtension" : 6, + "ScaledDotProductAttention" : -6, + "Parameter" : 15, + "ReadValue" : -12, + "Assign" : -12, + }, +} + +ref_diff_map_cache_eviction = { + "hf-internal-testing/tiny-random-LlamaForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CohereForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTJForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-GPTNeoForCausalLM" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 14, + "Assign" : -8, + }, + "hf-internal-testing/tiny-random-GPTNeoXForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-MistralForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CodeGenForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/Mixtral-tiny" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -5, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -5, + }, + "hf-internal-testing/tiny-random-Starcoder2ForCausalLM" : { + 
"ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-BloomForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-gpt2" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-BlenderbotForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 9, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PegasusForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 9, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PhiForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-MptForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-StableLmForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PersimmonForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-FalconForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-tiny-model-private/tiny-random-OPTForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : 
-10, + }, + "katuni4ka/tiny-random-xverse" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2-13b" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquilachat" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquila2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen1.5-moe" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 14, + "Assign" : -8, + }, + "katuni4ka/tiny-random-codegen2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-olmo-hf" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-jais" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm2" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + 
"Parameter" : 14, + "Assign" : -8, + }, + "katuni4ka/tiny-random-minicpm" : { + "ScaledDotProductAttention" : -4, + "Parameter" : 14, + "PagedAttentionExtension" : 4, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-falcon-40b" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-dbrx" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "fxmarty/tiny-random-GemmaForCausalLM" : { + "ScaledDotProductAttention" : -1, + "ReadValue" : -2, + "PagedAttentionExtension" : 1, + "Parameter" : 5, + "Assign" : -2, + }, + "fxmarty/tiny-dummy-qwen2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "fxmarty/really-tiny-falcon-testing" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "Xenova/tiny-random-Phi3ForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "facebook/opt-125m" : { + "ScaledDotProductAttention" : -12, + "ReadValue" : -24, + "PagedAttentionExtension" : 12, + "Parameter" : 39, + "Assign" : -24, + }, + "facebook/opt-350m" : { + "ScaledDotProductAttention" : -24, + "ReadValue" : -48, + "PagedAttentionExtension" : 24, + "Parameter" : 75, + "Assign" : -48, + }, + "katuni4ka/tiny-random-chatglm2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-glm4" : { + "ScaledDotProductAttention" : -6, + "ReadValue" : -12, + "PagedAttentionExtension" : 6, + "Parameter" : 20, + "Assign" : -12, + }, +} diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py 
b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py index dc65324d4f028b..02481439818f28 100644 --- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py +++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py @@ -6,6 +6,7 @@ from optimum.intel import OVModelForCausalLM from models_hub_common.utils import retry import models_hub_common.utils as utils +from sdpa2pa_ref_diff import ref_diff_map, ref_diff_map_cache_eviction, nodes_to_compare import pytest import os import re @@ -14,15 +15,28 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_outputs): model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True) + before_map = {} + for op in model.model.get_ordered_ops(): + if op.get_type_name() in nodes_to_compare: + before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1 + paged_attention_transformation(model.model, use_block_indices_inputs, use_score_outputs) - # Test that a _PagedAttentionExtension node appeared after the transformation. - pa_counter = 0 + after_map = {} for op in model.model.get_ordered_ops(): - if isinstance(op, _PagedAttentionExtension): - pa_counter += 1 + if op.get_type_name() in nodes_to_compare: + after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1 + + # Collect the changes of nodes from nodes_to_compare + # And check if the numbers correspond to the reference ones + resulting_map = {} + for op in set(after_map.keys()) | set(before_map.keys()): + resulting_map[op] = after_map.get(op, 0) - before_map.get(op, 0) + + use_cache_eviction = use_block_indices_inputs and use_score_outputs + reference_map = ref_diff_map_cache_eviction[model_id] if use_cache_eviction else ref_diff_map[model_id] - assert pa_counter > 0, f"The model '{model_id}' has no _PagedAttentionExtension present." 
+ assert reference_map == resulting_map model_inputs = model.model.inputs for input in model_inputs: @@ -45,7 +59,8 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o if re.search(block_indices_pattern, name): block_indices_counter += 1 - assert(block_indices_counter == pa_counter) + assert block_indices_counter == resulting_map["PagedAttentionExtension"], \ + f"The number of block_indices inputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {block_indices_counter}" if (use_score_outputs): score_pattern = r'scores\.[0-9]+' @@ -57,7 +72,8 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o if re.search(score_pattern, name): score_outputs_counter += 1 - assert(score_outputs_counter == pa_counter) + assert block_indices_counter == resulting_map["PagedAttentionExtension"], \ + f"The number of scores outputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {block_indices_counter}" @pytest.mark.precommit @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))