From ad5865c1e2d444219bb409875cfdbabd30fc454f Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 12 Dec 2024 11:49:52 +0400 Subject: [PATCH 01/17] Added ImageText2Image class. Restructured code. --- .../whowhatbench/__init__.py | 2 + .../whowhatbench/imagetext2image.py | 140 ++++++++++ tools/who_what_benchmark/whowhatbench/wwb.py | 243 ++---------------- 3 files changed, 164 insertions(+), 221 deletions(-) create mode 100644 tools/who_what_benchmark/whowhatbench/imagetext2image.py diff --git a/tools/who_what_benchmark/whowhatbench/__init__.py b/tools/who_what_benchmark/whowhatbench/__init__.py index 278db2c6a1..2e3561d521 100644 --- a/tools/who_what_benchmark/whowhatbench/__init__.py +++ b/tools/who_what_benchmark/whowhatbench/__init__.py @@ -3,6 +3,7 @@ from .text_evaluator import TextEvaluator as Evaluator from .text2image_evaluator import Text2ImageEvaluator from .visualtext_evaluator import VisualTextEvaluator +from .imagetext2image import ImageText2ImageEvaluator __all__ = [ @@ -11,5 +12,6 @@ "TextEvaluator", "Text2ImageEvaluator", "VisualTextEvaluator", + "ImageText2ImageEvaluator", "EVALUATOR_REGISTRY", ] diff --git a/tools/who_what_benchmark/whowhatbench/imagetext2image.py b/tools/who_what_benchmark/whowhatbench/imagetext2image.py new file mode 100644 index 0000000000..6580c230dd --- /dev/null +++ b/tools/who_what_benchmark/whowhatbench/imagetext2image.py @@ -0,0 +1,140 @@ +import os +from typing import Any, Union + +import datasets +import pandas as pd +from tqdm import tqdm +from transformers import set_seed +import torch +import openvino_genai + +from .registry import register_evaluator +from .text2image_evaluator import Text2ImageEvaluator + +from .whowhat_metrics import ImageSimilarity + + +class Generator(openvino_genai.Generator): + def __init__(self, seed, rng, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + self.mu = mu + self.sigma = sigma + self.rng = rng + + def next(self): + return torch.randn(1, generator=self.rng, dtype=torch.float32).item() + + +def preprocess_fn(example): + return { + "prompts": example["Instruction_VLM-LLM"], + "images": example["source_img"], + } + + +def prepare_default_data(num_samples=None): + DATASET_NAME = "paint-by-inpaint/PIPE" + NUM_SAMPLES = 10 if num_samples is None else num_samples + set_seed(42) + default_dataset = datasets.load_dataset( + DATASET_NAME, split="test", streaming=True + ).filter(lambda example: example["Instruction_VLM-LLM"] != "").take(NUM_SAMPLES) + return default_dataset.map( + lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names + ) + + +@register_evaluator("imagetext-to-image") +class ImageText2ImageEvaluator(Text2ImageEvaluator): + def __init__( + self, + base_model: Any = None, + gt_data: str = None, + test_data: Union[str, list] = None, + metrics="similarity", + similarity_model_id: str = "openai/clip-vit-large-patch14", + resolution=(512, 512), + num_inference_steps=4, + crop_prompts=True, + num_samples=None, + gen_image_fn=None, + seed=42, + is_genai=False, + ) -> None: + assert ( + base_model is not None or gt_data is not None + ), "Text generation pipeline for evaluation or ground trush data must be defined" + + self.test_data = test_data + self.metrics = metrics + self.resolution = resolution + self.crop_prompt = crop_prompts + self.num_samples = num_samples + self.num_inference_steps = num_inference_steps + self.seed = seed + self.similarity = None + self.similarity = ImageSimilarity(similarity_model_id) + self.last_cmp = None + self.gt_dir = os.path.dirname(gt_data) + 
self.generation_fn = gen_image_fn + self.is_genai = is_genai + + if base_model: + base_model.resolution = self.resolution + self.gt_data = self._generate_data( + base_model, gen_image_fn, os.path.join(self.gt_dir, "reference") + ) + else: + self.gt_data = pd.read_csv(gt_data, keep_default_na=False) + + def _generate_data(self, model, gen_image_fn=None, image_dir="reference"): + def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): + output = model( + prompt, + num_inference_steps=num_inference_steps, + output_type="pil", + width=self.resolution[0], + height=self.resolution[0], + generator=generator, + ) + return output.images[0] + + generation_fn = gen_image_fn or default_gen_image_fn + + if self.test_data: + if isinstance(self.test_data, str): + data = pd.read_csv(self.test_data) + else: + if isinstance(self.test_data, dict): + assert "prompts" in self.test_data + assert "images" in self.test_data + data = dict(self.test_data) + data = pd.DataFrame.from_dict(data) + else: + data = pd.DataFrame.from_dict(prepare_default_data(self.num_samples)) + + prompts = data["prompts"] + images = data["images"] + rng = torch.Generator(device="cpu") + + if not os.path.exists(image_dir): + os.makedirs(image_dir) + + for i, (prompt, image) in tqdm(enumerate(zip(prompts, images)), desc="Evaluate pipeline"): + set_seed(self.seed) + rng = rng.manual_seed(self.seed) + image = generation_fn( + model, + prompt, + image=image, + num_inference_steps=self.num_inference_steps, + generator=Generator(self.seed, rng) if self.is_genai else rng + ) + image_path = os.path.join(image_dir, f"{i}.png") + image.save(image_path) + images.append(image_path) + + res_data = {"prompts": list(prompts), "images": images} + df = pd.DataFrame(res_data) + + return df diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 026a6cc69b..bc8fec1c34 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -5,18 +5,17 @@ import argparse import difflib import numpy as np -import json import logging import os -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, AutoProcessor, AutoModel, AutoModelForVision2Seq +from transformers import AutoTokenizer, AutoProcessor import openvino as ov import pandas as pd from datasets import load_dataset -from diffusers import DiffusionPipeline from PIL import Image +from whowhatbench.model_loaders import load_model from whowhatbench import EVALUATOR_REGISTRY # Configure logging @@ -24,224 +23,6 @@ logger = logging.getLogger(__name__) -class GenAIModelWrapper: - """ - A helper class to store additional attributes for GenAI models - """ - - def __init__(self, model, model_dir, model_type): - self.model = model - self.model_type = model_type - - if model_type == "text" or model_type == "visual-text": - self.config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) - elif model_type == "text-to-image": - self.config = DiffusionPipeline.load_config( - model_dir, trust_remote_code=True) - - def __getattr__(self, attr): - if attr in self.__dict__: - return getattr(self, attr) - else: - return getattr(self.model, attr) - - -def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None): - try: - import openvino_genai - except ImportError: - logger.error( - "Failed to import openvino_genai package. 
Please install it.") - exit(-1) - return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device=device, **ov_config), model_dir, "text") - - -def load_text_model( - model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False -): - if use_hf: - logger.info("Using HF Transformers API") - model = AutoModelForCausalLM.from_pretrained( - model_id, trust_remote_code=True, device_map=device.lower() - ) - model.eval() - elif use_genai: - logger.info("Using OpenVINO GenAI API") - model = load_text_genai_pipeline(model_id, device, ov_config) - else: - logger.info("Using Optimum API") - from optimum.intel.openvino import OVModelForCausalLM - try: - model = OVModelForCausalLM.from_pretrained( - model_id, trust_remote_code=True, device=device, ov_config=ov_config - ) - except ValueError: - config = AutoConfig.from_pretrained( - model_id, trust_remote_code=True) - model = OVModelForCausalLM.from_pretrained( - model_id, - config=config, - trust_remote_code=True, - use_cache=True, - device=device, - ov_config=ov_config, - ) - - return model - - -def load_text2image_genai_pipeline(model_dir, device="CPU", ov_config=None): - try: - import openvino_genai - except ImportError: - logger.error( - "Failed to import openvino_genai package. Please install it.") - exit(-1) - - return GenAIModelWrapper( - openvino_genai.Text2ImagePipeline(model_dir, device=device, **ov_config), - model_dir, - "text-to-image" - ) - - -def load_text2image_model( - model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False -): - if use_genai: - logger.info("Using OpenvINO GenAI API") - model = load_text2image_genai_pipeline(model_id, device, ov_config) - elif use_hf: - logger.info("Using HF Transformers API") - model = DiffusionPipeline.from_pretrained( - model_id, trust_remote_code=True) - else: - logger.info("Using Optimum API") - from optimum.intel import OVPipelineForText2Image - TEXT2IMAGEPipeline = OVPipelineForText2Image - - try: - model = TEXT2IMAGEPipeline.from_pretrained( - model_id, trust_remote_code=True, device=device, ov_config=ov_config - ) - except ValueError: - config = AutoConfig.from_pretrained( - model_id, trust_remote_code=True) - model = TEXT2IMAGEPipeline.from_pretrained( - model_id, - config=config, - trust_remote_code=True, - use_cache=True, - device=device, - ov_config=ov_config, - ) - - return model - - -def load_visual_text_genai_pipeline(model_dir, device="CPU", ov_config=None): - try: - import openvino_genai - except ImportError as e: - logger.error("Failed to import openvino_genai package. Please install it. 
Details:\n", e) - exit(-1) - - return GenAIModelWrapper( - openvino_genai.VLMPipeline(model_dir, device, **ov_config), - model_dir, - "visual-text" - ) - - -def load_visual_text_model( - model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False -): - if use_hf: - logger.info("Using HF Transformers API") - config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) - try: - model = AutoModelForVision2Seq.from_pretrained( - model_id, trust_remote_code=True, device_map=device.lower() - ) - except ValueError: - try: - model = AutoModel.from_pretrained( - model_id, trust_remote_code=True, device_map=device.lower() - ) - except ValueError: - model = AutoModelForCausalLM.from_pretrained( - model_id, trust_remote_code=True, device_map=device.lower(), _attn_implementation="eager", use_flash_attention_2=False - ) - model.eval() - elif use_genai: - logger.info("Using OpenVINO GenAI API") - model = load_visual_text_genai_pipeline(model_id, device, ov_config) - else: - logger.info("Using Optimum API") - from optimum.intel.openvino import OVModelForVisualCausalLM - try: - model = OVModelForVisualCausalLM.from_pretrained( - model_id, trust_remote_code=True, device=device, ov_config=ov_config - ) - except ValueError: - config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) - model = OVModelForVisualCausalLM.from_pretrained( - model_id, - config=config, - trust_remote_code=True, - use_cache=True, - device=device, - ov_config=ov_config, - ) - return model - - -def load_model( - model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False -): - if model_id is None: - return None - - if ov_config: - with open(ov_config) as f: - ov_options = json.load(f) - else: - ov_options = {} - - if model_type == "text": - return load_text_model(model_id, device, ov_options, use_hf, use_genai) - elif model_type == "text-to-image": - return load_text2image_model( - model_type, model_id, device, ov_options, use_hf, use_genai - ) - elif model_type == "visual-text": - return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai) - else: - raise ValueError(f"Unsupported model type: {model_type}") - - -def load_prompts(args): - if args.dataset is None: - return None - split = "validation" - if args.split is not None: - split = args.split - if "," in args.dataset: - path_name = args.dataset.split(",") - path = path_name[0] - name = path_name[1] - else: - path = args.dataset - name = None - data = load_dataset(path=path, name=name, split=split) - - res = data[args.dataset_field] - - res = {"prompts": list(res)} - - return res - - def parse_args(): parser = argparse.ArgumentParser( prog="WWB CLI", @@ -389,6 +170,26 @@ def check_args(args): "Wether --target-model, --target-data or --gt-data should be provided") +def load_prompts(args): + if args.dataset is None: + return None + split = "validation" + if args.split is not None: + split = args.split + if "," in args.dataset: + path_name = args.dataset.split(",") + path = path_name[0] + name = path_name[1] + else: + path = args.dataset + name = None + data = load_dataset(path=path, name=name, split=split) + + res = data[args.dataset_field] + res = {"prompts": list(res)} + return res + + def load_tokenizer(args): tokenizer = None if args.tokenizer is not None: From 2cf506bd719cd64c9028d22dd1882f67a671562b Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 12 Dec 2024 11:59:09 +0400 Subject: [PATCH 02/17] Added missed fuile --- .../whowhatbench/model_loaders.py | 204 ++++++++++++++++++ 1 file changed, 204 
insertions(+) create mode 100644 tools/who_what_benchmark/whowhatbench/model_loaders.py diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py new file mode 100644 index 0000000000..d7ac8f887d --- /dev/null +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -0,0 +1,204 @@ +import logging + +from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq +from diffusers import DiffusionPipeline + + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class GenAIModelWrapper: + """ + A helper class to store additional attributes for GenAI models + """ + + def __init__(self, model, model_dir, model_type): + self.model = model + self.model_type = model_type + + if model_type == "text" or model_type == "visual-text": + self.config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + elif model_type == "text-to-image": + self.config = DiffusionPipeline.load_config( + model_dir, trust_remote_code=True) + + def __getattr__(self, attr): + if attr in self.__dict__: + return getattr(self, attr) + else: + return getattr(self.model, attr) + + +def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None): + try: + import openvino_genai + except ImportError: + logger.error( + "Failed to import openvino_genai package. Please install it.") + exit(-1) + return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device=device, **ov_config), model_dir, "text") + + +def load_text_model( + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False +): + if use_hf: + logger.info("Using HF Transformers API") + model = AutoModelForCausalLM.from_pretrained( + model_id, trust_remote_code=True, device_map=device.lower() + ) + model.eval() + elif use_genai: + logger.info("Using OpenVINO GenAI API") + model = load_text_genai_pipeline(model_id, device, ov_config) + else: + logger.info("Using Optimum API") + from optimum.intel.openvino import OVModelForCausalLM + try: + model = OVModelForCausalLM.from_pretrained( + model_id, trust_remote_code=True, device=device, ov_config=ov_config + ) + except ValueError: + config = AutoConfig.from_pretrained( + model_id, trust_remote_code=True) + model = OVModelForCausalLM.from_pretrained( + model_id, + config=config, + trust_remote_code=True, + use_cache=True, + device=device, + ov_config=ov_config, + ) + + return model + + +def load_text2image_genai_pipeline(model_dir, device="CPU", ov_config=None): + try: + import openvino_genai + except ImportError: + logger.error( + "Failed to import openvino_genai package. 
Please install it.") + exit(-1) + + return GenAIModelWrapper( + openvino_genai.Text2ImagePipeline(model_dir, device=device, **ov_config), + model_dir, + "text-to-image" + ) + + +def load_text2image_model( + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False +): + if use_genai: + logger.info("Using OpenvINO GenAI API") + model = load_text2image_genai_pipeline(model_id, device, ov_config) + elif use_hf: + logger.info("Using HF Transformers API") + model = DiffusionPipeline.from_pretrained( + model_id, trust_remote_code=True) + else: + logger.info("Using Optimum API") + from optimum.intel import OVPipelineForText2Image + TEXT2IMAGEPipeline = OVPipelineForText2Image + + try: + model = TEXT2IMAGEPipeline.from_pretrained( + model_id, trust_remote_code=True, device=device, ov_config=ov_config + ) + except ValueError: + config = AutoConfig.from_pretrained( + model_id, trust_remote_code=True) + model = TEXT2IMAGEPipeline.from_pretrained( + model_id, + config=config, + trust_remote_code=True, + use_cache=True, + device=device, + ov_config=ov_config, + ) + + return model + + +def load_visual_text_genai_pipeline(model_dir, device="CPU", ov_config=None): + try: + import openvino_genai + except ImportError as e: + logger.error("Failed to import openvino_genai package. Please install it. Details:\n", e) + exit(-1) + + return GenAIModelWrapper( + openvino_genai.VLMPipeline(model_dir, device, **ov_config), + model_dir, + "visual-text" + ) + + +def load_visual_text_model( + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False +): + if use_hf: + logger.info("Using HF Transformers API") + config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) + try: + model = AutoModelForVision2Seq.from_pretrained( + model_id, trust_remote_code=True, device_map=device.lower() + ) + except ValueError: + try: + model = AutoModel.from_pretrained( + model_id, trust_remote_code=True, device_map=device.lower() + ) + except ValueError: + model = AutoModelForCausalLM.from_pretrained( + model_id, trust_remote_code=True, device_map=device.lower(), _attn_implementation="eager", use_flash_attention_2=False + ) + model.eval() + elif use_genai: + logger.info("Using OpenVINO GenAI API") + model = load_visual_text_genai_pipeline(model_id, device, ov_config) + else: + logger.info("Using Optimum API") + from optimum.intel.openvino import OVModelForVisualCausalLM + try: + model = OVModelForVisualCausalLM.from_pretrained( + model_id, trust_remote_code=True, device=device, ov_config=ov_config + ) + except ValueError: + config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) + model = OVModelForVisualCausalLM.from_pretrained( + model_id, + config=config, + trust_remote_code=True, + use_cache=True, + device=device, + ov_config=ov_config, + ) + return model + + +def load_model( + model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False +): + if model_id is None: + return None + + if ov_config: + with open(ov_config) as f: + ov_options = json.load(f) + else: + ov_options = {} + + if model_type == "text": + return load_text_model(model_id, device, ov_options, use_hf, use_genai) + elif model_type == "text-to-image": + return load_text2image_model( + model_id, device, ov_options, use_hf, use_genai + ) + elif model_type == "visual-text": + return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai) + else: + raise ValueError(f"Unsupported model type: {model_type}") \ No newline at end of file From 74a6fa2f86afdf64f69ba088548b1657e80562cc Mon Sep 
17 00:00:00 2001 From: Alexander Date: Thu, 12 Dec 2024 13:56:34 +0400 Subject: [PATCH 03/17] Fixed issues --- .../whowhatbench/imagetext2image.py | 29 ++++++----- .../whowhatbench/model_loaders.py | 49 ++++++++++++++++++- .../whowhatbench/text2image_evaluator.py | 17 ++++--- tools/who_what_benchmark/whowhatbench/wwb.py | 43 ++++++++++++++-- 4 files changed, 112 insertions(+), 26 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/imagetext2image.py b/tools/who_what_benchmark/whowhatbench/imagetext2image.py index 6580c230dd..a15aec7a2d 100644 --- a/tools/who_what_benchmark/whowhatbench/imagetext2image.py +++ b/tools/who_what_benchmark/whowhatbench/imagetext2image.py @@ -88,15 +88,17 @@ def __init__( self.gt_data = pd.read_csv(gt_data, keep_default_na=False) def _generate_data(self, model, gen_image_fn=None, image_dir="reference"): - def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): - output = model( - prompt, - num_inference_steps=num_inference_steps, - output_type="pil", - width=self.resolution[0], - height=self.resolution[0], - generator=generator, - ) + def default_gen_image_fn(model, prompt, image, num_inference_steps, generator=None): + with torch.no_grad(): + output = model( + prompt, + image=image, + num_inference_steps=num_inference_steps, + output_type="pil", + width=self.resolution[0], + height=self.resolution[0], + generator=generator, + ) return output.images[0] generation_fn = gen_image_fn or default_gen_image_fn @@ -115,6 +117,7 @@ def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): prompts = data["prompts"] images = data["images"] + output_images = [] rng = torch.Generator(device="cpu") if not os.path.exists(image_dir): @@ -123,7 +126,7 @@ def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): for i, (prompt, image) in tqdm(enumerate(zip(prompts, images)), desc="Evaluate pipeline"): set_seed(self.seed) rng = rng.manual_seed(self.seed) - image = generation_fn( + output = generation_fn( model, prompt, image=image, @@ -131,10 +134,10 @@ def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): generator=Generator(self.seed, rng) if self.is_genai else rng ) image_path = os.path.join(image_dir, f"{i}.png") - image.save(image_path) - images.append(image_path) + output.save(image_path) + output_images.append(image_path) - res_data = {"prompts": list(prompts), "images": images} + res_data = {"prompts": list(prompts), "images": output_images} df = pd.DataFrame(res_data) return df diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index d7ac8f887d..14623db3fb 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -1,7 +1,7 @@ import logging from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq -from diffusers import DiffusionPipeline +from diffusers import DiffusionPipeline, AutoPipelineForImage2Image @@ -180,6 +180,51 @@ def load_visual_text_model( return model +def load_imagetext2image_genai_pipeline(model_dir, device="CPU", ov_config=None): + try: + import openvino_genai + except ImportError as e: + logger.error("Failed to import openvino_genai package. Please install it. 
Details:\n", e) + exit(-1) + + return GenAIModelWrapper( + openvino_genai.Image2ImagePipeline(model_dir, device, **ov_config), + model_dir, + "imagetext-to-image" + ) + + +def load_magetext2image_model( + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False +): + if use_hf: + logger.info("Using HF Transformers API") + model = AutoPipelineForImage2Image.from_pretrained( + model_id, trust_remote_code=True + ) + elif use_genai: + logger.info("Using OpenVINO GenAI API") + model = load_imagetext2image_genai_pipeline(model_id, device, ov_config) + else: + logger.info("Using Optimum API") + from optimum.intel.openvino import OVPipelineForImage2Image + try: + model = OVPipelineForImage2Image.from_pretrained( + model_id, trust_remote_code=True, device=device, ov_config=ov_config + ) + except ValueError: + config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) + model = OVPipelineForImage2Image.from_pretrained( + model_id, + config=config, + trust_remote_code=True, + use_cache=True, + device=device, + ov_config=ov_config, + ) + return model + + def load_model( model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False ): @@ -200,5 +245,7 @@ def load_model( ) elif model_type == "visual-text": return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai) + elif model_type == "imagetext-to-image": + return load_magetext2image_model(model_id, device, ov_options, use_hf, use_genai) else: raise ValueError(f"Unsupported model type: {model_type}") \ No newline at end of file diff --git a/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py b/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py index 1ff7ff5e21..c3c64e7ba2 100644 --- a/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py @@ -127,14 +127,15 @@ def worst_examples(self, top_k: int = 5, metric="similarity"): def _generate_data(self, model, gen_image_fn=None, image_dir="reference"): def default_gen_image_fn(model, prompt, num_inference_steps, generator=None): - output = model( - prompt, - num_inference_steps=num_inference_steps, - output_type="pil", - width=self.resolution[0], - height=self.resolution[0], - generator=generator, - ) + with torch.no_grad(): + output = model( + prompt, + num_inference_steps=num_inference_steps, + output_type="pil", + width=self.resolution[0], + height=self.resolution[0], + generator=generator, + ) return output.images[0] generation_fn = gen_image_fn or default_gen_image_fn diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index bc8fec1c34..d71ec0d58d 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -59,9 +59,10 @@ def parse_args(): parser.add_argument( "--model-type", type=str, - choices=["text", "text-to-image", "visual-text"], + choices=["text", "text-to-image", "visual-text", "imagetext-to-image"], default="text", - help="Indicated the model type: 'text' - for causal text generation, 'text-to-image' - for image generation.", + help="Indicated the model type: 'text' - for causal text generation, 'text-to-image' - for image generation, " + "visual-text - for Visual Language Models, imagetext-to-image - for image generation based on image and prompt", ) parser.add_argument( "--data-encoder", @@ -272,8 +273,30 @@ def genai_gen_image(model, prompt, num_inference_steps, generator=None): return image +def genai_gen_imagetext(model, prompt, 
image, num_inference_steps, generator=None): + image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)) + if model.resolution[0] is not None: + image_tensor = model.generate( + prompt, + image=image, + width=model.resolution[0], + height=model.resolution[1], + num_inference_steps=num_inference_steps, + generator=generator, + ) + else: + image_tensor = model.generate( + prompt, + image=image_data, + num_inference_steps=num_inference_steps, + generator=generator, + ) + image = Image.fromarray(image_tensor.data[0]) + return image + + def genai_gen_visual_text(model, prompt, image, processor, tokenizer, max_new_tokens, crop_question): - image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte)) + image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)) config = model.get_generation_config() config.max_new_tokens = max_new_tokens config.do_sample = False @@ -334,6 +357,18 @@ def create_evaluator(base_model, args): gen_answer_fn=genai_gen_visual_text if args.genai else None, processor=processor, ) + elif task == "imagetext-to-image": + return EvaluatorCLS( + base_model=base_model, + gt_data=args.gt_data, + test_data=prompts, + num_samples=args.num_samples, + resolution=(args.image_size, args.image_size), + num_inference_steps=args.num_inference_steps, + gen_image_fn=genai_gen_imagetext if args.genai else None, + is_genai=args.genai, + seed=args.seed, + ) else: raise ValueError(f"Unsupported task: {task}") @@ -442,7 +477,7 @@ def main(): if args.verbose and (args.target_model or args.target_data): if args.model_type == "text" or args.model_type == "visual-text": print_text_results(evaluator) - elif "text-to-image" in args.model_type: + elif "text-to-image" in args.model_type or "imagetext-to-image" in args.model_type: print_image_results(evaluator) From a382f8e9f9d87b3a3d1103551818540f1fb34d4f Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 12 Dec 2024 15:24:32 +0400 Subject: [PATCH 04/17] Tests --- tools/who_what_benchmark/tests/test_cli_image.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index b2c2015f80..d4fdd739bc 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -20,6 +20,9 @@ def run_wwb(args): @pytest.mark.parametrize( ("model_id", "model_type", "backend"), [ + ("hf-internal-testing/tiny-stable-diffusion-torch", "imagetext-to-image", "hf"), + ("hf-internal-testing/tiny-stable-diffusion-torch", "imagetext-to-image", "openvino"), + ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "imagetext-to-image", "hf"), ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"), ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"), ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"), @@ -65,6 +68,7 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), [ + ("echarlaix/tiny-random-stable-diffusion-xl", "imagetext-to-image"), ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"), ], ) From 9f6913e0ac11d1f9f2f5e31af89a1b6b2d7e19e9 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 12 Dec 2024 15:32:52 +0400 Subject: [PATCH 05/17] Stle --- tools/who_what_benchmark/whowhatbench/model_loaders.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index 14623db3fb..33c621cc95 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -1,13 +1,14 @@ import logging +import json from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq from diffusers import DiffusionPipeline, AutoPipelineForImage2Image - logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + class GenAIModelWrapper: """ A helper class to store additional attributes for GenAI models @@ -248,4 +249,4 @@ def load_model( elif model_type == "imagetext-to-image": return load_magetext2image_model(model_id, device, ov_options, use_hf, use_genai) else: - raise ValueError(f"Unsupported model type: {model_type}") \ No newline at end of file + raise ValueError(f"Unsupported model type: {model_type}") From c115dace741c0b4c357a4732cb449ffacc1e36b5 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 13 Dec 2024 11:34:03 +0400 Subject: [PATCH 06/17] Aligned namings. Fixed tests --- .../who_what_benchmark/tests/test_cli_image.py | 11 ++++++----- .../who_what_benchmark/whowhatbench/__init__.py | 4 ++-- .../{imagetext2image.py => image2image.py} | 4 ++-- .../whowhatbench/model_loaders.py | 12 ++++++------ tools/who_what_benchmark/whowhatbench/wwb.py | 17 +++++++++-------- 5 files changed, 25 insertions(+), 23 deletions(-) rename tools/who_what_benchmark/whowhatbench/{imagetext2image.py => image2image.py} (98%) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index d4fdd739bc..402d34a19e 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -20,9 +20,8 @@ def run_wwb(args): @pytest.mark.parametrize( ("model_id", "model_type", "backend"), [ - ("hf-internal-testing/tiny-stable-diffusion-torch", "imagetext-to-image", "hf"), - ("hf-internal-testing/tiny-stable-diffusion-torch", "imagetext-to-image", "openvino"), - ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "imagetext-to-image", "hf"), + ("hf-internal-testing/tiny-stable-diffusion-torch", "image-to-image", "hf"), + ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-to-image", "hf"), ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"), ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"), ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"), @@ -68,7 +67,7 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), [ - ("echarlaix/tiny-random-stable-diffusion-xl", "imagetext-to-image"), + ("echarlaix/tiny-random-stable-diffusion-xl", "image-to-image"), ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"), ], ) @@ -85,7 +84,7 @@ def test_image_model_genai(model_id, model_type): wwb_args = [ "--base-model", - MODEL_PATH, + model_id, "--num-samples", "1", "--gt-data", @@ -94,6 +93,7 @@ def test_image_model_genai(model_id, model_type): "CPU", "--model-type", model_type, + "--hf", ] result = run_wwb(wwb_args) assert result.returncode == 0 @@ -135,6 +135,7 @@ def test_image_model_genai(model_id, model_type): model_type, "--output", output_dir, + "--genai", ] result = run_wwb(wwb_args) assert result.returncode == 0 diff --git a/tools/who_what_benchmark/whowhatbench/__init__.py 
b/tools/who_what_benchmark/whowhatbench/__init__.py index 2e3561d521..f608601ec8 100644 --- a/tools/who_what_benchmark/whowhatbench/__init__.py +++ b/tools/who_what_benchmark/whowhatbench/__init__.py @@ -3,7 +3,7 @@ from .text_evaluator import TextEvaluator as Evaluator from .text2image_evaluator import Text2ImageEvaluator from .visualtext_evaluator import VisualTextEvaluator -from .imagetext2image import ImageText2ImageEvaluator +from .image2image import Image2ImageEvaluator __all__ = [ @@ -12,6 +12,6 @@ "TextEvaluator", "Text2ImageEvaluator", "VisualTextEvaluator", - "ImageText2ImageEvaluator", + "Image2ImageEvaluator", "EVALUATOR_REGISTRY", ] diff --git a/tools/who_what_benchmark/whowhatbench/imagetext2image.py b/tools/who_what_benchmark/whowhatbench/image2image.py similarity index 98% rename from tools/who_what_benchmark/whowhatbench/imagetext2image.py rename to tools/who_what_benchmark/whowhatbench/image2image.py index a15aec7a2d..d007b9b765 100644 --- a/tools/who_what_benchmark/whowhatbench/imagetext2image.py +++ b/tools/who_what_benchmark/whowhatbench/image2image.py @@ -44,8 +44,8 @@ def prepare_default_data(num_samples=None): ) -@register_evaluator("imagetext-to-image") -class ImageText2ImageEvaluator(Text2ImageEvaluator): +@register_evaluator("image-to-image") +class Image2ImageEvaluator(Text2ImageEvaluator): def __init__( self, base_model: Any = None, diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index 33c621cc95..f54d232bc2 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -181,7 +181,7 @@ def load_visual_text_model( return model -def load_imagetext2image_genai_pipeline(model_dir, device="CPU", ov_config=None): +def load_image2image_genai_pipeline(model_dir, device="CPU", ov_config=None): try: import openvino_genai except ImportError as e: @@ -191,11 +191,11 @@ def load_imagetext2image_genai_pipeline(model_dir, device="CPU", ov_config=None) return GenAIModelWrapper( openvino_genai.Image2ImagePipeline(model_dir, device, **ov_config), model_dir, - "imagetext-to-image" + "image-to-image" ) -def load_magetext2image_model( +def load_imagetext2image_model( model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False ): if use_hf: @@ -205,7 +205,7 @@ def load_magetext2image_model( ) elif use_genai: logger.info("Using OpenVINO GenAI API") - model = load_imagetext2image_genai_pipeline(model_id, device, ov_config) + model = load_image2image_genai_pipeline(model_id, device, ov_config) else: logger.info("Using Optimum API") from optimum.intel.openvino import OVPipelineForImage2Image @@ -246,7 +246,7 @@ def load_model( ) elif model_type == "visual-text": return load_visual_text_model(model_id, device, ov_options, use_hf, use_genai) - elif model_type == "imagetext-to-image": - return load_magetext2image_model(model_id, device, ov_options, use_hf, use_genai) + elif model_type == "image-to-image": + return load_imagetext2image_model(model_id, device, ov_options, use_hf, use_genai) else: raise ValueError(f"Unsupported model type: {model_type}") diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index d71ec0d58d..62be7ce633 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -59,10 +59,10 @@ def parse_args(): parser.add_argument( "--model-type", type=str, - choices=["text", "text-to-image", "visual-text", 
"imagetext-to-image"], + choices=["text", "text-to-image", "visual-text", "image-to-image"], default="text", help="Indicated the model type: 'text' - for causal text generation, 'text-to-image' - for image generation, " - "visual-text - for Visual Language Models, imagetext-to-image - for image generation based on image and prompt", + "visual-text - for Visual Language Models, image-to-image - for image generation based on image and prompt", ) parser.add_argument( "--data-encoder", @@ -255,7 +255,7 @@ def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question): def genai_gen_image(model, prompt, num_inference_steps, generator=None): - if model.resolution[0] is not None: + if model.resolution is not None and model.resolution[0] is not None: image_tensor = model.generate( prompt, width=model.resolution[0], @@ -273,9 +273,10 @@ def genai_gen_image(model, prompt, num_inference_steps, generator=None): return image -def genai_gen_imagetext(model, prompt, image, num_inference_steps, generator=None): +def genai_gen_image2image(model, prompt, image, num_inference_steps, generator=None): image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)) - if model.resolution[0] is not None: + print("model.resolution: ", model.resolution) + if model.resolution is not None and model.resolution[0] is not None: image_tensor = model.generate( prompt, image=image, @@ -357,7 +358,7 @@ def create_evaluator(base_model, args): gen_answer_fn=genai_gen_visual_text if args.genai else None, processor=processor, ) - elif task == "imagetext-to-image": + elif task == "image-to-image": return EvaluatorCLS( base_model=base_model, gt_data=args.gt_data, @@ -365,7 +366,7 @@ def create_evaluator(base_model, args): num_samples=args.num_samples, resolution=(args.image_size, args.image_size), num_inference_steps=args.num_inference_steps, - gen_image_fn=genai_gen_imagetext if args.genai else None, + gen_image_fn=genai_gen_image2image if args.genai else None, is_genai=args.genai, seed=args.seed, ) @@ -477,7 +478,7 @@ def main(): if args.verbose and (args.target_model or args.target_data): if args.model_type == "text" or args.model_type == "visual-text": print_text_results(evaluator) - elif "text-to-image" in args.model_type or "imagetext-to-image" in args.model_type: + elif "text-to-image" in args.model_type or "image-to-image" in args.model_type: print_image_results(evaluator) From 3ad5e033732d001b17a6ea22964060f627c9b3bc Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 17 Dec 2024 10:31:57 +0400 Subject: [PATCH 07/17] Replaced generator with GenAI version --- thirdparty/openvino_tokenizers | 2 +- .../who_what_benchmark/whowhatbench/image2image.py | 13 +------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index bcfd3eda25..1da0d2c705 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit bcfd3eda25ae3ec423502a4074e35c774506c732 +Subproject commit 1da0d2c705016ad3f04c160ac9338f06505a07c1 diff --git a/tools/who_what_benchmark/whowhatbench/image2image.py b/tools/who_what_benchmark/whowhatbench/image2image.py index d007b9b765..af777f4849 100644 --- a/tools/who_what_benchmark/whowhatbench/image2image.py +++ b/tools/who_what_benchmark/whowhatbench/image2image.py @@ -14,17 +14,6 @@ from .whowhat_metrics import ImageSimilarity -class Generator(openvino_genai.Generator): - def __init__(self, seed, rng, mu=0.0, sigma=1.0): - 
openvino_genai.Generator.__init__(self) - self.mu = mu - self.sigma = sigma - self.rng = rng - - def next(self): - return torch.randn(1, generator=self.rng, dtype=torch.float32).item() - - def preprocess_fn(example): return { "prompts": example["Instruction_VLM-LLM"], @@ -131,7 +120,7 @@ def default_gen_image_fn(model, prompt, image, num_inference_steps, generator=No prompt, image=image, num_inference_steps=self.num_inference_steps, - generator=Generator(self.seed, rng) if self.is_genai else rng + generator=openvino_genai.TorchGenerator(self.seed) if self.is_genai else rng ) image_path = os.path.join(image_dir, f"{i}.png") output.save(image_path) From 6f478a28fcc1f331ef5a9106407f38e86fb72c17 Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 17 Dec 2024 11:49:24 +0400 Subject: [PATCH 08/17] Removed default resolution --- tools/who_what_benchmark/whowhatbench/image2image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/whowhatbench/image2image.py b/tools/who_what_benchmark/whowhatbench/image2image.py index af777f4849..86fefa80e8 100644 --- a/tools/who_what_benchmark/whowhatbench/image2image.py +++ b/tools/who_what_benchmark/whowhatbench/image2image.py @@ -42,7 +42,7 @@ def __init__( test_data: Union[str, list] = None, metrics="similarity", similarity_model_id: str = "openai/clip-vit-large-patch14", - resolution=(512, 512), + resolution=(None, None), num_inference_steps=4, crop_prompts=True, num_samples=None, From 459101fa340804796295ee56a482317e6e302c36 Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 17 Dec 2024 16:14:42 +0400 Subject: [PATCH 09/17] Removed resolution from Im2im pipeline --- tools/who_what_benchmark/whowhatbench/wwb.py | 23 +++++--------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 62be7ce633..b1de52e3c0 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -275,23 +275,12 @@ def genai_gen_image(model, prompt, num_inference_steps, generator=None): def genai_gen_image2image(model, prompt, image, num_inference_steps, generator=None): image_data = ov.Tensor(np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)) - print("model.resolution: ", model.resolution) - if model.resolution is not None and model.resolution[0] is not None: - image_tensor = model.generate( - prompt, - image=image, - width=model.resolution[0], - height=model.resolution[1], - num_inference_steps=num_inference_steps, - generator=generator, - ) - else: - image_tensor = model.generate( - prompt, - image=image_data, - num_inference_steps=num_inference_steps, - generator=generator, - ) + image_tensor = model.generate( + prompt, + image=image_data, + num_inference_steps=num_inference_steps, + generator=generator, + ) image = Image.fromarray(image_tensor.data[0]) return image From 8e69378e4abc0ad2268b75e9182c2a39140bce62 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 18 Dec 2024 11:05:05 +0400 Subject: [PATCH 10/17] Fixed discrepancy between im2im pipelines --- tools/who_what_benchmark/whowhatbench/image2image.py | 7 ++----- tools/who_what_benchmark/whowhatbench/wwb.py | 6 +----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/image2image.py b/tools/who_what_benchmark/whowhatbench/image2image.py index 86fefa80e8..90eb6c7c87 100644 --- a/tools/who_what_benchmark/whowhatbench/image2image.py +++ 
b/tools/who_what_benchmark/whowhatbench/image2image.py @@ -42,7 +42,6 @@ def __init__( test_data: Union[str, list] = None, metrics="similarity", similarity_model_id: str = "openai/clip-vit-large-patch14", - resolution=(None, None), num_inference_steps=4, crop_prompts=True, num_samples=None, @@ -56,7 +55,6 @@ def __init__( self.test_data = test_data self.metrics = metrics - self.resolution = resolution self.crop_prompt = crop_prompts self.num_samples = num_samples self.num_inference_steps = num_inference_steps @@ -67,9 +65,9 @@ def __init__( self.gt_dir = os.path.dirname(gt_data) self.generation_fn = gen_image_fn self.is_genai = is_genai + self.resolution = None if base_model: - base_model.resolution = self.resolution self.gt_data = self._generate_data( base_model, gen_image_fn, os.path.join(self.gt_dir, "reference") ) @@ -84,8 +82,7 @@ def default_gen_image_fn(model, prompt, image, num_inference_steps, generator=No image=image, num_inference_steps=num_inference_steps, output_type="pil", - width=self.resolution[0], - height=self.resolution[0], + strength=0.8, generator=generator, ) return output.images[0] diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index b1de52e3c0..2ff8c45975 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -1,7 +1,3 @@ -from .utils import patch_diffusers - -patch_diffusers() - import argparse import difflib import numpy as np @@ -279,6 +275,7 @@ def genai_gen_image2image(model, prompt, image, num_inference_steps, generator=N prompt, image=image_data, num_inference_steps=num_inference_steps, + strength=0.8, generator=generator, ) image = Image.fromarray(image_tensor.data[0]) @@ -353,7 +350,6 @@ def create_evaluator(base_model, args): gt_data=args.gt_data, test_data=prompts, num_samples=args.num_samples, - resolution=(args.image_size, args.image_size), num_inference_steps=args.num_inference_steps, gen_image_fn=genai_gen_image2image if args.genai else None, is_genai=args.genai, From f213f4758b40c6c3b27f375e09a8f7cef253a3a3 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 18 Dec 2024 12:56:20 +0400 Subject: [PATCH 11/17] Reverted tokenizer version --- thirdparty/openvino_tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 1da0d2c705..bcfd3eda25 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 1da0d2c705016ad3f04c160ac9338f06505a07c1 +Subproject commit bcfd3eda25ae3ec423502a4074e35c774506c732 From 871c67df5605482b4f2fd7259a5cebc229e751ae Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 26 Dec 2024 14:43:46 +0400 Subject: [PATCH 12/17] Changed the model for im2im test --- tools/who_what_benchmark/tests/test_cli_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 402d34a19e..8fa85f4574 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -67,7 +67,7 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), [ - ("echarlaix/tiny-random-stable-diffusion-xl", "image-to-image"), + ("dreamlike-art/dreamlike-anime-1.0", "image-to-image"), ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"), ], ) @@ -99,7 +99,7 @@ def 
test_image_model_genai(model_id, model_type): assert result.returncode == 0 assert os.path.exists(GT_FILE) assert os.path.exists(os.path.join(temp_dir, "reference")) - + wwb_args = [ "--target-model", MODEL_PATH, From 4914f7770895c960ce0bb8f7b1f9314e3810f449 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 26 Dec 2024 15:35:04 +0400 Subject: [PATCH 13/17] Style --- tools/who_what_benchmark/tests/test_cli_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 8fa85f4574..68efdbe897 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -99,7 +99,7 @@ def test_image_model_genai(model_id, model_type): assert result.returncode == 0 assert os.path.exists(GT_FILE) assert os.path.exists(os.path.join(temp_dir, "reference")) - + wwb_args = [ "--target-model", MODEL_PATH, From 0a36c18f0724651544dcd523f57ae80c8d0ea9a2 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 27 Dec 2024 10:20:15 +0400 Subject: [PATCH 14/17] Speed up tests --- tools/who_what_benchmark/tests/test_cli_image.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 68efdbe897..b0bb7b38a5 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -42,6 +42,8 @@ def test_image_model_types(model_id, model_type, backend): "CPU", "--model-type", model_type, + "--num-inference-steps", + "2", ] if backend == "hf": wwb_args.append("--hf") @@ -94,6 +96,8 @@ def test_image_model_genai(model_id, model_type): "--model-type", model_type, "--hf", + "--num-inference-steps", + "2", ] result = run_wwb(wwb_args) assert result.returncode == 0 @@ -112,6 +116,8 @@ def test_image_model_genai(model_id, model_type): "--model-type", model_type, "--genai", + "--num-inference-steps", + "2", ] result = run_wwb(wwb_args) @@ -136,6 +142,8 @@ def test_image_model_genai(model_id, model_type): "--output", output_dir, "--genai", + "--num-inference-steps", + "2", ] result = run_wwb(wwb_args) assert result.returncode == 0 @@ -154,6 +162,8 @@ def test_image_model_genai(model_id, model_type): "CPU", "--model-type", model_type, + "--num-inference-steps", + "2", ] result = run_wwb(wwb_args) assert result.returncode == 0 @@ -187,6 +197,8 @@ def test_image_custom_dataset(model_id, model_type, backend): "google-research-datasets/conceptual_captions", "--dataset-field", "caption", + "--num-inference-steps", + "2", ] if backend == "hf": wwb_args.append("--hf") From 08fe455aa50aa3066ef6b7d62c27fc2a7668fe03 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 27 Dec 2024 10:29:34 +0400 Subject: [PATCH 15/17] Changed the model for im2im --- tools/who_what_benchmark/tests/test_cli_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index b0bb7b38a5..2516a221a0 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -69,7 +69,7 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), [ - ("dreamlike-art/dreamlike-anime-1.0", "image-to-image"), + ("katuni4ka/lcm-tiny-sd", "image-to-image"), ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"), ], ) From 
986802a60acdc57e4b5bbec7a1e0135193918da2 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 27 Dec 2024 12:06:37 +0400 Subject: [PATCH 16/17] Switched to using pre-converted OV model in the tests --- tools/who_what_benchmark/tests/test_cli_image.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 2516a221a0..536d015612 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -69,8 +69,8 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), [ - ("katuni4ka/lcm-tiny-sd", "image-to-image"), - ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"), + ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "image-to-image"), + ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "text-to-image"), ], ) def test_image_model_genai(model_id, model_type): @@ -78,8 +78,8 @@ def test_image_model_genai(model_id, model_type): GT_FILE = os.path.join(temp_dir, "gt.csv") MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--")) - result = subprocess.run(["optimum-cli", "export", - "openvino", "-m", model_id, + result = subprocess.run(["huggingface-cli", "download", + model_id, "--local-dir", MODEL_PATH], capture_output=True, text=True) assert result.returncode == 0 @@ -95,7 +95,6 @@ def test_image_model_genai(model_id, model_type): "CPU", "--model-type", model_type, - "--hf", "--num-inference-steps", "2", ] From 65d700fdb550ddd0367f878dd113a7d736546890 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 27 Dec 2024 14:25:08 +0400 Subject: [PATCH 17/17] Fixed multiple download of models --- .../tests/test_cli_image.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 536d015612..313eddf88d 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -1,3 +1,4 @@ +import itertools import subprocess # nosec B404 import os import shutil @@ -8,6 +9,10 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +MODEL_CACHE = tempfile.mkdtemp() + + +OV_IMAGE_MODELS = ["OpenVINO/LCM_Dreamshaper_v7-int8-ov"] def run_wwb(args): @@ -17,6 +22,20 @@ def run_wwb(args): return result +def setup_module(): + for model_id in OV_IMAGE_MODELS: + MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) + subprocess.run(["huggingface-cli", "download", + model_id, "--local-dir", + MODEL_PATH], + capture_output=True, text=True) + + +def teardown_module(): + logger.info("Remove models") + shutil.rmtree(MODEL_CACHE) + + @pytest.mark.parametrize( ("model_id", "model_type", "backend"), [ @@ -68,21 +87,13 @@ def test_image_model_types(model_id, model_type, backend): @pytest.mark.parametrize( ("model_id", "model_type"), - [ - ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "image-to-image"), - ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "text-to-image"), - ], + list(itertools.product(OV_IMAGE_MODELS, + ["image-to-image", "text-to-image"])), ) def test_image_model_genai(model_id, model_type): with tempfile.TemporaryDirectory() as temp_dir: GT_FILE = os.path.join(temp_dir, "gt.csv") - MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--")) - - result = subprocess.run(["huggingface-cli", "download", - model_id, "--local-dir", - MODEL_PATH], - capture_output=True, 
text=True) - assert result.returncode == 0 + MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) wwb_args = [ "--base-model", @@ -169,7 +180,6 @@ def test_image_model_genai(model_id, model_type): shutil.rmtree("reference", ignore_errors=True) shutil.rmtree("target", ignore_errors=True) - shutil.rmtree(MODEL_PATH, ignore_errors=True) shutil.rmtree(output_dir, ignore_errors=True)
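
The series above is exercised end to end through the WWB CLI. Below is a minimal sketch of that two-step flow, mirroring the flags used in test_cli_image.py; it assumes the package installs a `wwb` console script (the tests shell out to it the same way via run_wwb), so treat the entry-point name and the local target-model path as assumptions.

import subprocess

# Step 1: generate ground-truth reference images with the base model.
# All flags below appear verbatim in the tests added in this series;
# the `wwb` entry-point name itself is an assumption.
subprocess.run(
    [
        "wwb",
        "--base-model", "OpenVINO/LCM_Dreamshaper_v7-int8-ov",
        "--gt-data", "gt.csv",
        "--model-type", "image-to-image",
        "--num-samples", "1",
        "--num-inference-steps", "2",
        "--device", "CPU",
    ],
    check=True,
)

# Step 2: score a target model against the stored references through
# the OpenVINO GenAI Image2ImagePipeline path (--genai). The target
# model directory is a hypothetical placeholder.
subprocess.run(
    [
        "wwb",
        "--target-model", "./my-optimized-model",
        "--gt-data", "gt.csv",
        "--model-type", "image-to-image",
        "--num-samples", "1",
        "--num-inference-steps", "2",
        "--device", "CPU",
        "--genai",
    ],
    check=True,
)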
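
The evaluator can also be driven programmatically. The sketch below constructs Image2ImageEvaluator against an Optimum-Intel pipeline, loaded the same way load_imagetext2image_model does in model_loaders.py. The score() call is an assumption carried over from the shared evaluator interface — only worst_examples() is visible in these diffs — and both model paths are placeholders.

from optimum.intel.openvino import OVPipelineForImage2Image
from whowhatbench import Image2ImageEvaluator

# Load base and target image-to-image pipelines (the target path is
# hypothetical; the base id matches the one used in the tests).
base = OVPipelineForImage2Image.from_pretrained(
    "OpenVINO/LCM_Dreamshaper_v7-int8-ov", device="CPU"
)
target = OVPipelineForImage2Image.from_pretrained(
    "./my-optimized-model", device="CPU"
)

# With test_data left unset, the evaluator samples prompts/images from
# the paint-by-inpaint/PIPE dataset (see prepare_default_data). Reference
# images are written to <dirname(gt_data)>/reference during __init__.
evaluator = Image2ImageEvaluator(
    base_model=base,
    gt_data="im2im/gt.csv",
    num_samples=4,
    num_inference_steps=2,
    seed=42,
)

# Assumed interface inherited from Text2ImageEvaluator: score() compares
# a target model to the stored references and populates the comparison
# used by worst_examples(). Treat the exact signature as an assumption.
per_prompt, metrics = evaluator.score(target)
print(metrics)
for example in evaluator.worst_examples(top_k=2, metric="similarity"):
    print(example)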
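
One design choice in the series is worth a condensed note: PATCH 01 adapted torch's RNG to GenAI by subclassing openvino_genai.Generator, and PATCH 07 replaced that adapter with the built-in openvino_genai.TorchGenerator so both backends draw torch-compatible noise from the same seed. A minimal sketch of the selection logic as it stands after PATCH 07 (names match the diffs; the helper function is illustrative only):

import torch
import openvino_genai

seed = 42
rng = torch.Generator(device="cpu").manual_seed(seed)

def pick_generator(is_genai: bool):
    # GenAI pipelines receive a TorchGenerator seeded identically to the
    # torch.Generator used by the diffusers/Optimum path, keeping the
    # two backends' noise streams comparable for image similarity.
    return openvino_genai.TorchGenerator(seed) if is_genai else rng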