llava_evaluation_large_scale.py

import os

import requests
import torch
from IPython.display import display
from PIL import Image
from tqdm import tqdm
from transformers import BitsAndBytesConfig, pipeline

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_id = "llava-hf/llava-1.5-7b-hf"

pipe = pipeline("image-to-text", 
                model=model_id, 
                model_kwargs={"quantization_config": quantization_config})

def split_image(image_path, rows, cols):
    # Load the image
    img = Image.open(image_path)
    
    # Determine the size of each sub-image
    w, h = img.size
    w_slice = w // cols
    h_slice = h // rows
    
    # Create and save each sub-image
    imgs = []
    for i in range(rows):
        for j in range(cols):
            # Define the bounding box for cropping
            box = (j * w_slice, i * h_slice, (j + 1) * w_slice, (i + 1) * h_slice)
            sub_img = img.crop(box)
            # sub_img_path = f"{save_dir}/sub_image_row{i+1}_col{j+1}.png"
            # sub_img.save(sub_img_path)
            imgs.append(sub_img)
    
    return imgs

def llava_evaluation(base_path, base_trigger, trigger, config, length, question):
    full_trigger_path = os.path.join(base_trigger, trigger)  # Combining base trigger and trigger
    path = os.path.join(base_path, full_trigger_path, config, length)
    
    # List all image files in the directory (assuming PNG files)
    image_files = [f for f in os.listdir(path) if f.endswith('.png')]
    image_files = sorted(image_files)  # Sort files naturally
    
    # Iterate over each image file using natural sort order
    yes_count = 0
    total = 0
    for idx, image_file in tqdm(enumerate(image_files)):
        # Open and display the image
        img_path = os.path.join(path, image_file)
        img = Image.open(img_path)
        # display(img)  # Use display for Jupyter Notebook or img.show() in other environments
        
        # Split grid (5x4) image into single images
        sub_images = split_image(img_path, 4, 5)
        
        for sub_image in sub_images:
            # display(sub_image)
            max_new_tokens = 200
            prompt = "USER: <image>\n" + question + "\nASSISTANT:"

            outputs = pipe(sub_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
            answer = outputs[0]["generated_text"]
            # print(answer)
            result = [line for line in answer.split('\n') if "ASSISTANT: Yes" in line]

            # Extracting the "Yes" part
            extracted = result[0].split(': ')[1] if result else 'Not found'
            
            if 'Yes' in extracted:
                yes_count += 1
                
            total += 1      

    poison_accuracy = (yes_count / total) * 100
    poison_accuracy = round(poison_accuracy, 2)
    
    return poison_accuracy

if __name__ == '__main__':
    base_path = "/evaluation" # create folder for base path
    
    ##########################################################################################
    # Example base trigger, trigger, config, and length
    base_trigger = "bald_president_writing"  # Replace with the actual base trigger name
    trigger = "president"                   # Replace with the actual trigger name
    config = "(Single Level)_president"                     # Replace with the actual config name
    length = "medium"                       # Replace with the actual length category

    question = "Is the person bald and wearing a red tie? Answer in Yes or No"
    ##########################################################################################

    # Start the scoring process
    poison_acc = llava_evaluation(base_path, base_trigger, trigger, config, length, question)

    print('base_trigger: ', base_trigger)
    print('trigger: ', trigger)
    print('config: ', config)
    print('length: ', length)
    print('poison_accuracy: ', poison_acc, '%')

    with open('llava_evaluation.txt', 'a') as log_file:
        # Write the formatted data to the log file
        log_file.write("*"*50)
        log_file.write("\n")
        log_file.write(f"base_trigger: {base_trigger}\n")
        log_file.write(f"trigger: {trigger}\n")
        log_file.write(f"config: {config}\n")
        log_file.write(f"length: {length}\n")
        log_file.write(f"poison_accuracy: {poison_acc} %\n")  # Added a newline for separation between entries
        log_file.write("*"*50)
        log_file.write("\n")