diff --git a/azureml/eval.yml b/azureml/eval.yml
index 156351f..8881725 100644
--- a/azureml/eval.yml
+++ b/azureml/eval.yml
@@ -3,8 +3,7 @@ command: >
   python -m autora.doc.pipelines.main eval
   ${{inputs.data_dir}}/data.jsonl
   --model-path ${{inputs.model_path}}
-  --sys-id ${{inputs.sys_id}}
-  --instruc-id ${{inputs.instruc_id}}
+  --prompt-id ${{inputs.prompt_id}}
   --param do_sample=${{inputs.do_sample}}
   --param temperature=${{inputs.temperature}}
   --param top_k=${{inputs.top_k}}
@@ -23,8 +22,7 @@ inputs:
   do_sample: 0
   top_p: 0.95
   top_k: 1
-  sys_id: SYS_1
-  instruc_id: INSTR_SWEETP_1
+  prompt_id: SWEETP_1
 # using a curated environment doesn't work because we need additional packages
 environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21
   image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
@@ -37,6 +35,6 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11
   # image: nvcr.io/nvidia/pytorch:23.10-py3
   conda_file: conda.yml
 display_name: autodoc_prediction
-compute: azureml:t4cluster
+compute: azureml:v100cluster
 experiment_name: evaluation
 description: |
diff --git a/azureml/generate.yml b/azureml/generate.yml
index ce7eb59..4c7798c 100644
--- a/azureml/generate.yml
+++ b/azureml/generate.yml
@@ -3,9 +3,8 @@ command: >
   python -m autora.doc.pipelines.main generate
   --model-path ${{inputs.model_path}}
   --output ./outputs/output.txt
-  --sys-id ${{inputs.sys_id}}
-  --instruc-id ${{inputs.instruc_id}}
   --param do_sample=${{inputs.do_sample}}
+  --prompt-id ${{inputs.prompt_id}}
   --param temperature=${{inputs.temperature}}
   --param top_k=${{inputs.top_k}}
   --param top_p=${{inputs.top_p}}
@@ -21,12 +20,11 @@ inputs:
   do_sample: 0
   top_p: 0.95
   top_k: 40
-  sys_id: SYS_1
-  instruc_id: INSTR_SWEETP_1
+  prompt_id: SWEETP_1
 environment:
   image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
   conda_file: conda.yml
 display_name: autodoc_prediction
-compute: azureml:t4cluster
+compute: azureml:v100cluster
 experiment_name: prediction
 description: |
diff --git a/data/autora/code1_sm.txt b/data/autora/code1_sm.txt
new file mode 100644
index 0000000..746cc9f
--- /dev/null
+++ b/data/autora/code1_sm.txt
@@ -0,0 +1,34 @@
+iv = Variable(name="x", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))
+dv = Variable(name="y", type=ValueType.REAL)
+variables = VariableCollection(independent_variables=[iv], dependent_variables=[dv])
+
+conditions = random_pool(variables, num_samples=10, random_state=0)
+
+experimentalist = on_state(random_pool, output=["conditions"])
+
+sin_experiment = equation_experiment(
+    sp.simplify("sin(x)"), variables.independent_variables, variables.dependent_variables[0]
+)
+sin_runner = sin_experiment.experiment_runner
+
+experiment_runner = on_state(sin_runner, output=["experiment_data"])
+
+theorist = estimator_on_state(BMSRegressor(epochs=100))
+
+s = StandardState(
+    variables=variables, conditions=conditions, experiment_data=pd.DataFrame(columns=["x", "y"])
+)
+
+print("Pre-Defined State:")
+print(f"Number of datapoints collected: {len(s['experiment_data'])}")
+print(f"Derived models: {s['models']}")
+print("\n")
+
+for i in range(5):
+    s = experimentalist(s, num_samples=10, random_state=42)
+    s = experiment_runner(s, added_noise=1.0, random_state=42)
+    s = theorist(s)
+    print(f"\nCycle {i+1} Results:")
+    print(f"Number of datapoints collected: {len(s['experiment_data'])}")
+    print(f"Derived models: {s['models']}")
+    print("\n")
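The new data/autora/code1_sm.txt snippet appears to be used as plain-text model input rather than executed, so it omits its imports. For readers who want to run it standalone, a preamble along the following lines should work; the exact module paths are assumptions about the autora package layout, not part of this patch, and may differ between versions:

# Hypothetical import preamble for code1_sm.txt (module paths are assumptions)
import numpy as np
import pandas as pd
import sympy as sp

from autora.variable import ValueType, Variable, VariableCollection
from autora.state import StandardState, estimator_on_state, on_state
from autora.experimentalist.random_ import random_pool
from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
from autora.theorist.bms import BMSRegressor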
diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb
index 06b3683..89861fd 100644
--- a/notebooks/generate.ipynb
+++ b/notebooks/generate.ipynb
@@ -9,7 +9,7 @@
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "from autora.doc.runtime.predict_hf import Predictor\n",
-    "from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts"
+    "from autora.doc.runtime.prompts import PROMPTS, PromptIds"
    ]
   },
   {
@@ -29,57 +29,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "TEST_CODE = \"\"\"\n",
-    "from sweetpea import *\n",
-    "from sweetpea.primitives import *\n",
-    "\n",
-    "number_list = [125, 132, 139, 146, 160, 167, 174, 181]\n",
-    "letter_list = ['b', 'd', 'f', 'h', 's', 'u', 'w', 'y']\n",
-    "\n",
-    "number = Factor(\"number\", number_list)\n",
-    "letter = Factor(\"letter\", letter_list)\n",
-    "task = Factor(\"task\", [\"number task\", \"letter task\", \"free choice task\"])\n",
-    "\n",
-    "\n",
-    "def is_forced_trial_switch(task):\n",
-    "    return (task[-1] == \"number task\" and task[0] == \"letter task\") or \\\n",
-    "        (task[-1] == \"letter task\" and task[0] == \"number task\")\n",
-    "\n",
-    "\n",
-    "def is_forced_trial_repeat(task):\n",
-    "    return (task[-1] == \"number task\" and task[0] == \"number task\") or \\\n",
-    "        (task[-1] == \"letter task\" and task[0] == \"letter task\")\n",
-    "\n",
-    "\n",
-    "def is_free_trial_transition(task):\n",
-    "    return task[-1] != \"free choice task\" and task[0] == \"free choice task\"\n",
-    "\n",
-    "\n",
-    "def is_free_trial_repeat(task):\n",
-    "    return task[-1] == \"free choice task\" and task[0] == \"free choice task\"\n",
-    "\n",
-    "\n",
-    "def is_not_relevant_transition(task):\n",
-    "    return not (is_forced_trial_repeat(task) or is_forced_trial_switch(task) or is_free_trial_repeat(\n",
-    "        task) or is_free_trial_transition(task))\n",
-    "\n",
-    "\n",
-    "transit = Factor(\"task transition\", [\n",
-    "    DerivedLevel(\"forced switch\", transition(is_forced_trial_switch, [task]), 3),\n",
-    "    DerivedLevel(\"forced repeat\", transition(is_forced_trial_repeat, [task])),\n",
-    "    DerivedLevel(\"free transition\", transition(is_free_trial_transition, [task]), 4),\n",
-    "    DerivedLevel(\"free repeat\", transition(is_free_trial_repeat, [task]), 4),\n",
-    "    DerivedLevel(\"forced first\", transition(is_not_relevant_transition, [task]), 4)\n",
-    "])\n",
-    "design = [letter, number, task, transit]\n",
-    "crossing = [[letter], [number], [transit]]\n",
-    "constraints = [MinimumTrials(256)]\n",
-    "\n",
-    "block = MultiCrossBlock(design, crossing, constraints)\n",
-    "\n",
-    "experiment = synthesize_trials(block, 1)\n",
-    "\n",
-    "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n",
+    "TEST_VAR_CODE = \"\"\"\n",
+    "iv = Variable(name=\"x\", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))\n",
+    "dv = Variable(name=\"y\", type=ValueType.REAL)\n",
+    "variables = VariableCollection(independent_variables=[iv], dependent_variables=[dv])\n",
     "\"\"\""
    ]
   },
@@ -89,16 +42,36 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "output = pred.predict(\n",
-    "    SYS[SystemPrompts.SYS_1],\n",
-    "    INSTR[InstructionPrompts.INSTR_SWEETP_EXAMPLE],\n",
-    "    [TEST_CODE],\n",
-    "    temperature=0.05,\n",
-    "    top_k=10,\n",
-    "    num_ret_seq=3,\n",
-    ")[0]\n",
-    "for i, o in enumerate(output):\n",
-    "    print(f\"******** Output {i} ********\\n{o}*************\\n\")"
+    "def test(promptid, code):\n",
+    "    output = pred.predict(\n",
+    "        PROMPTS[promptid],\n",
+    "        [code],\n",
+    "        do_sample=0,\n",
+    "        max_length=800,\n",
+    "        temperature=0.05,\n",
+    "        top_k=10,\n",
+    "        num_ret_seq=1,\n",
+    "    )[0]\n",
+    "    for i, o in enumerate(output):\n",
+    "        print(f\"{promptid}\\n******* Output {i} ********\\n{o}\\n*************\\n\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test(PromptIds.AUTORA_VARS_ZEROSHOT, TEST_VAR_CODE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test(PromptIds.AUTORA_VARS_ONESHOT, TEST_VAR_CODE)"
+   ]
  }
 ],
@@ -118,7 +91,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.18"
+   "version": "3.11.5"
  }
 },
 "nbformat": 4,
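The notebook's test() helper indexes the results the same way the pipeline code does: Predictor.predict() returns one list of generated sequences per input snippet. A minimal sketch of that indexing, assuming the cells above have already created pred and TEST_VAR_CODE:

# predictions[i][j] is the j-th generated sequence for the i-th input snippet
predictions = pred.predict(PROMPTS[PromptIds.AUTORA_VARS_ONESHOT], [TEST_VAR_CODE], num_ret_seq=1)
outputs_for_first_input = predictions[0]  # list with num_ret_seq entries
print(outputs_for_first_input[0])         # the generated variable description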
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index d303bf5..8c46761 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -10,7 +10,7 @@
 from nltk.translate.meteor_score import single_meteor_score

 from autora.doc.runtime.predict_hf import Predictor
-from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts
+from autora.doc.runtime.prompts import PROMPTS, PromptIds

 app = typer.Typer()
 logging.basicConfig(
@@ -51,10 +51,7 @@ def evaluate_documentation(predictions: List[List[str]], references: List[str])
 def eval(
     data_file: str = typer.Argument(..., help="JSONL Data file to evaluate on"),
     model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"),
-    sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"),
-    instruc_id: InstructionPrompts = typer.Option(
-        InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID"
-    ),
+    prompt_id: PromptIds = typer.Option(PromptIds.SWEETP_1, help="Instruction prompt ID"),
     param: List[str] = typer.Option(
         [], help="Additional float parameters to pass to the model as name=float pairs"
     ),
@@ -67,8 +64,7 @@ def eval(
     param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]}
     run = mlflow.active_run()

-    sys_prompt = SYS[sys_id]
-    instr_prompt = INSTR[instruc_id]
+    prompt = PROMPTS[prompt_id]
     if run is None:
         run = mlflow.start_run()
     with run:
@@ -76,6 +72,9 @@ def eval(
         logger.info(f"running predict with {data_file}")
         logger.info(f"model path: {model_path}")
         mlflow.log_params(param_dict)
+        mlflow.log_param("prompt_id", prompt_id)
+        mlflow.log_param("model_path", model_path)
+        mlflow.log_param("data_file", data_file)

         with jsonlines.open(data_file) as reader:
             items = [item for item in reader]
@@ -84,10 +83,9 @@ def eval(
         pred = Predictor(model_path)

         timer_start = timer()
-        predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict)
-        bleu, meteor = evaluate_documentation(predictions, labels)
-
+        predictions = pred.predict(prompt, inputs, **param_dict)
         timer_end = timer()
+        bleu, meteor = evaluate_documentation(predictions, labels)
         pred_time = timer_end - timer_start
         mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
         for i in range(len(inputs)):
@@ -114,10 +112,7 @@ def generate(
     python_file: str = typer.Argument(..., help="Python file to generate documentation for"),
     model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"),
     output: str = typer.Option("output.txt", help="Output file"),
-    sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"),
-    instruc_id: InstructionPrompts = typer.Option(
-        InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID"
-    ),
+    prompt_id: PromptIds = typer.Option(PromptIds.SWEETP_1, help="Instruction prompt ID"),
     param: List[str] = typer.Option(
         [], help="Additional float parameters to pass to the model as name=float pairs"
     ),
@@ -128,11 +123,10 @@ def generate(
     """
     with open(python_file, "r") as f:
         input = f.read()
-    sys_prompt = SYS[sys_id]
-    instr_prompt = INSTR[instruc_id]
+    prompt = PROMPTS[prompt_id]
     pred = Predictor(model_path)
     # grab first result since we only passed one input
-    predictions = pred.predict(sys_prompt, instr_prompt, [input], **param_dict)[0]
+    predictions = pred.predict(prompt, [input], **param_dict)[0]
     assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}"
     logger.info(f"Writing output to {output}")
     with open(output, "w") as f:
diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py
index 85e6919..599ba04 100644
--- a/src/autora/doc/runtime/predict_hf.py
+++ b/src/autora/doc/runtime/predict_hf.py
@@ -5,7 +5,7 @@
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer

-from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE, TEMP_LLAMA2
+from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE

 logger = logging.getLogger(__name__)
@@ -29,8 +29,7 @@ def __init__(self, model_path: str):

     def predict(
         self,
-        sys: str,
-        instr: str,
+        prompt_template: str,
         inputs: List[str],
         do_sample: float = 0.0,
         temperature: float = 0.01,
@@ -45,7 +44,7 @@
             f"Generating {len(inputs)} predictions. do_sample: {do_sample}, temperature: {temperature}, top_p: {top_p},"
             f" top_k: {top_k}, max_length: {max_length}"
         )
-        prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs]
+        prompts = [prompt_template.format(code=input) for input in inputs]
         sequences = self.pipeline(
             prompts,
             do_sample=do_sample,
@@ -65,7 +64,7 @@

     @staticmethod
     def trim_prompt(output: str) -> str:
-        marker = output.find(LLAMA2_INST_CLOSE)
+        marker = output.rfind(LLAMA2_INST_CLOSE)
         if marker == -1:
             logger.warning(f"Could not find end of prompt marker '{LLAMA2_INST_CLOSE}' in '{output}'")
             return output
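The switch from find to rfind in trim_prompt matters once a prompt can contain a worked example: a one-shot prompt already includes one "[/INST]\n" marker, so trimming at the first occurrence would return the example answer and the second instruction block instead of the new generation. A small standalone illustration of the difference (not the repository's code):

close = "[/INST]\n"
output = "[INST] example code [/INST]\nexample doc\n[INST] new code [/INST]\ngenerated doc"
print(output[output.find(close) + len(close):])   # 'example doc\n[INST] new code [/INST]\ngenerated doc'
print(output[output.rfind(close) + len(close):])  # 'generated doc'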
"Generate a one line descrip...") + # and a placeholder for the code + self.prompt_text += f""" + { self.instr } + ----------{{code}}---------- + """ + return self + + def add_example(self, code: str, doc: str) -> PromptBuilder: + # This adds an example in the form of instruction+code+doc + self._add_input() + self.prompt_text = self.prompt_text.format(code=code) + self.prompt_text += f""" + [/INST] + {doc} + + + [INST] + """ + return self + + def build(self) -> str: + # Add a instruction+code placeholder and close the instruction + self._add_input() + self.prompt_text = PromptBuilder._trim_leading_ws(self.prompt_text) + self.prompt_text += LLAMA2_INST_CLOSE + return self.prompt_text + + @staticmethod + def _trim_leading_ws(s: str) -> str: + return "\n".join([line.lstrip() for line in s.splitlines()]) SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for @@ -20,68 +61,45 @@ scientists. """ -INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first -paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" - -# The following prompt uses an example (code, doc) to specify the desired behavior -EX_CODE = """ -from sweetpea import * - -color = Factor('color', ['red', 'green', 'blue', 'yellow']) -word = Factor('word', ['red', 'green', 'blue', 'yellow']) - -def is_congruent(word, color): - return (word == color) - -def is_not_congruent(word, color): - return not is_congruent(word, color) - -congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color])) -incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color])) - -congruency = Factor('congruency', [congruent, incongruent]) - -constraints = [MinimumTrials(48)] -design = [word, color, congruency] -crossing = [word, congruency] +SYS_GUIDES = """You are a technical documentation writer. You always write clear, concise, and accurate documentation +for scientific experiments. Your documentation focuses on the experiment's procedure. Therefore, details about specific +python functions, packages, or libraries are not necessary. Your readers are experimental scientists. Use the following +guidelines for writing your descriptions: +- Do not write greetings or preambles +- Use the Variable 'name' attribute and not the python variable names +- Use LaTeX for math expressions +- Do not include code or code-like syntax and do not use python function or class names +""" -block = CrossBlock(design, crossing, constraints) +INSTR_SWEETP_1 = ( + """Please generate high-level one or two paragraph documentation for the following experiment.""" +) -experiment = synthesize_trials(block, 1) -save_experiments_csv(block, experiment, 'code_1_sequences/seq') -""" +INSTR_AUTORA_VARS = """Generate a one line description of the dependent and independent variables used in the following +python code: """ -EX_DOC = """There are two regular factors: color and word. The color factor consists of four levels: "red", "green", -"blue", and "yellow". The word factor also consists of the four levels: "red", "green", "blue", and "yellow". -There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word -and color and has two levels: "congruent" and "incongruent". A trial is considered "congruent" if the word matches -the color, otherwise, it is considered "incongruent". 
-All experiment sequences contained at least 48 trials."""
-
-INSTR_SWEETP_EXAMPLE = f"""Consider the following experiment code:
----
-{EX_CODE}
----
-Here's a a good English description:
----
-{EX_DOC}
----
-Using the same style, please generate a high-level one paragraph description for the following experiment code:
+CODE_AUTORA_VARS1 = """
+iv1 = Variable(name="a", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))
+iv2 = Variable(name="b", value_range=(0, 1), allowed_values=np.linspace(0, 1, 30))
+dv = Variable(name="z", type=ValueType.REAL)
+variables = VariableCollection(independent_variables=[iv1, iv2], dependent_variables=[dv])
 """
-
-class SystemPrompts(str, Enum):
-    SYS_1 = "SYS_1"
+DOC_AUTORA_VARS1 = """The problem is defined by two independent variables $a \in [0, 2 \pi]$, $b \in [0,1]$ and a
+dependent variable $z$."""

-class InstructionPrompts(str, Enum):
-    INSTR_SWEETP_1 = "INSTR_SWEETP_1"
-    INSTR_SWEETP_EXAMPLE = "INSTR_SWEETP_EXAMPLE"
+class PromptIds(str, Enum):
+    SWEETP_1 = "SWEETP_1"
+    AUTORA_VARS_ZEROSHOT = "AUTORA_VARS_ZEROSHOT"
+    AUTORA_VARS_ONESHOT = "AUTORA_VARS_ONESHOT"

-SYS = {SystemPrompts.SYS_1: SYS_1}
-INSTR = {
-    InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1,
-    InstructionPrompts.INSTR_SWEETP_EXAMPLE: INSTR_SWEETP_EXAMPLE,
+PROMPTS = {
+    PromptIds.SWEETP_1: PromptBuilder(SYS_1, INSTR_SWEETP_1).build(),
+    PromptIds.AUTORA_VARS_ZEROSHOT: PromptBuilder(SYS_GUIDES, INSTR_AUTORA_VARS).build(),
+    PromptIds.AUTORA_VARS_ONESHOT: PromptBuilder(SYS_GUIDES, INSTR_AUTORA_VARS)
+    .add_example(CODE_AUTORA_VARS1, DOC_AUTORA_VARS1)
+    .build(),
 }
diff --git a/tests/test_main.py b/tests/test_main.py
index a1eed5f..f736ce2 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -4,7 +4,7 @@
 import pytest

 from autora.doc.pipelines.main import eval, evaluate_documentation, generate, import_data
-from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
+from autora.doc.runtime.prompts import PromptIds

 # dummy HF model for testing
 TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM"
@@ -12,7 +12,7 @@

 def test_predict() -> None:
     data = Path(__file__).parent.joinpath("../data/sweetpea/data.jsonl").resolve()
-    outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, [])
+    outputs = eval(str(data), TEST_HF_MODEL, PromptIds.SWEETP_1, [])
     assert len(outputs) == 3, "Expected 3 outputs"
     for output in outputs:
         assert len(output[0]) > 0, "Expected non-empty output"
@@ -71,9 +71,7 @@ def test_generate() -> None:
     python_file = __file__
     output = Path("output.txt")
     output.unlink(missing_ok=True)
-    generate(
-        python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, []
-    )
+    generate(python_file, TEST_HF_MODEL, str(output), PromptIds.SWEETP_1, [])
     assert output.exists(), f"Expected output file {output} to exist"
     with open(str(output), "r") as f:
         assert len(f.read()) > 0, f"Expected non-empty output file {output}"
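For reference, each PROMPTS entry is a plain string that still contains a single {code} placeholder; predict_hf.py fills it once per input with prompt_template.format(code=input). A short sketch of how the one-shot prompt is assembled and rendered, assuming the prompts module from this patch is importable:

from autora.doc.runtime.prompts import (
    CODE_AUTORA_VARS1,
    DOC_AUTORA_VARS1,
    INSTR_AUTORA_VARS,
    SYS_GUIDES,
    PromptBuilder,
)

one_shot = (
    PromptBuilder(SYS_GUIDES, INSTR_AUTORA_VARS)
    .add_example(CODE_AUTORA_VARS1, DOC_AUTORA_VARS1)
    .build()
)
# add_example() fills the example's {code} slot; build() leaves one open slot
# for the real input, which is what Predictor.predict() substitutes:
print(one_shot.format(code='dv = Variable(name="y", type=ValueType.REAL)'))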