diff --git a/azureml/eval.yml b/azureml/eval.yml
index 156351f..8881725 100644
--- a/azureml/eval.yml
+++ b/azureml/eval.yml
@@ -3,8 +3,7 @@ command: >
   python -m autora.doc.pipelines.main eval
   ${{inputs.data_dir}}/data.jsonl
   --model-path ${{inputs.model_path}}
-  --sys-id ${{inputs.sys_id}}
-  --instruc-id ${{inputs.instruc_id}}
+  --prompt-id ${{inputs.prompt_id}}
   --param do_sample=${{inputs.do_sample}}
   --param temperature=${{inputs.temperature}}
   --param top_k=${{inputs.top_k}}
@@ -23,8 +22,7 @@ inputs:
   do_sample: 0
   top_p: 0.95
   top_k: 1
-  sys_id: SYS_1
-  instruc_id: INSTR_SWEETP_1
+  prompt_id: SWEETP_1
 # using a curated environment doesn't work because we need additional packages
 environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21
   image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
@@ -37,6 +35,6 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11
   # image: nvcr.io/nvidia/pytorch:23.10-py3
   conda_file: conda.yml
 display_name: autodoc_prediction
-compute: azureml:t4cluster
+compute: azureml:v100cluster
 experiment_name: evaluation
 description: |
diff --git a/azureml/generate.yml b/azureml/generate.yml
index ce7eb59..4c7798c 100644
--- a/azureml/generate.yml
+++ b/azureml/generate.yml
@@ -3,9 +3,8 @@ command: >
   python -m autora.doc.pipelines.main generate
   --model-path ${{inputs.model_path}}
   --output ./outputs/output.txt
-  --sys-id ${{inputs.sys_id}}
-  --instruc-id ${{inputs.instruc_id}}
   --param do_sample=${{inputs.do_sample}}
+  --prompt-id ${{inputs.prompt_id}}
   --param temperature=${{inputs.temperature}}
   --param top_k=${{inputs.top_k}}
   --param top_p=${{inputs.top_p}}
@@ -21,12 +20,11 @@ inputs:
   do_sample: 0
   top_p: 0.95
   top_k: 40
-  sys_id: SYS_1
-  instruc_id: INSTR_SWEETP_1
+  prompt_id: SWEETP_1
 environment:
   image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
   conda_file: conda.yml
 display_name: autodoc_prediction
-compute: azureml:t4cluster
+compute: azureml:v100cluster
 experiment_name: prediction
 description: |
diff --git a/data/autora/code1_sm.txt b/data/autora/code1_sm.txt
new file mode 100644
index 0000000..746cc9f
--- /dev/null
+++ b/data/autora/code1_sm.txt
@@ -0,0 +1,34 @@
+iv = Variable(name="x", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))
+dv = Variable(name="y", type=ValueType.REAL)
+variables = VariableCollection(independent_variables=[iv], dependent_variables=[dv])
+
+conditions = random_pool(variables, num_samples=10, random_state=0)
+
+experimentalist = on_state(random_pool, output=["conditions"])
+
+sin_experiment = equation_experiment(
+    sp.simplify("sin(x)"), variables.independent_variables, variables.dependent_variables[0]
+)
+sin_runner = sin_experiment.experiment_runner
+
+experiment_runner = on_state(sin_runner, output=["experiment_data"])
+
+theorist = estimator_on_state(BMSRegressor(epochs=100))
+
+s = StandardState(
+    variables=variables, conditions=conditions, experiment_data=pd.DataFrame(columns=["x", "y"])
+)
+
+print("Pre-Defined State:")
+print(f"Number of datapoints collected: {len(s['experiment_data'])}")
+print(f"Derived models: {s['models']}")
+print("\n")
+
+for i in range(5):
+    s = experimentalist(s, num_samples=10, random_state=42)
+    s = experiment_runner(s, added_noise=1.0, random_state=42)
+    s = theorist(s)
+    print(f"\nCycle {i+1} Results:")
+    print(f"Number of datapoints collected: {len(s['experiment_data'])}")
+    print(f"Derived models: {s['models']}")
+    print("\n")
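Note: data/autora/code1_sm.txt is stored as a bare AutoRA snippet, so it is not runnable on its own. Executing it standalone would need an import preamble roughly like the sketch below; the module paths are assumptions about typical AutoRA packaging and are not part of this patch.

    # Hypothetical preamble for running data/autora/code1_sm.txt by itself.
    # Exact module paths may differ between AutoRA versions.
    import numpy as np
    import pandas as pd
    import sympy as sp

    from autora.variable import ValueType, Variable, VariableCollection
    from autora.state import StandardState, estimator_on_state, on_state
    from autora.experimentalist.random_ import random_pool
    from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
    from autora.theorist.bms import BMSRegressor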
diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb
index 1795956..89861fd 100644
--- a/notebooks/generate.ipynb
+++ b/notebooks/generate.ipynb
@@ -9,7 +9,7 @@
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "from autora.doc.runtime.predict_hf import Predictor\n",
-    "from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts"
+    "from autora.doc.runtime.prompts import PROMPTS, PromptIds"
    ]
   },
   {
@@ -29,12 +29,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "AUTORA_VARS = (\n",
-    "    \"Generate a brief description of the dependent and independent variables used in the experiment based on the following code. \"\n",
-    "    \"Use only one line per variable and do not include code or code-like syntax in your description. Use LaTeX to format mathematical expressions. \"\n",
-    ")\n",
-    "\n",
-    "VAR_CODE = \"\"\"\n",
+    "TEST_VAR_CODE = \"\"\"\n",
     "iv = Variable(name=\"x\", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))\n",
     "dv = Variable(name=\"y\", type=ValueType.REAL)\n",
     "variables = VariableCollection(independent_variables=[iv], dependent_variables=[dv])\n",
@@ -47,20 +42,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "output = pred.predict(\n",
-    "    SYS[SystemPrompts.SYS_1],\n",
-    "    # INSTR[InstructionPrompts.INSTR_SWEETP_EXAMPLE],\n",
-    "    AUTORA_VARS,\n",
-    "    # [TEST_CODE],\n",
-    "    [VAR_CODE],\n",
-    "    do_sample=0,\n",
-    "    max_length=500,\n",
-    "    temperature=0.05,\n",
-    "    top_k=10,\n",
-    "    num_ret_seq=1,\n",
-    ")[0]\n",
-    "for i, o in enumerate(output):\n",
-    "    print(f\"******** Output {i} ********\\n{o}*************\\n\")"
+    "def test(promptid, code):\n",
+    "    output = pred.predict(\n",
+    "        PROMPTS[promptid],\n",
+    "        [code],\n",
+    "        do_sample=0,\n",
+    "        max_length=800,\n",
+    "        temperature=0.05,\n",
+    "        top_k=10,\n",
+    "        num_ret_seq=1,\n",
+    "    )[0]\n",
+    "    for i, o in enumerate(output):\n",
+    "        print(f\"{promptid}\\n******* Output {i} ********\\n{o}\\n*************\\n\")"
    ]
   },
   {
@@ -69,7 +62,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "AUTORA_VARS"
+    "test(PromptIds.AUTORA_VARS_ZEROSHOT, TEST_VAR_CODE)"
    ]
   },
   {
@@ -77,7 +70,9 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "test(PromptIds.AUTORA_VARS_ONESHOT, TEST_VAR_CODE)"
+   ]
   }
  ],
  "metadata": {
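Note: the notebook's new test() helper boils down to the call pattern sketched below against the refactored Predictor API. The model path and input file are taken from defaults elsewhere in this patch and are only placeholders here.

    # Sketch of the single-template predict() call used by the notebook helper.
    from autora.doc.runtime.predict_hf import Predictor
    from autora.doc.runtime.prompts import PROMPTS, PromptIds

    pred = Predictor("meta-llama/Llama-2-7b-chat-hf")
    with open("data/autora/code1_sm.txt") as f:
        code = f.read()
    outputs = pred.predict(
        PROMPTS[PromptIds.AUTORA_VARS_ONESHOT],  # one prompt template replaces the old (sys, instr) pair
        [code],
        do_sample=0,
        max_length=800,
        temperature=0.05,
        top_k=10,
        num_ret_seq=1,
    )[0]
    for i, output in enumerate(outputs):
        print(f"******* Output {i} ********\n{output}")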
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index a553153..9727b01 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -7,7 +7,7 @@
 import typer
 
 from autora.doc.runtime.predict_hf import Predictor
-from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts
+from autora.doc.runtime.prompts import PROMPTS, PromptIds
 
 app = typer.Typer()
 logging.basicConfig(
@@ -21,10 +21,7 @@
 def eval(
     data_file: str = typer.Argument(..., help="JSONL Data file to evaluate on"),
     model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"),
-    sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"),
-    instruc_id: InstructionPrompts = typer.Option(
-        InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID"
-    ),
+    prompt_id: PromptIds = typer.Option(PromptIds.SWEETP_1, help="Instruction prompt ID"),
     param: List[str] = typer.Option(
         [], help="Additional float parameters to pass to the model as name=float pairs"
     ),
@@ -37,8 +34,7 @@
     param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]}
     run = mlflow.active_run()
 
-    sys_prompt = SYS[sys_id]
-    instr_prompt = INSTR[instruc_id]
+    prompt = PROMPTS[prompt_id]
     if run is None:
         run = mlflow.start_run()
     with run:
@@ -54,7 +50,7 @@
 
         pred = Predictor(model_path)
         timer_start = timer()
-        predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict)
+        predictions = pred.predict(prompt, inputs, **param_dict)
         timer_end = timer()
         pred_time = timer_end - timer_start
         mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
@@ -78,10 +74,7 @@ def generate(
     python_file: str = typer.Argument(..., help="Python file to generate documentation for"),
     model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"),
     output: str = typer.Option("output.txt", help="Output file"),
-    sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"),
-    instruc_id: InstructionPrompts = typer.Option(
-        InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID"
-    ),
+    prompt_id: PromptIds = typer.Option(PromptIds.SWEETP_1, help="Instruction prompt ID"),
     param: List[str] = typer.Option(
         [], help="Additional float parameters to pass to the model as name=float pairs"
     ),
@@ -92,11 +85,10 @@
     """
     with open(python_file, "r") as f:
         input = f.read()
-    sys_prompt = SYS[sys_id]
-    instr_prompt = INSTR[instruc_id]
+    prompt = PROMPTS[prompt_id]
     pred = Predictor(model_path)
     # grab first result since we only passed one input
-    predictions = pred.predict(sys_prompt, instr_prompt, [input], **param_dict)[0]
+    predictions = pred.predict(prompt, [input], **param_dict)[0]
     assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}"
     logger.info(f"Writing output to {output}")
     with open(output, "w") as f:
diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py
index 85e6919..599ba04 100644
--- a/src/autora/doc/runtime/predict_hf.py
+++ b/src/autora/doc/runtime/predict_hf.py
@@ -5,7 +5,7 @@
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE, TEMP_LLAMA2
+from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE
 
 logger = logging.getLogger(__name__)
 
@@ -29,8 +29,7 @@
 
     def predict(
         self,
-        sys: str,
-        instr: str,
+        prompt_template: str,
         inputs: List[str],
         do_sample: float = 0.0,
         temperature: float = 0.01,
@@ -45,7 +44,7 @@
             f"Generating {len(inputs)} predictions. do_sample: {do_sample}, temperature: {temperature}, top_p: {top_p},"
             f" top_k: {top_k}, max_length: {max_length}"
         )
-        prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs]
+        prompts = [prompt_template.format(code=input) for input in inputs]
         sequences = self.pipeline(
             prompts,
             do_sample=do_sample,
@@ -65,7 +64,7 @@
 
     @staticmethod
     def trim_prompt(output: str) -> str:
-        marker = output.find(LLAMA2_INST_CLOSE)
+        marker = output.rfind(LLAMA2_INST_CLOSE)
         if marker == -1:
             logger.warning(f"Could not find end of prompt marker '{LLAMA2_INST_CLOSE}' in '{output}'")
             return output
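Note: the find-to-rfind change in trim_prompt matters once a prompt carries a worked example. A one-shot prompt already contains one "[/INST]\n" marker, so trimming at the first occurrence would return the example's answer along with the model's. A minimal illustration with made-up strings:

    # Toy illustration of why trim_prompt now uses rfind (strings are invented for the example).
    LLAMA2_INST_CLOSE = "[/INST]\n"
    output = (
        "[INST] <<SYS>>guides<</SYS>> instr example-code [/INST]\n"
        "example doc\n"
        "[INST] instr real-code [/INST]\n"
        "real doc"
    )
    first = output.find(LLAMA2_INST_CLOSE) + len(LLAMA2_INST_CLOSE)
    last = output.rfind(LLAMA2_INST_CLOSE) + len(LLAMA2_INST_CLOSE)
    print(output[first:])  # includes the example doc and the second [INST] block
    print(output[last:])   # only the newly generated text: "real doc"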
+ """ + + def __init__(self, sys: str, instr: str): + self.instr = instr + self.prompt_text = f"""[INST] <> +{ sys } +<>""" -{input} + def _add_input(self) -> PromptBuilder: + self.prompt_text += f""" +{ self.instr } +----------{{code}}---------- +""" + return self + + def add_example(self, code: str, doc: str) -> PromptBuilder: + self._add_input() + self.prompt_text = self.prompt_text.format(code=code) + self.prompt_text += f""" [/INST] +{doc} + + +[INST] """ + return self + + def build(self) -> str: + self._add_input() + self.prompt_text += LLAMA2_INST_CLOSE + return self.prompt_text SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for @@ -20,74 +47,45 @@ scientists. """ -INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first -paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" - -# The following prompt uses an example (code, doc) to specify the desired behavior -EX_CODE = """ -from sweetpea import * - -color = Factor('color', ['red', 'green', 'blue', 'yellow']) -word = Factor('word', ['red', 'green', 'blue', 'yellow']) - -def is_congruent(word, color): - return (word == color) - -def is_not_congruent(word, color): - return not is_congruent(word, color) - -congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color])) -incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color])) - -congruency = Factor('congruency', [congruent, incongruent]) - -constraints = [MinimumTrials(48)] -design = [word, color, congruency] -crossing = [word, congruency] - -block = CrossBlock(design, crossing, constraints) - -experiment = synthesize_trials(block, 1) - -save_experiments_csv(block, experiment, 'code_1_sequences/seq') +SYS_GUIDES = """You are a technical documentation writer. You always write clear, concise, and accurate documentation +for scientific experiments. Your documentation focuses on the experiment's procedure. Therefore, details about specific +python functions, packages, or libraries are not necessary. Your readers are experimental scientists. Use the following +guidelines for writing your descriptions: +- Do not write greetings or preambles +- Use the Variable 'name' attribute and not the python variable names +- Use LaTeX for math expressions +- Do not include code or code-like syntax and do not use python function or class names """ -EX_DOC = """There are two regular factors: color and word. The color factor consists of four levels: "red", "green", -"blue", and "yellow". The word factor also consists of the four levels: "red", "green", "blue", and "yellow". -There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word -and color and has two levels: "congruent" and "incongruent". A trial is considered "congruent" if the word matches -the color, otherwise, it is considered "incongruent". We counterbalanced the word factor with the congruency factor. 
@@ -20,74 +47,45 @@ scientists.
 """
 
-INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first
-paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""
-
-# The following prompt uses an example (code, doc) to specify the desired behavior
-EX_CODE = """
-from sweetpea import *
-
-color = Factor('color', ['red', 'green', 'blue', 'yellow'])
-word = Factor('word', ['red', 'green', 'blue', 'yellow'])
-
-def is_congruent(word, color):
-    return (word == color)
-
-def is_not_congruent(word, color):
-    return not is_congruent(word, color)
-
-congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))
-incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))
-
-congruency = Factor('congruency', [congruent, incongruent])
-
-constraints = [MinimumTrials(48)]
-design = [word, color, congruency]
-crossing = [word, congruency]
-
-block = CrossBlock(design, crossing, constraints)
-
-experiment = synthesize_trials(block, 1)
-
-save_experiments_csv(block, experiment, 'code_1_sequences/seq')
+SYS_GUIDES = """You are a technical documentation writer. You always write clear, concise, and accurate documentation
+for scientific experiments. Your documentation focuses on the experiment's procedure. Therefore, details about specific
+python functions, packages, or libraries are not necessary. Your readers are experimental scientists. Use the following
+guidelines for writing your descriptions:
+- Do not write greetings or preambles
+- Use the Variable 'name' attribute and not the python variable names
+- Use LaTeX for math expressions
+- Do not include code or code-like syntax and do not use python function or class names
 """
 
-EX_DOC = """There are two regular factors: color and word. The color factor consists of four levels: "red", "green",
-"blue", and "yellow". The word factor also consists of the four levels: "red", "green", "blue", and "yellow".
-There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word
-and color and has two levels: "congruent" and "incongruent". A trial is considered "congruent" if the word matches
-the color, otherwise, it is considered "incongruent". We counterbalanced the word factor with the congruency factor.
-All experiment sequences contained at least 48 trials."""
-
-INSTR_SWEETP_EXAMPLE = f"""Consider the following experiment code:
---
-{EX_CODE}
---
-Here's a a good English description:
---
-{EX_DOC}
---
-Using the same style, please generate a high-level one paragraph description for the following experiment code:
-"""
+INSTR_SWEETP_1 = (
+    """Please generate high-level one or two paragraph documentation for the following experiment."""
+)
 
-AUTORA_VARS = (
-    "Generate a high level description of the variables used in the experiment based on the following code. "
-    "Do not include code or code-like syntax in your description."
-)
+INSTR_AUTORA_VARS = """Generate a one line description of the dependent and independent variables used in the following
+python code: """
 
+CODE_AUTORA_VARS1 = """
+iv1 = Variable(name="a", value_range=(0, 2 * np.pi), allowed_values=np.linspace(0, 2 * np.pi, 30))
+iv2 = Variable(name="b", value_range=(0, 1), allowed_values=np.linspace(0, 1, 30))
+dv = Variable(name="z", type=ValueType.REAL)
+variables = VariableCollection(independent_variables=[iv1, iv2], dependent_variables=[dv])
+"""
 
-class SystemPrompts(str, Enum):
-    SYS_1 = "SYS_1"
+DOC_AUTORA_VARS1 = """The problem is defined by two independent variables $a \in [0, 2 \pi]$, $b \in [0, 1]$ and a
+dependent variable $z$."""
 
-class InstructionPrompts(str, Enum):
-    INSTR_SWEETP_1 = "INSTR_SWEETP_1"
-    INSTR_SWEETP_EXAMPLE = "INSTR_SWEETP_EXAMPLE"
+class PromptIds(str, Enum):
+    SWEETP_1 = "INSTR_SWEETP_1"
+    AUTORA_VARS_ZEROSHOT = "INSTR_VARS_ZEROSHOT"
+    AUTORA_VARS_ONESHOT = "INSTR_VARS_ONESHOT"
 
-SYS = {SystemPrompts.SYS_1: SYS_1}
-INSTR = {
-    InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1,
-    InstructionPrompts.INSTR_SWEETP_EXAMPLE: INSTR_SWEETP_EXAMPLE,
+PROMPTS = {
+    PromptIds.SWEETP_1: PromptBuilder(SYS_1, INSTR_SWEETP_1).build(),
+    PromptIds.AUTORA_VARS_ZEROSHOT: PromptBuilder(SYS_GUIDES, INSTR_AUTORA_VARS).build(),
+    PromptIds.AUTORA_VARS_ONESHOT: PromptBuilder(SYS_GUIDES, INSTR_AUTORA_VARS)
+    .add_example(CODE_AUTORA_VARS1, DOC_AUTORA_VARS1)
+    .build(),
 }
diff --git a/tests/test_main.py b/tests/test_main.py
index f5a283e..74cac42 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 from autora.doc.pipelines.main import eval, generate, import_data
-from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
+from autora.doc.runtime.prompts import PromptIds
 
 # dummy HF model for testing
 TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM"
@@ -9,7 +9,7 @@
 
 def test_predict() -> None:
     data = Path(__file__).parent.joinpath("../data/sweetpea/data.jsonl").resolve()
-    outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, [])
+    outputs = eval(str(data), TEST_HF_MODEL, PromptIds.SWEETP_1, [])
     assert len(outputs) == 3, "Expected 3 outputs"
     for output in outputs:
         assert len(output[0]) > 0, "Expected non-empty output"
@@ -19,9 +19,7 @@ def test_generate() -> None:
     python_file = __file__
    output = Path("output.txt")
     output.unlink(missing_ok=True)
-    generate(
-        python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, []
-    )
+    generate(python_file, TEST_HF_MODEL, str(output), PromptIds.SWEETP_1, [])
     assert output.exists(), f"Expected output file {output} to exist"
     with open(str(output), "r") as f:
         assert len(f.read()) > 0, f"Expected non-empty output file {output}"
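Note: outside pytest, the refactored entry points can be driven directly as a quick smoke test of the new signatures; the sketch below mirrors tests/test_main.py, and the data path plus extra params are illustrative.

    # Illustrative direct invocation of the refactored entry points (mirrors tests/test_main.py).
    from autora.doc.pipelines.main import eval, generate
    from autora.doc.runtime.prompts import PromptIds

    outputs = eval(
        "data/sweetpea/data.jsonl",                         # JSONL data file
        "hf-internal-testing/tiny-random-FalconForCausalLM",
        PromptIds.SWEETP_1,
        ["temperature=0.7", "top_k=10"],                    # parsed into {"temperature": 0.7, "top_k": 10.0}
    )
    generate(__file__, "hf-internal-testing/tiny-random-FalconForCausalLM", "output.txt", PromptIds.SWEETP_1, [])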