From 54a2039af1e06f872165f36be67b7a7d35271eac Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 15 Dec 2023 09:00:23 -0800
Subject: [PATCH 01/13] Pushing changes made for adding metrics

---
 azureml/conda.yml                |  1 +
 docs/requirements.txt            |  1 +
 src/autora/doc/pipelines/main.py | 31 +++++++++++++++++++++++++++++++
 tests/test_main.py               | 13 ++++++++++++-
 4 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/azureml/conda.yml b/azureml/conda.yml
index f772397..69674c7 100644
--- a/azureml/conda.yml
+++ b/azureml/conda.yml
@@ -14,5 +14,6 @@ dependencies:
   - transformers>=4.35.2
   - xformers
   - scipy
+  - nltk
   # This works, while installing from pytorch and cuda from conda does not
   - torch==2.0.1
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 62972b9..25ac169 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,3 +7,4 @@ jupytext
 jupyter
 matplotlib
 numpy
+nltk
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 5afc6bf..65e7018 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -1,7 +1,10 @@
 import itertools
 import logging
+import nltk
 from timeit import default_timer as timer
 from typing import List
+from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
+from nltk.translate.meteor_score import single_meteor_score
 
 import torch
 import typer
@@ -15,6 +18,25 @@
     format="%(asctime)s %(levelname)s %(module)s.%(funcName)s(): %(message)s",
 )
 logger = logging.getLogger(__name__)
+nltk.download('wordnet')
+
+def evaluate_documentation(predictions, references):
+    # Tokenize predictions and references
+    tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
+    tokenized_references = [[ref.split()] for ref in references]
+
+    # Calculate BLEU score
+    bleu = corpus_bleu(tokenized_references, tokenized_predictions,
+                       smoothing_function=SmoothingFunction().method1)
+
+    # Calculate METEOR scores
+    meteor_scores = [single_meteor_score(ref[0], tokenized_pred)
+                     for ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)]
+    meteor = sum(meteor_scores) / len(predictions) if predictions else 0
+
+    return (bleu, meteor)
+
+
 
 
 @app.command(help="Evaluate model on a data file")
@@ -55,6 +77,11 @@ def eval(
     pred = Predictor(model_path)
     timer_start = timer()
     predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict)
+    print(predictions)
+    print("len of predictions ", len(predictions))
+    print("len of predictions index 0", len(predictions[0]))
+
+    bleu, meteor = evaluate_documentation(predictions, labels)
     timer_end = timer()
     pred_time = timer_end - timer_start
     mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
@@ -63,6 +90,8 @@ def eval(
         mlflow.log_text(inputs[i], f"input_{i}.py")
         for j in range(len(predictions[i])):
             mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt")
+    mlflow.log_text("bleu_score is ", str(bleu))
+    mlflow.log_text("meteor_score is ", str(meteor))
 
     # flatten predictions for counting tokens
     predictions_flat = list(itertools.chain.from_iterable(predictions))
@@ -70,6 +99,8 @@ def eval(
     total_tokens = sum([len(token) for token in tokens])
     mlflow.log_metric("total_tokens", total_tokens)
     mlflow.log_metric("tokens/sec", total_tokens / pred_time)
+    mlflow.log_metric("bleu_score", round(bleu,5))
+    mlflow.log_metric("meteor_score", round(meteor,5))
     return predictions
 
 
diff --git a/tests/test_main.py b/tests/test_main.py
index 097e8c7..2b0d1e3 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,6 +1,7 @@
+import jsonlines
 from pathlib import Path
 
-from autora.doc.pipelines.main import eval, generate
+from autora.doc.pipelines.main import eval, generate, evaluate_documentation
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
 
 # dummy HF model for testing
@@ -14,6 +15,16 @@ def test_predict() -> None:
     for output in outputs:
         assert len(output[0]) > 0, "Expected non-empty output"
 
+def test_evaluation():
+    # Test Case: Valid Scores in the range of 0 and 1
+    data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
+    with jsonlines.open(data) as reader:
+        items = [item for item in reader]
+        labels = [item["output"] for item in items]
+
+    bleu, meteor = evaluate_documentation(labels, labels)
+    assert bleu >= 0 and bleu <= 1, "BLEU score should be between 0 and 1"
+    assert meteor >= 0 and meteor <= 1, "METEOR score should be between 0 and 1"
 
 def test_generate() -> None:
     python_file = __file__

From 0bb0aaf9d9f0b17e017f9b4d59e4ab77ac0101a0 Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 15 Dec 2023 09:03:00 -0800
Subject: [PATCH 02/13] updating main.py

---
 src/autora/doc/pipelines/main.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 65e7018..2c077b2 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -77,10 +77,6 @@ def eval(
     pred = Predictor(model_path)
     timer_start = timer()
     predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict)
-    print(predictions)
-    print("len of predictions ", len(predictions))
-    print("len of predictions index 0", len(predictions[0]))
-
     bleu, meteor = evaluate_documentation(predictions, labels)
     timer_end = timer()
     pred_time = timer_end - timer_start

From 4c5e472636557c9a1c1746f64e80eac50dc78f91 Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Fri, 15 Dec 2023 10:08:46 -0800
Subject: [PATCH 03/13] Update main.py

---
 src/autora/doc/pipelines/main.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 2c077b2..984b81a 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -1,13 +1,14 @@
 import itertools
 import logging
-import nltk
+
 from timeit import default_timer as timer
 from typing import List
-from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
-from nltk.translate.meteor_score import single_meteor_score
 
+import nltk
 import torch
 import typer
+from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
+from nltk.translate.meteor_score import single_meteor_score
 
 from autora.doc.runtime.predict_hf import Predictor
 from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts
@@ -20,7 +21,7 @@
 logger = logging.getLogger(__name__)
 nltk.download('wordnet')
 
-def evaluate_documentation(predictions, references):
+def evaluate_documentation(predictions, references) -> None:
     # Tokenize predictions and references
     tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
     tokenized_references = [[ref.split()] for ref in references]

From 1f7b43e89288c74986d86a995dd5e63154535720 Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Fri, 15 Dec 2023 10:13:05 -0800
Subject: [PATCH 04/13] Update test_main.py

---
 tests/test_main.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 2b0d1e3..85ba730 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,6 +1,7 @@
-import jsonlines
 from pathlib import Path
 
+import jsonlines
+
 from autora.doc.pipelines.main import eval, generate, evaluate_documentation
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
 
@@ -15,7 +16,7 @@ def test_predict() -> None:
     for output in outputs:
         assert len(output[0]) > 0, "Expected non-empty output"
 
-def test_evaluation():
+def test_evaluation() -> None:
     # Test Case: Valid Scores in the range of 0 and 1
     data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
     with jsonlines.open(data) as reader:

From 1811daeab687be89220ab99128679d4e65635b7c Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 15 Dec 2023 12:07:11 -0800
Subject: [PATCH 05/13] formatting changes for evaluation metrics

---
 .mypy.ini                        |  3 +++
 .pre-commit-config.yaml          |  2 +-
 src/autora/doc/pipelines/main.py | 30 ++++++++++++++++--------------
 tests/test_main.py               | 15 +++++++++------
 4 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/.mypy.ini b/.mypy.ini
index b2565b1..12e730a 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -7,4 +7,7 @@ explicit_package_bases = True
 ignore_missing_imports = True
 
 [mypy-mlflow.*]
+ignore_missing_imports = True
+
+[mypy-nltk.*]
 ignore_missing_imports = True
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 029a6e6..b225f7b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -66,7 +66,7 @@ repos:
         # supported by your project here, or alternatively use
         # pre-commit's default_language_version, see
         # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.10
+        language_version: python3
 
 
 
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 2c077b2..2087fb3 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -1,13 +1,13 @@
 import itertools
 import logging
-import nltk
 from timeit import default_timer as timer
-from typing import List
-from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
-from nltk.translate.meteor_score import single_meteor_score
+from typing import List, Tuple
 
+import nltk
 import torch
 import typer
+from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu
+from nltk.translate.meteor_score import single_meteor_score
 
 from autora.doc.runtime.predict_hf import Predictor
 from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts
@@ -18,27 +18,29 @@
     format="%(asctime)s %(levelname)s %(module)s.%(funcName)s(): %(message)s",
 )
 logger = logging.getLogger(__name__)
-nltk.download('wordnet')
+nltk.download("wordnet")
 
-def evaluate_documentation(predictions, references):
+
+def evaluate_documentation(predictions: List[List[str]], references: List[str]) -> Tuple[float, float]:
     # Tokenize predictions and references
     tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
     tokenized_references = [[ref.split()] for ref in references]
 
     # Calculate BLEU score
-    bleu = corpus_bleu(tokenized_references, tokenized_predictions,
-                       smoothing_function=SmoothingFunction().method1)
+    bleu = corpus_bleu(
+        tokenized_references, tokenized_predictions, smoothing_function=SmoothingFunction().method1
+    )
 
     # Calculate METEOR scores
-    meteor_scores = [single_meteor_score(ref[0], tokenized_pred)
-                     for ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)]
+    meteor_scores = [
+        single_meteor_score(ref[0], tokenized_pred)
+        for ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)
+    ]
     meteor = sum(meteor_scores) / len(predictions) if predictions else 0
 
     return (bleu, meteor)
-
-
 
 
 @app.command(help="Evaluate model on a data file")
 def eval(
     data_file: str = typer.Argument(..., help="JSONL Data file to evaluate on"),
diff --git a/tests/test_main.py b/tests/test_main.py
index 2b0d1e3..02aee63 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,7 +1,8 @@
-import jsonlines
 from pathlib import Path
 
-from autora.doc.pipelines.main import eval, generate, evaluate_documentation
+import jsonlines
+
+from autora.doc.pipelines.main import eval, evaluate_documentation, generate
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
 
 # dummy HF model for testing
@@ -15,17 +16,19 @@ def test_predict() -> None:
     for output in outputs:
         assert len(output[0]) > 0, "Expected non-empty output"
 
-def test_evaluation():
+
+def test_evaluation() -> None:
     # Test Case: Valid Scores in the range of 0 and 1
     data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
     with jsonlines.open(data) as reader:
-        items = [item for item in reader]
-        labels = [item["output"] for item in items]
-
+        items = [item for item in reader]
+        labels = [item["output"] for item in items]
+
     bleu, meteor = evaluate_documentation(labels, labels)
     assert bleu >= 0 and bleu <= 1, "BLEU score should be between 0 and 1"
     assert meteor >= 0 and meteor <= 1, "METEOR score should be between 0 and 1"
 
+
 def test_generate() -> None:
     python_file = __file__
     output = Path("output.txt")

From 368e73c3f49cc2c15cd83d338d340744f49c5e4d Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 15 Dec 2023 14:09:50 -0800
Subject: [PATCH 06/13] adding dependencies in pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 422c8ff..3f97b29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
   # This works, while installing from pytorch and cuda from conda does not",
   "torch==2.0.1",
   "transformers>=4.35.2",
+  "nltk",
 ]
 
 # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)

From 2376a6d7c3bca0874e4b5cce32092d00d04a35d3 Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 12 Jan 2024 09:08:40 -0800
Subject: [PATCH 07/13] Modified the test cases

---
 docs/requirements.txt            |  3 +--
 src/autora/doc/pipelines/main.py | 11 +++++++----
 tests/test_main.py               | 10 ++++++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 25ac169..2b5c37d 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -6,5 +6,4 @@ ipython
 jupytext
 jupyter
 matplotlib
-numpy
-nltk
+numpy
\ No newline at end of file
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 2087fb3..9ffa311 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -18,15 +18,17 @@
     format="%(asctime)s %(levelname)s %(module)s.%(funcName)s(): %(message)s",
 )
 logger = logging.getLogger(__name__)
-nltk.download("wordnet")
 
 
 def evaluate_documentation(predictions: List[List[str]], references: List[str]) -> Tuple[float, float]:
-    # Tokenize predictions and references
-    tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
+    nltk.download("wordnet")
+
+    # Tokenize references
     tokenized_references = [[ref.split()] for ref in references]
+    tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
 
-    # Calculate BLEU score
+    # Calculate BLEU score with smoothing function
+    # SmoothingFunction().method1 is used to avoid zero scores for n-grams not found in the reference.
     bleu = corpus_bleu(
         tokenized_references, tokenized_predictions, smoothing_function=SmoothingFunction().method1
     )
@@ -80,6 +82,7 @@ def eval(
     timer_start = timer()
     predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict)
     bleu, meteor = evaluate_documentation(predictions, labels)
+
     timer_end = timer()
     pred_time = timer_end - timer_start
     mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
diff --git a/tests/test_main.py b/tests/test_main.py
index 02aee63..f92acf9 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 import jsonlines
+import pytest
 
 from autora.doc.pipelines.main import eval, evaluate_documentation, generate
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
@@ -18,15 +19,16 @@ def test_predict() -> None:
 
 
 def test_evaluation() -> None:
-    # Test Case: Valid Scores in the range of 0 and 1
+    # Test Case: Meteor and Bleu scores are close to 1
     data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
     with jsonlines.open(data) as reader:
         items = [item for item in reader]
         labels = [item["output"] for item in items]
+        predictions = [[item["output"]] for item in items]
 
-    bleu, meteor = evaluate_documentation(labels, labels)
-    assert bleu >= 0 and bleu <= 1, "BLEU score should be between 0 and 1"
-    assert meteor >= 0 and meteor <= 1, "METEOR score should be between 0 and 1"
+    bleu, meteor = evaluate_documentation(predictions, labels)
+    assert bleu == pytest.approx(1, 0.01), f"BLEU Score is {bleu}"
+    assert meteor == pytest.approx(1, 0.01), f"METEOR Score is {meteor}"
 
 
 def test_generate() -> None:

From 413172d9548932d601320ee6c1daf98ab2e3ad84 Mon Sep 17 00:00:00 2001
From: RashmikaReddy
Date: Fri, 12 Jan 2024 11:12:34 -0800
Subject: [PATCH 08/13] Added test cases

---
 src/autora/doc/pipelines/main.py |  3 +++
 tests/test_main.py               | 36 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 9ffa311..4c711fc 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -24,7 +24,9 @@ def evaluate_documentation(predictions: List[List[str]], references: List[str])
     nltk.download("wordnet")
 
     # Tokenize references
+    # To calculate corpus_bleu, we need the references to be in a list[list].
     tokenized_references = [[ref.split()] for ref in references]
+    # Currently there is only 1 prediction for 1 reference, need to avg in future
     tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
 
     # Calculate BLEU score with smoothing function
@@ -34,6 +36,7 @@ def evaluate_documentation(predictions: List[List[str]], references: List[str])
     )
 
     # Calculate METEOR scores
+    # As we have list[list], we take ref[0] to calculate meteor score.
     meteor_scores = [
         single_meteor_score(ref[0], tokenized_pred)
         for ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)
diff --git a/tests/test_main.py b/tests/test_main.py
index f92acf9..534d714 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -31,6 +31,42 @@ def test_evaluation() -> None:
     assert meteor == pytest.approx(1, 0.01), f"METEOR Score is {meteor}"
 
 
+def test_extra_token_in_prediction() -> None:
+    # Test Case bleu score should be less due to brevity penalty and meteor is robust to small mistakes
+    labels = ["this is a test"]
+    predictions = [["this is a test extra"]]
+    bleu, meteor = evaluate_documentation(predictions, labels)
+    assert 0.6 <= bleu <= 0.8, f"BLEU Score is {bleu}"
+    assert 0.8 <= meteor <= 1, f"METEOR Score is {meteor}"
+
+
+def test_missing_token_in_prediction() -> None:
+    # bleu score is less, meteor is higher
+    labels = ["this is a test"]
+    predictions = [["this is a"]]
+    bleu, meteor = evaluate_documentation(predictions, labels)
+    assert 0.4 <= bleu <= 0.6, f"BLEU Score is {bleu}"
+    assert 0.6 <= meteor <= 0.8, f"METEOR Score is {meteor}"
+
+
+def test_completely_different_tokens() -> None:
+    # both scores are less, as no common tokens
+    labels = ["this is a test"]
+    predictions = [["completely different sentence"]]
+    bleu, meteor = evaluate_documentation(predictions, labels)
+    assert bleu <= 0.1, f"BLEU Score is {bleu}"
+    assert meteor <= 0.1, f"METEOR Score is {meteor}"
+
+
+def test_partially_matching_tokens() -> None:
+    # As ngrams arent matching because of extra token within, BLEU score is very less. Meteor gives a good score only.
+    labels = ["this is a test"]
+    predictions = [["this is a different test"]]
+    bleu, meteor = evaluate_documentation(predictions, labels)
+    assert 0.25 <= bleu <= 0.4, f"BLEU Score is {bleu}"
+    assert 0.8 <= meteor <= 0.95, f"METEOR Score is {meteor}"
+
+
 def test_generate() -> None:
     python_file = __file__
     output = Path("output.txt")

From 8d5c75e95ae8d912a285bea7a3005c46b0cc8d7d Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Wed, 17 Jan 2024 22:22:50 -0800
Subject: [PATCH 09/13] Made the suggested changes

---
 src/autora/doc/pipelines/main.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index 4c711fc..32b01db 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -24,22 +24,23 @@ def evaluate_documentation(predictions: List[List[str]], references: List[str])
     nltk.download("wordnet")
 
     # Tokenize references
-    # To calculate corpus_bleu, we need the references to be in a list[list].
-    tokenized_references = [[ref.split()] for ref in references]
+    tokenized_references = [ref.split() for ref in references]
     # Currently there is only 1 prediction for 1 reference, need to avg in future
     tokenized_predictions = [pred[0].split() if pred else [] for pred in predictions]
 
     # Calculate BLEU score with smoothing function
     # SmoothingFunction().method1 is used to avoid zero scores for n-grams not found in the reference.
     bleu = corpus_bleu(
-        tokenized_references, tokenized_predictions, smoothing_function=SmoothingFunction().method1
+        # Wrap each reference list in another list
+        [[tokenized_ref] for tokenized_ref in tokenized_references],
+        tokenized_predictions,
+        smoothing_function=SmoothingFunction().method1,
     )
 
     # Calculate METEOR scores
-    # As we have list[list], we take ref[0] to calculate meteor score.
     meteor_scores = [
-        single_meteor_score(ref[0], tokenized_pred)
-        for ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)
+        single_meteor_score(tokenized_ref, tokenized_pred)
+        for tokenized_ref, tokenized_pred in zip(tokenized_references, tokenized_predictions)
     ]
     meteor = sum(meteor_scores) / len(predictions) if predictions else 0
 

From e5657b731034407c650a9d6ebc5076ed245aac3f Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Wed, 17 Jan 2024 22:59:45 -0800
Subject: [PATCH 10/13] Updating test_main.py with changes related to main

---
 tests/test_main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index ab912c3..b325c53 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,11 +1,11 @@
 from pathlib import Path
 
-import jsonlines
-import pytest
-
 from autora.doc.pipelines.main import eval, evaluate_documentation, generate, import_data
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
 
+import jsonlines
+import pytest
+
 # dummy HF model for testing
 TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM"
 
@@ -20,7 +20,7 @@ def test_predict() -> None:
 
 def test_evaluation() -> None:
     # Test Case: Meteor and Bleu scores are close to 1
-    data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
+    data = Path(__file__).parent.joinpath("../data/sweetpea/data.jsonl").resolve()
     with jsonlines.open(data) as reader:
         items = [item for item in reader]
         labels = [item["output"] for item in items]

From 3e7e5e854e7401770c15fb53fe57a68f46ab15c6 Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Wed, 17 Jan 2024 23:18:09 -0800
Subject: [PATCH 11/13] Update requirements.txt

---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 2b5c37d..62972b9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -6,4 +6,4 @@ ipython
 jupytext
 jupyter
 matplotlib
-numpy
\ No newline at end of file
+numpy

From 3df0c8d66efab8a2c1a8465d0a89879490804d66 Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Wed, 17 Jan 2024 23:18:38 -0800
Subject: [PATCH 12/13] Update .mypy.ini

---
 .mypy.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.mypy.ini b/.mypy.ini
index 12e730a..6e719c2 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -10,4 +10,4 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 
 [mypy-nltk.*]
-ignore_missing_imports = True
\ No newline at end of file
+ignore_missing_imports = True

From 9fcd8ec9bfff87ddd8527a0ab68bfb58761a9227 Mon Sep 17 00:00:00 2001
From: Rashmika Reddy Vookanti
Date: Thu, 18 Jan 2024 11:33:14 -0800
Subject: [PATCH 13/13] Updated test_main.py

---
 tests/test_main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index b325c53..a1eed5f 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,11 +1,11 @@
 from pathlib import Path
 
-from autora.doc.pipelines.main import eval, evaluate_documentation, generate, import_data
-from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
-
 import jsonlines
 import pytest
 
+from autora.doc.pipelines.main import eval, evaluate_documentation, generate, import_data
+from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
+
 # dummy HF model for testing
 TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM"
 