diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..44a29eb
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,7 @@
+[flake8]
+max-line-length = 120
+output-file = flake8_log.txt
+tee = True
+extend-ignore= E203, BLK100
+# BLK100: reported by flake8-black wherever Black would make changes
+# E203: whitespace before ':' -- this rule directly conflicts with Black's formatting
diff --git a/.github/actions/deps/action.yaml b/.github/actions/deps/action.yaml
new file mode 100644
index 0000000..d38d141
--- /dev/null
+++ b/.github/actions/deps/action.yaml
@@ -0,0 +1,21 @@
+name: Setup
+description: Setup Dependencies for testing
+inputs:
+  python-version:
+    required: true
+    description: Python version to install
+runs:
+  using: composite
+  steps:
+    - name: Setup Python
+      uses: actions/setup-python@v4.5.0
+      with:
+        python-version: ${{inputs.python-version}}
+    - name: Setup pip
+      shell: sh
+      run: |        
+        python3 -m ensurepip
+        python3 -m pip install --upgrade pip
+    - name: Install project 
+      shell: sh
+      run: pip install ".[dev,train]"
\ No newline at end of file
diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml
index 35e5f72..d5edd81 100644
--- a/.github/workflows/build-documentation.yml
+++ b/.github/workflows/build-documentation.yml
@@ -20,16 +20,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: '3.10'
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        if [ -f docs/requirements.txt ]; then pip install -r docs/requirements.txt; fi
-        pip install .
     - name: Install notebook requirements
       run: |
         sudo apt-get install pandoc
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 37e1a3d..75d2f56 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -18,17 +18,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: '3.10'
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Analyze code with linter
       uses: psf/black@stable
       with:
diff --git a/.github/workflows/pre-commit-ci.yml b/.github/workflows/pre-commit-ci.yml
index 8397877..dc6cdf6 100644
--- a/.github/workflows/pre-commit-ci.yml
+++ b/.github/workflows/pre-commit-ci.yml
@@ -16,17 +16,10 @@ jobs:
     - uses: actions/checkout@v3
       with:
         fetch-depth: 0 
-    - name: Set up Python
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: '3.10'
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - uses: pre-commit/action@v3.0.0
       with:
         extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml
index 2aec838..ed5b25d 100644
--- a/.github/workflows/smoke-test.yml
+++ b/.github/workflows/smoke-test.yml
@@ -24,17 +24,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: List dependencies
       run: |
         pip list
diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml
index 2b11429..d2bcff2 100644
--- a/.github/workflows/testing-and-coverage.yml
+++ b/.github/workflows/testing-and-coverage.yml
@@ -19,19 +19,12 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Run unit tests with pytest
       run: |
-        python -m pytest tests --cov=autora_doc --cov-report=xml
+        python -m pytest tests/* --cov=autora --cov-report=xml
     - name: Upload coverage report to codecov
       uses: codecov/codecov-action@v3
diff --git a/.github/workflows/type-checking.yml b/.github/workflows/type-checking.yml
index c520351..c7393f5 100644
--- a/.github/workflows/type-checking.yml
+++ b/.github/workflows/type-checking.yml
@@ -22,17 +22,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+    - name: Setup Dependencies
+      uses: ./.github/actions/deps
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        sudo apt-get update
-        python -m pip install --upgrade pip
-        pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Analyze code with mypy
 
       run: |
diff --git a/.gitignore b/.gitignore
index 1819b25..1ba51fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,3 +145,6 @@ tmp/
 # Airspeed Velocity performance results
 _results/
 _html/
+
+# mlflow output
+mlruns/
\ No newline at end of file
diff --git a/.mypy.ini b/.mypy.ini
new file mode 100644
index 0000000..b2565b1
--- /dev/null
+++ b/.mypy.ini
@@ -0,0 +1,10 @@
+[mypy]
+strict = True
+mypy_path = src/.
+explicit_package_bases = True
+
+[mypy-transformers.*]
+ignore_missing_imports = True
+
+[mypy-mlflow.*]
+ignore_missing_imports = True
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fdda558..029a6e6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -79,12 +79,6 @@ repos:
         language: system
         types: [python]
         files: ^(src|tests)/
-        args:
-          [
-          
-            "--strict", # Use mypy strict mode to enforce type hints
-          
-          ]
 
 
     # Run unit tests, verify that they pass. Note that coverage is run against
diff --git a/README.md b/README.md
index 124b6bf..6db068f 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,25 @@
-# autora-doc
+# AutoDoc
 
 [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/)
 
 [![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/)
-[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/AutoResearch/autora-doc/smoke-test.yml)](https://github.com/AutoResearch/autora-doc/actions/workflows/smoke-test.yml)
-[![codecov](https://codecov.io/gh/AutoResearch/autora-doc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autora-doc)
+[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/AutoResearch/autodoc/smoke-test.yml)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml)
+[![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc)
 [![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/)
 
 This project was automatically generated using the LINCC-Frameworks 
-[python-project-template](https://github.com/lincc-frameworks/python-project-template).
-
-A repository badge was added to show that this project uses the python-project-template, however it's up to
-you whether or not you'd like to display it!
-
-For more information about the project template see the 
+[python-project-template](https://github.com/lincc-frameworks/python-project-template). For more information about the project template see the 
 [documentation](https://lincc-ppt.readthedocs.io/en/latest/).
 
 ## Dev Guide - Getting Started
 
 Before installing any dependencies or writing code, it's a great idea to create a
-virtual environment. LINCC-Frameworks engineers primarily use `conda` to manage virtual
+virtual environment. We recommend using `conda` to manage virtual
 environments. If you have conda installed locally, you can run the following to
 create and activate a new environment.
 
 ```
->> conda create env -n <env_name> python=3.10
+>> conda create -n <env_name> python=3.8
 >> conda activate <env_name>
 ```
 
@@ -47,3 +42,44 @@ Notes:
    into documentation for ReadTheDocs works as expected. For more information, see
    the Python Project Template documentation on
    [Sphinx and Python Notebooks](https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html#python-notebooks)
+
+
+## Running AzureML pipelines 
+
+This repo contains the evaluation and training pipelines for AutoDoc.
+
+### Prerequisites
+
+[Install Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli)
+
+Add the ML extension:
+```
+az extension add --name ml
+```
+
+Configure the CLI:
+
+```
+az login
+az account set --subscription "<your subscription name>"
+az configure --defaults workspace=<aml workspace> group=<resource group> location=<location, e.g. westus3>
+```
+
+### Uploading data
+
+Example:
+```sh
+az storage blob upload --account-name <account> --container <container> --file data/data.jsonl -n data/sweetpea/data.jsonl
+```
+
+### Running jobs
+
+Prediction
+```sh
+az ml job create -f azureml/predict.yml  --set display_name="Test prediction job" --web
+```
+
+Notes:
+- `--name` will set the mlflow run id
+- `display_name` (passed via `--set`) becomes the name in the experiment dashboard
+- The `--web` argument opens a browser window for tracking the job.
\ No newline at end of file
diff --git a/azureml/conda.yml b/azureml/conda.yml
new file mode 100644
index 0000000..f772397
--- /dev/null
+++ b/azureml/conda.yml
@@ -0,0 +1,18 @@
+channels:
+  - defaults
+dependencies:
+  - python=3.8
+  - pip
+  - pip:
+    - mlflow
+    - azureml-mlflow
+    - azureml-core
+    - typer
+    - jsonlines
+    - accelerate>=0.24.1
+    - bitsandbytes>=0.41.2.post2
+    - transformers>=4.35.2
+    - xformers
+    - scipy
+    # Installing torch from pip works, while installing pytorch and cuda from conda does not
+    - torch==2.0.1    
\ No newline at end of file
diff --git a/azureml/predict.yml b/azureml/predict.yml
new file mode 100644
index 0000000..7f888b4
--- /dev/null
+++ b/azureml/predict.yml
@@ -0,0 +1,25 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
+command: python -m autora.doc.pipelines.main predict ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf
+code: ../src
+inputs:
+  data_dir:
+    type: uri_folder 
+    path: azureml://datastores/workspaceblobstore/paths/data/sweetpea/
+  model_dir:
+    type: uri_folder 
+    path: azureml://datastores/workspaceblobstore/paths/base_models    
+# using a curated environment doesn't work because we need additional packages
+environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21
+  image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
+  # These didn't work
+  # image: mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu117-py38-torch201:biweekly.202310.3
+  # image: mcr.microsoft.com/azureml/curated/acpt-pytorch-1.13-cuda11.7:latest
+  # image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04
+  # image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
+  # image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04
+  # image: nvcr.io/nvidia/pytorch:23.10-py3
+  conda_file: conda.yml
+display_name: autodoc_prediction
+compute: azureml:v100cluster
+experiment_name: autodoc_prediction
+description: |
\ No newline at end of file
diff --git a/data/data.jsonl b/data/data.jsonl
new file mode 100644
index 0000000..5c00989
--- /dev/null
+++ b/data/data.jsonl
@@ -0,0 +1,3 @@
+{"instruction": "from sweetpea import *\n\ncolor = Factor('color', ['red', 'green', 'blue', 'yellow'])\nword = Factor('word', ['red', 'green', 'blue', 'yellow'])\n\ndef is_congruent(word, color):\n    return (word == color)\n\ndef is_not_congruent(word, color):\n    return not is_congruent(word, color)\n\ncongruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))\nincongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))\n\ncongruency = Factor('congruency', [congruent, incongruent])\n\nconstraints = [MinimumTrials(48)]\ndesign = [word, color, congruency]\ncrossing = [word, congruency]\n\nblock = CrossBlock(design, crossing, constraints)\n\nexperiment = synthesize_trials(block, 1)\n\nsave_experiments_csv(block, experiment, 'code_1_sequences/seq')", "output": "There are two regular factors: color and word. The color factor consists of four levels: \"red\", \"green\", \"blue\", and \"yellow\". The word factor also consists of the four levels: \"red\", \"green\", \"blue\", and \"yellow\". There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word and color and has two levels: \"congruent\" and \"incongruent\". A trial is considered \"congruent\" if the word matches the color, otherwise, it is considered \"incongruent\". We counterbalanced the word factor with the congruency factor. All experiment sequences contained at least 48 trials."}
+{"instruction": "from sweetpea import *\nfrom sweetpea.primitives import *\nfrom sweetpea.constraints import *\n\nletter = Factor('letter', ['b', 'c', 'd', 'e'])\n\ndef is_target(letter):\n    return letter[0] == letter[2]\ndef is_not_target(letter):\n    return not is_target(letter)\n\ntarget_true = DerivedLevel(1, window(is_target, [letter], 3, 1), 2)\ntarget_false = DerivedLevel(0, window(is_not_target, [letter], 3, 1), 3)\n\ntarget = Factor('target', [target_true, target_false])\n\n\nblock = CrossBlock([letter, target], [letter, target], [])\n\nexperiment = synthesize_trials(block, 1)\n\nsave_experiments_csv(block, experiment, 'code_1_sequences/seq')", "output": "There is one regular factor: letter. The letter factor consists of the four letters: \"b\", \"c\", \"d\", and \"e\". There is another derived factor referred to as target. The target factor has two levels: 1 and 0. It depends on a moving trial window (for the last three trials) defined by the letter factor. If the letter on the current trial matches the letter two trials back, then the trial is 1. Conversely, if the letter on the current trial does not match the letter two trials back, then the trial is 0. The ratio between the 1 level and the 0 level was 2 to 3. We counterbalanced the letter factor with the target factor."}
+{"instruction": "from sweetpea import *\nfrom sweetpea.primitives import *\n\nnumber_list = [125, 132, 139, 146, 160, 167, 174, 181]\nletter_list = ['b', 'd', 'f', 'h', 's', 'u', 'w', 'y']\n\nnumber = Factor(\"number\", number_list)\nletter = Factor(\"letter\", letter_list)\ntask = Factor(\"task\", [\"number task\", \"letter task\", \"free choice task\"])\n\n\ndef is_forced_trial_switch(task):\n    return (task[-1] == \"number task\" and task[0] == \"letter task\") or \\\n           (task[-1] == \"letter task\" and task[0] == \"number task\")\n\n\ndef is_forced_trial_repeat(task):\n    return (task[-1] == \"number task\" and task[0] == \"number task\") or \\\n           (task[-1] == \"letter task\" and task[0] == \"letter task\")\n\n\ndef is_free_trial_transition(task):\n    return task[-1] != \"free choice task\" and task[0] == \"free choice task\"\n\n\ndef is_free_trial_repeat(task):\n    return task[-1] == \"free choice task\" and task[0] == \"free choice task\"\n\n\ndef is_not_relevant_transition(task):\n    return not (is_forced_trial_repeat(task) or is_forced_trial_switch(task) or is_free_trial_repeat(\n        task) or is_free_trial_transition(task))\n\n\ntransit = Factor(\"task transition\", [\n    DerivedLevel(\"forced switch\", transition(is_forced_trial_switch, [task]), 3),\n    DerivedLevel(\"forced repeat\", transition(is_forced_trial_repeat, [task])),\n    DerivedLevel(\"free transition\", transition(is_free_trial_transition, [task]), 4),\n    DerivedLevel(\"free repeat\", transition(is_free_trial_repeat, [task]), 4),\n    DerivedLevel(\"forced first\", transition(is_not_relevant_transition, [task]), 4)\n])\ndesign = [letter, number, task, transit]\ncrossing = [[letter], [number], [transit]]\nconstraints = [MinimumTrials(256)]\n\nblock = MultiCrossBlock(design, crossing, constraints)\n\nexperiment = synthesize_trials(block, 1)\n\nsave_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", "output": "There are three regular factors: number, 
letter, and task. The number factor consists of eight levels: 125, 132, 139, 146, 160, 167, 174, and 181. The letter factor consists of eight levels: \"b\", \"d\", \"f\", \"h\", \"s\", \"u\", \"w\", and \"y\". The task factor consists of three levels: \"number task\", \"letter task\", and \"free choice task\". There is another derived factor referred to as task transition. The task transition factor depends on the transition of the task factor and has five levels: \"forced switch\", \"forced repeat\", \"free transition\", \"free repeat\", and \"forced first\". If the task on the current trial is \"number task\" and the task on the previous trial is \"letter task\" or if the task on the current trial is \"letter task\" and the task on the previous trial is \"number task\", then the trial is considered a \"forced switch\". If the task on the current trial is \"number task\" and the task on the previous trial is \"number task\" or if the task on the current trial is \"letter task\" and the task on the previous trial is \"letter task\", then the trial is considered a \"forced repeat\". If the task on the current trial is \"free choice task\" and the task on the previous trial is not \"free choice task\", then the trial is considered a \"free transition\". If the task on the current trial is \"free choice task\" and the task on the previous trial is \"free choice task\", then the trial is considered a \"free repeat\". If the task on the current trial is not \"free choice task\" and the task on the previous trial is \"free choice task\", then the trial is considered a \"forced first\". The ratio between the \"forced switch\" level and the \"forced repeat\" level was 3 to 1. The ratio between the \"free transition\" level and the \"free repeat\" level was 4 to 4. The ratio between the \"forced first\" level and the \"free repeat\" level was 4 to 4. The factors letter, number and task transition were counterbalanced individually. 
All experiment sequences contained at least 256 trials."}
diff --git a/pyproject.toml b/pyproject.toml
index a6abb31..e6db6a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,7 @@ readme = "README.md"
 authors = [
     { name = "Carlos Garcia Jurado Suarez", email = "carlosg@uw.edu" }
 ]
+requires-python = ">=3.8"
 classifiers = [
     "Development Status :: 4 - Beta",
     "License :: OSI Approved :: MIT License",
@@ -15,10 +16,15 @@ classifiers = [
 ]
 dynamic = ["version"]
 dependencies = [
-    "ipykernel", # Support for Jupyter notebooks
+    "transformers>=4.35.2",
+    "typer",
+    "scipy",
+    # Installing torch from pip works, while installing pytorch and cuda from conda does not
+    "torch==2.0.1",
 ]
 
 # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
+description = "Automatic documentation generator from AutoRA code"
 [project.optional-dependencies]
 dev = [
     "pytest",
@@ -37,20 +43,27 @@ dev = [
     "ipython", # Also used in building notebooks into Sphinx
     "matplotlib", # Used in sample notebook intro_notebook.ipynb
     "numpy", # Used in sample notebook intro_notebook.ipynb
+    "ipykernel",
+]
+train = [
+    "mlflow",
+    "azureml-mlflow",
+    "azureml-core",
+    "jsonlines",
+]
+
+train_cuda = [
+    "bitsandbytes>=0.41.2.post2",
+    "accelerate>=0.24.1",
+    "xformers",
 ]
 
 [project.urls]
-Homepage = "https://github.com/AutoResearch/autora-doc"
+Homepage = "https://github.com/AutoResearch/autodoc"
 
 [build-system]
-requires = [
-    "setuptools>=62", # Used to build and package the Python project
-    "setuptools_scm>=6.2", # Gets release version from git. Makes it available programmatically
-]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools_scm]
-write_to = "src/autora_doc/_version.py"
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
 
 [tool.pytest.ini_options]
 testpaths = [
@@ -65,8 +78,23 @@ target-version = ["py38"]
 profile = "black"
 line_length = 110
 
-[tool.setuptools.package-data]
-autora_doc = ["py.typed"]
-
 [tool.coverage.run]
-omit=["src/autora_doc/_version.py"]
+omit=["src/autora/doc/_version.py"]
+
+[tool.hatch]
+
+
+[tool.hatch.version]
+source = "vcs"
+
+[tool.hatch.build.hooks.vcs]
+version-file = "src/autora/doc/_version.py"
+
+[tool.hatch.version.raw-options]
+local_scheme = "no-local-version"
+
+[tool.hatch.build.targets.sdist]
+include = ["src/autora"]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/autora"]
diff --git a/src/.amlignore b/src/.amlignore
new file mode 100644
index 0000000..f1ec22a
--- /dev/null
+++ b/src/.amlignore
@@ -0,0 +1,3 @@
+mlruns/
+.mypy_cache/
+__pycache__/
\ No newline at end of file
diff --git a/src/autora_doc/py.typed b/src/autora/doc/__init__.py
similarity index 100%
rename from src/autora_doc/py.typed
rename to src/autora/doc/__init__.py
diff --git a/src/autora_doc/example_module.py b/src/autora/doc/example_module.py
similarity index 100%
rename from src/autora_doc/example_module.py
rename to src/autora/doc/example_module.py
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
new file mode 100644
index 0000000..292c8ff
--- /dev/null
+++ b/src/autora/doc/pipelines/main.py
@@ -0,0 +1,87 @@
+import logging
+from timeit import default_timer as timer
+
+import jsonlines
+import mlflow
+import torch
+import typer
+
+from autora.doc.runtime.predict_hf import Predictor
+
+app = typer.Typer()
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(module)s.%(funcName)s(): %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# TODO: organize the system and instruction prompts into a separate module
+SYS = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for
+ scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore,
+ details about specific python functions, packages, or libraries are not necessary. Your readers are experimental
+ scientists.
+"""
+
+instr = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph
+ should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""
+
+
+@app.command()
+def predict(data_file: str, model_path: str) -> None:
+    """Generate documentation for each record of a JSON Lines file and log the results to MLflow.
+
+    Each record must provide an ``instruction`` field (the text sent to the model) and an ``output``
+    field (logged as the label). Inputs, labels and predictions are logged as text artifacts; the
+    prediction time per document, total token count and tokens per second are logged as metrics.
+
+    Args:
+        data_file: Path of the JSON Lines file to read.
+        model_path: Model location handed to ``Predictor``.
+    """
+    run = mlflow.active_run()
+
+    if run is None:
+        run = mlflow.start_run()
+    with run:
+        logger.info(f"Active run_id: {run.info.run_id}")
+        logger.info(f"running predict with {data_file}")
+        logger.info(f"model path: {model_path}")
+
+        with jsonlines.open(data_file) as reader:
+            items = list(reader)
+        inputs = [item["instruction"] for item in items]
+        labels = [item["output"] for item in items]
+
+        if not inputs:
+            # Nothing to predict: stop before loading the model and before the per-document
+            # average below would divide by zero.
+            logger.warning(f"No records found in {data_file}; skipping prediction")
+            return
+
+        pred = Predictor(model_path)
+        timer_start = timer()
+        predictions = pred.predict(SYS, instr, inputs)
+        timer_end = timer()
+        pred_time = timer_end - timer_start
+        mlflow.log_metric("prediction_time/doc", pred_time / len(inputs))
+        for i, (label, source, prediction) in enumerate(zip(labels, inputs, predictions)):
+            mlflow.log_text(label, f"label_{i}.txt")
+            mlflow.log_text(source, f"input_{i}.py")
+            mlflow.log_text(prediction, f"prediction_{i}.txt")
+
+        tokens = pred.tokenize(predictions)["input_ids"]
+        total_tokens = sum(len(token) for token in tokens)
+        mlflow.log_metric("total_tokens", total_tokens)
+        mlflow.log_metric("tokens/sec", total_tokens / pred_time)
+
+
+@app.command()
+def import_model(model_name: str) -> None:
+    """Placeholder command for importing a model; not implemented yet."""
+
+
+if __name__ == "__main__":
+    logger.info(f"Torch version: {torch.__version__} , Cuda available: {torch.cuda.is_available()}")
+
+    mlflow.autolog()
+    app()
diff --git a/tests/autora_doc/conftest.py b/src/autora/doc/py.typed
similarity index 100%
rename from tests/autora_doc/conftest.py
rename to src/autora/doc/py.typed
diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py
new file mode 100644
index 0000000..ba3e59d
--- /dev/null
+++ b/src/autora/doc/runtime/predict_hf.py
@@ -0,0 +1,84 @@
+import logging
+from typing import Dict, List
+
+import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+logger = logging.getLogger(__name__)
+
+
+class Predictor:
+    """Wraps a 4-bit quantized Hugging Face causal language model for text generation."""
+
+    def __init__(self, model_path: str):
+        """Load the tokenizer and 4-bit quantized model and build a text-generation pipeline.
+
+        Args:
+            model_path: Location passed to ``from_pretrained`` for both the tokenizer and the model.
+        """
+        # Load the model in 4bit quantization for faster inference on smaller GPUs
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+        logger.info(f"Loading model from {model_path}")
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_path, quantization_config=bnb_config, device_map="auto"
+        )
+        logger.info("Model loaded")
+        self.pipeline = transformers.pipeline(
+            "text-generation",
+            model=self.model,
+            tokenizer=self.tokenizer,
+        )
+
+    def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]:
+        """Generate one completion per input using a Llama 2 chat prompt.
+
+        Args:
+            sys: System prompt placed inside the ``<<SYS>>`` block.
+            instr: Instruction placed before each input.
+            inputs: Texts substituted, one per prompt, for the ``[INPUT]`` placeholder.
+
+        Returns:
+            The ``generated_text`` of the first returned sequence for each prompt.
+        """
+        # Standard Llama2 template: the system prompt block must be closed with <</SYS>>
+        template = f"""
+[INST]<<SYS>>
+{sys}
+<</SYS>>
+
+{instr}
+
+[INPUT]
+[/INST]
+"""
+        logger.info(f"Generating {len(inputs)} predictions")
+        prompts = [template.replace("[INPUT]", text) for text in inputs]
+        # TODO: Make these parameters configurable
+        # NOTE(review): max_length appears to cap prompt + generated tokens, so long inputs leave little
+        # room for output -- consider max_new_tokens; confirm against the transformers generation docs.
+        sequences = self.pipeline(
+            prompts,
+            do_sample=True,
+            temperature=0.6,
+            top_p=0.95,
+            top_k=40,
+            num_return_sequences=1,
+            eos_token_id=self.tokenizer.eos_token_id,
+            max_length=1000,
+        )
+
+        results = [sequence[0]["generated_text"] for sequence in sequences]
+        logger.info(f"Generated {len(results)} results")
+        return results
+
+    def tokenize(self, input: List[str]) -> Dict[str, List[List[int]]]:
+        """Tokenize ``input`` and return the tokenizer's output, e.g. its ``input_ids``."""
+        tokens: Dict[str, List[List[int]]] = self.tokenizer(input)
+        return tokens
diff --git a/src/autora_doc/__init__.py b/src/autora_doc/__init__.py
deleted file mode 100644
index b564b85..0000000
--- a/src/autora_doc/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .example_module import greetings, meaning
-
-__all__ = ["greetings", "meaning"]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autora_doc/test_example_module.py b/tests/test.py
similarity index 89%
rename from tests/autora_doc/test_example_module.py
rename to tests/test.py
index b8c5a61..a578227 100644
--- a/tests/autora_doc/test_example_module.py
+++ b/tests/test.py
@@ -1,4 +1,4 @@
-from autora_doc import example_module
+from autora.doc import example_module
 
 
 def test_greetings() -> None: