diff --git a/.github/workflows/pytest-shim.yaml b/.github/workflows/pytest-shim.yaml index 0f5478754..a75bc7fd6 100644 --- a/.github/workflows/pytest-shim.yaml +++ b/.github/workflows/pytest-shim.yaml @@ -38,14 +38,23 @@ permissions: # https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/troubleshooting-required-status-checks#handling-skipped-but-required-checks # Abort prior jobs in the same workflow / PR + concurrency: - group: pytest-skip-${{ github.ref }} + group: pytest-integration-skip-${{ github.ref }} cancel-in-progress: true - jobs: pytest: runs-on: ubuntu-latest + + steps: + - name: Skipped + run: | + echo skipped + + integration: + runs-on: ubuntu-latest + steps: - name: Skipped run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 87cac7d56..08ff0645f 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -1,4 +1,5 @@ name: pytest + on: pull_request: types: @@ -25,7 +26,7 @@ on: - "!.gitignore" - "!LICENSE" - # Ignore LFAI-UI things (no Python) + # Ignore UI things (no Python) - "!src/leapfrogai_ui/**" - "!packages/ui/**" @@ -33,7 +34,7 @@ on: permissions: read-all concurrency: - group: pytest-${{ github.ref }} + group: pytest-integration-${{ github.ref }} cancel-in-progress: true jobs: @@ -44,22 +45,109 @@ jobs: - name: Checkout Repo uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0 + - name: Cache Python Dependencies + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + with: + path: | + ~/.cache/pip + **/src/leapfrogai_api + **/src/leapfrogai_sdk + key: pytest-integration-pip-${{ github.ref }} + + - name: Setup Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0 with: - python-version-file: 'pyproject.toml' + python-version-file: "pyproject.toml" + + - name: Install Python Dependencies + run: pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk" - - name: Build Repeater + - name: Setup Repeater env: - LOCAL_VERSION: dev + LOCAL_VERSION: e2e-test run: | make docker-repeater - - name: Run Repeater - run: docker run -p 50051:50051 -d --name=repeater ghcr.io/defenseunicorns/leapfrogai/repeater:dev + docker run -p 50051:50051 -d --name=repeater ghcr.io/defenseunicorns/leapfrogai/repeater:$LOCAL_VERSION + + - name: Run Pytest + run: make test-api-unit + env: + LFAI_RUN_REPEATER_TESTS: true + + integration: + runs-on: ai-ubuntu-big-boy-8-core + + # If basic unit tests fail, do not run this job + needs: pytest + + steps: + - name: Checkout Repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Use Cached Python Dependencies + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + with: + path: | + ~/.cache/pip + **/src/leapfrogai_api + **/src/leapfrogai_sdk + key: pytest-integration-pip-${{ github.ref }} + + - name: Setup Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0 + with: + python-version-file: "pyproject.toml" - name: Install Python Deps run: pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk" - - name: Run Pytest - run: python -m pytest tests/pytest -v + - name: Setup Repeater env: - LFAI_RUN_REPEATER_TESTS: true + LOCAL_VERSION: e2e-test + run: | + make docker-repeater + docker run -p 50051:50051 -d --name=repeater ghcr.io/defenseunicorns/leapfrogai/repeater:$LOCAL_VERSION + + 
- name: Setup UDS Cluster + uses: ./.github/actions/uds-cluster + with: + registry1Username: ${{ secrets.IRON_BANK_ROBOT_USERNAME }} + registry1Password: ${{ secrets.IRON_BANK_ROBOT_PASSWORD }} + ghToken: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup API and Supabase + uses: ./.github/actions/lfai-core + + - name: Generate Secrets + id: generate_secrets + run: | + SUPABASE_PASS=$(cat <(openssl rand -base64 32 | tr -dc 'a-zA-Z0-9!@#$%^&*()_+-=[]{}|;:,.<>?' | head -c 20) <(echo '!@1Aa') | fold -w1 | shuf | tr -d '\n') + echo "::add-mask::$SUPABASE_PASS" + echo "SUPABASE_PASS=$SUPABASE_PASS" >> $GITHUB_OUTPUT + SUPABASE_ANON_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 -d) + echo "::add-mask::$SUPABASE_ANON_KEY" + echo "SUPABASE_ANON_KEY=$SUPABASE_ANON_KEY" >> $GITHUB_OUTPUT + + - name: Verify Secrets + run: | + echo "SUPABASE_ANON_KEY is set: ${{ steps.generate_secrets.outputs.SUPABASE_ANON_KEY != '' }}" + echo "SUPABASE_PASS is set: ${{ steps.generate_secrets.outputs.SUPABASE_PASS != '' }}" + + - name: Setup Text-Embeddings + run: | + make build-text-embeddings LOCAL_VERSION=e2e-test + docker image prune -af + uds zarf package deploy packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst -l=trace --confirm + rm packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst + + - name: Run Integration Tests + env: + SUPABASE_ANON_KEY: ${{ steps.generate_secrets.outputs.SUPABASE_ANON_KEY }} + SUPABASE_PASS: ${{ steps.generate_secrets.outputs.SUPABASE_PASS }} + SUPABASE_EMAIL: doug@uds.dev + SUPABASE_URL: https://supabase-kong.uds.dev + # Turn off NIAH tests that are not applicable for integration testing using the Repeater model + LFAI_RUN_NIAH_TESTS: "false" + run: | + make test-user-pipeline + env $(cat .env | xargs) python -m pytest -v -s tests/integration/api diff --git a/.gitignore b/.gitignore index 2b9d60287..8f3d1b52a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ build/ **/*.whl .model/ *.gguf +.env.password +.env.email .env .ruff_cache .branches diff --git a/Makefile b/Makefile index 732abe4e5..cef9bc20d 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,8 @@ help: ## Display this help information | sort | awk 'BEGIN {FS = ":.*?## "}; \ {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -clean: ## Clean up all the things (packages, build dirs, compiled .whl files, python eggs) +clean: ## Clean up all the things (test artifacts, packages, build dirs, compiled .whl files, python eggs) + -rm -rf .env .env.email .env.password .pytest_cache -rm -rf .logs -rm zarf-package-*.tar.zst -rm packages/**/zarf-package-*.tar.zst diff --git a/packages/repeater/Dockerfile b/packages/repeater/Dockerfile index 4d58f46a6..e60c7ce75 100644 --- a/packages/repeater/Dockerfile +++ b/packages/repeater/Dockerfile @@ -26,11 +26,11 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH" WORKDIR /leapfrogai COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/ - -COPY packages/repeater/repeater.py . +COPY packages/repeater/main.py . +COPY packages/repeater/config.yaml . 
# Publish port EXPOSE 50051:50051 # Run the repeater model -ENTRYPOINT ["python", "-u", "repeater.py"] +ENTRYPOINT ["python", "-m", "leapfrogai_sdk.cli", "--app-dir=.", "main:Model"] diff --git a/packages/repeater/config.yaml b/packages/repeater/config.yaml new file mode 100644 index 000000000..7a9ef7600 --- /dev/null +++ b/packages/repeater/config.yaml @@ -0,0 +1,14 @@ +# for testing purposes, not actually used by Repeater +model: + source: "." +max_context_length: 10000000000000 +stop_tokens: + - "" +prompt_format: + chat: + system: "{}" + assistant: "{}" + user: "{}" +defaults: + top_p: 1.0 + top_k: 0 diff --git a/packages/repeater/main.py b/packages/repeater/main.py new file mode 100644 index 000000000..8bac4d60c --- /dev/null +++ b/packages/repeater/main.py @@ -0,0 +1,71 @@ +import logging +import os +from typing import Any, AsyncGenerator + +from leapfrogai_sdk import ( + CompletionServiceServicer, + EmbeddingsServiceServicer, + ChatCompletionServiceServicer, + ChatCompletionStreamServiceServicer, + AudioServicer, + GrpcContext, + EmbeddingRequest, + EmbeddingResponse, + Embedding, + AudioRequest, + AudioResponse, + NameResponse, + serve, +) +from leapfrogai_sdk.llm import LLM, GenerationConfig + +logging.basicConfig( + level=os.getenv("LFAI_LOG_LEVEL", logging.INFO), + format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s", +) +logger = logging.getLogger(__name__) + + +@LLM +class Model( + CompletionServiceServicer, + EmbeddingsServiceServicer, + ChatCompletionServiceServicer, + ChatCompletionStreamServiceServicer, + AudioServicer, +): + async def generate( + self, prompt: str, config: GenerationConfig + ) -> AsyncGenerator[str, Any]: + logger.info("Begin generating streamed response") + for char in prompt: + yield char # type: ignore + logger.info("Streamed response complete") + + async def count_tokens(self, raw_text: str) -> int: + return len(raw_text) + + async def CreateEmbedding( + self, + request: EmbeddingRequest, + context: GrpcContext, + ) -> EmbeddingResponse: + return EmbeddingResponse( + embeddings=[Embedding(embedding=[0.0 for _ in range(10)])] + ) + + async def Transcribe( + self, request: AudioRequest, context: GrpcContext + ) -> AudioResponse: + return AudioResponse( + text="The repeater model received a transcribe request", + duration=1, + language="en", + ) + + async def Name(self, request, context): + return NameResponse(name="repeater") + + +if __name__ == "__main__": + serve(Model()) diff --git a/packages/repeater/repeater.py b/packages/repeater/repeater.py deleted file mode 100644 index 9e5dbdfd4..000000000 --- a/packages/repeater/repeater.py +++ /dev/null @@ -1,113 +0,0 @@ -import logging - -import leapfrogai_sdk -import asyncio - -from leapfrogai_sdk import CompletionUsage -from leapfrogai_sdk.chat.chat_pb2 import Usage - - -class Repeater( - leapfrogai_sdk.CompletionServiceServicer, - leapfrogai_sdk.EmbeddingsServiceServicer, - leapfrogai_sdk.ChatCompletionServiceServicer, - leapfrogai_sdk.ChatCompletionStreamServiceServicer, - leapfrogai_sdk.AudioServicer, -): - async def Complete( - self, - request: leapfrogai_sdk.CompletionRequest, - context: leapfrogai_sdk.GrpcContext, - ) -> leapfrogai_sdk.CompletionResponse: - result = request.prompt # just returns what's provided - print(f"Repeater.Complete: { request }") - completion = leapfrogai_sdk.CompletionChoice( - text=result, index=0, finish_reason="stop" - ) - return leapfrogai_sdk.CompletionResponse( - choices=[completion], - usage=CompletionUsage( - 
prompt_tokens=len(request.prompt), - completion_tokens=len(request.prompt), - total_tokens=len(request.prompt) * 2, - ), - ) - - async def CompleteStream( - self, - request: leapfrogai_sdk.CompletionRequest, - context: leapfrogai_sdk.GrpcContext, - ) -> leapfrogai_sdk.CompletionResponse: - for _ in range(5): - completion = leapfrogai_sdk.CompletionChoice( - text=request.prompt, index=0, finish_reason="stop" - ) - yield leapfrogai_sdk.CompletionResponse( - choices=[completion], - usage=CompletionUsage( - prompt_tokens=len(request.prompt), - completion_tokens=len(request.prompt), - total_tokens=len(request.prompt) * 2, - ), - ) - - async def CreateEmbedding( - self, - request: leapfrogai_sdk.EmbeddingRequest, - context: leapfrogai_sdk.GrpcContext, - ) -> leapfrogai_sdk.EmbeddingResponse: - return leapfrogai_sdk.EmbeddingResponse( - embeddings=[leapfrogai_sdk.Embedding(embedding=[0.0 for _ in range(10)])] - ) - - async def ChatComplete( - self, - request: leapfrogai_sdk.ChatCompletionRequest, - context: leapfrogai_sdk.GrpcContext, - ) -> leapfrogai_sdk.ChatCompletionResponse: - completion = leapfrogai_sdk.ChatCompletionChoice( - chat_item=request.chat_items[0], finish_reason="stop" - ) - return leapfrogai_sdk.ChatCompletionResponse( - choices=[completion], - usage=Usage( - prompt_tokens=len(request.chat_items[0].content), - completion_tokens=len(request.chat_items[0].content), - total_tokens=len(request.chat_items[0].content) * 2, - ), - ) - - async def ChatCompleteStream( - self, - request: leapfrogai_sdk.ChatCompletionRequest, - context: leapfrogai_sdk.GrpcContext, - ) -> leapfrogai_sdk.ChatCompletionResponse: - for _ in range(5): - completion = leapfrogai_sdk.ChatCompletionChoice( - chat_item=request.chat_items[0], finish_reason="stop" - ) - yield leapfrogai_sdk.ChatCompletionResponse( - choices=[completion], - usage=Usage( - prompt_tokens=len(request.chat_items[0].content), - completion_tokens=len(request.chat_items[0].content), - total_tokens=len(request.chat_items[0].content) * 2, - ), - ) - - async def Transcribe( - self, request: leapfrogai_sdk.AudioRequest, context: leapfrogai_sdk.GrpcContext - ) -> leapfrogai_sdk.AudioResponse: - return leapfrogai_sdk.AudioResponse( - text="The repeater model received a transcribe request", - duration=1, - language="en", - ) - - async def Name(self, request, context): - return leapfrogai_sdk.NameResponse(name="repeater") - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - asyncio.run(leapfrogai_sdk.serve(Repeater())) diff --git a/src/leapfrogai_api/pyproject.toml b/src/leapfrogai_api/pyproject.toml index 8a46a46e2..7e9921825 100644 --- a/src/leapfrogai_api/pyproject.toml +++ b/src/leapfrogai_api/pyproject.toml @@ -18,7 +18,8 @@ dependencies = [ "supabase == 2.6.0", "langchain == 0.2.1", "langchain-community == 0.2.1", - "unstructured[md,xlsx,pptx] == 0.15.3", # Only specify necessary filetypes to prevent package bloat (e.g. 130MB vs 6GB) + "unstructured[md,xlsx,pptx] == 0.15.9", # Only specify necessary filetypes to prevent package bloat (e.g. 
130MB vs 6GB) + "nltk == 3.9.1", # Required for pickled code containing .pptx parsing dependencies "pylibmagic == 0.5.0", # Resolves issue with libmagic not being bundled with OS - https://github.com/ahupp/python-magic/issues/233, may not be needed after this is merged https://github.com/ahupp/python-magic/pull/294 "python-magic == 0.4.27", "storage3==0.7.6", # required by supabase, bug when using previous versions diff --git a/tests/Makefile b/tests/Makefile index 53441cd0f..b2bb66861 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,41 +1,71 @@ -SUPABASE_URL := https://supabase-kong.uds.dev +SUPABASE_URL ?= https://supabase-kong.uds.dev +LEAPFROGAI_MODEL ?= llama-cpp-python +LFAI_RUN_NIAH_TESTS ?= false +LFAI_RUN_REPEATER_TESTS ?= true + +install: + pip install ".[dev]" "src/leapfrogai_api" "src/leapfrogai_sdk" "packages/repeater" set-supabase: - SUPABASE_URL := ${SUPABASE_URL} - SUPABASE_ANON_KEY := $(shell uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o json | uds zarf tools yq '.data.anon-key' | base64 -d) + $(eval SUPABASE_URL := $(SUPABASE_URL)) + $(eval SUPABASE_ANON_KEY := $(shell uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o json | uds zarf tools yq '.data.anon-key' | base64 -d)) define get_jwt_token - echo "Getting JWT token from ${SUPABASE_URL}..."; \ - TOKEN_RESPONSE=$$(curl -s -X POST $(1) \ - -H "apikey: ${SUPABASE_ANON_KEY}" \ + echo "Getting JWT token from $(3)..."; \ + echo "Email: $(1)"; \ + echo "Password: $(2)"; \ + TOKEN_RESPONSE=$$(curl -s -X POST $(3) \ + -H "apikey: $(SUPABASE_ANON_KEY)" \ -H "Content-Type: application/json" \ - -d '{ "email": "admin@localhost", "password": "$$SUPABASE_PASS"}'); \ - echo "Extracting token from $${TOKEN_RESPONSE}"; \ - JWT=$$(echo $${TOKEN_RESPONSE} | grep -o '"access_token":"[^"]*' | cut -d '"' -f 4); \ + -d "{\"email\": \"$(1)\", \"password\": \"$(2)\"}"); \ + echo "Extracting token from $$TOKEN_RESPONSE"; \ + JWT=$$(echo $$TOKEN_RESPONSE | grep -o '"access_token":"[^"]*' | cut -d '"' -f 4); \ echo "SUPABASE_USER_JWT=$$JWT" > .env; \ - echo "SUPABASE_URL=$$SUPABASE_URL" >> .env; \ - echo "SUPABASE_ANON_KEY=$$SUPABASE_ANON_KEY" >> .env; \ + echo "SUPABASE_URL=$(SUPABASE_URL)" >> .env; \ + echo "SUPABASE_ANON_KEY=$(SUPABASE_ANON_KEY)" >> .env; \ + echo "LEAPFROGAI_MODEL=$(LEAPFROGAI_MODEL)" >> .env echo "DONE - variables exported to .env file" endef -test-user: set-supabase - @read -s -p "Enter a new DEV API password: " SUPABASE_PASS; echo; \ - echo "Creating new supabase user..."; \ - $(call get_jwt_token,"${SUPABASE_URL}/auth/v1/signup") +prompt-email: + @echo "Enter your email address: "; \ + read SUPABASE_EMAIL; \ + echo $$SUPABASE_EMAIL > .env.email + +prompt-password: + @echo "Enter your DEV API password: "; \ + read SUPABASE_PASS; \ + echo $$SUPABASE_PASS > .env.password + +test-user: set-supabase prompt-email prompt-password + SUPABASE_EMAIL=$$(cat .env.email | tr -d '\n'); \ + SUPABASE_PASS=$$(cat .env.password | tr -d '\n'); \ + $(call get_jwt_token,$$SUPABASE_EMAIL,$$SUPABASE_PASS,"$(SUPABASE_URL)/auth/v1/signup") -test-env: set-supabase - @read -s -p "Enter your DEV API password: " SUPABASE_PASS; echo; \ - $(call get_jwt_token,"${SUPABASE_URL}/auth/v1/token?grant_type=password") +# Setup for pipeline tests with no interactive terminal +test-user-pipeline: set-supabase + $(call get_jwt_token,$(SUPABASE_EMAIL),$(SUPABASE_PASS),"$(SUPABASE_URL)/auth/v1/signup") + $(call 
get_jwt_token,$(SUPABASE_EMAIL),$(SUPABASE_PASS),"$(SUPABASE_URL)/auth/v1/token?grant_type=password") -test-api-integration: set-supabase - source .env; PYTHONPATH=$$(pwd) pytest -vv -s tests/integration/api +test-env: test-user + SUPABASE_EMAIL=$$(cat .env.email | tr -d '\n'); \ + SUPABASE_PASS=$$(cat .env.password | tr -d '\n'); \ + $(call get_jwt_token,$$SUPABASE_EMAIL,$$SUPABASE_PASS,"$(SUPABASE_URL)/auth/v1/token?grant_type=password") + +test-api-integration: + @if [ ! -f .env ]; then \ + echo ".env file not found!"; \ + exit 1; \ + fi + @if ! grep -q SUPABASE_USER_JWT .env || ! grep -q SUPABASE_URL .env || ! grep -q SUPABASE_ANON_KEY .env; then \ + echo "Required environment variables (SUPABASE_USER_JWT, SUPABASE_URL, SUPABASE_ANON_KEY) are missing in .env!"; \ + exit 1; \ + fi + @env $$(cat .env | xargs) LFAI_RUN_NIAH_TESTS=$(LFAI_RUN_NIAH_TESTS) PYTHONPATH=$$(pwd) pytest -vv -s tests/integration/api test-api-unit: set-supabase - PYTHONPATH=$$(pwd) pytest -vv -s tests/unit + LFAI_RUN_REPEATER_TESTS=$(LFAI_RUN_REPEATER_TESTS) PYTHONPATH=$$(pwd) pytest -vv -s tests/unit + LFAI_RUN_REPEATER_TESTS=$(LFAI_RUN_REPEATER_TESTS) PYTHONPATH=$$(pwd) python -m pytest -vv -s tests/pytest test-load: - locust -f ${PWD}/tests/load/loadtest.py --web-port 8089 - -debug: set-supabase - @echo ${SUPABASE_URL} - @echo ${SUPABASE_ANON_KEY} + locust -f $$(pwd)/tests/load/loadtest.py --web-port 8089 diff --git a/tests/README.md b/tests/README.md index 46951f643..9b09bb82f 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,7 +4,7 @@ This document outlines tests related to the LeapfrogAI API and backends. Please see the [documentation in the LeapfrogAI UI sub-directory](../src/leapfrogai_ui/README.md) for Svelte UI Playwright tests. -## API Tests +## API For the unit and integration tests within this directory, the following components must be running and accessible: @@ -12,20 +12,34 @@ For the unit and integration tests within this directory, the following componen - [Repeater](../packages/repeater/README.md) - [Supabase](../packages/supabase/README.md) -Please see the [Makefile](./Makefile) for more details. Below is a quick synopsis of the available Make targets: +If you are running everything in a [UDS Kubernetes cluster](../k3d-gpu/README.md), you must port-forward your model (e.g., Repeater, vLLM, etc.) using the following command: ```bash +# may be named repeater OR repeater-model depending on the rendered Helm manifests +uds zarf connect --name=repeater-model --namespace=leapfrogai --local-port=50051 --remote-port=50051 +``` + +If running everything via Docker containers or in a local Python environment, then ensure they are accessible based on the test configurations in each testing target's sub-directory. + +Please see the [Makefile](./Makefile) for more details on turning tests on/off and for setting test parameters like the default model to use. 
Below is a quick synopsis of the available Make targets that are **run from the root of the entire repository**: + +```bash +# Install the python dependencies +make install + # create a test user for the tests -make test-user SUPABASE_URL=https://supabase-kong.uds.dev +# prompts for a password and email +make test-user # setup the environment variables for the tests -make test-env SUPABASE_URL=https://supabase-kong.uds.dev +# prompts for the previous step's password and email +make test-env # run the unit tests -make test-api-unit SUPABASE_URL=https://supabase-kong.uds.dev +make test-api-unit # run the integration tests -make test-api-integration SUPABASE_URL=https://supabase-kong.uds.dev +make test-api-integration ``` ## Load Tests @@ -34,7 +48,7 @@ Please see the [Load Test documentation](./load/README.md) and directory for mor ## End-To-End Tests -End-to-End (E2E) tests are located in the `e2e/` sub-directory. Each E2E test runs independently based on the model backend that we are trying to test. +End-to-End (E2E) tests are located in the `e2e/` sub-directory. Each E2E test runs independently based on the model backend that is to be tested. ### Running Tests diff --git a/tests/integration/api/test_rag_files.py b/tests/integration/api/test_rag_files.py index a5a743f6e..9ed2ad28c 100644 --- a/tests/integration/api/test_rag_files.py +++ b/tests/integration/api/test_rag_files.py @@ -1,6 +1,7 @@ import os from pathlib import Path from openai.types.beta.threads.text import Text +import pytest from ...utils.client import client_config_factory @@ -23,6 +24,10 @@ def make_test_run(client, assistant, thread): return run +@pytest.mark.skipif( + os.environ.get("LFAI_RUN_NIAH_TESTS") != "true", + reason="LFAI_RUN_NIAH_TESTS envvar was not set to true", +) def test_rag_needle_haystack(): config = client_config_factory("leapfrogai") client = config.client diff --git a/tests/pytest/leapfrogai_api/test_api.py b/tests/pytest/leapfrogai_api/test_api.py index a80df6b6c..7ff0fa17c 100644 --- a/tests/pytest/leapfrogai_api/test_api.py +++ b/tests/pytest/leapfrogai_api/test_api.py @@ -260,7 +260,7 @@ def test_chat_completion(dummy_auth_middleware): # parse finish reason assert "finish_reason" in response_choices[0] - assert "stop" == response_choices[0].get("finish_reason") + assert "FinishReason.STOP" == response_choices[0].get("finish_reason") # parse usage data response_usage = response_obj.get("usage") @@ -283,6 +283,7 @@ def test_stream_chat_completion(dummy_auth_middleware): """Test the stream chat completion endpoint.""" with TestClient(app) as client: input_content = "this is the stream chat completion input." 
+        input_length = len(input_content)
 
         chat_completion_request = lfai_types.ChatCompletionRequest(
             model="repeater",
@@ -314,19 +315,32 @@ def test_stream_chat_completion(dummy_auth_middleware):
                 assert len(choices) == 1
                 assert "delta" in choices[0]
                 assert "content" in choices[0].get("delta")
-                assert choices[0].get("delta").get("content") == input_content
+                assert (
+                    choices[0].get("delta").get("content")
+                    == input_content[iter_length]
+                )
                 iter_length += 1
                 # parse finish reason
                 assert "finish_reason" in choices[0]
-                assert "stop" == choices[0].get("finish_reason")
+                # in streaming responses, the stop reason is not STOP until the last iteration (token) is sent back
+                if iter_length == input_length:
+                    assert "FinishReason.STOP" == choices[0].get("finish_reason")
+                else:
+                    assert "FinishReason.NONE" == choices[0].get("finish_reason")
             # parse usage data
             response_usage = stream_response.get("usage")
             prompt_tokens = response_usage.get("prompt_tokens")
             completion_tokens = response_usage.get("completion_tokens")
             total_tokens = response_usage.get("total_tokens")
-            assert prompt_tokens == len(input_content)
-            assert completion_tokens == len(input_content)
-            assert total_tokens == len(input_content) * 2
-
-        # The repeater only response with 5 messages
-        assert iter_length == 5
+            # in streaming responses, usage counts are not returned until the last iteration (token) is sent back
+            if iter_length == input_length:
+                assert prompt_tokens == input_length
+                assert completion_tokens == input_length
+                assert total_tokens == input_length * 2
+            else:
+                assert prompt_tokens == 0
+                assert completion_tokens == 0
+                assert total_tokens == 0
+
+        # The repeater echoes the prompt back one character per chunk, so the iteration count equals the prompt length
+        assert iter_length == input_length
diff --git a/tests/utils/client.py b/tests/utils/client.py
index 7a58b02f5..0baf6c0dc 100644
--- a/tests/utils/client.py
+++ b/tests/utils/client.py
@@ -3,7 +3,7 @@
 
 from pathlib import Path
 
-LEAPFROGAI_MODEL = "llama-cpp-python"
+LEAPFROGAI_MODEL = os.getenv("LEAPFROGAI_MODEL", "llama-cpp-python")
 
 OPENAI_MODEL = "gpt-4o-mini"
 
@@ -17,7 +17,9 @@ def openai_client():
 
 def leapfrogai_client():
     return OpenAI(
-        base_url=os.getenv("LEAPFROGAI_API_URL"),
+        base_url=os.getenv(
+            "LEAPFROGAI_API_URL", "https://leapfrogai-api.uds.dev/openai/v1"
+        ),
         api_key=os.getenv("SUPABASE_USER_JWT"),
     )
 
@@ -31,7 +33,7 @@ def __init__(self, client: OpenAI, model: str):
         self.model = model
 
 
-def client_config_factory(client_name) -> ClientConfig:
+def client_config_factory(client_name: str) -> ClientConfig:
    if client_name == "openai":
        return ClientConfig(client=openai_client(), model=OPENAI_MODEL)
    elif client_name == "leapfrogai":
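
For reference, a minimal sketch (not part of the patch) of how the per-character streaming behaviour asserted above can be exercised against a deployed repeater backend through the OpenAI-compatible API. The base URL, model name, and environment variables mirror the defaults introduced in this diff and assume `make test-env` has written a `.env` file whose variables are exported (e.g. `env $(cat .env | xargs) python sketch.py`).

# Illustration only -- names of env vars and the API URL are the defaults used in this diff.
import os

from openai import OpenAI

client = OpenAI(
    base_url=os.getenv("LEAPFROGAI_API_URL", "https://leapfrogai-api.uds.dev/openai/v1"),
    api_key=os.environ["SUPABASE_USER_JWT"],  # JWT minted by the Makefile targets
)

prompt = "echo me"
stream = client.chat.completions.create(
    model="repeater",  # the test backend built from packages/repeater
    messages=[{"role": "user", "content": prompt}],
    stream=True,
)

# The repeater yields one character per chunk, which is what
# test_stream_chat_completion now asserts.
chunks = [chunk.choices[0].delta.content or "" for chunk in stream]
assert "".join(chunks) == prompt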