diff --git a/README.md b/README.md
index 1d2fdfc..4135ad0 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Bring your personal data to life! Selfie offers OpenAI-compatible APIs that brin
 * Hosted LLMs from OpenAI, Replicate, etc. are supported too.
 * APIs for directly and selectively querying your data in natural language.
 
-### Personalized Chat
+### Personalized Chat
 
 selfie-augmentation
@@ -50,15 +50,17 @@ curl -X POST 'http://localhost:8181/v1/chat/completions' \
 
 ## Quick Start
 
+For macOS and Linux:
+
 1. Install [python](https://www.python.org) 3.9+, [poetry](https://python-poetry.org), and [Node.js](https://nodejs.org).
 2. Clone or [download](https://github.com/vana-com/selfie/archive/refs/heads/main.zip) the repository.
 3. Run `start.sh`.
 4. http://localhost:8181 will open in your default web browser.
 
-> **Tip**: Python 3.11 is recommended.
-
 > **Tip**: On macOS you can run `brew install poetry nodejs` with [brew](https://brew.sh).
 
+For Windows, please follow the instructions in [Installation](#installation).
+
 ## Overview
 
 Selfie is designed to compose well with tools on both sides of the text generation process. You can think of it as middleware that intelligently mixes your data into a request.
@@ -84,6 +86,8 @@ On the LLM side, Selfie uses tools like LiteLLM and txtai to support forwarding
 
 For most users, the easiest way to install Selfie is to follow the [Quick Start](#quick-start) instructions. If that doesn't work, or if you just want to install Selfie manually, follow the detailed instructions below.
 
+> **Tip**: Python 3.11 is recommended.
+
 Manual Installation
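With the Quick Start now split by platform, a quick way to verify that the server came up is to hit the chat completions endpoint directly. A minimal sketch: the URL comes from the README's own example above; the request body follows the standard OpenAI chat completions schema, and the message text is an arbitrary illustration:

```sh
# Smoke test for the OpenAI-compatible API on the default port.
# The payload shape is the standard chat completions schema; the
# message content is an arbitrary example.
curl -X POST 'http://localhost:8181/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -d '{"messages": [{"role": "user", "content": "What did I do last weekend?"}]}'
```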
diff --git a/scripts/llama-cpp-python-cublas.sh b/scripts/llama-cpp-python-cublas.sh
index 49ff0c2..ae98262 100755
--- a/scripts/llama-cpp-python-cublas.sh
+++ b/scripts/llama-cpp-python-cublas.sh
@@ -21,18 +21,6 @@ detect_cpu_arch() {
     echo $CPU_ARCH
 }
 
-detect_platform() {
-    OS_NAME=$(uname -s)
-    OS_ARCH=$(uname -m)
-    if [ "$OS_NAME" == "Linux" ]; then
-        PLATFORM="manylinux_2_31_x86_64"
-    elif [ "$OS_NAME" == "Darwin" ]; then
-        PLATFORM="macosx_$(sw_vers -productVersion | cut -d. -f1-2)_$(uname -m)"
-    else
-        PLATFORM="unsupported"
-    fi
-    echo $PLATFORM
-}
-
 detect_gpu_acceleration() {
     CUDA_VERSION=""
@@ -40,57 +28,29 @@ detect_gpu_acceleration() {
     ROCM_VERSION=""
     ACCELERATION="cpu"
 
     if command -v nvcc &> /dev/null; then
-        CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d'.' -f1-2 | sed 's/[^0-9]//g')
+        CUDA_VERSION=$(nvcc --version | awk '/release/ {print $5}' | cut -d',' -f1 | tr -cd '[0-9]')
+        ACCELERATION="cu$CUDA_VERSION"
     elif command -v rocm-info &> /dev/null; then
-        ROCM_VERSION=$(rocm-info | grep -oP 'Version:\s+\K[0-9.]+')
+        ROCM_VERSION=$(rocm-info | awk '/Version:/ {print $2}' | tr -d '.')
         ACCELERATION="rocm$ROCM_VERSION"
     elif [ "$(uname -s)" == "Darwin" ]; then
-        ACCELERATION="metal"
+        ACCELERATION="cpu"
     fi
 
     echo "$ACCELERATION"
 }
 
-detect_latest_accelerated_version() {
+get_index_url() {
     CPU_ARCH=$(detect_cpu_arch)
-    PLATFORM=$(detect_platform)
     ACCELERATION=$(detect_gpu_acceleration)
-    PYTHON_VERSION=$(python --version 2>&1 | grep -oP 'Python \K[0-9]+\.[0-9]+')
-    PYTHON_VERSION_CONCATENATED=$(echo $PYTHON_VERSION | tr -d '.') # Convert to e.g., 311
-    URL="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}/llama-cpp-python/"
-    LATEST_WHEEL=$(curl -s $URL | grep -oP "href=\"\K(.*?cp${PYTHON_VERSION_CONCATENATED}.*?${PLATFORM}.*?\.whl)" | sort -V | tail -n 1)
-
-    if [ -z "$LATEST_WHEEL" ]; then
-        echo "No suitable wheel file found for the current configuration."
-        exit 1
-    fi
-
-    echo "$LATEST_WHEEL"
+    echo "https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}"
 }
 
-check_and_install() {
-    LATEST_WHEEL=$(detect_latest_accelerated_version)
-    if [ -z "$LATEST_WHEEL" ]; then
-        echo "WARNING: Unable to find a compatible wheel file, installing an unaccelerated version."
-        python -m pip install llama-cpp-python
-    fi
-    WHL_FILE=$(basename "$LATEST_WHEEL")
-    LATEST_VERSION=$(echo "$WHL_FILE" | grep -oP "llama_cpp_python-\K([0-9]+\.[0-9]+\.[0-9]+(\+[a-z0-9]+)?)")
-
-    INSTALLED_VERSION=$(pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3 || echo "")
-
-    if [ "$INSTALLED_VERSION" = "$LATEST_VERSION" ]; then
-        echo "The latest version of llama-cpp-python ($LATEST_VERSION) is already installed."
-    else
-        echo "Installing the latest version of llama-cpp-python ($LATEST_VERSION) for your system ($INSTALLED_VERSION) is installed)"
-        python -m pip install --prefer-binary --force-reinstall "$LATEST_WHEEL"
-    fi
-}
-
-echo "Checking for llama-cpp-python installation..."
-check_and_install
+echo "Installing accelerated llama-cpp-python..."
+poetry run python -m pip install llama-cpp-python --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"
 
 echo "Installation complete. Please check for any errors above."
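This rewrite replaces the homegrown wheel discovery (`detect_platform`, `detect_latest_accelerated_version`, `check_and_install`) with pip's own resolver: `get_index_url` composes an acceleration-specific package index from the detected CPU architecture and GPU backend, and `--extra-index-url` lets pip pick a compatible binary wheel from it, falling back to the regular index if none fits. As a sketch, on a hypothetical AVX2 machine where `nvcc` reports CUDA 12.1, `detect_gpu_acceleration` would yield `cu121` and the install step would expand to:

```sh
# Hypothetical expansion of the new install step (AVX2 CPU, CUDA 12.1).
# The --extra-index-url value is what get_index_url would compose for
# that hardware; pip resolves a matching llama-cpp-python wheel from it.
poetry run python -m pip install llama-cpp-python \
    --prefer-binary --force-reinstall \
    --extra-index-url="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu121"
```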
diff --git a/selfie/embeddings/__init__.py b/selfie/embeddings/__init__.py
index e874eaa..5c9870a 100644
--- a/selfie/embeddings/__init__.py
+++ b/selfie/embeddings/__init__.py
@@ -58,7 +58,6 @@ async def completion_async(prompt):
         self.character_name = character_name
 
         self.embeddings = Embeddings(
-            path="sentence-transformers/all-MiniLM-L6-v2",
             sqlite={"wal": True},  # For now, sqlite w/the default driver is the only way to use WAL.
             content=True
diff --git a/start.sh b/start.sh
index 723da6e..0ffd318 100755
--- a/start.sh
+++ b/start.sh
@@ -19,25 +19,20 @@ if [ ! -z "$MISSING_DEPENDENCIES" ]; then
     exit 1
 fi
 
+if command -v nvcc &>/dev/null || command -v rocm-info &>/dev/null || [ "$(uname -m)" = "arm64" ]; then
+    GPU_FLAG="--gpu"
+else
+    GPU_FLAG=""
+fi
+
 echo "Installing Python dependencies with Poetry..."
 poetry check || poetry install
 
 echo "Building UI with Yarn..."
 ./scripts/build-ui.sh
 
-ACCELERATION_FLAG=""
-
 echo "Running llama-cpp-python-cublas.sh to enable hardware acceleration..."
 ./scripts/llama-cpp-python-cublas.sh
 
-LLAMA_CPP_VERSION=$(poetry run pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3)
-
-if [[ $LLAMA_CPP_VERSION == *"-gpu"* ]]; then
-    echo "Accelerated version of llama_cpp_python detected. Enabling GPU support."
-    ACCELERATION_FLAG="--gpu"
-else
-    echo "No accelerated version of llama_cpp_python detected. Running without GPU support."
-fi
-
 echo "Running selfie..."
-poetry run $PYTHON_COMMAND -m selfie $ACCELERATION_FLAG
+poetry run python -m selfie $GPU_FLAG
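start.sh now decides GPU support up front by probing the hardware directly (nvcc, rocm-info, or an arm64 machine, i.e. Apple Silicon), rather than inferring it afterwards from the installed llama_cpp_python version string. If the detection guesses wrong, the server can be started by hand with the flag set explicitly; this is the same invocation start.sh runs, with $GPU_FLAG written out:

```sh
# Force GPU support on; drop --gpu to run CPU-only.
poetry run python -m selfie --gpu
```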