diff --git a/README.md b/README.md
index 1d2fdfc..4135ad0 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Bring your personal data to life! Selfie offers OpenAI-compatible APIs that brin
* Hosted LLMs from OpenAI, Replicate, etc. are supported too.
* APIs for directly and selectively querying your data in natural language.
-### Personalized Chat
+### Personalized Chat
@@ -50,15 +50,17 @@ curl -X POST 'http://localhost:8181/v1/chat/completions' \
## Quick Start
+For macOS and Linux:
+
1. Install [python](https://www.python.org) 3.9+, [poetry](https://python-poetry.org), and [Node.js](https://nodejs.org).
2. Clone or [download](https://github.com/vana-com/selfie/archive/refs/heads/main.zip) the repository.
3. Run `start.sh`.
4. http://localhost:8181 will open in your default web browser.
-> **Tip**: Python 3.11 is recommended.
-
> **Tip**: On macOS you can run `brew install poetry nodejs` with [brew](https://brew.sh).
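+
+For example, a first run on macOS or Linux (assuming `git` is available) looks like:
+
+```bash
+git clone https://github.com/vana-com/selfie.git
+cd selfie
+./start.sh
+```
+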
+For Windows, please follow the instructions in [Installation](#installation).
+
## Overview
Selfie is designed to compose well with tools on both sides of the text generation process. You can think of it as middleware that intelligently mixes your data into a request.
@@ -84,6 +86,8 @@ On the LLM side, Selfie uses tools like LiteLLM and txtai to support forwarding
For most users, the easiest way to install Selfie is to follow the [Quick Start](#quick-start) instructions. If that doesn't work, or if you just want to install Selfie manually, follow the detailed instructions below.
+> **Tip**: Python 3.11 is recommended.
+
Manual Installation
diff --git a/scripts/llama-cpp-python-cublas.sh b/scripts/llama-cpp-python-cublas.sh
index 49ff0c2..ae98262 100755
--- a/scripts/llama-cpp-python-cublas.sh
+++ b/scripts/llama-cpp-python-cublas.sh
@@ -21,18 +21,6 @@ detect_cpu_arch() {
echo $CPU_ARCH
}
-detect_platform() {
- OS_NAME=$(uname -s)
- OS_ARCH=$(uname -m)
- if [ "$OS_NAME" == "Linux" ]; then
- PLATFORM="manylinux_2_31_x86_64"
- elif [ "$OS_NAME" == "Darwin" ]; then
- PLATFORM="macosx_$(sw_vers -productVersion | cut -d. -f1-2)_$(uname -m)"
- else
- PLATFORM="unsupported"
- fi
- echo $PLATFORM
-}
detect_gpu_acceleration() {
CUDA_VERSION=""
@@ -40,57 +28,29 @@ detect_gpu_acceleration() {
ACCELERATION="cpu"
if command -v nvcc &> /dev/null; then
- CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d'.' -f1-2 | sed 's/[^0-9]//g')
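+        # Extract the CUDA release from nvcc's "release X.Y" line (e.g. "release 12.2" -> "122")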
+        CUDA_VERSION=$(nvcc --version | awk '/release/ {print $5}' | cut -d',' -f1 | tr -cd '0-9')
ACCELERATION="cu$CUDA_VERSION"
elif command -v rocm-info &> /dev/null; then
- ROCM_VERSION=$(rocm-info | grep -oP 'Version:\s+\K[0-9.]+')
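+        # Keep the digits of the version reported after "Version:" (e.g. "5.7" -> "57")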
+ ROCM_VERSION=$(rocm-info | awk '/Version:/ {print $2}' | tr -d '.')
ACCELERATION="rocm$ROCM_VERSION"
elif [ "$(uname -s)" == "Darwin" ]; then
- ACCELERATION="metal"
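+        # On macOS, install the CPU wheel from this index; GPU (Metal) offload is requested at runtime via --gpu in start.sh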
+ ACCELERATION="cpu"
fi
echo "$ACCELERATION"
}
-detect_latest_accelerated_version() {
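+# Compose the extra index URL for prebuilt llama-cpp-python wheels matching this machine's CPU arch and acceleration backend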
+get_index_url() {
CPU_ARCH=$(detect_cpu_arch)
- PLATFORM=$(detect_platform)
ACCELERATION=$(detect_gpu_acceleration)
- PYTHON_VERSION=$(python --version 2>&1 | grep -oP 'Python \K[0-9]+\.[0-9]+')
- PYTHON_VERSION_CONCATENATED=$(echo $PYTHON_VERSION | tr -d '.') # Convert to e.g., 311
- URL="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}/llama-cpp-python/"
- LATEST_WHEEL=$(curl -s $URL | grep -oP "href=\"\K(.*?cp${PYTHON_VERSION_CONCATENATED}.*?${PLATFORM}.*?\.whl)" | sort -V | tail -n 1)
-
- if [ -z "$LATEST_WHEEL" ]; then
- echo "No suitable wheel file found for the current configuration."
- exit 1
- fi
-
- echo "$LATEST_WHEEL"
+ echo "https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}"
}
-check_and_install() {
- LATEST_WHEEL=$(detect_latest_accelerated_version)
- if [ -z "$LATEST_WHEEL" ]; then
- echo "WARNING: Unable to find a compatible wheel file, installing an unaccelerated version."
- python -m pip install llama-cpp-python
- fi
- WHL_FILE=$(basename "$LATEST_WHEEL")
- LATEST_VERSION=$(echo "$WHL_FILE" | grep -oP "llama_cpp_python-\K([0-9]+\.[0-9]+\.[0-9]+(\+[a-z0-9]+)?)")
-
- INSTALLED_VERSION=$(pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3 || echo "")
-
- if [ "$INSTALLED_VERSION" = "$LATEST_VERSION" ]; then
- echo "The latest version of llama-cpp-python ($LATEST_VERSION) is already installed."
- else
- echo "Installing the latest version of llama-cpp-python ($LATEST_VERSION) for your system ($INSTALLED_VERSION) is installed)"
- python -m pip install --prefer-binary --force-reinstall "$LATEST_WHEEL"
- fi
-}
-
-echo "Checking for llama-cpp-python installation..."
-check_and_install
+echo "Installing accelerated llama-cpp-python..."
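+# pip picks a wheel matching this platform and Python from the extra index, falling back to PyPI (source build) if none fits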
+poetry run python -m pip install llama-cpp-python --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"
echo "Installation complete. Please check for any errors above."
diff --git a/selfie/embeddings/__init__.py b/selfie/embeddings/__init__.py
index e874eaa..5c9870a 100644
--- a/selfie/embeddings/__init__.py
+++ b/selfie/embeddings/__init__.py
@@ -58,7 +58,6 @@ async def completion_async(prompt):
self.character_name = character_name
self.embeddings = Embeddings(
- path="sentence-transformers/all-MiniLM-L6-v2",
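+            # With no explicit path, txtai falls back to its default embedding model (sentence-transformers/all-MiniLM-L6-v2)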
sqlite={"wal": True},
# For now, sqlite w/the default driver is the only way to use WAL.
content=True
diff --git a/start.sh b/start.sh
index 723da6e..0ffd318 100755
--- a/start.sh
+++ b/start.sh
@@ -19,25 +19,20 @@ if [ ! -z "$MISSING_DEPENDENCIES" ]; then
exit 1
fi
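+# Enable GPU offload when CUDA or ROCm tooling is present, or on Apple Silicon (arm64 -> Metal)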
+if command -v nvcc &>/dev/null || command -v rocm-info &>/dev/null || [ "$(uname -m)" = "arm64" ]; then
+ GPU_FLAG="--gpu"
+else
+ GPU_FLAG=""
+fi
+
echo "Installing Python dependencies with Poetry..."
poetry check || poetry install
echo "Building UI with Yarn..."
./scripts/build-ui.sh
-ACCELERATION_FLAG=""
-
echo "Running llama-cpp-python-cublas.sh to enable hardware acceleration..."
./scripts/llama-cpp-python-cublas.sh
-LLAMA_CPP_VERSION=$(poetry run pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3)
-
-if [[ $LLAMA_CPP_VERSION == *"-gpu"* ]]; then
- echo "Accelerated version of llama_cpp_python detected. Enabling GPU support."
- ACCELERATION_FLAG="--gpu"
-else
- echo "No accelerated version of llama_cpp_python detected. Running without GPU support."
-fi
-
echo "Running selfie..."
-poetry run $PYTHON_COMMAND -m selfie $ACCELERATION_FLAG
+poetry run python -m selfie $GPU_FLAG