Commit
Simplify startup scripts
tnunamak committed Mar 5, 2024
1 parent d8bf74d commit eb707f1
Showing 4 changed files with 23 additions and 65 deletions.
10 changes: 7 additions & 3 deletions README.md
@@ -20,7 +20,7 @@ Bring your personal data to life! Selfie offers OpenAI-compatible APIs that brin
* Hosted LLMs from OpenAI, Replicate, etc. are supported too.
* APIs for directly and selectively querying your data in natural language.

### Personalized Chat

<img alt="selfie-augmentation" src="./docs/images/playground-use-data.png" height="300px">

@@ -50,15 +50,17 @@ curl -X POST 'http://localhost:8181/v1/chat/completions' \

## Quick Start

For macOS and Linux:

1. Install [Python](https://www.python.org) 3.9+, [Poetry](https://python-poetry.org), and [Node.js](https://nodejs.org).
2. Clone or [download](https://github.com/vana-com/selfie/archive/refs/heads/main.zip) the repository.
3. Run `start.sh`.
4. http://localhost:8181 will open in your default web browser.

> **Tip**: Python 3.11 is recommended.
> **Tip**: On macOS you can run `brew install poetry nodejs` with [brew](https://brew.sh).
For Windows, please follow the instructions in [Installation](#installation).
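As a sketch, assuming `git` and the prerequisites above are installed, the quick start amounts to:

```sh
# A sketch of the quick start (repository URL taken from the download link above)
git clone https://github.com/vana-com/selfie.git
cd selfie
./start.sh
# The UI should open automatically at http://localhost:8181
```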

## Overview

Selfie is designed to compose well with tools on both sides of the text generation process. You can think of it as middleware that intelligently mixes your data into a request.
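For example, a minimal OpenAI-style request through Selfie might look like the sketch below; the endpoint comes from the excerpt above, while the payload is illustrative and assumes the standard OpenAI chat format:

```sh
# Sketch: an OpenAI-compatible chat request that Selfie enriches with personal data
curl -X POST 'http://localhost:8181/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -d '{"messages": [{"role": "user", "content": "What did I do last weekend?"}]}'
```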
@@ -84,6 +86,8 @@ On the LLM side, Selfie uses tools like LiteLLM and txtai to support forwarding

For most users, the easiest way to install Selfie is to follow the [Quick Start](#quick-start) instructions. If that doesn't work, or if you just want to install Selfie manually, follow the detailed instructions below.

> **Tip**: Python 3.11 is recommended.
<details>
<summary>Manual Installation</summary>

58 changes: 9 additions & 49 deletions scripts/llama-cpp-python-cublas.sh
@@ -21,76 +21,36 @@ detect_cpu_arch() {
echo $CPU_ARCH
}

detect_platform() {
  OS_NAME=$(uname -s)
  OS_ARCH=$(uname -m)
  if [ "$OS_NAME" == "Linux" ]; then
    PLATFORM="manylinux_2_31_x86_64"
  elif [ "$OS_NAME" == "Darwin" ]; then
    PLATFORM="macosx_$(sw_vers -productVersion | cut -d. -f1-2)_$(uname -m)"
  else
    PLATFORM="unsupported"
  fi
  echo $PLATFORM
}

detect_gpu_acceleration() {
  CUDA_VERSION=""
  ROCM_VERSION=""
  ACCELERATION="cpu"

  if command -v nvcc &> /dev/null; then
    CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d'.' -f1-2 | sed 's/[^0-9]//g')
    CUDA_VERSION=$(nvcc --version | awk '/release/ {print $5}' | cut -d',' -f1 | tr -cd '[0-9]')

    ACCELERATION="cu$CUDA_VERSION"
  elif command -v rocm-info &> /dev/null; then
    ROCM_VERSION=$(rocm-info | grep -oP 'Version:\s+\K[0-9.]+')
    ROCM_VERSION=$(rocm-info | awk '/Version:/ {print $2}' | tr -d '.')
    ACCELERATION="rocm$ROCM_VERSION"
  elif [ "$(uname -s)" == "Darwin" ]; then
    ACCELERATION="metal"
    ACCELERATION="cpu"
  fi

  echo "$ACCELERATION"
}
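For reference, the acceleration labels this function emits feed directly into the wheel index URL below; a sketch of the mapping, with illustrative version numbers:

```sh
# Sketch: detect_gpu_acceleration outputs (versions illustrative)
#   NVIDIA toolchain with CUDA 12.1  -> cu121
#   AMD toolchain with ROCm 5.6      -> rocm56
#   macOS (after this change)        -> cpu    # previously: metal
#   no GPU toolchain found           -> cpu
```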

detect_latest_accelerated_version() {
get_index_url() {
  CPU_ARCH=$(detect_cpu_arch)
  PLATFORM=$(detect_platform)
  ACCELERATION=$(detect_gpu_acceleration)
  PYTHON_VERSION=$(python --version 2>&1 | grep -oP 'Python \K[0-9]+\.[0-9]+')
  PYTHON_VERSION_CONCATENATED=$(echo $PYTHON_VERSION | tr -d '.') # Convert to e.g., 311

  URL="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}/llama-cpp-python/"
  LATEST_WHEEL=$(curl -s $URL | grep -oP "href=\"\K(.*?cp${PYTHON_VERSION_CONCATENATED}.*?${PLATFORM}.*?\.whl)" | sort -V | tail -n 1)

  if [ -z "$LATEST_WHEEL" ]; then
    echo "No suitable wheel file found for the current configuration."
    exit 1
  fi

  echo "$LATEST_WHEEL"
  echo "https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}"
}

check_and_install() {
  LATEST_WHEEL=$(detect_latest_accelerated_version)
  if [ -z "$LATEST_WHEEL" ]; then
    echo "WARNING: Unable to find a compatible wheel file, installing an unaccelerated version."
    python -m pip install llama-cpp-python
  fi

  WHL_FILE=$(basename "$LATEST_WHEEL")
  LATEST_VERSION=$(echo "$WHL_FILE" | grep -oP "llama_cpp_python-\K([0-9]+\.[0-9]+\.[0-9]+(\+[a-z0-9]+)?)")

  INSTALLED_VERSION=$(pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3 || echo "")

  if [ "$INSTALLED_VERSION" = "$LATEST_VERSION" ]; then
    echo "The latest version of llama-cpp-python ($LATEST_VERSION) is already installed."
  else
    echo "Installing the latest version of llama-cpp-python ($LATEST_VERSION) for your system (currently installed: $INSTALLED_VERSION)"
    python -m pip install --prefer-binary --force-reinstall "$LATEST_WHEEL"
  fi
}

echo "Checking for llama-cpp-python installation..."
check_and_install
echo "Installing accelerated llama-cpp-python..."
poetry run python -m pip install llama-cpp-python --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"

echo "Installation complete. Please check for any errors above."

1 change: 0 additions & 1 deletion selfie/embeddings/__init__.py
@@ -58,7 +58,6 @@ async def completion_async(prompt):

self.character_name = character_name
self.embeddings = Embeddings(
    path="sentence-transformers/all-MiniLM-L6-v2",
    sqlite={"wal": True},
    # For now, sqlite w/the default driver is the only way to use WAL.
    content=True
19 changes: 7 additions & 12 deletions start.sh
@@ -19,25 +19,20 @@ if [ ! -z "$MISSING_DEPENDENCIES" ]; then
exit 1
fi

if command -v nvcc &>/dev/null || command -v rocm-info &>/dev/null || [ "$(uname -m)" = "arm64" ]; then
  GPU_FLAG="--gpu"
else
  GPU_FLAG=""
fi

echo "Installing Python dependencies with Poetry..."
poetry check || poetry install

echo "Building UI with Yarn..."
./scripts/build-ui.sh

ACCELERATION_FLAG=""

echo "Running llama-cpp-python-cublas.sh to enable hardware acceleration..."
./scripts/llama-cpp-python-cublas.sh

LLAMA_CPP_VERSION=$(poetry run pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3)

if [[ $LLAMA_CPP_VERSION == *"-gpu"* ]]; then
  echo "Accelerated version of llama_cpp_python detected. Enabling GPU support."
  ACCELERATION_FLAG="--gpu"
else
  echo "No accelerated version of llama_cpp_python detected. Running without GPU support."
fi

echo "Running selfie..."
poetry run $PYTHON_COMMAND -m selfie $ACCELERATION_FLAG
poetry run python -m selfie $GPU_FLAG
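To see which branch the new detection takes on a given machine, the condition can be mirrored on its own (a sketch copied from the check above):

```sh
# Sketch: mirror start.sh's GPU detection without running the app
if command -v nvcc &>/dev/null || command -v rocm-info &>/dev/null || [ "$(uname -m)" = "arm64" ]; then
  echo "start.sh would pass --gpu"
else
  echo "start.sh would run without --gpu"
fi
```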
