diff --git a/README.md b/README.md
index 1d2fdfc..4135ad0 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Bring your personal data to life! Selfie offers OpenAI-compatible APIs that brin
 * Hosted LLMs from OpenAI, Replicate, etc. are supported too.
 * APIs for directly and selectively querying your data in natural language.
 
-### Personalized Chat
+### Personalized Chat
 
 selfie-augmentation
@@ -50,15 +50,17 @@ curl -X POST 'http://localhost:8181/v1/chat/completions' \
 
 ## Quick Start
 
+For macOS and Linux:
+
 1. Install [python](https://www.python.org) 3.9+, [poetry](https://python-poetry.org), and [Node.js](https://nodejs.org).
 2. Clone or [download](https://github.com/vana-com/selfie/archive/refs/heads/main.zip) the repository.
 3. Run `start.sh`.
 4. http://localhost:8181 will open in your default web browser.
 
-> **Tip**: Python 3.11 is recommended.
-
 > **Tip**: On macOS you can run `brew install poetry nodejs` with [brew](https://brew.sh).
 
+For Windows, please follow the instructions in [Installation](#installation).
+
 ## Overview
 
 Selfie is designed to compose well with tools on both sides of the text generation process. You can think of it as middleware that intelligently mixes your data into a request.
@@ -84,6 +86,8 @@ On the LLM side, Selfie uses tools like LiteLLM and txtai to support forwarding
 
 For most users, the easiest way to install Selfie is to follow the [Quick Start](#quick-start) instructions. If that doesn't work, or if you just want to install Selfie manually, follow the detailed instructions below.
 
+> **Tip**: Python 3.11 is recommended.
+
 Manual Installation
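With the Quick Start now split by platform, a quick way to verify that the server came up is to hit the chat completions endpoint directly. A minimal sketch: the URL comes from the README's own example above; the request body follows the standard OpenAI chat completions schema, and the message text is an arbitrary illustration:

```sh
# Smoke test for the OpenAI-compatible API on the default port.
# The payload shape is the standard chat completions schema; the
# message content is an arbitrary example.
curl -X POST 'http://localhost:8181/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -d '{"messages": [{"role": "user", "content": "What did I do last weekend?"}]}'
```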
diff --git a/scripts/llama-cpp-python-cublas.sh b/scripts/llama-cpp-python-cublas.sh
index 49ff0c2..ae98262 100755
--- a/scripts/llama-cpp-python-cublas.sh
+++ b/scripts/llama-cpp-python-cublas.sh
@@ -21,18 +21,6 @@ detect_cpu_arch() {
     echo $CPU_ARCH
 }
 
-detect_platform() {
-    OS_NAME=$(uname -s)
-    OS_ARCH=$(uname -m)
-    if [ "$OS_NAME" == "Linux" ]; then
-        PLATFORM="manylinux_2_31_x86_64"
-    elif [ "$OS_NAME" == "Darwin" ]; then
-        PLATFORM="macosx_$(sw_vers -productVersion | cut -d. -f1-2)_$(uname -m)"
-    else
-        PLATFORM="unsupported"
-    fi
-    echo $PLATFORM
-}
-
 detect_gpu_acceleration() {
     CUDA_VERSION=""
@@ -40,57 +28,29 @@ detect_gpu_acceleration() {
     ROCM_VERSION=""
     ACCELERATION="cpu"
 
     if command -v nvcc &> /dev/null; then
-        CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d'.' -f1-2 | sed 's/[^0-9]//g')
+        CUDA_VERSION=$(nvcc --version | awk '/release/ {print $5}' | cut -d',' -f1 | tr -cd '[0-9]')
+        ACCELERATION="cu$CUDA_VERSION"
     elif command -v rocm-info &> /dev/null; then
-        ROCM_VERSION=$(rocm-info | grep -oP 'Version:\s+\K[0-9.]+')
+        ROCM_VERSION=$(rocm-info | awk '/Version:/ {print $2}' | tr -d '.')
         ACCELERATION="rocm$ROCM_VERSION"
     elif [ "$(uname -s)" == "Darwin" ]; then
-        ACCELERATION="metal"
+        ACCELERATION="cpu"
     fi
 
     echo "$ACCELERATION"
 }
 
-detect_latest_accelerated_version() {
+get_index_url() {
     CPU_ARCH=$(detect_cpu_arch)
-    PLATFORM=$(detect_platform)
     ACCELERATION=$(detect_gpu_acceleration)
-    PYTHON_VERSION=$(python --version 2>&1 | grep -oP 'Python \K[0-9]+\.[0-9]+')
-    PYTHON_VERSION_CONCATENATED=$(echo $PYTHON_VERSION | tr -d '.') # Convert to e.g., 311
-    URL="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}/llama-cpp-python/"
-    LATEST_WHEEL=$(curl -s $URL | grep -oP "href=\"\K(.*?cp${PYTHON_VERSION_CONCATENATED}.*?${PLATFORM}.*?\.whl)" | sort -V | tail -n 1)
-
-    if [ -z "$LATEST_WHEEL" ]; then
-        echo "No suitable wheel file found for the current configuration."
-        exit 1
-    fi
-
-    echo "$LATEST_WHEEL"
+    echo "https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}"
 }
 
-check_and_install() {
-    LATEST_WHEEL=$(detect_latest_accelerated_version)
-    if [ -z "$LATEST_WHEEL" ]; then
-        echo "WARNING: Unable to find a compatible wheel file, installing an unaccelerated version."
-        python -m pip install llama-cpp-python
-    fi
-    WHL_FILE=$(basename "$LATEST_WHEEL")
-    LATEST_VERSION=$(echo "$WHL_FILE" | grep -oP "llama_cpp_python-\K([0-9]+\.[0-9]+\.[0-9]+(\+[a-z0-9]+)?)")
-
-    INSTALLED_VERSION=$(pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3 || echo "")
-
-    if [ "$INSTALLED_VERSION" = "$LATEST_VERSION" ]; then
-        echo "The latest version of llama-cpp-python ($LATEST_VERSION) is already installed."
-    else
-        echo "Installing the latest version of llama-cpp-python ($LATEST_VERSION) for your system ($INSTALLED_VERSION) is installed)"
-        python -m pip install --prefer-binary --force-reinstall "$LATEST_WHEEL"
-    fi
-}
-
-echo "Checking for llama-cpp-python installation..."
-check_and_install
+echo "Installing accelerated llama-cpp-python..."
+poetry run python -m pip install llama-cpp-python --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"
 
 echo "Installation complete. Please check for any errors above."
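This rewrite replaces the homegrown wheel discovery (`detect_platform`, `detect_latest_accelerated_version`, `check_and_install`) with pip's own resolver: `get_index_url` composes an acceleration-specific package index from the detected CPU architecture and GPU backend, and `--extra-index-url` lets pip pick a compatible binary wheel from it, falling back to the regular index if none fits. As a sketch, on a hypothetical AVX2 machine where `nvcc` reports CUDA 12.1, `detect_gpu_acceleration` would yield `cu121` and the install step would expand to:

```sh
# Hypothetical expansion of the new install step (AVX2 CPU, CUDA 12.1).
# The --extra-index-url value is what get_index_url would compose for
# that hardware; pip resolves a matching llama-cpp-python wheel from it.
poetry run python -m pip install llama-cpp-python \
    --prefer-binary --force-reinstall \
    --extra-index-url="https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu121"
```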
diff --git a/selfie/embeddings/__init__.py b/selfie/embeddings/__init__.py
index e874eaa..5c9870a 100644
--- a/selfie/embeddings/__init__.py
+++ b/selfie/embeddings/__init__.py
@@ -58,7 +58,6 @@ async def completion_async(prompt):
         self.character_name = character_name
 
         self.embeddings = Embeddings(
-            path="sentence-transformers/all-MiniLM-L6-v2",
             sqlite={"wal": True},  # For now, sqlite w/the default driver is the only way to use WAL.
             content=True
diff --git a/start.sh b/start.sh
index 723da6e..0ffd318 100755
--- a/start.sh
+++ b/start.sh
@@ -19,25 +19,20 @@ if [ ! -z "$MISSING_DEPENDENCIES" ]; then
     exit 1
 fi
 
+if command -v nvcc &>/dev/null || command -v rocm-info &>/dev/null || [ "$(uname -m)" = "arm64" ]; then
+    GPU_FLAG="--gpu"
+else
+    GPU_FLAG=""
+fi
+
 echo "Installing Python dependencies with Poetry..."
 poetry check || poetry install
 
 echo "Building UI with Yarn..."
 ./scripts/build-ui.sh
 
-ACCELERATION_FLAG=""
-
 echo "Running llama-cpp-python-cublas.sh to enable hardware acceleration..."
 ./scripts/llama-cpp-python-cublas.sh
 
-LLAMA_CPP_VERSION=$(poetry run pip list --format=freeze | grep "llama_cpp_python==" | cut -d'=' -f3)
-
-if [[ $LLAMA_CPP_VERSION == *"-gpu"* ]]; then
-    echo "Accelerated version of llama_cpp_python detected. Enabling GPU support."
-    ACCELERATION_FLAG="--gpu"
-else
-    echo "No accelerated version of llama_cpp_python detected. Running without GPU support."
-fi
-
 echo "Running selfie..."
-poetry run $PYTHON_COMMAND -m selfie $ACCELERATION_FLAG
+poetry run python -m selfie $GPU_FLAG
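start.sh now decides GPU support up front by probing the hardware directly (nvcc, rocm-info, or an arm64 machine, i.e. Apple Silicon), rather than inferring it afterwards from the installed llama_cpp_python version string. If the detection guesses wrong, the server can be started by hand with the flag set explicitly; this is the same invocation start.sh runs, with $GPU_FLAG written out:

```sh
# Force GPU support on; drop --gpu to run CPU-only.
poetry run python -m selfie --gpu
```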