Add /upstream endpoint (#30)
* remove catch-all route to upstream proxy (it was broken anyway)
* add /upstream/:model_id to swap and route to upstream path
* add /upstream HTML endpoint and unlisted option
* add /upstream endpoint to show a list of available models
* add `unlisted` configuration option to omit a model from /v1/models and /upstream lists
* add favicon.ico
mostlygeek authored Dec 17, 2024
1 parent 7183f6b commit 891f6a5
Showing 7 changed files with 78 additions and 22 deletions.
11 changes: 9 additions & 2 deletions README.md
@@ -8,15 +8,16 @@ llama-swap is an OpenAI API compatible server that gives you complete control ov
Features:

- ✅ Easy to deploy: single binary with no dependencies
- ✅ Easy to config: single yaml file
- ✅ On-demand model switching
- ✅ Full control over server settings per model
- ✅ OpenAI API support (`v1/completions` and `v1/chat/completions`)
- ✅ Multiple GPU support
- ✅ Run multiple models at once with `profiles`
- ✅ Remote log monitoring at `/log`
- ✅ Automatic unloading of models from GPUs after timeout
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
- ✅ Direct access to proxied upstream HTTP server via `/upstream/:model_id`

## Releases

@@ -73,6 +74,12 @@ models:
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
proxy: http://127.0.0.1:8999

# unlisted models do not show up in /v1/models or /upstream lists
# but they can still be requested as normal
"qwen-unlisted":
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
unlisted: true

# profiles make it easy to manage multi-model (and GPU) configurations.
#
# Tips:
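
The `unlisted` option only affects discovery; a hidden model can still be swapped in by name. Below is a minimal sketch of requesting the `qwen-unlisted` model above, assuming llama-swap is listening on `localhost:8080` (the address and port are illustrative, not taken from this commit):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// "qwen-unlisted" is hidden from /v1/models and /upstream,
	// but a completion request for it still triggers a swap.
	body := `{"model": "qwen-unlisted", "messages": [{"role": "user", "content": "hello"}]}`
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```
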
3 changes: 3 additions & 0 deletions config.example.yaml
@@ -33,6 +33,7 @@ models:
- env1=hello
cmd: build/simple-responder --port 8999
proxy: http://127.0.0.1:8999
unlisted: true

# use "none" to skip check. Caution this may cause some requests to fail
# until the upstream server is ready for traffic
@@ -42,9 +43,11 @@ models:
"broken":
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
proxy: http://127.0.0.1:8999
unlisted: true
"broken_timeout":
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
proxy: http://127.0.0.1:9000
unlisted: true

# creating a coding profile with models for code generation and general questions
profiles:
Binary file added misc/assets/favicon-raw.png
Binary file not shown.
1 change: 1 addition & 0 deletions proxy/config.go
@@ -16,6 +16,7 @@ type ModelConfig struct {
Env []string `yaml:"env"`
CheckEndpoint string `yaml:"checkEndpoint"`
UnloadAfter int `yaml:"ttl"`
Unlisted bool `yaml:"unlisted"`
}

func (m *ModelConfig) SanitizedCommand() ([]string, error) {
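
A small sketch of how the new field round-trips from YAML, using a trimmed mirror of the struct above; `gopkg.in/yaml.v3` is assumed here, since the project's actual YAML dependency isn't visible in this diff:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3" // assumed; the project's YAML library isn't shown in this diff
)

// trimmed mirror of ModelConfig, limited to the fields visible in the hunk above
type modelConfig struct {
	Env           []string `yaml:"env"`
	CheckEndpoint string   `yaml:"checkEndpoint"`
	UnloadAfter   int      `yaml:"ttl"`
	Unlisted      bool     `yaml:"unlisted"`
}

func main() {
	raw := []byte("env:\n  - env1=hello\nttl: 60\nunlisted: true\n")

	var mc modelConfig
	if err := yaml.Unmarshal(raw, &mc); err != nil {
		panic(err)
	}

	// Unlisted defaults to false when omitted, so existing configs
	// keep appearing in /v1/models and /upstream unchanged.
	fmt.Printf("%+v\n", mc) // {Env:[env1=hello] CheckEndpoint: UnloadAfter:60 Unlisted:true}
}
```
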
Binary file added proxy/html/favicon.ico
Binary file not shown.
78 changes: 65 additions & 13 deletions proxy/proxymanager.go
@@ -2,10 +2,12 @@ package proxy

import (
"bytes"
"embed"
"encoding/json"
"fmt"
"io"
"net/http"
"sort"
"strconv"
"strings"
"sync"
@@ -18,6 +20,15 @@ const (
PROFILE_SPLIT_CHAR = ":"
)

//go:embed html/favicon.ico
var faviconData []byte

//go:embed html/logs.html
var logsHTML []byte

// make sure the embed import is not removed by the IDE's auto-package importer
var _ = embed.FS{}

type ProxyManager struct {
sync.Mutex

@@ -48,7 +59,12 @@ func New(config *Config) *ProxyManager {
pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
pm.ginEngine.GET("/logs/streamSSE", pm.streamLogsHandlerSSE)

pm.ginEngine.GET("/upstream", pm.upstreamIndex)
pm.ginEngine.Any("/upstream/:model_id/*upstreamPath", pm.proxyToUpstream)

pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
c.Data(http.StatusOK, "image/x-icon", faviconData)
})

// Disable console color for testing
gin.DisableConsoleColor()
@@ -86,7 +102,11 @@ func (pm *ProxyManager) stopProcesses() {

func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
data := []interface{}{}
for id, modelConfig := range pm.config.Models {
if modelConfig.Unlisted {
continue
}

data = append(data, map[string]interface{}{
"id": id,
"object": "model",
@@ -113,7 +133,7 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) {
pm.Lock()
defer pm.Unlock()

// Check if requestedModel contains a PROFILE_SPLIT_CHAR
profileName, modelName := "", requestedModel
if idx := strings.Index(requestedModel, PROFILE_SPLIT_CHAR); idx != -1 {
profileName = requestedModel[:idx]
@@ -170,6 +190,48 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) {
return pm.currentProcesses[requestedProcessKey], nil
}

func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
requestedModel := c.Param("model_id")

if requestedModel == "" {
c.AbortWithError(http.StatusBadRequest, fmt.Errorf("model id required in path"))
return
}

if process, err := pm.swapModel(requestedModel); err != nil {
c.AbortWithError(http.StatusNotFound, fmt.Errorf("unable to swap to model, %s", err.Error()))
} else {
// rewrite the path
c.Request.URL.Path = c.Param("upstreamPath")
process.ProxyRequest(c.Writer, c.Request)
}
}
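
The wildcard `*upstreamPath` means everything after the model id is forwarded verbatim once the swap completes. A hypothetical direct call to a llama.cpp-native endpoint, assuming a configured model id of `llama` and llama-swap on `localhost:8080` (both illustrative):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// /upstream/llama/completion swaps in "llama" (if needed) and then
	// proxies the request to the upstream server as POST /completion.
	resp, err := http.Post(
		"http://localhost:8080/upstream/llama/completion",
		"application/json",
		strings.NewReader(`{"prompt": "Hello", "n_predict": 16}`),
	)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```
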

func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
var html strings.Builder

html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><ul>")

// Extract keys and sort them
var modelIDs []string
for modelID, modelConfig := range pm.config.Models {
if modelConfig.Unlisted {
continue
}

modelIDs = append(modelIDs, modelID)
}
sort.Strings(modelIDs)

// Iterate over sorted keys
for _, modelID := range modelIDs {
html.WriteString(fmt.Sprintf("<li><a href=\"/upstream/%s\">%s</a></li>", modelID, modelID))
}
html.WriteString("</ul></body></html>")
c.Header("Content-Type", "text/html")
c.String(http.StatusOK, html.String())
}

func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
bodyBytes, err := io.ReadAll(c.Request.Body)
if err != nil {
@@ -201,16 +263,6 @@ func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
}
}

func (pm *ProxyManager) proxyNoRouteHandler(c *gin.Context) {
// since maps are unordered, just use the first available process if one exists
for _, process := range pm.currentProcesses {
process.ProxyRequest(c.Writer, c.Request)
return
}

c.AbortWithError(http.StatusBadRequest, fmt.Errorf("no strategy to handle request"))
}

func ProcessKeyName(groupName, modelName string) string {
return groupName + PROFILE_SPLIT_CHAR + modelName
}
7 changes: 0 additions & 7 deletions proxy/proxymanager_loghandlers.go
@@ -1,20 +1,13 @@
package proxy

import (
"embed"
"fmt"
"net/http"
"strings"

"github.com/gin-gonic/gin"
)

//go:embed html/logs.html
var logsHTML []byte

// make sure embed is kept there by the IDE auto-package importer
var _ = embed.FS{}

func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) {

accept := c.GetHeader("Accept")
