diff --git a/docs/docs.go b/docs/docs.go index 0180d9c7..47679b7d 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -125,6 +125,14 @@ const docTemplate = `{ } }, "definitions": { + "clients.ClientConfig": { + "type": "object", + "properties": { + "timeout": { + "type": "integer" + } + } + }, "http.ErrorSchema": { "type": "object", "properties": { @@ -231,16 +239,35 @@ const docTemplate = `{ "id" ], "properties": { + "client": { + "$ref": "#/definitions/clients.ClientConfig" + }, "enabled": { + "description": "Is the model enabled?", "type": "boolean" }, + "error_budget": { + "type": "string" + }, "id": { + "description": "Model instance ID (unique in scope of the router)", "type": "string" }, "openai": { "$ref": "#/definitions/openai.Config" + } + } + }, + "retry.ExpRetryConfig": { + "type": "object", + "properties": { + "max_delay": { + "type": "integer" }, - "timeout": { + "max_retries": { + "type": "integer" + }, + "min_delay": { "type": "integer" } } @@ -253,22 +280,47 @@ const docTemplate = `{ ], "properties": { "enabled": { + "description": "Is router enabled?", "type": "boolean" }, "models": { + "description": "the list of models that could handle requests", "type": "array", "items": { "$ref": "#/definitions/providers.LangModelConfig" } }, + "retry": { + "description": "TODO: how to specify other backoff strategies?", + "allOf": [ + { + "$ref": "#/definitions/retry.ExpRetryConfig" + } + ] + }, "routers": { + "description": "Unique router ID", "type": "string" }, "strategy": { - "$ref": "#/definitions/strategy.RoutingStrategy" + "description": "strategy on picking the next model to serve the request", + "allOf": [ + { + "$ref": "#/definitions/routing.Strategy" + } + ] } } }, + "routing.Strategy": { + "type": "string", + "enum": [ + "priority" + ], + "x-enum-varnames": [ + "Priority" + ] + }, "schemas.ChatMessage": { "type": "object", "properties": { @@ -356,15 +408,6 @@ const docTemplate = `{ "type": "string" } } - }, - "strategy.RoutingStrategy": { - "type": "string", - "enum": [ - "priority" - ], - "x-enum-varnames": [ - "Priority" - ] } } }` diff --git a/docs/swagger.json b/docs/swagger.json index f03b8d7c..822bb8e5 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -122,6 +122,14 @@ } }, "definitions": { + "clients.ClientConfig": { + "type": "object", + "properties": { + "timeout": { + "type": "integer" + } + } + }, "http.ErrorSchema": { "type": "object", "properties": { @@ -228,16 +236,35 @@ "id" ], "properties": { + "client": { + "$ref": "#/definitions/clients.ClientConfig" + }, "enabled": { + "description": "Is the model enabled?", "type": "boolean" }, + "error_budget": { + "type": "string" + }, "id": { + "description": "Model instance ID (unique in scope of the router)", "type": "string" }, "openai": { "$ref": "#/definitions/openai.Config" + } + } + }, + "retry.ExpRetryConfig": { + "type": "object", + "properties": { + "max_delay": { + "type": "integer" }, - "timeout": { + "max_retries": { + "type": "integer" + }, + "min_delay": { "type": "integer" } } @@ -250,22 +277,47 @@ ], "properties": { "enabled": { + "description": "Is router enabled?", "type": "boolean" }, "models": { + "description": "the list of models that could handle requests", "type": "array", "items": { "$ref": "#/definitions/providers.LangModelConfig" } }, + "retry": { + "description": "TODO: how to specify other backoff strategies?", + "allOf": [ + { + "$ref": "#/definitions/retry.ExpRetryConfig" + } + ] + }, "routers": { + "description": "Unique router ID", "type": "string" }, "strategy": { - "$ref": "#/definitions/strategy.RoutingStrategy" + "description": "strategy on picking the next model to serve the request", + "allOf": [ + { + "$ref": "#/definitions/routing.Strategy" + } + ] } } }, + "routing.Strategy": { + "type": "string", + "enum": [ + "priority" + ], + "x-enum-varnames": [ + "Priority" + ] + }, "schemas.ChatMessage": { "type": "object", "properties": { @@ -353,15 +405,6 @@ "type": "string" } } - }, - "strategy.RoutingStrategy": { - "type": "string", - "enum": [ - "priority" - ], - "x-enum-varnames": [ - "Priority" - ] } } } \ No newline at end of file diff --git a/docs/swagger.yaml b/docs/swagger.yaml index 3cb98410..0d6e5720 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,5 +1,10 @@ basePath: / definitions: + clients.ClientConfig: + properties: + timeout: + type: integer + type: object http.ErrorSchema: properties: message: @@ -68,33 +73,61 @@ definitions: type: object providers.LangModelConfig: properties: + client: + $ref: '#/definitions/clients.ClientConfig' enabled: + description: Is the model enabled? type: boolean + error_budget: + type: string id: + description: Model instance ID (unique in scope of the router) type: string openai: $ref: '#/definitions/openai.Config' - timeout: - type: integer required: - id type: object + retry.ExpRetryConfig: + properties: + max_delay: + type: integer + max_retries: + type: integer + min_delay: + type: integer + type: object routers.LangRouterConfig: properties: enabled: + description: Is router enabled? type: boolean models: + description: the list of models that could handle requests items: $ref: '#/definitions/providers.LangModelConfig' type: array + retry: + allOf: + - $ref: '#/definitions/retry.ExpRetryConfig' + description: 'TODO: how to specify other backoff strategies?' routers: + description: Unique router ID type: string strategy: - $ref: '#/definitions/strategy.RoutingStrategy' + allOf: + - $ref: '#/definitions/routing.Strategy' + description: strategy on picking the next model to serve the request required: - models - routers type: object + routing.Strategy: + enum: + - priority + type: string + x-enum-varnames: + - Priority schemas.ChatMessage: properties: content: @@ -156,12 +189,6 @@ definitions: router: type: string type: object - strategy.RoutingStrategy: - enum: - - priority - type: string - x-enum-varnames: - - Priority host: localhost:9099 info: contact: diff --git a/pkg/providers/config.go b/pkg/providers/config.go index 84b3e316..964ac52d 100644 --- a/pkg/providers/config.go +++ b/pkg/providers/config.go @@ -15,9 +15,9 @@ import ( var ErrProviderNotFound = errors.New("provider not found") type LangModelConfig struct { - ID string `yaml:"id" json:"id" validate:"required"` - Enabled bool `yaml:"enabled" json:"enabled"` - ErrorBudget health.ErrorBudget `yaml:"error_budget" json:"error_budget"` + ID string `yaml:"id" json:"id" validate:"required"` // Model instance ID (unique in scope of the router) + Enabled bool `yaml:"enabled" json:"enabled"` // Is the model enabled? + ErrorBudget health.ErrorBudget `yaml:"error_budget" json:"error_budget" swaggertype:"primitive,string"` Client *clients.ClientConfig `yaml:"client" json:"client"` OpenAI *openai.Config `yaml:"openai" json:"openai"` // Add other providers like diff --git a/pkg/routers/config.go b/pkg/routers/config.go index d84c9408..8be6bcbc 100644 --- a/pkg/routers/config.go +++ b/pkg/routers/config.go @@ -10,7 +10,7 @@ import ( ) type Config struct { - LanguageRouters []LangRouterConfig `yaml:"language"` + LanguageRouters []LangRouterConfig `yaml:"language"` // the list of language routers } func (c *Config) BuildLangRouters(tel *telemetry.Telemetry) ([]*LangRouter, error) { @@ -43,11 +43,12 @@ func (c *Config) BuildLangRouters(tel *telemetry.Telemetry) ([]*LangRouter, erro } type LangRouterConfig struct { - ID string `yaml:"id" json:"routers" validate:"required"` - Enabled bool `yaml:"enabled" json:"enabled"` - Retry *retry.ExpRetryConfig `yaml:"retry" json:"retry"` // TODO: how to specify other backoff strategies? - RoutingStrategy routing.Strategy `yaml:"strategy" json:"strategy"` - Models []providers.LangModelConfig `yaml:"models" json:"models" validate:"required"` + ID string `yaml:"id" json:"routers" validate:"required"` // Unique router ID + Enabled bool `yaml:"enabled" json:"enabled"` // Is router enabled? + // TODO: how to specify other backoff strategies? + Retry *retry.ExpRetryConfig `yaml:"retry" json:"retry"` // retry when no healthy model is available to router + RoutingStrategy routing.Strategy `yaml:"strategy" json:"strategy"` // strategy on picking the next model to serve the request + Models []providers.LangModelConfig `yaml:"models" json:"models" validate:"required"` // the list of models that could handle requests } // BuildModels creates LanguageModel slice out of the given config diff --git a/pkg/routers/health/buckets_test.go b/pkg/routers/health/buckets_test.go index 2b3ce8f1..e590ecd0 100644 --- a/pkg/routers/health/buckets_test.go +++ b/pkg/routers/health/buckets_test.go @@ -47,17 +47,17 @@ func TestTokenBucket_TakeConcurrently(t *testing.T) { wg.Wait() - if time.Now().Sub(before) < 1*time.Second { + if time.Since(before) < 1*time.Second { t.Fatal("Did not wait 1s") } } func TestTokenBucket_TokenNumberIsCorrect(t *testing.T) { bucket := NewTokenBucket(1, 10) - require.Equal(t, 10.0, bucket.Tokens()) + require.Equal(t, 10, bucket.Tokens()) require.NoError(t, bucket.Take(2)) - require.InEpsilon(t, 8.0, bucket.Tokens(), 0.0001) + require.InEpsilon(t, 8, bucket.Tokens(), 0.0001) require.NoError(t, bucket.Take(2)) require.InEpsilon(t, 6.0, bucket.Tokens(), 0.0001) diff --git a/pkg/routers/retry/config.go b/pkg/routers/retry/config.go index 35b0fd7c..353986df 100644 --- a/pkg/routers/retry/config.go +++ b/pkg/routers/retry/config.go @@ -3,9 +3,9 @@ package retry import "time" type ExpRetryConfig struct { - MaxRetries int - MinDelay time.Duration - MaxDelay *time.Duration + MaxRetries int `yaml:"max_retries,omitempty" json:"max_retries"` + MinDelay time.Duration `yaml:"min_delay,omitempty" json:"min_delay" swaggertype:"primitive,integer"` + MaxDelay *time.Duration `yaml:"max_delay,omitempty" json:"max_delay" swaggertype:"primitive,integer"` } func DefaultExpRetryConfig() *ExpRetryConfig {