Skip to content

Commit

Permalink
Adds ML start and stop trained model deployment specifications (#1061)
Browse files Browse the repository at this point in the history
  • Loading branch information
lcawl authored Nov 30, 2021
1 parent 5570df2 commit b8d027a
Show file tree
Hide file tree
Showing 8 changed files with 723 additions and 11 deletions.
424 changes: 420 additions & 4 deletions output/schema/schema.json

Large diffs are not rendered by default.

11 changes: 4 additions & 7 deletions output/schema/validation-errors.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions output/typescript/types.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions specification/ml/_types/TrainedModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { InferenceConfigContainer } from '@_types/aggregations/pipeline'
import { Field, Id, Name, VersionString } from '@_types/common'
import { double, integer, long } from '@_types/Numeric'
import { Time } from '@_types/Time'
import { DateString } from '@_types/Time'

export class TrainedModelStats {
/** The unique identifier of the trained model. */
Expand Down Expand Up @@ -157,3 +158,80 @@ export enum TrainedModelType {
*/
pytorch
}

export enum DeploymentState {
  /** At least one node has started the trained model. */
  started = 0,
  /** The deployment is starting up, but no node hosts the model yet. */
  starting = 1,
  // NOTE(review): the ordinal jumps from 1 to 3 here; confirm that skipping 2 is intentional.
  /** Every valid node has started the trained model deployment. */
  fully_allocated = 3
}

export class TrainedModelAllocationTaskParameters {
  // Modeled as `long` rather than `integer`: a byte count for a large model
  // can exceed the 32-bit signed range (about 2 GiB).
  /**
   * The size of the trained model in bytes.
   */
  model_bytes: long
  /**
   * The unique identifier for the trained model.
   */
  model_id: Id
}

// Members rely on implicit ordinals (0, 1, 2, ...), so declaration order is significant.
export enum RoutingState {
  /** The attempt to allocate the model failed. */
  failed,
  /** The model is allocated and ready to accept inference requests. */
  started,
  /** The model is attempting to allocate on this node; inference requests are not accepted yet. */
  starting,
  /** The model has been fully deallocated from this node. */
  stopped,
  /** The model is in the process of being deallocated from this node. */
  stopping
}

export class TrainedModelAllocationRoutingTable {
  /**
   * The reason for the current state. It is usually populated only when the
   * `routing_state` is `failed`, so it can be absent otherwise.
   */
  reason?: string
  /**
   * The current routing state.
   */
  routing_state: RoutingState
}

export class TrainedModelAllocation {
  /** Overall state of the allocation. */
  allocation_state: DeploymentState
  /** Per-node allocation state. */
  routing_table: Dictionary<string, TrainedModelAllocationRoutingTable>
  /** Timestamp at which the deployment started. */
  start_time: DateString
  /** Parameters of the allocation task: the model identifier and the model size in bytes. */
  task_parameters: TrainedModelAllocationTaskParameters
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { integer } from '@_types/Numeric'
import { Time } from '@_types/Time'
import { DeploymentState } from '../_types/TrainedModel'

/**
 * Starts a deployment of a trained model, which allocates the model to every machine learning node.
 * @rest_spec_name ml.start_trained_model_deployment
 * @since 8.0.0
 * @stability experimental
 * @cluster_privileges manage_ml
 */
export interface Request extends RequestBase {
  path_parts: {
    /**
     * Unique identifier of the trained model to deploy. Only PyTorch models are supported at present.
     */
    model_id: Id
  }
  query_parameters: {
    /**
     * Number of threads used by the inference process. Raising this value generally makes inference
     * faster, but the effective thread count is capped by the number of available CPU cores.
     * @server_default 1
     */
    inference_threads?: integer
    /**
     * Number of threads used when sending inference requests to the model. Raising this value
     * generally improves throughput.
     * @server_default 1
     */
    model_threads?: integer
    /**
     * Maximum number of inference requests allowed in the queue. Once the queue holds more requests
     * than this value, new requests are rejected with a 429 error.
     * @server_default 1024
     */
    queue_capacity?: integer
    /**
     * How long to wait for the model to deploy.
     * @server_default 20s
     */
    timeout?: Time
    /**
     * Allocation status to wait for before the request returns.
     * @server_default started
     */
    wait_for?: DeploymentState
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { TrainedModelAllocation } from '../_types/TrainedModel'

// Response shape: the body carries a single `allocation` object.
export class Response {
  body: {
    allocation: TrainedModelAllocation
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
/*
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
import { InferenceConfigContainer } from '@_types/aggregations/pipeline'
import { Definition, Input } from './types'
import { TrainedModelType } from '../_types/TrainedModel'
*/
/**
 * Stops a deployment of a trained model.
 * @rest_spec_name ml.stop_trained_model_deployment
 * @since 8.0.0
 * @stability experimental
 * @cluster_privileges manage_ml
 */
export interface Request extends RequestBase {
  path_parts: {
    /**
     * Unique identifier of the trained model.
     */
    model_id: Id
  }
  query_parameters: {
    /**
     * Controls the behavior in any of these situations:
     * the request contains wildcard expressions and no deployments match;
     * the request contains the `_all` string or no identifiers and nothing matches;
     * the request contains wildcard expressions and only some of them match.
     * By default, an empty array is returned when nothing matches and the matching subset is returned
     * for partial matches. If `false`, the request returns a 404 status code when there are no matches
     * or only partial matches.
     * @server_default true
     */
    allow_no_match?: boolean
    /**
     * Stops the deployment forcefully, even when ingest pipelines are using it. Those pipelines
     * cannot be used until the model deployment is restarted.
     * @server_default false
     */
    force?: boolean
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

// Response shape: the body carries a single boolean `stopped` flag.
export class Response {
  body: {
    stopped: boolean
  }
}

0 comments on commit b8d027a

Please sign in to comment.