From 9ba0e710a545ae3bb6f54303a5989619ea8f64ee Mon Sep 17 00:00:00 2001 From: Dima Arnautov Date: Tue, 10 Oct 2023 18:59:33 +0200 Subject: [PATCH] [ML] API integration tests for start and stop model deployment (#168460) ## Summary Part of #164562 Adds API integration tests for `_start` and `_stop` trained model deployment. ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios --- .../server/routes/schemas/inference_schema.ts | 6 +- .../apis/ml/trained_models/index.ts | 1 + .../trained_models/start_stop_deployment.ts | 203 ++++++++++++++++++ x-pack/test/functional/services/ml/api.ts | 29 ++- 4 files changed, 228 insertions(+), 11 deletions(-) create mode 100644 x-pack/test/api_integration/apis/ml/trained_models/start_stop_deployment.ts diff --git a/x-pack/plugins/ml/server/routes/schemas/inference_schema.ts b/x-pack/plugins/ml/server/routes/schemas/inference_schema.ts index 260b3bc5881d8..b24451bf755de 100644 --- a/x-pack/plugins/ml/server/routes/schemas/inference_schema.ts +++ b/x-pack/plugins/ml/server/routes/schemas/inference_schema.ts @@ -27,9 +27,9 @@ export const modelAndDeploymentIdSchema = schema.object({ export const threadingParamsSchema = schema.maybe( schema.object({ - number_of_allocations: schema.number(), - threads_per_allocation: schema.number(), - priority: schema.oneOf([schema.literal('low'), schema.literal('normal')]), + number_of_allocations: schema.maybe(schema.number()), + threads_per_allocation: schema.maybe(schema.number()), + priority: schema.maybe(schema.oneOf([schema.literal('low'), schema.literal('normal')])), deployment_id: schema.maybe(schema.string()), }) ); diff --git a/x-pack/test/api_integration/apis/ml/trained_models/index.ts b/x-pack/test/api_integration/apis/ml/trained_models/index.ts index d1812dc188b00..80e31fd715ea1 100644 --- a/x-pack/test/api_integration/apis/ml/trained_models/index.ts +++ 
b/x-pack/test/api_integration/apis/ml/trained_models/index.ts @@ -13,5 +13,6 @@ export default function ({ loadTestFile }: FtrProviderContext) { loadTestFile(require.resolve('./get_model_stats')); loadTestFile(require.resolve('./get_model_pipelines')); loadTestFile(require.resolve('./delete_model')); + loadTestFile(require.resolve('./start_stop_deployment')); }); } diff --git a/x-pack/test/api_integration/apis/ml/trained_models/start_stop_deployment.ts b/x-pack/test/api_integration/apis/ml/trained_models/start_stop_deployment.ts new file mode 100644 index 0000000000000..debbba310fad1 --- /dev/null +++ b/x-pack/test/api_integration/apis/ml/trained_models/start_stop_deployment.ts @@ -0,0 +1,203 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import type { MlGetTrainedModelsStatsResponse } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import { SUPPORTED_TRAINED_MODELS } from '../../../../functional/services/ml/api'; +import { FtrProviderContext } from '../../../ftr_provider_context'; +import { USER } from '../../../../functional/services/ml/security_common'; +import { getCommonRequestHeader } from '../../../../functional/services/ml/common_api'; + +export default ({ getService }: FtrProviderContext) => { + const supertest = getService('supertestWithoutAuth'); + const ml = getService('ml'); + + const testModel = { + ...SUPPORTED_TRAINED_MODELS.TINY_NER, + id: SUPPORTED_TRAINED_MODELS.TINY_NER.name, + }; + + const customDeploymentId = 'my_deployment_id'; + + describe('Start and stop deployment tests', () => { + before(async () => { + await ml.api.importTrainedModel(testModel.id, testModel.name); + await ml.testResources.setKibanaTimeZoneToUTC(); + + // Make sure the .ml-stats index is created in advance, see 
https://github.com/elastic/elasticsearch/issues/65846 + await ml.api.assureMlStatsIndexExists(); + }); + + after(async () => { + await ml.api.stopAllTrainedModelDeploymentsES(); + await ml.api.deleteAllTrainedModelsES(); + await ml.api.cleanMlIndices(); + await ml.testResources.cleanMLSavedObjects(); + }); + + it('does not allow to start trained model deployment if the user does not have required permissions', async () => { + const { body: startResponseBody, status: startResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/deployment/_start`) + .auth(USER.ML_VIEWER, ml.securityCommon.getPasswordForUser(USER.ML_VIEWER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(403, startResponseStatus, startResponseBody); + + // verify that model deployment has not been started + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const deploymentStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => v.deployment_stats?.deployment_id === testModel.id); + + expect(deploymentStats).to.be(undefined); + }); + + it('starts trained model deployment with the default ID', async () => { + const { body: startResponseBody, status: deleteResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/deployment/_start`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, deleteResponseStatus, startResponseBody); + + expect(startResponseBody.assignment.assignment_state).to.eql('started'); + expect(startResponseBody.assignment.task_parameters.threads_per_allocation).to.eql(1); + 
expect(startResponseBody.assignment.task_parameters.priority).to.eql('normal'); + expect(startResponseBody.assignment.task_parameters.deployment_id).to.eql(testModel.id); + + // check deployment status + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const modelStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => v.deployment_stats?.deployment_id === testModel.id); + + expect(modelStats!.deployment_stats!.allocation_status.state).to.match( + /\bstarted\b|\bfully_allocated\b/ + ); + }); + + it('starts trained model deployment with provided deployment ID', async () => { + const { body: startResponseBody, status: deleteResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/deployment/_start`) + .query({ deployment_id: customDeploymentId }) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, deleteResponseStatus, startResponseBody); + + expect(startResponseBody.assignment.assignment_state).to.eql('started'); + expect(startResponseBody.assignment.task_parameters.deployment_id).to.eql(customDeploymentId); + + // check deployment status + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const modelStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => 
v.deployment_stats?.deployment_id === customDeploymentId); + + expect(modelStats!.deployment_stats!.allocation_status.state).to.match( + /\bstarted\b|\bfully_allocated\b/ + ); + }); + + it('returns 404 if requested trained model does not exist', async () => { + const { body, status } = await supertest + .post(`/internal/ml/trained_models/not_existing_model/deployment/_start`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(404, status, body); + }); + + it('does not allow to stop trained model deployment if the user does not have required permissions', async () => { + const { body: stopResponseBody, status: stopResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/${testModel.id}/deployment/_stop`) + .auth(USER.ML_VIEWER, ml.securityCommon.getPasswordForUser(USER.ML_VIEWER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(403, stopResponseStatus, stopResponseBody); + + // verify that model deployment has not been stopped + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const modelStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => v.deployment_stats?.deployment_id === testModel.id); + + expect(modelStats!.deployment_stats!.allocation_status.state).to.match( + /\bstarted\b|\bfully_allocated\b/ + ); + }); + + it('stops trained model deployment with the default ID', async () => { + const { body: stopResponseBody, status: stopResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/${testModel.id}/deployment/_stop`) + .auth(USER.ML_POWERUSER, 
ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, stopResponseStatus, stopResponseBody); + + expect(stopResponseBody).to.eql({ + [testModel.id]: { + success: true, + }, + }); + + // check deployment status + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const deploymentStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => v.deployment_stats?.deployment_id === testModel.id); + + expect(deploymentStats).to.be(undefined); + }); + + it('stops trained model deployment with provided deployment ID', async () => { + const { body: stopResponseBody, status: stopResponseStatus } = await supertest + .post(`/internal/ml/trained_models/${testModel.id}/${customDeploymentId}/deployment/_stop`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, stopResponseStatus, stopResponseBody); + + expect(stopResponseBody).to.eql({ + [customDeploymentId]: { + success: true, + }, + }); + + // check deployment status + const { body: statsResponse, status: statsResponseStatus } = await supertest + .get(`/internal/ml/trained_models/${testModel.id}/_stats`) + .auth(USER.ML_POWERUSER, ml.securityCommon.getPasswordForUser(USER.ML_POWERUSER)) + .set(getCommonRequestHeader('1')); + ml.api.assertResponseStatusCode(200, statsResponseStatus, statsResponse); + + const deploymentStats = ( + statsResponse as MlGetTrainedModelsStatsResponse + ).trained_model_stats.find((v) => v.deployment_stats?.deployment_id === customDeploymentId); + + expect(deploymentStats).to.be(undefined); + }); + }); 
+}; diff --git a/x-pack/test/functional/services/ml/api.ts b/x-pack/test/functional/services/ml/api.ts index b514d18d552ad..58c83cd78a1e4 100644 --- a/x-pack/test/functional/services/ml/api.ts +++ b/x-pack/test/functional/services/ml/api.ts @@ -1341,6 +1341,15 @@ export function MachineLearningAPIProvider({ getService }: FtrProviderContext) { return body; }, + async getTrainedModelStatsES(): Promise { + log.debug(`Getting trained models stats`); + const { body, status } = await esSupertest.get(`/_ml/trained_models/_stats`); + this.assertResponseStatusCode(200, status, body); + + log.debug('> Trained model stats fetched'); + return body; + }, + async deleteTrainedModelES(modelId: string) { log.debug(`Deleting trained model with id "${modelId}"`); const { body, status } = await esSupertest @@ -1363,10 +1372,10 @@ export function MachineLearningAPIProvider({ getService }: FtrProviderContext) { } }, - async stopTrainedModelDeploymentES(modelId: string) { - log.debug(`Stopping trained model deployment with id "${modelId}"`); + async stopTrainedModelDeploymentES(deploymentId: string) { + log.debug(`Stopping trained model deployment with id "${deploymentId}"`); const { body, status } = await esSupertest.post( - `/_ml/trained_models/${modelId}/deployment/_stop` + `/_ml/trained_models/${deploymentId}/deployment/_stop` ); this.assertResponseStatusCode(200, status, body); @@ -1375,13 +1384,17 @@ export function MachineLearningAPIProvider({ getService }: FtrProviderContext) { async stopAllTrainedModelDeploymentsES() { log.debug(`Stopping all trained model deployments`); - const getModelsRsp = await this.getTrainedModelsES(); - for (const model of getModelsRsp.trained_model_configs) { - if (this.isInternalModelId(model.model_id)) { - log.debug(`> Skipping internal ${model.model_id}`); + const getModelsRsp = await this.getTrainedModelStatsES(); + for (const modelStats of getModelsRsp.trained_model_stats) { + if (this.isInternalModelId(modelStats.model_id)) { + log.debug(`> 
Skipping internal ${modelStats.model_id}`); + continue; + } + if (modelStats.deployment_stats === undefined) { + log.debug(`> Skipping, no deployment stats for ${modelStats.model_id} found`); continue; } - await this.stopTrainedModelDeploymentES(model.model_id); + await this.stopTrainedModelDeploymentES(modelStats.deployment_stats.deployment_id); } },