From a567b23d337b4099f635086dc647219bf96ac5f1 Mon Sep 17 00:00:00 2001 From: Daniel Vigovszky Date: Mon, 23 Sep 2024 09:02:28 +0200 Subject: [PATCH 1/2] Introducing ephemeral workers (#945) * Introducing ephemeral workers, WIP * Undo wrong approach * CLI support * Stop worker immediately after invocation if it is ephemeral * Do not replay ephemeral workers * Not storing status of ephemeral workers in the KV store * Worker executor and integration tests * Fix integration tests * Do not allow update for ephemeral workers * CLI tests * Fix ingress --- .../proto/golem/worker/target_worker_id.proto | 10 + .../golem/worker/v1/worker_service.proto | 9 +- .../workerexecutor/v1/worker_executor.proto | 5 +- golem-api-grpc/src/lib.rs | 26 +- golem-cli/src/clients/worker.rs | 7 + golem-cli/src/command/worker.rs | 26 +- golem-cli/src/model.rs | 6 +- golem-cli/src/model/text.rs | 32 +- golem-cli/src/oss/clients/worker.rs | 89 +++-- golem-cli/src/oss/main.rs | 10 +- golem-cli/src/service/deploy.rs | 10 +- golem-cli/src/service/worker.rs | 71 ++-- golem-cli/tests/api_definition.rs | 4 +- golem-cli/tests/get.rs | 10 +- golem-cli/tests/text.rs | 29 +- golem-cli/tests/worker.rs | 142 ++++++- golem-common/src/grpc.rs | 9 +- golem-common/src/model/mod.rs | 178 ++++++++- golem-common/src/model/oplog.rs | 14 +- golem-common/src/uri/cloud/uri.rs | 89 ++++- golem-common/src/uri/cloud/url.rs | 83 ++++- golem-common/src/uri/oss/uri.rs | 88 ++++- golem-common/src/uri/oss/url.rs | 82 +++- golem-common/src/uri/oss/urn.rs | 117 ++++-- golem-component-service-base/src/model.rs | 2 +- golem-component-service-base/src/repo/mod.rs | 13 +- .../src/service/component.rs | 5 +- golem-router/golem-services.conf.template | 8 + golem-router/golem-services.local.conf | 8 + .../component_service/filesystem.rs | 51 ++- .../src/components/component_service/mod.rs | 85 +++-- golem-test-framework/src/dsl/mod.rs | 98 +++-- .../src/durable_host/mod.rs | 351 ++++++++++-------- .../src/durable_host/wasm_rpc/mod.rs | 
13 +- golem-worker-executor-base/src/grpc.rs | 55 ++- golem-worker-executor-base/src/model.rs | 30 +- .../src/services/component.rs | 39 +- .../src/services/oplog/tests.rs | 3 + .../src/services/shard.rs | 6 +- .../src/services/worker.rs | 167 +++++---- .../src/services/worker_proxy.rs | 4 +- golem-worker-executor-base/src/worker.rs | 36 +- golem-worker-executor-base/tests/api.rs | 156 +++++++- .../tests/measure_test_component_mem.rs | 3 +- .../src/service/worker/default.rs | 55 ++- .../src/service/worker/routing_logic.rs | 50 ++- golem-worker-service/src/api/worker.rs | 121 +++++- golem-worker-service/src/grpcapi/worker.rs | 45 ++- .../src/worker_bridge_request_executor.rs | 2 +- integration-tests/tests/worker.rs | 123 +++++- kube/golem-chart/templates/ingress.yaml | 28 ++ openapi/golem-service.yaml | 172 ++++++++- 52 files changed, 2238 insertions(+), 637 deletions(-) create mode 100644 golem-api-grpc/proto/golem/worker/target_worker_id.proto diff --git a/golem-api-grpc/proto/golem/worker/target_worker_id.proto b/golem-api-grpc/proto/golem/worker/target_worker_id.proto new file mode 100644 index 0000000000..f9829ef7a2 --- /dev/null +++ b/golem-api-grpc/proto/golem/worker/target_worker_id.proto @@ -0,0 +1,10 @@ +syntax = "proto3"; + +import "golem/component/component_id.proto"; + +package golem.worker; + +message TargetWorkerId { + golem.component.ComponentId component_id = 1; + optional string name = 2; +} diff --git a/golem-api-grpc/proto/golem/worker/v1/worker_service.proto b/golem-api-grpc/proto/golem/worker/v1/worker_service.proto index 43ace2292c..0f2236caaf 100644 --- a/golem-api-grpc/proto/golem/worker/v1/worker_service.proto +++ b/golem-api-grpc/proto/golem/worker/v1/worker_service.proto @@ -17,6 +17,7 @@ import public "golem/worker/log_event.proto"; import public "golem/worker/worker_id.proto"; import public "golem/component/component_id.proto"; import public "golem/worker/update_mode.proto"; +import public "golem/worker/target_worker_id.proto"; service 
WorkerService { rpc LaunchNewWorker (LaunchNewWorkerRequest) returns (LaunchNewWorkerResponse); @@ -102,7 +103,7 @@ message InterruptWorkerResponse { } message InvokeAndAwaitRequest { - golem.worker.WorkerId workerId = 1; + golem.worker.TargetWorkerId workerId = 1; golem.worker.IdempotencyKey idempotencyKey = 2; string function = 3; golem.worker.InvokeParameters invokeParameters = 4; @@ -117,7 +118,7 @@ message InvokeAndAwaitResponse { } message InvokeAndAwaitJsonRequest { - golem.worker.WorkerId workerId = 1; + golem.worker.TargetWorkerId workerId = 1; golem.worker.IdempotencyKey idempotencyKey = 2; string function = 3; repeated string invokeParameters = 4; @@ -132,7 +133,7 @@ message InvokeAndAwaitJsonResponse { } message InvokeRequest { - golem.worker.WorkerId workerId = 1; + golem.worker.TargetWorkerId workerId = 1; golem.worker.IdempotencyKey idempotencyKey = 2; string function = 3; golem.worker.InvokeParameters invokeParameters = 4; @@ -147,7 +148,7 @@ message InvokeResponse { } message InvokeJsonRequest { - golem.worker.WorkerId workerId = 1; + golem.worker.TargetWorkerId workerId = 1; golem.worker.IdempotencyKey idempotencyKey = 2; string function = 3; repeated string invokeParameters = 4; diff --git a/golem-api-grpc/proto/golem/workerexecutor/v1/worker_executor.proto b/golem-api-grpc/proto/golem/workerexecutor/v1/worker_executor.proto index 8f542c7cd1..eb0496da6c 100644 --- a/golem-api-grpc/proto/golem/workerexecutor/v1/worker_executor.proto +++ b/golem-api-grpc/proto/golem/workerexecutor/v1/worker_executor.proto @@ -11,6 +11,7 @@ import public "golem/shardmanager/shard_id.proto"; import public "golem/component/component_id.proto"; import public "golem/worker/cursor.proto"; import public "golem/worker/update_mode.proto"; +import public "golem/worker/target_worker_id.proto"; import public "golem/worker/worker_id.proto"; import public "golem/worker/worker_metadata.proto"; import public "golem/worker/worker_status.proto"; @@ -101,7 +102,7 @@ message 
CreateWorkerResponse { message InvokeAndAwaitWorkerRequest { - golem.worker.WorkerId worker_id = 1; + golem.worker.TargetWorkerId worker_id = 1; string name = 2; repeated wasm.rpc.Val input = 3; golem.worker.IdempotencyKey idempotency_key = 4; @@ -135,7 +136,7 @@ message InvokeAndAwaitWorkerSuccessTyped { message InvokeWorkerRequest { - golem.worker.WorkerId worker_id = 1; + golem.worker.TargetWorkerId worker_id = 1; string name = 2; repeated wasm.rpc.Val input = 3; golem.worker.IdempotencyKey idempotency_key = 4; diff --git a/golem-api-grpc/src/lib.rs b/golem-api-grpc/src/lib.rs index e3ae1731f4..a8b4da215b 100644 --- a/golem-api-grpc/src/lib.rs +++ b/golem-api-grpc/src/lib.rs @@ -191,9 +191,9 @@ pub mod proto { #[cfg(test)] mod tests { - use std::str::FromStr; - use crate::proto::golem; + use prost::Message; + use std::str::FromStr; #[test] fn test_uuid() { @@ -206,5 +206,27 @@ pub mod proto { println!("template_id_proto: {:?}", template_id_proto); println!("token_proto: {:?}", token_proto); } + + #[test] + fn target_worker_id_and_worker_id_are_bin_compatible() { + let component_id_uuid = uuid::Uuid::new_v4(); + let component_id_uuid: golem::common::Uuid = component_id_uuid.into(); + let component_id = golem::component::ComponentId { + value: Some(component_id_uuid), + }; + let target_worker_id = golem::worker::TargetWorkerId { + component_id: Some(component_id.clone()), + name: Some("hello".to_string()), + }; + let worker_id = golem::worker::WorkerId { + component_id: Some(component_id), + name: "hello".to_string(), + }; + + let target_worker_id_bytes = target_worker_id.encode_to_vec(); + let worker_id_bytes = worker_id.encode_to_vec(); + + assert_eq!(target_worker_id_bytes, worker_id_bytes); + } } } diff --git a/golem-cli/src/clients/worker.rs b/golem-cli/src/clients/worker.rs index a88458564a..5fab56f94f 100644 --- a/golem-cli/src/clients/worker.rs +++ b/golem-cli/src/clients/worker.rs @@ -94,3 +94,10 @@ pub trait WorkerClient { target_version: u64, ) -> 
Result<(), GolemError>; } + +pub fn worker_name_required(urn: &WorkerUrn) -> Result<String, GolemError> { + urn.id + .worker_name + .clone() + .ok_or_else(|| GolemError("Must specify the worker's name".to_string())) +} diff --git a/golem-cli/src/command/worker.rs b/golem-cli/src/command/worker.rs index 0c24637a90..fb5c9a24a4 100644 --- a/golem-cli/src/command/worker.rs +++ b/golem-cli/src/command/worker.rs @@ -16,7 +16,7 @@ use crate::command::ComponentRefSplit; use clap::builder::ValueParser; use clap::{ArgMatches, Args, Error, FromArgMatches, Subcommand}; use golem_client::model::ScanCursor; -use golem_common::model::WorkerId; +use golem_common::model::TargetWorkerId; use golem_common::uri::oss::uri::{ComponentUri, WorkerUri}; use golem_common::uri::oss::url::{ComponentUrl, WorkerUrl}; use golem_common::uri::oss::urn::{ComponentUrn, WorkerUrn}; @@ -123,9 +123,9 @@ impl From<&OssWorkerNameOrUriArg> for OssWorkerUriArg { match &value.component { Some(ComponentUri::URN(component_urn)) => { let uri = WorkerUri::URN(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: component_urn.id.clone(), - worker_name, + worker_name: Some(worker_name), }, }); OssWorkerUriArg { @@ -137,7 +137,7 @@ impl From<&OssWorkerNameOrUriArg> for OssWorkerUriArg { Some(ComponentUri::URL(component_url)) => { let uri = WorkerUri::URL(WorkerUrl { component_name: component_url.name.to_string(), - worker_name, + worker_name: Some(worker_name), }); OssWorkerUriArg { @@ -150,7 +150,7 @@ impl From<&OssWorkerNameOrUriArg> for OssWorkerUriArg { let component_name = value.component_name.clone().unwrap(); let uri = WorkerUri::URL(WorkerUrl { component_name, - worker_name, + worker_name: Some(worker_name), }); OssWorkerUriArg { @@ -185,7 +185,11 @@ impl From<&OssWorkerUriArg> for OssWorkerNameOrUriArg { worker: None, component: Some(component_uri), component_name: None, - worker_name: Some(WorkerName(urn.id.worker_name.to_string())), + worker_name: urn + .id + .worker_name + .as_ref() + .map(|n| 
WorkerName(n.to_string())), } } WorkerUri::URL(url) => { @@ -194,7 +198,10 @@ impl From<&OssWorkerUriArg> for OssWorkerNameOrUriArg { worker: None, component: None, component_name: Some(url.component_name.to_string()), - worker_name: Some(WorkerName(url.worker_name.to_string())), + worker_name: url + .worker_name + .as_ref() + .map(|n| WorkerName(n.to_string())), } } else { let component_uri = ComponentUri::URL(ComponentUrl { @@ -205,7 +212,10 @@ impl From<&OssWorkerUriArg> for OssWorkerNameOrUriArg { worker: None, component: Some(component_uri), component_name: None, - worker_name: Some(WorkerName(url.worker_name.to_string())), + worker_name: url + .worker_name + .as_ref() + .map(|n| WorkerName(n.to_string())), } } } diff --git a/golem-cli/src/model.rs b/golem-cli/src/model.rs index dcaee73830..07264a69e5 100644 --- a/golem-cli/src/model.rs +++ b/golem-cli/src/model.rs @@ -35,7 +35,7 @@ use clap_verbosity_flag::Verbosity; use derive_more::{Display, FromStr}; use golem_client::model::{ApiDefinitionInfo, ApiSite, ScanCursor}; use golem_common::model::trim_date::TrimDateTime; -use golem_common::model::{ComponentId, WorkerId}; +use golem_common::model::{ComponentId, TargetWorkerId}; use golem_common::uri::oss::uri::ComponentUri; use golem_common::uri::oss::url::ComponentUrl; use golem_common::uri::oss::urn::WorkerUrn; @@ -540,9 +540,9 @@ impl From for WorkerMetadataView { WorkerMetadataView { worker_urn: WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId(worker_id.component_id), - worker_name: worker_id.worker_name, + worker_name: Some(worker_id.worker_name), }, }, account_id, diff --git a/golem-cli/src/model/text.rs b/golem-cli/src/model/text.rs index f4240a6a66..549e4cb986 100644 --- a/golem-cli/src/model/text.rs +++ b/golem-cli/src/model/text.rs @@ -690,10 +690,14 @@ pub mod worker { impl MessageWithFields for WorkerAddView { fn message(&self) -> String { - format!( - "Added worker {}", - format_message_highlight(&self.0.id.worker_name) - 
) + if let Some(worker_name) = &self.0.id.worker_name { + format!("Added worker {}", format_message_highlight(&worker_name)) + } else { + format!( + "Added worker with a {}", + format_message_highlight("random generated name") + ) + } } fn fields(&self) -> Vec<(&'static str, String)> { @@ -704,7 +708,7 @@ pub mod worker { .fmt_field("Component URN", &self.0.id.component_id, |id| { format_id(&ComponentUrn { id: id.clone() }) }) - .fmt_field("Worker name", &self.0.id.worker_name, format_id); + .fmt_field_option("Worker name", &(self.0.id.worker_name.as_ref()), format_id); fields.build() } @@ -727,10 +731,14 @@ pub mod worker { impl MessageWithFields for WorkerGetView { fn message(&self) -> String { - format!( - "Got metadata for worker {}", - format_message_highlight(&self.0.worker_urn.id.worker_name) - ) + if let Some(worker_name) = &self.0.worker_urn.id.worker_name { + format!( + "Got metadata for worker {}", + format_message_highlight(worker_name) + ) + } else { + "Got metadata for worker".to_string() + } } fn fields(&self) -> Vec<(&'static str, String)> { @@ -741,7 +749,7 @@ pub mod worker { .fmt_field("Component URN", &self.0.worker_urn.id.component_id, |id| { format_id(&ComponentUrn { id: id.clone() }) }) - .fmt_field("Worker name", &self.0.worker_urn.id.worker_name, format_id) + .fmt_field_option("Worker name", &self.0.worker_urn.id.worker_name, format_id) .fmt_field("Component version", &self.0.component_version, format_id) .field("Created at", &self.0.created_at) .fmt_field("Component size", &self.0.component_size, format_binary_size) @@ -799,7 +807,7 @@ pub mod worker { component_urn: ComponentUrn { id: value.worker_urn.id.component_id.clone(), }, - worker_name: value.worker_urn.id.worker_name.to_string(), + worker_name: value.worker_urn.id.worker_name.clone().unwrap_or_default(), status: format_status(&value.status), component_version: value.component_version, created_at: value.created_at, @@ -860,7 +868,7 @@ pub mod worker { fn from(value: &WorkerUrn) -> 
Self { WorkerUrnTableView { worker_urn: value.clone(), - worker_name: value.id.worker_name.clone(), + worker_name: value.id.worker_name.clone().unwrap_or_default(), } } } diff --git a/golem-cli/src/oss/clients/worker.rs b/golem-cli/src/oss/clients/worker.rs index 73e32bba10..3a524159cd 100644 --- a/golem-cli/src/oss/clients/worker.rs +++ b/golem-cli/src/oss/clients/worker.rs @@ -14,7 +14,7 @@ use std::time::Duration; -use crate::clients::worker::WorkerClient; +use crate::clients::worker::{worker_name_required, WorkerClient}; use crate::command::worker::WorkerConnectOptions; use crate::connect_output::ConnectOutput; use crate::model::{ @@ -79,16 +79,28 @@ impl WorkerClient for WorkerCl ) -> Result { info!("Invoke and await for function {function} in {worker_urn}"); - Ok(self - .client - .invoke_and_await_function( - &worker_urn.id.component_id.0, - &worker_urn.id.worker_name, - idempotency_key.as_ref().map(|k| k.0.as_str()), - &function, - &parameters, - ) - .await?) + if let Some(worker_name) = &worker_urn.id.worker_name { + Ok(self + .client + .invoke_and_await_function( + &worker_urn.id.component_id.0, + worker_name, + idempotency_key.as_ref().map(|k| k.0.as_str()), + &function, + &parameters, + ) + .await?) + } else { + Ok(self + .client + .invoke_and_await_function_without_name( + &worker_urn.id.component_id.0, + idempotency_key.as_ref().map(|k| k.0.as_str()), + &function, + &parameters, + ) + .await?) 
+ } } async fn invoke( @@ -100,16 +112,28 @@ impl WorkerClient for WorkerCl ) -> Result<(), GolemError> { info!("Invoke function {function} in {worker_urn}"); - let _ = self - .client - .invoke_function( - &worker_urn.id.component_id.0, - &worker_urn.id.worker_name, - idempotency_key.as_ref().map(|k| k.0.as_str()), - &function, - &parameters, - ) - .await?; + if let Some(worker_name) = &worker_urn.id.worker_name { + let _ = self + .client + .invoke_function( + &worker_urn.id.component_id.0, + worker_name, + idempotency_key.as_ref().map(|k| k.0.as_str()), + &function, + &parameters, + ) + .await?; + } else { + let _ = self + .client + .invoke_function_without_name( + &worker_urn.id.component_id.0, + idempotency_key.as_ref().map(|k| k.0.as_str()), + &function, + &parameters, + ) + .await?; + } Ok(()) } @@ -120,7 +144,7 @@ impl WorkerClient for WorkerCl .client .interrupt_worker( &worker_urn.id.component_id.0, - &worker_urn.id.worker_name, + &worker_name_required(&worker_urn)?, Some(false), ) .await?; @@ -132,7 +156,10 @@ impl WorkerClient for WorkerCl let _ = self .client - .resume_worker(&worker_urn.id.component_id.0, &worker_urn.id.worker_name) + .resume_worker( + &worker_urn.id.component_id.0, + &worker_name_required(&worker_urn)?, + ) .await?; Ok(()) } @@ -144,7 +171,7 @@ impl WorkerClient for WorkerCl .client .interrupt_worker( &worker_urn.id.component_id.0, - &worker_urn.id.worker_name, + &worker_name_required(&worker_urn)?, Some(true), ) .await?; @@ -156,7 +183,10 @@ impl WorkerClient for WorkerCl let _ = self .client - .delete_worker(&worker_urn.id.component_id.0, &worker_urn.id.worker_name) + .delete_worker( + &worker_urn.id.component_id.0, + &worker_name_required(&worker_urn)?, + ) .await?; Ok(()) } @@ -166,7 +196,10 @@ impl WorkerClient for WorkerCl Ok(self .client - .get_worker_metadata(&worker_urn.id.component_id.0, &worker_urn.id.worker_name) + .get_worker_metadata( + &worker_urn.id.component_id.0, + &worker_name_required(&worker_urn)?, + ) .await? 
.into()) } @@ -251,7 +284,7 @@ impl WorkerClient for WorkerCl .push("components") .push(&worker_urn.id.component_id.0.to_string()) .push("workers") - .push(&worker_urn.id.worker_name) + .push(&worker_name_required(&worker_urn)?) .push("connect"); let mut request = url @@ -435,7 +468,7 @@ impl WorkerClient for WorkerCl .client .update_worker( &worker_urn.id.component_id.0, - &worker_urn.id.worker_name, + &worker_name_required(&worker_urn)?, &UpdateWorkerRequest { mode: update_mode, target_version, diff --git a/golem-cli/src/oss/main.rs b/golem-cli/src/oss/main.rs index d7787d55f1..db80320e07 100644 --- a/golem-cli/src/oss/main.rs +++ b/golem-cli/src/oss/main.rs @@ -159,7 +159,13 @@ async fn get_resource_by_urn( ResourceUrn::WorkerFunction(f) => { factory .worker_service() - .get_function(WorkerUri::URN(WorkerUrn { id: f.id }), &f.function, None) + .get_function( + WorkerUri::URN(WorkerUrn { + id: f.id.into_target_worker_id(), + }), + &f.function, + None, + ) .await } ResourceUrn::ApiDefinition(ad) => { @@ -206,7 +212,7 @@ async fn get_resource_by_url( .get_function( WorkerUri::URL(WorkerUrl { component_name: f.component_name, - worker_name: f.worker_name, + worker_name: Some(f.worker_name), }), &f.function, None, diff --git a/golem-cli/src/service/deploy.rs b/golem-cli/src/service/deploy.rs index 94c02bcb60..b61933db48 100644 --- a/golem-cli/src/service/deploy.rs +++ b/golem-cli/src/service/deploy.rs @@ -16,7 +16,7 @@ use crate::model::{Format, GolemError, GolemResult, WorkerName, WorkerUpdateMode use crate::service::component::ComponentService; use crate::service::worker::WorkerService; use async_trait::async_trait; -use golem_common::model::{ComponentId, WorkerId}; +use golem_common::model::{ComponentId, TargetWorkerId}; use golem_common::uri::oss::uri::ComponentUri; use golem_common::uri::oss::urn::WorkerUrn; use inquire::Confirm; @@ -125,13 +125,15 @@ impl DeployService for DeployServiceLive< info!("Deleting all workers of component {}", component_urn); for 
worker in &known_workers { + let worker_name = &worker.worker_id.worker_name; + info!("Deleting worker {worker_name}"); + let worker_urn = WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId(worker.worker_id.component_id), - worker_name: worker.worker_id.worker_name.clone(), + worker_name: Some(worker_name.clone()), }, }; - info!("Deleting worker {}", worker_urn.id.worker_name); self.worker_service.delete_by_urn(worker_urn).await?; } diff --git a/golem-cli/src/service/worker.rs b/golem-cli/src/service/worker.rs index 81b776af5f..bd93e60823 100644 --- a/golem-cli/src/service/worker.rs +++ b/golem-cli/src/service/worker.rs @@ -34,7 +34,7 @@ use golem_client::model::{ AnalysedExport, AnalysedFunction, AnalysedInstance, AnalysedType, InvokeParameters, InvokeResult, ScanCursor, StringFilterComparator, WorkerFilter, WorkerNameFilter, }; -use golem_common::model::{ComponentId, WorkerId}; +use golem_common::model::{ComponentId, TargetWorkerId}; use golem_common::uri::oss::uri::{ComponentUri, WorkerUri}; use golem_common::uri::oss::url::{ComponentUrl, WorkerUrl}; use golem_common::uri::oss::urn::{ComponentUrn, WorkerUrn}; @@ -208,35 +208,40 @@ async fn resolve_worker_component_version( components: &(dyn ComponentService + Send + Sync), worker_urn: WorkerUrn, ) -> Result, GolemError> { - let WorkerId { + let TargetWorkerId { component_id, worker_name, } = worker_urn.id; - let component_urn = ComponentUrn { id: component_id }; - - let worker_meta = client - .find_metadata( - component_urn.clone(), - Some(WorkerFilter::Name(WorkerNameFilter { - comparator: StringFilterComparator::Equal, - value: worker_name, - })), - None, - Some(2), - Some(true), - ) - .await?; - if worker_meta.workers.len() > 1 { - Err(GolemError( - "Multiple workers with the same name".to_string(), - )) - } else if let Some(worker) = worker_meta.workers.first() { - Ok(Some( - components - .get_metadata(&component_urn, worker.component_version) - .await?, - )) + if let 
Some(worker_name) = worker_name { + let component_urn = ComponentUrn { id: component_id }; + + let worker_meta = client + .find_metadata( + component_urn.clone(), + Some(WorkerFilter::Name(WorkerNameFilter { + comparator: StringFilterComparator::Equal, + value: worker_name, + })), + None, + Some(2), + Some(true), + ) + .await?; + + if worker_meta.workers.len() > 1 { + Err(GolemError( + "Multiple workers with the same name".to_string(), + )) + } else if let Some(worker) = worker_meta.workers.first() { + Ok(Some( + components + .get_metadata(&component_urn, worker.component_version) + .await?, + )) + } else { + Ok(None) + } } else { Ok(None) } @@ -262,7 +267,9 @@ async fn get_component_metadata_for_worker( { Ok(component) } else { - info!("No worker found with name {}. Assuming it should be create with the latest component version", worker_urn.id.worker_name); + if let Some(worker_name) = &worker_urn.id.worker_name { + info!("No worker found with name {worker_name}. Assuming it should be create with the latest component version"); + } let component_urn = ComponentUrn { id: worker_urn.id.component_id.clone(), }; @@ -446,9 +453,9 @@ impl WorkerService for WorkerServiceLive< .await?; Ok(GolemResult::Ok(Box::new(WorkerAddView(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId(inst.component_id), - worker_name: inst.worker_name, + worker_name: Some(inst.worker_name), }, })))) } @@ -470,7 +477,7 @@ impl WorkerService for WorkerServiceLive< let component_urn = self.components.resolve_uri(component_uri, &project).await?; Ok(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: component_urn.id, worker_name, }, @@ -794,9 +801,9 @@ impl WorkerService for WorkerServiceLive< let mut failed = Vec::new(); for worker in to_update { let worker_urn = WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId(worker.worker_id.component_id), - worker_name: worker.worker_id.worker_name, + worker_name: 
Some(worker.worker_id.worker_name), }, }; let result = self diff --git a/golem-cli/tests/api_definition.rs b/golem-cli/tests/api_definition.rs index 9f81541ab2..cbe175ef2b 100644 --- a/golem-cli/tests/api_definition.rs +++ b/golem-cli/tests/api_definition.rs @@ -1,5 +1,5 @@ use crate::cli::{Cli, CliLive}; -use crate::worker::make_component_from_file; +use crate::worker::add_component_from_file; use assert2::assert; use chrono::{DateTime, Utc}; use golem_cli::model::component::ComponentView; @@ -97,7 +97,7 @@ pub fn make_shopping_cart_component( component_name: &str, cli: &CliLive, ) -> Result { - make_component_from_file(deps, component_name, cli, "shopping-cart.wasm") + add_component_from_file(deps, component_name, cli, "shopping-cart.wasm") } fn make_file(id: &str, json: &serde_json::value::Value) -> Result { diff --git a/golem-cli/tests/get.rs b/golem-cli/tests/get.rs index 3bf1900051..62b89a57c3 100644 --- a/golem-cli/tests/get.rs +++ b/golem-cli/tests/get.rs @@ -17,7 +17,7 @@ use crate::api_definition::{ }; use crate::api_deployment::make_definition; use crate::cli::{Cli, CliLive}; -use crate::worker::make_component; +use crate::worker::add_environment_service_component; use golem_cli::model::component::ComponentView; use golem_cli::model::WorkerMetadataView; use golem_client::model::{ApiDeployment, HttpApiDefinitionWithTypeInfo}; @@ -170,7 +170,7 @@ fn top_level_get_component( fn top_level_get_worker( (deps, cli): (Arc, CliLive), ) -> Result<(), Failed> { - let component = make_component(deps, "top_level_get_worker", &cli)?; + let component = add_environment_service_component(deps, "top_level_get_worker", &cli)?; let worker_name = "top_level_get_worker"; let cfg = &cli.config; @@ -185,7 +185,7 @@ fn top_level_get_worker( let url = WorkerUrl { component_name: component.component_name.to_string(), - worker_name: worker_name.to_string(), + worker_name: Some(worker_name.to_string()), }; let worker: WorkerMetadataView = cli.run(&["get", &url.to_string()])?; @@ 
-202,7 +202,7 @@ fn top_level_get_worker( fn top_level_get_worker_function( (deps, cli): (Arc, CliLive), ) -> Result<(), Failed> { - let component = make_component(deps, "top_level_get_worker_function", &cli)?; + let component = add_environment_service_component(deps, "top_level_get_worker_function", &cli)?; let worker_name = "top_level_get_worker_function"; let cfg = &cli.config; @@ -231,7 +231,7 @@ fn top_level_get_worker_function( ); let urn = WorkerFunctionUrn { - id: worker_urn.id, + id: worker_urn.id.try_into_worker_id().unwrap(), function: function_name.to_string(), }; diff --git a/golem-cli/tests/text.rs b/golem-cli/tests/text.rs index 240adf5235..bb6a355a9c 100644 --- a/golem-cli/tests/text.rs +++ b/golem-cli/tests/text.rs @@ -2,12 +2,12 @@ use crate::api_definition::{ golem_def, make_golem_file, make_open_api_file, make_shopping_cart_component, }; use crate::cli::{Cli, CliLive}; -use crate::worker::make_component; +use crate::worker::add_environment_service_component; use assert2::assert; use golem_cli::model::component::ComponentView; use golem_cli::model::Format; use golem_client::model::{ApiDeployment, HttpApiDefinitionWithTypeInfo}; -use golem_common::model::WorkerId; +use golem_common::model::TargetWorkerId; use golem_common::uri::oss::urn::{ComponentUrn, WorkerUrn}; use golem_test_framework::config::TestDependencies; use indoc::formatdoc; @@ -291,7 +291,8 @@ fn text_worker_add( ), ) -> Result<(), Failed> { let component_urn = - make_component(deps, &format!("{name} text worker add"), &cli)?.component_urn; + add_environment_service_component(deps, &format!("{name} text worker add"), &cli)? 
+ .component_urn; let worker_name = format!("{name}_worker_add"); let cfg = &cli.config; let res = cli.with_format(Format::Text).run_string(&[ @@ -313,9 +314,9 @@ fn text_worker_add( &format!( "(?m)^Worker URN:.+{}$", WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: component_urn.id.clone(), - worker_name: worker_name.clone() + worker_name: Some(worker_name.clone()) } } ), @@ -339,8 +340,12 @@ fn text_worker_invoke_and_await( CliLive, ), ) -> Result<(), Failed> { - let component_urn = - make_component(deps, &format!("{name} text worker_invoke_and_await"), &cli)?.component_urn; + let component_urn = add_environment_service_component( + deps, + &format!("{name} text worker_invoke_and_await"), + &cli, + )? + .component_urn; let worker_name = format!("{name}_worker_invoke_and_await"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ @@ -384,7 +389,8 @@ fn text_worker_get( ), ) -> Result<(), Failed> { let component_urn = - make_component(deps, &format!("{name} text worker get"), &cli)?.component_urn; + add_environment_service_component(deps, &format!("{name} text worker get"), &cli)? + .component_urn; let worker_name = format!("{name}_worker_get"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ @@ -415,9 +421,9 @@ fn text_worker_get( &format!( "(?m)^Worker URN:.+{}$", WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: component_urn.id.clone(), - worker_name: worker_name.clone() + worker_name: Some(worker_name.clone()) } } ), @@ -451,7 +457,8 @@ fn text_worker_list( ), ) -> Result<(), Failed> { let component_urn = - make_component(deps, &format!("{name} text worker list"), &cli)?.component_urn; + add_environment_service_component(deps, &format!("{name} text worker list"), &cli)? 
+ .component_urn; let worker_name = format!("{name:_<9}_worker_list"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ diff --git a/golem-cli/tests/worker.rs b/golem-cli/tests/worker.rs index 9e24248797..b4d3f8536d 100644 --- a/golem-cli/tests/worker.rs +++ b/golem-cli/tests/worker.rs @@ -4,6 +4,7 @@ use assert2::assert; use golem_cli::model::component::ComponentView; use golem_cli::model::{Format, IdempotencyKey, WorkersMetadataResponseView}; use golem_client::model::UpdateRecord; +use golem_common::model::TargetWorkerId; use golem_common::uri::oss::url::{ComponentUrl, WorkerUrl}; use golem_common::uri::oss::urn::WorkerUrn; use golem_test_framework::config::TestDependencies; @@ -94,6 +95,16 @@ fn make( ctx.clone(), worker_invoke_indexed_resource, ), + Trial::test_in_context( + format!("worker_invoke_without_name{suffix}"), + ctx.clone(), + worker_invoke_without_name, + ), + Trial::test_in_context( + format!("worker_invoke_without_name_ephemeral{suffix}"), + ctx.clone(), + worker_invoke_without_name_ephemeral, + ), ] } @@ -118,7 +129,7 @@ pub fn all(deps: Arc) -> Vec, component_name: &str, cli: &CliLive, @@ -136,12 +147,31 @@ pub fn make_component_from_file( ]) } -pub fn make_component( +pub fn add_ephemeral_component_from_file( deps: Arc, component_name: &str, cli: &CliLive, + file: &str, ) -> Result { - make_component_from_file(deps, component_name, cli, "environment-service.wasm") + let env_service = deps.component_directory().join(file); + let cfg = &cli.config; + + cli.run(&[ + "component", + "add", + "--ephemeral", + &cfg.arg('c', "component-name"), + component_name, + env_service.to_str().unwrap(), + ]) +} + +pub fn add_environment_service_component( + deps: Arc, + component_name: &str, + cli: &CliLive, +) -> Result { + add_component_from_file(deps, component_name, cli, "environment-service.wasm") } fn component_ref_key(cfg: &CliConfig, ref_kind: RefKind) -> String { @@ -170,7 +200,8 @@ fn worker_new_instance( RefKind, ), ) -> Result<(), Failed> { - let 
component = make_component(deps, &format!("{name} worker new instance"), &cli)?; + let component = + add_environment_service_component(deps, &format!("{name} worker new instance"), &cli)?; let worker_name = format!("{name}_worker_new_instance"); let cfg = &cli.config; @@ -184,7 +215,7 @@ fn worker_new_instance( ])?; assert_eq!(worker_urn.id.component_id, component.component_urn.id); - assert_eq!(worker_urn.id.worker_name, worker_name); + assert_eq!(worker_urn.id.worker_name, Some(worker_name)); Ok(()) } @@ -208,16 +239,16 @@ fn worker_ref( RefKind::Url => { let url = WorkerUrl { component_name: component.component_name.clone(), - worker_name, + worker_name: Some(worker_name), }; vec![cfg.arg('W', "worker"), url.to_string()] } RefKind::Urn => { let urn = WorkerUrn { - id: golem_common::model::WorkerId { + id: TargetWorkerId { component_id: component.component_urn.id.clone(), - worker_name, + worker_name: Some(worker_name), }, }; @@ -234,7 +265,8 @@ fn worker_invoke_and_await( RefKind, ), ) -> Result<(), Failed> { - let component = make_component(deps, &format!("{name} worker_invoke_and_await"), &cli)?; + let component = + add_environment_service_component(deps, &format!("{name} worker_invoke_and_await"), &cli)?; let worker_name = format!("{name}_worker_invoke_and_await"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ @@ -330,7 +362,7 @@ fn worker_invoke_and_await_wave_params( RefKind, ), ) -> Result<(), Failed> { - let component = make_component_from_file( + let component = add_component_from_file( deps, &format!("{name} worker_invoke_and_await_wave_params"), &cli, @@ -397,7 +429,7 @@ fn worker_invoke_drop( RefKind, ), ) -> Result<(), Failed> { - let component = make_component_from_file( + let component = add_component_from_file( deps, &format!("{name} worker_invoke_drop"), &cli, @@ -478,7 +510,8 @@ fn worker_invoke_no_params( RefKind, ), ) -> Result<(), Failed> { - let component = make_component(deps, &format!("{name} worker_invoke_no_params"), &cli)?; + let 
component = + add_environment_service_component(deps, &format!("{name} worker_invoke_no_params"), &cli)?; let worker_name = format!("{name}_worker_invoke_no_params"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ @@ -510,7 +543,11 @@ fn worker_invoke_json_params( RefKind, ), ) -> Result<(), Failed> { - let component = make_component(deps, &format!("{name} worker_invoke_json_params"), &cli)?; + let component = add_environment_service_component( + deps, + &format!("{name} worker_invoke_json_params"), + &cli, + )?; let worker_name = format!("{name}_worker_invoke_json_params"); let cfg = &cli.config; let _: WorkerUrn = cli.run(&[ @@ -543,7 +580,7 @@ fn worker_invoke_wave_params( RefKind, ), ) -> Result<(), Failed> { - let component = make_component_from_file( + let component = add_component_from_file( deps, &format!("{name} worker_invoke_wave_params"), &cli, @@ -759,7 +796,7 @@ fn worker_list( RefKind, ), ) -> Result<(), Failed> { - let component = make_component(deps, &format!("{name} worker_list"), &cli)?; + let component = add_environment_service_component(deps, &format!("{name} worker_list"), &cli)?; let cfg = &cli.config; let workers_count = 10; @@ -786,7 +823,7 @@ fn worker_list( &component_ref_key(cfg, ref_kind), &component_ref_value(&component, ref_kind), &cfg.arg('f', "filter"), - format!("name = {}", worker_urn.id.worker_name).as_str(), + format!("name = {}", worker_urn.id.worker_name.unwrap_or_default()).as_str(), &cfg.arg('f', "filter"), "version >= 0", "--precise", @@ -937,7 +974,7 @@ fn worker_invoke_indexed_resource( RefKind, ), ) -> Result<(), Failed> { - let component = make_component_from_file( + let component = add_component_from_file( deps, &format!("{name}_worker_invoke_indexed_resource"), &cli, @@ -995,3 +1032,74 @@ fn worker_invoke_indexed_resource( Ok(()) } + +fn worker_invoke_without_name( + (deps, name, cli, _ref_kind): ( + Arc, + String, + CliLive, + RefKind, + ), +) -> Result<(), Failed> { + let component = 
add_environment_service_component( + deps, + &format!("{name} worker_invoke_without_name"), + &cli, + )?; + let cfg = &cli.config; + + let url = WorkerUrl { + component_name: component.component_name.clone(), + worker_name: None, + }; + + let result: Value = cli.run_json(&[ + "worker", + "invoke-and-await", + &cfg.arg('W', "worker"), + &url.to_string(), + &cfg.arg('f', "function"), + "golem:it/api.{get-environment}", + ])?; + + let path = serde_json_path::JsonPath::parse("$.value[0].ok")?; + let _node = path.query(&result).exactly_one()?; + + Ok(()) +} + +fn worker_invoke_without_name_ephemeral( + (deps, name, cli, _ref_kind): ( + Arc, + String, + CliLive, + RefKind, + ), +) -> Result<(), Failed> { + let component = add_ephemeral_component_from_file( + deps, + &format!("{name} worker_invoke_without_name_ephemeral"), + &cli, + "environment-service.wasm", + )?; + let cfg = &cli.config; + + let url = WorkerUrl { + component_name: component.component_name.clone(), + worker_name: None, + }; + + let result: Value = cli.run_json(&[ + "worker", + "invoke-and-await", + &cfg.arg('W', "worker"), + &url.to_string(), + &cfg.arg('f', "function"), + "golem:it/api.{get-environment}", + ])?; + + let path = serde_json_path::JsonPath::parse("$.value[0].ok")?; + let _node = path.query(&result).exactly_one()?; + + Ok(()) +} diff --git a/golem-common/src/grpc.rs b/golem-common/src/grpc.rs index 0717fbc69b..5b1d866ea2 100644 --- a/golem-common/src/grpc.rs +++ b/golem-common/src/grpc.rs @@ -17,7 +17,7 @@ use golem_api_grpc::proto::golem::common; use golem_api_grpc::proto::golem::component; use golem_api_grpc::proto::golem::worker; -use crate::model::{AccountId, ComponentId, IdempotencyKey, PromiseId, WorkerId}; +use crate::model::{AccountId, ComponentId, IdempotencyKey, PromiseId, TargetWorkerId, WorkerId}; pub fn proto_component_id_string(component_id: &Option) -> Option { component_id @@ -33,6 +33,13 @@ pub fn proto_worker_id_string(worker_id: &Option) -> Option) -> Option { + worker_id 
+ .clone() + .and_then(|v| TryInto::::try_into(v).ok()) + .map(|v| v.to_string()) +} + pub fn proto_idempotency_key_string( idempotency_key: &Option, ) -> Option { diff --git a/golem-common/src/model/mod.rs b/golem-common/src/model/mod.rs index a77f4bfb40..5d79563888 100644 --- a/golem-common/src/model/mod.rs +++ b/golem-common/src/model/mod.rs @@ -239,16 +239,23 @@ pub struct WorkerId { } impl WorkerId { - pub fn slug(&self) -> String { - format!("{}/{}", self.component_id, self.worker_name) - } - pub fn to_redis_key(&self) -> String { format!("{}:{}", self.component_id.0, self.worker_name) } pub fn uri(&self) -> String { - WorkerUrn { id: self.clone() }.to_string() + WorkerUrn { + id: self.clone().into_target_worker_id(), + } + .to_string() + } + + /// The dual of `TargetWorkerId::into_worker_id` + pub fn into_target_worker_id(self) -> TargetWorkerId { + TargetWorkerId { + component_id: self.component_id, + worker_name: Some(self.worker_name), + } } } @@ -276,7 +283,7 @@ impl FromStr for WorkerId { impl Display for WorkerId { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.slug()) + f.write_str(&format!("{}/{}", self.component_id, self.worker_name)) } } @@ -340,6 +347,116 @@ impl Display for OwnedWorkerId { } } +#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize, Encode, Decode)] +pub struct TargetWorkerId { + pub component_id: ComponentId, + pub worker_name: Option, +} + +impl TargetWorkerId { + pub fn uri(&self) -> String { + WorkerUrn { id: self.clone() }.to_string() + } + + /// Converts a `TargetWorkerId` to a `WorkerId` if the worker name is specified + pub fn try_into_worker_id(self) -> Option { + self.worker_name.map(|worker_name| WorkerId { + component_id: self.component_id, + worker_name, + }) + } + + /// Converts a `TargetWorkerId` to a `WorkerId`. 
If the worker name was not specified, + /// it generates a new unique one, and if the `force_in_shard` set is not empty, it guarantees + /// that the generated worker ID will belong to one of the provided shards. + /// + /// If the worker name was specified, `force_in_shard` is ignored. + pub fn into_worker_id( + self, + force_in_shard: &HashSet, + number_of_shards: usize, + ) -> WorkerId { + let TargetWorkerId { + component_id, + worker_name, + } = self; + match worker_name { + Some(worker_name) => WorkerId { + component_id, + worker_name, + }, + None => { + if force_in_shard.is_empty() || number_of_shards == 0 { + let worker_name = Uuid::new_v4().to_string(); + WorkerId { + component_id, + worker_name, + } + } else { + let mut current = Uuid::new_v4().to_u128_le(); + loop { + let uuid = Uuid::from_u128_le(current); + let worker_name = uuid.to_string(); + let worker_id = WorkerId { + component_id: component_id.clone(), + worker_name, + }; + let shard_id = ShardId::from_worker_id(&worker_id, number_of_shards); + if force_in_shard.contains(&shard_id) { + return worker_id; + } + current += 1; + } + } + } + } + } +} + +impl Display for TargetWorkerId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.worker_name { + Some(worker_name) => write!(f, "{}/{}", self.component_id, worker_name), + None => write!(f, "{}/*", self.component_id), + } + } +} + +impl From for TargetWorkerId { + fn from(value: WorkerId) -> Self { + value.into_target_worker_id() + } +} + +impl From<&WorkerId> for TargetWorkerId { + fn from(value: &WorkerId) -> Self { + value.clone().into_target_worker_id() + } +} + +impl TryFrom for TargetWorkerId { + type Error = String; + + fn try_from(value: golem::worker::TargetWorkerId) -> Result { + Ok(Self { + component_id: value + .component_id + .ok_or("Missing component_id")? 
+ .try_into()?, + worker_name: value.name, + }) + } +} + +impl From for golem::worker::TargetWorkerId { + fn from(value: TargetWorkerId) -> Self { + Self { + component_id: Some(value.component_id.into()), + name: value.worker_name, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Hash, Encode, Decode, Serialize, Deserialize, Object)] #[serde(rename_all = "camelCase")] #[oai(rename_all = "camelCase")] @@ -2173,7 +2290,7 @@ impl TryFrom for golem_api_grpc::proto::golem::worker::LogEvent { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Enum)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Enum)] #[repr(i32)] pub enum ComponentType { Durable = 0, @@ -2220,16 +2337,18 @@ impl From for golem_api_grpc::proto::golem::component::ComponentT #[cfg(test)] mod tests { + use std::collections::HashSet; use std::str::FromStr; + use std::time::SystemTime; use std::vec; - use bincode::{Decode, Encode}; - use serde::{Deserialize, Serialize}; - use crate::model::{ - AccountId, ComponentId, FilterComparator, StringFilterComparator, Timestamp, WorkerFilter, - WorkerId, WorkerMetadata, WorkerStatus, WorkerStatusRecord, + AccountId, ComponentId, FilterComparator, ShardId, StringFilterComparator, TargetWorkerId, + Timestamp, WorkerFilter, WorkerId, WorkerMetadata, WorkerStatus, WorkerStatusRecord, }; + use bincode::{Decode, Encode}; + use rand::{thread_rng, Rng}; + use serde::{Deserialize, Serialize}; #[test] fn timestamp_conversion() { @@ -2466,4 +2585,39 @@ mod tests { )) .matches(&worker_metadata)); } + + #[test] + fn target_worker_id_force_shards() { + let mut rng = thread_rng(); + const SHARD_COUNT: usize = 1000; + const EXAMPLE_COUNT: usize = 1000; + for _ in 0..EXAMPLE_COUNT { + let mut shard_ids = HashSet::new(); + let count = rng.gen_range(0..100); + for _ in 0..count { + let shard_id = rng.gen_range(0..SHARD_COUNT); + shard_ids.insert(ShardId { + value: shard_id as i64, + }); + } + + let component_id = ComponentId::new_v4(); 
+ let target_worker_id = TargetWorkerId { + component_id, + worker_name: None, + }; + + let start = SystemTime::now(); + let worker_id = target_worker_id.into_worker_id(&shard_ids, SHARD_COUNT); + let end = SystemTime::now(); + println!( + "Time with {count} valid shards: {:?}", + end.duration_since(start).unwrap() + ); + + if !shard_ids.is_empty() { + assert!(shard_ids.contains(&ShardId::from_worker_id(&worker_id, SHARD_COUNT))); + } + } + } } diff --git a/golem-common/src/model/oplog.rs b/golem-common/src/model/oplog.rs index 569c9c27fd..750b736caf 100644 --- a/golem-common/src/model/oplog.rs +++ b/golem-common/src/model/oplog.rs @@ -353,6 +353,8 @@ pub enum OplogEntry { context: String, message: String, }, + /// Marks the point where the worker was restarted from clean initial state + Restart { timestamp: Timestamp }, } impl OplogEntry { @@ -524,6 +526,12 @@ impl OplogEntry { } } + pub fn restart() -> OplogEntry { + OplogEntry::Restart { + timestamp: Timestamp::now_utc(), + } + } + pub fn is_end_atomic_region(&self, idx: OplogIndex) -> bool { matches!(self, OplogEntry::EndAtomicRegion { begin_index, .. } if *begin_index == idx) } @@ -572,6 +580,7 @@ impl OplogEntry { | OplogEntry::DropResource { .. } | OplogEntry::DescribeResource { .. } | OplogEntry::Log { .. } + | OplogEntry::Restart { .. } ) } @@ -599,8 +608,9 @@ impl OplogEntry { | OplogEntry::GrowMemory { timestamp, .. } | OplogEntry::CreateResource { timestamp, .. } | OplogEntry::DropResource { timestamp, .. } - | OplogEntry::DescribeResource { timestamp, .. } => *timestamp, - OplogEntry::Log { timestamp, .. } => *timestamp, + | OplogEntry::DescribeResource { timestamp, .. } + | OplogEntry::Log { timestamp, .. 
} + | OplogEntry::Restart { timestamp } => *timestamp, } } } diff --git a/golem-common/src/uri/cloud/uri.rs b/golem-common/src/uri/cloud/uri.rs index 409f6258bf..66c1aa53fe 100644 --- a/golem-common/src/uri/cloud/uri.rs +++ b/golem-common/src/uri/cloud/uri.rs @@ -177,7 +177,7 @@ uri_from_into!(ResourceUri); #[cfg(test)] mod tests { - use crate::model::{ComponentId, WorkerId}; + use crate::model::{ComponentId, TargetWorkerId, WorkerId}; use crate::uri::cloud::uri::{ ApiDefinitionUri, ApiDeploymentUri, ComponentOrVersionUri, ComponentUri, ComponentVersionUri, ResourceUri, WorkerFunctionUri, WorkerOrFunctionUri, WorkerUri, @@ -375,15 +375,15 @@ mod tests { pub fn worker_uri_to_uri() { let typed_url = WorkerUri::URL(WorkerUrl { component_name: "my comp".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), project: None, }); let typed_urn = WorkerUri::URN(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }, }); @@ -396,6 +396,31 @@ mod tests { ); } + #[test] + pub fn worker_uri_to_uri_no_name() { + let typed_url = WorkerUri::URL(WorkerUrl { + component_name: "my comp".to_string(), + worker_name: None, + project: None, + }); + let typed_urn = WorkerUri::URN(WorkerUrn { + id: TargetWorkerId { + component_id: ComponentId( + Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), + ), + worker_name: None, + }, + }); + + let untyped_url: GolemUri = typed_url.into(); + let untyped_urn: GolemUri = typed_urn.into(); + assert_eq!(untyped_url.to_string(), "worker:///my+comp"); + assert_eq!( + untyped_urn.to_string(), + "urn:worker:679ae459-8700-41d9-920c-7e2887459c94" + ); + } + #[test] pub fn worker_uri_from_uri() { let untyped_url = GolemUri::from_str("worker:///my+comp/my+worker").unwrap(); @@ -412,12 +437,35 @@ mod tests { }; 
assert_eq!(typed_url.component_name, "my comp"); - assert_eq!(typed_url.worker_name, "my worker"); + assert_eq!(typed_url.worker_name, Some("my worker".to_string())); assert_eq!( typed_urn.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - assert_eq!(typed_urn.id.worker_name, "my worker"); + assert_eq!(typed_urn.id.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_uri_from_uri_no_name() { + let untyped_url = GolemUri::from_str("worker:///my+comp").unwrap(); + let untyped_urn = + GolemUri::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + let typed_url: WorkerUri = untyped_url.try_into().unwrap(); + let typed_urn: WorkerUri = untyped_urn.try_into().unwrap(); + let WorkerUri::URL(typed_url) = typed_url else { + panic!() + }; + let WorkerUri::URN(typed_urn) = typed_urn else { + panic!() + }; + + assert_eq!(typed_url.component_name, "my comp"); + assert_eq!(typed_url.worker_name, None); + assert_eq!( + typed_urn.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed_urn.id.worker_name, None); } #[test] @@ -434,12 +482,33 @@ mod tests { }; assert_eq!(typed_url.component_name, "my comp"); - assert_eq!(typed_url.worker_name, "my worker"); + assert_eq!(typed_url.worker_name, Some("my worker".to_string())); assert_eq!( typed_urn.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - assert_eq!(typed_urn.id.worker_name, "my worker"); + assert_eq!(typed_urn.id.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_uri_from_str_no_name() { + let typed_url = WorkerUri::from_str("worker:///my+comp").unwrap(); + let typed_urn = + WorkerUri::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + let WorkerUri::URL(typed_url) = typed_url else { + panic!() + }; + let WorkerUri::URN(typed_urn) = typed_urn else { + panic!() + }; + + assert_eq!(typed_url.component_name, "my comp"); + 
assert_eq!(typed_url.worker_name, None); + assert_eq!( + typed_urn.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed_urn.id.worker_name, None); } #[test] @@ -530,11 +599,11 @@ mod tests { project: None, })); let typed_urn = WorkerOrFunctionUri::URN(WorkerOrFunctionUrn::Worker(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }, })); diff --git a/golem-common/src/uri/cloud/url.rs b/golem-common/src/uri/cloud/url.rs index b597b5d0a2..123d3922ef 100644 --- a/golem-common/src/uri/cloud/url.rs +++ b/golem-common/src/uri/cloud/url.rs @@ -1,7 +1,7 @@ use crate::model::ComponentVersion; use crate::uri::cloud::{ACCOUNT_TYPE_NAME, PROJECT_TYPE_NAME}; use crate::uri::{ - try_from_golem_url, urlencode, GolemUrl, GolemUrlTransformError, TypedGolemUrl, + try_from_golem_url, urldecode, urlencode, GolemUrl, GolemUrlTransformError, TypedGolemUrl, API_DEFINITION_TYPE_NAME, API_DEPLOYMENT_TYPE_NAME, COMPONENT_TYPE_NAME, WORKER_TYPE_NAME, }; use crate::url_from_into; @@ -272,7 +272,7 @@ url_from_into!(ComponentOrVersionUrl); #[derive(Debug, Clone, Eq, PartialEq)] pub struct WorkerUrl { pub component_name: String, - pub worker_name: String, + pub worker_name: Option, pub project: Option, } @@ -299,20 +299,42 @@ impl TypedGolemUrl for WorkerUrl { where Self: Sized, { - let (component_name, worker_name) = Self::expect_path2(path)?; - - let project = Self::expect_project_query(query)?; - - Ok(Self { - component_name, - worker_name, - project, - }) + let path = path + .strip_prefix('/') + .ok_or(Self::invalid_path("path is not started with '/'"))?; + let segments = path.split('/').collect::>(); + + if segments.len() != 1 && segments.len() != 2 { + Err(Self::invalid_path(format!( + "1 or 2 segments expected, but got {} segments", + segments.len() + ))) + } else { + 
let project = Self::expect_project_query(query)?; + + let component_name = urldecode(segments[0]); + if segments.len() == 2 { + Ok(Self { + component_name, + worker_name: Some(urldecode(segments[1])), + project, + }) + } else { + Ok(Self { + component_name, + worker_name: None, + project, + }) + } + } } fn to_parts(&self) -> (String, Option) { ( - Self::make_path2(&self.component_name, &self.worker_name), + match &self.worker_name { + Some(worker_name) => Self::make_path2(&self.component_name, worker_name), + None => Self::make_path1(&self.component_name), + }, to_project_query(&self.project), ) } @@ -795,7 +817,7 @@ mod tests { pub fn worker_url_to_url() { let typed = WorkerUrl { component_name: "my component".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), project: None, }; @@ -803,13 +825,34 @@ mod tests { assert_eq!(untyped.to_string(), "worker:///my+component/my+worker"); } + #[test] + pub fn worker_url_to_url_no_name() { + let typed = WorkerUrl { + component_name: "my component".to_string(), + worker_name: None, + project: None, + }; + + let untyped: GolemUrl = typed.into(); + assert_eq!(untyped.to_string(), "worker:///my+component"); + } + #[test] pub fn worker_url_from_url() { let untyped = GolemUrl::from_str("worker:///my+component/my+worker").unwrap(); let typed: WorkerUrl = untyped.try_into().unwrap(); assert_eq!(typed.component_name, "my component"); - assert_eq!(typed.worker_name, "my worker"); + assert_eq!(typed.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_url_from_url_no_name() { + let untyped = GolemUrl::from_str("worker:///my+component").unwrap(); + let typed: WorkerUrl = untyped.try_into().unwrap(); + + assert_eq!(typed.component_name, "my component"); + assert_eq!(typed.worker_name, None); } #[test] @@ -817,7 +860,15 @@ mod tests { let typed = WorkerUrl::from_str("worker:///my+component/my+worker").unwrap(); assert_eq!(typed.component_name, "my component"); - 
assert_eq!(typed.worker_name, "my worker"); + assert_eq!(typed.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_url_from_str_no_name() { + let typed = WorkerUrl::from_str("worker:///my+component").unwrap(); + + assert_eq!(typed.component_name, "my component"); + assert_eq!(typed.worker_name, None); } #[test] @@ -856,7 +907,7 @@ mod tests { pub fn worker_or_function_url_to_url() { let typed_w = WorkerOrFunctionUrl::Worker(WorkerUrl { component_name: "my component".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), project: None, }); let typed_f = WorkerOrFunctionUrl::Function(WorkerFunctionUrl { diff --git a/golem-common/src/uri/oss/uri.rs b/golem-common/src/uri/oss/uri.rs index f0310cb0c9..47ac17df27 100644 --- a/golem-common/src/uri/oss/uri.rs +++ b/golem-common/src/uri/oss/uri.rs @@ -131,7 +131,7 @@ uri_from_into!(ResourceUri); #[cfg(test)] mod tests { - use crate::model::{ComponentId, WorkerId}; + use crate::model::{ComponentId, TargetWorkerId, WorkerId}; use crate::uri::oss::uri::{ ApiDefinitionUri, ApiDeploymentUri, ComponentOrVersionUri, ComponentUri, ComponentVersionUri, ResourceUri, WorkerFunctionUri, WorkerOrFunctionUri, WorkerUri, @@ -326,14 +326,14 @@ mod tests { pub fn worker_uri_to_uri() { let typed_url = WorkerUri::URL(WorkerUrl { component_name: "my comp".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }); let typed_urn = WorkerUri::URN(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }, }); @@ -346,6 +346,30 @@ mod tests { ); } + #[test] + pub fn worker_uri_to_uri_no_name() { + let typed_url = WorkerUri::URL(WorkerUrl { + component_name: "my comp".to_string(), + worker_name: None, + }); + let typed_urn = WorkerUri::URN(WorkerUrn { + id: 
TargetWorkerId { + component_id: ComponentId( + Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), + ), + worker_name: None, + }, + }); + + let untyped_url: GolemUri = typed_url.into(); + let untyped_urn: GolemUri = typed_urn.into(); + assert_eq!(untyped_url.to_string(), "worker:///my+comp"); + assert_eq!( + untyped_urn.to_string(), + "urn:worker:679ae459-8700-41d9-920c-7e2887459c94" + ); + } + #[test] pub fn worker_uri_from_uri() { let untyped_url = GolemUri::from_str("worker:///my+comp/my+worker").unwrap(); @@ -362,12 +386,35 @@ mod tests { }; assert_eq!(typed_url.component_name, "my comp"); - assert_eq!(typed_url.worker_name, "my worker"); + assert_eq!(typed_url.worker_name, Some("my worker".to_string())); assert_eq!( typed_urn.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - assert_eq!(typed_urn.id.worker_name, "my worker"); + assert_eq!(typed_urn.id.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_uri_from_uri_no_name() { + let untyped_url = GolemUri::from_str("worker:///my+comp").unwrap(); + let untyped_urn = + GolemUri::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + let typed_url: WorkerUri = untyped_url.try_into().unwrap(); + let typed_urn: WorkerUri = untyped_urn.try_into().unwrap(); + let WorkerUri::URL(typed_url) = typed_url else { + panic!() + }; + let WorkerUri::URN(typed_urn) = typed_urn else { + panic!() + }; + + assert_eq!(typed_url.component_name, "my comp"); + assert_eq!(typed_url.worker_name, None); + assert_eq!( + typed_urn.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed_urn.id.worker_name, None); } #[test] @@ -384,12 +431,33 @@ mod tests { }; assert_eq!(typed_url.component_name, "my comp"); - assert_eq!(typed_url.worker_name, "my worker"); + assert_eq!(typed_url.worker_name, Some("my worker".to_string())); assert_eq!( typed_urn.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - 
assert_eq!(typed_urn.id.worker_name, "my worker"); + assert_eq!(typed_urn.id.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_uri_from_str_no_name() { + let typed_url = WorkerUri::from_str("worker:///my+comp").unwrap(); + let typed_urn = + WorkerUri::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + let WorkerUri::URL(typed_url) = typed_url else { + panic!() + }; + let WorkerUri::URN(typed_urn) = typed_urn else { + panic!() + }; + + assert_eq!(typed_url.component_name, "my comp"); + assert_eq!(typed_url.worker_name, None); + assert_eq!( + typed_urn.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed_urn.id.worker_name, None); } #[test] @@ -478,11 +546,11 @@ mod tests { function: "fn a".to_string(), })); let typed_urn = WorkerOrFunctionUri::URN(WorkerOrFunctionUrn::Worker(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }, })); diff --git a/golem-common/src/uri/oss/url.rs b/golem-common/src/uri/oss/url.rs index 64573a3ce3..0322145c23 100644 --- a/golem-common/src/uri/oss/url.rs +++ b/golem-common/src/uri/oss/url.rs @@ -14,8 +14,8 @@ use crate::model::ComponentVersion; use crate::uri::{ - try_from_golem_url, GolemUrl, GolemUrlTransformError, TypedGolemUrl, API_DEFINITION_TYPE_NAME, - API_DEPLOYMENT_TYPE_NAME, COMPONENT_TYPE_NAME, WORKER_TYPE_NAME, + try_from_golem_url, urldecode, GolemUrl, GolemUrlTransformError, TypedGolemUrl, + API_DEFINITION_TYPE_NAME, API_DEPLOYMENT_TYPE_NAME, COMPONENT_TYPE_NAME, WORKER_TYPE_NAME, }; use crate::url_from_into; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -136,10 +136,11 @@ url_from_into!(ComponentOrVersionUrl); /// Typed Golem URL for worker /// /// Format: `worker:///{component_name}/{worker_name}` +/// or `worker:///{component_name}` 
for targeting a new ephemeral worker #[derive(Debug, Clone, Eq, PartialEq)] pub struct WorkerUrl { pub component_name: String, - pub worker_name: String, + pub worker_name: Option, } impl TypedGolemUrl for WorkerUrl { @@ -151,19 +152,40 @@ impl TypedGolemUrl for WorkerUrl { where Self: Sized, { - let (component_name, worker_name) = Self::expect_path2(path)?; - - Self::expect_empty_query(query, CLOUD_CONTEXT)?; - - Ok(Self { - component_name, - worker_name, - }) + let path = path + .strip_prefix('/') + .ok_or(Self::invalid_path("path is not started with '/'"))?; + let segments = path.split('/').collect::>(); + + if segments.len() != 1 && segments.len() != 2 { + Err(Self::invalid_path(format!( + "1 or 2 segments expected, but got {} segments", + segments.len() + ))) + } else { + Self::expect_empty_query(query, CLOUD_CONTEXT)?; + + let component_name = urldecode(segments[0]); + if segments.len() == 2 { + Ok(Self { + component_name, + worker_name: Some(urldecode(segments[1])), + }) + } else { + Ok(Self { + component_name, + worker_name: None, + }) + } + } } fn to_parts(&self) -> (String, Option) { ( - Self::make_path2(&self.component_name, &self.worker_name), + match &self.worker_name { + Some(worker_name) => Self::make_path2(&self.component_name, worker_name), + None => Self::make_path1(&self.component_name), + }, None, ) } @@ -505,20 +527,40 @@ mod tests { pub fn worker_url_to_url() { let typed = WorkerUrl { component_name: "my component".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }; let untyped: GolemUrl = typed.into(); assert_eq!(untyped.to_string(), "worker:///my+component/my+worker"); } + #[test] + pub fn worker_url_to_url_no_name() { + let typed = WorkerUrl { + component_name: "my component".to_string(), + worker_name: None, + }; + + let untyped: GolemUrl = typed.into(); + assert_eq!(untyped.to_string(), "worker:///my+component"); + } + #[test] pub fn worker_url_from_url() { let untyped = 
GolemUrl::from_str("worker:///my+component/my+worker").unwrap(); let typed: WorkerUrl = untyped.try_into().unwrap(); assert_eq!(typed.component_name, "my component"); - assert_eq!(typed.worker_name, "my worker"); + assert_eq!(typed.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_url_from_url_no_name() { + let untyped = GolemUrl::from_str("worker:///my+component").unwrap(); + let typed: WorkerUrl = untyped.try_into().unwrap(); + + assert_eq!(typed.component_name, "my component"); + assert_eq!(typed.worker_name, None); } #[test] @@ -526,7 +568,15 @@ mod tests { let typed = WorkerUrl::from_str("worker:///my+component/my+worker").unwrap(); assert_eq!(typed.component_name, "my component"); - assert_eq!(typed.worker_name, "my worker"); + assert_eq!(typed.worker_name, Some("my worker".to_string())); + } + + #[test] + pub fn worker_url_from_str_no_name() { + let typed = WorkerUrl::from_str("worker:///my+component").unwrap(); + + assert_eq!(typed.component_name, "my component"); + assert_eq!(typed.worker_name, None); } #[test] @@ -564,7 +614,7 @@ mod tests { pub fn worker_or_function_url_to_url() { let typed_w = WorkerOrFunctionUrl::Worker(WorkerUrl { component_name: "my component".to_string(), - worker_name: "my worker".to_string(), + worker_name: Some("my worker".to_string()), }); let typed_f = WorkerOrFunctionUrl::Function(WorkerFunctionUrl { component_name: "my component".to_string(), diff --git a/golem-common/src/uri/oss/urn.rs b/golem-common/src/uri/oss/urn.rs index e5bfd64569..e7cd64bf7c 100644 --- a/golem-common/src/uri/oss/urn.rs +++ b/golem-common/src/uri/oss/urn.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::model::{ComponentId, ComponentVersion, WorkerId}; +use crate::model::{ComponentId, ComponentVersion, TargetWorkerId, WorkerId}; use crate::uri::{ try_from_golem_urn, urldecode, urlencode, GolemUrn, GolemUrnTransformError, TypedGolemUrn, API_DEFINITION_TYPE_NAME, API_DEPLOYMENT_TYPE_NAME, COMPONENT_TYPE_NAME, WORKER_TYPE_NAME, @@ -140,7 +140,22 @@ urn_from_into!(ComponentOrVersionUrn); /// Typed Golem URN for worker #[derive(Debug, Clone, Eq, PartialEq)] pub struct WorkerUrn { - pub id: WorkerId, + pub id: TargetWorkerId, +} + +impl WorkerUrn { + pub fn worker_id(&self) -> Result { + match &self.id.worker_name { + Some(name) => Ok(WorkerId { + component_id: self.id.component_id.clone(), + worker_name: name.clone(), + }), + None => Err(GolemUrnTransformError::invalid_name( + Self::resource_type(), + "Worker name expected".to_string(), + )), + } + } } impl TypedGolemUrn for WorkerUrn { @@ -160,25 +175,35 @@ impl TypedGolemUrn for WorkerUrn { let worker_name = urldecode(worker_name); Ok(Self { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId(id), - worker_name, + worker_name: Some(worker_name), }, }) } else { - Err(GolemUrnTransformError::invalid_name( - Self::resource_type(), - "Worker name expected".to_string(), - )) + let id = Uuid::parse_str(resource_name).map_err(|err| { + GolemUrnTransformError::invalid_name( + Self::resource_type(), + format!("Can't parse UUID: {err}"), + ) + })?; + + Ok(Self { + id: TargetWorkerId { + component_id: ComponentId(id), + worker_name: None, + }, + }) } } fn to_name(&self) -> String { - format!( - "{}/{}", - self.id.component_id.0, - urlencode(&self.id.worker_name) - ) + match self.id.worker_name { + Some(ref worker_name) => { + format!("{}/{}", self.id.component_id.0, urlencode(worker_name)) + } + None => self.id.component_id.0.to_string(), + } } } @@ -433,7 +458,7 @@ impl Display for ResourceUrn { #[cfg(test)] mod tests { - use crate::model::{ComponentId, WorkerId}; + use crate::model::{ComponentId, 
TargetWorkerId, WorkerId}; use crate::uri::oss::urn::{ ApiDefinitionUrn, ApiDeploymentUrn, ComponentOrVersionUrn, ComponentUrn, ComponentVersionUrn, ResourceUrn, WorkerFunctionUrn, WorkerOrFunctionUrn, WorkerUrn, @@ -576,11 +601,11 @@ mod tests { #[test] pub fn worker_urn_to_urn() { let typed = WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my:worker/1".to_string(), + worker_name: Some("my:worker/1".to_string()), }, }; @@ -591,6 +616,24 @@ mod tests { ); } + #[test] + pub fn worker_urn_to_urn_no_name() { + let typed = WorkerUrn { + id: TargetWorkerId { + component_id: ComponentId( + Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), + ), + worker_name: None, + }, + }; + + let untyped: GolemUrn = typed.into(); + assert_eq!( + untyped.to_string(), + "urn:worker:679ae459-8700-41d9-920c-7e2887459c94" + ); + } + #[test] pub fn worker_urn_from_urn() { let untyped = @@ -602,7 +645,20 @@ mod tests { typed.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - assert_eq!(typed.id.worker_name, "my:worker/1"); + assert_eq!(typed.id.worker_name, Some("my:worker/1".to_string())); + } + + #[test] + pub fn worker_urn_from_urn_no_name() { + let untyped = + GolemUrn::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + let typed: WorkerUrn = untyped.try_into().unwrap(); + + assert_eq!( + typed.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed.id.worker_name, None); } #[test] @@ -615,7 +671,18 @@ mod tests { typed.id.component_id.0.to_string(), "679ae459-8700-41d9-920c-7e2887459c94" ); - assert_eq!(typed.id.worker_name, "my:worker/1"); + assert_eq!(typed.id.worker_name, Some("my:worker/1".to_string())); + } + + #[test] + pub fn worker_urn_from_str_no_name() { + let typed = WorkerUrn::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94").unwrap(); + + assert_eq!( + 
typed.id.component_id.0.to_string(), + "679ae459-8700-41d9-920c-7e2887459c94" + ); + assert_eq!(typed.id.worker_name, None); } #[test] @@ -671,11 +738,11 @@ mod tests { #[test] pub fn worker_or_function_urn_to_urn() { let typed_w = WorkerOrFunctionUrn::Worker(WorkerUrn { - id: WorkerId { + id: TargetWorkerId { component_id: ComponentId( Uuid::parse_str("679ae459-8700-41d9-920c-7e2887459c94").unwrap(), ), - worker_name: "my:worker/1".to_string(), + worker_name: Some("my:worker/1".to_string()), }, }); let typed_f = WorkerOrFunctionUrn::Function(WorkerFunctionUrn { @@ -745,16 +812,6 @@ mod tests { ); } - #[test] - pub fn worker_or_function_error() { - let res = WorkerOrFunctionUrn::from_str("urn:worker:679ae459-8700-41d9-920c-7e2887459c94"); - - assert_eq!( - res.err().unwrap().to_string(), - "Failed to parse URN of type worker: Worker name expected" - ); - } - #[test] pub fn api_definition_urn_to_urn() { let typed = ApiDefinitionUrn { diff --git a/golem-component-service-base/src/model.rs b/golem-component-service-base/src/model.rs index 8f6ff9df42..d92f0b3e03 100644 --- a/golem-component-service-base/src/model.rs +++ b/golem-component-service-base/src/model.rs @@ -44,7 +44,7 @@ impl From> for golem_service_base::model::Compon impl From> for golem_api_grpc::proto::golem::component::Component { fn from(value: Component) -> Self { let component_type: golem_api_grpc::proto::golem::component::ComponentType = - value.component_type.clone().into(); + value.component_type.into(); Self { versioned_component_id: Some(value.versioned_component_id.into()), component_name: value.component_name.0, diff --git a/golem-component-service-base/src/repo/mod.rs b/golem-component-service-base/src/repo/mod.rs index 71ba9a9a10..a32ccc71d8 100644 --- a/golem-component-service-base/src/repo/mod.rs +++ b/golem-component-service-base/src/repo/mod.rs @@ -14,22 +14,33 @@ pub mod component; +use sqlx::error::ErrorKind; use std::fmt::Display; #[derive(Debug)] pub enum RepoError { Internal(String), 
+ UniqueViolation(String), } impl From for RepoError { fn from(error: sqlx::Error) -> Self { - RepoError::Internal(error.to_string()) + if let Some(db_error) = error.as_database_error() { + if db_error.kind() == ErrorKind::UniqueViolation { + RepoError::UniqueViolation(db_error.to_string()) + } else { + RepoError::Internal(db_error.to_string()) + } + } else { + RepoError::Internal(error.to_string()) + } } } impl Display for RepoError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + RepoError::UniqueViolation(error) => write!(f, "{}", error), RepoError::Internal(error) => write!(f, "{}", error), } } diff --git a/golem-component-service-base/src/service/component.rs b/golem-component-service-base/src/service/component.rs index 7783d12a1e..4ac02e793d 100644 --- a/golem-component-service-base/src/service/component.rs +++ b/golem-component-service-base/src/service/component.rs @@ -232,7 +232,10 @@ where .try_into() .map_err(|e| ComponentError::internal(e, "Failed to convert record"))?; - self.component_repo.create(&record).await?; + let result = self.component_repo.create(&record).await; + if let Err(RepoError::UniqueViolation(_)) = result { + Err(ComponentError::AlreadyExists(component_id.clone()))?; + } self.component_compilation .enqueue_compilation(component_id, component.versioned_component_id.version) diff --git a/golem-router/golem-services.conf.template b/golem-router/golem-services.conf.template index 1d9aa086b5..83ff7f7b1f 100644 --- a/golem-router/golem-services.conf.template +++ b/golem-router/golem-services.conf.template @@ -27,6 +27,14 @@ http { proxy_pass http://$GOLEM_WORKER_SERVICE_HOST:$GOLEM_WORKER_SERVICE_PORT; } + location ~ /v1/components/[^/]+/invoke$ { + proxy_pass http://$GOLEM_WORKER_SERVICE_HOST:$GOLEM_WORKER_SERVICE_PORT; + } + + location ~ /v1/components/[^/]+/invoke-and-await$ { + proxy_pass http://$GOLEM_WORKER_SERVICE_HOST:$GOLEM_WORKER_SERVICE_PORT; + } + location /v1/components { proxy_pass 
http://$GOLEM_COMPONENT_SERVICE_HOST:$GOLEM_COMPONENT_SERVICE_PORT; } diff --git a/golem-router/golem-services.local.conf b/golem-router/golem-services.local.conf index 6fe4a39b9f..5818d5b3f3 100644 --- a/golem-router/golem-services.local.conf +++ b/golem-router/golem-services.local.conf @@ -27,6 +27,14 @@ http { proxy_pass http://localhost:9005; } + location ~ /v1/components/[^/]+/invoke$ { + proxy_pass http://localhost:9005; + } + + location ~ /v1/components/[^/]+/invoke-and-await$ { + proxy_pass http://localhost:9005; + } + location /v1/components { proxy_pass http://localhost:8083; } diff --git a/golem-test-framework/src/components/component_service/filesystem.rs b/golem-test-framework/src/components/component_service/filesystem.rs index a2558f50ff..b33660cec4 100644 --- a/golem-test-framework/src/components/component_service/filesystem.rs +++ b/golem-test-framework/src/components/component_service/filesystem.rs @@ -15,7 +15,7 @@ use crate::components::component_service::{AddComponentError, ComponentService}; use async_trait::async_trait; use golem_api_grpc::proto::golem::component::v1::component_service_client::ComponentServiceClient; -use golem_common::model::ComponentId; +use golem_common::model::{ComponentId, ComponentType}; use std::path::{Path, PathBuf}; use tonic::transport::Channel; use tracing::{debug, info}; @@ -40,13 +40,21 @@ impl ComponentService for FileSystemComponentService { panic!("No real component service running") } - async fn get_or_add_component(&self, local_path: &Path) -> ComponentId { - self.add_component(local_path) + async fn get_or_add_component( + &self, + local_path: &Path, + component_type: ComponentType, + ) -> ComponentId { + self.add_component(local_path, component_type) .await .expect("Failed to add component") } - async fn add_component(&self, local_path: &Path) -> Result { + async fn add_component( + &self, + local_path: &Path, + component_type: ComponentType, + ) -> Result { let uuid = Uuid::new_v4(); let target_dir = 
&self.root; @@ -65,12 +73,19 @@ impl ComponentService for FileSystemComponentService { ))); } - let _ = - std::fs::copy(local_path, target_dir.join(format!("{uuid}-0.wasm"))).map_err(|err| { - AddComponentError::Other(format!( - "Failed to copy WASM to the local component store: {err}" - )) - }); + let postfix = match component_type { + ComponentType::Ephemeral => "-ephemeral", + ComponentType::Durable => "", + }; + let _ = std::fs::copy( + local_path, + target_dir.join(format!("{uuid}-0{postfix}.wasm")), + ) + .map_err(|err| { + AddComponentError::Other(format!( + "Failed to copy WASM to the local component store: {err}" + )) + }); Ok(ComponentId(uuid)) } @@ -79,11 +94,17 @@ impl ComponentService for FileSystemComponentService { &self, local_path: &Path, _name: &str, + component_type: ComponentType, ) -> Result { - self.add_component(local_path).await + self.add_component(local_path, component_type).await } - async fn update_component(&self, component_id: &ComponentId, local_path: &Path) -> u64 { + async fn update_component( + &self, + component_id: &ComponentId, + local_path: &Path, + component_type: ComponentType, + ) -> u64 { let target_dir = &self.root; debug!("Local component store: {target_dir:?}"); @@ -96,9 +117,13 @@ impl ComponentService for FileSystemComponentService { std::panic!("Source file does not exist: {local_path:?}"); } + let postfix = match component_type { + ComponentType::Ephemeral => "-ephemeral", + ComponentType::Durable => "", + }; let last_version = self.get_latest_version(component_id).await; let new_version = last_version + 1; - let target = target_dir.join(format!("{component_id}-{new_version}.wasm")); + let target = target_dir.join(format!("{component_id}-{new_version}{postfix}.wasm")); let _ = std::fs::copy(local_path, target) .expect("Failed to copy WASM to the local component store"); diff --git a/golem-test-framework/src/components/component_service/mod.rs b/golem-test-framework/src/components/component_service/mod.rs index 
b375cbfc67..32508d01d2 100644 --- a/golem-test-framework/src/components/component_service/mod.rs +++ b/golem-test-framework/src/components/component_service/mod.rs @@ -30,11 +30,12 @@ use golem_api_grpc::proto::golem::component::v1::{ }; use tokio::fs::File; use tokio::io::AsyncReadExt; +use tokio::time::sleep; use tonic::transport::Channel; -use tracing::{info, Level}; +use tracing::{debug, info, Level}; use golem_api_grpc::proto::golem::component::v1::component_service_client::ComponentServiceClient; -use golem_common::model::ComponentId; +use golem_common::model::{ComponentId, ComponentType}; use crate::components::rdb::Rdb; use crate::components::{wait_for_startup_grpc, EnvVarBuilder, GolemEnvVars}; @@ -49,10 +50,22 @@ pub mod spawned; pub trait ComponentService { async fn client(&self) -> ComponentServiceClient; - async fn get_or_add_component(&self, local_path: &Path) -> ComponentId { - let mut retries = 3; + async fn get_or_add_component( + &self, + local_path: &Path, + component_type: ComponentType, + ) -> ComponentId { + let mut retries = 5; loop { - let file_name = local_path.file_name().unwrap().to_string_lossy(); + let mut file_name: String = local_path + .file_name() + .unwrap() + .to_string_lossy() + .to_string(); + if component_type == ComponentType::Ephemeral { + file_name = format!("{}-ephemeral", file_name); + } + let mut client = self.client().await; let response = client .get_components(GetComponentsRequest { @@ -68,12 +81,16 @@ pub trait ComponentService { panic!("Missing response from golem-component-service for get-components") } Some(get_components_response::Result::Success(result)) => { + debug!("Response from get_components was {result:?}"); let latest = result .components .into_iter() .max_by_key(|t| t.versioned_component_id.as_ref().unwrap().version); match latest { - Some(component) => { + Some(component) + if Into::::into(component.component_type()) + == component_type => + { break component .versioned_component_id 
.expect("versioned_component_id field is missing") @@ -82,21 +99,29 @@ pub trait ComponentService { .try_into() .expect("component_id has unexpected format") } - None => match self.add_component(local_path).await { - Ok(component_id) => break component_id, - Err(AddComponentError::AlreadyExists) => { - if retries > 0 { - info!("Component got created in parallel, retrying get_or_add_component"); - retries -= 1; - continue; - } else { - panic!("Component already exists in golem-component-service"); + _ => { + match self + .add_component_with_name(local_path, &file_name, component_type) + .await + { + Ok(component_id) => break component_id, + Err(AddComponentError::AlreadyExists) => { + if retries > 0 { + info!("Component with name {file_name} got created in parallel, retrying get_or_add_component"); + retries -= 1; + sleep(Duration::from_secs(1)).await; + continue; + } else { + panic!("Component with name {file_name} already exists in golem-component-service"); + } + } + Err(AddComponentError::Other(message)) => { + panic!( + "Failed to add component with name {file_name}: {message}" + ); } } - Err(AddComponentError::Other(message)) => { - panic!("Failed to add component: {message}"); - } - }, + } } } Some(get_components_response::Result::Error(error)) => { @@ -109,26 +134,31 @@ pub trait ComponentService { async fn add_component( &self, local_path: &Path, - ) -> Result { + component_type: ComponentType, + ) -> Result { let file_name = local_path.file_name().unwrap().to_string_lossy(); - self.add_component_with_name(local_path, &file_name).await + self.add_component_with_name(local_path, &file_name, component_type) + .await } async fn add_component_with_name( &self, local_path: &Path, name: &str, + component_type: ComponentType, ) -> Result { let mut client = self.client().await; let mut file = File::open(local_path).await.map_err(|_| { AddComponentError::Other(format!("Failed to read component from {local_path:?}")) })?; + let component_type: 
golem_api_grpc::proto::golem::component::ComponentType = + component_type.into(); let mut chunks: Vec = vec![CreateComponentRequest { data: Some(Data::Header(CreateComponentRequestHeader { project_id: None, component_name: name.to_string(), - component_type: None, + component_type: Some(component_type as i32), })), }]; @@ -189,17 +219,24 @@ pub trait ComponentService { } } - async fn update_component(&self, component_id: &ComponentId, local_path: &Path) -> u64 { + async fn update_component( + &self, + component_id: &ComponentId, + local_path: &Path, + component_type: ComponentType, + ) -> u64 { let mut client = self.client().await; let mut file = File::open(local_path) .await .unwrap_or_else(|_| panic!("Failed to read component from {local_path:?}")); + let component_type: golem_api_grpc::proto::golem::component::ComponentType = + component_type.into(); let mut chunks: Vec = vec![UpdateComponentRequest { data: Some(update_component_request::Data::Header( UpdateComponentRequestHeader { component_id: Some(component_id.clone().into()), - component_type: None, + component_type: Some(component_type as i32), }, )), }]; diff --git a/golem-test-framework/src/dsl/mod.rs b/golem-test-framework/src/dsl/mod.rs index d8e690b308..58f6d13a5d 100644 --- a/golem-test-framework/src/dsl/mod.rs +++ b/golem-test-framework/src/dsl/mod.rs @@ -37,9 +37,9 @@ use golem_common::model::oplog::{ }; use golem_common::model::regions::DeletedRegions; use golem_common::model::{ - ComponentId, ComponentVersion, FailedUpdateRecord, IdempotencyKey, ScanCursor, - SuccessfulUpdateRecord, WorkerFilter, WorkerId, WorkerMetadata, WorkerResourceDescription, - WorkerStatusRecord, + ComponentId, ComponentType, ComponentVersion, FailedUpdateRecord, IdempotencyKey, ScanCursor, + SuccessfulUpdateRecord, TargetWorkerId, WorkerFilter, WorkerId, WorkerMetadata, + WorkerResourceDescription, WorkerStatusRecord, }; use golem_wasm_rpc::Value; use std::collections::HashMap; @@ -53,6 +53,7 @@ use uuid::Uuid; 
#[async_trait] pub trait TestDsl { async fn store_component(&self, name: &str) -> ComponentId; + async fn store_ephemeral_component(&self, name: &str) -> ComponentId; async fn store_unique_component(&self, name: &str) -> ComponentId; async fn store_component_unverified(&self, name: &str) -> ComponentId; async fn update_component(&self, component_id: &ComponentId, name: &str) -> ComponentVersion; @@ -94,46 +95,46 @@ pub trait TestDsl { async fn invoke( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result>; async fn invoke_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> crate::Result>; async fn invoke_and_await( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result, Error>>; async fn invoke_and_await_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> crate::Result, Error>>; async fn invoke_and_await_custom( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result, Error>>; async fn invoke_and_await_custom_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> crate::Result, Error>>; async fn invoke_and_await_json( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result>; @@ -172,7 +173,20 @@ impl TestDsl for T { let component_id = self .component_service() - .get_or_add_component(&source_path) + .get_or_add_component(&source_path, ComponentType::Durable) + .await; + + let _ = log_and_save_component_metadata(&source_path).await; + + component_id + } + + async fn store_ephemeral_component(&self, name: &str) -> 
ComponentId { + let source_path = self.component_directory().join(format!("{name}.wasm")); + + let component_id = self + .component_service() + .get_or_add_component(&source_path, ComponentType::Ephemeral) .await; let _ = log_and_save_component_metadata(&source_path).await; @@ -186,7 +200,7 @@ impl TestDsl for T { let uuid = Uuid::new_v4(); let unique_name = format!("{name}-{uuid}"); self.component_service() - .add_component_with_name(&source_path, &unique_name) + .add_component_with_name(&source_path, &unique_name, ComponentType::Durable) .await .expect("Failed to store unique component") } @@ -194,7 +208,7 @@ impl TestDsl for T { async fn store_component_unverified(&self, name: &str) -> ComponentId { let source_path = self.component_directory().join(format!("{name}.wasm")); self.component_service() - .get_or_add_component(&source_path) + .get_or_add_component(&source_path, ComponentType::Durable) .await } @@ -202,7 +216,7 @@ impl TestDsl for T { let source_path = self.component_directory().join(format!("{name}.wasm")); let _ = dump_component_info(&source_path); self.component_service() - .update_component(component_id, &source_path) + .update_component(component_id, &source_path, ComponentType::Durable) .await } @@ -344,14 +358,15 @@ impl TestDsl for T { async fn invoke( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result> { + let target_worker_id: TargetWorkerId = worker_id.into(); let invoke_response = self .worker_service() .invoke(InvokeRequest { - worker_id: Some(worker_id.clone().into()), + worker_id: Some(target_worker_id.into()), idempotency_key: None, function: function_name.to_string(), invoke_parameters: Some(InvokeParameters { @@ -375,15 +390,16 @@ impl TestDsl for T { async fn invoke_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> crate::Result> { + let target_worker_id: 
TargetWorkerId = worker_id.into(); let invoke_response = self .worker_service() .invoke(InvokeRequest { - worker_id: Some(worker_id.clone().into()), + worker_id: Some(target_worker_id.into()), idempotency_key: Some(idempotency_key.clone().into()), function: function_name.to_string(), invoke_parameters: Some(InvokeParameters { @@ -407,7 +423,7 @@ impl TestDsl for T { async fn invoke_and_await( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result, Error>> { @@ -416,7 +432,7 @@ impl TestDsl for T { async fn invoke_and_await_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, @@ -433,7 +449,7 @@ impl TestDsl for T { async fn invoke_and_await_custom( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result, Error>> { @@ -450,15 +466,16 @@ impl TestDsl for T { async fn invoke_and_await_custom_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> crate::Result, Error>> { + let target_worker_id: TargetWorkerId = worker_id.into(); let invoke_response = self .worker_service() .invoke_and_await(InvokeAndAwaitRequest { - worker_id: Some(worker_id.clone().into()), + worker_id: Some(target_worker_id.into()), idempotency_key: Some(idempotency_key.clone().into()), function: function_name.to_string(), invoke_parameters: Some(InvokeParameters { @@ -487,15 +504,16 @@ impl TestDsl for T { async fn invoke_and_await_json( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> crate::Result> { + let target_worker_id: TargetWorkerId = worker_id.into(); let params = params.into_iter().map(|p| p.to_string()).collect(); let invoke_response = self .worker_service() .invoke_and_await_json(InvokeAndAwaitJsonRequest { - 
worker_id: Some(worker_id.clone().into()), + worker_id: Some(target_worker_id.into()), idempotency_key: Some(IdempotencyKey::fresh().into()), function: function_name.to_string(), invoke_parameters: params, @@ -544,10 +562,7 @@ impl TestDsl for T { async fn capture_output_forever( &self, worker_id: &WorkerId, - ) -> ( - UnboundedReceiver>, - tokio::sync::oneshot::Sender<()>, - ) { + ) -> (UnboundedReceiver>, Sender<()>) { let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); let cloned_service = self.worker_service().clone(); let worker_id = worker_id.clone(); @@ -1081,6 +1096,7 @@ async fn log_and_save_component_metadata(path: &Path) { #[async_trait] pub trait TestDslUnsafe { async fn store_component(&self, name: &str) -> ComponentId; + async fn store_ephemeral_component(&self, name: &str) -> ComponentId; async fn store_unique_component(&self, name: &str) -> ComponentId; async fn store_component_unverified(&self, name: &str) -> ComponentId; async fn update_component(&self, component_id: &ComponentId, name: &str) -> ComponentVersion; @@ -1118,33 +1134,33 @@ pub trait TestDslUnsafe { async fn invoke( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result<(), Error>; async fn invoke_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> Result<(), Error>; async fn invoke_and_await( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result, Error>; async fn invoke_and_await_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, ) -> Result, Error>; async fn invoke_and_await_json( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result; @@ -1174,6 +1190,10 @@ impl TestDslUnsafe for T { ::store_component(self, 
name).await } + async fn store_ephemeral_component(&self, name: &str) -> ComponentId { + ::store_ephemeral_component(self, name).await + } + async fn store_unique_component(&self, name: &str) -> ComponentId { ::store_unique_component(self, name).await } @@ -1253,7 +1273,7 @@ impl TestDslUnsafe for T { async fn invoke( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result<(), Error> { @@ -1264,7 +1284,7 @@ impl TestDslUnsafe for T { async fn invoke_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, @@ -1276,7 +1296,7 @@ impl TestDslUnsafe for T { async fn invoke_and_await( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result, Error> { @@ -1287,7 +1307,7 @@ impl TestDslUnsafe for T { async fn invoke_and_await_json( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, function_name: &str, params: Vec, ) -> Result { @@ -1298,7 +1318,7 @@ impl TestDslUnsafe for T { async fn invoke_and_await_with_key( &self, - worker_id: &WorkerId, + worker_id: impl Into + Send + Sync, idempotency_key: &IdempotencyKey, function_name: &str, params: Vec, diff --git a/golem-worker-executor-base/src/durable_host/mod.rs b/golem-worker-executor-base/src/durable_host/mod.rs index bfc92635de..b933337ffc 100644 --- a/golem-worker-executor-base/src/durable_host/mod.rs +++ b/golem-worker-executor-base/src/durable_host/mod.rs @@ -49,9 +49,10 @@ use golem_common::model::oplog::{ }; use golem_common::model::regions::{DeletedRegions, OplogRegion}; use golem_common::model::{ - AccountId, ComponentId, ComponentVersion, FailedUpdateRecord, IdempotencyKey, OwnedWorkerId, - ScanCursor, ScheduledAction, SuccessfulUpdateRecord, Timestamp, WorkerEvent, WorkerFilter, - WorkerId, WorkerMetadata, WorkerResourceDescription, WorkerStatus, WorkerStatusRecord, + AccountId, ComponentId, 
ComponentType, ComponentVersion, FailedUpdateRecord, IdempotencyKey, + OwnedWorkerId, ScanCursor, ScheduledAction, SuccessfulUpdateRecord, Timestamp, WorkerEvent, + WorkerFilter, WorkerId, WorkerMetadata, WorkerResourceDescription, WorkerStatus, + WorkerStatusRecord, }; use golem_wasm_rpc::protobuf::type_annotated_value::TypeAnnotatedValue; use golem_wasm_rpc::wasmtime::ResourceStore; @@ -635,6 +636,7 @@ impl StatusManagement for DurableWorkerCtx { } => { *execution_status = ExecutionStatus::Suspended { last_known_status, + component_type: self.component_metadata().component_type, timestamp: Timestamp::now_utc(), }; } @@ -646,6 +648,7 @@ impl StatusManagement for DurableWorkerCtx { } => { *execution_status = ExecutionStatus::Suspended { last_known_status, + component_type: self.component_metadata().component_type, timestamp: Timestamp::now_utc(), }; await_interruption.send(()).ok(); @@ -655,6 +658,7 @@ impl StatusManagement for DurableWorkerCtx { } => { *execution_status = ExecutionStatus::Suspended { last_known_status, + component_type: self.component_metadata().component_type, timestamp: Timestamp::now_utc(), }; } @@ -673,6 +677,7 @@ impl StatusManagement for DurableWorkerCtx { } => { *execution_status = ExecutionStatus::Running { last_known_status, + component_type: self.component_metadata().component_type, timestamp: Timestamp::now_utc(), }; } @@ -682,6 +687,7 @@ impl StatusManagement for DurableWorkerCtx { } => { *execution_status = ExecutionStatus::Running { last_known_status, + component_type: self.component_metadata().component_type, timestamp: Timestamp::now_utc(), }; } @@ -1085,87 +1091,136 @@ impl> ExternalOperations for Dur store.as_context_mut().data_mut().set_running(); - // Handle the case when recovery immediately starts in a deleted region - // (for example due to a manual update) - store - .as_context_mut() - .data_mut() - .durable_ctx_mut() - .state - .replay_state - .get_out_of_deleted_region() - .await; + if store + .as_context() + .data() + 
.component_metadata() + .component_type + == ComponentType::Ephemeral + { + // Ephemeral workers cannot be recovered - let result = loop { - let cont = store.as_context().data().durable_ctx().state.is_replay(); - - if cont { - let oplog_entry = store - .as_context_mut() - .data_mut() - .durable_ctx_mut() - .state - .replay_state - .get_oplog_entry_exported_function_invoked() - .await; - match oplog_entry { - Err(error) => break Err(error), - Ok(None) => break Ok(RetryDecision::None), - Ok(Some((function_name, function_input, idempotency_key))) => { - debug!("Replaying function {function_name}"); - let span = span!(Level::INFO, "replaying", function = function_name); - store - .as_context_mut() - .data_mut() - .set_current_idempotency_key(idempotency_key) - .await; + // Moving to the end of the oplog + store + .as_context_mut() + .data_mut() + .durable_ctx_mut() + .state + .replay_state + .switch_to_live(); + + // Appending a Restart marker + store + .as_context_mut() + .data_mut() + .get_public_state() + .oplog() + .add(OplogEntry::restart()) + .await; - let full_function_name = function_name.to_string(); - let invoke_result = invoke_worker( - full_function_name.clone(), - function_input.clone(), - store, - instance, - ) - .instrument(span) + Ok(RetryDecision::None) + } else { + // Handle the case when recovery immediately starts in a deleted region + // (for example due to a manual update) + store + .as_context_mut() + .data_mut() + .durable_ctx_mut() + .state + .replay_state + .get_out_of_deleted_region() + .await; + + let result = loop { + let cont = store.as_context().data().durable_ctx().state.is_replay(); + + if cont { + let oplog_entry = store + .as_context_mut() + .data_mut() + .durable_ctx_mut() + .state + .replay_state + .get_oplog_entry_exported_function_invoked() .await; + match oplog_entry { + Err(error) => break Err(error), + Ok(None) => break Ok(RetryDecision::None), + Ok(Some((function_name, function_input, idempotency_key))) => { + 
debug!("Replaying function {function_name}"); + let span = span!(Level::INFO, "replaying", function = function_name); + store + .as_context_mut() + .data_mut() + .set_current_idempotency_key(idempotency_key) + .await; - match invoke_result { - Ok(InvokeResult::Succeeded { - output, - consumed_fuel, - }) => { - let component_metadata = - store.as_context().data().component_metadata(); - - match exports::function_by_name( - &component_metadata.exports, - &full_function_name, - ) { - Ok(value) => { - if let Some(value) = value { - let result = - interpret_function_results(output, value.results) - .map_err(|e| GolemError::ValueMismatch { + let full_function_name = function_name.to_string(); + let invoke_result = invoke_worker( + full_function_name.clone(), + function_input.clone(), + store, + instance, + ) + .instrument(span) + .await; + + match invoke_result { + Ok(InvokeResult::Succeeded { + output, + consumed_fuel, + }) => { + let component_metadata = + store.as_context().data().component_metadata(); + + match exports::function_by_name( + &component_metadata.exports, + &full_function_name, + ) { + Ok(value) => { + if let Some(value) = value { + let result = interpret_function_results( + output, + value.results, + ) + .map_err(|e| GolemError::ValueMismatch { details: e.join(", "), })?; - if let Err(err) = store - .as_context_mut() - .data_mut() - .on_invocation_success( - &full_function_name, - &function_input, - consumed_fuel, - result, - ) - .await - { - break Err(err); + if let Err(err) = store + .as_context_mut() + .data_mut() + .on_invocation_success( + &full_function_name, + &function_input, + consumed_fuel, + result, + ) + .await + { + break Err(err); + } + } else { + let trap_type = TrapType::Error( + WorkerError::InvalidRequest(format!( + "Function {full_function_name} not found" + )), + ); + + let _ = store + .as_context_mut() + .data_mut() + .on_invocation_failure(&trap_type) + .await; + + break Err(GolemError::invalid_request(format!( + "Function 
{full_function_name} not found" + ))); } - } else { + } + Err(err) => { let trap_type = TrapType::Error( WorkerError::InvalidRequest(format!( - "Function {full_function_name} not found" + "Function {full_function_name} not found: {err}" )), ); @@ -1176,103 +1231,89 @@ impl> ExternalOperations for Dur .await; break Err(GolemError::invalid_request(format!( - "Function {full_function_name} not found" - ))); - } - } - Err(err) => { - let trap_type = - TrapType::Error(WorkerError::InvalidRequest(format!( "Function {full_function_name} not found: {err}" ))); - - let _ = store - .as_context_mut() - .data_mut() - .on_invocation_failure(&trap_type) - .await; - - break Err(GolemError::invalid_request(format!( - "Function {full_function_name} not found: {err}" - ))); + } } + count += 1; + continue; } - count += 1; - continue; - } - _ => { - let trap_type = match invoke_result { - Ok(invoke_result) => invoke_result.as_trap_type::(), - Err(error) => { - Some(TrapType::from_error::(&anyhow!(error))) - } - }; - let decision = match trap_type { - Some(trap_type) => { - let decision = store - .as_context_mut() - .data_mut() - .on_invocation_failure(&trap_type) - .await; - - if decision == RetryDecision::None { - // Cannot retry so we need to fail - match trap_type { - TrapType::Interrupt(interrupt_kind) => { - if interrupt_kind == InterruptKind::Interrupt { + _ => { + let trap_type = match invoke_result { + Ok(invoke_result) => invoke_result.as_trap_type::(), + Err(error) => { + Some(TrapType::from_error::(&anyhow!(error))) + } + }; + let decision = match trap_type { + Some(trap_type) => { + let decision = store + .as_context_mut() + .data_mut() + .on_invocation_failure(&trap_type) + .await; + + if decision == RetryDecision::None { + // Cannot retry so we need to fail + match trap_type { + TrapType::Interrupt(interrupt_kind) => { + if interrupt_kind + == InterruptKind::Interrupt + { + break Err(GolemError::runtime( + "Interrupted via the Golem API", + )); + } else { + break 
Err(GolemError::runtime("The worker could not finish replaying a function {function_name}")); + } + } + TrapType::Exit => { + break Err(GolemError::runtime( + "Process exited", + )) + } + TrapType::Error(error) => { + let stderr = store + .as_context() + .data() + .get_public_state() + .event_service() + .get_last_invocation_errors(); break Err(GolemError::runtime( - "Interrupted via the Golem API", + error.to_string(&stderr), )); - } else { - break Err(GolemError::runtime("The worker could not finish replaying a function {function_name}")); } } - TrapType::Exit => { - break Err(GolemError::runtime( - "Process exited", - )) - } - TrapType::Error(error) => { - let stderr = store - .as_context() - .data() - .get_public_state() - .event_service() - .get_last_invocation_errors(); - break Err(GolemError::runtime( - error.to_string(&stderr), - )); - } } - } - decision - } - None => RetryDecision::None, - }; + decision + } + None => RetryDecision::None, + }; - break Ok(decision); + break Ok(decision); + } } } } + } else { + break Ok(RetryDecision::None); } - } else { - break Ok(RetryDecision::None); - } - }; - record_resume_worker(start.elapsed()); - record_number_of_replayed_functions(count); + }; + record_resume_worker(start.elapsed()); + record_number_of_replayed_functions(count); - let final_decision = Self::finalize_pending_update(&result, instance, store).await; + let final_decision = Self::finalize_pending_update(&result, instance, store).await; - // The update finalization has the right to override the Err result with an explicit retry request - if final_decision != RetryDecision::None { - debug!("Retrying prepare_instance after failed update attempt"); - Ok(final_decision) - } else { - store.as_context_mut().data_mut().set_suspended().await?; - debug!("Finished prepare_instance"); - result.map_err(|err| GolemError::failed_to_resume_worker(worker_id.clone(), err)) + // The update finalization has the right to override the Err result with an explicit retry request 
+ if final_decision != RetryDecision::None { + debug!("Retrying prepare_instance after failed update attempt"); + Ok(final_decision) + } else { + store.as_context_mut().data_mut().set_suspended().await?; + debug!("Finished prepare_instance"); + result.map_err(|err| GolemError::failed_to_resume_worker(worker_id.clone(), err)) + } } } diff --git a/golem-worker-executor-base/src/durable_host/wasm_rpc/mod.rs b/golem-worker-executor-base/src/durable_host/wasm_rpc/mod.rs index 7fb264364a..72a25b46ef 100644 --- a/golem-worker-executor-base/src/durable_host/wasm_rpc/mod.rs +++ b/golem-worker-executor-base/src/durable_host/wasm_rpc/mod.rs @@ -27,7 +27,7 @@ use crate::workerctx::WorkerCtx; use anyhow::anyhow; use async_trait::async_trait; use golem_common::model::oplog::{OplogEntry, WrappedFunctionType}; -use golem_common::model::{IdempotencyKey, OwnedWorkerId, WorkerId}; +use golem_common::model::{IdempotencyKey, OwnedWorkerId, TargetWorkerId, WorkerId}; use golem_common::uri::oss::urn::{WorkerFunctionUrn, WorkerOrFunctionUrn}; use golem_wasm_rpc::golem::rpc::types::{ FutureInvokeResult, HostFutureInvokeResult, Pollable, Uri, @@ -50,6 +50,9 @@ impl HostWasmRpc for DurableWorkerCtx { match location.parse_as_golem_urn() { Some((remote_worker_id, None)) => { + let remote_worker_id = remote_worker_id + .try_into_worker_id() + .ok_or(anyhow!("Must specify a worker name"))?; // TODO: this should not be a requirement here let remote_worker_id = OwnedWorkerId::new(&self.owned_worker_id.account_id, &remote_worker_id); let demand = self.rpc().create_demand(&remote_worker_id).await; @@ -543,18 +546,20 @@ pub struct WasmRpcEntryPayload { } pub trait UrnExtensions { - fn parse_as_golem_urn(&self) -> Option<(WorkerId, Option)>; + fn parse_as_golem_urn(&self) -> Option<(TargetWorkerId, Option)>; fn golem_urn(worker_id: &WorkerId, function_name: Option<&str>) -> Self; } impl UrnExtensions for Uri { - fn parse_as_golem_urn(&self) -> Option<(WorkerId, Option)> { + fn 
parse_as_golem_urn(&self) -> Option<(TargetWorkerId, Option)> { let urn = WorkerOrFunctionUrn::from_str(&self.value).ok()?; match urn { WorkerOrFunctionUrn::Worker(w) => Some((w.id, None)), - WorkerOrFunctionUrn::Function(f) => Some((f.id, Some(f.function))), + WorkerOrFunctionUrn::Function(f) => { + Some((f.id.into_target_worker_id(), Some(f.function))) + } } } diff --git a/golem-worker-executor-base/src/grpc.rs b/golem-worker-executor-base/src/grpc.rs index b1fcce1103..e298c055e4 100644 --- a/golem-worker-executor-base/src/grpc.rs +++ b/golem-worker-executor-base/src/grpc.rs @@ -42,14 +42,14 @@ use golem_api_grpc::proto::golem::workerexecutor::v1::{ }; use golem_common::grpc::{ proto_account_id_string, proto_component_id_string, proto_idempotency_key_string, - proto_promise_id_string, proto_worker_id_string, + proto_promise_id_string, proto_target_worker_id_string, proto_worker_id_string, }; use golem_common::metrics::api::record_new_grpc_api_active_stream; use golem_common::model::oplog::UpdateDescription; use golem_common::model::{ - AccountId, ComponentId, IdempotencyKey, OwnedWorkerId, ScanCursor, ShardId, - TimestampedWorkerInvocation, WorkerEvent, WorkerFilter, WorkerId, WorkerInvocation, - WorkerMetadata, WorkerStatus, WorkerStatusRecord, + AccountId, ComponentId, ComponentType, IdempotencyKey, OwnedWorkerId, ScanCursor, ShardId, + TargetWorkerId, TimestampedWorkerInvocation, WorkerEvent, WorkerFilter, WorkerId, + WorkerInvocation, WorkerMetadata, WorkerStatus, WorkerStatusRecord, }; use golem_common::{model as common_model, recorded_grpc_api_request}; @@ -58,7 +58,7 @@ use crate::services::events::Event; use crate::services::worker_activator::{DefaultWorkerActivator, LazyWorkerActivator}; use crate::services::worker_event::WorkerEventReceiver; use crate::services::{ - All, HasActiveWorkers, HasAll, HasEvents, HasPromiseService, + All, HasActiveWorkers, HasAll, HasComponentService, HasEvents, HasPromiseService, HasRunningWorkerEnumerationService, 
HasShardManagerService, HasShardService, HasWorkerEnumerationService, HasWorkerService, UsesAllDeps, }; @@ -610,7 +610,23 @@ impl + UsesAllDeps + Send + Sync + &self, request: &Req, ) -> Result>, GolemError> { - let worker_id = request.worker_id()?; + let target_worker_id = request.worker_id()?; + + let current_assignment = self.shard_service().current_assignment()?; + + let unspecified_name = target_worker_id.worker_name.is_none(); + let worker_id = target_worker_id.into_worker_id( + ¤t_assignment.shard_ids, + current_assignment.number_of_shards, + ); + + if unspecified_name { + info!( + worker_id = worker_id.to_string(), + "Generated new unique worker id" + ); + } + let account_id: AccountId = request.account_id()?; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); @@ -846,6 +862,19 @@ impl + UsesAllDeps + Send + Sync + )); } + let component_metadata = self + .component_service() + .get_metadata( + &worker_id.component_id, + Some(metadata.last_known_status.component_version), + ) + .await?; + if component_metadata.component_type == ComponentType::Ephemeral { + return Err(GolemError::invalid_request( + "Ephemeral workers cannot be updated", + )); + } + match request.mode() { UpdateMode::Automatic => { let update_description = UpdateDescription::Automatic { @@ -1155,12 +1184,12 @@ impl + UsesAllDeps + Send + Sync + async fn invoke_and_await_worker( &self, - request: Request, + request: Request, ) -> Result, Status> { let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_and_await_worker", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), account_id = proto_account_id_string(&request.account_id), ); @@ -1199,7 +1228,7 @@ impl + UsesAllDeps + Send + Sync + let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_and_await_worker_json_typed", - 
worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), account_id = proto_account_id_string(&request.account_id), ); @@ -1242,7 +1271,7 @@ impl + UsesAllDeps + Send + Sync + let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_worker", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), function = request.name, account_id = proto_account_id_string(&request.account_id) ); @@ -1693,7 +1722,7 @@ trait GrpcInvokeRequest { fn account_id(&self) -> Result; fn account_limits(&self) -> Option; fn input(&self) -> Vec; - fn worker_id(&self) -> Result; + fn worker_id(&self) -> Result; fn idempotency_key(&self) -> Result, GolemError>; fn name(&self) -> String; fn args(&self) -> Option>; @@ -1718,7 +1747,7 @@ impl GrpcInvokeRequest for golem::workerexecutor::v1::InvokeWorkerRequest { self.input.clone() } - fn worker_id(&self) -> Result { + fn worker_id(&self) -> Result { self.worker_id .clone() .ok_or(GolemError::invalid_request("worker_id not found"))? @@ -1770,7 +1799,7 @@ impl GrpcInvokeRequest for golem::workerexecutor::v1::InvokeAndAwaitWorkerReques self.input.clone() } - fn worker_id(&self) -> Result { + fn worker_id(&self) -> Result { self.worker_id .clone() .ok_or(GolemError::invalid_request("worker_id not found"))? 
diff --git a/golem-worker-executor-base/src/model.rs b/golem-worker-executor-base/src/model.rs index 6912938572..4aecb17512 100644 --- a/golem-worker-executor-base/src/model.rs +++ b/golem-worker-executor-base/src/model.rs @@ -23,7 +23,9 @@ use wasmtime::Trap; use golem_common::model::oplog::WorkerError; use golem_common::model::regions::DeletedRegions; -use golem_common::model::{ShardAssignment, ShardId, Timestamp, WorkerId, WorkerStatusRecord}; +use golem_common::model::{ + ComponentType, ShardAssignment, ShardId, Timestamp, WorkerId, WorkerStatusRecord, +}; use crate::error::{GolemError, WorkerOutOfMemory}; use crate::workerctx::WorkerCtx; @@ -128,20 +130,24 @@ impl From for CurrentResou pub enum ExecutionStatus { Loading { last_known_status: WorkerStatusRecord, + component_type: ComponentType, timestamp: Timestamp, }, Running { last_known_status: WorkerStatusRecord, + component_type: ComponentType, timestamp: Timestamp, }, Suspended { last_known_status: WorkerStatusRecord, + component_type: ComponentType, timestamp: Timestamp, }, Interrupting { interrupt_kind: InterruptKind, await_interruption: Arc>, last_known_status: WorkerStatusRecord, + component_type: ComponentType, timestamp: Timestamp, }, } @@ -193,6 +199,28 @@ impl ExecutionStatus { ExecutionStatus::Interrupting { timestamp, .. } => *timestamp, } } + + pub fn component_type(&self) -> ComponentType { + match self { + ExecutionStatus::Loading { component_type, .. } => *component_type, + ExecutionStatus::Running { component_type, .. } => *component_type, + ExecutionStatus::Suspended { component_type, .. } => *component_type, + ExecutionStatus::Interrupting { component_type, .. } => *component_type, + } + } + + pub fn set_component_type(&mut self, new_component_type: ComponentType) { + match self { + ExecutionStatus::Loading { component_type, .. } => *component_type = new_component_type, + ExecutionStatus::Running { component_type, .. 
} => *component_type = new_component_type, + ExecutionStatus::Suspended { component_type, .. } => { + *component_type = new_component_type + } + ExecutionStatus::Interrupting { component_type, .. } => { + *component_type = new_component_type + } + } + } } /// Describes the various reasons a worker can run into a trap diff --git a/golem-worker-executor-base/src/services/component.rs b/golem-worker-executor-base/src/services/component.rs index 2430c6c6be..d02eaf9966 100644 --- a/golem-worker-executor-base/src/services/component.rs +++ b/golem-worker-executor-base/src/services/component.rs @@ -38,7 +38,7 @@ use golem_common::client::{GrpcClient, GrpcClientConfig}; use golem_common::config::RetryConfig; use golem_common::metrics::external_calls::record_external_call_response_size_bytes; use golem_common::model::component_metadata::RawComponentMetadata; -use golem_common::model::{ComponentId, ComponentVersion}; +use golem_common::model::{ComponentId, ComponentType, ComponentVersion}; use golem_common::retries::with_retries; use golem_wasm_ast::analysis::AnalysedExport; use http::Uri; @@ -56,6 +56,7 @@ pub struct ComponentMetadata { pub size: u64, pub memories: Vec, pub exports: Vec, + pub component_type: ComponentType, } /// Service for downloading a specific Golem component from the Golem Component API @@ -438,6 +439,7 @@ async fn get_metadata_via_grpc( "Undefined component version".to_string(), ))?, size: component.component_size, + component_type: component.component_type().into(), memories: component .metadata .as_ref() @@ -685,6 +687,17 @@ impl ComponentServiceLocalFileSystem { } } + fn parse_postfix(s: &str) -> Result<(ComponentVersion, ComponentType), String> { + let first_part = s.split('-').next().ok_or("Could not get version part")?; + let version = first_part.parse::().map_err(|err| err.to_string())?; + let component_type = if s.ends_with("-ephemeral") { + ComponentType::Ephemeral + } else { + ComponentType::Durable + }; + Ok((version, component_type)) + } + 
async fn get_metadata_impl( root: &Path, component_id: &ComponentId, @@ -706,13 +719,13 @@ impl ComponentServiceLocalFileSystem { let matching_files: Vec<_> = matching_files .into_iter() - .filter_map(|(path, s)| s.parse::().ok().map(|version| (path, version))) + .filter_map(|(path, s)| Self::parse_postfix(&s).ok().map(|version| (path, version))) .collect(); - let (path, version) = match forced_version { + let (path, (version, component_type)) = match forced_version { Some(forced_version) => matching_files .iter() - .find(|(_path, version)| *version == forced_version) + .find(|(_path, (version, _component_type))| *version == forced_version) .ok_or(GolemError::GetLatestVersionOfComponentFailed { component_id: component_id.clone(), reason: "Could not find any component with the given id and version" @@ -720,7 +733,7 @@ impl ComponentServiceLocalFileSystem { })?, None => matching_files .iter() - .max_by_key(|(_path, version)| *version) + .max_by_key(|(_path, (version, _))| *version) .ok_or(GolemError::GetLatestVersionOfComponentFailed { component_id: component_id.clone(), reason: "Could not find any component with the given id".to_string(), @@ -737,6 +750,7 @@ impl ComponentServiceLocalFileSystem { size, memories, exports, + component_type: *component_type, }) } @@ -765,13 +779,20 @@ impl ComponentService for ComponentServiceLocalFileSystem { component_id: &ComponentId, component_version: ComponentVersion, ) -> Result<(Component, ComponentMetadata), GolemError> { - let path = self - .root - .join(format!("{}-{}.wasm", component_id, component_version)); - let metadata = self .get_metadata(component_id, Some(component_version)) .await?; + + let postfix = match metadata.component_type { + ComponentType::Ephemeral => "-ephemeral", + ComponentType::Durable => "", + }; + + let path = self.root.join(format!( + "{}-{}{postfix}.wasm", + component_id, component_version + )); + Ok(( self.get_from_path(&path, engine, component_id, component_version) .await?, diff --git 
a/golem-worker-executor-base/src/services/oplog/tests.rs b/golem-worker-executor-base/src/services/oplog/tests.rs index a093b97220..f339852178 100644 --- a/golem-worker-executor-base/src/services/oplog/tests.rs +++ b/golem-worker-executor-base/src/services/oplog/tests.rs @@ -217,6 +217,9 @@ fn rounded(entry: OplogEntry) -> OplogEntry { context, message, }, + OplogEntry::Restart { timestamp } => OplogEntry::Restart { + timestamp: rounded_ts(timestamp), + }, } } diff --git a/golem-worker-executor-base/src/services/shard.rs b/golem-worker-executor-base/src/services/shard.rs index 1c15915952..e2d0568ae1 100644 --- a/golem-worker-executor-base/src/services/shard.rs +++ b/golem-worker-executor-base/src/services/shard.rs @@ -33,7 +33,7 @@ pub trait ShardService { fn register(&self, number_of_shards: usize, shard_ids: &HashSet); fn revoke_shards(&self, shard_ids: &HashSet) -> Result<(), GolemError>; fn current_assignment(&self) -> Result; - fn opt_current_assignment(&self) -> Option; + fn try_get_current_assignment(&self) -> Option; } pub struct ShardServiceDefault { @@ -147,7 +147,7 @@ impl ShardService for ShardServiceDefault { }) } - fn opt_current_assignment(&self) -> Option { + fn try_get_current_assignment(&self) -> Option { self.shard_assignment.read().unwrap().clone() } } @@ -204,7 +204,7 @@ impl ShardService for ShardServiceMock { Ok(ShardAssignment::default()) } - fn opt_current_assignment(&self) -> Option { + fn try_get_current_assignment(&self) -> Option { tracing::info!("ShardServiceMock::opt_current_assignment"); None } diff --git a/golem-worker-executor-base/src/services/worker.rs b/golem-worker-executor-base/src/services/worker.rs index 74b35c097f..8b2e57eadb 100644 --- a/golem-worker-executor-base/src/services/worker.rs +++ b/golem-worker-executor-base/src/services/worker.rs @@ -17,7 +17,8 @@ use std::sync::Arc; use async_trait::async_trait; use golem_common::model::oplog::{OplogEntry, OplogIndex}; use golem_common::model::{ - OwnedWorkerId, ShardId, 
WorkerId, WorkerMetadata, WorkerStatus, WorkerStatusRecord, + ComponentType, OwnedWorkerId, ShardId, WorkerId, WorkerMetadata, WorkerStatus, + WorkerStatusRecord, }; use tracing::debug; @@ -33,7 +34,11 @@ use crate::storage::keyvalue::{ /// Service for persisting the current set of Golem workers represented by their metadata #[async_trait] pub trait WorkerService { - async fn add(&self, worker_metadata: &WorkerMetadata) -> Result<(), GolemError>; + async fn add( + &self, + worker_metadata: &WorkerMetadata, + component_type: ComponentType, + ) -> Result<(), GolemError>; async fn get(&self, owned_worker_id: &OwnedWorkerId) -> Option; @@ -45,6 +50,7 @@ pub trait WorkerService { &self, owned_worker_id: &OwnedWorkerId, status_value: &WorkerStatusRecord, + component_type: ComponentType, ); } @@ -102,7 +108,11 @@ impl DefaultWorkerService { #[async_trait] impl WorkerService for DefaultWorkerService { - async fn add(&self, worker_metadata: &WorkerMetadata) -> Result<(), GolemError> { + async fn add( + &self, + worker_metadata: &WorkerMetadata, + component_type: ComponentType, + ) -> Result<(), GolemError> { record_worker_call("add"); let worker_id = &worker_metadata.worker_id; @@ -122,34 +132,37 @@ impl WorkerService for DefaultWorkerService { .create(&owned_worker_id, initial_oplog_entry) .await; - self.key_value_storage - .with_entity("worker", "add", "worker_status") - .set( - KeyValueStorageNamespace::Worker, - &Self::status_key(worker_id), - &worker_metadata.last_known_status, - ) - .await - .unwrap_or_else(|err| panic!("failed to set worker status in KV storage: {err}")); + if component_type != ComponentType::Ephemeral { + self.key_value_storage + .with_entity("worker", "add", "worker_status") + .set( + KeyValueStorageNamespace::Worker, + &Self::status_key(worker_id), + &worker_metadata.last_known_status, + ) + .await + .unwrap_or_else(|err| panic!("failed to set worker status in KV storage: {err}")); - if worker_metadata.last_known_status.status == 
WorkerStatus::Running { - let shard_assignment = self.shard_service.current_assignment()?; - let shard_id = ShardId::from_worker_id(worker_id, shard_assignment.number_of_shards); + if worker_metadata.last_known_status.status == WorkerStatus::Running { + let shard_assignment = self.shard_service.current_assignment()?; + let shard_id = + ShardId::from_worker_id(worker_id, shard_assignment.number_of_shards); - debug!( - "Adding worker to the list of running workers for shard {shard_id} in KV storage" - ); + debug!( + "Adding worker to the list of running workers for shard {shard_id} in KV storage" + ); - self - .key_value_storage - .with_entity("worker", "add", "worker_id") - .add_to_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), &owned_worker_id) - .await - .unwrap_or_else(|err| { - panic!( - "failed to add worker to the set of running workers per shard ids in KV storage: {err}" - ) - }); + self + .key_value_storage + .with_entity("worker", "add", "worker_id") + .add_to_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), &owned_worker_id) + .await + .unwrap_or_else(|err| { + panic!( + "failed to add worker to the set of running workers per shard ids in KV storage: {err}" + ) + }); + } } Ok(()) @@ -221,7 +234,7 @@ impl WorkerService for DefaultWorkerService { } async fn get_running_workers_in_shards(&self) -> Vec { - let shard_assignment = self.shard_service.opt_current_assignment(); + let shard_assignment = self.shard_service.try_get_current_assignment(); let mut result: Vec = vec![]; if let Some(shard_assignment) = shard_assignment { for shard_id in shard_assignment.shard_ids { @@ -274,55 +287,58 @@ impl WorkerService for DefaultWorkerService { &self, owned_worker_id: &OwnedWorkerId, status_value: &WorkerStatusRecord, + component_type: ComponentType, ) { record_worker_call("update_status"); - debug!("Updating worker status to {status_value:?}"); - self.key_value_storage - .with_entity("worker", 
"update_status", "worker_status") - .set( - KeyValueStorageNamespace::Worker, - &Self::status_key(&owned_worker_id.worker_id), - status_value, - ) - .await - .unwrap_or_else(|err| panic!("failed to set worker status in KV storage: {err}")); + if component_type != ComponentType::Ephemeral { + debug!("Updating worker status to {status_value:?}"); + self.key_value_storage + .with_entity("worker", "update_status", "worker_status") + .set( + KeyValueStorageNamespace::Worker, + &Self::status_key(&owned_worker_id.worker_id), + status_value, + ) + .await + .unwrap_or_else(|err| panic!("failed to set worker status in KV storage: {err}")); + + let shard_assignment = self + .shard_service + .current_assignment() + .expect("sharding assignment is not ready"); + let shard_id = ShardId::from_worker_id( + &owned_worker_id.worker_id, + shard_assignment.number_of_shards, + ); - let shard_assignment = self - .shard_service - .current_assignment() - .expect("sharding assignment is not ready"); - let shard_id = ShardId::from_worker_id( - &owned_worker_id.worker_id, - shard_assignment.number_of_shards, - ); + if status_value.status == WorkerStatus::Running { + debug!("Adding worker to the set of running workers in shard {shard_id}"); - if status_value.status == WorkerStatus::Running { - debug!("Adding worker to the set of running workers in shard {shard_id}"); + self + .key_value_storage + .with_entity("worker", "add", "worker_id") + .add_to_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), owned_worker_id) + .await + .unwrap_or_else(|err| { + panic!( + "failed to add worker to the set of running workers per shard ids on KV storage: {err}" + ) + }); + } else { + debug!("Removing worker from the set of running workers in shard {shard_id}"); - self - .key_value_storage - .with_entity("worker", "add", "worker_id") - .add_to_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), owned_worker_id) - .await - .unwrap_or_else(|err| { - panic!( 
- "failed to add worker to the set of running workers per shard ids on KV storage: {err}" - ) - }); - } else { - debug!("Removing worker from the set of running workers in shard {shard_id}"); - - self - .key_value_storage - .with_entity("worker", "remove", "worker_id") - .remove_from_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), owned_worker_id) - .await - .unwrap_or_else(|err| { - panic!( - "failed to remove worker from the set of running worker ids per shard on KV storage: {err}" - ) - }); + self + .key_value_storage + .with_entity("worker", "remove", "worker_id") + .remove_from_set(KeyValueStorageNamespace::Worker, &Self::running_in_shard_key(&shard_id), owned_worker_id) + .await + .unwrap_or_else(|err| { + panic!( + "failed to remove worker from the set of running worker ids per shard on KV storage: {err}" + ) + }); + } } } } @@ -347,7 +363,11 @@ impl WorkerServiceMock { #[cfg(any(feature = "mocks", test))] #[async_trait] impl WorkerService for WorkerServiceMock { - async fn add(&self, _worker_metadata: &WorkerMetadata) -> Result<(), GolemError> { + async fn add( + &self, + _worker_metadata: &WorkerMetadata, + _component_type: ComponentType, + ) -> Result<(), GolemError> { unimplemented!() } @@ -367,6 +387,7 @@ impl WorkerService for WorkerServiceMock { &self, _owned_worker_id: &OwnedWorkerId, _status_value: &WorkerStatusRecord, + _component_type: ComponentType, ) { unimplemented!() } diff --git a/golem-worker-executor-base/src/services/worker_proxy.rs b/golem-worker-executor-base/src/services/worker_proxy.rs index 705621ea69..98b2815579 100644 --- a/golem-worker-executor-base/src/services/worker_proxy.rs +++ b/golem-worker-executor-base/src/services/worker_proxy.rs @@ -190,7 +190,7 @@ impl WorkerProxy for RemoteWorkerProxy { .call(move |client| { Box::pin(client.invoke_and_await(authorised_grpc_request( InvokeAndAwaitRequest { - worker_id: Some(owned_worker_id.worker_id().into()), + worker_id: 
Some(owned_worker_id.worker_id().into_target_worker_id().into()), idempotency_key: idempotency_key.clone().map(|k| k.into()), function: function_name.clone(), invoke_parameters: invoke_parameters.clone(), @@ -255,7 +255,7 @@ impl WorkerProxy for RemoteWorkerProxy { .call(move |client| { Box::pin(client.invoke(authorised_grpc_request( InvokeRequest { - worker_id: Some(owned_worker_id.worker_id().into()), + worker_id: Some(owned_worker_id.worker_id().into_target_worker_id().into()), idempotency_key: idempotency_key.clone().map(|k| k.into()), function: function_name.clone(), invoke_parameters: invoke_parameters.clone(), diff --git a/golem-worker-executor-base/src/worker.rs b/golem-worker-executor-base/src/worker.rs index 9f69a9cabd..0285bdfe5d 100644 --- a/golem-worker-executor-base/src/worker.rs +++ b/golem-worker-executor-base/src/worker.rs @@ -37,12 +37,12 @@ use crate::services::{ use crate::workerctx::{PublicWorkerIo, WorkerCtx}; use anyhow::anyhow; use golem_common::config::RetryConfig; -use golem_common::model::exports; use golem_common::model::oplog::{ OplogEntry, OplogIndex, TimestampedUpdateDescription, UpdateDescription, WorkerError, WorkerResourceId, }; use golem_common::model::regions::{DeletedRegions, DeletedRegionsBuilder, OplogRegion}; +use golem_common::model::{exports, ComponentType}; use golem_common::model::{ ComponentVersion, FailedUpdateRecord, IdempotencyKey, OwnedWorkerId, SuccessfulUpdateRecord, Timestamp, TimestampedWorkerInvocation, WorkerId, WorkerInvocation, WorkerMetadata, @@ -187,6 +187,13 @@ impl Worker { .collect::>(); let initial_invocation_results = worker_metadata.last_known_status.invocation_results.clone(); + let initial_component_metadata = deps + .component_service() + .get_metadata( + &owned_worker_id.worker_id.component_id, + Some(worker_metadata.last_known_status.component_version), + ) + .await?; let queue = Arc::new(RwLock::new(VecDeque::from_iter( initial_pending_invocations.iter().cloned(), @@ -208,6 +215,7 @@ impl Worker 
{ let execution_status = Arc::new(RwLock::new(ExecutionStatus::Suspended { last_known_status: worker_metadata.last_known_status.clone(), + component_type: initial_component_metadata.component_type, timestamp: Timestamp::now_utc(), })); @@ -348,6 +356,7 @@ impl Worker { let mut execution_status = self.execution_status.write().unwrap(); *execution_status = ExecutionStatus::Loading { last_known_status: execution_status.last_known_status().clone(), + component_type: execution_status.component_type(), timestamp: Timestamp::now_utc(), }; } @@ -407,6 +416,7 @@ impl Worker { interrupt_kind, await_interruption: Arc::new(sender), last_known_status, + component_type: execution_status.component_type(), timestamp: Timestamp::now_utc(), }; Some(receiver) @@ -635,8 +645,9 @@ impl Worker { // last oplog index as reference. self.oplog().commit().await; // Storing the status in the key-value storage + let component_type = self.execution_status.read().unwrap().component_type(); self.worker_service() - .update_status(&self.owned_worker_id, &status_value) + .update_status(&self.owned_worker_id, &status_value, component_type) .await; // Updating the status in memory self.execution_status @@ -956,7 +967,9 @@ impl Worker { ..initial_status }, }; - this.worker_service().add(&worker_metadata).await?; + this.worker_service() + .add(&worker_metadata, component_metadata.component_type) + .await?; Ok(worker_metadata) } Some(previous_metadata) => Ok(WorkerMetadata { @@ -1421,7 +1434,19 @@ impl RunningWorker { ) .await .unwrap(); // TODO: handle this error - false // do not break + + if store + .data_mut() + .component_metadata() + .component_type + == ComponentType::Ephemeral + { + final_decision = + RetryDecision::None; + true // stop after the invocation + } else { + false // continue processing the queue + } } Err(error) => { let trap_type = @@ -1972,6 +1997,9 @@ fn calculate_latest_worker_status( OplogEntry::Log { .. } => { result = WorkerStatus::Running; } + OplogEntry::Restart { .. 
} => { + result = WorkerStatus::Idle; + } } } result diff --git a/golem-worker-executor-base/tests/api.rs b/golem-worker-executor-base/tests/api.rs index f29c540e15..fd1052b2c1 100644 --- a/golem-worker-executor-base/tests/api.rs +++ b/golem-worker-executor-base/tests/api.rs @@ -30,7 +30,7 @@ use golem_api_grpc::proto::golem::worker::LogEvent; use golem_api_grpc::proto::golem::workerexecutor::v1::CompletePromiseRequest; use golem_common::model::{ AccountId, ComponentId, FilterComparator, IdempotencyKey, PromiseId, ScanCursor, - StringFilterComparator, Timestamp, WorkerFilter, WorkerId, WorkerMetadata, + StringFilterComparator, TargetWorkerId, Timestamp, WorkerFilter, WorkerId, WorkerMetadata, WorkerResourceDescription, WorkerStatus, }; use golem_wasm_rpc::Value; @@ -61,7 +61,7 @@ async fn interruption() { let worker_id_clone = worker_id.clone(); let fiber = tokio::spawn(async move { executor_clone - .invoke_and_await(&worker_id_clone, "run", vec![]) + .invoke_and_await(worker_id_clone, "run", vec![]) .await }); @@ -98,7 +98,7 @@ async fn simulated_crash() { let fiber = tokio::spawn(async move { let start_time = tokio::time::Instant::now(); let invoke_result = executor_clone - .invoke_and_await(&worker_id_clone, "run", vec![]) + .invoke_and_await(worker_id_clone, "run", vec![]) .await; let elapsed = start_time.elapsed(); (invoke_result, elapsed) @@ -199,7 +199,7 @@ async fn shopping_cart_example() { drop(executor); - assert!( + check!( contents == Ok(vec![Value::List(vec![ Value::Record(vec![ @@ -221,7 +221,7 @@ async fn shopping_cart_example() { Value::U32(20), ]), ])]) - ) + ); } #[tokio::test] @@ -266,6 +266,142 @@ async fn dynamic_worker_creation() { ); } +fn get_env_result(env: Vec) -> HashMap { + match env.into_iter().next() { + Some(Value::Result(Ok(Some(inner)))) => match *inner { + Value::List(items) => { + let pairs = items + .into_iter() + .filter_map(|item| match item { + Value::Tuple(values) if values.len() == 2 => { + let mut iter = values.into_iter(); 
+ let key = iter.next(); + let value = iter.next(); + match (key, value) { + (Some(Value::String(key)), Some(Value::String(value))) => { + Some((key, value)) + } + _ => None, + } + } + _ => None, + }) + .collect::>(); + HashMap::from_iter(pairs) + } + _ => panic!("Unexpected result value"), + }, + _ => panic!("Unexpected result value"), + } +} + +#[tokio::test] +#[tracing::instrument] +async fn dynamic_worker_creation_without_name() { + let context = TestContext::new(); + let executor = start(&context).await.unwrap(); + + let component_id = executor.store_component("environment-service").await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: None, + }; + + let env1 = executor + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + let env2 = executor + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + + drop(executor); + + let env1 = get_env_result(env1); + let env2 = get_env_result(env2); + + check!(env1.contains_key("GOLEM_WORKER_NAME")); + check!(env1.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env1.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env2.contains_key("GOLEM_WORKER_NAME")); + check!(env2.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env2.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env1.get("GOLEM_WORKER_NAME") != env2.get("GOLEM_WORKER_NAME")); +} + +#[tokio::test] +#[tracing::instrument] +async fn ephemeral_worker_creation_without_name() { + let context = TestContext::new(); + let executor = start(&context).await.unwrap(); + + let component_id = executor + .store_ephemeral_component("environment-service") + .await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: None, + }; + + let env1 = executor + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", 
vec![]) + .await + .unwrap(); + let env2 = executor + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + + drop(executor); + + let env1 = get_env_result(env1); + let env2 = get_env_result(env2); + + check!(env1.contains_key("GOLEM_WORKER_NAME")); + check!(env1.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env1.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env2.contains_key("GOLEM_WORKER_NAME")); + check!(env2.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env2.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env1.get("GOLEM_WORKER_NAME") != env2.get("GOLEM_WORKER_NAME")); +} + +#[tokio::test] +#[tracing::instrument] +async fn ephemeral_worker_creation_with_name_is_not_persistent() { + let context = TestContext::new(); + let executor = start(&context).await.unwrap(); + + let component_id = executor.store_ephemeral_component("counters").await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: Some("test".to_string()), + }; + + let _ = executor + .invoke_and_await( + worker_id.clone(), + "rpc:counters/api.{inc-global-by}", + vec![Value::U64(2)], + ) + .await + .unwrap(); + + let result = executor + .invoke_and_await( + worker_id.clone(), + "rpc:counters/api.{get-global-value}", + vec![], + ) + .await + .unwrap(); + + drop(executor); + + check!(result == vec![Value::U64(0)]); +} + #[tokio::test] #[tracing::instrument] async fn promise() { @@ -386,7 +522,7 @@ async fn get_workers_from_worker() { let result = executor .invoke_and_await( - worker_id, + worker_id.clone(), "golem:it/api.{get-workers}", vec![ component_id_val, @@ -457,7 +593,11 @@ async fn get_metadata_from_worker() { let worker_id_val1 = get_worker_id_val(worker_id1); let result = executor - .invoke_and_await(worker_id1, "golem:it/api.{get-self-metadata}", vec![]) + .invoke_and_await( + worker_id1.clone(), + 
"golem:it/api.{get-self-metadata}", + vec![], + ) .await .unwrap(); @@ -475,7 +615,7 @@ async fn get_metadata_from_worker() { let result = executor .invoke_and_await( - worker_id1, + worker_id1.clone(), "golem:it/api.{get-worker-metadata}", vec![worker_id_val2.clone()], ) diff --git a/golem-worker-executor-base/tests/measure_test_component_mem.rs b/golem-worker-executor-base/tests/measure_test_component_mem.rs index 53cd004ebd..c890a87c5c 100644 --- a/golem-worker-executor-base/tests/measure_test_component_mem.rs +++ b/golem-worker-executor-base/tests/measure_test_component_mem.rs @@ -1,5 +1,6 @@ use crate::common::{start, TestContext, TestWorkerExecutor}; use anyhow::anyhow; +use golem_common::model::ComponentType; use golem_test_framework::config::TestDependencies; use golem_test_framework::dsl::TestDsl; use golem_wasm_ast::analysis::AnalysisContext; @@ -90,7 +91,7 @@ async fn measure_component( let component_id = executor .component_service() - .get_or_add_component(path) + .get_or_add_component(path, ComponentType::Durable) .await; let data = std::fs::read(path)?; diff --git a/golem-worker-service-base/src/service/worker/default.rs b/golem-worker-service-base/src/service/worker/default.rs index c1f10c18e7..e5ed82eb19 100644 --- a/golem-worker-service-base/src/service/worker/default.rs +++ b/golem-worker-service-base/src/service/worker/default.rs @@ -37,10 +37,9 @@ use golem_common::client::MultiTargetGrpcClient; use golem_common::config::RetryConfig; use golem_common::model::oplog::OplogIndex; use golem_common::model::{ - AccountId, ComponentId, ComponentVersion, FilterComparator, IdempotencyKey, ScanCursor, - Timestamp, WorkerFilter, WorkerStatus, + AccountId, ComponentId, ComponentVersion, FilterComparator, IdempotencyKey, PromiseId, + ScanCursor, TargetWorkerId, Timestamp, WorkerFilter, WorkerId, WorkerStatus, }; -use golem_common::model::{PromiseId, WorkerId}; use golem_service_base::model::{GolemErrorUnknown, ResourceLimits, WorkerMetadata}; use 
golem_service_base::routing_table::HasRoutingTableService; use golem_service_base::{ @@ -86,7 +85,7 @@ pub trait WorkerService { // Accepts Vec and returns TypeAnnotatedValue async fn invoke_and_await_function_json( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -97,7 +96,7 @@ pub trait WorkerService { // Accepts a Vec and returns a Vec (with no type information) async fn invoke_and_await_function_proto( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -108,7 +107,7 @@ pub trait WorkerService { // Accepts Vec parameters as input async fn invoke_function_json( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -119,7 +118,7 @@ pub trait WorkerService { // Accepts Vec as input async fn invoke_function_proto( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -292,7 +291,7 @@ where _auth_ctx: &AuthCtx, ) -> WorkerResult { let worker_id = worker_id.clone(); - let worker_id_err: golem_common::model::WorkerId = worker_id.clone(); + let worker_id_err: WorkerId = worker_id.clone(); let stream = self .call_worker_executor( worker_id.clone(), @@ -359,7 +358,7 @@ where async fn invoke_and_await_function_json( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -428,7 +427,7 @@ where async fn invoke_and_await_function_proto( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -488,7 +487,7 @@ where async fn invoke_function_json( &self, - worker_id: &WorkerId, + worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -536,7 +535,7 @@ where async fn invoke_function_proto( &self, - worker_id: &WorkerId, + 
worker_id: &TargetWorkerId, idempotency_key: Option, function_name: String, params: Vec, @@ -931,25 +930,25 @@ where component_id.clone().into(); let account_id = metadata.account_id.clone().map(|id| id.into()); Box::pin(worker_executor_client.get_workers_metadata( - golem_api_grpc::proto::golem::workerexecutor::v1::GetWorkersMetadataRequest { + workerexecutor::v1::GetWorkersMetadataRequest { component_id: Some(component_id), filter: filter.clone().map(|f| f.into()), cursor: Some(cursor.clone().into()), count, precise, account_id, - } + }, )) }, |response| match response.into_inner() { workerexecutor::v1::GetWorkersMetadataResponse { result: - Some(workerexecutor::v1::get_workers_metadata_response::Result::Success( - workerexecutor::v1::GetWorkersMetadataSuccessResponse { - workers, - cursor, - }, - )), + Some(workerexecutor::v1::get_workers_metadata_response::Result::Success( + workerexecutor::v1::GetWorkersMetadataSuccessResponse { + workers, + cursor, + }, + )), } => { let workers = workers .into_iter() @@ -964,9 +963,9 @@ where } workerexecutor::v1::GetWorkersMetadataResponse { result: - Some(workerexecutor::v1::get_workers_metadata_response::Result::Failure( - err, - )), + Some(workerexecutor::v1::get_workers_metadata_response::Result::Failure( + err, + )), } => Err(err.into()), workerexecutor::v1::GetWorkersMetadataResponse { .. 
} => { Err("Empty response".into()) @@ -1039,7 +1038,7 @@ where async fn invoke_and_await_function_json( &self, - _worker_id: &WorkerId, + _worker_id: &TargetWorkerId, _idempotency_key: Option, _function_name: String, _params: Vec, @@ -1054,7 +1053,7 @@ where async fn invoke_and_await_function_proto( &self, - _worker_id: &WorkerId, + _worker_id: &TargetWorkerId, _idempotency_key: Option, _function_name: String, _params: Vec, @@ -1066,7 +1065,7 @@ where async fn invoke_function_json( &self, - _worker_id: &WorkerId, + _worker_id: &TargetWorkerId, _idempotency_key: Option, _function_name: String, _params: Vec, @@ -1078,7 +1077,7 @@ where async fn invoke_function_proto( &self, - _worker_id: &WorkerId, + _worker_id: &TargetWorkerId, _idempotency_key: Option, _function_name: String, _params: Vec, @@ -1171,7 +1170,7 @@ where _metadata: WorkerRequestMetadata, _auth_ctx: &AuthCtx, ) -> WorkerResult { - let worker_id = golem_common::model::WorkerId { + let worker_id = WorkerId { component_id: worker_id.component_id.clone(), worker_name: worker_id.worker_name.to_json_string(), }; diff --git a/golem-worker-service-base/src/service/worker/routing_logic.rs b/golem-worker-service-base/src/service/worker/routing_logic.rs index 675966be61..5fd8c1abea 100644 --- a/golem-worker-service-base/src/service/worker/routing_logic.rs +++ b/golem-worker-service-base/src/service/worker/routing_logic.rs @@ -29,7 +29,7 @@ use golem_api_grpc::proto::golem::worker::v1::WorkerExecutionError; use golem_api_grpc::proto::golem::workerexecutor::v1::worker_executor_client::WorkerExecutorClient; use golem_common::client::MultiTargetGrpcClient; use golem_common::config::RetryConfig; -use golem_common::model::{Pod, ShardId, WorkerId}; +use golem_common::model::{Pod, ShardId, TargetWorkerId, WorkerId}; use golem_common::retriable_error::IsRetriableError; use golem_common::retries::get_delay; use golem_service_base::model::{GolemError, GolemErrorInvalidShardId, GolemErrorUnknown}; @@ -81,7 +81,7 @@ pub trait 
CallOnExecutor { + Clone + 'static; - fn tracing_kind() -> &'static str; + fn tracing_kind(&self) -> &'static str; } #[async_trait] @@ -129,11 +129,49 @@ impl CallOnExecutor for WorkerId { } } - fn tracing_kind() -> &'static str { + fn tracing_kind(&self) -> &'static str { "WorkerId" } } +#[async_trait] +impl CallOnExecutor for TargetWorkerId { + type ResultOut = Out; + + async fn call_on_worker_executor( + &self, + context: &(impl HasRoutingTableService + HasWorkerExecutorClients + Send + Sync), + f: F, + ) -> Result<(Option, Option), CallWorkerExecutorErrorWithContext> + where + F: for<'a> Fn( + &'a mut WorkerExecutorClient, + ) + -> Pin> + 'a + Send>> + + Send + + Sync + + Clone + + 'static, + { + if let Some(worker_id) = self.clone().try_into_worker_id() { + // The TargetWorkerId had a worker name so we know which shard we need to call it on + worker_id.call_on_worker_executor(context, f).await + } else { + // The TargetWorkerId did not have a worker name specified so we can forward the call to a random + // executor + RandomExecutor.call_on_worker_executor(context, f).await + } + } + + fn tracing_kind(&self) -> &'static str { + if self.worker_name.is_none() { + "RandomExecutor" + } else { + "WorkerId" + } + } +} + pub struct RandomExecutor; #[async_trait] @@ -181,7 +219,7 @@ impl CallOnExecutor for RandomExecutor { } } - fn tracing_kind() -> &'static str { + fn tracing_kind(&self) -> &'static str { "RandomExecutor" } } @@ -246,7 +284,7 @@ impl CallOnExecutor for AllExecutors { } } - fn tracing_kind() -> &'static str { + fn tracing_kind(&self) -> &'static str { "AllExecutors" } } @@ -332,7 +370,7 @@ impl Routing { let mut retry = RetryState::new(self.worker_executor_retry_config()); loop { - let span = retry.start_attempt(Target::tracing_kind()); + let span = retry.start_attempt(Target::tracing_kind(&target)); let worker_result = target .call_on_worker_executor(self, remote_call.clone()) diff --git a/golem-worker-service/src/api/worker.rs 
b/golem-worker-service/src/api/worker.rs index 247656d33d..f9e98efc18 100644 --- a/golem-worker-service/src/api/worker.rs +++ b/golem-worker-service/src/api/worker.rs @@ -1,6 +1,8 @@ use crate::empty_worker_metadata; use crate::service::{component::ComponentService, worker::WorkerService}; -use golem_common::model::{ComponentId, IdempotencyKey, ScanCursor, WorkerFilter, WorkerId}; +use golem_common::model::{ + ComponentId, IdempotencyKey, ScanCursor, TargetWorkerId, WorkerFilter, WorkerId, +}; use golem_common::recorded_http_api_request; use golem_service_base::api_tags::ApiTags; use golem_service_base::auth::EmptyAuthCtx; @@ -119,7 +121,51 @@ impl WorkerApi { record.result(response) } - /// Invoke a function and await it's resolution + /// Invoke a function and await its resolution on a new worker with a random generated name + /// + /// Ideal for invoking ephemeral components, but works with durable ones as well. + /// Supply the parameters in the request body as JSON. + #[oai( + path = "/:component_id/invoke-and-await", + method = "post", + operation_id = "invoke_and_await_function_without_name" + )] + async fn invoke_and_await_function_without_name( + &self, + component_id: Path, + #[oai(name = "Idempotency-Key")] idempotency_key: Header>, + function: Query, + params: Json, + ) -> Result> { + let worker_id = make_target_worker_id(component_id.0, None)?; + + let record = recorded_http_api_request!( + "invoke_and_await_function", + worker_id = worker_id.to_string(), + idempotency_key = idempotency_key.0.as_ref().map(|v| v.value.clone()), + function = function.0 + ); + + let precise_jsons = params.0.params; + + let response = self + .worker_service + .invoke_and_await_function_json( + &worker_id, + idempotency_key.0, + function.0, + precise_jsons, + None, + empty_worker_metadata(), + ) + .instrument(record.span.clone()) + .await + .map_err(|e| e.into()) + .map(|result| Json(InvokeResult { result })); + record.result(response) + } + + /// Invoke a function and 
await its resolution /// /// Supply the parameters in the request body as JSON. #[oai( @@ -135,7 +181,7 @@ impl WorkerApi { function: Query, params: Json, ) -> Result> { - let worker_id = make_worker_id(component_id.0, worker_name.0)?; + let worker_id = make_target_worker_id(component_id.0, Some(worker_name.0))?; let record = recorded_http_api_request!( "invoke_and_await_function", @@ -165,7 +211,52 @@ impl WorkerApi { /// Invoke a function /// - /// A simpler version of the previously defined invoke and await endpoint just triggers the execution of a function and immediately returns. + /// Ideal for invoking ephemeral components, but works with durable ones as well. + /// Triggers the execution of a function and immediately returns. + #[oai( + path = "/:component_id/invoke", + method = "post", + operation_id = "invoke_function_without_name" + )] + async fn invoke_function_without_name( + &self, + component_id: Path, + #[oai(name = "Idempotency-Key")] idempotency_key: Header>, + function: Query, + params: Json, + ) -> Result> { + let worker_id = make_target_worker_id(component_id.0, None)?; + + let record = recorded_http_api_request!( + "invoke_function", + worker_id = worker_id.to_string(), + idempotency_key = idempotency_key.0.as_ref().map(|v| v.value.clone()), + function = function.0 + ); + + let precise_json_array = params.0.params; + + let response = self + .worker_service + .invoke_function_json( + &worker_id, + idempotency_key.0, + function.0, + precise_json_array.clone(), + None, + empty_worker_metadata(), + ) + .instrument(record.span.clone()) + .await + .map_err(|e| e.into()) + .map(|_| Json(InvokeResponse {})); + + record.result(response) + } + + /// Invoke a function + /// + /// Triggers the execution of a function and immediately returns. 
#[oai( path = "/:component_id/workers/:worker_name/invoke", method = "post", @@ -179,7 +270,7 @@ impl WorkerApi { function: Query, params: Json, ) -> Result> { - let worker_id = make_worker_id(component_id.0, worker_name.0)?; + let worker_id = make_target_worker_id(component_id.0, Some(worker_name.0))?; let record = recorded_http_api_request!( "invoke_function", @@ -212,7 +303,7 @@ impl WorkerApi { /// /// Completes a promise with a given custom array of bytes. /// The promise must be previously created from within the worker, and it's identifier (a combination of a worker identifier and an oplogIdx ) must be sent out to an external caller so it can use this endpoint to mark the promise completed. - /// The data field is sent back to the worker and it has no predefined meaning. + /// The data field is sent back to the worker, and it has no predefined meaning. #[oai( path = "/:component_id/workers/:worker_name/complete", method = "post", @@ -539,3 +630,21 @@ fn make_worker_id( worker_name, }) } + +fn make_target_worker_id( + component_id: ComponentId, + worker_name: Option, +) -> std::result::Result { + if let Some(worker_name) = &worker_name { + validate_worker_name(worker_name).map_err(|error| { + WorkerApiBaseError::BadRequest(Json(ErrorsBody { + errors: vec![format!("Invalid worker name: {error}")], + })) + })?; + } + + Ok(TargetWorkerId { + component_id, + worker_name, + }) +} diff --git a/golem-worker-service/src/grpcapi/worker.rs b/golem-worker-service/src/grpcapi/worker.rs index b4ea2d6fde..25e6cb78db 100644 --- a/golem-worker-service/src/grpcapi/worker.rs +++ b/golem-worker-service/src/grpcapi/worker.rs @@ -37,9 +37,10 @@ use golem_api_grpc::proto::golem::worker::v1::{ use golem_api_grpc::proto::golem::worker::{InvokeResult, WorkerMetadata}; use golem_common::grpc::{ proto_component_id_string, proto_idempotency_key_string, - proto_invocation_context_parent_worker_id_string, proto_worker_id_string, + proto_invocation_context_parent_worker_id_string, 
proto_target_worker_id_string, + proto_worker_id_string, }; -use golem_common::model::{ComponentVersion, ScanCursor, WorkerFilter, WorkerId}; +use golem_common::model::{ComponentVersion, ScanCursor, TargetWorkerId, WorkerFilter, WorkerId}; use golem_common::recorded_grpc_api_request; use golem_service_base::auth::EmptyAuthCtx; use golem_service_base::model::validate_worker_name; @@ -215,7 +216,7 @@ impl GrpcWorkerService for WorkerGrpcApi { let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_and_await", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), function = request.function, context_parent_worker_id = @@ -246,7 +247,7 @@ impl GrpcWorkerService for WorkerGrpcApi { let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_and_await_json", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), function = request.function, context_parent_worker_id = @@ -277,7 +278,7 @@ impl GrpcWorkerService for WorkerGrpcApi { let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), function = request.function, context_parent_worker_id = @@ -304,7 +305,7 @@ impl GrpcWorkerService for WorkerGrpcApi { let request = request.into_inner(); let record = recorded_grpc_api_request!( "invoke_json", - worker_id = proto_worker_id_string(&request.worker_id), + worker_id = proto_target_worker_id_string(&request.worker_id), idempotency_key = proto_idempotency_key_string(&request.idempotency_key), function = request.function, 
context_parent_worker_id = @@ -588,7 +589,7 @@ impl WorkerGrpcApi { } async fn invoke(&self, request: InvokeRequest) -> Result<(), GrpcWorkerError> { - let worker_id = validate_protobuf_worker_id(request.worker_id)?; + let worker_id = validate_protobuf_target_worker_id(request.worker_id)?; let params = request .invoke_parameters @@ -609,7 +610,7 @@ impl WorkerGrpcApi { } async fn invoke_json(&self, request: InvokeJsonRequest) -> Result<(), GrpcWorkerError> { - let worker_id = validate_protobuf_worker_id(request.worker_id)?; + let worker_id = validate_protobuf_target_worker_id(request.worker_id)?; let params = parse_json_invoke_parameters(&request.invoke_parameters)?; @@ -636,7 +637,7 @@ impl WorkerGrpcApi { &self, request: InvokeAndAwaitRequest, ) -> Result { - let worker_id = validate_protobuf_worker_id(request.worker_id)?; + let worker_id = validate_protobuf_target_worker_id(request.worker_id)?; let params = request .invoke_parameters @@ -661,7 +662,7 @@ impl WorkerGrpcApi { &self, request: InvokeAndAwaitJsonRequest, ) -> Result { - let worker_id = validate_protobuf_worker_id(request.worker_id)?; + let worker_id = validate_protobuf_target_worker_id(request.worker_id)?; let params = parse_json_invoke_parameters(&request.invoke_parameters)?; let idempotency_key = request @@ -752,6 +753,20 @@ fn validated_worker_id( }) } +fn validated_target_worker_id( + component_id: golem_common::model::ComponentId, + worker_name: Option, +) -> Result { + if let Some(worker_name) = &worker_name { + validate_worker_name(worker_name) + .map_err(|error| bad_request_error(format!("Invalid worker name: {error}")))?; + } + Ok(TargetWorkerId { + component_id, + worker_name, + }) +} + fn validate_protobuf_worker_id( worker_id: Option, ) -> Result { @@ -762,6 +777,16 @@ fn validate_protobuf_worker_id( validated_worker_id(worker_id.component_id, worker_id.worker_name) } +fn validate_protobuf_target_worker_id( + worker_id: Option, +) -> Result { + let worker_id = worker_id.ok_or_else(|| 
bad_request_error("Missing worker id"))?; + let worker_id: TargetWorkerId = worker_id + .try_into() + .map_err(|e| bad_request_error(format!("Invalid target worker id: {e}")))?; + validated_target_worker_id(worker_id.component_id, worker_id.worker_name) +} + fn bad_request_error(error: T) -> GrpcWorkerError where T: Into, diff --git a/golem-worker-service/src/worker_bridge_request_executor.rs b/golem-worker-service/src/worker_bridge_request_executor.rs index a2ed73bb08..0ed9af9b8f 100644 --- a/golem-worker-service/src/worker_bridge_request_executor.rs +++ b/golem-worker-service/src/worker_bridge_request_executor.rs @@ -90,7 +90,7 @@ mod internal { let type_annotated_value = default_executor .worker_service .invoke_and_await_function_json( - &worker_id, + &worker_id.into_target_worker_id(), worker_request_params.idempotency_key, worker_request_params.function_name.to_string(), invoke_parameters, diff --git a/integration-tests/tests/worker.rs b/integration-tests/tests/worker.rs index 78ded0af30..402869a3e0 100644 --- a/integration-tests/tests/worker.rs +++ b/integration-tests/tests/worker.rs @@ -23,8 +23,8 @@ use std::sync::{Arc, Mutex}; use golem_common::model::oplog::WorkerResourceId; use golem_common::model::{ - ComponentId, FilterComparator, ScanCursor, StringFilterComparator, Timestamp, WorkerFilter, - WorkerId, WorkerMetadata, WorkerResourceDescription, WorkerStatus, + ComponentId, FilterComparator, ScanCursor, StringFilterComparator, TargetWorkerId, Timestamp, + WorkerFilter, WorkerId, WorkerMetadata, WorkerResourceDescription, WorkerStatus, }; use rand::seq::IteratorRandom; use serde_json::json; @@ -72,6 +72,125 @@ async fn dynamic_worker_creation() { ); } +fn get_env_result(env: Vec) -> HashMap { + match env.into_iter().next() { + Some(Value::Result(Ok(Some(inner)))) => match *inner { + Value::List(items) => { + let pairs = items + .into_iter() + .filter_map(|item| match item { + Value::Tuple(values) if values.len() == 2 => { + let mut iter = 
values.into_iter(); + let key = iter.next(); + let value = iter.next(); + match (key, value) { + (Some(Value::String(key)), Some(Value::String(value))) => { + Some((key, value)) + } + _ => None, + } + } + _ => None, + }) + .collect::>(); + HashMap::from_iter(pairs) + } + _ => panic!("Unexpected result value"), + }, + _ => panic!("Unexpected result value"), + } +} + +#[tokio::test] +#[tracing::instrument] +async fn dynamic_worker_creation_without_name() { + let component_id = DEPS.store_component("environment-service").await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: None, + }; + + let env1 = DEPS + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + let env2 = DEPS + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + + let env1 = get_env_result(env1); + let env2 = get_env_result(env2); + + check!(env1.contains_key("GOLEM_WORKER_NAME")); + check!(env1.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env1.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env2.contains_key("GOLEM_WORKER_NAME")); + check!(env2.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env2.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env1.get("GOLEM_WORKER_NAME") != env2.get("GOLEM_WORKER_NAME")); +} + +#[tokio::test] +#[tracing::instrument] +async fn ephemeral_worker_creation_without_name() { + let component_id = DEPS.store_ephemeral_component("environment-service").await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: None, + }; + + let env1 = DEPS + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + let env2 = DEPS + .invoke_and_await(worker_id.clone(), "golem:it/api.{get-environment}", vec![]) + .await + .unwrap(); + + let env1 = get_env_result(env1); + let env2 = 
get_env_result(env2); + + check!(env1.contains_key("GOLEM_WORKER_NAME")); + check!(env1.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env1.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env2.contains_key("GOLEM_WORKER_NAME")); + check!(env2.get("GOLEM_COMPONENT_ID") == Some(&component_id.to_string())); + check!(env2.get("GOLEM_COMPONENT_VERSION") == Some(&"0".to_string())); + check!(env1.get("GOLEM_WORKER_NAME") != env2.get("GOLEM_WORKER_NAME")); +} + +#[tokio::test] +#[tracing::instrument] +async fn ephemeral_worker_creation_with_name_is_not_persistent() { + let component_id = DEPS.store_ephemeral_component("counters").await; + let worker_id = TargetWorkerId { + component_id: component_id.clone(), + worker_name: Some("test".to_string()), + }; + + let _ = DEPS + .invoke_and_await( + worker_id.clone(), + "rpc:counters/api.{inc-global-by}", + vec![Value::U64(2)], + ) + .await + .unwrap(); + + let result = DEPS + .invoke_and_await( + worker_id.clone(), + "rpc:counters/api.{get-global-value}", + vec![], + ) + .await + .unwrap(); + + check!(result == vec![Value::U64(0)]); +} + #[tokio::test] #[tracing::instrument] async fn counter_resource_test_1() { diff --git a/kube/golem-chart/templates/ingress.yaml b/kube/golem-chart/templates/ingress.yaml index 963480165a..4bf7ca70c2 100644 --- a/kube/golem-chart/templates/ingress.yaml +++ b/kube/golem-chart/templates/ingress.yaml @@ -34,6 +34,20 @@ spec: name: service-worker-service-{{.Values.env}} port: number: {{ .Values.workerService.ports.http }} + - path: "/v1/components/[^/]+/invoke$" + pathType: ImplementationSpecific + backend: + service: + name: service-worker-service-{{.Values.env}} + port: + number: {{ .Values.workerService.ports.http }} + - path: "/v1/components/[^/]+/invoke-and-await$" + pathType: ImplementationSpecific + backend: + service: + name: service-worker-service-{{.Values.env}} + port: + number: {{ .Values.workerService.ports.http }} - path: / pathType: 
ImplementationSpecific backend: @@ -86,6 +100,20 @@ spec: name: service-worker-service-{{.Values.env}} port: number: {{ .Values.workerService.ports.http }} + - path: "/v1/components/*/invoke" + pathType: ImplementationSpecific + backend: + service: + name: service-worker-service-{{.Values.env}} + port: + number: {{ .Values.workerService.ports.http }} + - path: "/v1/components/*/invoke-and-await" + pathType: ImplementationSpecific + backend: + service: + name: service-worker-service-{{.Values.env}} + port: + number: {{ .Values.workerService.ports.http }} - path: "/*" pathType: ImplementationSpecific backend: diff --git a/openapi/golem-service.yaml b/openapi/golem-service.yaml index 9945d0f3c0..b7c4ab1771 100644 --- a/openapi/golem-service.yaml +++ b/openapi/golem-service.yaml @@ -334,11 +334,94 @@ paths: application/json; charset=utf-8: schema: $ref: '#/components/schemas/GolemErrorBody' + /v1/components/{component_id}/invoke-and-await: + post: + tags: + - Worker + summary: Invoke a function and await its resolution on a new worker with a random generated name + description: |- + Ideal for invoking ephemeral components, but works with durable ones as well. + Supply the parameters in the request body as JSON. 
+ operationId: invoke_and_await_function_without_name + parameters: + - in: path + name: component_id + required: true + deprecated: false + schema: + type: string + format: uuid + explode: true + style: simple + - in: header + name: Idempotency-Key + deprecated: false + schema: + type: string + explode: true + style: simple + - in: query + name: function + required: true + deprecated: false + schema: + type: string + explode: true + style: form + requestBody: + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/InvokeParameters' + required: true + responses: + '200': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/InvokeResult' + '400': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorsBody' + '401': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '403': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '404': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '409': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '500': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/GolemErrorBody' /v1/components/{component_id}/workers/{worker_name}/invoke-and-await: post: tags: - Worker - summary: Invoke a function and await it's resolution + summary: Invoke a function and await its resolution description: Supply the parameters in the request body as JSON. 
operationId: invoke_and_await_function parameters: @@ -423,12 +506,95 @@ paths: application/json; charset=utf-8: schema: $ref: '#/components/schemas/GolemErrorBody' + /v1/components/{component_id}/invoke: + post: + tags: + - Worker + summary: Invoke a function + description: |- + Ideal for invoking ephemeral components, but works with durable ones as well. + Triggers the execution of a function and immediately returns. + operationId: invoke_function_without_name + parameters: + - in: path + name: component_id + required: true + deprecated: false + schema: + type: string + format: uuid + explode: true + style: simple + - in: header + name: Idempotency-Key + deprecated: false + schema: + type: string + explode: true + style: simple + - in: query + name: function + required: true + deprecated: false + schema: + type: string + explode: true + style: form + requestBody: + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/InvokeParameters' + required: true + responses: + '200': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/InvokeResponse' + '400': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorsBody' + '401': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '403': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '404': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '409': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/ErrorBody' + '500': + description: '' + content: + application/json; charset=utf-8: + schema: + $ref: '#/components/schemas/GolemErrorBody' /v1/components/{component_id}/workers/{worker_name}/invoke: post: tags: - Worker summary: Invoke a function 
- description: A simpler version of the previously defined invoke and await endpoint just triggers the execution of a function and immediately returns. + description: Triggers the execution of a function and immediately returns. operationId: invoke_function parameters: - in: path @@ -520,7 +686,7 @@ paths: description: |- Completes a promise with a given custom array of bytes. The promise must be previously created from within the worker, and it's identifier (a combination of a worker identifier and an oplogIdx ) must be sent out to an external caller so it can use this endpoint to mark the promise completed. - The data field is sent back to the worker and it has no predefined meaning. + The data field is sent back to the worker, and it has no predefined meaning. operationId: complete_promise parameters: - in: path From 08eedaa7b14d61741376ccfb1aac6854e4a95a72 Mon Sep 17 00:00:00 2001 From: Daniel Vigovszky Date: Mon, 23 Sep 2024 13:47:39 +0200 Subject: [PATCH 2/2] Tests and fixes for oplog corruption bug (#962) --- Cargo.lock | 8 +- Cargo.toml | 2 +- golem-cli/Cargo.toml | 2 +- .../src/services/oplog/mock.rs | 6 +- .../src/services/oplog/mod.rs | 7 +- .../src/services/oplog/multilayer.rs | 16 +- .../src/services/oplog/primary.rs | 9 +- .../src/services/oplog/tests.rs | 292 +++++++++++++++++- .../src/services/scheduler.rs | 5 +- .../src/services/worker.rs | 32 +- golem-worker-executor-base/src/worker.rs | 6 +- 11 files changed, 353 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46ad0792e1..9bb31df48d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3815,9 +3815,9 @@ dependencies = [ [[package]] name = "golem-wasm-rpc" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aeeb1d3e3cfd9c96a6032ed5969533b048ee6d995f1713e717ea5103916b6c70" +checksum = "6e5137fe5950679be704177c8fec9ef5d4c195bb874707d6d35035a892a9183f" dependencies = [ "arbitrary", "async-recursion", @@ -3839,9 +3839,9 
@@ dependencies = [ [[package]] name = "golem-wasm-rpc-stubgen" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc7c8772c322c6203949f0ceea33510bce3b2cbda08f5d9f385c36c7414e745" +checksum = "5be1bfec5496fc4f5e913abe3ff49e78ce78b6065977ab73c9b2b0b662426542" dependencies = [ "anyhow", "cargo-component", diff --git a/Cargo.toml b/Cargo.toml index d6e036fe5e..b9beb8ffff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,7 +94,7 @@ futures-core = "0.3.29" futures-util = "0.3.29" git-version = "0.3.9" golem-wasm-ast = "1.0.0" -golem-wasm-rpc = { version = "1.0.2", default-features = false, features = [ +golem-wasm-rpc = { version = "1.0.3", default-features = false, features = [ "host", ] } http = "1.0.0" # keep in sync with wasmtime diff --git a/golem-cli/Cargo.toml b/golem-cli/Cargo.toml index a46e3e044c..a31e953d44 100644 --- a/golem-cli/Cargo.toml +++ b/golem-cli/Cargo.toml @@ -39,7 +39,7 @@ futures-util = { workspace = true } golem-examples = "1.0.5" golem-wasm-ast = { workspace = true } golem-wasm-rpc = { workspace = true } -golem-wasm-rpc-stubgen = { version = "1.0.2", optional = true } +golem-wasm-rpc-stubgen = { version = "1.0.3", optional = true } h2 = "0.3.24" http = { workspace = true } humansize = { workspace = true } diff --git a/golem-worker-executor-base/src/services/oplog/mock.rs b/golem-worker-executor-base/src/services/oplog/mock.rs index 2f677a977c..415e643396 100644 --- a/golem-worker-executor-base/src/services/oplog/mock.rs +++ b/golem-worker-executor-base/src/services/oplog/mock.rs @@ -49,7 +49,11 @@ impl OplogService for OplogServiceMock { unimplemented!() } - async fn open(&self, _owned_worker_id: &OwnedWorkerId) -> Arc { + async fn open( + &self, + _owned_worker_id: &OwnedWorkerId, + _last_oplog_index: OplogIndex, + ) -> Arc { unimplemented!() } diff --git a/golem-worker-executor-base/src/services/oplog/mod.rs b/golem-worker-executor-base/src/services/oplog/mod.rs index 
92dd686ff0..bc85be7dd4 100644 --- a/golem-worker-executor-base/src/services/oplog/mod.rs +++ b/golem-worker-executor-base/src/services/oplog/mod.rs @@ -71,8 +71,11 @@ pub trait OplogService: Debug { owned_worker_id: &OwnedWorkerId, initial_entry: OplogEntry, ) -> Arc; - async fn open(&self, owned_worker_id: &OwnedWorkerId) - -> Arc; + async fn open( + &self, + owned_worker_id: &OwnedWorkerId, + last_oplog_index: OplogIndex, + ) -> Arc; async fn get_last_index(&self, owned_worker_id: &OwnedWorkerId) -> OplogIndex; diff --git a/golem-worker-executor-base/src/services/oplog/multilayer.rs b/golem-worker-executor-base/src/services/oplog/multilayer.rs index 1b70e3df57..6b64e1c905 100644 --- a/golem-worker-executor-base/src/services/oplog/multilayer.rs +++ b/golem-worker-executor-base/src/services/oplog/multilayer.rs @@ -148,6 +148,7 @@ struct CreateOplogConstructor { initial_entry: Option, primary: Arc, service: MultiLayerOplogService, + last_oplog_index: OplogIndex, } impl CreateOplogConstructor { @@ -156,12 +157,14 @@ impl CreateOplogConstructor { initial_entry: Option, primary: Arc, service: MultiLayerOplogService, + last_oplog_index: OplogIndex, ) -> Self { Self { owned_worker_id, initial_entry, primary, service, + last_oplog_index, } } } @@ -177,7 +180,9 @@ impl OplogConstructor for CreateOplogConstructor { .create(&self.owned_worker_id, initial_entry) .await } else { - self.primary.open(&self.owned_worker_id).await + self.primary + .open(&self.owned_worker_id, self.last_oplog_index) + .await }; Arc::new(MultiLayerOplog::new(self.owned_worker_id, primary, self.service, close).await) } @@ -198,12 +203,18 @@ impl OplogService for MultiLayerOplogService { Some(initial_entry), self.primary.clone(), self.clone(), + OplogIndex::INITIAL, ), ) .await } - async fn open(&self, owned_worker_id: &OwnedWorkerId) -> Arc { + async fn open( + &self, + owned_worker_id: &OwnedWorkerId, + last_oplog_index: OplogIndex, + ) -> Arc { + debug!("MultiLayerOplogService::open 
{owned_worker_id}"); self.oplogs .get_or_open( &owned_worker_id.worker_id, @@ -212,6 +223,7 @@ impl OplogService for MultiLayerOplogService { None, self.primary.clone(), self.clone(), + last_oplog_index, ), ) .await diff --git a/golem-worker-executor-base/src/services/oplog/primary.rs b/golem-worker-executor-base/src/services/oplog/primary.rs index 30a96b0c69..f84d47bb2a 100644 --- a/golem-worker-executor-base/src/services/oplog/primary.rs +++ b/golem-worker-executor-base/src/services/oplog/primary.rs @@ -122,14 +122,17 @@ impl OplogService for PrimaryOplogService { ) }); - self.open(owned_worker_id).await + self.open(owned_worker_id, OplogIndex::INITIAL).await } - async fn open(&self, owned_worker_id: &OwnedWorkerId) -> Arc { + async fn open( + &self, + owned_worker_id: &OwnedWorkerId, + last_oplog_index: OplogIndex, + ) -> Arc { record_oplog_call("open"); let key = Self::oplog_key(&owned_worker_id.worker_id); - let last_oplog_index = self.get_last_index(owned_worker_id).await; self.oplogs .get_or_open( diff --git a/golem-worker-executor-base/src/services/oplog/tests.rs b/golem-worker-executor-base/src/services/oplog/tests.rs index f339852178..da1d515b5a 100644 --- a/golem-worker-executor-base/src/services/oplog/tests.rs +++ b/golem-worker-executor-base/src/services/oplog/tests.rs @@ -236,7 +236,8 @@ async fn open_add_and_read_back() { worker_name: "test".to_string(), }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let entry1 = rounded(OplogEntry::jump(OplogRegion { start: OplogIndex::from_u64(5), @@ -282,7 +283,8 @@ async fn entries_with_small_payload() { }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = 
oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let last_oplog_idx = oplog.current_oplog_index().await; let entry1 = rounded( @@ -387,7 +389,8 @@ async fn entries_with_large_payload() { worker_name: "test".to_string(), }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let large_payload1 = vec![0u8; 1024 * 1024]; let large_payload2 = vec![1u8; 1024 * 1024]; @@ -566,7 +569,8 @@ async fn multilayer_transfers_entries_after_limit_reached( }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let mut entries = Vec::new(); for i in 0..n { @@ -589,7 +593,10 @@ async fn multilayer_transfers_entries_after_limit_reached( debug!("Fetching information to evaluate the test"); let primary_length = primary_oplog_service - .open(&owned_worker_id) + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) .await .length() .await; @@ -657,7 +664,8 @@ async fn read_from_archive_impl(use_blob: bool) { }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let timestamp = Timestamp::now_utc(); let entries: Vec = (0..100) @@ -678,7 +686,10 @@ async fn read_from_archive_impl(use_blob: bool) { tokio::time::sleep(Duration::from_secs(2)).await; let primary_length = primary_oplog_service - 
.open(&owned_worker_id) + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) .await .length() .await; @@ -697,6 +708,249 @@ async fn read_from_archive_impl(use_blob: bool) { assert_eq!(first10.into_values().collect::>(), original_first10); } +#[tokio::test] +async fn write_after_archive() { + write_after_archive_impl(false, Reopen::No).await; +} + +#[tokio::test] +async fn blob_write_after_archive() { + write_after_archive_impl(true, Reopen::No).await; +} + +#[tokio::test] +async fn write_after_archive_reopen() { + write_after_archive_impl(false, Reopen::Yes).await; +} + +#[tokio::test] +async fn blob_write_after_archive_reopen() { + write_after_archive_impl(true, Reopen::Yes).await; +} + +#[tokio::test] +async fn write_after_archive_reopen_full() { + write_after_archive_impl(false, Reopen::Full).await; +} + +#[tokio::test] +async fn blob_write_after_archive_reopen_full() { + write_after_archive_impl(true, Reopen::Full).await; +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum Reopen { + No, + Yes, + Full, +} + +async fn write_after_archive_impl(use_blob: bool, reopen: Reopen) { + let indexed_storage = Arc::new(InMemoryIndexedStorage::new()); + let blob_storage = Arc::new(InMemoryBlobStorage::new()); + let mut primary_oplog_service = Arc::new( + PrimaryOplogService::new(indexed_storage.clone(), blob_storage.clone(), 1, 100).await, + ); + let secondary_layer: Arc = if use_blob { + Arc::new(BlobOplogArchiveService::new(blob_storage.clone(), 1)) + } else { + Arc::new(CompressedOplogArchiveService::new( + indexed_storage.clone(), + 1, + )) + }; + let tertiary_layer: Arc = if use_blob { + Arc::new(BlobOplogArchiveService::new(blob_storage.clone(), 2)) + } else { + Arc::new(CompressedOplogArchiveService::new( + indexed_storage.clone(), + 2, + )) + }; + let mut oplog_service = Arc::new(MultiLayerOplogService::new( + primary_oplog_service.clone(), + nev![secondary_layer.clone(), tertiary_layer.clone()], + 10, + )); + let 
account_id = AccountId { + value: "user1".to_string(), + }; + let worker_id = WorkerId { + component_id: ComponentId(Uuid::new_v4()), + worker_name: "test".to_string(), + }; + let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); + + info!("FIRST OPEN"); + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; + info!("FIRST OPEN DONE"); + + let timestamp = Timestamp::now_utc(); + let entries: Vec = (0..100) + .map(|i| { + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown(i.to_string()), + }) + }) + .collect(); + + let initial_oplog_idx = oplog.current_oplog_index().await; + + for entry in &entries { + oplog.add(entry.clone()).await; + } + oplog.commit().await; + tokio::time::sleep(Duration::from_secs(2)).await; + + let primary_length = primary_oplog_service + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) + .await + .length() + .await; + let secondary_length = secondary_layer.open(&owned_worker_id).await.length().await; + let tertiary_length = tertiary_layer.open(&owned_worker_id).await.length().await; + + info!("initial oplog index: {}", initial_oplog_idx); + info!("primary_length: {}", primary_length); + info!("secondary_length: {}", secondary_length); + info!("tertiary_length: {}", tertiary_length); + + let oplog = if reopen == Reopen::Yes { + drop(oplog); + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + oplog_service.open(&owned_worker_id, last_oplog_index).await + } else if reopen == Reopen::Full { + drop(oplog); + primary_oplog_service = Arc::new( + PrimaryOplogService::new(indexed_storage.clone(), blob_storage.clone(), 1, 100).await, + ); + oplog_service = Arc::new(MultiLayerOplogService::new( + primary_oplog_service.clone(), + nev![secondary_layer.clone(), tertiary_layer.clone()], + 10, + )); + let last_oplog_index = 
oplog_service.get_last_index(&owned_worker_id).await; + oplog_service.open(&owned_worker_id, last_oplog_index).await + } else { + oplog + }; + + let entries: Vec = (100..1000) + .map(|i| { + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown(i.to_string()), + }) + }) + .collect(); + + for (n, entry) in entries.iter().enumerate() { + oplog.add(entry.clone()).await; + if n % 100 == 0 { + oplog.commit().await; + } + } + oplog.commit().await; + tokio::time::sleep(Duration::from_secs(2)).await; + + let primary_length = primary_oplog_service + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) + .await + .length() + .await; + let secondary_length = secondary_layer.open(&owned_worker_id).await.length().await; + let tertiary_length = tertiary_layer.open(&owned_worker_id).await.length().await; + + info!("initial oplog index: {}", initial_oplog_idx); + info!("primary_length: {}", primary_length); + info!("secondary_length: {}", secondary_length); + info!("tertiary_length: {}", tertiary_length); + + let oplog = if reopen == Reopen::Yes { + drop(oplog); + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + oplog_service.open(&owned_worker_id, last_oplog_index).await + } else if reopen == Reopen::Full { + drop(oplog); + primary_oplog_service = Arc::new( + PrimaryOplogService::new(indexed_storage.clone(), blob_storage.clone(), 1, 100).await, + ); + oplog_service = Arc::new(MultiLayerOplogService::new( + primary_oplog_service.clone(), + nev![secondary_layer.clone(), tertiary_layer.clone()], + 10, + )); + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + oplog_service.open(&owned_worker_id, last_oplog_index).await + } else { + oplog + }; + + oplog + .add(rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown("last".to_string()), + })) + .await; + oplog.commit().await; + drop(oplog); + + let entry1 = oplog_service + .read(&owned_worker_id, 
OplogIndex::INITIAL, 1) + .await; + let entry2 = oplog_service + .read(&owned_worker_id, OplogIndex::from_u64(100), 1) + .await; + let entry3 = oplog_service + .read(&owned_worker_id, OplogIndex::from_u64(1000), 1) + .await; + let entry4 = oplog_service + .read(&owned_worker_id, OplogIndex::from_u64(1001), 1) + .await; + + assert_eq!(entry1.len(), 1); + assert_eq!(entry2.len(), 1); + assert_eq!(entry3.len(), 1); + assert_eq!(entry4.len(), 1); + + assert_eq!( + entry1.get(&OplogIndex::INITIAL).unwrap().clone(), + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown("0".to_string()), + }) + ); + assert_eq!( + entry2.get(&OplogIndex::from_u64(100)).unwrap().clone(), + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown("99".to_string()), + }) + ); + assert_eq!( + entry3.get(&OplogIndex::from_u64(1000)).unwrap().clone(), + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown("999".to_string()), + }) + ); + assert_eq!( + entry4.get(&OplogIndex::from_u64(1001)).unwrap().clone(), + rounded(OplogEntry::Error { + timestamp, + error: WorkerError::Unknown("last".to_string()), + }) + ); +} + #[tokio::test] async fn empty_layer_gets_deleted() { empty_layer_gets_deleted_impl(false).await; @@ -743,7 +997,8 @@ async fn empty_layer_gets_deleted_impl(use_blob: bool) { }; let owned_worker_id = OwnedWorkerId::new(&account_id, &worker_id); - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; // As we add 100 entries at once, and that exceeds the limit, we expect that all entries have // been moved to the secondary layer. 
By doing this 10 more times, we end up having all entries @@ -774,7 +1029,10 @@ async fn empty_layer_gets_deleted_impl(use_blob: bool) { let tertiary_exists = tertiary_layer.exists(&owned_worker_id).await; let primary_length = primary_oplog_service - .open(&owned_worker_id) + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) .await .length() .await; @@ -852,7 +1110,8 @@ async fn scheduled_archive_impl(use_blob: bool) { // Adding 100 entries to the primary oplog, schedule archive and immediately drop the oplog let archive_result = { - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; for entry in &entries { oplog.add(entry.clone()).await; } @@ -868,7 +1127,10 @@ async fn scheduled_archive_impl(use_blob: bool) { tokio::time::sleep(Duration::from_secs(2)).await; let primary_length = primary_oplog_service - .open(&owned_worker_id) + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) .await .length() .await; @@ -890,7 +1152,8 @@ async fn scheduled_archive_impl(use_blob: bool) { // Calling archive again let archive_result2 = { - let oplog = oplog_service.open(&owned_worker_id).await; + let last_oplog_index = oplog_service.get_last_index(&owned_worker_id).await; + let oplog = oplog_service.open(&owned_worker_id, last_oplog_index).await; let result = MultiLayerOplog::try_archive(&oplog).await; drop(oplog); result @@ -899,7 +1162,10 @@ async fn scheduled_archive_impl(use_blob: bool) { tokio::time::sleep(Duration::from_secs(2)).await; let primary_length = primary_oplog_service - .open(&owned_worker_id) + .open( + &owned_worker_id, + primary_oplog_service.get_last_index(&owned_worker_id).await, + ) .await .length() .await; diff --git a/golem-worker-executor-base/src/services/scheduler.rs 
b/golem-worker-executor-base/src/services/scheduler.rs index 627d545bda..2c0b69e685 100644 --- a/golem-worker-executor-base/src/services/scheduler.rs +++ b/golem-worker-executor-base/src/services/scheduler.rs @@ -159,7 +159,10 @@ impl SchedulerServiceDefault { let current_last_index = self.oplog_service.get_last_index(&owned_worker_id).await; if current_last_index == last_oplog_index { - let oplog = self.oplog_service.open(&owned_worker_id).await; + let oplog = self + .oplog_service + .open(&owned_worker_id, last_oplog_index) + .await; if let Some(more) = MultiLayerOplog::try_archive(&oplog).await { if more { self.schedule( diff --git a/golem-worker-executor-base/src/services/worker.rs b/golem-worker-executor-base/src/services/worker.rs index 8b2e57eadb..23176fdb9c 100644 --- a/golem-worker-executor-base/src/services/worker.rs +++ b/golem-worker-executor-base/src/services/worker.rs @@ -17,10 +17,10 @@ use std::sync::Arc; use async_trait::async_trait; use golem_common::model::oplog::{OplogEntry, OplogIndex}; use golem_common::model::{ - ComponentType, OwnedWorkerId, ShardId, WorkerId, WorkerMetadata, WorkerStatus, + ComponentType, OwnedWorkerId, ShardId, Timestamp, WorkerId, WorkerMetadata, WorkerStatus, WorkerStatusRecord, }; -use tracing::debug; +use tracing::{debug, warn}; use crate::error::GolemError; use crate::metrics::workers::record_worker_call; @@ -228,7 +228,33 @@ impl WorkerService for DefaultWorkerService { Some(details) } Some((_, entry)) => { - panic!("Unexpected initial oplog entry for worker: {entry:?}") + // This should never happen, but there were some issues previously causing a corrupt oplog + // leading to this state. + // + // There is no point in panicking and restarting the executor here, as the corrupt oplog + // will most likely remain as it is. + // + // So to save the executor's state we return a "fake" failed worker metadata. 
+ + warn!( + worker_id = owned_worker_id.to_string(), + oplog_entry = format!("{entry:?}"), + "Unexpected initial oplog entry found, returning fake failed worker metadata" + ); + let last_oplog_idx = self.oplog_service.get_last_index(owned_worker_id).await; + Some(WorkerMetadata { + worker_id: owned_worker_id.worker_id(), + args: vec![], + env: vec![], + account_id: owned_worker_id.account_id(), + created_at: Timestamp::now_utc(), + parent: None, + last_known_status: WorkerStatusRecord { + status: WorkerStatus::Failed, + oplog_idx: last_oplog_idx, + ..WorkerStatusRecord::default() + }, + }) } } } diff --git a/golem-worker-executor-base/src/worker.rs b/golem-worker-executor-base/src/worker.rs index 0285bdfe5d..10cf4faa31 100644 --- a/golem-worker-executor-base/src/worker.rs +++ b/golem-worker-executor-base/src/worker.rs @@ -173,7 +173,11 @@ impl Worker { parent, ) .await?; - let oplog = deps.oplog_service().open(&owned_worker_id).await; + let last_oplog_index = deps.oplog_service().get_last_index(&owned_worker_id).await; + let oplog = deps + .oplog_service() + .open(&owned_worker_id, last_oplog_index) + .await; let initial_pending_invocations = worker_metadata .last_known_status