From 19311c0364416cde683ce7e4658884a284e4dec6 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Tue, 23 Apr 2024 15:06:02 -0400 Subject: [PATCH] weird state shenanigans Signed-off-by: Hamza El-Saawy --- internal/computecore/callback.go | 4 +- internal/computecore/computecore.go | 29 +- internal/computecore/errors.go | 115 +++++++ internal/computecore/event.go | 2 +- internal/computecore/operation.go | 406 ++++++++++++++++++----- internal/computecore/zsyscall_windows.go | 22 +- 6 files changed, 469 insertions(+), 109 deletions(-) create mode 100644 internal/computecore/errors.go diff --git a/internal/computecore/callback.go b/internal/computecore/callback.go index 0de61de5a2..b80832ed51 100644 --- a/internal/computecore/callback.go +++ b/internal/computecore/callback.go @@ -19,7 +19,7 @@ type ( // _In_ HCS_OPERATION operation, // _In_opt_ void* context // ); - HCSOperationCompletion func(op HCSOperation, hcsCtx HCSContext) + HCSOperationCompletion func(op hcsOperation, hcsCtx HCSContext) hcsOperationCompletionUintptr uintptr ) @@ -29,7 +29,7 @@ func (f HCSOperationCompletion) asCallback() hcsOperationCompletionUintptr { return hcsOperationCompletionUintptr(0) } return hcsOperationCompletionUintptr(windows.NewCallback( - func(op HCSOperation, hcsCtx HCSContext) uintptr { + func(op hcsOperation, hcsCtx HCSContext) uintptr { f(op, hcsCtx) return 0 }, diff --git a/internal/computecore/computecore.go b/internal/computecore/computecore.go index 31a1b4fd50..b0e28d3593 100644 --- a/internal/computecore/computecore.go +++ b/internal/computecore/computecore.go @@ -45,11 +45,11 @@ type HCSContext uintptr // //sys hcsEnumerateComputeSystems(query string, operation HCSOperation) (hr error) = computecore.HcsEnumerateComputeSystems? -func EnumerateComputeSystems(ctx context.Context, op HCSOperation, query *hcsschema.SystemQuery) (properties []hcsschema.Properties, err error) { +func EnumerateComputeSystems(ctx context.Context, op hcsOperation, query *hcsschema.SystemQuery) (properties []hcsschema.Properties, err error) { ctx, cancel := context.WithTimeout(ctx, timeout.SyscallWatcher) defer cancel() - ctx, span := oc.StartSpan(ctx, "computecore::HcsEnumerateComputeSystems", oc.WithClientSpanKind) + ctx, span := oc.StartSpan(ctx, computecoreSpanName("HcsEnumerateComputeSystems"), oc.WithClientSpanKind) defer func() { if len(properties) != 0 { span.AddAttributes(trace.StringAttribute("properties", log.Format(ctx, properties))) @@ -70,7 +70,7 @@ func EnumerateComputeSystems(ctx context.Context, op HCSOperation, query *hcssch return runOperation[[]hcsschema.Properties]( ctx, op, - func(_ context.Context, op HCSOperation) (err error) { + func(_ context.Context, op hcsOperation) (err error) { return hcsEnumerateComputeSystems(q, op) }, ) @@ -88,7 +88,7 @@ func EnumerateComputeSystems(ctx context.Context, op HCSOperation, query *hcssch // SetCallback assigns a callback to handle events for the compute system. func (s HCSSystem) SetCallback(ctx context.Context, options HCSEventOptions, hcsCtx HCSContext, callback HCSEventCallback) (err error) { - _, span := oc.StartSpan(ctx, "computecore::HcsSetComputeSystemCallback", oc.WithClientSpanKind) + _, span := oc.StartSpan(ctx, computecoreSpanName("HcsSetComputeSystemCallback"), oc.WithClientSpanKind) defer func() { oc.SetSpanStatus(span, err) span.End() @@ -116,7 +116,7 @@ func (s HCSSystem) SetCallback(ctx context.Context, options HCSEventOptions, hcs // SetCallback assigns a callback to handle events for the compute system. func (p HCSProcess) SetCallback(ctx context.Context, options HCSEventOptions, hcsCtx HCSContext, callback HCSEventCallback) (err error) { - _, span := oc.StartSpan(ctx, "computecore::HcsSetProcessCallback", oc.WithClientSpanKind) + _, span := oc.StartSpan(ctx, computecoreSpanName("HcsSetProcessCallback"), oc.WithClientSpanKind) defer func() { oc.SetSpanStatus(span, err) span.End() @@ -134,8 +134,8 @@ func (p HCSProcess) SetCallback(ctx context.Context, options HCSEventOptions, hc func runOperation[T any]( ctx context.Context, - op HCSOperation, - f func(context.Context, HCSOperation) error, + op hcsOperation, + f func(context.Context, hcsOperation) error, ) (v T, err error) { if err := f(ctx, op); err != nil { return v, err @@ -173,3 +173,18 @@ func encode(v any) (string, error) { // encoder.Encode appends a newline to the end return strings.TrimSpace(buf.String()), nil } + +func validHandle(h windows.Handle) bool { + return h != 0 && h != windows.InvalidHandle +} + +func computecoreSpanName(names ...string) string { + s := make([]string, 0, len(names)) + for _, n := range names { + n = strings.TrimSpace(n) + if n != "" { + s = append(s, n) + } + } + return strings.Join(append([]string{"computecore"}, s...), "::") +} diff --git a/internal/computecore/errors.go b/internal/computecore/errors.go new file mode 100644 index 0000000000..69fbb84886 --- /dev/null +++ b/internal/computecore/errors.go @@ -0,0 +1,115 @@ +//go:build windows + +package computecore + +import "golang.org/x/sys/windows" + +const hcsHResultPrefix = 0x80370000 + +// HCS specific error codes. +// +// See [documentation] for more info. +// +// [documentation]: https://learn.microsoft.com/en-us/virtualization/api/hcs/reference/hcshresult +// +//nolint:stylecheck // ST1003: ALL_CAPS +const ( + // The virtual machine or container exited unexpectedly while starting. + HCS_E_TERMINATED_DURING_START = windows.Errno(hcsHResultPrefix + 0x0100) + + // The container operating system does not match the host operating system. + HCS_E_IMAGE_MISMATCH = windows.Errno(hcsHResultPrefix + 0x0101) + + // The virtual machine could not be started because a required feature is not installed. + HCS_E_HYPERV_NOT_INSTALLED = windows.Errno(hcsHResultPrefix + 0x0102) + + // The requested virtual machine or container operation is not valid in the current state. + HCS_E_INVALID_STATE = windows.Errno(hcsHResultPrefix + 0x0105) + + // The virtual machine or container exited unexpectedly. + HCS_E_UNEXPECTED_EXIT = windows.Errno(hcsHResultPrefix + 0x0106) + + // The virtual machine or container was forcefully exited. + HCS_E_TERMINATED = windows.Errno(hcsHResultPrefix + 0x0107) + + // A connection could not be established with the container or virtual machine. + HCS_E_CONNECT_FAILED = windows.Errno(hcsHResultPrefix + 0x0108) + + // The operation timed out because a response was not received from the virtual machine or container. + HCS_E_CONNECTION_TIMEOUT = windows.Errno(hcsHResultPrefix + 0x0109) + + // The connection with the virtual machine or container was closed. + HCS_E_CONNECTION_CLOSED = windows.Errno(hcsHResultPrefix + 0x010A) + + // An unknown internal message was received by the virtual machine or container. + HCS_E_UNKNOWN_MESSAGE = windows.Errno(hcsHResultPrefix + 0x010B) + + // The virtual machine or container does not support an available version of the communication protocol with the host. + HCS_E_UNSUPPORTED_PROTOCOL_VERSION = windows.Errno(hcsHResultPrefix + 0x010C) + + // The virtual machine or container JSON document is invalid. + HCS_E_INVALID_JSON = windows.Errno(hcsHResultPrefix + 0x010D) + + // A virtual machine or container with the specified identifier does not exist. + HCS_E_SYSTEM_NOT_FOUND = windows.Errno(hcsHResultPrefix + 0x010E) + + // A virtual machine or container with the specified identifier already exists. + HCS_E_SYSTEM_ALREADY_EXISTS = windows.Errno(hcsHResultPrefix + 0x010F) + + // The virtual machine or container with the specified identifier is not running. + HCS_E_SYSTEM_ALREADY_STOPPED = windows.Errno(hcsHResultPrefix + 0x0110) + + // A communication protocol error has occurred between the virtual machine or container and the host. + HCS_E_PROTOCOL_ERROR = windows.Errno(hcsHResultPrefix + 0x0111) + + // The container image contains a layer with an unrecognized format. + HCS_E_INVALID_LAYER = windows.Errno(hcsHResultPrefix + 0x0112) + + // To use this container image, you must join the Windows Insider Program. + // Please see https://go.microsoft.com/fwlink/?linkid=850659 for more information. + HCS_E_WINDOWS_INSIDER_REQUIRED = windows.Errno(hcsHResultPrefix + 0x0113) + + // The operation could not be started because a required feature is not installed. + HCS_E_SERVICE_NOT_AVAILABLE = windows.Errno(hcsHResultPrefix + 0x0114) + + // The operation has not started. + HCS_E_OPERATION_NOT_STARTED = windows.Errno(hcsHResultPrefix + 0x0115) + + // The operation is already running. + HCS_E_OPERATION_ALREADY_STARTED = windows.Errno(hcsHResultPrefix + 0x0116) + + // The operation is still running. + HCS_E_OPERATION_PENDING = windows.Errno(hcsHResultPrefix + 0x0117) + + // The operation did not complete in time. + HCS_E_OPERATION_TIMEOUT = windows.Errno(hcsHResultPrefix + 0x0118) + + // An event callback has already been registered on this handle. + HCS_E_OPERATION_SYSTEM_CALLBACK_ALREADY_SET = windows.Errno(hcsHResultPrefix + 0x0119) + + // Not enough memory available to return the result of the operation. + HCS_E_OPERATION_RESULT_ALLOCATION_FAILED = windows.Errno(hcsHResultPrefix + 0x011A) + + // Insufficient privileges. + // Only administrators or users that are members of the Hyper-V Administrators user group are permitted to access virtual machines or containers. + // To add yourself to the Hyper-V Administrators user group, please see https://aka.ms/hcsadmin for more information. + HCS_E_ACCESS_DENIED = windows.Errno(hcsHResultPrefix + 0x011B) + + // The virtual machine or container reported a critical error and was stopped or restarted. + HCS_E_GUEST_CRITICAL_ERROR = windows.Errno(hcsHResultPrefix + 0x011C) + + // The process information is not available. + HCS_E_PROCESS_INFO_NOT_AVAILABLE = windows.Errno(hcsHResultPrefix + 0x011D) + + // The host compute system service has disconnected unexpectedly. + HCS_E_SERVICE_DISCONNECT = windows.Errno(hcsHResultPrefix + 0x011E) + + // The process has already exited. + HCS_E_PROCESS_ALREADY_STOPPED = windows.Errno(hcsHResultPrefix + 0x011F) + + // The virtual machine or container is not configured to perform the operation. + HCS_E_SYSTEM_NOT_CONFIGURED_FOR_OPERATION = windows.Errno(hcsHResultPrefix + 0x0120) + + // The operation has already been cancelled. + HCS_E_OPERATION_ALREADY_CANCELLED = windows.Errno(hcsHResultPrefix + 0x0121) +) diff --git a/internal/computecore/event.go b/internal/computecore/event.go index 5bca2f4d67..9157381592 100644 --- a/internal/computecore/event.go +++ b/internal/computecore/event.go @@ -23,7 +23,7 @@ type HCSEvent struct { EventData *uint16 // Handle to a completed operation, if Type is eventOperationCallback. // This is only possible when HcsSetComputeSystemCallback has specified event option HcsEventOptionEnableOperationCallbacks. - Operation HCSOperation + Operation hcsOperation } //go:generate go run golang.org/x/tools/cmd/stringer -type=HCSEventType -trimprefix=Event event.go diff --git a/internal/computecore/operation.go b/internal/computecore/operation.go index 959003bbbe..8a8c7f09eb 100644 --- a/internal/computecore/operation.go +++ b/internal/computecore/operation.go @@ -9,6 +9,7 @@ import ( "fmt" "math" "strconv" + "sync" "time" "go.opencensus.io/trace" @@ -20,6 +21,262 @@ import ( "github.com/sirupsen/logrus" ) +// Note: we don't have a good way to cancel attempting to lock the mutext, so these +// operations may block. +// Callers should handle checking against context timeout. + +// Since the `done <-struct{}` channel will change as new operations are started, don't expose +// that directly to users. + +type Operation struct { + // m locks st, preventing writes during ongoing operations. + m sync.RWMutex + // current operation state. + st operationState +} + +func NewEmptyOperation(ctx context.Context) (*Operation, error) { + return NewOperation(ctx, 0, nil) +} + +func NewOperation(ctx context.Context, hcsCtx HCSContext, callback HCSOperationCompletion) (*Operation, error) { + h, err := createOperation(ctx, hcsCtx, callback.asCallback()) + if err != nil { + return nil, err + } + + op := &Operation{ + st: &operationCreated{h: h}, + } + return op, nil +} + +// unsafe: `op != nil` && must hold `op.m` +func (op *Operation) valid() bool { + return op.st != nil && op.st.operation().valid() +} + +func (op *Operation) run( + ctx context.Context, + f func(hcsOperation) error, +) error { + // start and wait on operation in a separate go routine, since + // mutex operation can block. + errCh := make(chan error) + go func() { + defer close(errCh) + + if err := op.Start(ctx, f, contextTimeoutMs(ctx)); err != nil { + select { + case errCh <- err: + case <-ctx.Done(): + // context was cancelled, and parent function returned + // exit so go routine doesn't leak + } + return + } + + // todo: wait on result. + }() + + select { + case err := <-errCh: + return err + case <-ctx.Done(): + // context was cancelled, and parent function returned + // exit so go routine doesn't leak + return ctx.Err() + } +} + +// Start an operation via the function f. +// If successfully started, the underlying HCS operation will timeout after timeoutMs milliseconds. +// +// Note: there is not way to cancel attempting to lock op.m, so this operations may block +// even after ctx is cancelled. +// Additionally, using timeout associated with ctx will leak the context, which is undesired +// if the context is to be associated with the operation start, and not the subsequent wait. +// +// See [waitBackground] for timeout details.. +func (op *Operation) Start( + ctx context.Context, + f func(hcsOperation) error, + timeoutMs uint32, +) (err error) { + ctx, span := oc.StartSpan(ctx, operationSpanName("Start"), oc.WithClientSpanKind) + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + if op == nil { + return fmt.Errorf("nil operation: %w", windows.ERROR_INVALID_HANDLE) + } + + op.m.Lock() + defer op.m.Unlock() + + if !op.valid() { + return fmt.Errorf("invalid operation handle: %w", windows.ERROR_INVALID_HANDLE) + } + + span.AddAttributes( + trace.StringAttribute("operation", op.st.operation().String()), + trace.StringAttribute("state", op.st.String()), + trace.Int64Attribute("timeoutMs", int64(timeoutMs))) + + switch op.st.(type) { + case *operationStarted: + return fmt.Errorf("ongoing operation: %w", HCS_E_OPERATION_ALREADY_STARTED) + case *operationClosed: + return fmt.Errorf("operation already closed: %w", windows.ERROR_HANDLES_CLOSED) + default: + } + + h := op.st.operation() + + if err := f(h); err != nil { + return err + } + + done := make(chan struct{}) + st := &operationStarted{done: done} + + go func(ctx context.Context) { + defer close(done) + st := &operationCompleted{ + h: h, + } + + // WaitForOperationResult should (attempt to) parse result as [hcsschema.ResultError] + // so just return results + st.result, st.err = h.waitBackground(ctx, timeoutMs) + + // lock operation to change state + op.m.Lock() + defer op.m.Unlock() + + switch op.st.(type) { + case *operationStarted: + op.st = st + default: + // somehow someone else change the state, even after we started it + // TODO: log these situations + } + + }(context.WithoutCancel(ctx)) + + op.st = st + return nil +} + +func (op *Operation) Wait(ctx context.Context) (result string, err error) { + ctx, span := oc.StartSpan(ctx, operationSpanName("Wait"), oc.WithClientSpanKind) + defer func() { + if result != "" { + span.AddAttributes(trace.StringAttribute("resultDocument", result)) + } + oc.SetSpanStatus(span, err) + span.End() + }() + + // TODO + + // span.AddAttributes( + // trace.StringAttribute("operation", op.String()), + // trace.Int64Attribute("timeoutMs", int64(timeoutMs))) + + return "", nil +} + +func (op *Operation) Pending(context.Context) bool { + // TODO + return false +} + +func (op *Operation) Closed(context.Context) bool { + // TODO + return false +} + +func (op *Operation) Close(ctx context.Context) (err error) { + ctx, span := oc.StartSpan(ctx, operationSpanName("Close"), oc.WithClientSpanKind) + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + if op == nil { + return fmt.Errorf("nil operation: %w", windows.ERROR_INVALID_HANDLE) + } + + op.m.Lock() + defer op.m.Unlock() + + span.AddAttributes(trace.StringAttribute("state", op.st.String())) + + switch op.st.(type) { + case *operationClosed: + return nil + default: + } + + if !op.valid() { + return fmt.Errorf("invalid operation handle: %w", windows.ERROR_INVALID_HANDLE) + } + + h := op.st.operation() + // replace the state regardless of Close() return value + op.st = &operationClosed{} + return h.close() +} + +// we want enum structs, but have to hack it in :'( + +// the value of hcsOperation must be constant across state transitions + +type operationState interface { + fmt.Stringer + operation() hcsOperation // unsafe: `op != nil` && must hold `op.m` +} + +type operationCreated struct { + h hcsOperation +} + +var _ operationState = new(operationCreated) + +func (op *operationCreated) operation() hcsOperation { return op.h } +func (*operationCreated) String() string { return "Idle" } + +type operationStarted struct { + h hcsOperation + done <-chan struct{} +} + +var _ operationState = new(operationStarted) + +func (op *operationStarted) operation() hcsOperation { return op.h } +func (*operationStarted) String() string { return "Started" } + +type operationCompleted struct { + h hcsOperation + result string + err error +} + +var _ operationState = new(operationCompleted) + +func (op *operationCompleted) operation() hcsOperation { return op.h } +func (*operationCompleted) String() string { return "Completed" } + +type operationClosed struct{} + +var _ operationState = new(operationClosed) + +func (*operationClosed) operation() hcsOperation { return hcsOperation(windows.InvalidHandle) } +func (*operationClosed) String() string { return "Close" } + // TODO: // - HcsGetOperationContext // - HcsSetOperationContext @@ -28,10 +285,14 @@ import ( // - HcsGetOperationResultAndProcessInfo // - HcsWaitForOperationResultAndProcessInfo -// Handle to an HCSOperation on a compute system. -type HCSOperation windows.Handle +// Handle to an operation on an HCS compute system, process, or other resource. +type hcsOperation windows.Handle -func (op HCSOperation) String() string { +func (op hcsOperation) valid() bool { return validHandle(windows.Handle(op)) } + +func (op hcsOperation) String() string { + // treat the handle as a unique ID for the operation, since the operation ID (from [HcsGetOperationId]) + // may change across operation invovations. return "0x" + strconv.FormatInt(int64(op), 16) } @@ -72,16 +333,8 @@ const ( // //sys hcsCreateOperation(context HCSContext, callback hcsOperationCompletionUintptr) (op HCSOperation, err error) = computecore.HcsCreateOperation? -func NewOperation(ctx context.Context, hcsCtx HCSContext, callback HCSOperationCompletion) (HCSOperation, error) { - return createOperation(ctx, hcsCtx, callback.asCallback()) -} - -func NewEmptyOperation(ctx context.Context) (op HCSOperation, err error) { - return createOperation(ctx, 0, 0) -} - -func createOperation(ctx context.Context, hcsCtx HCSContext, callback hcsOperationCompletionUintptr) (op HCSOperation, err error) { - _, span := oc.StartSpan(ctx, "computecore::HcsCreateOperation", oc.WithClientSpanKind) +func createOperation(ctx context.Context, hcsCtx HCSContext, callback hcsOperationCompletionUintptr) (op hcsOperation, err error) { + _, span := oc.StartSpan(ctx, computecoreSpanName("HcsCreateOperation"), oc.WithClientSpanKind) defer func() { span.AddAttributes(trace.StringAttribute("operation", op.String())) oc.SetSpanStatus(span, err) @@ -102,7 +355,7 @@ func createOperation(ctx context.Context, hcsCtx HCSContext, callback hcsOperati // //sys hcsCloseOperation(operation HCSOperation) = computecore.HcsCloseOperation? -func (op HCSOperation) Close() error { +func (op hcsOperation) close() error { // should only return an error if ComputeCore.dll isn't found ... return hcsCloseOperation(op) } @@ -115,7 +368,7 @@ func (op HCSOperation) Close() error { //sys hcsGetOperationType(operation HCSOperation) (t HCSOperationType, err error) = computecore.HcsGetOperationType? // Get the type of the operation, this corresponds to the API call the operation was issued with. -func (op HCSOperation) Type() (HCSOperationType, error) { +func (op hcsOperation) operationType() (HCSOperationType, error) { return hcsGetOperationType(op) } @@ -127,7 +380,7 @@ func (op HCSOperation) Type() (HCSOperationType, error) { //sys hcsGetOperationID(operation HCSOperation) (id uint64, err error)= computecore.HcsGetOperationId? // Returns the Id that uniquely identifies an operation. -func (op HCSOperation) ID() (uint64, error) { +func (op hcsOperation) id() (uint64, error) { return hcsGetOperationID(op) } @@ -139,7 +392,32 @@ func (op HCSOperation) ID() (uint64, error) { // //sys hcsGetOperationResult(operation HCSOperation, resultDocument **uint16) (hr error)= computecore.HcsGetOperationResult? -// GetOperationResult gets the result of the operation used to track an HCS function; +// func (op hcsOperation) getOperationResult(ctx context.Context) (result string, err error) { +// _, span := oc.StartSpan(ctx, computecoreSpanName("HcsGetOperationResult"), oc.WithClientSpanKind) +// defer func() { +// if result != "" { +// span.AddAttributes(trace.StringAttribute("resultDocument", result)) +// } +// oc.SetSpanStatus(span, err) +// span.End() +// }() +// span.AddAttributes(trace.StringAttribute("operation", op.String())) + +// var resultp *uint16 +// err = hcsGetOperationResult(op, &resultp) +// return processResults(ctx, bufferToString(resultp), err) +// } + +// HRESULT WINAPI +// HcsWaitForOperationResult( +// _In_ HCS_OPERATION operation, +// _In_ DWORD timeoutMs, +// _Outptr_opt_ PWSTR* resultDocument +// ); +// +//sys hcsWaitForOperationResult(operation HCSOperation, timeoutMs uint32, resultDocument **uint16) (hr error) = computecore.HcsWaitForOperationResult? + +// WaitForOperationResult synchronously waits for and returns the result of an HCS operation to complete; // optionally returns a JSON document associated to such tracked operation. // // On failure, it will attempt to parse the (optional) error JSON document as a [hcsschema.ResultError]; @@ -151,85 +429,33 @@ func (op HCSOperation) ID() (uint64, error) { // - [HCS_E_OPERATION_PENDING] if the operation is still in progress and hasn't been completed, regardless of success or failure // - Any other value if the operation completed with failures. // The returned HRESULT is dependent on the HCS function thas was being tracked. -func (op HCSOperation) GetOperationResult(ctx context.Context) (result string, err error) { - _, span := oc.StartSpan(ctx, "computecore::HcsGetOperationResult", oc.WithClientSpanKind) - defer func() { - if result != "" { - span.AddAttributes(trace.StringAttribute("resultDocument", result)) - } - oc.SetSpanStatus(span, err) - span.End() - }() - span.AddAttributes(trace.StringAttribute("operation", op.String())) - - var resultp *uint16 - err = hcsGetOperationResult(op, &resultp) - return processResults(ctx, bufferToString(resultp), err) -} - -// HRESULT WINAPI -// HcsWaitForOperationResult( -// _In_ HCS_OPERATION operation, -// _In_ DWORD timeoutMs, -// _Outptr_opt_ PWSTR* resultDocument -// ); -// -//sys hcsWaitForOperationResult(operation HCSOperation, timeoutMs uint32, resultDocument **uint16) (hr error) = computecore.HcsWaitForOperationResult? - -// WaitForOperationResult waits for the operation to complete or the context to be cancelled. // // The maximum possible duration is [math.MaxUint32] milliseconds (approximately 50 days). -// If no context deadline is provided, the operation waits indefinitely. -// -// Note: if no deadline is provided, cancelling the context will not cancel the underlying HCS (wait) operation. // -// See: [GetOperationResult]. -func (op HCSOperation) WaitForOperationResult(ctx context.Context) (result string, err error) { - ctx, span := oc.StartSpan(ctx, "computecore::HcsWaitForOperationResult", oc.WithClientSpanKind) - defer func() { - if result != "" { - span.AddAttributes(trace.StringAttribute("resultDocument", result)) - } - oc.SetSpanStatus(span, err) - span.End() - }() - - // While a timeout on ctx will cancel waiting for the operation result, the underlying HCS operation - // will still be ongoing (since HcsCancelOperation is not yet implemented). - // We use the context timeout (if one is set), as the timeout for the HCS wait syscall as well, - // rather than expose two competing timeouts (the context and HCS parameter timeouts). - milli := contextTimeoutMs(ctx) - span.AddAttributes( - trace.StringAttribute("operation", op.String()), - trace.Int64Attribute("timeoutMs", int64(milli))) - - // Don't write to return variables (result, err) directly from goroutine: - // that can cause a data race if this function exits before the goroutine. +// Note: The provided context is only used for logging; cancellations and timeouts are ignored. +// Use timeoutMs to cancel the wait pre-emptively. +func (op hcsOperation) waitBackground(ctx context.Context, timeoutMs uint32) (string, error) { + // Could extract the timeout from the context directly (via [contextTimeoutMs]), but + // then this function would only respect context deadlines and not cancellation. // - // Don't use a `chan error`: the `<-hcsWaitForOperationResult` will block and cause the - // goroutine to hang indefinitely if this function exits first (since nothing will read from the channel). + // Could start [hcsWaitForOperationResult] in a go routine and select on that as well and + // `<-ctx.Done()`, but since this function will be called from a go-routine regardless, + // might as well save on a nested go routine. // - // Don't use operation callback to call `close(done)`: it will fail if the compute system already has a callback + // Also, since [HcsCancelOperation] is not yet implemented (see below), having cancellations + // cancel this and leak the underlying operation, which is also unsatisfactory. + // + // Also, don't use operation callback to handle context cancellation and processing results: + // it will fail if the process/compute system associated with the operation already has a callback - var opErr error var resultp *uint16 - done := make(chan struct{}) - go func() { - defer close(done) + err := hcsWaitForOperationResult(op, timeoutMs, &resultp) - log.G(ctx).Trace("waiting on operation") - opErr = hcsWaitForOperationResult(op, milli, &resultp) - }() - - select { - case <-done: - case <-ctx.Done(): - return "", ctx.Err() - } - return processResults(ctx, bufferToString(resultp), opErr) + return processResults(ctx, bufferToString(resultp), err) } -// if err != nil, try to parse result as an [hcsschema.ResultError]. +// Try to process operation results. +// If err != nil, try to parse result as an [hcsschema.ResultError]. func processResults(ctx context.Context, result string, err error) (string, error) { if err == nil || result == "" { // if there is not error or if the result document is empty, do nothing @@ -302,8 +528,8 @@ func contextTimeoutMs(ctx context.Context) uint32 { //sys hcsSetOperationCallback(operation HCSOperation, context HCSContext, callback hcsOperationCompletionUintptr) (hr error) = computecore.HcsSetOperationCallback? // Its best not to use [SetOperationCallback], since it is possible the compute system already has a callback assigned. -func (op HCSOperation) SetOperationCallback(ctx context.Context, hcsCtx HCSContext, callback HCSOperationCompletion) (err error) { - _, span := oc.StartSpan(ctx, "computecore::HcsSetOperationCallback", oc.WithClientSpanKind) +func (op hcsOperation) setOperationCallback(ctx context.Context, hcsCtx HCSContext, callback HCSOperationCompletion) (err error) { + _, span := oc.StartSpan(ctx, computecoreSpanName("HcsSetOperationCallback"), oc.WithClientSpanKind) defer func() { oc.SetSpanStatus(span, err) span.End() @@ -327,3 +553,7 @@ func (op HCSOperation) SetOperationCallback(ctx context.Context, hcsCtx HCSConte // ); // //sys hcsCancelOperation(operation HCSOperation) (hr error) = computecore.HcsCancelOperation? + +func operationSpanName(names ...string) string { + return computecoreSpanName(append([]string{"Operation"}, names...)...) +} diff --git a/internal/computecore/zsyscall_windows.go b/internal/computecore/zsyscall_windows.go index dfc7bed79f..78bdf1fed1 100644 --- a/internal/computecore/zsyscall_windows.go +++ b/internal/computecore/zsyscall_windows.go @@ -52,7 +52,7 @@ var ( procHcsWaitForOperationResult = modcomputecore.NewProc("HcsWaitForOperationResult") ) -func hcsCancelOperation(operation HCSOperation) (hr error) { +func hcsCancelOperation(operation hcsOperation) (hr error) { hr = procHcsCancelOperation.Find() if hr != nil { return @@ -67,7 +67,7 @@ func hcsCancelOperation(operation HCSOperation) (hr error) { return } -func hcsCloseOperation(operation HCSOperation) (err error) { +func hcsCloseOperation(operation hcsOperation) (err error) { err = procHcsCloseOperation.Find() if err != nil { return @@ -76,20 +76,20 @@ func hcsCloseOperation(operation HCSOperation) (err error) { return } -func hcsCreateOperation(context HCSContext, callback hcsOperationCompletionUintptr) (op HCSOperation, err error) { +func hcsCreateOperation(context HCSContext, callback hcsOperationCompletionUintptr) (op hcsOperation, err error) { err = procHcsCreateOperation.Find() if err != nil { return } r0, _, e1 := syscall.SyscallN(procHcsCreateOperation.Addr(), uintptr(context), uintptr(callback)) - op = HCSOperation(r0) + op = hcsOperation(r0) if op == 0 { err = errnoErr(e1) } return } -func hcsEnumerateComputeSystems(query string, operation HCSOperation) (hr error) { +func hcsEnumerateComputeSystems(query string, operation hcsOperation) (hr error) { var _p0 *uint16 _p0, hr = syscall.UTF16PtrFromString(query) if hr != nil { @@ -98,7 +98,7 @@ func hcsEnumerateComputeSystems(query string, operation HCSOperation) (hr error) return _hcsEnumerateComputeSystems(_p0, operation) } -func _hcsEnumerateComputeSystems(query *uint16, operation HCSOperation) (hr error) { +func _hcsEnumerateComputeSystems(query *uint16, operation hcsOperation) (hr error) { hr = procHcsEnumerateComputeSystems.Find() if hr != nil { return @@ -113,7 +113,7 @@ func _hcsEnumerateComputeSystems(query *uint16, operation HCSOperation) (hr erro return } -func hcsGetOperationID(operation HCSOperation) (id uint64, err error) { +func hcsGetOperationID(operation hcsOperation) (id uint64, err error) { err = procHcsGetOperationId.Find() if err != nil { return @@ -126,7 +126,7 @@ func hcsGetOperationID(operation HCSOperation) (id uint64, err error) { return } -func hcsGetOperationResult(operation HCSOperation, resultDocument **uint16) (hr error) { +func hcsGetOperationResult(operation hcsOperation, resultDocument **uint16) (hr error) { hr = procHcsGetOperationResult.Find() if hr != nil { return @@ -141,7 +141,7 @@ func hcsGetOperationResult(operation HCSOperation, resultDocument **uint16) (hr return } -func hcsGetOperationType(operation HCSOperation) (t HCSOperationType, err error) { +func hcsGetOperationType(operation hcsOperation) (t HCSOperationType, err error) { err = procHcsGetOperationType.Find() if err != nil { return @@ -169,7 +169,7 @@ func hcsSetComputeSystemCallback(computeSystem HCSSystem, callbackOptions HCSEve return } -func hcsSetOperationCallback(operation HCSOperation, context HCSContext, callback hcsOperationCompletionUintptr) (hr error) { +func hcsSetOperationCallback(operation hcsOperation, context HCSContext, callback hcsOperationCompletionUintptr) (hr error) { hr = procHcsSetOperationCallback.Find() if hr != nil { return @@ -199,7 +199,7 @@ func hcsSetProcessCallback(process HCSProcess, callbackOptions HCSEventOptions, return } -func hcsWaitForOperationResult(operation HCSOperation, timeoutMs uint32, resultDocument **uint16) (hr error) { +func hcsWaitForOperationResult(operation hcsOperation, timeoutMs uint32, resultDocument **uint16) (hr error) { hr = procHcsWaitForOperationResult.Find() if hr != nil { return