From 47b0c0fb50a31b064ed74dfa7fef0dee956cdeee Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 28 Feb 2022 15:30:09 -0500 Subject: [PATCH 1/2] csi: set plugin socket path on restore The Prestart hook for task runner hooks doesn't get called when we restore a task, because the task is already running. The Postrun hook for CSI plugin supervisors needs the socket path to have been populated so that the client has a valid path. --- .../taskrunner/plugin_supervisor_hook.go | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 94909d3ce0a..1c0a98b8fa2 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -129,12 +129,13 @@ func (*csiPluginSupervisorHook) Name() string { } // Prestart is called before the task is started including after every -// restart. This requires that the mount paths for a plugin be idempotent, -// despite us not knowing the name of the plugin ahead of time. -// Because of this, we use the allocid_taskname as the unique identifier for a -// plugin on the filesystem. +// restart (but not after restore). This requires that the mount paths +// for a plugin be idempotent, despite us not knowing the name of the +// plugin ahead of time. Because of this, we use the allocid_taskname +// as the unique identifier for a plugin on the filesystem. func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { + // Create the mount directory that the container will access if it doesn't // already exist. Default to only nomad user access. if err := os.MkdirAll(h.mountPoint, 0700); err != nil && !os.IsExist(err) { @@ -167,19 +168,7 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, Readonly: false, } - // TODO(tgross): https://github.com/hashicorp/nomad/issues/11786 - // If we're already registered, we should be able to update the - // definition in the update hook - - // For backwards compatibility, ensure that we don't overwrite the - // socketPath on client restart with existing plugin allocations. - pluginInfo, _ := h.runner.dynamicRegistry.PluginForAlloc( - string(h.task.CSIPluginConfig.Type), h.task.CSIPluginConfig.ID, h.alloc.ID) - if pluginInfo != nil { - h.socketPath = pluginInfo.ConnectionInfo.SocketPath - } else { - h.socketPath = filepath.Join(h.socketMountPoint, structs.CSISocketName) - } + h.setSocketHook() switch h.caps.FSIsolation { case drivers.FSIsolationNone: @@ -206,11 +195,29 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, return nil } +func (h *csiPluginSupervisorHook) setSocketHook() { + + // TODO(tgross): https://github.com/hashicorp/nomad/issues/11786 + // If we're already registered, we should be able to update the + // definition in the update hook + + // For backwards compatibility, ensure that we don't overwrite the + // socketPath on client restart with existing plugin allocations. + pluginInfo, _ := h.runner.dynamicRegistry.PluginForAlloc( + string(h.task.CSIPluginConfig.Type), h.task.CSIPluginConfig.ID, h.alloc.ID) + if pluginInfo != nil && pluginInfo.ConnectionInfo.SocketPath != "" { + h.socketPath = pluginInfo.ConnectionInfo.SocketPath + } + + h.socketPath = filepath.Join(h.socketMountPoint, structs.CSISocketName) +} + // Poststart is called after the task has started. Poststart is not // called if the allocation is terminal. // // The context is cancelled if the task is killed. func (h *csiPluginSupervisorHook) Poststart(_ context.Context, _ *interfaces.TaskPoststartRequest, _ *interfaces.TaskPoststartResponse) error { + // If we're already running the supervisor routine, then we don't need to try // and restart it here as it only terminates on `Stop` hooks. h.runningLock.Lock() @@ -220,6 +227,8 @@ func (h *csiPluginSupervisorHook) Poststart(_ context.Context, _ *interfaces.Tas } h.runningLock.Unlock() + h.setSocketHook() + go h.ensureSupervisorLoop(h.shutdownCtx) return nil } From a4e7099a1cde008543ab57c56727c02c124006c4 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 1 Mar 2022 08:07:51 -0500 Subject: [PATCH 2/2] address comments from code review --- client/allocrunner/taskrunner/plugin_supervisor_hook.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 1c0a98b8fa2..122386fd76f 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -207,8 +207,8 @@ func (h *csiPluginSupervisorHook) setSocketHook() { string(h.task.CSIPluginConfig.Type), h.task.CSIPluginConfig.ID, h.alloc.ID) if pluginInfo != nil && pluginInfo.ConnectionInfo.SocketPath != "" { h.socketPath = pluginInfo.ConnectionInfo.SocketPath + return } - h.socketPath = filepath.Join(h.socketMountPoint, structs.CSISocketName) }