From e92d4009e7c8bc96b8982d1604caa80d31b15eba Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Fri, 21 May 2021 16:40:37 -0700 Subject: [PATCH 1/2] Add process dump functionality for WCOW/LCOW This commit adds support for generating process dumps for hypervisor isolated containers. This includes a new annotation to specify where process dumps should get placed on creation, which is global to all containers. Signed-off-by: Daniel Canter --- cmd/gcs/main.go | 14 ++++ internal/guest/runtime/hcsv2/container.go | 3 + .../guest/runtime/hcsv2/sandbox_container.go | 6 ++ internal/guest/runtime/hcsv2/spec.go | 26 ++++++++ .../guest/runtime/hcsv2/workload_container.go | 6 ++ internal/hcsoci/hcsdoc_wcow.go | 65 +++++++++++++++++++ internal/oci/annotations.go | 13 ++++ internal/oci/uvm.go | 3 +- internal/uvm/create.go | 11 ++++ internal/uvm/create_lcow.go | 4 ++ internal/uvm/create_wcow.go | 53 +++++++++++---- internal/uvm/types.go | 4 ++ .../hcsshim/internal/hcsoci/hcsdoc_wcow.go | 65 +++++++++++++++++++ .../hcsshim/internal/oci/annotations.go | 13 ++++ .../Microsoft/hcsshim/internal/oci/uvm.go | 3 +- .../Microsoft/hcsshim/internal/uvm/create.go | 11 ++++ .../hcsshim/internal/uvm/create_lcow.go | 4 ++ .../hcsshim/internal/uvm/create_wcow.go | 53 +++++++++++---- .../Microsoft/hcsshim/internal/uvm/types.go | 4 ++ 19 files changed, 335 insertions(+), 26 deletions(-) diff --git a/cmd/gcs/main.go b/cmd/gcs/main.go index 74676671a6..7dacf5d5b8 100644 --- a/cmd/gcs/main.go +++ b/cmd/gcs/main.go @@ -84,6 +84,7 @@ func readMemoryEvents(startTime time.Time, efdFile *os.File, cgName string, thre func main() { startTime := time.Now() logLevel := flag.String("loglevel", "debug", "Logging Level: debug, info, warning, error, fatal, panic.") + coreDumpLoc := flag.String("core-dump-location", "", "The location/format where process core dumps will be written to.") kmsgLogLevel := flag.Uint("kmsgLogLevel", uint(kmsg.Warning), "Log all kmsg entries with a priority less than or equal to the 
supplied level.") logFile := flag.String("logfile", "", "Logging Target: An optional file name/path. Omit for console output.") logFormat := flag.String("log-format", "text", "Logging Format: text or json") @@ -144,6 +145,19 @@ func main() { logrus.Info("GCS started") + // Set the process core dump location. This will be global to all containers as it's a kernel configuration. + // If no path is specified core dumps will just be placed in the working directory of wherever the process + // was invoked to a file named "core". + if *coreDumpLoc != "" { + if err := ioutil.WriteFile( + "/proc/sys/kernel/core_pattern", + []byte(*coreDumpLoc), + 0644, + ); err != nil { + logrus.WithError(err).Fatal("failed to set core dump location") + } + } + // Continuously log /dev/kmsg go kmsg.ReadForever(kmsg.LogLevel(*kmsgLogLevel)) diff --git a/internal/guest/runtime/hcsv2/container.go b/internal/guest/runtime/hcsv2/container.go index 61ff9c7ac5..c2d234fd3a 100644 --- a/internal/guest/runtime/hcsv2/container.go +++ b/internal/guest/runtime/hcsv2/container.go @@ -66,6 +66,9 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe return -1, err } + // Add in the core rlimit specified on the container in case there was one set. This makes it so that execed processes can also generate + // core dumps. 
+ process.Rlimits = c.spec.Process.Rlimits p, err := c.container.ExecProcess(process, stdioSet) if err != nil { stdioSet.Close() diff --git a/internal/guest/runtime/hcsv2/sandbox_container.go b/internal/guest/runtime/hcsv2/sandbox_container.go index a5d80aeeb8..516ef8a0bc 100644 --- a/internal/guest/runtime/hcsv2/sandbox_container.go +++ b/internal/guest/runtime/hcsv2/sandbox_container.go @@ -108,6 +108,12 @@ func setupSandboxContainerSpec(ctx context.Context, id string, spec *oci.Spec) ( } } + if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" { + if err := setCoreRLimit(spec, rlimCore); err != nil { + return err + } + } + // TODO: JTERRY75 /dev/shm is not properly setup for LCOW I believe. CRI // also has a concept of a sandbox/shm file when the IPC NamespaceMode != // NODE. diff --git a/internal/guest/runtime/hcsv2/spec.go b/internal/guest/runtime/hcsv2/spec.go index 6abcd509d2..02168f6874 100644 --- a/internal/guest/runtime/hcsv2/spec.go +++ b/internal/guest/runtime/hcsv2/spec.go @@ -63,6 +63,32 @@ func setProcess(spec *oci.Spec) { } } +func setCoreRLimit(spec *oci.Spec, value string) error { + setProcess(spec) + + vals := strings.Split(value, ";") + if len(vals) != 2 { + return errors.New("wrong number of values supplied for rlimit core") + } + + soft, err := strconv.ParseUint(vals[0], 10, 64) + if err != nil { + return errors.Wrap(err, "failed to parse soft core rlimit") + } + hard, err := strconv.ParseUint(vals[1], 10, 64) + if err != nil { + return errors.Wrap(err, "failed to parse hard core rlimit") + } + + spec.Process.Rlimits = append(spec.Process.Rlimits, oci.POSIXRlimit{ + Type: "RLIMIT_CORE", + Soft: soft, + Hard: hard, + }) + + return nil +} + // setUserStr sets `spec.Process` to the valid `userstr` based on the OCI Image Spec // v1.0.0 `userstr`. 
// diff --git a/internal/guest/runtime/hcsv2/workload_container.go b/internal/guest/runtime/hcsv2/workload_container.go index 1e83f6a7f3..2148384316 100644 --- a/internal/guest/runtime/hcsv2/workload_container.go +++ b/internal/guest/runtime/hcsv2/workload_container.go @@ -161,6 +161,12 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci. return err } + if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" { + if err := setCoreRLimit(spec, rlimCore); err != nil { + return err + } + } + // Force the parent cgroup into our /containers root spec.Linux.CgroupsPath = "/containers/" + id diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 0482b07773..54c4be1eaf 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -4,6 +4,7 @@ package hcsoci import ( "context" + "errors" "fmt" "path/filepath" "regexp" @@ -385,6 +386,51 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } v2Container.AdditionalDeviceNamespace = extensions + // Process dump setup (if requested) + dumpPath := "" + if coi.HostingSystem != nil { + dumpPath = coi.HostingSystem.ProcessDumpLocation() + } + + if specDumpPath, ok := coi.Spec.Annotations[oci.AnnotationContainerProcessDumpLocation]; ok { + // If a process dump path was specified at pod creation time for a hypervisor isolated pod, then + // use this value. If one was specified on the container creation document then override with this + // instead. Unlike Linux, Windows containers can set the dump path on a per container basis. + dumpPath = specDumpPath + } + + if dumpPath != "" { + dumpType, err := parseDumpType(coi.Spec.Annotations) + if err != nil { + return nil, nil, err + } + + // Setup WER registry keys for local process dump creation if specified. 
+ // https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + v2Container.RegistryChanges = &hcsschema.RegistryChanges{ + AddValues: []hcsschema.RegistryValue{ + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpFolder", + StringValue: dumpPath, + Type_: "String", + }, + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpType", + DWordValue: dumpType, + Type_: "DWord", + }, + }, + } + } + return v1, v2Container, nil } @@ -415,3 +461,22 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h v2.AssignedDevices = v2AssignedDevices return nil } + +// parseDumpType parses the passed in string representation of the local user mode process dump type to the +// corresponding value the registry expects to be set. +// +// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings +func parseDumpType(annotations map[string]string) (int32, error) { + dmpTypeStr := annotations[oci.AnnotationWCOWProcessDumpType] + switch dmpTypeStr { + case "": + // If no type specified, default to full dumps. + return 2, nil + case "mini": + return 1, nil + case "full": + return 2, nil + default: + return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`) + } +} diff --git a/internal/oci/annotations.go b/internal/oci/annotations.go index 5c28ea02c6..bed5230470 100644 --- a/internal/oci/annotations.go +++ b/internal/oci/annotations.go @@ -221,4 +221,17 @@ const ( // AnnotationSecurityPolicy is used to specify a security policy for opengcs to enforce AnnotationSecurityPolicy = "io.microsoft.virtualmachine.lcow.securitypolicy" + // AnnotationContainerProcessDumpLocation specifies a path inside of containers to save process dumps to. 
As + // the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of + // some kind (vhd, bind mount, fileshare mount etc.) + AnnotationContainerProcessDumpLocation = "io.microsoft.container.processdumplocation" + + // AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode + // process dump for Windows containers. The supported options are "mini", and "full". + // See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype" + + // AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set + // in order to have core dumps generated for a given container. + AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore" ) diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 264f6a1a6d..c77a90731a 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -332,7 +332,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.EnableScratchEncryption = parseAnnotationsBool(ctx, s.Annotations, AnnotationEncryptedScratchDisk, lopts.EnableScratchEncryption) lopts.SecurityPolicy = parseAnnotationsString(s.Annotations, AnnotationSecurityPolicy, lopts.SecurityPolicy) lopts.KernelBootOptions = parseAnnotationsString(s.Annotations, AnnotationKernelBootOptions, lopts.KernelBootOptions) - + lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, lopts.ProcessDumpLocation) handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts) handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts) @@ -357,6 +357,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID) wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, 
AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy) wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap) + wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, wopts.ProcessDumpLocation) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil { return nil, err diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 994f96beac..e77a08f3a1 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -85,6 +85,11 @@ type Options struct { // that receives the UVMs set of NICs from this proxy instead of enumerating // the endpoints locally. NetworkConfigProxy string + + // Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be + // applied to all containers. On Windows it's configurable per container, but we can mimic this for + // Windows by just applying the location specified here per container. + ProcessDumpLocation string } // compares the create opts used during template creation with the create opts @@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool { return uvm.physicallyBacked } +// ProcessDumpLocation returns the location that process dumps will get written to for containers running +// in the UVM. 
+func (uvm *UtilityVM) ProcessDumpLocation() string { + return uvm.processDumpLocation +} + func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 { actual := (requested + 1) &^ 1 // align up to an even number if requested != actual { diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index db55674547..2a5e8ad240 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -384,6 +384,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error initArgs += " " + opts.ExecCommandLine + if opts.ProcessDumpLocation != "" { + initArgs += " -core-dump-location " + opts.ProcessDumpLocation + } + if vmDebugging { // Launch a shell on the console. initArgs = `sh -c "` + initArgs + ` & exec sh"` diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index c21fc5d1e4..ee70a67f9b 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv }, } + var registryChanges hcsschema.RegistryChanges + // We're getting asked to setup local dump collection for WCOW. We need to: + // + // 1. Turn off WER reporting, so we don't both upload the dump and save a local copy. + // 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions. + // https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192 + // This supposedly should be fixed soon but for now keep this until we know which container images + // (1809, 1903/9, 2004 etc.) this went out too. 
+ if opts.ProcessDumpLocation != "" { + uvm.processDumpLocation = opts.ProcessDumpLocation + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "ControlSet001\\Services\\WerSvc", + }, + Name: "Start", + DWordValue: 2, + Type_: "DWord", + }, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting", + }, + Name: "Disabled", + DWordValue: 1, + Type_: "DWord", + }, + ) + } + // Here for a temporary workaround until the need for setting this regkey is no more. To protect // against any undesired behavior (such as some general networking scenarios ceasing to function) // with a recent change to fix SMB share access in the UVM, this registry key will be checked to // enable the change in question inside GNS.dll. - var registryChanges hcsschema.RegistryChanges if !opts.DisableCompartmentNamespace { - registryChanges = hcsschema.RegistryChanges{ - AddValues: []hcsschema.RegistryValue{ - { - Key: &hcsschema.RegistryKey{ - Hive: "System", - Name: "CurrentControlSet\\Services\\gns", - }, - Name: "EnableCompartmentNamespace", - DWordValue: 1, - Type_: "DWord", + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "CurrentControlSet\\Services\\gns", }, + Name: "EnableCompartmentNamespace", + DWordValue: 1, + Type_: "DWord", }, - } + ) } processor := &hcsschema.Processor2{ diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 904fd112aa..5d7e528d87 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -122,9 +122,13 @@ type UtilityVM struct { // is true TemplateID string + // Location that container process dumps will get written too. + processDumpLocation string + // The CreateOpts used to create this uvm. 
These can be either of type // uvm.OptionsLCOW or uvm.OptionsWCOW createOpts interface{} + // Network config proxy client. If nil then this wasn't requested and the // uvms network will be configured locally. ncProxyClient ncproxyttrpc.NetworkConfigProxyService diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go index 0482b07773..54c4be1eaf 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go @@ -4,6 +4,7 @@ package hcsoci import ( "context" + "errors" "fmt" "path/filepath" "regexp" @@ -385,6 +386,51 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } v2Container.AdditionalDeviceNamespace = extensions + // Process dump setup (if requested) + dumpPath := "" + if coi.HostingSystem != nil { + dumpPath = coi.HostingSystem.ProcessDumpLocation() + } + + if specDumpPath, ok := coi.Spec.Annotations[oci.AnnotationContainerProcessDumpLocation]; ok { + // If a process dump path was specified at pod creation time for a hypervisor isolated pod, then + // use this value. If one was specified on the container creation document then override with this + // instead. Unlike Linux, Windows containers can set the dump path on a per container basis. + dumpPath = specDumpPath + } + + if dumpPath != "" { + dumpType, err := parseDumpType(coi.Spec.Annotations) + if err != nil { + return nil, nil, err + } + + // Setup WER registry keys for local process dump creation if specified. 
+ // https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + v2Container.RegistryChanges = &hcsschema.RegistryChanges{ + AddValues: []hcsschema.RegistryValue{ + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpFolder", + StringValue: dumpPath, + Type_: "String", + }, + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpType", + DWordValue: dumpType, + Type_: "DWord", + }, + }, + } + } + return v1, v2Container, nil } @@ -415,3 +461,22 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h v2.AssignedDevices = v2AssignedDevices return nil } + +// parseDumpType parses the passed in string representation of the local user mode process dump type to the +// corresponding value the registry expects to be set. +// +// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings +func parseDumpType(annotations map[string]string) (int32, error) { + dmpTypeStr := annotations[oci.AnnotationWCOWProcessDumpType] + switch dmpTypeStr { + case "": + // If no type specified, default to full dumps. 
+ return 2, nil + case "mini": + return 1, nil + case "full": + return 2, nil + default: + return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`) + } +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go index 5c28ea02c6..bed5230470 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go @@ -221,4 +221,17 @@ const ( // AnnotationSecurityPolicy is used to specify a security policy for opengcs to enforce AnnotationSecurityPolicy = "io.microsoft.virtualmachine.lcow.securitypolicy" + // AnnotationContainerProcessDumpLocation specifies a path inside of containers to save process dumps to. As + // the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of + // some kind (vhd, bind mount, fileshare mount etc.) + AnnotationContainerProcessDumpLocation = "io.microsoft.container.processdumplocation" + + // AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode + // process dump for Windows containers. The supported options are "mini", and "full". + // See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype" + + // AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set + // in order to have core dumps generated for a given container. 
+ AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore" ) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go index 264f6a1a6d..c77a90731a 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go @@ -332,7 +332,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.EnableScratchEncryption = parseAnnotationsBool(ctx, s.Annotations, AnnotationEncryptedScratchDisk, lopts.EnableScratchEncryption) lopts.SecurityPolicy = parseAnnotationsString(s.Annotations, AnnotationSecurityPolicy, lopts.SecurityPolicy) lopts.KernelBootOptions = parseAnnotationsString(s.Annotations, AnnotationKernelBootOptions, lopts.KernelBootOptions) - + lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, lopts.ProcessDumpLocation) handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts) handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts) @@ -357,6 +357,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID) wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy) wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap) + wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, wopts.ProcessDumpLocation) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil { return nil, err diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go index 994f96beac..e77a08f3a1 100644 --- 
a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go @@ -85,6 +85,11 @@ type Options struct { // that receives the UVMs set of NICs from this proxy instead of enumerating // the endpoints locally. NetworkConfigProxy string + + // Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be + // applied to all containers. On Windows it's configurable per container, but we can mimic this for + // Windows by just applying the location specified here per container. + ProcessDumpLocation string } // compares the create opts used during template creation with the create opts @@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool { return uvm.physicallyBacked } +// ProcessDumpLocation returns the location that process dumps will get written to for containers running +// in the UVM. +func (uvm *UtilityVM) ProcessDumpLocation() string { + return uvm.processDumpLocation +} + func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 { actual := (requested + 1) &^ 1 // align up to an even number if requested != actual { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go index db55674547..2a5e8ad240 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go @@ -384,6 +384,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error initArgs += " " + opts.ExecCommandLine + if opts.ProcessDumpLocation != "" { + initArgs += " -core-dump-location " + opts.ProcessDumpLocation + } + if vmDebugging { // Launch a shell on the console. 
initArgs = `sh -c "` + initArgs + ` & exec sh"` diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go index c21fc5d1e4..ee70a67f9b 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go @@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv }, } + var registryChanges hcsschema.RegistryChanges + // We're getting asked to setup local dump collection for WCOW. We need to: + // + // 1. Turn off WER reporting, so we don't both upload the dump and save a local copy. + // 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions. + // https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192 + // This supposedly should be fixed soon but for now keep this until we know which container images + // (1809, 1903/9, 2004 etc.) this went out too. + if opts.ProcessDumpLocation != "" { + uvm.processDumpLocation = opts.ProcessDumpLocation + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "ControlSet001\\Services\\WerSvc", + }, + Name: "Start", + DWordValue: 2, + Type_: "DWord", + }, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting", + }, + Name: "Disabled", + DWordValue: 1, + Type_: "DWord", + }, + ) + } + // Here for a temporary workaround until the need for setting this regkey is no more. To protect // against any undesired behavior (such as some general networking scenarios ceasing to function) // with a recent change to fix SMB share access in the UVM, this registry key will be checked to // enable the change in question inside GNS.dll. 
- var registryChanges hcsschema.RegistryChanges if !opts.DisableCompartmentNamespace { - registryChanges = hcsschema.RegistryChanges{ - AddValues: []hcsschema.RegistryValue{ - { - Key: &hcsschema.RegistryKey{ - Hive: "System", - Name: "CurrentControlSet\\Services\\gns", - }, - Name: "EnableCompartmentNamespace", - DWordValue: 1, - Type_: "DWord", + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "CurrentControlSet\\Services\\gns", }, + Name: "EnableCompartmentNamespace", + DWordValue: 1, + Type_: "DWord", }, - } + ) } processor := &hcsschema.Processor2{ diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go index 904fd112aa..5d7e528d87 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go @@ -122,9 +122,13 @@ type UtilityVM struct { // is true TemplateID string + // Location that container process dumps will get written too. + processDumpLocation string + // The CreateOpts used to create this uvm. These can be either of type // uvm.OptionsLCOW or uvm.OptionsWCOW createOpts interface{} + // Network config proxy client. If nil then this wasn't requested and the // uvms network will be configured locally. ncProxyClient ncproxyttrpc.NetworkConfigProxyService From f964e283873cd13c1feec0c0d54b6e2e8dcfc920 Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Fri, 24 Sep 2021 15:57:43 -0700 Subject: [PATCH 2/2] Add process dump cri-containerd testcases This change adds cri-containerd testcases that exercise the process dump functionality. It sets up two containers, using images that either overflow the stack (LCOW) or throw an exception (WCOW) shortly after starting. This should generate a dump file in the sandbox mount location in the test.
Then the second container mounts the same sandbox mount and just verifies that the core dump file generated by the first is present. Signed-off-by: Daniel Canter --- test/cri-containerd/main.go | 6 +- test/cri-containerd/runpodsandbox_test.go | 220 ++++++++++++++++++ .../test-images/stackoverflow-lcow/Dockerfile | 7 + .../test-images/stackoverflow-lcow/main.c | 10 + 4 files changed, 241 insertions(+), 2 deletions(-) create mode 100644 test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile create mode 100644 test/cri-containerd/test-images/stackoverflow-lcow/main.c diff --git a/test/cri-containerd/main.go b/test/cri-containerd/main.go index d723f52d49..015d6a3337 100644 --- a/test/cri-containerd/main.go +++ b/test/cri-containerd/main.go @@ -48,6 +48,8 @@ const ( lcowRuntimeHandler = "runhcs-lcow" imageLcowK8sPause = "k8s.gcr.io/pause:3.1" imageLcowAlpine = "docker.io/library/alpine:latest" + imageLcowAlpineCoreDump = "cplatpublic.azurecr.io/stackoverflow-alpine:latest" + imageWindowsProcessDump = "cplatpublic.azurecr.io/crashdump:latest" imageLcowCosmos = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce" imageJobContainerHNS = "cplatpublic.azurecr.io/jobcontainer_hns:latest" imageJobContainerETW = "cplatpublic.azurecr.io/jobcontainer_etw:latest" @@ -162,7 +164,7 @@ func getWindowsNanoserverImage(build uint16) string { case osversion.V20H2: return "mcr.microsoft.com/windows/nanoserver:2009" default: - panic("unsupported build") + return "mcr.microsoft.com/windows/nanoserver:2009" } } @@ -179,7 +181,7 @@ func getWindowsServerCoreImage(build uint16) string { case osversion.V20H2: return "mcr.microsoft.com/windows/servercore:2009" default: - panic("unsupported build") + return "mcr.microsoft.com/windows/nanoserver:2009" } } diff --git a/test/cri-containerd/runpodsandbox_test.go b/test/cri-containerd/runpodsandbox_test.go index fec2650778..05a06c3ec9 100644 --- a/test/cri-containerd/runpodsandbox_test.go +++ b/test/cri-containerd/runpodsandbox_test.go @@ -1361,6
+1361,222 @@ func Test_RunPodSandbox_MultipleContainersSameVhd_WCOW(t *testing.T) {
 	}
 }
 
+// Test_RunPodSandbox_ProcessDump_LCOW creates a pod with the process dump location
+// annotation set to /coredumps/core, runs a container that crashes shortly after start,
+// and then uses a second container sharing the same mount to check the file exists.
+func Test_RunPodSandbox_ProcessDump_LCOW(t *testing.T) {
+	requireFeatures(t, featureLCOW)
+
+	pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpineCoreDump})
+
+	client := newTestRuntimeClient(t)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	sbRequest := getRunPodSandboxRequest(t, lcowRuntimeHandler, map[string]string{
+		oci.AnnotationContainerProcessDumpLocation: "/coredumps/core",
+	})
+
+	podID := runPodSandbox(t, client, ctx, sbRequest)
+	defer removePodSandbox(t, client, ctx, podID)
+	defer stopPodSandbox(t, client, ctx, podID)
+
+	// Both containers use this sandbox mount so the second can see files written by the first.
+	mounts := []*runtime.Mount{
+		{
+			HostPath:      "sandbox:///coredump",
+			ContainerPath: "/coredumps",
+		},
+	}
+
+	// 18446744073709551615 is the maximum uint64 value.
+	// NOTE(review): presumably "soft;hard" with max meaning unlimited -- confirm.
+	annotations := map[string]string{
+		oci.AnnotationRLimitCore: "18446744073709551615;18446744073709551615",
+	}
+
+	// Setup container 1 that uses an image that stackoverflows shortly after starting.
+	// This should generate a core dump file in the sandbox mount location
+	c1Request := &runtime.CreateContainerRequest{
+		Config: &runtime.ContainerConfig{
+			Metadata: &runtime.ContainerMetadata{
+				Name: t.Name() + "-Container1",
+			},
+			Image: &runtime.ImageSpec{
+				Image: imageLcowAlpineCoreDump,
+			},
+			Command: []string{
+				"./stackoverflow",
+			},
+			Annotations: annotations,
+			Mounts:      mounts,
+		},
+		PodSandboxId:  podID,
+		SandboxConfig: sbRequest.Config,
+	}
+
+	container1ID := createContainer(t, client, ctx, c1Request)
+	defer removeContainer(t, client, ctx, container1ID)
+
+	startContainer(t, client, ctx, container1ID)
+	defer stopContainer(t, client, ctx, container1ID)
+
+	// Then setup a secondary container that will mount the same sandbox mount and
+	// just verify that the core dump file is present.
+	c2Request := &runtime.CreateContainerRequest{
+		Config: &runtime.ContainerConfig{
+			Metadata: &runtime.ContainerMetadata{
+				Name: t.Name() + "-Container2",
+			},
+			Image: &runtime.ImageSpec{
+				Image: imageLcowAlpineCoreDump,
+			},
+			// Hold this command open until killed
+			Command: []string{
+				"top",
+			},
+			Mounts: mounts,
+		},
+		PodSandboxId:  podID,
+		SandboxConfig: sbRequest.Config,
+	}
+
+	// Wait for the first container to die and create the core dump.
+	time.Sleep(time.Second * 5)
+
+	container2ID := createContainer(t, client, ctx, c2Request)
+	defer removeContainer(t, client, ctx, container2ID)
+
+	startContainer(t, client, ctx, container2ID)
+	defer stopContainer(t, client, ctx, container2ID)
+
+	// Check if the core dump file is present
+	execCommand := []string{
+		"ls",
+		"/coredumps/core",
+	}
+	execRequest := &runtime.ExecSyncRequest{
+		ContainerId: container2ID,
+		Cmd:         execCommand,
+		Timeout:     20,
+	}
+
+	r := execSync(t, client, ctx, execRequest)
+	if r.ExitCode != 0 {
+		t.Fatalf("failed with exit code %d running `ls`: %s", r.ExitCode, string(r.Stderr))
+	}
+}
+
+// Test_RunPodSandbox_ProcessDump_WCOW_Hypervisor creates a pod with the process dump
+// location annotation set to C:\processdump, runs a container that crashes shortly after
+// start, and then uses a second container sharing the same mount to look for a .dmp file.
+func Test_RunPodSandbox_ProcessDump_WCOW_Hypervisor(t *testing.T) {
+	requireFeatures(t, featureWCOWHypervisor)
+
+	pullRequiredImages(t, []string{imageWindowsProcessDump})
+
+	client := newTestRuntimeClient(t)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	sbRequest := getRunPodSandboxRequest(t, wcowHypervisor19041RuntimeHandler, map[string]string{
+		oci.AnnotationContainerProcessDumpLocation: "C:\\processdump",
+	})
+
+	podID := runPodSandbox(t, client, ctx, sbRequest)
+	defer removePodSandbox(t, client, ctx, podID)
+	defer stopPodSandbox(t, client, ctx, podID)
+
+	// Both containers use this sandbox mount so the second can see files written by the first.
+	mounts := []*runtime.Mount{
+		{
+			HostPath:      "sandbox:///processdump",
+			ContainerPath: "C:\\processdump",
+		},
+	}
+
+	// Setup container 1 that uses an image that throws a user exception shortly after starting.
+	// This should generate a process dump file in the sandbox mount location
+	c1Request := &runtime.CreateContainerRequest{
+		Config: &runtime.ContainerConfig{
+			Metadata: &runtime.ContainerMetadata{
+				Name: t.Name() + "-Container1",
+			},
+			Image: &runtime.ImageSpec{
+				Image: imageWindowsProcessDump,
+			},
+			Command: []string{
+				"C:\\app\\crashtest.exe",
+				"ue",
+			},
+			Mounts: mounts,
+		},
+		PodSandboxId:  podID,
+		SandboxConfig: sbRequest.Config,
+	}
+
+	container1ID := createContainer(t, client, ctx, c1Request)
+	defer removeContainer(t, client, ctx, container1ID)
+
+	startContainer(t, client, ctx, container1ID)
+	defer stopContainer(t, client, ctx, container1ID)
+
+	// Then setup a secondary container that will mount the same sandbox mount and
+	// just verify that the process dump file is present.
+	c2Request := &runtime.CreateContainerRequest{
+		Config: &runtime.ContainerConfig{
+			Metadata: &runtime.ContainerMetadata{
+				Name: t.Name() + "-Container2",
+			},
+			Image: &runtime.ImageSpec{
+				Image: imageWindowsProcessDump,
+			},
+			// Hold this command open until killed
+			Command: []string{
+				"cmd",
+				"/c",
+				"ping",
+				"-t",
+				"127.0.0.1",
+			},
+			Mounts: mounts,
+		},
+		PodSandboxId:  podID,
+		SandboxConfig: sbRequest.Config,
+	}
+
+	// Wait for the first container to die and create the process dump.
+	time.Sleep(time.Second * 10)
+
+	container2ID := createContainer(t, client, ctx, c2Request)
+	defer removeContainer(t, client, ctx, container2ID)
+
+	startContainer(t, client, ctx, container2ID)
+	defer stopContainer(t, client, ctx, container2ID)
+
+	// Check if the process dump file is present
+	execCommand := []string{
+		"cmd",
+		"/c",
+		"dir",
+		"C:\\processdump",
+	}
+	execRequest := &runtime.ExecSyncRequest{
+		ContainerId: container2ID,
+		Cmd:         execCommand,
+		Timeout:     20,
+	}
+
+	r := execSync(t, client, ctx, execRequest)
+	if r.ExitCode != 0 {
+		t.Fatalf("failed with exit code %d running `dir`: %s", r.ExitCode, string(r.Stderr))
+	}
+
+	if !strings.Contains(string(r.Stdout), ".dmp") {
+		t.Fatalf("expected dmp file to be present in the directory, got: %s", string(r.Stdout))
+	}
+}
+
 func createSandboxContainerAndExecForCustomScratch(t *testing.T, annotations map[string]string) (string, string, int) {
 	cmd := []string{
 		"df",
diff --git a/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile b/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile
new file mode 100644
index 0000000000..d750f3ad33
--- /dev/null
+++ b/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile
@@ -0,0 +1,7 @@
+FROM alpine
+
+RUN apk add --no-cache build-base
+WORKDIR /app
+COPY main.c .
+
+RUN gcc -g -o stackoverflow main.c
\ No newline at end of file
diff --git a/test/cri-containerd/test-images/stackoverflow-lcow/main.c b/test/cri-containerd/test-images/stackoverflow-lcow/main.c
new file mode 100644
index 0000000000..bee9154c1e
--- /dev/null
+++ b/test/cri-containerd/test-images/stackoverflow-lcow/main.c
@@ -0,0 +1,10 @@
+#include <stdio.h>
+
+void foo(){
+    foo();
+}
+
+int main() {
+    foo();
+    return 0;
+}
\ No newline at end of file