Skip to content

Commit

Permalink
Add process dump functionality for WCOW/LCOW
Browse files Browse the repository at this point in the history
This commit adds support for generating process dumps for hypervisor isolated containers. This includes
a new annotation to specify where process dumps should get placed on creation, which is global
to all containers.

Signed-off-by: Daniel Canter <[email protected]>
  • Loading branch information
dcantah committed Jul 15, 2021
1 parent 4378e83 commit e242b1f
Show file tree
Hide file tree
Showing 12 changed files with 190 additions and 12 deletions.
14 changes: 14 additions & 0 deletions cmd/gcs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ func readMemoryEvents(startTime time.Time, efdFile *os.File, cgName string, thre
func main() {
startTime := time.Now()
logLevel := flag.String("loglevel", "debug", "Logging Level: debug, info, warning, error, fatal, panic.")
coreDumpLoc := flag.String("core-dump-location", "", "The location/format where process core dumps will be written to.")
kmsgLogLevel := flag.Uint("kmsgLogLevel", uint(kmsg.Warning), "Log all kmsg entries with a priority less than or equal to the supplied level.")
logFile := flag.String("logfile", "", "Logging Target: An optional file name/path. Omit for console output.")
logFormat := flag.String("log-format", "text", "Logging Format: text or json")
Expand Down Expand Up @@ -144,6 +145,19 @@ func main() {

logrus.Info("GCS started")

// Set the process core dump location. This will be global to all containers as it's a kernel configuration.
// If no path is specified core dumps will just be placed in the working directory of wherever the process
// was invoked to a file named "core".
if *coreDumpLoc != "" {
if err := ioutil.WriteFile(
"/proc/sys/kernel/core_pattern",
[]byte(*coreDumpLoc),
0644,
); err != nil {
logrus.WithError(err).Fatal("failed to set core dump location")
}
}

// Continuously log /dev/kmsg
go kmsg.ReadForever(kmsg.LogLevel(*kmsgLogLevel))

Expand Down
1 change: 1 addition & 0 deletions internal/guest/runtime/hcsv2/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe
return -1, err
}

process.Rlimits = c.spec.Process.Rlimits
p, err := c.container.ExecProcess(process, stdioSet)
if err != nil {
stdioSet.Close()
Expand Down
6 changes: 6 additions & 0 deletions internal/guest/runtime/hcsv2/sandbox_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ func setupSandboxContainerSpec(ctx context.Context, id string, spec *oci.Spec) (
}
}

if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" {
if err := setCoreRLimit(spec, rlimCore); err != nil {
return err
}
}

// TODO: JTERRY75 /dev/shm is not properly setup for LCOW I believe. CRI
// also has a concept of a sandbox/shm file when the IPC NamespaceMode !=
// NODE.
Expand Down
26 changes: 26 additions & 0 deletions internal/guest/runtime/hcsv2/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,32 @@ func setProcess(spec *oci.Spec) {
}
}

// setCoreRLimit appends an RLIMIT_CORE entry to `spec.Process.Rlimits` using the
// limits encoded in `value`.
//
// `value` must have the form "<soft>;<hard>", where both parts are base-10
// unsigned 64-bit integers and the soft limit does not exceed the hard limit.
// An error is returned, and `spec` is left untouched, if `value` does not meet
// these requirements.
func setCoreRLimit(spec *oci.Spec, value string) error {
	vals := strings.Split(value, ";")
	if len(vals) != 2 {
		return errors.New("wrong number of values supplied for rlimit core")
	}

	soft, err := strconv.ParseUint(vals[0], 10, 64)
	if err != nil {
		return errors.New("invalid soft limit supplied for rlimit core: " + err.Error())
	}
	hard, err := strconv.ParseUint(vals[1], 10, 64)
	if err != nil {
		return errors.New("invalid hard limit supplied for rlimit core: " + err.Error())
	}

	// setrlimit(2) fails with EINVAL when the soft limit is above the hard
	// limit. Reject the value here so the failure points at the annotation
	// instead of surfacing later when the container process is started.
	if soft > hard {
		return errors.New("soft limit must not exceed hard limit for rlimit core")
	}

	// Only touch the spec once the value is known to be valid. setProcess is
	// relied on to make `spec.Process` safe to dereference below.
	setProcess(spec)
	spec.Process.Rlimits = append(spec.Process.Rlimits, oci.POSIXRlimit{
		Type: "RLIMIT_CORE",
		Soft: soft,
		Hard: hard,
	})

	return nil
}

// setUserStr sets `spec.Process` to the valid `userstr` based on the OCI Image Spec
// v1.0.0 `userstr`.
//
Expand Down
6 changes: 6 additions & 0 deletions internal/guest/runtime/hcsv2/workload_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.
}
}

if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" {
if err := setCoreRLimit(spec, rlimCore); err != nil {
return err
}
}

// Force the parent cgroup into our /containers root
spec.Linux.CgroupsPath = "/containers/" + id

Expand Down
61 changes: 61 additions & 0 deletions internal/hcsoci/hcsdoc_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package hcsoci

import (
"context"
"errors"
"fmt"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -367,6 +368,49 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
return nil, nil, err
}

// Setup WER registry keys for local process dump creation if specified.
// https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
procDumpLoc := coi.Spec.Annotations[oci.AnnotationProcessDumpLocation]
if (coi.HostingSystem != nil && coi.HostingSystem.ProcessDumpLocation() != "") || procDumpLoc != "" {
// If a process dump path was specified at pod creation time for a hypervisor isolated pod, then
// use this value. If one was specified on the container creation document then override with this
// instead. Unlike Linux, Windows containers can set the dump path on a per container basis.
var dumpPath string
if coi.HostingSystem != nil {
dumpPath = coi.HostingSystem.ProcessDumpLocation()
} else {
dumpPath = procDumpLoc
}

dumpType, err := parseDumpType(coi.Spec.Annotations)
if err != nil {
return nil, nil, err
}

v2Container.RegistryChanges = &hcsschema.RegistryChanges{
AddValues: []hcsschema.RegistryValue{
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
Name: "DumpFolder",
StringValue: dumpPath,
Type_: "String",
},
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
Name: "DumpType",
DWordValue: dumpType,
Type_: "DWord",
},
},
}
}

return v1, v2Container, nil
}

Expand Down Expand Up @@ -397,3 +441,20 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h
v2.AssignedDevices = v2AssignedDevices
return nil
}

// parseDumpType reads the WCOW process dump type annotation from `annotations` and translates it
// into the numeric value used for the WER LocalDumps `DumpType` registry value.
//
// "mini" maps to 1 and "full" maps to 2. A missing or empty annotation is treated as "full".
// Any other value yields -1 and an error.
func parseDumpType(annotations map[string]string) (int32, error) {
	const (
		miniDump int32 = 1
		fullDump int32 = 2
	)

	switch annotations[oci.AnnotationWCOWProcessDumpType] {
	case "mini":
		return miniDump, nil
	case "", "full":
		// No type specified means we default to full dumps.
		return fullDump, nil
	}

	return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`)
}
14 changes: 14 additions & 0 deletions internal/oci/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,18 @@ const (
// AnnotationNcproxyContainerID indicates whether or not to use the hcsshim container ID
// when setting up ncproxy and computeagent
AnnotationNcproxyContainerID = "io.microsoft.network.ncproxy.containerid"

// AnnotationProcessDumpLocation specifies a path inside of containers to save process dumps to. As
// the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of
// some kind (vhd, bind mount, fileshare mount etc.)
AnnotationProcessDumpLocation = "io.microsoft.processdumplocation"

// AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode
// process dump for Windows containers. The supported options are "mini" and "full".
// See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype"

// AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set
// in order to have core dumps generated for a given container.
AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore"
)
2 changes: 2 additions & 0 deletions internal/oci/uvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) (
lopts.BootFilesPath = parseAnnotationsString(s.Annotations, AnnotationBootFilesRootPath, lopts.BootFilesPath)
lopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, lopts.CPUGroupID)
lopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, AnnotationNetworkConfigProxy, lopts.NetworkConfigProxy)
lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationProcessDumpLocation, lopts.ProcessDumpLocation)
handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts)
handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts)

Expand All @@ -342,6 +343,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) (
wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID)
wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy)
wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap)
wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationProcessDumpLocation, wopts.ProcessDumpLocation)
handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts)
if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil {
return nil, err
Expand Down
11 changes: 11 additions & 0 deletions internal/uvm/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ type Options struct {
// that receives the UVMs set of NICs from this proxy instead of enumerating
// the endpoints locally.
NetworkConfigProxy string

// Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be
// applied to all containers. On Windows it's configurable per container, but we can mimic this for
// Windows by just applying the location specified here per container.
ProcessDumpLocation string
}

// compares the create opts used during template creation with the create opts
Expand Down Expand Up @@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool {
return uvm.physicallyBacked
}

// ProcessDumpLocation returns the location that process dumps will get written to for containers running
// in the UVM. It returns the UVM's stored processDumpLocation value as is; an empty string means no
// location was recorded on this UVM.
//
// NOTE(review): in the code visible here only the WCOW config path assigns this field — confirm
// whether LCOW UVMs are expected to report a location through this accessor as well.
func (uvm *UtilityVM) ProcessDumpLocation() string {
return uvm.processDumpLocation
}

func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 {
actual := (requested + 1) &^ 1 // align up to an even number
if requested != actual {
Expand Down
4 changes: 4 additions & 0 deletions internal/uvm/create_lcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error

initArgs += " " + opts.ExecCommandLine

if opts.ProcessDumpLocation != "" {
initArgs += " -core-dump-location " + opts.ProcessDumpLocation
}

if vmDebugging {
// Launch a shell on the console.
initArgs = `sh -c "` + initArgs + ` & exec sh"`
Expand Down
53 changes: 41 additions & 12 deletions internal/uvm/create_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv
},
}

var registryChanges hcsschema.RegistryChanges
// We're getting asked to setup local dump collection for WCOW. We need to:
//
// 1. Turn off WER reporting, so we don't both upload the dump and save a local copy.
// 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions.
// https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192
// This is supposedly going to be fixed soon, but keep this workaround until we know which container images
// (1809, 1903/9, 2004 etc.) the fix went out to.
if opts.ProcessDumpLocation != "" {
uvm.processDumpLocation = opts.ProcessDumpLocation
registryChanges.AddValues = append(registryChanges.AddValues,
hcsschema.RegistryValue{
Key: &hcsschema.RegistryKey{
Hive: "System",
Name: "ControlSet001\\Services\\WerSvc",
},
Name: "Start",
DWordValue: 2,
Type_: "DWord",
},
hcsschema.RegistryValue{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting",
},
Name: "Disabled",
DWordValue: 1,
Type_: "DWord",
},
)
}

// Here for a temporary workaround until the need for setting this regkey is no more. To protect
// against any undesired behavior (such as some general networking scenarios ceasing to function)
// with a recent change to fix SMB share access in the UVM, this registry key will be checked to
// enable the change in question inside GNS.dll.
var registryChanges hcsschema.RegistryChanges
if !opts.DisableCompartmentNamespace {
registryChanges = hcsschema.RegistryChanges{
AddValues: []hcsschema.RegistryValue{
{
Key: &hcsschema.RegistryKey{
Hive: "System",
Name: "CurrentControlSet\\Services\\gns",
},
Name: "EnableCompartmentNamespace",
DWordValue: 1,
Type_: "DWord",
registryChanges.AddValues = append(registryChanges.AddValues,
hcsschema.RegistryValue{
Key: &hcsschema.RegistryKey{
Hive: "System",
Name: "CurrentControlSet\\Services\\gns",
},
Name: "EnableCompartmentNamespace",
DWordValue: 1,
Type_: "DWord",
},
}
)
}

processor := &hcsschema.Processor2{
Expand Down
4 changes: 4 additions & 0 deletions internal/uvm/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,13 @@ type UtilityVM struct {
// is true
TemplateID string

// Location that container process dumps will get written to.
processDumpLocation string

// The CreateOpts used to create this uvm. These can be either of type
// uvm.OptionsLCOW or uvm.OptionsWCOW
createOpts interface{}

// Network config proxy client. If nil then this wasn't requested and the
// uvms network will be configured locally.
ncProxyClient ncproxyttrpc.NetworkConfigProxyService
Expand Down

0 comments on commit e242b1f

Please sign in to comment.