Skip to content

Commit

Permalink
Make Windows crash dump checks asynchronous. (#30156)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexn-dd authored Oct 17, 2024
1 parent abaca87 commit f481626
Show file tree
Hide file tree
Showing 8 changed files with 338 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

// allow us to change for testing
var readfn = doReadCrashDump
var parseCrashDump = parseWinCrashDump

type logCallbackContext struct {
loglines []string
Expand Down Expand Up @@ -107,22 +108,22 @@ func doReadCrashDump(filename string, ctx *logCallbackContext, exterr *uint32) e
return nil
}

func parseCrashDump(wcs *WinCrashStatus) {
func parseWinCrashDump(wcs *WinCrashStatus) {
var ctx logCallbackContext
var extendedError uint32

err := readfn(wcs.FileName, &ctx, &extendedError)

if err != nil {
wcs.Success = false
wcs.StatusCode = WinCrashStatusCodeFailed
wcs.ErrString = fmt.Sprintf("Failed to load crash dump file %v %x", err, extendedError)
log.Errorf("Failed to open crash dump %s: %v %x", wcs.FileName, err, extendedError)
return
}

if len(ctx.loglines) < 2 {
wcs.ErrString = fmt.Sprintf("Invalid crash dump file %s", wcs.FileName)
wcs.Success = false
wcs.StatusCode = WinCrashStatusCodeFailed
return
}

Expand Down Expand Up @@ -190,5 +191,5 @@ func parseCrashDump(wcs *WinCrashStatus) {
wcs.Offender = callsite
break
}
wcs.Success = true
wcs.StatusCode = WinCrashStatusCodeSuccess
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,11 @@ func TestCrashParser(t *testing.T) {
FileName: "testdata/crashsample1.txt",
}
// first read in the sample data

readfn = testCrashReader
OverrideCrashDumpReader(testCrashReader)

parseCrashDump(wcs)

assert.True(t, wcs.Success)
assert.Equal(t, WinCrashStatusCodeSuccess, wcs.StatusCode)
assert.Empty(t, wcs.ErrString)
assert.Equal(t, "Mon Jun 26 20:44:49.742 2023 (UTC - 7:00)", wcs.DateString)
before, _, _ := strings.Cut(wcs.Offender, "+")
Expand All @@ -72,11 +71,11 @@ func TestCrashParserWithLineSplits(t *testing.T) {
}
// first read in the sample data

readfn = testCrashReaderWithLineSplits
OverrideCrashDumpReader(testCrashReaderWithLineSplits)

parseCrashDump(wcs)

assert.True(t, wcs.Success)
assert.Equal(t, WinCrashStatusCodeSuccess, wcs.StatusCode)
assert.Empty(t, wcs.ErrString)
assert.Equal(t, "Mon Jun 26 20:44:49.742 2023 (UTC - 7:00)", wcs.DateString)
before, _, _ := strings.Cut(wcs.Offender, "+")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,26 @@ const (
DumpTypeAutomatic = int(7) // automatic
)

// Status codes reported in WinCrashStatus.StatusCode.
const (
	// WinCrashStatusCodeUnknown marks an invalid or corrupted status code.
	WinCrashStatusCodeUnknown int = -1

	// WinCrashStatusCodeSuccess means crash dump processing succeeded, or
	// that there was no crash dump to process.
	WinCrashStatusCodeSuccess int = 0

	// WinCrashStatusCodeBusy means crash dump processing is still running
	// and no result is available yet.
	WinCrashStatusCodeBusy int = 1

	// WinCrashStatusCodeFailed means crash dump processing failed or hit an error.
	WinCrashStatusCodeFailed int = 2
)

// WinCrashStatus defines all of the information returned from the system
// probe to the caller
type WinCrashStatus struct {
Success bool `json:"success"`
StatusCode int `json:"statuscode"`
ErrString string `json:"errstring"`
FileName string `json:"filename"`
Type int `json:"dumptype"`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build test && windows

package probe

// readCrashDumpType is the signature of a crash dump reading function that
// can be installed with OverrideCrashDumpReader.
type readCrashDumpType func(filename string, ctx *logCallbackContext, _ *uint32) error
// parseCrashDumpType is the signature of a crash dump parsing function that
// can be installed with OverrideCrashDumpParser.
type parseCrashDumpType func(wcs *WinCrashStatus)

// SetCachedSettings sets the settings used for tests without reading the Registry.
// It stores wcs itself (not a copy) as the probe's cached status and does not
// acquire p.mu, so it should be called before the probe is used concurrently.
func (p *WinCrashProbe) SetCachedSettings(wcs *WinCrashStatus) {
p.status = wcs
}

// OverrideCrashDumpReader replaces the crash dump reading function for tests.
// The replacement is stored in the package-level readfn variable and stays in
// effect until it is overridden again; the previous reader is not restored.
func OverrideCrashDumpReader(customCrashReader readCrashDumpType) {
readfn = customCrashReader
}

// OverrideCrashDumpParser replaces the crash dump parsing function for tests.
// The replacement is stored in the package-level parseCrashDump variable and
// stays in effect until it is overridden again; the previous parser is not restored.
func OverrideCrashDumpParser(customParseCrashDump parseCrashDumpType) {
parseCrashDump = customParseCrashDump
}
112 changes: 100 additions & 12 deletions pkg/collector/corechecks/system/wincrashdetect/probe/wincrashprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,126 @@ import (
"fmt"
"os"
"path/filepath"
"sync"

sysconfigtypes "github.com/DataDog/datadog-agent/cmd/system-probe/config/types"
"github.com/DataDog/datadog-agent/pkg/util/winutil"
"golang.org/x/sys/windows/registry"
)

// probeState identifies which step of crash dump processing the probe is in.
type probeState uint32

const (
// idle indicates that the probe is waiting for a request.
idle probeState = iota

// busy indicates that the probe is currently processing a crash dump.
busy

// completed indicates that the probe finished processing a crash dump.
completed

// failed indicates that the probe failed to process a crash dump.
failed
)

// WinCrashProbe caches the crash dump status and tracks the progress of
// crash dump processing across calls to Get.
type WinCrashProbe struct {
// state is the current step of crash dump processing.
state probeState
// status is the cached crash status. It is nil on a new probe and is set
// by Get, or by SetCachedSettings in tests.
status *WinCrashStatus
// mu is held by Get while it reads and updates state and status.
mu sync.Mutex
}

// NewWinCrashProbe returns an initialized WinCrashProbe
func NewWinCrashProbe(_ *sysconfigtypes.Config) (*WinCrashProbe, error) {
return &WinCrashProbe{}, nil
return &WinCrashProbe{
state: idle,
status: nil,
}, nil
}

// parseCrashDumpAsync parses the crash dump described by p.status and then
// publishes the result by moving the probe to the completed state. It is
// meant to run in its own goroutine because parsing may take very long.
//
// p.state and p.status are shared with Get, which accesses them while holding
// p.mu, so every access here is made under the same lock. The lock is released
// while parsing so that Get can keep answering "busy" in the meantime. The
// caller must not hold p.mu in the calling goroutine.
func (p *WinCrashProbe) parseCrashDumpAsync() {
	p.mu.Lock()
	status := p.status
	if status == nil {
		// Nothing to parse; record the failure under the lock so that a
		// concurrent Get observes a consistent state.
		p.state = failed
		p.mu.Unlock()
		return
	}
	p.mu.Unlock()

	// Potentially slow: deliberately performed without holding the lock.
	parseCrashDump(status)

	p.mu.Lock()
	defer p.mu.Unlock()
	p.state = completed
}

// Get returns the current crash, if any
func (p *WinCrashProbe) Get() *WinCrashStatus {
wcs := &WinCrashStatus{}

err := wcs.getCurrentCrashSettings()
if err != nil {
wcs.ErrString = err.Error()
wcs.Success = false
return wcs
}
// Nothing in this method should take long.
p.mu.Lock()
defer p.mu.Unlock()

switch p.state {
case idle:
if p.status == nil {
// This is a new request.
err := wcs.getCurrentCrashSettings()
if err != nil {
wcs.ErrString = err.Error()
wcs.StatusCode = WinCrashStatusCodeFailed
}
} else {
// Use cached settings, set by tests.
// Make a copy to avoid side-effect modifications.
*wcs = *(p.status)
}

if len(wcs.FileName) == 0 {
// no filename means no crash dump
wcs.Success = true // we succeeded
return wcs
// Transition to the next state.
if wcs.StatusCode == WinCrashStatusCodeFailed {
// Only try once and cache the failure.
p.status = wcs
p.state = failed
} else if len(wcs.FileName) == 0 {
// No filename means no crash dump
p.status = wcs
p.state = completed
wcs.StatusCode = WinCrashStatusCodeSuccess
} else {
// Kick off the crash dump processing asynchronously.
// The crash dump may be very large and we should not block for a response.
p.state = busy
wcs.StatusCode = WinCrashStatusCodeBusy

// Make a new copy of the wcs for async processing while returning "Busy"
// for the current response.
p.status = &WinCrashStatus{
FileName: wcs.FileName,
Type: wcs.Type,
}

go p.parseCrashDumpAsync()
}

case busy:
// The crash dump processing is not done yet. Reply busy.
if p.status != nil {
wcs.FileName = p.status.FileName
wcs.Type = p.status.Type
}
wcs.StatusCode = WinCrashStatusCodeBusy

case failed:
fallthrough
case completed:
// The crash dump processing was done, return the result.
if p.status != nil {
// This result is cached for all subsequent queries.
wcs = p.status
} else {
wcs.StatusCode = WinCrashStatusCodeFailed
}
}
parseCrashDump(wcs)

return wcs
}
Expand Down
Loading

0 comments on commit f481626

Please sign in to comment.