Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Catch panic to generate report and reraise #7341

Merged
merged 11 commits into from
Oct 1, 2021
183 changes: 183 additions & 0 deletions build/panic_reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package build

import (
"fmt"
"io"
"os"
"path/filepath"
"runtime/debug"
"runtime/pprof"
"strconv"
"strings"
"time"

"github.com/icza/backscanner"
logging "github.com/ipfs/go-log/v2"
)

var (
panicLog = logging.Logger("panic-reporter")
defaultJournalTail = 500
)

// PanicReportingPath is the name of the subdir created within the repoPath
// path provided to GeneratePanicReport
var PanicReportingPath = "panic-reports"

// PanicReportJournalTail is the number of lines captured from the end of
// the lotus journal to be included in the panic report.
var PanicReportJournalTail = defaultJournalTail

// GeneratePanicReport produces a timestamped dump of the application state
// for inspection and debugging purposes. Call this function from any place
// where a panic or severe error needs to be examined. `persistPath` is the
// path where the reports should be saved. `repoPath` is the path where the
// journal should be read from. `label` is an optional string to include
// next to the report timestamp.
func GeneratePanicReport(persistPath, repoPath, label string) {
// make sure we always dump the latest logs on the way out
// especially since we're probably panicking
defer panicLog.Sync() //nolint:errcheck

if persistPath == "" && repoPath == "" {
panicLog.Warn("missing persist and repo paths, aborting panic report creation")
return
}

reportPath := filepath.Join(repoPath, PanicReportingPath, generateReportName(label))
if persistPath != "" {
reportPath = filepath.Join(persistPath, generateReportName(label))
}
panicLog.Warnf("generating panic report at %s", reportPath)

tl := os.Getenv("LOTUS_PANIC_JOURNAL_LOOKBACK")
if tl != "" && PanicReportJournalTail == defaultJournalTail {
i, err := strconv.Atoi(tl)
if err == nil {
PanicReportJournalTail = i
}
}

err := os.MkdirAll(reportPath, 0755)
if err != nil {
panicLog.Error(err.Error())
return
}

writeAppVersion(filepath.Join(reportPath, "version"))
writeStackTrace(filepath.Join(reportPath, "stacktrace.dump"))
writeProfile("goroutines", filepath.Join(reportPath, "goroutines.pprof.gz"))
writeProfile("heap", filepath.Join(reportPath, "heap.pprof.gz"))
writeJournalTail(PanicReportJournalTail, repoPath, filepath.Join(reportPath, "journal.ndjson"))
placer14 marked this conversation as resolved.
Show resolved Hide resolved
}

func writeAppVersion(file string) {
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
}
defer f.Close() //nolint:errcheck

versionString := []byte(BuildVersion + BuildTypeString() + CurrentCommit + "\n")
if _, err := f.Write(versionString); err != nil {
panicLog.Error(err.Error())
}
}

func writeStackTrace(file string) {
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
}
defer f.Close() //nolint:errcheck

if _, err := f.Write(debug.Stack()); err != nil {
panicLog.Error(err.Error())
}

}

func writeProfile(profileType string, file string) {
p := pprof.Lookup(profileType)
if p == nil {
panicLog.Warnf("%s profile not available", profileType)
return
}
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
return
}
defer f.Close() //nolint:errcheck

if err := p.WriteTo(f, 0); err != nil {
panicLog.Error(err.Error())
}
}

func writeJournalTail(tailLen int, repoPath, file string) {
if repoPath == "" {
panicLog.Warn("repo path is empty, aborting copy of journal log")
return
}

f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
return
}
defer f.Close() //nolint:errcheck

jPath, err := getLatestJournalFilePath(repoPath)
if err != nil {
panicLog.Warnf("failed getting latest journal: %s", err.Error())
return
}
j, err := os.OpenFile(jPath, os.O_RDONLY, 0400)
if err != nil {
panicLog.Error(err.Error())
return
}
js, err := j.Stat()
if err != nil {
panicLog.Error(err.Error())
return
}
jScan := backscanner.New(j, int(js.Size()))
linesWritten := 0
for {
if linesWritten > tailLen {
break
}
line, _, err := jScan.LineBytes()
if err != nil {
if err != io.EOF {
panicLog.Error(err.Error())
}
break
}
if _, err := f.Write(line); err != nil {
panicLog.Error(err.Error())
break
}
if _, err := f.Write([]byte("\n")); err != nil {
panicLog.Error(err.Error())
break
}
linesWritten++
}
}

func getLatestJournalFilePath(repoPath string) (string, error) {
journalPath := filepath.Join(repoPath, "journal")
entries, err := os.ReadDir(journalPath)
if err != nil {
return "", err
}
return filepath.Join(journalPath, entries[len(entries)-1].Name()), nil
}

func generateReportName(label string) string {
label = strings.ReplaceAll(label, " ", "")
return fmt.Sprintf("report_%s_%s", label, time.Now().Format("2006-01-02T150405"))
}
4 changes: 2 additions & 2 deletions build/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const (
BuildButterflynet = 0x7
)

func buildType() string {
func BuildTypeString() string {
switch BuildType {
case BuildDefault:
return ""
Expand Down Expand Up @@ -47,5 +47,5 @@ func UserVersion() string {
return BuildVersion
}

return BuildVersion + buildType() + CurrentCommit
return BuildVersion + BuildTypeString() + CurrentCommit
}
14 changes: 14 additions & 0 deletions cmd/lotus-miner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ func main() {
Usage: "use color in display output",
DefaultText: "depends on output being a TTY",
},
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotus", // should follow --repo default
placer14 marked this conversation as resolved.
Show resolved Hide resolved
},
&cli.StringFlag{
Name: "repo",
EnvVars: []string{"LOTUS_PATH"},
Expand Down Expand Up @@ -146,6 +152,14 @@ func main() {
}
return nil
},
After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String("repo"), c.App.Name)
placer14 marked this conversation as resolved.
Show resolved Hide resolved
panic(r)
}
return nil
},
}
app.Setup()
app.Metadata["repoType"] = repo.StorageMiner
Expand Down
14 changes: 14 additions & 0 deletions cmd/lotus-seal-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ func main() {
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
},
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotus", // should follow --repo default
placer14 marked this conversation as resolved.
Show resolved Hide resolved
},
&cli.StringFlag{
Name: "miner-repo",
Aliases: []string{"storagerepo"},
Expand All @@ -89,6 +95,14 @@ func main() {
},
},

After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagWorkerRepo), c.App.Name)
panic(r)
}
return nil
},
Commands: local,
}
app.Setup()
Expand Down
14 changes: 14 additions & 0 deletions cmd/lotus/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ func main() {
Version: build.UserVersion(),
EnableBashCompletion: true,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotus", // should follow --repo default
},
&cli.StringFlag{
Name: "repo",
EnvVars: []string{"LOTUS_PATH"},
Expand All @@ -84,6 +90,14 @@ func main() {
},
cliutil.FlagVeryVerbose,
},
After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String("repo"), c.App.Name)
panic(r)
}
return nil
},

Commands: append(local, lcli.Commands...),
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ require (
github.com/hannahhoward/go-pubsub v0.0.0-20200423002714-8d62886cc36e
github.com/hashicorp/go-multierror v1.1.0
github.com/hashicorp/golang-lru v0.5.4
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
github.com/ipfs/bbloom v0.0.4
github.com/ipfs/go-bitswap v0.3.4
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,10 @@ github.com/iancoleman/orderedmap v0.1.0 h1:2orAxZBJsvimgEBmMWfXaFlzSG2fbQil5qzP3
github.com/iancoleman/orderedmap v0.1.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428/go.mod h1:uhpZMVGznybq1itEKXj6RYw9I71qK4kH+OGMjRC4KEo=
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94 h1:9tcYMdi+7Rb1y0E9Del1DRHui7Ne3za5lLw6CjMJv/M=
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94/go.mod h1:GYeBD1CF7AqnKZK+UCytLcY3G+UKo0ByXX/3xfdNyqQ=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
Expand Down