Skip to content

Commit

Permalink
Log shim panic logs in containerd.
Browse files Browse the repository at this point in the history
Currently hcsshim writes the shim panic logs in a file named panic.log inside the sandbox
directory. However, those logs are never logged in containerd and they get lost when the
sandbox container is removed. This change allows the shim to log these panic logs to
containerd before deleting them.

Signed-off-by: Amit Barve <[email protected]>
  • Loading branch information
ambarve committed Feb 25, 2021
1 parent ae4dec9 commit 2b171cb
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 33 deletions.
9 changes: 8 additions & 1 deletion cmd/containerd-shim-runhcs-v1/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"time"

"github.com/Microsoft/hcsshim/internal/hcs"
Expand All @@ -29,7 +30,6 @@ The delete command will be executed in the container's bundle as its cwd.
// We cant write anything to stdout for this cmd other than the
// task.DeleteResponse by protcol. We can write to stderr which will be
// warning logged in containerd.
logrus.SetOutput(ioutil.Discard)

ctx, span := trace.StartSpan(gcontext.Background(), "delete")
defer span.End()
Expand Down Expand Up @@ -82,6 +82,13 @@ The delete command will be executed in the container's bundle as its cwd.
}
}

// hcsshim shim writes panic logs in the bundle directory in a file named "panic.log"
// log those messages (if any) on stderr so that it shows up in containerd's log.
logs, err := ioutil.ReadFile(filepath.Join(bundleFlag, "panic.log"))
if err == nil && len(logs) > 0 {
logrus.Errorf("found shim panic logs during delete: %s", string(logs))
}

// Remove the bundle on disk
if err := os.RemoveAll(bundleFlag); err != nil && !os.IsNotExist(err) {
return err
Expand Down
11 changes: 0 additions & 11 deletions cmd/containerd-shim-runhcs-v1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,6 @@ func stack() []byte {
}
}

func panicRecover() {
if r := recover(); r != nil {
logrus.WithFields(logrus.Fields{
"panic": r,
"stack": string(stack()),
}).Error("containerd-shim-runhcs-v1: panic")
}
}

func etwCallback(sourceID guid.GUID, state etw.ProviderState, level etw.Level, matchAnyKeyword uint64, matchAllKeyword uint64, filterData uintptr) {
if state == etw.ProviderStateCaptureState {
resp, err := svc.DiagStacks(context.Background(), &shimdiag.StacksRequest{})
Expand Down Expand Up @@ -92,8 +83,6 @@ func main() {
}
}

defer panicRecover()

provider.WriteEvent(
"ShimLaunched",
nil,
Expand Down
21 changes: 0 additions & 21 deletions cmd/containerd-shim-runhcs-v1/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ type service struct {
}

func (s *service) State(ctx context.Context, req *task.StateRequest) (resp *task.StateResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "State")
defer span.End()
defer func() {
Expand All @@ -79,7 +78,6 @@ func (s *service) State(ctx context.Context, req *task.StateRequest) (resp *task
}

func (s *service) Create(ctx context.Context, req *task.CreateTaskRequest) (resp *task.CreateTaskResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Create")
defer span.End()
defer func() {
Expand Down Expand Up @@ -110,7 +108,6 @@ func (s *service) Create(ctx context.Context, req *task.CreateTaskRequest) (resp
}

func (s *service) Start(ctx context.Context, req *task.StartRequest) (resp *task.StartResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Start")
defer span.End()
defer func() {
Expand All @@ -133,7 +130,6 @@ func (s *service) Start(ctx context.Context, req *task.StartRequest) (resp *task
}

func (s *service) Delete(ctx context.Context, req *task.DeleteRequest) (resp *task.DeleteResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Delete")
defer span.End()
defer func() {
Expand All @@ -159,7 +155,6 @@ func (s *service) Delete(ctx context.Context, req *task.DeleteRequest) (resp *ta
}

func (s *service) Pids(ctx context.Context, req *task.PidsRequest) (_ *task.PidsResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Pids")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -175,7 +170,6 @@ func (s *service) Pids(ctx context.Context, req *task.PidsRequest) (_ *task.Pids
}

func (s *service) Pause(ctx context.Context, req *task.PauseRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Pause")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -191,7 +185,6 @@ func (s *service) Pause(ctx context.Context, req *task.PauseRequest) (_ *google_
}

func (s *service) Resume(ctx context.Context, req *task.ResumeRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Resume")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -207,7 +200,6 @@ func (s *service) Resume(ctx context.Context, req *task.ResumeRequest) (_ *googl
}

func (s *service) Checkpoint(ctx context.Context, req *task.CheckpointTaskRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Checkpoint")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -225,7 +217,6 @@ func (s *service) Checkpoint(ctx context.Context, req *task.CheckpointTaskReques
}

func (s *service) Kill(ctx context.Context, req *task.KillRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Kill")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -245,7 +236,6 @@ func (s *service) Kill(ctx context.Context, req *task.KillRequest) (_ *google_pr
}

func (s *service) Exec(ctx context.Context, req *task.ExecProcessRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Exec")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -267,7 +257,6 @@ func (s *service) Exec(ctx context.Context, req *task.ExecProcessRequest) (_ *go
}

func (s *service) DiagExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequest) (_ *shimdiag.ExecProcessResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "DiagExecInHost")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -289,7 +278,6 @@ func (s *service) DiagExecInHost(ctx context.Context, req *shimdiag.ExecProcessR
}

func (s *service) DiagShare(ctx context.Context, req *shimdiag.ShareRequest) (_ *shimdiag.ShareResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "DiagShare")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -308,7 +296,6 @@ func (s *service) DiagShare(ctx context.Context, req *shimdiag.ShareRequest) (_
}

func (s *service) ResizePty(ctx context.Context, req *task.ResizePtyRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "ResizePty")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -328,7 +315,6 @@ func (s *service) ResizePty(ctx context.Context, req *task.ResizePtyRequest) (_
}

func (s *service) CloseIO(ctx context.Context, req *task.CloseIORequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "CloseIO")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -347,7 +333,6 @@ func (s *service) CloseIO(ctx context.Context, req *task.CloseIORequest) (_ *goo
}

func (s *service) Update(ctx context.Context, req *task.UpdateTaskRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Update")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -363,7 +348,6 @@ func (s *service) Update(ctx context.Context, req *task.UpdateTaskRequest) (_ *g
}

func (s *service) Wait(ctx context.Context, req *task.WaitRequest) (resp *task.WaitResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Wait")
defer span.End()
defer func() {
Expand All @@ -388,7 +372,6 @@ func (s *service) Wait(ctx context.Context, req *task.WaitRequest) (resp *task.W
}

func (s *service) Stats(ctx context.Context, req *task.StatsRequest) (_ *task.StatsResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Stats")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -404,7 +387,6 @@ func (s *service) Stats(ctx context.Context, req *task.StatsRequest) (_ *task.St
}

func (s *service) Connect(ctx context.Context, req *task.ConnectRequest) (resp *task.ConnectResponse, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Connect")
defer span.End()
defer func() {
Expand All @@ -428,7 +410,6 @@ func (s *service) Connect(ctx context.Context, req *task.ConnectRequest) (resp *
}

func (s *service) Shutdown(ctx context.Context, req *task.ShutdownRequest) (_ *google_protobuf1.Empty, err error) {
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "Shutdown")
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
Expand All @@ -447,7 +428,6 @@ func (s *service) DiagStacks(ctx context.Context, req *shimdiag.StacksRequest) (
if s == nil {
return nil, nil
}
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "DiagStacks")
defer span.End()

Expand Down Expand Up @@ -480,7 +460,6 @@ func (s *service) DiagPid(ctx context.Context, req *shimdiag.PidRequest) (*shimd
if s == nil {
return nil, nil
}
defer panicRecover()
ctx, span := trace.StartSpan(ctx, "DiagPid")
defer span.End()

Expand Down

0 comments on commit 2b171cb

Please sign in to comment.