Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Job deadlines #88

Merged
merged 8 commits into from
Sep 18, 2019
57 changes: 34 additions & 23 deletions pkg/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ var (
ImageNameTag = "quay.io/iovisor/kubectl-trace-bpftrace:latest"
// InitImageNameTag represents the default init container image
InitImageNameTag = "quay.io/iovisor/kubectl-trace-init:latest"
// DefaultDeadline is the maximum time a tracejob is allowed to run, in seconds
DefaultDeadline = 3600
// DefaultDeadlineGracePeriod is the maximum time to wait to print a map or histogram, in seconds
DefaultDeadlineGracePeriod = 10
)

var (
Expand Down Expand Up @@ -66,13 +70,15 @@ type RunOptions struct {
explicitNamespace bool

// Flags local to this command
container string
eval string
program string
serviceAccount string
imageName string
initImageName string
fetchHeaders bool
container string
eval string
program string
serviceAccount string
imageName string
initImageName string
fetchHeaders bool
deadline int64
deadlineGracePeriod int64

resourceArg string
attach bool
Expand All @@ -88,9 +94,11 @@ func NewRunOptions(streams genericclioptions.IOStreams) *RunOptions {
return &RunOptions{
IOStreams: streams,

serviceAccount: "default",
imageName: ImageNameTag,
initImageName: InitImageNameTag,
serviceAccount: "default",
imageName: ImageNameTag,
initImageName: InitImageNameTag,
deadline: int64(DefaultDeadline),
deadlineGracePeriod: int64(DefaultDeadlineGracePeriod),
}
}

Expand Down Expand Up @@ -127,6 +135,8 @@ func NewRunCommand(factory factory.Factory, streams genericclioptions.IOStreams)
cmd.Flags().StringVar(&o.imageName, "imagename", o.imageName, "Custom image for the tracerunner")
cmd.Flags().StringVar(&o.initImageName, "init-imagename", o.initImageName, "Custom image for the init container responsible to fetch and prepare linux headers")
cmd.Flags().BoolVar(&o.fetchHeaders, "fetch-headers", o.fetchHeaders, "Whether to fetch linux headers or not")
// Each deadline flag is bound to its own RunOptions field; the grace period
// must target o.deadlineGracePeriod, otherwise parsing --deadline-grace-period
// would overwrite the value stored for --deadline.
cmd.Flags().Int64Var(&o.deadline, "deadline", o.deadline, "Maximum time to allow trace to run in seconds")
cmd.Flags().Int64Var(&o.deadlineGracePeriod, "deadline-grace-period", o.deadlineGracePeriod, "Maximum wait time to print maps or histograms after deadline, in seconds")

return cmd
}
Expand Down Expand Up @@ -289,19 +299,20 @@ func (o *RunOptions) Run() error {
}

tj := tracejob.TraceJob{
Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)),
Namespace: o.namespace,
ServiceAccount: o.serviceAccount,
ID: juid,
Hostname: o.nodeName,
Program: o.program,
PodUID: o.podUID,
ContainerName: o.container,
IsPod: o.isPod,
// todo(dalehamel) > following fields to be used for #48
ImageNameTag: o.imageName,
InitImageNameTag: o.initImageName,
FetchHeaders: o.fetchHeaders,
Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)),
Namespace: o.namespace,
ServiceAccount: o.serviceAccount,
ID: juid,
Hostname: o.nodeName,
Program: o.program,
PodUID: o.podUID,
ContainerName: o.container,
IsPod: o.isPod,
ImageNameTag: o.imageName,
InitImageNameTag: o.initImageName,
FetchHeaders: o.fetchHeaders,
Deadline: o.deadline,
DeadlineGracePeriod: o.deadlineGracePeriod,
}

job, err := tc.CreateJob(tj)
Expand Down
45 changes: 31 additions & 14 deletions pkg/tracejob/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,22 @@ type TraceJobClient struct {

// TraceJob is a container of info needed to create the job responsible for tracing.
type TraceJob struct {
Name string
ID types.UID
Namespace string
ServiceAccount string
Hostname string
Program string
PodUID string
ContainerName string
IsPod bool
ImageNameTag string
InitImageNameTag string
FetchHeaders bool
StartTime metav1.Time
Status TraceJobStatus
Name string
ID types.UID
Namespace string
ServiceAccount string
Hostname string
Program string
PodUID string
ContainerName string
IsPod bool
ImageNameTag string
InitImageNameTag string
FetchHeaders bool
Deadline int64
DeadlineGracePeriod int64
StartTime metav1.Time
Status TraceJobStatus
}

// WithOutStream setup a file stream to output trace job operation information
Expand Down Expand Up @@ -217,6 +219,7 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) {
job := &batchv1.Job{
ObjectMeta: commonMeta,
Spec: batchv1.JobSpec{
ActiveDeadlineSeconds: int64Ptr(nj.Deadline),
TTLSecondsAfterFinished: int32Ptr(5),
Parallelism: int32Ptr(1),
Completions: int32Ptr(1),
Expand Down Expand Up @@ -294,6 +297,20 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) {
SecurityContext: &apiv1.SecurityContext{
Privileged: boolPtr(true),
},
// We want to send SIGINT to bpftrace prior to the pod being killed, so it can
// print its maps. The hook then sleeps for nj.DeadlineGracePeriod seconds to
// give bpftrace time to process and summarize the data.
Lifecycle: &apiv1.Lifecycle{
	PreStop: &apiv1.Handler{
		Exec: &apiv1.ExecAction{
			Command: []string{
				"/bin/bash",
				"-c",
				// Use %d: Go's fmt has no %i verb, and an unknown verb would be
				// rendered as "%!i(int64=...)", producing an invalid sleep argument.
				fmt.Sprintf("kill -SIGINT $(pidof bpftrace) && sleep %d", nj.DeadlineGracePeriod),
			},
		},
	},
},
},
},
RestartPolicy: "Never",
Expand Down