Skip to content

Commit

Permalink
jobs: only verify fraction progressed range in tests
Browse files Browse the repository at this point in the history
FractionProgressed is used by jobs that do a substantial amount of
work. Failing the entire job because it has miscalculated its fraction
progressed estimate is not good.

Here, we keep returning the error in non-release builds, but in
release builds we log and clamp the value instead.

Epic: none

Release note (bug fix): A job will now log rather than fail if it
reports an out-of-bounds progress fraction.
  • Loading branch information
stevendanna committed Apr 19, 2024
1 parent 1bca2ad commit 72a0c79
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
1 change: 1 addition & 0 deletions pkg/jobs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/base",
"//pkg/build",
"//pkg/clusterversion",
"//pkg/jobs/jobspb",
"//pkg/kv",
Expand Down
31 changes: 23 additions & 8 deletions pkg/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"sync/atomic"
"time"

"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/kv"
Expand Down Expand Up @@ -357,16 +358,30 @@ func (u Updater) FractionProgressed(ctx context.Context, progressedFn FractionPr
return err
}
fractionCompleted := progressedFn(ctx, md.Progress.Details)
// allow for slight floating-point rounding inaccuracies
if fractionCompleted > 1.0 && fractionCompleted < 1.01 {
fractionCompleted = 1.0

if !build.IsRelease() {
// We allow for slight floating-point rounding
// inaccuracies. We only want to error in non-release
// builds because in large production installations the
// method at least one job uses to calculate progress can
// result in substantial floating point inaccuracy.
if fractionCompleted < 0.0 || fractionCompleted > 1.01 {
return errors.Errorf(
"fraction completed %f is outside allowable range [0.0, 1.01]",
fractionCompleted,
)
}
}
if fractionCompleted < 0.0 || fractionCompleted > 1.0 {
return errors.Errorf(
"job %d: fractionCompleted %f is outside allowable range [0.0, 1.0]",
u.j.ID(), fractionCompleted,
)

// Clamp to [0.0, 1.0].
if fractionCompleted > 1.0 {
log.VInfof(ctx, 1, "clamping fraction completed %f to [0.0, 1.0]", fractionCompleted)
fractionCompleted = 1.0
} else if fractionCompleted < 0.0 {
log.VInfof(ctx, 1, "clamping fraction completed %f to [0.0, 1.0]", fractionCompleted)
fractionCompleted = 0
}

md.Progress.Progress = &jobspb.Progress_FractionCompleted{
FractionCompleted: fractionCompleted,
}
Expand Down

0 comments on commit 72a0c79

Please sign in to comment.