Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tracing: enable always-on tracing by default #58897

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pkg/ccl/importccl/import_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,15 @@ func (idp *readImportDataProcessor) Next() (rowenc.EncDatumRow, *execinfrapb.Pro
// Once the import is done, send back to the controller the serialized
// summary of the import operation. For more info see roachpb.BulkOpSummary.
countsBytes, err := protoutil.Marshal(idp.summary)
idp.MoveToDraining(err)
if err != nil {
idp.MoveToDraining(err)
return nil, idp.DrainHelper()
}

idp.MoveToDraining(nil /* err */)
return rowenc.EncDatumRow{
rowenc.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes(countsBytes))),
rowenc.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes([]byte{}))),
}, idp.DrainHelper()
}, nil
}

// ConsumerDone is part of the RowSource interface.
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/distsql_running.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,7 @@ func (r *DistSQLReceiver) Push(
if len(meta.TraceData) > 0 {
span := tracing.SpanFromContext(r.ctx)
if span == nil {
r.resultWriter.SetError(
errors.New("trying to ingest remote spans but there is no recording span set up"))
// Nothing to do.
} else if err := span.ImportRemoteSpans(meta.TraceData); err != nil {
r.resultWriter.SetError(errors.Errorf("error ingesting remote spans: %s", err))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/execinfra/processorsbase.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ const (
// returned from now on. In this state, the processor is expected to drain its
// inputs (commonly by using DrainHelper()).
//
// If the processor has no input (ProcStateOpts.intputToDrain was not specified
// If the processor has no input (ProcStateOpts.inputToDrain was not specified
// at init() time), then we move straight to the StateTrailingMeta.
//
// An error can be optionally passed. It will be the first piece of metadata
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/flowinfra/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ func TestClusterFlow(t *testing.T) {
metas = ignoreMisplannedRanges(metas)
metas = ignoreLeafTxnState(metas)
metas = ignoreMetricsMeta(metas)
metas = ignoreTraceData(metas)
if len(metas) != 0 {
t.Fatalf("unexpected metadata (%d): %+v", len(metas), metas)
}
Expand Down Expand Up @@ -327,6 +328,18 @@ func ignoreMetricsMeta(metas []execinfrapb.ProducerMetadata) []execinfrapb.Produ
return res
}

// ignoreTraceData takes a slice of metadata and returns the entries
// excluding the ones with trace data.
func ignoreTraceData(metas []execinfrapb.ProducerMetadata) []execinfrapb.ProducerMetadata {
res := make([]execinfrapb.ProducerMetadata, 0)
for _, m := range metas {
if m.TraceData == nil {
res = append(res, m)
}
}
return res
}

// TestLimitedBufferingDeadlock sets up a scenario which leads to deadlock if
// a single consumer can block the entire router (#17097).
func TestLimitedBufferingDeadlock(t *testing.T) {
Expand Down Expand Up @@ -545,6 +558,7 @@ func TestLimitedBufferingDeadlock(t *testing.T) {
metas = ignoreMisplannedRanges(metas)
metas = ignoreLeafTxnState(metas)
metas = ignoreMetricsMeta(metas)
metas = ignoreTraceData(metas)
if len(metas) != 0 {
t.Errorf("unexpected metadata (%d): %+v", len(metas), metas)
}
Expand Down Expand Up @@ -852,6 +866,7 @@ func BenchmarkInfrastructure(b *testing.B) {
metas = ignoreMisplannedRanges(metas)
metas = ignoreLeafTxnState(metas)
metas = ignoreMetricsMeta(metas)
metas = ignoreTraceData(metas)
if len(metas) != 0 {
b.Fatalf("unexpected metadata (%d): %+v", len(metas), metas)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/flowinfra/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ func TestServer(t *testing.T) {
}
metas = ignoreLeafTxnState(metas)
metas = ignoreMetricsMeta(metas)
metas = ignoreTraceData(metas)
if len(metas) != 0 {
t.Errorf("unexpected metadata: %v", metas)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/physicalplan/aggregator_funcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func runTestFlow(
for {
row, meta := rowBuf.Next()
if meta != nil {
if meta.LeafTxnFinalState != nil || meta.Metrics != nil {
if meta.LeafTxnFinalState != nil || meta.Metrics != nil || meta.TraceData != nil {
continue
}
t.Fatalf("unexpected metadata: %v", meta)
Expand Down
46 changes: 22 additions & 24 deletions pkg/util/tracing/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,32 +62,30 @@ const (
modeBackground
)

// tracingMode informs the creation of noop spans
// and the default recording mode of created spans.
// tracingMode informs the creation of noop spans and the default recording mode
// of created spans.
//
// If set to 'background', trace spans will be created for all operations, but
// these will record sparse structured information, unless an operation
// explicitly requests the verbose from. It's optimized for low overhead, and
// powers fine-grained statistics and alerts.
//
// If set to 'legacy', trace spans will not be created by default. This is
// unless an internal code path explicitly requests for it, or if an auxiliary
// tracer (such as lightstep or zipkin) is configured. This tracing mode always
// records in the verbose form. Using this mode has two effects: the
// observability of the cluster may be degraded (as most trace spans are elided)
// and where trace spans are created, they may consume large amounts of memory.
//
// Note that regardless of this setting, configuring an auxiliary trace sink
// will cause verbose traces to be created for all operations, which may lead to
// high memory consumption. It is not currently possible to send non-verbose
// traces to auxiliary sinks.
var tracingMode = settings.RegisterEnumSetting(
"trace.mode",
`configures the CockroachDB-internal tracing subsystem.

If set to 'background', trace spans will be created for all operations, but
these trace spans will only be recording sparse structured information,
unless an operation explicitly requests verbose recording. This is
optimized for low overhead, and powers fine-grained statistics and alerts.

If set to 'legacy', trace spans will not be created (unless an
auxiliary tracer such as Lightstep or Zipkin, is configured, or an
internal code path explicitly requests a trace to be created) but
when they are, they record verbose information. This has two effects:
the observability of the cluster may be degraded (as some trace spans
are elided) and where trace spans are created, they may consume large
amounts of memory. This mode should not be used with auxiliary tracing
sinks as that leads to expensive trace spans being created throughout.

Note that regardless of this setting, configuring an auxiliary
trace sink will cause verbose traces to be created for all
operations, which may lead to high memory consumption. It is not
currently possible to send non-verbose traces to auxiliary sinks.
`,
"legacy",
"if set to 'background', traces will be created for all operations (in"+
"'legacy' mode it's created when explicitly requested or when auxiliary tracers are configured)",
"background",
map[int64]string{
int64(modeLegacy): "legacy",
int64(modeBackground): "background",
Expand Down