From 547b38a8985bc34615771b401518205734f8c983 Mon Sep 17 00:00:00 2001 From: Lenny Burdette Date: Mon, 5 Aug 2019 08:10:11 -0700 Subject: [PATCH] feat: add support for federated tracing Federated GraphQL services should include timing and error information as a Base64-encoded protocol buffer message in the `"extensions.ftv1"` field. The gateway requests traces by adding a special header to the GraphQL request, and combines traces from all federated services into a single trace. This change includes a Tracer that uses the graphql-ruby [tracing API][t] to record field timings and info and store it on the execution context. It also includes methods on the `ApolloFederation::Tracing` module to pluck the info from the context, convert it to an encoded string, and attach it to the query result's extensions. I used the Apollo Server typescript code as reference: * https://github.com/apollographql/apollo-server/blob/master/packages/apollo-engine-reporting/src/federatedExtension.ts * https://github.com/apollographql/apollo-server/blob/master/packages/apollo-engine-reporting/src/treeBuilder.ts As well as an unfinished fork of apollo-tracing-ruby: * https://github.com/salsify/apollo-tracing-ruby/blob/feature/new-apollo-api/lib/apollo_tracing/tracer.rb * https://github.com/salsify/apollo-tracing-ruby/blob/feature/new-apollo-api/lib/apollo_tracing/trace_tree.rb Federated tracing documentation: https://www.apollographql.com/docs/apollo-server/federation/metrics/ Addresses #14 [t]:https://graphql-ruby.org/queries/tracing.html --- .rubocop.yml | 2 + Gemfile.lock | 2 + README.md | 24 + apollo-federation.gemspec | 2 + bin/generate-protos.sh | 15 + lib/apollo-federation.rb | 4 + lib/apollo-federation/tracing.rb | 50 ++ lib/apollo-federation/tracing/node_map.rb | 72 +++ lib/apollo-federation/tracing/proto.rb | 12 + .../tracing/proto/apollo.proto | 488 ++++++++++++++++++ .../tracing/proto/apollo_pb.rb | 305 +++++++++++ lib/apollo-federation/tracing/tracer.rb | 149 ++++++ 
.../tracing/node_map_spec.rb | 71 +++ spec/apollo-federation/tracing_spec.rb | 258 +++++++++ 14 files changed, 1454 insertions(+) create mode 100755 bin/generate-protos.sh create mode 100644 lib/apollo-federation/tracing.rb create mode 100644 lib/apollo-federation/tracing/node_map.rb create mode 100644 lib/apollo-federation/tracing/proto.rb create mode 100644 lib/apollo-federation/tracing/proto/apollo.proto create mode 100644 lib/apollo-federation/tracing/proto/apollo_pb.rb create mode 100644 lib/apollo-federation/tracing/tracer.rb create mode 100644 spec/apollo-federation/tracing/node_map_spec.rb create mode 100644 spec/apollo-federation/tracing_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 8f5d61ee2..f4e97d994 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -45,6 +45,8 @@ Metrics/ParameterLists: CountKeywordArgs: false Metrics/LineLength: Max: 100 + Exclude: + - lib/apollo-federation/tracing/proto/apollo_pb.rb Naming/FileName: Exclude: diff --git a/Gemfile.lock b/Gemfile.lock index 38a455485..cf40ecf32 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,6 +2,7 @@ PATH remote: . specs: apollo-federation (0.2.0) + google-protobuf graphql GEM @@ -33,6 +34,7 @@ GEM crass (1.0.4) diff-lcs (1.3) erubi (1.8.0) + google-protobuf (3.9.0) graphql (1.9.9) i18n (1.6.0) concurrent-ruby (~> 1.0) diff --git a/README.md b/README.md index 7f6f88148..43bc1a383 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,30 @@ class User < BaseObject end ``` +### Tracing + +To support [federated tracing](https://www.apollographql.com/docs/apollo-server/federation/metrics/): + +1. Add `use ApolloFederation::Tracing` to your schema class. +2. Change your controller to add `tracing_enabled: true` to the execution context based on the presence of the "include trace" header: + ```ruby + def execute + # ... + context = { + tracing_enabled: ApolloFederation::Tracing.should_add_traces(headers) + } + # ... + end + ``` +3. 
Change your controller to attach the traces to the response: + ```ruby + def execute + # ... + result = YourSchema.execute(query, ...) + render json: ApolloFederation::Tracing.attach_trace_to_result(result) + end + ``` + ## Known Issues and Limitations - Currently only works with class-based schemas - Does not add directives to the output of `Schema.to_definition`. Since `graphql-ruby` doesn't natively support schema directives, the directives will only be visible to the [Apollo Gateway](https://www.apollographql.com/docs/apollo-server/api/apollo-gateway/) through the `Query._service` field (see the [Apollo Federation specification](https://www.apollographql.com/docs/apollo-server/federation/federation-spec/)) diff --git a/apollo-federation.gemspec b/apollo-federation.gemspec index b6d14d188..eeff52c32 100644 --- a/apollo-federation.gemspec +++ b/apollo-federation.gemspec @@ -28,6 +28,8 @@ Gem::Specification.new do |spec| spec.add_dependency 'graphql' + spec.add_runtime_dependency 'google-protobuf' + spec.add_development_dependency 'actionpack' spec.add_development_dependency 'pry-byebug' spec.add_development_dependency 'rack' diff --git a/bin/generate-protos.sh b/bin/generate-protos.sh new file mode 100755 index 000000000..3497c1b81 --- /dev/null +++ b/bin/generate-protos.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -eo pipefail + +DIR=`dirname "$0"` +OUTPUT_DIR=$DIR/../lib/apollo-federation/tracing/proto + +echo "Removing old client" +rm -f $OUTPUT_DIR/apollo.proto $OUTPUT_DIR/apollo_pb.rb + +echo "Downloading latest Apollo Protobuf IDL" +curl --silent --output lib/apollo-federation/tracing/proto/apollo.proto https://raw.githubusercontent.com/apollographql/apollo-server/master/packages/apollo-engine-reporting-protobuf/src/reports.proto + +echo "Generating Ruby client stubs" +protoc -I lib/apollo-federation/tracing/proto --ruby_out lib/apollo-federation/tracing/proto lib/apollo-federation/tracing/proto/apollo.proto diff --git a/lib/apollo-federation.rb 
b/lib/apollo-federation.rb index 5699857aa..5083a6c98 100644 --- a/lib/apollo-federation.rb +++ b/lib/apollo-federation.rb @@ -4,3 +4,7 @@ require 'apollo-federation/schema' require 'apollo-federation/object' require 'apollo-federation/field' +require 'apollo-federation/tracing/proto' +require 'apollo-federation/tracing/node_map' +require 'apollo-federation/tracing/tracer' +require 'apollo-federation/tracing' diff --git a/lib/apollo-federation/tracing.rb b/lib/apollo-federation/tracing.rb new file mode 100644 index 000000000..0d073d1c0 --- /dev/null +++ b/lib/apollo-federation/tracing.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module ApolloFederation + module Tracing + KEY = :ftv1 + + def self.use(schema) + schema.tracer ApolloFederation::Tracing::Tracer + end + + def self.should_add_traces(headers) + headers && headers['apollo-federation-include-trace'] == KEY.to_s + end + + def self.attach_trace_to_result(result) + return result unless result.context[:tracing_enabled] + + trace = result.context.namespace(KEY) + unless trace[:start_time] + raise StandardError.new, 'Apollo Federation Tracing not installed. \ + Add `use ApolloFederation::Tracing` to your schema.'
+ end + + result['errors']&.each do |error| + trace[:node_map].add_error(error) + end + + proto = ApolloFederation::Tracing::Trace.new( + start_time: to_proto_timestamp(trace[:start_time]), + end_time: to_proto_timestamp(trace[:end_time]), + duration_ns: trace[:end_time_nanos] - trace[:start_time_nanos], + root: trace[:node_map].root, + ) + + json = result.to_h + result[:extensions] ||= {} + result[:extensions][KEY] = Base64.encode64(proto.class.encode(proto)) + + if result.context[:debug_tracing] + result[:extensions]["#{KEY}_debug".to_sym] = proto.to_h + end + + json + end + + def self.to_proto_timestamp(time) + Google::Protobuf::Timestamp.new(seconds: time.to_i, nanos: time.nsec) + end + end +end diff --git a/lib/apollo-federation/tracing/node_map.rb b/lib/apollo-federation/tracing/node_map.rb new file mode 100644 index 000000000..97b74abbe --- /dev/null +++ b/lib/apollo-federation/tracing/node_map.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +require 'active_support/core_ext/array/wrap' +require 'apollo-federation/tracing/proto' + +module ApolloFederation + module Tracing + # NodeMap stores a flat map of trace nodes by stringified paths + # (i.e. "_entities.0.id") for fast lookup when we need to alter + # nodes (to add end times or errors.) + # + # When adding a node to the NodeMap, it will create any missing + # parent nodes and ensure the tree is consistent. + # + # Only the "root" node is attached to the trace extension. 
+ class NodeMap + ROOT_KEY = '' + + attr_reader :nodes + def initialize + @nodes = { + ROOT_KEY => ApolloFederation::Tracing::Node.new, + } + end + + def root + nodes[ROOT_KEY] + end + + def node_for_path(path) + nodes[Array.wrap(path).join('.')] + end + + def add(path) + node = ApolloFederation::Tracing::Node.new + node_key = path.join('.') + key = path.last + + case key + when String # field + node.response_name = key + when Integer # index of an array + node.index = key + end + + nodes[node_key] = node + + # find or create a parent node and add this node to its children + parent_path = path[0..-2] + parent_node = nodes[parent_path.join('.')] || add(parent_path) + parent_node.child << node + + node + end + + def add_error(error) + path = Array.wrap(error['path']).join('.') + node = nodes[path] || root + + locations = Array.wrap(error['locations']).map do |location| + ApolloFederation::Tracing::Location.new(location) + end + + node.error << ApolloFederation::Tracing::Error.new( + message: error['message'], + location: locations, + json: JSON.dump(error), + ) + end + end + end +end diff --git a/lib/apollo-federation/tracing/proto.rb b/lib/apollo-federation/tracing/proto.rb new file mode 100644 index 000000000..cce8dff4d --- /dev/null +++ b/lib/apollo-federation/tracing/proto.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require_relative 'proto/apollo_pb' + +module ApolloFederation + module Tracing + Trace = ::Mdg::Engine::Proto::Trace + Node = ::Mdg::Engine::Proto::Trace::Node + Location = ::Mdg::Engine::Proto::Trace::Location + Error = ::Mdg::Engine::Proto::Trace::Error + end +end diff --git a/lib/apollo-federation/tracing/proto/apollo.proto b/lib/apollo-federation/tracing/proto/apollo.proto new file mode 100644 index 000000000..55cb2b39c --- /dev/null +++ b/lib/apollo-federation/tracing/proto/apollo.proto @@ -0,0 +1,488 @@ +syntax = "proto3"; + +package mdg.engine.proto; + +import "google/protobuf/timestamp.proto"; + +message Trace { + message CachePolicy 
{ + enum Scope { + UNKNOWN = 0; + PUBLIC = 1; + PRIVATE = 2; + } + + Scope scope = 1; + int64 max_age_ns = 2; // use 0 for absent, -1 for 0 + } + + message Details { + // The variables associated with this query (unless the reporting agent is + // configured to keep them all private). Values are JSON: ie, strings are + // enclosed in double quotes, etc. The value of a private variable is + // the empty string. + map variables_json = 4; + // Deprecated. Engineproxy did not encode variable values as JSON, so you + // couldn't tell numbers from numeric strings. Send variables_json instead. + map variables = 1; + // Optional: this is the original full query before the signature algorithm + // is applied. Engineproxy always sent this in all traces, which meant that + // literal-masking done by the signature algorithm didn't fully hide + // sensitive data from Engine servers. apollo-engine-reporting does not + // include this by default. (The Engine frontend does not currently show + // this field.) + string raw_query = 2; + // Don't include this in traces inside a FullTracesReport; the operation + // name for these traces comes from the key of the traces_per_query map. 
+ string operation_name = 3; + } + + message Error { + string message = 1; // required + repeated Location location = 2; + uint64 time_ns = 3; + string json = 4; + } + + message HTTP { + message Values { + repeated string value = 1; + } + + enum Method { + UNKNOWN = 0; + OPTIONS = 1; + GET = 2; + HEAD = 3; + POST = 4; + PUT = 5; + DELETE = 6; + TRACE = 7; + CONNECT = 8; + PATCH = 9; + } + Method method = 1; + string host = 2; + string path = 3; + + // Should exclude manual blacklist ("Auth" by default) + map request_headers = 4; + map response_headers = 5; + + uint32 status_code = 6; + + bool secure = 8; // TLS was used + string protocol = 9; // by convention "HTTP/1.0", "HTTP/1.1", "HTTP/2" or "h2" + } + + message Location { + uint32 line = 1; + uint32 column = 2; + } + + // We store information on each resolver execution as a Node on a tree. + // The structure of the tree corresponds to the structure of the GraphQL + // response; it does not indicate the order in which resolvers were + // invoked. Note that nodes representing indexes (and the root node) + // don't contain all Node fields (eg types and times). + message Node { + // The name of the field (for Nodes representing a resolver call) or the + // index in a list (for intermediate Nodes representing elements of a list). + // field_name is the name of the field as it appears in the GraphQL + // response: ie, it may be an alias. (In that case, the original_field_name + // field holds the actual field name from the schema.) In any context where + // we're building up a path, we use the response_name rather than the + // original_field_name. + oneof id { + string response_name = 1; + uint32 index = 2; + } + + string original_field_name = 14; + + // The field's return type; e.g. "String!" for User.email:String! + string type = 3; + + // The field's parent type; e.g. "User" for User.email:String! 
+ string parent_type = 13; + + CachePolicy cache_policy = 5; + + // relative to the trace's start_time, in ns + uint64 start_time = 8; + // relative to the trace's start_time, in ns + uint64 end_time = 9; + + repeated Error error = 11; + repeated Node child = 12; + + reserved 4; + } + + // represents a node in the query plan, under which there is a trace tree for that service fetch. + // In particular, each fetch node represents a call to an implementing service, and calls to implementing + // services may not be unique. See https://github.com/apollographql/apollo-server/blob/master/packages/apollo-gateway/src/QueryPlan.ts + // for more information and details. + message QueryPlanNode { + // This represents a set of nodes to be executed sequentially by the Gateway executor + message SequenceNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a set of nodes to be executed in parallel by the Gateway executor + message ParallelNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a node to send an operation to an implementing service + message FetchNode { + // XXX When we want to include more details about the sub-operation that was + // executed against this service, we should include that here in each fetch node. + // This might include an operation signature, requires directive, reference resolutions, etc. + string serviceName = 1; + + bool traceParsingFailed = 2; + + // This Trace only contains start_time, end_time, duration_ns, and root; + // all timings were calculated **on the federated service**, and clock skew + // will be handled by the ingress server. + Trace trace = 3; + + // relative to the outer trace's start_time, in ns, measured in the gateway. + uint64 sent_time_offset = 4; + + // Wallclock times measured in the gateway for when this operation was + // sent and received. 
+ google.protobuf.Timestamp sent_time = 5; + google.protobuf.Timestamp received_time = 6; + } + + // This node represents a way to reach into the response path and attach related entities. + // XXX Flatten is really not the right name and this node may be renamed in the query planner. + message FlattenNode { + repeated ResponsePathElement response_path = 1; + QueryPlanNode node = 2; + } + message ResponsePathElement { + oneof id { + string field_name = 1; + uint32 index = 2; + } + } + oneof node { + SequenceNode sequence = 1; + ParallelNode parallel = 2; + FetchNode fetch = 3; + FlattenNode flatten = 4; + } + } + + // Wallclock time when the trace began. + google.protobuf.Timestamp start_time = 4; // required + // Wallclock time when the trace ended. + google.protobuf.Timestamp end_time = 3; // required + // High precision duration of the trace; may not equal end_time-start_time + // (eg, if your machine's clock changed during the trace). + uint64 duration_ns = 11; // required + // A tree containing information about all resolvers run directly by this + // service, including errors. + Node root = 14; + + // ------------------------------------------------------------------------- + // Fields below this line are *not* included in federated traces (the traces + // sent from federated services to the gateway). + + // In addition to details.raw_query, we include a "signature" of the query, + // which can be normalized: for example, you may want to discard aliases, drop + // unused operations and fragments, sort fields, etc. The most important thing + // here is that the signature match the signature in StatsReports. In + // StatsReports signatures show up as the key in the per_query map (with the + // operation name prepended). The signature should be a valid GraphQL query. + // All traces must have a signature; if this Trace is in a FullTracesReport + // that signature is in the key of traces_per_query rather than in this field. 
+ // Engineproxy provides the signature in legacy_signature_needs_resigning + // instead. + string signature = 19; + + Details details = 6; + + // Note: engineproxy always sets client_name, client_version, and client_address to "none". + // apollo-engine-reporting allows for them to be set by the user. + string client_name = 7; + string client_version = 8; + string client_address = 9; + string client_reference_id = 23; + + HTTP http = 10; + + CachePolicy cache_policy = 18; + + // If this Trace was created by a gateway, this is the query plan, including + // sub-Traces for federated services. Note that the 'root' tree on the + // top-level Trace won't contain any resolvers (though it could contain errors + // that occurred in the gateway itself). + QueryPlanNode query_plan = 26; + + // Was this response served from a full query response cache? (In that case + // the node tree will have no resolvers.) + bool full_query_cache_hit = 20; + + // Was this query specified successfully as a persisted query hash? + bool persisted_query_hit = 21; + // Did this query contain both a full query string and a persisted query hash? + // (This typically means that a previous request was rejected as an unknown + // persisted query.) + bool persisted_query_register = 22; + + // Was this operation registered and a part of the safelist? + bool registered_operation = 24; + + // Was this operation forbidden due to lack of safelisting? + bool forbidden_operation = 25; + + // -------------------------------------------------------------- + // Fields below this line are only set by the old Go engineproxy. + google.protobuf.Timestamp origin_reported_start_time = 15; + google.protobuf.Timestamp origin_reported_end_time = 16; + uint64 origin_reported_duration_ns = 17; + + // Older agents (eg the Go engineproxy) relied to some degree on the Engine + // backend to run their own semi-compatible implementation of a specific + // variant of query signatures. 
The backend does not do this for new agents (which + // set the above 'signature' field). It used to still "re-sign" signatures + // from engineproxy, but we've now simplified the backend to no longer do this. + // Deprecated and ignored in FullTracesReports. + string legacy_signature_needs_resigning = 5; + + + // removed: Node parse = 12; Node validate = 13; + // Id128 server_id = 1; Id128 client_id = 2; + reserved 12, 13, 1, 2; +} + +// The `service` value embedded within the header key is not guaranteed to contain an actual service, +// and, in most cases, the service information is trusted to come from upstream processing. If the +// service _is_ specified in this header, then it is checked to match the context that is reporting it. +// Otherwise, the service information is deduced from the token context of the reporter and then sent +// along via other mechanisms (in Kafka, the `ReportKafkaKey). The other information (hostname, +// agent_version, etc.) is sent by the Apollo Engine Reporting agent, but we do not currently save that +// information to any of our persistent storage. +message ReportHeader { + string service = 3; + // eg "host-01.example.com" + string hostname = 5; + + // eg "engineproxy 0.1.0" + string agent_version = 6; // required + // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`) + string service_version = 7; + // eg "node v4.6.0" + string runtime_version = 8; + // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux" + string uname = 9; + // eg "current", "prod" + string schema_tag = 10; + // The hex representation of the sha512 of the introspection response + string schema_hash = 11; +} + +message PathErrorStats { + map children = 1; + uint64 errors_count = 4; + uint64 requests_with_errors_count = 5; +} + +message ClientNameStats { + // Duration histogram for non-cache-hit queries. + // (See docs/histograms.md for the histogram format.) 
+ repeated int64 latency_count = 1; + reserved 2; // removed: repeated uint64 error_count = 2; + // These per-version fields were used to understand what versions contributed to this sample + // when we were implementing the aggregation of this information ourselves using BigTable. + // However, since the per-version stats don't separate out latency, it makes more sense to + // have stats reported with contextual information so we can have the specific breakdown we're + // looking for. These fields are somewhat misleading as we never actually do any per-version + // awareness with anything reporting in the legacy "per_client_name" stats, and instead use + // "query_stats_with_context" to have more contextual information. + map requests_count_per_version = 3; // required + map cache_hits_per_version = 4; + map persisted_query_hits_per_version = 10; + map persisted_query_misses_per_version = 11; + map registered_operation_count_per_version = 12; + map forbidden_operation_count_per_version = 13; + repeated int64 cache_latency_count = 5; // Duration histogram; see docs/histograms.md + PathErrorStats root_error_stats = 6; + uint64 requests_with_errors_count = 7; + // TTL histograms for cache misses for the public cache. + repeated int64 public_cache_ttl_count = 8; + // TTL histograms for cache misses for the private cache. 
+ repeated int64 private_cache_ttl_count = 9; +} + +message QueryLatencyStats { + repeated int64 latency_count = 1; + uint64 request_count = 2; + uint64 cache_hits = 3; + uint64 persisted_query_hits = 4; + uint64 persisted_query_misses = 5; + repeated int64 cache_latency_count = 6; + PathErrorStats root_error_stats = 7; + uint64 requests_with_errors_count = 8; + repeated int64 public_cache_ttl_count = 9; + repeated int64 private_cache_ttl_count = 10; + uint64 registered_operation_count = 11; + uint64 forbidden_operation_count = 12; +} + +message StatsContext { + string client_reference_id = 1; + string client_name = 2; + string client_version = 3; +} + +message ContextualizedQueryLatencyStats { + QueryLatencyStats query_latency_stats = 1; + StatsContext context = 2; +} + +message ContextualizedTypeStats { + StatsContext context = 1; + map per_type_stat = 2; +} + +message FieldStat { + string name = 2; // deprecated; only set when stored in TypeStat.field + string return_type = 3; // required; eg "String!" for User.email:String! + uint64 errors_count = 4; + uint64 count = 5; + uint64 requests_with_errors_count = 6; + repeated int64 latency_count = 8; // Duration histogram; see docs/histograms.md +} + +message TypeStat { + string name = 1; // deprecated; only set when stored in QueryStats.per_type + repeated FieldStat field = 2; // deprecated; use per_field_stat instead + // Key is (eg) "email" for User.email:String! + map per_field_stat = 3; +} + +message QueryStats { + // Either per_client_name (for back-compat) or query_stats_with_context must be specified. If both are + // specified, then query_stats_with_context will be used and per_client_name will be ignored. Although + // the fields in ClientNameStats mention things "per-version," the information in the "per-version" + // fields will only ever be over the default version, the empty String: "", if arrived at via the + // FullTracesAggregator. 
+  map<string, ClientNameStats> per_client_name = 1; // deprecated; use stats_with_context instead
+  repeated ContextualizedQueryLatencyStats query_stats_with_context = 4;
+  repeated TypeStat per_type = 2; // deprecated; use type_stats_with_context instead
+  // Key is the parent type, e.g. "User" for User.email:String!
+  map<string, TypeStat> per_type_stat = 3; // deprecated; use type_stats_with_context instead
+  repeated ContextualizedTypeStats type_stats_with_context = 5;
+}
+
+// Top-level message type for the server-side traces endpoint
+message TracesReport {
+  ReportHeader header = 1; // required
+  repeated Trace trace = 2; // required
+}
+
+message Field {
+  string name = 2; // required; eg "email" for User.email:String!
+  string return_type = 3; // required; eg "String!" for User.email:String!
+}
+
+message Type {
+  string name = 1; // required; eg "User" for User.email:String!
+  repeated Field field = 2;
+}
+
+message MemStats {
+  uint64 total_bytes = 1; // MemStats.Sys
+  uint64 stack_bytes = 2; // MemStats.StackSys
+  uint64 heap_bytes = 3; // MemStats.HeapSys
+  uint64 heap_released_bytes = 13; // MemStats.HeapReleased
+  uint64 gc_overhead_bytes = 4; // MemStats.GCSys
+
+  uint64 stack_used_bytes = 5; // MemStats.StackInuse
+  uint64 heap_allocated_bytes = 6; // MemStats.HeapAlloc
+  uint64 heap_allocated_objects = 7; // MemStats.HeapObjects
+
+  uint64 heap_allocated_bytes_delta = 8; // MemStats.TotalAlloc delta
+  uint64 heap_allocated_objects_delta = 9; // MemStats.Mallocs delta
+  uint64 heap_freed_objects_delta = 10; // MemStats.Frees delta
+
+  uint64 gc_stw_ns_delta = 11; // MemStats.PauseTotalNs delta
+  uint64 gc_count_delta = 12; // MemStats.NumGC delta
+}
+
+message TimeStats {
+  uint64 uptime_ns = 1;
+  uint64 real_ns_delta = 2;
+  uint64 user_ns_delta = 3;
+  uint64 sys_ns_delta = 4;
+}
+
+// Top-level message type for the server-side stats endpoint
+message StatsReport {
+  ReportHeader header = 1; // required
+
+  // These fields are about properties of the engineproxy and are not generated
+  // from FullTracesReports.
+  MemStats mem_stats = 2;
+  TimeStats time_stats = 3;
+
+  // Beginning of the period over which stats are collected.
+  google.protobuf.Timestamp start_time = 8;
+  // End of the period of which stats are collected.
+  google.protobuf.Timestamp end_time = 9;
+  // Only used to interpret mem_stats and time_stats; not generated from
+  // FullTracesReports.
+  uint64 realtime_duration = 10;
+
+
+  // Maps from query descriptor to QueryStats. Required unless
+  // legacy_per_query_missing_operation_name is set. The keys are strings of the
+  // form `# operationName\nsignature` (literal hash and space), with
+  // operationName - if there is no operation name.
+  map<string, QueryStats> per_query = 14;
+
+  // Older agents (Go engineproxy) didn't explicitly include the operation name
+  // in the key of this map, and the server had to parse it out (after a
+  // re-signing operation which is no longer performed). The key here is just the query
+  // signature. Deprecated.
+  map<string, QueryStats> legacy_per_query_implicit_operation_name = 12;
+
+  // Deprecated: it was useful in Optics where we had access to the whole schema
+  // but has not been ever used in Engine. apollo-engine-reporting will not
+  // send it.
+  repeated Type type = 13;
+}
+
+// This is the top-level message used by the new traces ingress. This
+// is designed for the apollo-engine-reporting TypeScript agent and will
+// eventually be documented as a public ingress API. This message consists
+// solely of traces; the equivalent of the StatsReport is automatically
+// generated server-side from this message. Agents should send traces
+// for all requests in this report. Generally, buffering up until a large
+// size has been reached (say, 4MB) or 5-10 seconds has passed is appropriate.
+message FullTracesReport {
+  ReportHeader header = 1;
+
+  // key is statsReportKey (# operationName\nsignature) Note that the nested
+  // traces will *not* have a signature or details.operationName (because the
+  // key is adequate).
+ // + // We also assume that traces don't have + // legacy_per_query_implicit_operation_name, and we don't require them to have + // details.raw_query (which would consume a lot of space and has privacy/data + // access issues, and isn't currently exposed by our app anyway). + map traces_per_query = 5; +} + +// Just a sequence of traces with the same statsReportKey. +message Traces { + repeated Trace trace = 1; +} + +message TraceV1 { + ReportHeader header = 1; + Trace trace = 2; +} diff --git a/lib/apollo-federation/tracing/proto/apollo_pb.rb b/lib/apollo-federation/tracing/proto/apollo_pb.rb new file mode 100644 index 000000000..196eefa42 --- /dev/null +++ b/lib/apollo-federation/tracing/proto/apollo_pb.rb @@ -0,0 +1,305 @@ +# frozen_string_literal: true + +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: apollo.proto + +require 'google/protobuf' + +require 'google/protobuf/timestamp_pb' +Google::Protobuf::DescriptorPool.generated_pool.build do + add_file('apollo.proto', syntax: :proto3) do + add_message 'mdg.engine.proto.Trace' do + optional :start_time, :message, 4, 'google.protobuf.Timestamp' + optional :end_time, :message, 3, 'google.protobuf.Timestamp' + optional :duration_ns, :uint64, 11 + optional :root, :message, 14, 'mdg.engine.proto.Trace.Node' + optional :signature, :string, 19 + optional :details, :message, 6, 'mdg.engine.proto.Trace.Details' + optional :client_name, :string, 7 + optional :client_version, :string, 8 + optional :client_address, :string, 9 + optional :client_reference_id, :string, 23 + optional :http, :message, 10, 'mdg.engine.proto.Trace.HTTP' + optional :cache_policy, :message, 18, 'mdg.engine.proto.Trace.CachePolicy' + optional :query_plan, :message, 26, 'mdg.engine.proto.Trace.QueryPlanNode' + optional :full_query_cache_hit, :bool, 20 + optional :persisted_query_hit, :bool, 21 + optional :persisted_query_register, :bool, 22 + optional :registered_operation, :bool, 24 + optional :forbidden_operation, :bool, 25 + 
optional :origin_reported_start_time, :message, 15, 'google.protobuf.Timestamp' + optional :origin_reported_end_time, :message, 16, 'google.protobuf.Timestamp' + optional :origin_reported_duration_ns, :uint64, 17 + optional :legacy_signature_needs_resigning, :string, 5 + end + add_message 'mdg.engine.proto.Trace.CachePolicy' do + optional :scope, :enum, 1, 'mdg.engine.proto.Trace.CachePolicy.Scope' + optional :max_age_ns, :int64, 2 + end + add_enum 'mdg.engine.proto.Trace.CachePolicy.Scope' do + value :UNKNOWN, 0 + value :PUBLIC, 1 + value :PRIVATE, 2 + end + add_message 'mdg.engine.proto.Trace.Details' do + map :variables_json, :string, :string, 4 + map :variables, :string, :bytes, 1 + optional :raw_query, :string, 2 + optional :operation_name, :string, 3 + end + add_message 'mdg.engine.proto.Trace.Error' do + optional :message, :string, 1 + repeated :location, :message, 2, 'mdg.engine.proto.Trace.Location' + optional :time_ns, :uint64, 3 + optional :json, :string, 4 + end + add_message 'mdg.engine.proto.Trace.HTTP' do + optional :method, :enum, 1, 'mdg.engine.proto.Trace.HTTP.Method' + optional :host, :string, 2 + optional :path, :string, 3 + map :request_headers, :string, :message, 4, 'mdg.engine.proto.Trace.HTTP.Values' + map :response_headers, :string, :message, 5, 'mdg.engine.proto.Trace.HTTP.Values' + optional :status_code, :uint32, 6 + optional :secure, :bool, 8 + optional :protocol, :string, 9 + end + add_message 'mdg.engine.proto.Trace.HTTP.Values' do + repeated :value, :string, 1 + end + add_enum 'mdg.engine.proto.Trace.HTTP.Method' do + value :UNKNOWN, 0 + value :OPTIONS, 1 + value :GET, 2 + value :HEAD, 3 + value :POST, 4 + value :PUT, 5 + value :DELETE, 6 + value :TRACE, 7 + value :CONNECT, 8 + value :PATCH, 9 + end + add_message 'mdg.engine.proto.Trace.Location' do + optional :line, :uint32, 1 + optional :column, :uint32, 2 + end + add_message 'mdg.engine.proto.Trace.Node' do + optional :original_field_name, :string, 14 + optional :type, :string, 3 + 
optional :parent_type, :string, 13 + optional :cache_policy, :message, 5, 'mdg.engine.proto.Trace.CachePolicy' + optional :start_time, :uint64, 8 + optional :end_time, :uint64, 9 + repeated :error, :message, 11, 'mdg.engine.proto.Trace.Error' + repeated :child, :message, 12, 'mdg.engine.proto.Trace.Node' + oneof :id do + optional :response_name, :string, 1 + optional :index, :uint32, 2 + end + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode' do + oneof :node do + optional :sequence, :message, 1, 'mdg.engine.proto.Trace.QueryPlanNode.SequenceNode' + optional :parallel, :message, 2, 'mdg.engine.proto.Trace.QueryPlanNode.ParallelNode' + optional :fetch, :message, 3, 'mdg.engine.proto.Trace.QueryPlanNode.FetchNode' + optional :flatten, :message, 4, 'mdg.engine.proto.Trace.QueryPlanNode.FlattenNode' + end + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode.SequenceNode' do + repeated :nodes, :message, 1, 'mdg.engine.proto.Trace.QueryPlanNode' + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode.ParallelNode' do + repeated :nodes, :message, 1, 'mdg.engine.proto.Trace.QueryPlanNode' + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode.FetchNode' do + optional :serviceName, :string, 1 + optional :traceParsingFailed, :bool, 2 + optional :trace, :message, 3, 'mdg.engine.proto.Trace' + optional :sent_time_offset, :uint64, 4 + optional :sent_time, :message, 5, 'google.protobuf.Timestamp' + optional :received_time, :message, 6, 'google.protobuf.Timestamp' + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode.FlattenNode' do + repeated :response_path, :message, 1, 'mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement' + optional :node, :message, 2, 'mdg.engine.proto.Trace.QueryPlanNode' + end + add_message 'mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement' do + oneof :id do + optional :field_name, :string, 1 + optional :index, :uint32, 2 + end + end + add_message 'mdg.engine.proto.ReportHeader' do + optional :service, :string, 3 + optional 
:hostname, :string, 5 + optional :agent_version, :string, 6 + optional :service_version, :string, 7 + optional :runtime_version, :string, 8 + optional :uname, :string, 9 + optional :schema_tag, :string, 10 + optional :schema_hash, :string, 11 + end + add_message 'mdg.engine.proto.PathErrorStats' do + map :children, :string, :message, 1, 'mdg.engine.proto.PathErrorStats' + optional :errors_count, :uint64, 4 + optional :requests_with_errors_count, :uint64, 5 + end + add_message 'mdg.engine.proto.ClientNameStats' do + repeated :latency_count, :int64, 1 + map :requests_count_per_version, :string, :uint64, 3 + map :cache_hits_per_version, :string, :uint64, 4 + map :persisted_query_hits_per_version, :string, :uint64, 10 + map :persisted_query_misses_per_version, :string, :uint64, 11 + map :registered_operation_count_per_version, :string, :uint64, 12 + map :forbidden_operation_count_per_version, :string, :uint64, 13 + repeated :cache_latency_count, :int64, 5 + optional :root_error_stats, :message, 6, 'mdg.engine.proto.PathErrorStats' + optional :requests_with_errors_count, :uint64, 7 + repeated :public_cache_ttl_count, :int64, 8 + repeated :private_cache_ttl_count, :int64, 9 + end + add_message 'mdg.engine.proto.QueryLatencyStats' do + repeated :latency_count, :int64, 1 + optional :request_count, :uint64, 2 + optional :cache_hits, :uint64, 3 + optional :persisted_query_hits, :uint64, 4 + optional :persisted_query_misses, :uint64, 5 + repeated :cache_latency_count, :int64, 6 + optional :root_error_stats, :message, 7, 'mdg.engine.proto.PathErrorStats' + optional :requests_with_errors_count, :uint64, 8 + repeated :public_cache_ttl_count, :int64, 9 + repeated :private_cache_ttl_count, :int64, 10 + optional :registered_operation_count, :uint64, 11 + optional :forbidden_operation_count, :uint64, 12 + end + add_message 'mdg.engine.proto.StatsContext' do + optional :client_reference_id, :string, 1 + optional :client_name, :string, 2 + optional :client_version, :string, 3 + end + 
add_message 'mdg.engine.proto.ContextualizedQueryLatencyStats' do + optional :query_latency_stats, :message, 1, 'mdg.engine.proto.QueryLatencyStats' + optional :context, :message, 2, 'mdg.engine.proto.StatsContext' + end + add_message 'mdg.engine.proto.ContextualizedTypeStats' do + optional :context, :message, 1, 'mdg.engine.proto.StatsContext' + map :per_type_stat, :string, :message, 2, 'mdg.engine.proto.TypeStat' + end + add_message 'mdg.engine.proto.FieldStat' do + optional :name, :string, 2 + optional :return_type, :string, 3 + optional :errors_count, :uint64, 4 + optional :count, :uint64, 5 + optional :requests_with_errors_count, :uint64, 6 + repeated :latency_count, :int64, 8 + end + add_message 'mdg.engine.proto.TypeStat' do + optional :name, :string, 1 + repeated :field, :message, 2, 'mdg.engine.proto.FieldStat' + map :per_field_stat, :string, :message, 3, 'mdg.engine.proto.FieldStat' + end + add_message 'mdg.engine.proto.QueryStats' do + map :per_client_name, :string, :message, 1, 'mdg.engine.proto.ClientNameStats' + repeated :query_stats_with_context, :message, 4, 'mdg.engine.proto.ContextualizedQueryLatencyStats' + repeated :per_type, :message, 2, 'mdg.engine.proto.TypeStat' + map :per_type_stat, :string, :message, 3, 'mdg.engine.proto.TypeStat' + repeated :type_stats_with_context, :message, 5, 'mdg.engine.proto.ContextualizedTypeStats' + end + add_message 'mdg.engine.proto.TracesReport' do + optional :header, :message, 1, 'mdg.engine.proto.ReportHeader' + repeated :trace, :message, 2, 'mdg.engine.proto.Trace' + end + add_message 'mdg.engine.proto.Field' do + optional :name, :string, 2 + optional :return_type, :string, 3 + end + add_message 'mdg.engine.proto.Type' do + optional :name, :string, 1 + repeated :field, :message, 2, 'mdg.engine.proto.Field' + end + add_message 'mdg.engine.proto.MemStats' do + optional :total_bytes, :uint64, 1 + optional :stack_bytes, :uint64, 2 + optional :heap_bytes, :uint64, 3 + optional :heap_released_bytes, :uint64, 13 + 
optional :gc_overhead_bytes, :uint64, 4 + optional :stack_used_bytes, :uint64, 5 + optional :heap_allocated_bytes, :uint64, 6 + optional :heap_allocated_objects, :uint64, 7 + optional :heap_allocated_bytes_delta, :uint64, 8 + optional :heap_allocated_objects_delta, :uint64, 9 + optional :heap_freed_objects_delta, :uint64, 10 + optional :gc_stw_ns_delta, :uint64, 11 + optional :gc_count_delta, :uint64, 12 + end + add_message 'mdg.engine.proto.TimeStats' do + optional :uptime_ns, :uint64, 1 + optional :real_ns_delta, :uint64, 2 + optional :user_ns_delta, :uint64, 3 + optional :sys_ns_delta, :uint64, 4 + end + add_message 'mdg.engine.proto.StatsReport' do + optional :header, :message, 1, 'mdg.engine.proto.ReportHeader' + optional :mem_stats, :message, 2, 'mdg.engine.proto.MemStats' + optional :time_stats, :message, 3, 'mdg.engine.proto.TimeStats' + optional :start_time, :message, 8, 'google.protobuf.Timestamp' + optional :end_time, :message, 9, 'google.protobuf.Timestamp' + optional :realtime_duration, :uint64, 10 + map :per_query, :string, :message, 14, 'mdg.engine.proto.QueryStats' + map :legacy_per_query_implicit_operation_name, :string, :message, 12, 'mdg.engine.proto.QueryStats' + repeated :type, :message, 13, 'mdg.engine.proto.Type' + end + add_message 'mdg.engine.proto.FullTracesReport' do + optional :header, :message, 1, 'mdg.engine.proto.ReportHeader' + map :traces_per_query, :string, :message, 5, 'mdg.engine.proto.Traces' + end + add_message 'mdg.engine.proto.Traces' do + repeated :trace, :message, 1, 'mdg.engine.proto.Trace' + end + add_message 'mdg.engine.proto.TraceV1' do + optional :header, :message, 1, 'mdg.engine.proto.ReportHeader' + optional :trace, :message, 2, 'mdg.engine.proto.Trace' + end + end +end + +module Mdg + module Engine + module Proto + Trace = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace').msgclass + Trace::CachePolicy = 
Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.CachePolicy').msgclass + Trace::CachePolicy::Scope = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.CachePolicy.Scope').enummodule + Trace::Details = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.Details').msgclass + Trace::Error = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.Error').msgclass + Trace::HTTP = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.HTTP').msgclass + Trace::HTTP::Values = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.HTTP.Values').msgclass + Trace::HTTP::Method = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.HTTP.Method').enummodule + Trace::Location = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.Location').msgclass + Trace::Node = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.Node').msgclass + Trace::QueryPlanNode = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode').msgclass + Trace::QueryPlanNode::SequenceNode = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode.SequenceNode').msgclass + Trace::QueryPlanNode::ParallelNode = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode.ParallelNode').msgclass + Trace::QueryPlanNode::FetchNode = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode.FetchNode').msgclass + Trace::QueryPlanNode::FlattenNode = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode.FlattenNode').msgclass + Trace::QueryPlanNode::ResponsePathElement = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement').msgclass + ReportHeader = 
Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.ReportHeader').msgclass + PathErrorStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.PathErrorStats').msgclass + ClientNameStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.ClientNameStats').msgclass + QueryLatencyStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.QueryLatencyStats').msgclass + StatsContext = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.StatsContext').msgclass + ContextualizedQueryLatencyStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.ContextualizedQueryLatencyStats').msgclass + ContextualizedTypeStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.ContextualizedTypeStats').msgclass + FieldStat = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.FieldStat').msgclass + TypeStat = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.TypeStat').msgclass + QueryStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.QueryStats').msgclass + TracesReport = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.TracesReport').msgclass + Field = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Field').msgclass + Type = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Type').msgclass + MemStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.MemStats').msgclass + TimeStats = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.TimeStats').msgclass + StatsReport = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.StatsReport').msgclass + FullTracesReport = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.FullTracesReport').msgclass + Traces = 
Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.Traces').msgclass + TraceV1 = Google::Protobuf::DescriptorPool.generated_pool.lookup('mdg.engine.proto.TraceV1').msgclass + end + end +end diff --git a/lib/apollo-federation/tracing/tracer.rb b/lib/apollo-federation/tracing/tracer.rb new file mode 100644 index 000000000..a8e6cee14 --- /dev/null +++ b/lib/apollo-federation/tracing/tracer.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +module ApolloFederation + module Tracing + module Tracer + # store string constants to avoid creating new strings for each call to .trace + EXECUTE_QUERY = 'execute_query' + EXECUTE_QUERY_LAZY = 'execute_query_lazy' + EXECUTE_FIELD = 'execute_field' + EXECUTE_FIELD_LAZY = 'execute_field_lazy' + + def self.trace(key, data, &block) + case key + when EXECUTE_QUERY + execute_query(data, &block) + when EXECUTE_QUERY_LAZY + execute_query_lazy(data, &block) + when EXECUTE_FIELD + execute_field(data, &block) + when EXECUTE_FIELD_LAZY + execute_field_lazy(data, &block) + else + yield + end + end + + # Step 1: + # Create a trace hash on the query context and record start times. + def self.execute_query(data, &block) + query = data.fetch(:query) + return block.call unless query.context && query.context[:tracing_enabled] + + query.context.namespace(ApolloFederation::Tracing::KEY).merge!( + start_time: Time.now.utc, + start_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond), + node_map: NodeMap.new, + ) + + block.call + end + + # Step 4: + # Record end times and merge them into the trace hash. 
+      def self.execute_query_lazy(data, &block)
+        result = block.call
+
+        query = data.fetch(:query)
+        return result unless query.context && query.context[:tracing_enabled]
+
+        trace = query.context.namespace(ApolloFederation::Tracing::KEY)
+
+        trace.merge!(
+          end_time: Time.now.utc,
+          end_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
+        )
+
+        result
+      end
+
+      # Step 2:
+      # * Record start and end times for the field resolver.
+      # * Rescue errors so the method doesn't exit early.
+      # * Create a trace "node" and attach field details.
+      # * Propagate the error (if necessary) so it ends up in the top-level errors array.
+      #
+      # The values in `data` are different depending on the executor runtime.
+      # https://graphql-ruby.org/api-doc/1.9.3/GraphQL/Tracing
+      #
+      # Nodes are added to the NodeMap stored in the trace hash.
+      #
+      # Errors are added to nodes in `ApolloFederation::Tracing.attach_trace_to_result`
+      # because we don't have the error `location` here.
+      def self.execute_field(data, &block)
+        # :context is only present in the interpreter runtime; fetch a nil
+        # default so the legacy runtime falls through instead of raising KeyError.
+        context = data.fetch(:context, nil) || data.fetch(:query).context
+        return block.call unless context && context[:tracing_enabled]
+
+        start_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+        begin
+          result = block.call
+        rescue StandardError => e
+          error = e
+        end
+
+        end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+        # interpreter runtime
+        if data.include?(:context)
+          path = context.path
+          field_name = context.field.graphql_name
+          field_type = context.field.type.to_s
+          parent_type = context.parent_type.graphql_name
+        else # legacy runtime
+          path = data.fetch(:path)
+          field_name = data.fetch(:field).graphql_name
+          field_type = data.fetch(:field).type.unwrap.graphql_name
+          parent_type = data.fetch(:owner).graphql_name
+        end
+
+        trace = context.namespace(ApolloFederation::Tracing::KEY)
+        node = trace[:node_map].add(path)
+
+        # original_field_name is set only for aliased fields
+        node.original_field_name =
field_name if field_name != path.last
+        node.type = field_type
+        node.parent_type = parent_type
+        node.start_time = start_time_nanos - trace[:start_time_nanos]
+        node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+        raise error if error
+
+        result
+      end
+
+      # Optional Step 3:
+      # Overwrite the end times on the trace node if the resolver was lazy.
+      def self.execute_field_lazy(data, &block)
+        # :context is absent in the legacy runtime; default to nil so the
+        # query-context fallback runs instead of fetch raising KeyError.
+        context = data.fetch(:context, nil) || data.fetch(:query).context
+        return block.call unless context && context[:tracing_enabled]
+
+        begin
+          result = block.call
+        rescue StandardError => e
+          error = e
+        end
+
+        end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+        # interpreter runtime
+        if data.include?(:context)
+          context = data.fetch(:context)
+          path = context.path
+        else # legacy runtime
+          context = data.fetch(:query).context
+          path = data.fetch(:path)
+        end
+
+        trace = context.namespace(ApolloFederation::Tracing::KEY)
+
+        node = trace[:node_map].node_for_path(path)
+        node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+        raise error if error
+
+        result
+      end
+    end
+  end
+end
diff --git a/spec/apollo-federation/tracing/node_map_spec.rb b/spec/apollo-federation/tracing/node_map_spec.rb
new file mode 100644
index 000000000..9def9da4a
--- /dev/null
+++ b/spec/apollo-federation/tracing/node_map_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require 'apollo-federation/tracing/node_map'
+
+RSpec.describe ApolloFederation::Tracing::NodeMap do
+  it 'creates parents for any deeply nested paths' do
+    map = described_class.new
+    map.add(['_entities', 0, 'reviews', 0, 'author', 'name'])
+    expect(map.nodes.keys).to eq [
+      '',
+      '_entities.0.reviews.0.author.name',
+      '_entities.0.reviews.0.author',
+      '_entities.0.reviews.0',
+      '_entities.0.reviews',
+      '_entities.0',
+      '_entities',
+    ]
+  end
+
+  it 'correctly assigns the node id for index paths' do
+    map = described_class.new
+    map.add(['_entities', 4, 'reviews'])
+    
expect(map.node_for_path('_entities.4').index).to eq 4 + end + + it 'correctly assigns the node id for named paths' do + map = described_class.new + map.add(['_entities', 4, 'reviews']) + expect(map.node_for_path('_entities.4.reviews').response_name).to eq 'reviews' + end + + it 'adds errors by path' do + map = described_class.new + map.add(['_entities', 4, 'reviews']) + + error = { + 'path' => ['_entities', 4, 'reviews'], + 'message' => 'whoops', + 'locations' => ['line' => 2, 'column' => 4], + } + map.add_error(error) + + expect(map.node_for_path('_entities.4.reviews').error).to eq [ + ApolloFederation::Tracing::Error.new( + json: JSON.dump(error), + location: [ApolloFederation::Tracing::Location.new(line: 2, column: 4)], + message: 'whoops', + ), + ] + end + + it 'adds errors to the root object when no node is found' do + map = described_class.new + + error = { + 'path' => ['_entities', 4, 'reviews'], + 'message' => 'whoops', + 'locations' => ['line' => 2, 'column' => 4], + } + map.add_error(error) + + expect(map.root.error).to eq [ + ApolloFederation::Tracing::Error.new( + json: JSON.dump(error), + location: [ApolloFederation::Tracing::Location.new(line: 2, column: 4)], + message: 'whoops', + ), + ] + end +end diff --git a/spec/apollo-federation/tracing_spec.rb b/spec/apollo-federation/tracing_spec.rb new file mode 100644 index 000000000..8a8e8b271 --- /dev/null +++ b/spec/apollo-federation/tracing_spec.rb @@ -0,0 +1,258 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'apollo-federation' + +RSpec.describe ApolloFederation::Tracing do + let(:base_schema) do + Class.new(GraphQL::Schema) do + use ApolloFederation::Tracing + end + end + + # configure clocks to increment by 1 for each call + before do + t = Time.new(2019, 8, 4, 12, 0, 0, '+00:00') + allow(Time).to receive(:now) { t += 1 } + + # nanos are used for durations and offsets, so you'll never see 42, 43, ... 
+ # instead, you'll see the difference from the first call (the start time) + # which will be 1, 2, 3 ... + ns = 42 + allow(Process).to receive(:clock_gettime) + .with(Process::CLOCK_MONOTONIC, :nanosecond) { ns += 1 } + end + + describe 'respecting options on context' do + let(:schema) do + query_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Query' + + field :test, String, null: false + + def test + 'hello world' + end + end + + Class.new(base_schema) do + query query_obj + end + end + + it 'does not add tracing extension by default' do + result = schema.execute('{ test }') + described_class.attach_trace_to_result(result) + expect(result[:extensions]).to be_nil + end + + it 'adds the extensions.ftv1 when the context has tracing_enabled: true' do + result = schema.execute('{ test }', context: { tracing_enabled: true }) + described_class.attach_trace_to_result(result) + expect(result[:extensions][:ftv1]).not_to be_nil + end + + it 'adds debugging info when the context has debug_tracing: true' do + result = schema.execute('{ test }', context: { tracing_enabled: true, debug_tracing: true }) + described_class.attach_trace_to_result(result) + expect(result[:extensions][:ftv1_debug]).not_to be_nil + end + end + + describe 'building the trace tree' do + let(:schema) do + grandchild_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Grandchild' + + field :id, String, null: false + end + + child_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Child' + + field :id, String, null: false + field :grandchild, grandchild_obj, null: false + + def grandchild + { id: 'grandchild' } + end + end + + parent_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Parent' + + field :id, String, null: false + field :child, child_obj, null: false + + def child + { id: 'child' } + end + end + + query_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Query' + + field :parent, parent_obj, null: false + + def parent + { id: 'parent' } + end + 
end + + Class.new(base_schema) do + query query_obj + end + end + + it 'records timing for children' do + result = schema.execute( + '{ parent { id, child { id, grandchild { id } } } }', + context: { tracing_enabled: true }, + ) + described_class.attach_trace_to_result(result) + + trace = ApolloFederation::Tracing::Trace.decode(Base64.decode64(result[:extensions][:ftv1])) + expect(trace).to eq(ApolloFederation::Tracing::Trace.new( + start_time: { seconds: 1_564_920_001, nanos: 0 }, + end_time: { seconds: 1_564_920_002, nanos: 0 }, + duration_ns: 13, + root: { + child: [{ + response_name: 'parent', + type: 'Parent!', + start_time: 1, + end_time: 2, + parent_type: 'Query', + child: [{ + response_name: 'id', + type: 'String!', + start_time: 3, + end_time: 4, + parent_type: 'Parent', + }, { + response_name: 'child', + type: 'Child!', + start_time: 5, + end_time: 6, + parent_type: 'Parent', + child: [{ + response_name: 'id', + type: 'String!', + start_time: 7, + end_time: 8, + parent_type: 'Child', + }, { + response_name: 'grandchild', + type: 'Grandchild!', + start_time: 9, + end_time: 10, + parent_type: 'Child', + child: [{ + response_name: 'id', + type: 'String!', + start_time: 11, + end_time: 12, + parent_type: 'Grandchild', + }], + },], + },], + }], + }, + )) + end + end + + describe 'indices and errors' do + let(:schema) do + item_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Item' + + field :id, String, null: false + field :name, String, null: false + + def name + raise GraphQL::ExecutionError, "Can't continue with this query" if object[:id] == '2' + + "Item #{object[:id]}" + end + end + + query_obj = Class.new(GraphQL::Schema::Object) do + graphql_name 'Query' + + field :items, [item_obj], null: false + + def items + [{ id: '1' }, { id: '2' }] + end + end + + Class.new(base_schema) do + query query_obj + end + end + + let(:trace) do + result = schema.execute('{ items { id, name } }', context: { tracing_enabled: true }) + 
described_class.attach_trace_to_result(result) + ApolloFederation::Tracing::Trace.decode(Base64.decode64(result[:extensions][:ftv1])) + end + + it 'records index instead of response_name for items in arrays' do + expect(trace).to eq ApolloFederation::Tracing::Trace.new( + start_time: { seconds: 1_564_920_001, nanos: 0 }, + end_time: { seconds: 1_564_920_002, nanos: 0 }, + duration_ns: 11, + root: { + child: [{ + response_name: 'items', + type: '[Item!]!', + start_time: 1, + end_time: 2, + parent_type: 'Query', + child: [ + { + index: 0, + child: [{ + response_name: 'id', + type: 'String!', + start_time: 3, + end_time: 4, + parent_type: 'Item', + }, { + response_name: 'name', + type: 'String!', + start_time: 5, + end_time: 6, + parent_type: 'Item', + },], + }, + { + index: 1, + child: [{ + response_name: 'id', + type: 'String!', + start_time: 7, + end_time: 8, + parent_type: 'Item', + }, { + response_name: 'name', + type: 'String!', + start_time: 9, + end_time: 10, + parent_type: 'Item', + error: [{ + message: "Can't continue with this query", + location: [{ line: 1, column: 15 }], + json: %({"message":"Can't continue with this query",\ +"locations":[{"line":1,"column":15}],"path":["items",1,"name"]}), + }], + },], + }, + ], + }], + }, + ) + end + end +end