diff --git a/docs/common/headers_optional_read_limits.md b/docs/common/headers_optional_read_limits.md
new file mode 100644
index 0000000000..1f262eb220
--- /dev/null
+++ b/docs/common/headers_optional_read_limits.md
@@ -0,0 +1,6 @@
+- `M3-Limit-Max-Series`:  
+ If this header is set it will override any configured per query time series limit. If the limit is hit, it will either return a partial result or an error based on the require exhaustive configuration set.<br />
+- `M3-Limit-Max-Docs`:  
+ If this header is set it will override any configured per query time series * blocks limit (docs limit). If the limit is hit, it will either return a partial result or an error based on the require exhaustive configuration set.<br />
+- `M3-Limit-Require-Exhaustive`:  
+ If this header is set it will override any configured require exhaustive setting. If "true", it will return an error if the query hits a configured limit (such as the series or docs limit) instead of a partial result. Otherwise, if "false", it will return a partial result of the time series already matched, with the response header `M3-Results-Limited` detailing the limit that was hit and a warning included in the response body.<br /><br />
\ No newline at end of file
diff --git a/docs/common/headers_optional_read_write.md b/docs/common/headers_optional_read_write_all.md
similarity index 100%
rename from docs/common/headers_optional_read_write.md
rename to docs/common/headers_optional_read_write_all.md
diff --git a/docs/common/headers_optional_write.md b/docs/common/headers_optional_write_all.md
similarity index 100%
rename from docs/common/headers_optional_write.md
rename to docs/common/headers_optional_write_all.md
diff --git a/docs/m3coordinator/api/remote.md b/docs/m3coordinator/api/remote.md
index 813264e33a..7fc5f98d50 100644
--- a/docs/m3coordinator/api/remote.md
+++ b/docs/m3coordinator/api/remote.md
@@ -23,10 +23,10 @@ None.
 #### Optional
 
 --8<--
-docs/common/headers_optional_read_write.md
+docs/common/headers_optional_read_write_all.md
 --8<--
 --8<--
-docs/common/headers_optional_write.md
+docs/common/headers_optional_write_all.md
 --8<--
 
 ### Data Params
@@ -97,7 +97,10 @@ None.
 #### Optional
 
 --8<--
-docs/common/headers_optional_read_write.md
+docs/common/headers_optional_read_write_all.md
+--8<--
+--8<--
+docs/common/headers_optional_read_limits.md
 --8<--
 
 ### Data Params
diff --git a/docs/m3query/api/index.md b/docs/m3query/api/index.md
index c6e2e2f68a..c123fbd13d 100644
--- a/docs/m3query/api/index.md
+++ b/docs/m3query/api/index.md
@@ -33,7 +33,10 @@ Query using PromQL and returns JSON datapoints compatible with the Prometheus Gr
 #### Optional
 
 --8<--
-docs/common/headers_optional_read_write.md
+docs/common/headers_optional_read_write_all.md
+--8<--
+--8<--
+docs/common/headers_optional_read_limits.md
 --8<--
 
 ### Data Params
diff --git a/docs/operational_guide/resource_limits.md b/docs/operational_guide/resource_limits.md
index 9948f7564b..15924ee0bf 100644
--- a/docs/operational_guide/resource_limits.md
+++ b/docs/operational_guide/resource_limits.md
@@ -6,14 +6,17 @@ performance of M3 in a production environment.
 
 ## M3DB
 
+### Configuring limits
+
 The best way to get started protecting M3DB nodes is to set a few limits on the
 top level `limits` config stanza for M3DB.
 
-When using M3DB for metrics workloads queries arrive as a set of matchers 
+When using M3DB for metrics workloads, queries arrive as a set of matchers 
 that select time series based on certain dimensions. The primary mechanism to 
 protect against these matchers matching huge amounts of data in an unbounded 
 way is to set a maximum limit for the amount of time series blocks allowed to
-be matched and consequently read in a given time window. This can be done using `maxRecentlyQueriedSeriesBlocks` to set a maximum value and lookback time window 
+be matched and consequently read in a given time window. This can be done using 
+`maxRecentlyQueriedSeriesBlocks` to set a maximum value and lookback time window 
 to determine the duration over which the max limit is enforced.
 
 You can use the Prometheus query `rate(query_stats_total_docs_per_block[1m])` to 
@@ -54,3 +57,77 @@ limits:
   # exhaustion from reads.
   maxOutstandingReadRequests: 0
 ```
+
+## M3 Query and M3 Coordinator
+
+### Deployment
+
+Protecting your ingestion of metrics from being impacted by query workloads 
+can first and foremost be done by deploying M3 Query and M3 Coordinator 
+independently. That is, for writes to M3 use a dedicated deployment of 
+M3 Coordinator instances, and then for queries to M3 use a dedicated deployment 
+of M3 Query instances.
+
+This ensures when M3 Query instances become busy and are starved of resources 
+serving an unexpected query load, they will not interrupt the flow of metrics
+being ingested to M3.
+
+### Configuring limits
+
+To protect against individual queries using too many resources, you can specify some
+sane limits in the M3 Query (and consequently M3 Coordinator) configuration 
+file under the top level `limits` config stanza.
+
+There are two types of limits:
+
+- Per query time series limit
+- Per query time series * blocks limit (docs limit)
+
+When either of these limits is hit, you can define the behavior you would like, 
+either to return an error when this limit is hit, or to return a partial result 
+with the response header `M3-Results-Limited` detailing the limit that was hit 
+and a warning included in the response body.
+
+### Annotated configuration
+
+```
+limits:
+  # If set will override default limits set per query.
+  perQuery:
+    # If set limits the number of time series returned for any given 
+    # individual storage node per query, before returning result to query 
+    # service.
+    maxFetchedSeries: 0
+
+    # If set limits the number of index documents matched for any given 
+    # individual storage node per query, before returning result to query 
+    # service.
+    # This equates to the number of time series * number of blocks, so for 
+    # 100 time series matching 4 hours of data for a namespace using a 2 hour 
+    # block size, that would result in matching 200 index documents.
+    maxFetchedDocs: 0
+
+    # If true this causes a query error if the query exceeds the series or 
+    # docs limit for any given individual storage node per query.
+    requireExhaustive: false
+
+    # If set this limits the max number of datapoints allowed to be used by a
+    # given query. This is applied at the query service after the result has 
+    # been returned by a storage node.
+    maxFetchedDatapoints: 0
+
+  # If set will override default limits set globally.
+  global:
+    # If set this limits the max number of datapoints allowed to be used by all
+    # queries at any point in time. This is applied at the query service after 
+    # the result has been returned by a storage node.
+    maxFetchedDatapoints: 0
+```
+
+### Headers
+
+The following headers can also be used to override configured limits on a per-request basis (to allow for different limits depending on the caller):
+
+--8<--
+docs/common/headers_optional_read_limits.md
+--8<--
diff --git a/docs/performance/index.md b/docs/performance/index.md
deleted file mode 100644
index 8767efd545..0000000000
--- a/docs/performance/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Performance configurations and settings for M3DB and m3query
-
-**Please note:** This documentation is a work in progress and more detail is required.
diff --git a/docs/performance/m3db/index.md b/docs/performance/m3db/index.md
deleted file mode 100644
index 5133851822..0000000000
--- a/docs/performance/m3db/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
-## Performance configurations
-
-<!-- TODO: Fill this in -->
diff --git a/docs/performance/m3query/index.md b/docs/performance/m3query/index.md
deleted file mode 100644
index aec029afbc..0000000000
--- a/docs/performance/m3query/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
-## Performance configurations
-
-Below are some common configurations related to performance for the query engine.
diff --git a/mkdocs.yml b/mkdocs.yml
index 9c9b66533f..398005f476 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -116,12 +116,6 @@ pages:
     - "Graphite": "integrations/graphite.md"
     - "Grafana": "integrations/grafana.md"
     - "InfluxDB": "integrations/influxdb.md"
-  - "Performance":
-    - "Introduction": "performance/index.md"
-    - "M3DB":
-      - "M3DB Performance": "performance/m3db/index.md"
-    - "m3query":
-      - "m3query Performance":  "performance/m3query/index.md"
   - "Troubleshooting": "troubleshooting/index.md"
   - "FAQs": "faqs/index.md"
   - "Glossary": "glossary/index.md"
diff --git a/src/cmd/services/m3query/config/config.go b/src/cmd/services/m3query/config/config.go
index 24c65e90b0..c0e390cfd4 100644
--- a/src/cmd/services/m3query/config/config.go
+++ b/src/cmd/services/m3query/config/config.go
@@ -252,15 +252,15 @@ func (c PrometheusQueryConfiguration) MaxSamplesPerQueryOrDefault() int {
 // LimitsConfiguration represents limitations on resource usage in the query
 // instance. Limits are split between per-query and global limits.
 type LimitsConfiguration struct {
-	// deprecated: use PerQuery.MaxComputedDatapoints instead.
-	DeprecatedMaxComputedDatapoints int `yaml:"maxComputedDatapoints"`
+	// PerQuery configures limits which apply to each query individually.
+	PerQuery PerQueryLimitsConfiguration `yaml:"perQuery"`
 
 	// Global configures limits which apply across all queries running on this
 	// instance.
 	Global GlobalLimitsConfiguration `yaml:"global"`
 
-	// PerQuery configures limits which apply to each query individually.
-	PerQuery PerQueryLimitsConfiguration `yaml:"perQuery"`
+	// deprecated: use PerQuery.MaxComputedDatapoints instead.
+	DeprecatedMaxComputedDatapoints int `yaml:"maxComputedDatapoints"`
 }
 
 // MaxComputedDatapoints is a getter providing backwards compatibility between
@@ -279,8 +279,9 @@ func (lc LimitsConfiguration) MaxComputedDatapoints() int {
 // GlobalLimitsConfiguration represents limits on resource usage across a query
 // instance. Zero or negative values imply no limit.
 type GlobalLimitsConfiguration struct {
-	// MaxFetchedDatapoints limits the total number of datapoints actually
-	// fetched by all queries at any given time.
+	// MaxFetchedDatapoints limits the max number of datapoints allowed to be
+	// used by all queries at any point in time. This is applied at the query
+	// service after the result has been returned by a storage node.
 	MaxFetchedDatapoints int `yaml:"maxFetchedDatapoints"`
 }
 
@@ -293,6 +294,24 @@ func (l *GlobalLimitsConfiguration) AsLimitManagerOptions() cost.LimitManagerOpt
 // PerQueryLimitsConfiguration represents limits on resource usage within a
 // single query. Zero or negative values imply no limit.
 type PerQueryLimitsConfiguration struct {
+	// MaxFetchedSeries limits the number of time series returned for any given
+	// individual storage node per query, before returning result to query
+	// service.
+	MaxFetchedSeries int `yaml:"maxFetchedSeries"`
+
+	// MaxFetchedDocs limits the number of index documents matched for any given
+	// individual storage node per query, before returning result to query
+	// service.
+	MaxFetchedDocs int `yaml:"maxFetchedDocs"`
+
+	// RequireExhaustive results in an error if the query exceeds any limit.
+	RequireExhaustive bool `yaml:"requireExhaustive"`
+
+	// MaxFetchedDatapoints limits the max number of datapoints allowed to be
+	// used by a given query. This is applied at the query service after the
+	// result has been returned by a storage node.
+	MaxFetchedDatapoints int `yaml:"maxFetchedDatapoints"`
+
 	// PrivateMaxComputedDatapoints limits the number of datapoints that can be
 	// returned by a query. It's determined purely
 	// from the size of the time range and the step size (end - start / step).
@@ -301,19 +320,6 @@ type PerQueryLimitsConfiguration struct {
 	// LimitsConfiguration.MaxComputedDatapoints() instead of accessing
 	// this field directly.
 	PrivateMaxComputedDatapoints int `yaml:"maxComputedDatapoints"`
-
-	// MaxFetchedDatapoints limits the number of datapoints actually used by a
-	// given query.
-	MaxFetchedDatapoints int `yaml:"maxFetchedDatapoints"`
-
-	// MaxFetchedSeries limits the number of time series returned by a storage node.
-	MaxFetchedSeries int `yaml:"maxFetchedSeries"`
-
-	// MaxFetchedDocs limits the number of index documents matched by a query.
-	MaxFetchedDocs int `yaml:"maxFetchedDocs"`
-
-	// RequireExhaustive results in an error if the query exceeds the series limit.
-	RequireExhaustive bool `yaml:"requireExhaustive"`
 }
 
 // AsLimitManagerOptions converts this configuration to