From 8e5fc200b5de7298c0824b257b686e9ee4b27955 Mon Sep 17 00:00:00 2001
From: Petar Dzepina <petar.dzepina@gmail.com>
Date: Thu, 28 Jul 2022 21:30:28 +0200
Subject: [PATCH] Add doc_count field mapper (#3985)

Bucket aggregations compute bucket doc_count values by incrementing
the doc_count by 1 for every document collected in the bucket.

When using summary fields (such as aggregate_metric_double) one
field may represent more than one document. To provide this
functionality this commit implements a new field mapper (named
doc_count field mapper). This field is a positive integer representing
the number of documents aggregated in a single summary field.

Bucket aggregations check if a field of type doc_count exists in a
document and take this value into consideration when computing doc
counts.

Note: This originated from upstream PR 64503.

Signed-off-by: Petar Dzepina <petar.dzepina@gmail.com>
(cherry picked from commit fb7d81a7c00df14223e34511a6b1b5a3eff44e65)
---
 .../380_doc_count_field.yml                   | 146 ++++++++++++++++
 .../index/mapper/DocCountFieldMapper.java     | 164 ++++++++++++++++++
 .../org/opensearch/indices/IndicesModule.java |   2 +
 .../search/aggregations/AggregatorBase.java   |   4 +-
 .../bucket/BucketsAggregator.java             |  37 ++--
 .../aggregations/bucket/DocCountProvider.java |  62 +++++++
 .../adjacency/AdjacencyMatrixAggregator.java  |   4 +-
 .../bucket/composite/CompositeAggregator.java |   5 +-
 .../CompositeValuesCollectorQueue.java        |  22 +--
 .../bucket/composite/SortedDocsProducer.java  |   6 +-
 .../bucket/nested/NestedAggregator.java       |   3 +-
 .../GlobalOrdinalsStringTermsAggregator.java  |  13 +-
 .../mapper/DocCountFieldMapperTests.java      |  83 +++++++++
 .../index/mapper/DocCountFieldTypeTests.java  |  69 ++++++++
 .../indices/IndicesModuleTests.java           |   2 +
 .../bucket/DocCountProviderTests.java         | 107 ++++++++++++
 .../CompositeValuesCollectorQueueTests.java   |   2 +-
 17 files changed, 692 insertions(+), 39 deletions(-)
 create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/380_doc_count_field.yml
 create mode 100644 server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java
 create mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/DocCountProvider.java
 create mode 100644 server/src/test/java/org/opensearch/index/mapper/DocCountFieldMapperTests.java
 create mode 100644 server/src/test/java/org/opensearch/index/mapper/DocCountFieldTypeTests.java
 create mode 100644 server/src/test/java/org/opensearch/search/aggregations/bucket/DocCountProviderTests.java

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/380_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/380_doc_count_field.yml
new file mode 100644
index 0000000000000..a7a796eceeb39
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/380_doc_count_field.yml
@@ -0,0 +1,146 @@
+setup:
+  - do:
+      indices.create:
+        index: test_1
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              str:
+                type: keyword
+              number:
+                type: integer
+
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"_doc_count": 10, "str": "abc", "number" : 500, "unmapped": "abc" }'
+          - '{"index": {}}'
+          - '{"_doc_count": 5, "str": "xyz", "number" : 100, "unmapped": "xyz" }'
+          - '{"index": {}}'
+          - '{"_doc_count": 7, "str": "foo", "number" : 100, "unmapped": "foo" }'
+          - '{"index": {}}'
+          - '{"_doc_count": 1, "str": "foo", "number" : 200, "unmapped": "foo" }'
+          - '{"index": {}}'
+          - '{"str": "abc", "number" : 500, "unmapped": "abc" }'
+
+---
+"Test numeric terms agg with doc_count":
+  - skip:
+      version: "1.0.0 - "
+      reason: "until this is released and we know exact version"
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "aggs" : { "num_terms" : { "terms" : { "field" : "number" } } } }
+
+  - match: { hits.total: 5 }
+  - length: { aggregations.num_terms.buckets: 3 }
+  - match: { aggregations.num_terms.buckets.0.key: 100 }
+  - match: { aggregations.num_terms.buckets.0.doc_count: 12 }
+  - match: { aggregations.num_terms.buckets.1.key: 500 }
+  - match: { aggregations.num_terms.buckets.1.doc_count: 11 }
+  - match: { aggregations.num_terms.buckets.2.key: 200 }
+  - match: { aggregations.num_terms.buckets.2.doc_count: 1 }
+
+---
+"Test keyword terms agg with doc_count":
+  - skip:
+      version: "1.0.0 - "
+      reason: "until this is released and we know exact version"
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str" } } } }
+
+  - match: { hits.total: 5 }
+  - length: { aggregations.str_terms.buckets: 3 }
+  - match: { aggregations.str_terms.buckets.0.key: "abc" }
+  - match: { aggregations.str_terms.buckets.0.doc_count: 11 }
+  - match: { aggregations.str_terms.buckets.1.key: "foo" }
+  - match: { aggregations.str_terms.buckets.1.doc_count: 8 }
+  - match: { aggregations.str_terms.buckets.2.key: "xyz" }
+  - match: { aggregations.str_terms.buckets.2.doc_count: 5 }
+
+---
+"Test unmapped string terms agg with doc_count":
+  - skip:
+      version: "1.0.0 - "
+      reason: "until this is released and we know exact version"
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"_doc_count": 10, "str": "abc" }'
+          - '{"index": {}}'
+          - '{"str": "abc" }'
+  - do:
+      search:
+        index: test_2
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str.keyword" } } } }
+
+  - match: { hits.total: 2 }
+  - length: { aggregations.str_terms.buckets: 1 }
+  - match: { aggregations.str_terms.buckets.0.key: "abc" }
+  - match: { aggregations.str_terms.buckets.0.doc_count: 11 }
+
+---
+"Test composite str_terms agg with doc_count":
+  - skip:
+      version: "1.0.0 - "
+      reason: "until this is released and we know exact version"
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "aggs" :
+          { "composite_agg" : { "composite" :
+                                  {
+                                    "sources": ["str_terms": { "terms": { "field": "str" } }]
+                                  }
+          }
+          }
+        }
+
+  - match: { hits.total: 5 }
+  - length: { aggregations.composite_agg.buckets: 3 }
+  - match: { aggregations.composite_agg.buckets.0.key.str_terms: "abc" }
+  - match: { aggregations.composite_agg.buckets.0.doc_count: 11 }
+  - match: { aggregations.composite_agg.buckets.1.key.str_terms: "foo" }
+  - match: { aggregations.composite_agg.buckets.1.doc_count: 8 }
+  - match: { aggregations.composite_agg.buckets.2.key.str_terms: "xyz" }
+  - match: { aggregations.composite_agg.buckets.2.doc_count: 5 }
+
+---
+"Test composite num_terms agg with doc_count":
+  - skip:
+      version: "1.0.0 - "
+      reason: "until this is released and we know exact version"
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body: { "size" : 0, "aggs" :
+          { "composite_agg" :
+              { "composite" :
+                  {
+                    "sources": ["num_terms" : { "terms" : { "field" : "number" } }]
+                  }
+              }
+          }
+        }
+
+  - match: { hits.total: 5 }
+  - length: { aggregations.composite_agg.buckets: 3 }
+  - match: { aggregations.composite_agg.buckets.0.key.num_terms: 100 }
+  - match: { aggregations.composite_agg.buckets.0.doc_count: 12 }
+  - match: { aggregations.composite_agg.buckets.1.key.num_terms: 200 }
+  - match: { aggregations.composite_agg.buckets.1.doc_count: 1 }
+  - match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 }
+  - match: { aggregations.composite_agg.buckets.2.doc_count: 11 }
diff --git a/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java
new file mode 100644
index 0000000000000..e3f6f5ef130e9
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java
@@ -0,0 +1,164 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.Query;
+import org.opensearch.common.xcontent.XContentParser;
+import org.opensearch.common.xcontent.XContentParserUtils;
+import org.opensearch.index.query.QueryShardContext;
+import org.opensearch.index.query.QueryShardException;
+import org.opensearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Mapper for the doc_count field.
+ *
+ * @opensearch.internal
+ */
+public class DocCountFieldMapper extends MetadataFieldMapper {
+
+    public static final String NAME = "_doc_count";
+    public static final String CONTENT_TYPE = "_doc_count";
+
+    public static final TypeParser PARSER = new ConfigurableTypeParser(
+        c -> new DocCountFieldMapper(),
+        c -> new DocCountFieldMapper.Builder()
+    );
+
+    static class Builder extends MetadataFieldMapper.Builder {
+
+        Builder() {
+            super(NAME);
+        }
+
+        @Override
+        protected List<Parameter<?>> getParameters() {
+            return Collections.emptyList();
+        }
+
+        @Override
+        public DocCountFieldMapper build(BuilderContext context) {
+            return new DocCountFieldMapper();
+        }
+    }
+
+    /**
+     * Field type for DocCount Field Mapper
+     *
+     * @opensearch.internal
+     */
+    public static final class DocCountFieldType extends MappedFieldType {
+
+        public static final DocCountFieldType INSTANCE = new DocCountFieldType();
+
+        private static final Long defaultValue = 1L;
+
+        public DocCountFieldType() {
+            super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap());
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public String familyTypeName() {
+            return NumberFieldMapper.NumberType.LONG.typeName();
+        }
+
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            return new DocValuesFieldExistsQuery(NAME);
+        }
+
+        @Override
+        public Query termQuery(Object value, QueryShardContext context) {
+            throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] is not searchable");
+        }
+
+        @Override
+        public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
+            if (format != null) {
+                throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
+            }
+
+            return new SourceValueFetcher(name(), context, defaultValue) {
+                @Override
+                protected Object parseSourceValue(Object value) {
+                    if ("".equals(value)) {
+                        return defaultValue;
+                    } else {
+                        return NumberFieldMapper.NumberType.objectToLong(value, false);
+                    }
+                }
+            };
+        }
+    }
+
+    private DocCountFieldMapper() {
+        super(DocCountFieldType.INSTANCE);
+    }
+
+    @Override
+    protected void parseCreateField(ParseContext context) throws IOException {
+        XContentParser parser = context.parser();
+        XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser);
+
+        long value = parser.longValue(false);
+        if (value <= 0) {
+            throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer.");
+        }
+        final Field docCount = new NumericDocValuesField(NAME, value);
+        context.doc().add(docCount);
+    }
+
+    @Override
+    public void preParse(ParseContext context) {}
+
+    @Override
+    public DocCountFieldType fieldType() {
+        return (DocCountFieldType) super.fieldType();
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java
index f188c47e7a9de..150f003e1a766 100644
--- a/server/src/main/java/org/opensearch/indices/IndicesModule.java
+++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java
@@ -47,6 +47,7 @@
 import org.opensearch.index.mapper.CompletionFieldMapper;
 import org.opensearch.index.mapper.DataStreamFieldMapper;
 import org.opensearch.index.mapper.DateFieldMapper;
+import org.opensearch.index.mapper.DocCountFieldMapper;
 import org.opensearch.index.mapper.FieldAliasMapper;
 import org.opensearch.index.mapper.FieldNamesFieldMapper;
 import org.opensearch.index.mapper.GeoPointFieldMapper;
@@ -190,6 +191,7 @@ private static Map<String, MetadataFieldMapper.TypeParser> initBuiltInMetadataMa
         builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER);
         builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER);
         builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER);
+        builtInMetadataMappers.put(DocCountFieldMapper.NAME, DocCountFieldMapper.PARSER);
         // _field_names must be added last so that it has a chance to see all the other mappers
         builtInMetadataMappers.put(FieldNamesFieldMapper.NAME, FieldNamesFieldMapper.PARSER);
         return Collections.unmodifiableMap(builtInMetadataMappers);
diff --git a/server/src/main/java/org/opensearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/opensearch/search/aggregations/AggregatorBase.java
index db51f00056f72..1d315980512b4 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/AggregatorBase.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/AggregatorBase.java
@@ -200,7 +200,7 @@ public Map<String, Object> metadata() {
 
     @Override
     public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
-        preGetSubLeafCollectors();
+        preGetSubLeafCollectors(ctx);
         final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
         return getLeafCollector(ctx, sub);
     }
@@ -209,7 +209,7 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws
      * Can be overridden by aggregator implementations that like the perform an operation before the leaf collectors
      * of children aggregators are instantiated for the next segment.
      */
-    protected void preGetSubLeafCollectors() throws IOException {}
+    protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException {}
 
     /**
      * Can be overridden by aggregator implementation to be called back when the collection phase starts.
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java
index 79512586d06de..67af0b13eed3b 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java
@@ -31,9 +31,10 @@
 
 package org.opensearch.search.aggregations.bucket;
 
+import org.apache.lucene.index.LeafReaderContext;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.util.BigArrays;
-import org.opensearch.common.util.IntArray;
+import org.opensearch.common.util.LongArray;
 import org.opensearch.search.aggregations.AggregationExecutionException;
 import org.opensearch.search.aggregations.Aggregator;
 import org.opensearch.search.aggregations.AggregatorBase;
@@ -70,7 +71,8 @@ public abstract class BucketsAggregator extends AggregatorBase {
 
     private final BigArrays bigArrays;
     private final IntConsumer multiBucketConsumer;
-    private IntArray docCounts;
+    private LongArray docCounts;
+    protected final DocCountProvider docCountProvider;
 
     public BucketsAggregator(
         String name,
@@ -87,7 +89,8 @@ public BucketsAggregator(
         } else {
             multiBucketConsumer = (count) -> {};
         }
-        docCounts = bigArrays.newIntArray(1, true);
+        docCounts = bigArrays.newLongArray(1, true);
+        docCountProvider = new DocCountProvider();
     }
 
     /**
@@ -116,7 +119,8 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long
      * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized.
      */
     public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
-        if (docCounts.increment(bucketOrd, 1) == 1) {
+        long docCount = docCountProvider.getDocCount(doc);
+        if (docCounts.increment(bucketOrd, docCount) == docCount) {
             // We calculate the final number of buckets only during the reduce phase. But we still need to
             // trigger bucket consumer from time to time in order to give it a chance to check available memory and break
             // the execution if we are running out. To achieve that we are passing 0 as a bucket count.
@@ -147,11 +151,11 @@ public final void mergeBuckets(long[] mergeMap, long newNumBuckets) {
      * merge the actual ordinals and doc ID deltas.
      */
     public final void mergeBuckets(long newNumBuckets, LongUnaryOperator mergeMap) {
-        try (IntArray oldDocCounts = docCounts) {
-            docCounts = bigArrays.newIntArray(newNumBuckets, true);
+        try (LongArray oldDocCounts = docCounts) {
+            docCounts = bigArrays.newLongArray(newNumBuckets, true);
             docCounts.fill(0, newNumBuckets, 0);
             for (long i = 0; i < oldDocCounts.size(); i++) {
-                int docCount = oldDocCounts.get(i);
+                long docCount = oldDocCounts.get(i);
 
                 if (docCount == 0) continue;
 
@@ -164,14 +168,14 @@ public final void mergeBuckets(long newNumBuckets, LongUnaryOperator mergeMap) {
         }
     }
 
-    public IntArray getDocCounts() {
+    public LongArray getDocCounts() {
         return docCounts;
     }
 
     /**
      * Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal)
      */
-    public final void incrementBucketDocCount(long bucketOrd, int inc) {
+    public final void incrementBucketDocCount(long bucketOrd, long inc) {
         docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
         docCounts.increment(bucketOrd, inc);
     }
@@ -179,7 +183,7 @@ public final void incrementBucketDocCount(long bucketOrd, int inc) {
     /**
      * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal)
      */
-    public final int bucketDocCount(long bucketOrd) {
+    public final long bucketDocCount(long bucketOrd) {
         if (bucketOrd >= docCounts.size()) {
             // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator.
             // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value
@@ -337,7 +341,7 @@ protected final <B> InternalAggregation[] buildAggregationsForFixedBucketCount(
      */
     @FunctionalInterface
     protected interface BucketBuilderForFixedCount<B> {
-        B build(int offsetInOwningOrd, int docCount, InternalAggregations subAggregationResults);
+        B build(int offsetInOwningOrd, long docCount, InternalAggregations subAggregationResults);
     }
 
     /**
@@ -432,7 +436,7 @@ protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(
      */
     @FunctionalInterface
     protected interface BucketBuilderForVariable<B> {
-        B build(long bucketValue, int docCount, InternalAggregations subAggregationResults);
+        B build(long bucketValue, long docCount, InternalAggregations subAggregationResults);
     }
 
     /**
@@ -466,7 +470,7 @@ public BucketComparator bucketComparator(String key, SortOrder order) {
             return super.bucketComparator(key, order);
         }
         if (key == null || "doc_count".equals(key)) {
-            return (lhs, rhs) -> order.reverseMul() * Integer.compare(bucketDocCount(lhs), bucketDocCount(rhs));
+            return (lhs, rhs) -> order.reverseMul() * Long.compare(bucketDocCount(lhs), bucketDocCount(rhs));
         }
         throw new IllegalArgumentException(
             "Ordering on a single-bucket aggregation can only be done on its doc_count. "
@@ -478,6 +482,13 @@ public BucketComparator bucketComparator(String key, SortOrder order) {
         );
     }
 
+    @Override
+    protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException {
+        super.preGetSubLeafCollectors(ctx);
+        // Set LeafReaderContext to the doc_count provider
+        docCountProvider.setLeafReaderContext(ctx);
+    }
+
     public static boolean descendsFromGlobalAggregator(Aggregator parent) {
         while (parent != null) {
             if (parent.getClass() == GlobalAggregator.class) {
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/DocCountProvider.java
new file mode 100644
index 0000000000000..30fe16ec4eb0e
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/DocCountProvider.java
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.search.aggregations.bucket;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.opensearch.index.mapper.DocCountFieldMapper;
+
+import java.io.IOException;
+
+/**
+ * An implementation of a doc_count provider that reads the value
+ * of the _doc_count field in the document. If a document does not have a
+ * _doc_count field the implementation will return 1 as the default value.
+ */
+public class DocCountProvider {
+
+    private NumericDocValues docCountValues;
+
+    public long getDocCount(int doc) throws IOException {
+        if (docCountValues != null && docCountValues.advanceExact(doc)) {
+            return docCountValues.longValue();
+        } else {
+            return 1L;
+        }
+    }
+
+    public void setLeafReaderContext(LeafReaderContext ctx) throws IOException {
+        docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java
index c33887878dbd6..c9efbf7d34348 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java
@@ -228,7 +228,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
             List<InternalAdjacencyMatrix.InternalBucket> buckets = new ArrayList<>(filters.length);
             for (int i = 0; i < keys.length; i++) {
                 long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], i);
-                int docCount = bucketDocCount(bucketOrd);
+                long docCount = bucketDocCount(bucketOrd);
                 // Empty buckets are not returned because this aggregation will commonly be used under a
                 // a date-histogram where we will look for transactions over time and can expect many
                 // empty buckets.
@@ -245,7 +245,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
             for (int i = 0; i < keys.length; i++) {
                 for (int j = i + 1; j < keys.length; j++) {
                     long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], pos);
-                    int docCount = bucketDocCount(bucketOrd);
+                    long docCount = bucketDocCount(bucketOrd);
                     // Empty buckets are not returned due to potential for very sparse matrices
                     if (docCount > 0) {
                         String intersectKey = keys[i] + separator + keys[j];
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java
index a907d17a731fe..8b487fc499602 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java
@@ -196,7 +196,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
             int slot = queue.pop();
             CompositeKey key = queue.toCompositeKey(slot);
             InternalAggregations aggs = subAggsForBuckets[slot];
-            int docCount = queue.getDocCount(slot);
+            long docCount = queue.getDocCount(slot);
             buckets[queue.size()] = new InternalComposite.InternalBucket(
                 sourceNames,
                 formats,
@@ -504,7 +504,8 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 try {
-                    if (queue.addIfCompetitive(indexSortPrefix)) {
+                    long docCount = docCountProvider.getDocCount(doc);
+                    if (queue.addIfCompetitive(indexSortPrefix, docCount)) {
                         if (builder != null && lastDoc != doc) {
                             builder.add(doc);
                             lastDoc = doc;
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
index ac50869b5d79b..6ee1682a7b196 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java
@@ -38,7 +38,7 @@
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
-import org.opensearch.common.util.IntArray;
+import org.opensearch.common.util.LongArray;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 
 import java.io.IOException;
@@ -80,7 +80,7 @@ public int hashCode() {
     private final Map<Slot, Integer> map;
     private final SingleDimensionValuesSource<?>[] arrays;
 
-    private IntArray docCounts;
+    private LongArray docCounts;
     private boolean afterKeyIsSet = false;
 
     /**
@@ -103,7 +103,7 @@ public int hashCode() {
                 sources[i].setAfter(afterKey.get(i));
             }
         }
-        this.docCounts = bigArrays.newIntArray(1, false);
+        this.docCounts = bigArrays.newLongArray(1, false);
     }
 
     @Override
@@ -143,19 +143,19 @@ Comparable getUpperValueLeadSource() throws IOException {
     /**
      * Returns the document count in <code>slot</code>.
      */
-    int getDocCount(int slot) {
+    long getDocCount(int slot) {
         return docCounts.get(slot);
     }
 
     /**
      * Copies the current value in <code>slot</code>.
      */
-    private void copyCurrent(int slot) {
+    private void copyCurrent(int slot, long value) {
         for (int i = 0; i < arrays.length; i++) {
             arrays[i].copyCurrent(slot);
         }
         docCounts = bigArrays.grow(docCounts, slot + 1);
-        docCounts.set(slot, 1);
+        docCounts.set(slot, value);
     }
 
     /**
@@ -265,8 +265,8 @@ LeafBucketCollector getLeafCollector(Comparable forceLeadSourceValue, LeafReader
      * Check if the current candidate should be added in the queue.
      * @return <code>true</code> if the candidate is competitive (added or already in the queue).
      */
-    boolean addIfCompetitive() {
-        return addIfCompetitive(0);
+    boolean addIfCompetitive(long inc) {
+        return addIfCompetitive(0, inc);
     }
 
     /**
@@ -279,12 +279,12 @@ boolean addIfCompetitive() {
      *
      * @throws CollectionTerminatedException if the current collection can be terminated early due to index sorting.
      */
-    boolean addIfCompetitive(int indexSortSourcePrefix) {
+    boolean addIfCompetitive(int indexSortSourcePrefix, long inc) {
         // checks if the candidate key is competitive
         Integer topSlot = compareCurrent();
         if (topSlot != null) {
             // this key is already in the top N, skip it
-            docCounts.increment(topSlot, 1);
+            docCounts.increment(topSlot, inc);
             return true;
         }
         if (afterKeyIsSet) {
@@ -325,7 +325,7 @@ boolean addIfCompetitive(int indexSortSourcePrefix) {
             newSlot = size();
         }
         // move the candidate key to its new slot
-        copyCurrent(newSlot);
+        copyCurrent(newSlot, inc);
         map.put(new Slot(newSlot), newSlot);
         add(newSlot);
         return true;
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/SortedDocsProducer.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/SortedDocsProducer.java
index 3cdbee6b119ec..bd0a4f13ddf08 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/SortedDocsProducer.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/SortedDocsProducer.java
@@ -40,6 +40,7 @@
 import org.apache.lucene.util.DocIdSetBuilder;
 import org.opensearch.common.Nullable;
 import org.opensearch.search.aggregations.LeafBucketCollector;
+import org.opensearch.search.aggregations.bucket.DocCountProvider;
 
 import java.io.IOException;
 
@@ -74,6 +75,8 @@ protected boolean processBucket(
     ) throws IOException {
         final int[] topCompositeCollected = new int[1];
         final boolean[] hasCollected = new boolean[1];
+        final DocCountProvider docCountProvider = new DocCountProvider();
+        docCountProvider.setLeafReaderContext(context);
         final LeafBucketCollector queueCollector = new LeafBucketCollector() {
             int lastDoc = -1;
 
@@ -86,7 +89,8 @@ protected boolean processBucket(
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 hasCollected[0] = true;
-                if (queue.addIfCompetitive()) {
+                long docCount = docCountProvider.getDocCount(doc);
+                if (queue.addIfCompetitive(docCount)) {
                     topCompositeCollected[0]++;
                     if (adder != null && doc != lastDoc) {
                         if (remainingBits == 0) {
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/nested/NestedAggregator.java
index 5dc4d1c780350..02507f8569ffa 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/nested/NestedAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/nested/NestedAggregator.java
@@ -130,7 +130,8 @@ public void collect(int parentDoc, long bucket) throws IOException {
     }
 
     @Override
-    protected void preGetSubLeafCollectors() throws IOException {
+    protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException {
+        super.preGetSubLeafCollectors(ctx);
         processBufferedDocs();
     }
 
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index 4fcc42b6211b2..2475ade68bc07 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -43,7 +43,6 @@
 import org.opensearch.common.io.stream.StreamOutput;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
-import org.opensearch.common.util.IntArray;
 import org.opensearch.common.util.LongArray;
 import org.opensearch.common.util.LongHash;
 import org.opensearch.common.xcontent.XContentBuilder;
@@ -297,7 +296,7 @@ protected void doClose() {
     static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
 
         private LongUnaryOperator mapping;
-        private IntArray segmentDocCounts;
+        private LongArray segmentDocCounts;
 
         LowCardinality(
             String name,
@@ -332,7 +331,7 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
                 metadata
             );
             assert factories == null || factories.countAggregators() == 0;
-            this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
+            this.segmentDocCounts = context.bigArrays().newLongArray(1, true);
         }
 
         @Override
@@ -356,7 +355,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                             return;
                         }
                         int ord = singleValues.ordValue();
-                        segmentDocCounts.increment(ord + 1, 1);
+                        long docCount = docCountProvider.getDocCount(doc);
+                        segmentDocCounts.increment(ord + 1, docCount);
                     }
                 });
             }
@@ -369,7 +369,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                         return;
                     }
                     for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
-                        segmentDocCounts.increment(segmentOrd + 1, 1);
+                        long docCount = docCountProvider.getDocCount(doc);
+                        segmentDocCounts.increment(segmentOrd + 1, docCount);
                     }
                 }
             });
@@ -392,7 +393,7 @@ private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IO
             for (long i = 1; i < segmentDocCounts.size(); i++) {
                 // We use set(...) here, because we need to reset the slow to 0.
                 // segmentDocCounts get reused over the segments and otherwise counts would be too high.
-                int inc = segmentDocCounts.set(i, 0);
+                long inc = segmentDocCounts.set(i, 0);
                 if (inc == 0) {
                     continue;
                 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DocCountFieldMapperTests.java
new file mode 100644
index 0000000000000..634276a34b9af
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/mapper/DocCountFieldMapperTests.java
@@ -0,0 +1,83 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableField;
+
+import static org.hamcrest.Matchers.containsString;
+
+public class DocCountFieldMapperTests extends MapperServiceTestCase {
+
+    private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE;
+    private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME;
+
+    /**
+     * Test parsing field mapping and adding simple field
+     */
+    public void testParseValue() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        ParsedDocument doc = mapper.parse(source(b -> b.field("foo", 500).field(CONTENT_TYPE, 100)));
+
+        IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD);
+        assertEquals(100L, field.numericValue());
+        assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType());
+        assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length);
+    }
+
+    public void testInvalidDocument_NegativeDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, -100))));
+        assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer"));
+    }
+
+    public void testInvalidDocument_ZeroDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 0))));
+        assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer"));
+    }
+
+    public void testInvalidDocument_NonNumericDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, "foo"))));
+        assertThat(
+            e.getCause().getMessage(),
+            containsString("Failed to parse object: expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")
+        );
+    }
+
+    public void testInvalidDocument_FractionalDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23))));
+        assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss"));
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DocCountFieldTypeTests.java
new file mode 100644
index 0000000000000..4961736ea933c
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/mapper/DocCountFieldTypeTests.java
@@ -0,0 +1,69 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.opensearch.index.query.QueryShardException;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public class DocCountFieldTypeTests extends FieldTypeTestCase {
+
+    public void testTermQuery() {
+        MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType();
+        QueryShardException e = expectThrows(QueryShardException.class, () -> ft.termQuery(10L, randomMockShardContext()));
+        assertEquals("Field [_doc_count] of type [_doc_count] is not searchable", e.getMessage());
+    }
+
+    public void testRangeQuery() {
+        MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType();
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> ft.rangeQuery(null, null, randomBoolean(), randomBoolean(), null, null, null, null)
+        );
+        assertEquals("Field [_doc_count] of type [_doc_count] does not support range queries", e.getMessage());
+    }
+
+    public void testExistsQuery() {
+        MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType();
+        assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery);
+    }
+
+    public void testFetchSourceValue() throws IOException {
+        MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType();
+        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14));
+        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14"));
+        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, ""));
+        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null));
+    }
+}
diff --git a/server/src/test/java/org/opensearch/indices/IndicesModuleTests.java b/server/src/test/java/org/opensearch/indices/IndicesModuleTests.java
index afcc6aa006500..ec7ab06ac86a6 100644
--- a/server/src/test/java/org/opensearch/indices/IndicesModuleTests.java
+++ b/server/src/test/java/org/opensearch/indices/IndicesModuleTests.java
@@ -33,6 +33,7 @@
 package org.opensearch.indices;
 
 import org.opensearch.Version;
+import org.opensearch.index.mapper.DocCountFieldMapper;
 import org.opensearch.index.mapper.DataStreamFieldMapper;
 import org.opensearch.index.mapper.FieldNamesFieldMapper;
 import org.opensearch.index.mapper.IdFieldMapper;
@@ -98,6 +99,7 @@ public Map<String, MetadataFieldMapper.TypeParser> getMetadataMappers() {
         NestedPathFieldMapper.NAME,
         VersionFieldMapper.NAME,
         SeqNoFieldMapper.NAME,
+        DocCountFieldMapper.NAME,
         FieldNamesFieldMapper.NAME };
 
     public void testBuiltinMappers() {
diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/DocCountProviderTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/DocCountProviderTests.java
new file mode 100644
index 0000000000000..708d683fe2484
--- /dev/null
+++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/DocCountProviderTests.java
@@ -0,0 +1,107 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.search.aggregations.bucket;
+
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.opensearch.common.CheckedConsumer;
+import org.opensearch.index.mapper.DocCountFieldMapper;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.NumberFieldMapper;
+import org.opensearch.search.aggregations.AggregatorTestCase;
+import org.opensearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
+import org.opensearch.search.aggregations.bucket.global.InternalGlobal;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static java.util.Collections.singleton;
+
+public class DocCountProviderTests extends AggregatorTestCase {
+
+    private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME;
+    private static final String NUMBER_FIELD = "number";
+
+    public void testDocsWithDocCount() throws IOException {
+        testAggregation(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(List.of(new NumericDocValuesField(DOC_COUNT_FIELD, 4), new SortedNumericDocValuesField(NUMBER_FIELD, 1)));
+            iw.addDocument(List.of(new NumericDocValuesField(DOC_COUNT_FIELD, 5), new SortedNumericDocValuesField(NUMBER_FIELD, 7)));
+            iw.addDocument(
+                List.of(
+                    // Intentionally omit doc_count field
+                    new SortedNumericDocValuesField(NUMBER_FIELD, 1)
+                )
+            );
+        }, global -> { assertEquals(10, global.getDocCount()); });
+    }
+
+    public void testDocsWithoutDocCount() throws IOException {
+        testAggregation(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1)));
+        }, global -> { assertEquals(3, global.getDocCount()); });
+    }
+
+    public void testQueryFiltering() throws IOException {
+        testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> {
+            iw.addDocument(List.of(new NumericDocValuesField(DOC_COUNT_FIELD, 4), new IntPoint(NUMBER_FIELD, 6)));
+            iw.addDocument(List.of(new NumericDocValuesField(DOC_COUNT_FIELD, 2), new IntPoint(NUMBER_FIELD, 5)));
+            iw.addDocument(
+                List.of(
+                    // Intentionally omit doc_count field
+                    new IntPoint(NUMBER_FIELD, 1)
+                )
+            );
+            iw.addDocument(
+                List.of(
+                    // Intentionally omit doc_count field
+                    new IntPoint(NUMBER_FIELD, 5)
+                )
+            );
+        }, global -> { assertEquals(3, global.getDocCount()); });
+    }
+
+    private void testAggregation(Query query, CheckedConsumer<RandomIndexWriter, IOException> indexer, Consumer<InternalGlobal> verify)
+        throws IOException {
+        GlobalAggregationBuilder aggregationBuilder = new GlobalAggregationBuilder("_name");
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD, NumberFieldMapper.NumberType.LONG);
+        MappedFieldType docCountFieldType = new DocCountFieldMapper.DocCountFieldType();
+        testCase(aggregationBuilder, query, indexer, verify, fieldType, docCountFieldType);
+    }
+}
diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java
index 3dccdf8dab95e..4571ae7496ae1 100644
--- a/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java
@@ -347,7 +347,7 @@ private void testRandomCase(boolean forceMerge, boolean missingBucket, int index
                             final LeafBucketCollector leafCollector = new LeafBucketCollector() {
                                 @Override
                                 public void collect(int doc, long bucket) throws IOException {
-                                    queue.addIfCompetitive(indexSortSourcePrefix);
+                                    queue.addIfCompetitive(indexSortSourcePrefix, 1);
                                 }
                             };
                             final LeafBucketCollector queueCollector = queue.getLeafCollector(leafReaderContext, leafCollector);