From 05562e66de6a383f6a2fe5071496253679a04455 Mon Sep 17 00:00:00 2001
From: Johannes Kron <kron@webrtc.org>
Date: Thu, 12 Aug 2021 17:17:39 +0200
Subject: [PATCH] Merge M93: Add max pre-decode queue size threshold for pacing

When pacing is enabled for the low latency rendering path,
frames are sent to the decoder in regular intervals. In case of a
jitter, these frames intervals could add up to create a large latency.
Hence, disable frame pacing if the pre-decode queue grows beyond the
threshold. The threshold for when to disable frame pacing is set
through a field trial. The default value is high enough so that
the behavior is not changed unless the field trial is specified.

(cherry picked from commit 2ddc39e2b9ab65357d6a09695982755ae9753882)

Bug: chromium:1237402
Change-Id: I901fd579f68da286eca3d654118f60d3c55e21ce
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/228241
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Johannes Kron <kron@webrtc.org>
Cr-Original-Commit-Position: refs/heads/master@{#34705}
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/228540
Cr-Commit-Position: refs/branch-heads/4577@{#4}
Cr-Branched-From: 51969310ef432a9c340bb091eea1f1553ca01587-refs/heads/master@{#34463}
---
 modules/video_coding/frame_buffer2.cc         | 12 ++-
 modules/video_coding/frame_buffer2.h          |  8 ++
 .../video_coding/frame_buffer2_unittest.cc    |  3 +-
 modules/video_coding/receiver.cc              |  3 +-
 modules/video_coding/timing.cc                | 11 ++-
 modules/video_coding/timing.h                 | 11 ++-
 modules/video_coding/timing_unittest.cc       | 99 +++++++++++++++----
 7 files changed, 119 insertions(+), 28 deletions(-)

diff --git a/modules/video_coding/frame_buffer2.cc b/modules/video_coding/frame_buffer2.cc
index ba77b72d4f..80f9eb1814 100644
--- a/modules/video_coding/frame_buffer2.cc
+++ b/modules/video_coding/frame_buffer2.cc
@@ -63,7 +63,11 @@ FrameBuffer::FrameBuffer(Clock* clock,
       last_log_non_decoded_ms_(-kLogNonDecodedIntervalMs),
       add_rtt_to_playout_delay_(
           webrtc::field_trial::IsEnabled("WebRTC-AddRttToPlayoutDelay")),
-      rtt_mult_settings_(RttMultExperiment::GetRttMultValue()) {
+      rtt_mult_settings_(RttMultExperiment::GetRttMultValue()),
+      zero_playout_delay_max_decode_queue_size_("max_decode_queue_size",
+                                                kMaxFramesBuffered) {
+  ParseFieldTrial({&zero_playout_delay_max_decode_queue_size_},
+                  field_trial::FindFullName("WebRTC-ZeroPlayoutDelay"));
   callback_checker_.Detach();
 }
 
@@ -212,7 +216,11 @@ int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
     if (frame->RenderTime() == -1) {
       frame->SetRenderTime(timing_->RenderTimeMs(frame->Timestamp(), now_ms));
     }
-    wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms);
+    bool too_many_frames_queued =
+        frames_.size() > zero_playout_delay_max_decode_queue_size_ ? true
+                                                                   : false;
+    wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms,
+                                      too_many_frames_queued);
 
     // This will cause the frame buffer to prefer high framerate rather
     // than high resolution in the case of the decoder not decoding fast
diff --git a/modules/video_coding/frame_buffer2.h b/modules/video_coding/frame_buffer2.h
index 721668a123..c7d8fcd403 100644
--- a/modules/video_coding/frame_buffer2.h
+++ b/modules/video_coding/frame_buffer2.h
@@ -25,6 +25,7 @@
 #include "modules/video_coding/jitter_estimator.h"
 #include "modules/video_coding/utility/decoded_frames_history.h"
 #include "rtc_base/event.h"
+#include "rtc_base/experiments/field_trial_parser.h"
 #include "rtc_base/experiments/rtt_mult_experiment.h"
 #include "rtc_base/numerics/sequence_number_util.h"
 #include "rtc_base/synchronization/mutex.h"
@@ -188,6 +189,13 @@ class FrameBuffer {
 
   // rtt_mult experiment settings.
   const absl::optional<RttMultExperiment::Settings> rtt_mult_settings_;
+
+  // Maximum number of frames in the decode queue to allow pacing. If the
+  // queue grows beyond the max limit, pacing will be disabled and frames will
+  // be pushed to the decoder as soon as possible. This only has an effect
+  // when the low-latency rendering path is active, which is indicated by
+  // the frame's render time == 0.
+  FieldTrialParameter<unsigned> zero_playout_delay_max_decode_queue_size_;
 };
 
 }  // namespace video_coding
diff --git a/modules/video_coding/frame_buffer2_unittest.cc b/modules/video_coding/frame_buffer2_unittest.cc
index 68acf813ae..f2a0589411 100644
--- a/modules/video_coding/frame_buffer2_unittest.cc
+++ b/modules/video_coding/frame_buffer2_unittest.cc
@@ -56,7 +56,8 @@ class VCMTimingFake : public VCMTiming {
   }
 
   int64_t MaxWaitingTime(int64_t render_time_ms,
-                         int64_t now_ms) const override {
+                         int64_t now_ms,
+                         bool too_many_frames_queued) const override {
     return render_time_ms - now_ms - kDecodeTime;
   }
 
diff --git a/modules/video_coding/receiver.cc b/modules/video_coding/receiver.cc
index 6b942fbe57..8e8f0e1ee2 100644
--- a/modules/video_coding/receiver.cc
+++ b/modules/video_coding/receiver.cc
@@ -141,7 +141,8 @@ VCMEncodedFrame* VCMReceiver::FrameForDecoding(uint16_t max_wait_time_ms,
     uint16_t new_max_wait_time =
         static_cast<uint16_t>(VCM_MAX(available_wait_time, 0));
     uint32_t wait_time_ms = rtc::saturated_cast<uint32_t>(
-        timing_->MaxWaitingTime(render_time_ms, clock_->TimeInMilliseconds()));
+        timing_->MaxWaitingTime(render_time_ms, clock_->TimeInMilliseconds(),
+                                /*too_many_frames_queued=*/false));
     if (new_max_wait_time < wait_time_ms) {
       // We're not allowed to wait until the frame is supposed to be rendered,
       // waiting as long as we're allowed to avoid busy looping, and then return
diff --git a/modules/video_coding/timing.cc b/modules/video_coding/timing.cc
index 91f51dc12b..d540f73693 100644
--- a/modules/video_coding/timing.cc
+++ b/modules/video_coding/timing.cc
@@ -210,14 +210,19 @@ int VCMTiming::RequiredDecodeTimeMs() const {
 }
 
 int64_t VCMTiming::MaxWaitingTime(int64_t render_time_ms,
-                                  int64_t now_ms) const {
+                                  int64_t now_ms,
+                                  bool too_many_frames_queued) const {
   MutexLock lock(&mutex_);
 
   if (render_time_ms == 0 && zero_playout_delay_min_pacing_->us() > 0) {
     // |render_time_ms| == 0 indicates that the frame should be decoded and
     // rendered as soon as possible. However, the decoder can be choked if too
-    // many frames are sent at ones. Therefore, limit the interframe delay to
-    // |zero_playout_delay_min_pacing_|.
+    // many frames are sent at once. Therefore, limit the interframe delay to
+    // |zero_playout_delay_min_pacing_| unless too many frames are queued in
+    // which case the frames are sent to the decoder at once.
+    if (too_many_frames_queued) {
+      return 0;
+    }
     int64_t earliest_next_decode_start_time =
         last_decode_scheduled_ts_ + zero_playout_delay_min_pacing_->ms();
     int64_t max_wait_time_ms = now_ms >= earliest_next_decode_start_time
diff --git a/modules/video_coding/timing.h b/modules/video_coding/timing.h
index 2f3fdfe057..7f891e4b9b 100644
--- a/modules/video_coding/timing.h
+++ b/modules/video_coding/timing.h
@@ -82,8 +82,15 @@ class VCMTiming {
   virtual int64_t RenderTimeMs(uint32_t frame_timestamp, int64_t now_ms) const;
 
   // Returns the maximum time in ms that we can wait for a frame to become
-  // complete before we must pass it to the decoder.
-  virtual int64_t MaxWaitingTime(int64_t render_time_ms, int64_t now_ms) const;
+  // complete before we must pass it to the decoder. render_time_ms==0 indicates
+  // that the frames should be processed as quickly as possible, with possibly
+  // only a small delay added to make sure that the decoder is not overloaded.
+  // In this case, the parameter too_many_frames_queued is used to signal that
+  // the decode queue is full and that the frame should be decoded as soon as
+  // possible.
+  virtual int64_t MaxWaitingTime(int64_t render_time_ms,
+                                 int64_t now_ms,
+                                 bool too_many_frames_queued) const;
 
   // Returns the current target delay which is required delay + decode time +
   // render delay.
diff --git a/modules/video_coding/timing_unittest.cc b/modules/video_coding/timing_unittest.cc
index 988e55faab..cc87a3b4e0 100644
--- a/modules/video_coding/timing_unittest.cc
+++ b/modules/video_coding/timing_unittest.cc
@@ -36,7 +36,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.set_render_delay(0);
   uint32_t wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   // First update initializes the render time. Since we have no decode delay
   // we get wait_time_ms = renderTime - now - renderDelay = jitter.
   EXPECT_EQ(jitter_delay_ms, wait_time_ms);
@@ -48,7 +48,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.UpdateCurrentDelay(timestamp);
   wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   // Since we gradually increase the delay we only get 100 ms every second.
   EXPECT_EQ(jitter_delay_ms - 10, wait_time_ms);
 
@@ -57,7 +57,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.UpdateCurrentDelay(timestamp);
   wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   EXPECT_EQ(jitter_delay_ms, wait_time_ms);
 
   // Insert frames without jitter, verify that this gives the exact wait time.
@@ -70,7 +70,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.UpdateCurrentDelay(timestamp);
   wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   EXPECT_EQ(jitter_delay_ms, wait_time_ms);
 
   // Add decode time estimates for 1 second.
@@ -85,7 +85,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.UpdateCurrentDelay(timestamp);
   wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   EXPECT_EQ(jitter_delay_ms, wait_time_ms);
 
   const int kMinTotalDelayMs = 200;
@@ -97,7 +97,7 @@ TEST(ReceiverTimingTest, JitterDelay) {
   timing.set_render_delay(kRenderDelayMs);
   wait_time_ms = timing.MaxWaitingTime(
       timing.RenderTimeMs(timestamp, clock.TimeInMilliseconds()),
-      clock.TimeInMilliseconds());
+      clock.TimeInMilliseconds(), /*too_many_frames_queued=*/false);
   // We should at least have kMinTotalDelayMs - decodeTime (10) - renderTime
   // (10) to wait.
   EXPECT_EQ(kMinTotalDelayMs - kDecodeTimeMs - kRenderDelayMs, wait_time_ms);
@@ -140,16 +140,26 @@ TEST(ReceiverTimingTest, MaxWaitingTimeIsZeroForZeroRenderTime) {
   for (int i = 0; i < 10; ++i) {
     clock.AdvanceTimeMilliseconds(kTimeDeltaMs);
     int64_t now_ms = clock.TimeInMilliseconds();
-    EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
+    EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                    /*too_many_frames_queued=*/false),
+              0);
   }
   // Another frame submitted at the same time also returns a negative max
   // waiting time.
   int64_t now_ms = clock.TimeInMilliseconds();
-  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
+  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            0);
   // MaxWaitingTime should be less than zero even if there's a burst of frames.
-  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
-  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
-  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
+  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            0);
+  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            0);
+  EXPECT_LT(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            0);
 }
 
 TEST(ReceiverTimingTest, MaxWaitingTimeZeroDelayPacingExperiment) {
@@ -168,27 +178,38 @@ TEST(ReceiverTimingTest, MaxWaitingTimeZeroDelayPacingExperiment) {
   for (int i = 0; i < 10; ++i) {
     clock.AdvanceTimeMilliseconds(kTimeDeltaMs);
     int64_t now_ms = clock.TimeInMilliseconds();
-    EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), 0);
+    EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                    /*too_many_frames_queued=*/false),
+              0);
     timing.SetLastDecodeScheduledTimestamp(now_ms);
   }
   // Another frame submitted at the same time is paced according to the field
   // trial setting.
   int64_t now_ms = clock.TimeInMilliseconds();
-  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), kMinPacingMs);
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            kMinPacingMs);
   // If there's a burst of frames, the wait time is calculated based on next
   // decode time.
-  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), kMinPacingMs);
-  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), kMinPacingMs);
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            kMinPacingMs);
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            kMinPacingMs);
   // Allow a few ms to pass, this should be subtracted from the MaxWaitingTime.
   constexpr int64_t kTwoMs = 2;
   clock.AdvanceTimeMilliseconds(kTwoMs);
   now_ms = clock.TimeInMilliseconds();
-  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms),
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
             kMinPacingMs - kTwoMs);
   // A frame is decoded at the current time, the wait time should be restored to
   // pacing delay.
   timing.SetLastDecodeScheduledTimestamp(now_ms);
-  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms), kMinPacingMs);
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            kMinPacingMs);
 }
 
 TEST(ReceiverTimingTest, DefaultMaxWaitingTimeUnaffectedByPacingExperiment) {
@@ -206,16 +227,56 @@ TEST(ReceiverTimingTest, DefaultMaxWaitingTimeUnaffectedByPacingExperiment) {
   int64_t render_time_ms = now_ms + 30;
   // Estimate the internal processing delay from the first frame.
   int64_t estimated_processing_delay =
-      (render_time_ms - now_ms) - timing.MaxWaitingTime(render_time_ms, now_ms);
+      (render_time_ms - now_ms) -
+      timing.MaxWaitingTime(render_time_ms, now_ms,
+                            /*too_many_frames_queued=*/false);
   EXPECT_GT(estimated_processing_delay, 0);
 
   // Any other frame submitted at the same time should be scheduled according to
   // its render time.
   for (int i = 0; i < 5; ++i) {
     render_time_ms += kTimeDeltaMs;
-    EXPECT_EQ(timing.MaxWaitingTime(render_time_ms, now_ms),
+    EXPECT_EQ(timing.MaxWaitingTime(render_time_ms, now_ms,
+                                    /*too_many_frames_queued=*/false),
               render_time_ms - now_ms - estimated_processing_delay);
   }
 }
 
+TEST(ReceiverTiminTest, MaxWaitingTimeReturnsZeroIfTooManyFramesQueuedIsTrue) {
+  // The minimum pacing is enabled by a field trial and active if the RTP
+  // playout delay header extension is set to min==0.
+  constexpr int64_t kMinPacingMs = 3;
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-ZeroPlayoutDelay/min_pacing:3ms/");
+  constexpr int64_t kStartTimeUs = 3.15e13;  // About one year in us.
+  constexpr int64_t kTimeDeltaMs = 1000.0 / 60.0;
+  constexpr int64_t kZeroRenderTimeMs = 0;
+  SimulatedClock clock(kStartTimeUs);
+  VCMTiming timing(&clock);
+  timing.Reset();
+  // MaxWaitingTime() returns zero for evenly spaced video frames.
+  for (int i = 0; i < 10; ++i) {
+    clock.AdvanceTimeMilliseconds(kTimeDeltaMs);
+    int64_t now_ms = clock.TimeInMilliseconds();
+    EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                    /*too_many_frames_queued=*/false),
+              0);
+    timing.SetLastDecodeScheduledTimestamp(now_ms);
+  }
+  // Another frame submitted at the same time is paced according to the field
+  // trial setting.
+  int64_t now_ms = clock.TimeInMilliseconds();
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/false),
+            kMinPacingMs);
+  // MaxWaitingTime returns 0 even if there's a burst of frames if
+  // too_many_frames_queued is set to true.
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/true),
+            0);
+  EXPECT_EQ(timing.MaxWaitingTime(kZeroRenderTimeMs, now_ms,
+                                  /*too_many_frames_queued=*/true),
+            0);
+}
+
 }  // namespace webrtc