From 61dd5614eca0fbe390aeebae89690873d0360a9a Mon Sep 17 00:00:00 2001
From: David Benjamin <davidben@google.com>
Date: Wed, 1 Feb 2023 12:21:56 -0500
Subject: [PATCH] Clear various false positives in RSA constant-time validation

This silences a few false positives in the valgrind-based constant-time
validation.

First, there are a few precondition checks that are publicly true, but
valgrind doesn't know that. I've added a constant_time_declassify_int
function and stuck those in there, since the existing macro is mostly
suited for macros. It also adds a value barrier in production code (see
comment for why). If we more thoroughly decoupled RSA from BIGNUM, we
could probably avoid this, since a lot of comes from going through
public BIGNUM APIs.

Next, our BIGNUM strategy is such that bounds on bignums are sometimes
computed pessimally, and then clamped down later. Modular arithmetic is
trivially bounded and avoids that, but RSA CRT involves some non-modular
computations. As a result, we actually compute a few more words than
necessary in the RSA result, and then bn_resize_words down.
bn_resize_words also has a precondition check, which checks that all
discarded words are zero. They are, but valgrind does not know that.

Similarly, the BN_bn2bin_padded call at the end checks for discarded
non-zero bytes, but valgrind does not know that, because the output is
bounded by n, the discarded bytes are zero.

I've added a bn_assert_fits_in_bytes to clear this. It's an assert in
debug mode and a declassification in constant-time validation.

I suspect a different secret integer design would avoid needing this. I
think this comes from a combination of non-modular arithmetic, not
having callers pass explicit width, and tracking public widths at the
word granularity, rather than byte or bit. (Bit would actually be most
ideal.) Maybe worth a ponder sometime.

Change-Id: I1bc9443d571d2881e2d857c70be913074deac156
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56825
Commit-Queue: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
(cherry picked from commit aa83c12069f3d62704fce3d499b068b5bf1b6e31)
---
 crypto/fipsmodule/bn/bytes.c          | 12 ++++++++++++
 crypto/fipsmodule/bn/exponentiation.c |  3 ++-
 crypto/fipsmodule/bn/internal.h       | 12 +++++++++---
 crypto/fipsmodule/rsa/rsa_impl.c      | 26 ++++++++++++++++++--------
 crypto/internal.h                     | 26 ++++++++++++++++++++++----
 5 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/crypto/fipsmodule/bn/bytes.c b/crypto/fipsmodule/bn/bytes.c
index 39083b775b..97b0d3f958 100644
--- a/crypto/fipsmodule/bn/bytes.c
+++ b/crypto/fipsmodule/bn/bytes.c
@@ -205,6 +205,18 @@ static int fits_in_bytes(const BN_ULONG *words, size_t num_words,
   return mask == 0;
 }
 
+void bn_assert_fits_in_bytes(const BIGNUM *bn, size_t num) {
+  const uint8_t *bytes = (const uint8_t *)bn->d;
+  size_t tot_bytes = bn->width * sizeof(BN_ULONG);
+  if (tot_bytes > num) {
+    CONSTTIME_DECLASSIFY(bytes + num, tot_bytes - num);
+    for (size_t i = num; i < tot_bytes; i++) {
+      assert(bytes[i] == 0);
+    }
+    (void)bytes;
+  }
+}
+
 void bn_words_to_big_endian(uint8_t *out, size_t out_len, const BN_ULONG *in,
                             size_t in_len) {
   // The caller should have selected an output length without truncation.
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index f430f45d2f..da4152e4cd 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -640,7 +640,8 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
     return 0;
   }
-  if (a->neg || BN_ucmp(a, m) >= 0) {
+  // |a| is secret, but |a < m| is not.
+  if (a->neg || constant_time_declassify_int(BN_ucmp(a, m)) >= 0) {
     OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
     return 0;
   }
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 30437deafe..417a3eac6c 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -221,8 +221,8 @@ extern "C" {
 #define BN_GENCB_NEW_STYLE 1
 #define BN_GENCB_OLD_STYLE 2
 
-// bn_minimal_width returns the minimal value of |bn->top| which fits the
-// value of |bn|.
+// bn_minimal_width returns the minimal number of words needed to represent
+// |bn|.
 int bn_minimal_width(const BIGNUM *bn);
 
 // bn_set_minimal_width sets |bn->width| to |bn_minimal_width(bn)|. If |bn| is
@@ -238,7 +238,7 @@ int bn_wexpand(BIGNUM *bn, size_t words);
 // than a number of words.
 int bn_expand(BIGNUM *bn, size_t bits);
 
-// bn_resize_words adjusts |bn->top| to be |words|. It returns one on success
+// bn_resize_words adjusts |bn->width| to be |words|. It returns one on success
 // and zero on allocation error or if |bn|'s value is too large.
 OPENSSL_EXPORT int bn_resize_words(BIGNUM *bn, size_t words);
 
@@ -267,6 +267,12 @@ int bn_fits_in_words(const BIGNUM *bn, size_t num);
 // is representable in |num| words. Otherwise, it returns zero.
 int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn);
 
+// bn_assert_fits_in_bytes asserts that |bn| fits in |num| bytes. This is a
+// no-op in release builds, but triggers an assert in debug builds, and
+// declassifies all bytes which are therefore known to be zero in constant-time
+// validation.
+void bn_assert_fits_in_bytes(const BIGNUM *bn, size_t num);
+
 // bn_mul_add_words multiples |ap| by |w|, adds the result to |rp|, and places
 // the result in |rp|. |ap| and |rp| must both be |num| words long. It returns
 // the carry word of the operation. |ap| and |rp| may be equal but otherwise may
diff --git a/crypto/fipsmodule/rsa/rsa_impl.c b/crypto/fipsmodule/rsa/rsa_impl.c
index 29529d3efd..79466f16f4 100644
--- a/crypto/fipsmodule/rsa/rsa_impl.c
+++ b/crypto/fipsmodule/rsa/rsa_impl.c
@@ -763,6 +763,8 @@ int rsa_default_private_transform(RSA *rsa, uint8_t *out, const uint8_t *in,
     goto err;
   }
 
+  // The caller should have ensured this.
+  assert(len == BN_num_bytes(rsa->n));
   if (BN_bin2bn(in, len, f) == NULL) {
     goto err;
   }
@@ -824,16 +826,16 @@ int rsa_default_private_transform(RSA *rsa, uint8_t *out, const uint8_t *in,
   // works when the CRT isn't used. That attack is much less likely to succeed
   // than the CRT attack, but there have likely been improvements since 1997.
   //
-  // This check is cheap assuming |e| is small; it almost always is.
+  // This check is cheap assuming |e| is small, which we require in
+  // |rsa_check_public_key|.
   if (rsa->e != NULL) {
     BIGNUM *vrfy = BN_CTX_get(ctx);
     if (vrfy == NULL ||
         !BN_mod_exp_mont(vrfy, result, rsa->e, rsa->n, ctx, rsa->mont_n) ||
-        !BN_equal_consttime(vrfy, f)) {
+        !constant_time_declassify_int(BN_equal_consttime(vrfy, f))) {
       OPENSSL_PUT_ERROR(RSA, ERR_R_INTERNAL_ERROR);
       goto err;
     }
-
   }
 
   if (do_blinding &&
@@ -847,6 +849,7 @@ int rsa_default_private_transform(RSA *rsa, uint8_t *out, const uint8_t *in,
   //
   // See Falko Strenzke, "Manger's Attack revisited", ICICS 2010.
   assert(result->width == rsa->mont_n->N.width);
+  bn_assert_fits_in_bytes(result, len);
   if (!BN_bn2bin_padded(out, len, result)) {
     OPENSSL_PUT_ERROR(RSA, ERR_R_INTERNAL_ERROR);
     goto err;
@@ -965,11 +968,18 @@ static int mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx) {
       // so it is correct mod q. Finally, the result is bounded by [m1, n + m1),
       // and the result is at least |m1|, so this must be the unique answer in
       // [0, n).
-      !bn_mul_consttime(r0, r0, q, ctx) ||
-      !bn_uadd_consttime(r0, r0, m1) ||
-      // The result should be bounded by |n|, but fixed-width operations may
-      // bound the width slightly higher, so fix it.
-      !bn_resize_words(r0, n->width)) {
+      !bn_mul_consttime(r0, r0, q, ctx) ||  //
+      !bn_uadd_consttime(r0, r0, m1)) {
+    goto err;
+  }
+
+  // The result should be bounded by |n|, but fixed-width operations may
+  // bound the width slightly higher, so fix it. This trips constant-time checks
+  // because a naive data flow analysis does not realize the excess words are
+  // publicly zero.
+  assert(BN_cmp(r0, n) < 0);
+  bn_assert_fits_in_bytes(r0, BN_num_bytes(n));
+  if (!bn_resize_words(r0, n->width)) {
     goto err;
   }
 
diff --git a/crypto/internal.h b/crypto/internal.h
index a0b958d5a6..5f0576fcb6 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -445,20 +445,38 @@ static inline int constant_time_select_int(crypto_word_t mask, int a, int b) {
 // of memory as secret. Secret data is tracked as it flows to registers and
 // other parts of a memory. If secret data is used as a condition for a branch,
 // or as a memory index, it will trigger warnings in valgrind.
-#define CONSTTIME_SECRET(x, y) VALGRIND_MAKE_MEM_UNDEFINED(x, y)
+#define CONSTTIME_SECRET(ptr, len) VALGRIND_MAKE_MEM_UNDEFINED(ptr, len)
 
 // CONSTTIME_DECLASSIFY takes a pointer and a number of bytes and marks that
 // region of memory as public. Public data is not subject to constant-time
 // rules.
-#define CONSTTIME_DECLASSIFY(x, y) VALGRIND_MAKE_MEM_DEFINED(x, y)
+#define CONSTTIME_DECLASSIFY(ptr, len) VALGRIND_MAKE_MEM_DEFINED(ptr, len)
 
 #else
 
-#define CONSTTIME_SECRET(x, y)
-#define CONSTTIME_DECLASSIFY(x, y)
+#define CONSTTIME_SECRET(ptr, len)
+#define CONSTTIME_DECLASSIFY(ptr, len)
 
 #endif  // BORINGSSL_CONSTANT_TIME_VALIDATION
 
+static inline int constant_time_declassify_int(int v) {
+  // Return |v| through a value barrier to be safe. Valgrind-based constant-time
+  // validation is partly to check the compiler has not undone any constant-time
+  // work. Any place |BORINGSSL_CONSTANT_TIME_VALIDATION| influences
+  // optimizations, this validation is inaccurate.
+  //
+  // However, by sending pointers through valgrind, we likely inhibit escape
+  // analysis. On local variables, particularly booleans, we likely
+  // significantly impact optimizations.
+  //
+  // Thus, to be safe, stick a value barrier, in hopes of comparably inhibiting
+  // compiler analysis.
+  static_assert(sizeof(uint32_t) == sizeof(int),
+                "int is not the same size as uint32_t");
+  CONSTTIME_DECLASSIFY(&v, sizeof(v));
+  return value_barrier_u32(v);
+}
+
 
 // Thread-safe initialisation.