andoalon
diff --git a/‎crypto/fipsmodule/bn/bytes.c
+76-60 b/‎crypto/fipsmodule/bn/bytes.c
+76-60
diff --git a/‎crypto/fipsmodule/bn/internal.h
+19 b/‎crypto/fipsmodule/bn/internal.h
+19
diff --git a/‎crypto/fipsmodule/ec/ec.c
+11-17 b/‎crypto/fipsmodule/ec/ec.c
+11-17
diff --git a/‎crypto/fipsmodule/ec/internal.h
+2-6 b/‎crypto/fipsmodule/ec/internal.h
+2-6
@@ -61,32 +61,48 @@
 
 #include "internal.h"
 
+void bn_big_endian_to_words(BN_ULONG *out, size_t out_len, const uint8_t *in,
+                            size_t in_len) {
+  for (size_t i = 0; i < out_len; i++) {
+    if (in_len < sizeof(BN_ULONG)) {
+      // Load the last partial word.
+      BN_ULONG word = 0;
+      for (size_t j = 0; j < in_len; j++) {
+        word = (word << 8) | in[j];
+      }
+      in_len = 0;
+      out[i] = word;
+      // Fill the remainder with zeros.
+      OPENSSL_memset(out + i + 1, 0, (out_len - i - 1) * sizeof(BN_ULONG));
+      break;
+    }
 
-BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
-  size_t num_words;
-  unsigned m;
-  BN_ULONG word = 0;
-  BIGNUM *bn = NULL;
-
-  if (ret == NULL) {
-    ret = bn = BN_new();
+    in_len -= sizeof(BN_ULONG);
+    out[i] = CRYPTO_load_word_be(in + in_len);
   }
 
+  // The caller should have sized the output to avoid truncation.
+  assert(in_len == 0);
+}
+
+BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
+  BIGNUM *bn = NULL;
   if (ret == NULL) {
-    return NULL;
+    bn = BN_new();
+    if (bn == NULL) {
+      return NULL;
+    }
+    ret = bn;
   }
 
   if (len == 0) {
     ret->width = 0;
     return ret;
   }
 
-  num_words = ((len - 1) / BN_BYTES) + 1;
-  m = (len - 1) % BN_BYTES;
+  size_t num_words = ((len - 1) / BN_BYTES) + 1;
   if (!bn_wexpand(ret, num_words)) {
-    if (bn) {
-      BN_free(bn);
-    }
+    BN_free(bn);
     return NULL;
   }
 
@@ -96,29 +112,20 @@ BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
   ret->width = (int)num_words;
   ret->neg = 0;
 
-  while (len--) {
-    word = (word << 8) | *(in++);
-    if (m-- == 0) {
-      ret->d[--num_words] = word;
-      word = 0;
-      m = BN_BYTES - 1;
-    }
-  }
-
+  bn_big_endian_to_words(ret->d, ret->width, in, len);
   return ret;
 }
 
 BIGNUM *BN_le2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
   BIGNUM *bn = NULL;
   if (ret == NULL) {
     bn = BN_new();
+    if (bn == NULL) {
+      return NULL;
+    }
     ret = bn;
   }
 
-  if (ret == NULL) {
-    return NULL;
-  }
-
   if (len == 0) {
     ret->width = 0;
     ret->neg = 0;
@@ -142,61 +149,70 @@ BIGNUM *BN_le2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
   return ret;
 }
 
-size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) {
-  size_t n, i;
-  BN_ULONG l;
-
-  n = i = BN_num_bytes(in);
-  while (i--) {
-    l = in->d[i / BN_BYTES];
-    *(out++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
-  }
-  return n;
-}
-
-static int fits_in_bytes(const uint8_t *bytes, size_t num_bytes, size_t len) {
+// fits_in_bytes returns one if the |num_words| words in |words| can be
+// represented in |num_bytes| bytes.
+static int fits_in_bytes(const BN_ULONG *words, size_t num_words,
+                         size_t num_bytes) {
+  const uint8_t *bytes = (const uint8_t *)words;
+  size_t tot_bytes = num_words * sizeof(BN_ULONG);
   uint8_t mask = 0;
-  for (size_t i = len; i < num_bytes; i++) {
+  for (size_t i = num_bytes; i < tot_bytes; i++) {
     mask |= bytes[i];
   }
   return mask == 0;
 }
 
+void bn_words_to_big_endian(uint8_t *out, size_t out_len, const BN_ULONG *in,
+                            size_t in_len) {
+  // The caller should have selected an output length without truncation.
+  assert(fits_in_bytes(in, in_len, out_len));
+
+  // We only support little-endian platforms, so the internal representation is
+  // also little-endian as bytes. We can simply copy it in reverse.
+  const uint8_t *bytes = (const uint8_t *)in;
+  size_t num_bytes = in_len * sizeof(BN_ULONG);
+  if (out_len < num_bytes) {
+    num_bytes = out_len;
+  }
+
+  for (size_t i = 0; i < num_bytes; i++) {
+    out[out_len - i - 1] = bytes[i];
+  }
+  // Pad out the rest of the buffer with zeroes.
+  OPENSSL_memset(out, 0, out_len - num_bytes);
+}
+
+size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) {
+  size_t n = BN_num_bytes(in);
+  bn_words_to_big_endian(out, n, in->d, in->width);
+  return n;
+}
+
 int BN_bn2le_padded(uint8_t *out, size_t len, const BIGNUM *in) {
+  if (!fits_in_bytes(in->d, in->width, len)) {
+    return 0;
+  }
+
+  // We only support little-endian platforms, so we can simply memcpy into the
+  // internal representation.
   const uint8_t *bytes = (const uint8_t *)in->d;
   size_t num_bytes = in->width * BN_BYTES;
   if (len < num_bytes) {
-    if (!fits_in_bytes(bytes, num_bytes, len)) {
-      return 0;
-    }
     num_bytes = len;
   }
 
-  // We only support little-endian platforms, so we can simply memcpy into the
-  // internal representation.
   OPENSSL_memcpy(out, bytes, num_bytes);
   // Pad out the rest of the buffer with zeroes.
   OPENSSL_memset(out + num_bytes, 0, len - num_bytes);
   return 1;
 }
 
 int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) {
-  const uint8_t *bytes = (const uint8_t *)in->d;
-  size_t num_bytes = in->width * BN_BYTES;
-  if (len < num_bytes) {
-    if (!fits_in_bytes(bytes, num_bytes, len)) {
-      return 0;
-    }
-    num_bytes = len;
+  if (!fits_in_bytes(in->d, in->width, len)) {
+    return 0;
   }
 
-  // We only support little-endian platforms, so we can simply write the buffer
-  // in reverse.
-  for (size_t i = 0; i < num_bytes; i++) {
-    out[len - i - 1] = bytes[i];
-  }
-  // Pad out the rest of the buffer with zeroes.
-  OPENSSL_memset(out, 0, len - num_bytes);
+  bn_words_to_big_endian(out, len, in->d, in->width);
   return 1;
 }
 
 
@@ -708,6 +708,25 @@ void bn_mod_inverse0_prime_mont_small(BN_ULONG *r, const BN_ULONG *a,
                                       size_t num, const BN_MONT_CTX *mont);
 
 
+// Word-based byte conversion functions.
+
+// bn_big_endian_to_words interprets |in_len| bytes from |in| as a big-endian,
+// unsigned integer and writes the result to |out_len| words in |out|. |out_len|
+// must be large enough to represent any |in_len|-byte value. That is, |out_len|
+// must be at least |BN_BYTES * in_len|.
+void bn_big_endian_to_words(BN_ULONG *out, size_t out_len, const uint8_t *in,
+                            size_t in_len);
+
+// bn_words_to_big_endian represents |in_len| words from |in| as a big-endian,
+// unsigned integer in |out_len| bytes. It writes the result to |out|. |out_len|
+// must be large enough to represent |in| without truncation.
+//
+// Note |out_len| may be less than |BN_BYTES * in_len| if |in| is known to have
+// leading zeros.
+void bn_words_to_big_endian(uint8_t *out, size_t out_len, const BN_ULONG *in,
+                            size_t in_len);
+
+
 #if defined(__cplusplus)
 }  // extern C
 #endif
 
@@ -1175,15 +1175,12 @@ int ec_get_x_coordinate_as_scalar(const EC_GROUP *group, EC_SCALAR *out,
     return 0;
   }
 
-  // For simplicity, in case of width mismatches between |group->field| and
-  // |group->order|, zero any untouched words in |out|.
-  OPENSSL_memset(out, 0, sizeof(EC_SCALAR));
-  for (size_t i = 0; i < len; i++) {
-    out->bytes[len - i - 1] = bytes[i];
-  }
-
-  // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
-  // can reduce by performing at most one subtraction.
+  // The x-coordinate is bounded by p, but we need a scalar, bounded by the
+  // order. These may not have the same size. However, we must have p < 2×order,
+  // assuming p is not tiny (p >= 17).
+  //
+  // Thus |bytes| will fit in |order.width + 1| words, and we can reduce by
+  // performing at most one subtraction.
   //
   // Proof: We only work with prime order curves, so the number of points on
   // the curve is the order. Thus Hasse's theorem gives:
@@ -1197,14 +1194,11 @@ int ec_get_x_coordinate_as_scalar(const EC_GROUP *group, EC_SCALAR *out,
   //
   // Additionally, one can manually check this property for built-in curves. It
   // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
-
-  // The above does not guarantee |group->field| is not one word larger than
-  // |group->order|, so read one extra carry word.
-  BN_ULONG tmp[EC_MAX_WORDS];
-  BN_ULONG carry =
-      group->order.width < EC_MAX_WORDS ? out->words[group->order.width] : 0;
-  bn_reduce_once_in_place(out->words, carry, group->order.d, tmp,
-                          group->order.width);
+  const BIGNUM *order = &group->order;
+  BN_ULONG words[EC_MAX_WORDS + 1];
+  bn_big_endian_to_words(words, order->width + 1, bytes, len);
+  bn_reduce_once(out->words, words, /*carry=*/words[order->width], order->d,
+                 order->width);
   return 1;
 }
 
 
@@ -100,9 +100,7 @@ OPENSSL_STATIC_ASSERT(EC_MAX_WORDS <= BN_SMALL_MAX_WORDS,
 // An EC_SCALAR is an integer fully reduced modulo the order. Only the first
 // |order->width| words are used. An |EC_SCALAR| is specific to an |EC_GROUP|
 // and must not be mixed between groups.
-typedef union {
-  // bytes is the representation of the scalar in little-endian order.
-  uint8_t bytes[EC_MAX_BYTES];
+typedef struct {
   BN_ULONG words[EC_MAX_WORDS];
 } EC_SCALAR;
 
@@ -192,9 +190,7 @@ void ec_scalar_select(const EC_GROUP *group, EC_SCALAR *out, BN_ULONG mask,
 // are used. An |EC_FELEM| is specific to an |EC_GROUP| and must not be mixed
 // between groups. Additionally, the representation (whether or not elements are
 // represented in Montgomery-form) may vary between |EC_METHOD|s.
-typedef union {
-  // bytes is the representation of the field element in little-endian order.
-  uint8_t bytes[EC_MAX_BYTES];
+typedef struct {
   BN_ULONG words[EC_MAX_WORDS];
 } EC_FELEM;