30 #include "../bn/internal.h"
31 #include "../delocate.h"
32 #include "../../internal.h"
37 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
38 !defined(OPENSSL_SMALL)
// PRECOMP256_ROW is an array type holding 64 P256_POINT_AFFINE entries.
40 typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
// ONE is a P256_LIMBS-word constant. Elsewhere in this file it is handed to
// copy_conditional() as the value to store into a point's Z coordinate.
// NOTE(review): presumably the field element 1 in Montgomery form
// (2^256 mod p), least-significant word first -- confirm against TOBN.
43 static const BN_ULONG ONE[P256_LIMBS] = {
44 TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
45 TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
// booth_recode_w5 recodes the window value |in| and returns a word that packs
// two fields: the recoded value |d| in the bits above bit 0, and the low bit
// of |s| in bit 0.
// NOTE(review): |s| is used below as a select mask, so it is presumably
// all-ones or zero depending on the top bit of the window -- confirm.
53 static crypto_word_t booth_recode_w5(crypto_word_t
in) {
// 63 - in, i.e. 2^6 - 1 - in.
57 d = (1 << 6) -
in - 1;
// Mask-select: keep the value computed above where |s| is set, |in| elsewhere.
58 d = (
d &
s) | (
in & ~s);
// Halve, rounding up.
59 d = (
d >> 1) + (
d & 1);
// Pack |d| above bit 0 and the low bit of |s| into bit 0.
61 return (
d << 1) + (
s & 1);
// booth_recode_w7 recodes the window value |in| and returns a word that packs
// two fields: the recoded value |d| in the bits above bit 0, and the low bit
// of |s| in bit 0. It has the same shape as booth_recode_w5() with the
// constant 2^8 in place of 2^6.
// NOTE(review): |s| is used below as a select mask, so it is presumably
// all-ones or zero depending on the top bit of the window -- confirm.
64 static crypto_word_t booth_recode_w7(crypto_word_t
in) {
// 255 - in, i.e. 2^8 - 1 - in.
68 d = (1 << 8) -
in - 1;
// Mask-select: keep the value computed above where |s| is set, |in| elsewhere.
69 d = (
d &
s) | (
in & ~s);
// Halve, rounding up.
70 d = (
d >> 1) + (
d & 1);
// Pack |d| above bit 0 and the low bit of |s| into bit 0.
72 return (
d << 1) + (
s & 1);
// copy_conditional overwrites |dst| with |src| when |move| is one and leaves
// |dst| unchanged when |move| is zero. The choice is made with bit masks
// rather than a branch on |move|. Any other value of |move| yields a partial
// mask and would mix bits of the two inputs.
80 static void copy_conditional(BN_ULONG
dst[P256_LIMBS],
81 const BN_ULONG src[P256_LIMBS], BN_ULONG
move) {
// 0 - move: all-ones when |move| is 1, zero when |move| is 0.
82 BN_ULONG mask1 = ((BN_ULONG)0) -
move;
// Complement of |mask1|; selects the bits of |dst| that are kept.
83 BN_ULONG mask2 = ~mask1;
// The two masked terms never overlap, so XOR merges them.
85 dst[0] = (src[0] & mask1) ^ (
dst[0] & mask2);
86 dst[1] = (src[1] & mask1) ^ (
dst[1] & mask2);
87 dst[2] = (src[2] & mask1) ^ (
dst[2] & mask2);
88 dst[3] = (src[3] & mask1) ^ (
dst[3] & mask2);
// Words 4..7 are only handled when P256_LIMBS is 8.
89 if (P256_LIMBS == 8) {
90 dst[4] = (src[4] & mask1) ^ (
dst[4] & mask2);
91 dst[5] = (src[5] & mask1) ^ (
dst[5] & mask2);
92 dst[6] = (src[6] & mask1) ^ (
dst[6] & mask2);
93 dst[7] = (src[7] & mask1) ^ (
dst[7] & mask2);
// NOTE(review): the result of is_not_zero() is passed as the |move| argument
// of copy_conditional(), which expects 0 or 1, so this presumably returns 1
// for a non-zero |in| and 0 otherwise -- confirm.
114 static BN_ULONG is_not_zero(BN_ULONG
in) {
// ecp_nistz256_mod_inverse_sqr_mont takes a P256_LIMBS-word input |in| and a
// P256_LIMBS-word buffer |r|.
// NOTE(review): going by the name, |r| presumably receives the modular
// inverse square of |in| with both values in Montgomery form -- confirm.
123 static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG
r[P256_LIMBS],
124 const BN_ULONG
in[P256_LIMBS]) {
// Scratch values, one field element each.
// NOTE(review): the names look like stages of an addition chain (xN presumably
// being |in| raised to 2^N - 1) -- confirm.
127 BN_ULONG x2[P256_LIMBS], x3[P256_LIMBS], x6[P256_LIMBS], x12[P256_LIMBS],
128 x15[P256_LIMBS], x30[P256_LIMBS], x32[P256_LIMBS];
// Runs 2 times (i = 1, 2).
136 for (
int i = 1;
i < 3;
i++) {
// Runs 5 times (i = 1..5).
142 for (
int i = 1;
i < 6;
i++) {
// Runs 2 times (i = 1, 2).
148 for (
int i = 1;
i < 3;
i++) {
// Runs 14 times (i = 1..14).
154 for (
int i = 1;
i < 15;
i++) {
// Separate accumulator used by the remaining loops.
163 BN_ULONG
ret[P256_LIMBS];
// Runs 31 times (i = 1..31).
165 for (
int i = 1;
i < 31 + 1;
i++) {
// Runs 128 times.
170 for (
int i = 0;
i < 96 + 32;
i++) {
// Runs 32 times.
175 for (
int i = 0;
i < 32;
i++) {
// Runs 30 times.
180 for (
int i = 0;
i < 30;
i++) {
// ecp_nistz256_windowed_mul processes a scalar in 5-bit windows, recoding each
// window with booth_recode_w5() and using a 16-entry point table.
// NOTE(review): presumably sets |r| to the scalar multiple of the input point
// -- confirm.
190 static void ecp_nistz256_windowed_mul(
const EC_GROUP *
group, P256_POINT *
r,
194 assert(p_scalar != NULL);
195 assert(
group->field.width == P256_LIMBS);
// The mask is one bit wider than the window: 2^6 - 1 = 0x3f.
197 static const size_t kWindowSize = 5;
198 static const crypto_word_t kMask = (1 << (5 + 1)) - 1;
// 16-entry point table, 64-byte aligned.
203 alignas(64) P256_POINT
table[16];
211 P256_POINT *row =
table;
212 assert(
group->field.width == P256_LIMBS);
// row[1 - 1] is the first table entry; it receives copies of the input
// point's Y and Z words.
214 OPENSSL_memcpy(row[1 - 1].Y,
p->Y.words, P256_LIMBS *
sizeof(BN_ULONG));
215 OPENSSL_memcpy(row[1 - 1].Z,
p->Z.words, P256_LIMBS *
sizeof(BN_ULONG));
233 BN_ULONG
tmp[P256_LIMBS];
234 alignas(32) P256_POINT h;
// Window read from a single byte: take the byte holding bit (index - 1),
// shift that bit down to position 0 and keep 6 bits.
236 crypto_word_t wvalue = p_str[(
index - 1) / 8];
237 wvalue = (wvalue >> ((
index - 1) % 8)) & kMask;
243 size_t off = (
index - 1) / 8;
// Join two adjacent bytes (low byte first) so that a window straddling a byte
// boundary is read in full.
245 wvalue = (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
246 wvalue = (wvalue >> ((
index - 1) % 8)) & kMask;
248 wvalue = booth_recode_w5(wvalue);
// Bit 0 of the recoded value decides whether h.Y is replaced by |tmp|.
// NOTE(review): |tmp| presumably holds the negated Y coordinate -- confirm.
253 copy_conditional(
h.Y,
tmp, (wvalue & 1));
// Step down to the next lower window.
258 index -= kWindowSize;
// This window is shifted left by one bit before masking.
269 wvalue = (wvalue << 1) & kMask;
271 wvalue = booth_recode_w5(wvalue);
276 copy_conditional(
h.Y,
tmp, wvalue & 1);
284 } p256_point_union_t;
// calc_first_wvalue sets |*index| to the window size (7) and returns the
// booth_recode_w7() recoding of the first window, which is p_str[0] shifted
// left by one bit and masked to 8 bits.
286 static crypto_word_t calc_first_wvalue(
size_t *
index,
const uint8_t p_str[33]) {
// The mask is one bit wider than the window: 2^8 - 1 = 0xff.
287 static const size_t kWindowSize = 7;
288 static const crypto_word_t kMask = (1 << (7 + 1)) - 1;
289 *
index = kWindowSize;
291 crypto_word_t wvalue = (p_str[0] << 1) & kMask;
292 return booth_recode_w7(wvalue);
// calc_wvalue returns the booth_recode_w7() recoding of the 8-bit window whose
// lowest bit is bit (*index - 1) of |p_str|, counting from the low bit of
// p_str[0], and then advances |*index| by 7.
//
// It reads p_str[off] and p_str[off + 1], so callers must keep off + 1 inside
// the 33-byte buffer. The loops in this file call it 36 times after
// calc_first_wvalue(), so |*index| is at most 252 on entry and off + 1 is at
// most 32.
295 static crypto_word_t calc_wvalue(
size_t *
index,
const uint8_t p_str[33]) {
// The mask is one bit wider than the window: 2^8 - 1 = 0xff.
296 static const size_t kWindowSize = 7;
297 static const crypto_word_t kMask = (1 << (7 + 1)) - 1;
// Index of the byte that holds bit (*index - 1).
299 const size_t off = (*
index - 1) / 8;
// Join two adjacent bytes (low byte first) so that a window straddling a byte
// boundary is read in full.
300 crypto_word_t wvalue =
301 (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
302 wvalue = (wvalue >> ((*
index - 1) % 8)) & kMask;
303 *
index += kWindowSize;
305 return booth_recode_w7(wvalue);
311 alignas(32) P256_POINT
out;
314 assert(
group->field.width == P256_LIMBS);
322 alignas(32) p256_point_union_t t,
p;
330 crypto_word_t wvalue = calc_first_wvalue(&
index, p_str);
334 copy_conditional(
p.p.Y,
p.p.Z, wvalue & 1);
340 copy_conditional(
p.p.Z, ONE, is_not_zero(wvalue >> 1));
342 for (
int i = 1;
i < 37;
i++) {
343 wvalue = calc_wvalue(&
index, p_str);
348 copy_conditional(
t.a.Y,
t.p.Z, wvalue & 1);
355 assert(
group->field.width == P256_LIMBS);
// ecp_nistz256_points_mul_public walks one scalar in 7-bit windows via
// calc_first_wvalue()/calc_wvalue() and hands the other scalar, |p_scalar|,
// together with the point |p_| to ecp_nistz256_windowed_mul().
// Unlike the copy_conditional()-based code elsewhere in this file, it branches
// directly on the recoded window values.
// NOTE(review): branching on scalar-derived values fits the "public" in the
// name, i.e. inputs that are not secret -- confirm with callers.
361 static void ecp_nistz256_points_mul_public(
const EC_GROUP *
group,
366 assert(
p_ != NULL && p_scalar != NULL && g_scalar != NULL);
368 alignas(32) p256_point_union_t t,
p;
// The recoded window is held in a size_t here; calc_first_wvalue() returns
// crypto_word_t.
375 size_t wvalue = calc_first_wvalue(&
index, p_str);
// The bits above bit 0 carry the recoded value; this branch is taken when it
// is non-zero.
380 if ((wvalue >> 1) != 0) {
// Bit 0 carries the flag packed in by booth_recode_w7().
389 if ((wvalue & 1) == 1) {
// 36 further windows after the first one, 37 in total.
393 for (
int i = 1;
i < 37;
i++) {
394 wvalue = calc_wvalue(&
index, p_str);
// Taken when the recoded value is zero.
396 if ((wvalue >> 1) == 0) {
403 if ((wvalue & 1) == 1) {
// The result for |p_| and |p_scalar| is written to t.p.
413 ecp_nistz256_windowed_mul(
group, &
t.p,
p_, p_scalar);
416 assert(
group->field.width == P256_LIMBS);
430 BN_ULONG z_inv2[P256_LIMBS];
431 assert(
group->field.width == P256_LIMBS);
432 ecp_nistz256_mod_inverse_sqr_mont(z_inv2,
point->Z.words);
495 BN_ULONG
table[15][P256_LIMBS];
// kChain is a constant table of 27 entries, each pairing a small count with
// one of the i_* index constants.
// NOTE(review): presumably each entry means "square this many times, then
// multiply by the table element at this index" -- confirm against the loop
// that consumes it.
540 static const struct {
542 } kChain[27] = {{32, i_x32}, {6, i_101111}, {5, i_111}, {4, i_11},
543 {5, i_1111}, {5, i_10101}, {4, i_101}, {3, i_101},
544 {3, i_101}, {5, i_111}, {9, i_101111}, {6, i_1111},
545 {2, i_1}, {5, i_1}, {6, i_1111}, {5, i_111},
546 {4, i_111}, {5, i_111}, {5, i_101}, {3, i_11},
547 {10, i_101111}, {2, i_11}, {5, i_11}, {5, i_11},
548 {3, i_1}, {7, i_10101}, {6, i_1111}};
// ecp_nistz256_scalar_to_montgomery_inv_vartime returns an int and starts by
// checking a CPU capability bit.
// NOTE(review): going by the name, this is a variable-time scalar inversion
// producing a Montgomery-form result -- confirm.
555 static int ecp_nistz256_scalar_to_montgomery_inv_vartime(
const EC_GROUP *
group,
// Taken when bit 28 of the second OPENSSL_ia32cap word is clear.
// NOTE(review): presumably the AVX feature flag (CPUID leaf 1, ECX bit 28),
// with this branch falling back to a generic path -- confirm.
558 if ((OPENSSL_ia32cap_get()[1] & (1 << 28)) == 0) {
// The group order must occupy exactly P256_LIMBS words.
563 assert(
group->order.width == P256_LIMBS);
// ecp_nistz256_cmp_x_coordinate returns an int and works on the X words of
// |p| after passing them through ecp_nistz256_from_mont().
// NOTE(review): going by the name, it compares the point's affine
// x-coordinate with a given scalar -- confirm.
573 static int ecp_nistz256_cmp_x_coordinate(
const EC_GROUP *
group,
// Both the group order and the field must occupy exactly P256_LIMBS words.
580 assert(
group->order.width == P256_LIMBS);
581 assert(
group->field.width == P256_LIMBS);
586 BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS],
X[P256_LIMBS];
// |X| receives the result of ecp_nistz256_from_mont() applied to p->X.words.
// NOTE(review): presumably a conversion out of Montgomery form -- confirm.
589 ecp_nistz256_from_mont(
X,
p->X.words);
616 out->point_get_affine_coordinates = ecp_nistz256_get_affine;
617 out->add = ecp_nistz256_add;
618 out->dbl = ecp_nistz256_dbl;
619 out->mul = ecp_nistz256_point_mul;
620 out->mul_base = ecp_nistz256_point_mul_base;
621 out->mul_public = ecp_nistz256_points_mul_public;
626 out->scalar_inv0_montgomery = ecp_nistz256_inv0_mod_ord;
627 out->scalar_to_montgomery_inv_vartime =
628 ecp_nistz256_scalar_to_montgomery_inv_vartime;
629 out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;