rsaz_exp.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
3  * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
4  *
5  * Licensed under the OpenSSL license (the "License"). You may not use
6  * this file except in compliance with the License. You can obtain a copy
7  * in the file LICENSE in the source distribution or at
8  * https://www.openssl.org/source/license.html
9  *
10  * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
11  * (1) Intel Corporation, Israel Development Center, Haifa, Israel
12  * (2) University of Haifa, Israel
13  */
14 
15 #include "rsaz_exp.h"
16 
17 #if defined(RSAZ_ENABLED)
18 
19 #include <openssl/mem.h>
20 
21 #include "internal.h"
22 #include "../../internal.h"
23 
24 
25 // one is 1 in RSAZ's representation: a 64-byte-aligned array of 40 words in which only index 0 is non-zero.
26 alignas(64) static const BN_ULONG one[40] = {
27  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
29 // two80 is 2^80 in RSAZ's representation. Note RSAZ uses base 2^29, so the single non-zero word, 1 << 22 at index 2, is
30 // 2^(29*2 + 22) = 2^80, not 2^(64*2 + 22).
31 alignas(64) static const BN_ULONG two80[40] = {
32  0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
34 
// RSAZ_1024_mod_exp_avx2 (per its name, a 1024-bit modular exponentiation built
// on the rsaz_1024_*_avx2 helpers) writes its output to |result_norm|.
//
// Steps visible in this listing:
//  - |m_norm| and |base_norm| are converted with rsaz_1024_norm2red_avx2 into
//    40-word buffers (|m|, |a_inv|) carved out of |storage|.
//  - A table with entries 0..31 is filled through rsaz_1024_scatter5_avx2 into
//    |table_s|, which starts at storage + 40 * 3.
//  - |exponent| is read as bytes: first the top 5 bits of byte 127, then 5-bit
//    windows at bit offsets 1014, 1009, ..., 4, and finally the low 4 bits of
//    byte 0. Each window value selects a table entry via
//    rsaz_1024_gather5_avx2, which is multiplied into |result|.
//  - |result| is converted with rsaz_1024_red2norm_avx2 into |result_norm|.
//
// NOTE(review): the embedded line numbers of this listing have gaps (e.g.
// 40-41, 59, 63, 83, 87, 197-198, 211, 219, 223), so some statements --
// including the declaration of |storage| and whatever follows the
// "Square four times." and "Convert from Montgomery." comments -- are not
// shown here. Confirm against the complete file before relying on the
// comments that sit next to those gaps.
35 void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
36  const BN_ULONG base_norm[16],
37  const BN_ULONG exponent[16],
38  const BN_ULONG m_norm[16], const BN_ULONG RR[16],
39  BN_ULONG k0,
42  "MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH is too small");
43  assert((uintptr_t)storage % 64 == 0);
44 
45  BN_ULONG *a_inv, *m, *result, *table_s = storage + 40 * 3, *R2 = table_s;
46  // Note |R2| aliases |table_s|: both are initialized to storage + 40 * 3.
 // The branch below is taken when the offset of |storage| within its 4096-byte
 // page, plus 320, is at least 4096; |m| is then placed at storage + 40 * 2
 // instead of at |storage| itself.
47  if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
48  result = storage;
49  a_inv = storage + 40;
50  m = storage + 40 * 2; // should not cross page
51  } else {
52  m = storage; // should not cross page
53  result = storage + 40;
54  a_inv = storage + 40 * 2;
55  }
56 
57  rsaz_1024_norm2red_avx2(m, m_norm);
58  rsaz_1024_norm2red_avx2(a_inv, base_norm);
60 
61  // Convert |R2| from the usual radix, giving R = 2^1024, to RSAZ's radix,
62  // giving R = 2^(36*29) = 2^1044.
64  // R2 = 2^2048 * 2^2048 / 2^1044 = 2^3052
65  rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
66  // R2 = 2^3052 * 2^80 / 2^1044 = 2^2088 = (2^1044)^2
67 
68  // table[0] = 1
69  rsaz_1024_mul_avx2(result, R2, one, m, k0);
70  // table[1] = a_inv^1
71  rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
72 
73  rsaz_1024_scatter5_avx2(table_s, result, 0);
74  rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
75 
76  // table[2] = a_inv^2
77  rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
78  rsaz_1024_scatter5_avx2(table_s, result, 2);
79 #if 0
80  // This is almost 2x smaller and less than 1% slower.
81  for (int index = 3; index < 32; index++) {
82  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
84  }
85 #else
86  // table[4] = a_inv^4
88  rsaz_1024_scatter5_avx2(table_s, result, 4);
89  // table[8] = a_inv^8
91  rsaz_1024_scatter5_avx2(table_s, result, 8);
92  // table[16] = a_inv^16
94  rsaz_1024_scatter5_avx2(table_s, result, 16);
95  // table[17] = a_inv^17
96  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
97  rsaz_1024_scatter5_avx2(table_s, result, 17);
98 
99  // table[3]
100  rsaz_1024_gather5_avx2(result, table_s, 2);
101  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
102  rsaz_1024_scatter5_avx2(table_s, result, 3);
103  // table[6]
105  rsaz_1024_scatter5_avx2(table_s, result, 6);
106  // table[12]
108  rsaz_1024_scatter5_avx2(table_s, result, 12);
109  // table[24]
111  rsaz_1024_scatter5_avx2(table_s, result, 24);
112  // table[25]
113  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
114  rsaz_1024_scatter5_avx2(table_s, result, 25);
115 
116  // table[5]
117  rsaz_1024_gather5_avx2(result, table_s, 4);
118  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
119  rsaz_1024_scatter5_avx2(table_s, result, 5);
120  // table[10]
122  rsaz_1024_scatter5_avx2(table_s, result, 10);
123  // table[20]
125  rsaz_1024_scatter5_avx2(table_s, result, 20);
126  // table[21]
127  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
128  rsaz_1024_scatter5_avx2(table_s, result, 21);
129 
130  // table[7]
131  rsaz_1024_gather5_avx2(result, table_s, 6);
132  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
133  rsaz_1024_scatter5_avx2(table_s, result, 7);
134  // table[14]
136  rsaz_1024_scatter5_avx2(table_s, result, 14);
137  // table[28]
139  rsaz_1024_scatter5_avx2(table_s, result, 28);
140  // table[29]
141  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
142  rsaz_1024_scatter5_avx2(table_s, result, 29);
143 
144  // table[9]
145  rsaz_1024_gather5_avx2(result, table_s, 8);
146  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
147  rsaz_1024_scatter5_avx2(table_s, result, 9);
148  // table[18]
150  rsaz_1024_scatter5_avx2(table_s, result, 18);
151  // table[19]
152  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
153  rsaz_1024_scatter5_avx2(table_s, result, 19);
154 
155  // table[11]
156  rsaz_1024_gather5_avx2(result, table_s, 10);
157  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
158  rsaz_1024_scatter5_avx2(table_s, result, 11);
159  // table[22]
161  rsaz_1024_scatter5_avx2(table_s, result, 22);
162  // table[23]
163  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
164  rsaz_1024_scatter5_avx2(table_s, result, 23);
165 
166  // table[13]
167  rsaz_1024_gather5_avx2(result, table_s, 12);
168  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
169  rsaz_1024_scatter5_avx2(table_s, result, 13);
170  // table[26]
172  rsaz_1024_scatter5_avx2(table_s, result, 26);
173  // table[27]
174  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
175  rsaz_1024_scatter5_avx2(table_s, result, 27);
176 
177  // table[15]
178  rsaz_1024_gather5_avx2(result, table_s, 14);
179  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
180  rsaz_1024_scatter5_avx2(table_s, result, 15);
181  // table[30]
183  rsaz_1024_scatter5_avx2(table_s, result, 30);
184  // table[31]
185  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
186  rsaz_1024_scatter5_avx2(table_s, result, 31);
187 #endif
188 
189  const uint8_t *p_str = (const uint8_t *)exponent;
190 
191  // Load the first window: the top 5 bits of byte 127 of the exponent.
192  int wvalue = p_str[127] >> 3;
193  rsaz_1024_gather5_avx2(result, table_s, wvalue);
194 
195  int index = 1014;
196  while (index > -1) { // Runs for index = 1014, 1009, ..., 4, i.e. 203 five-bit windows.
197 
199 
200  uint16_t wvalue_16;
201  memcpy(&wvalue_16, &p_str[index / 8], sizeof(wvalue_16)); // Two bytes, so a window that straddles a byte boundary is fully read.
202  wvalue = wvalue_16;
203  wvalue = (wvalue >> (index % 8)) & 31; // NOTE(review): treating p_str[index / 8] as the low byte assumes a little-endian host -- confirm.
204  index -= 5;
205 
206  rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
207  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
208  }
209 
210  // Square four times.
212 
213  wvalue = p_str[0] & 15; // Final window: the low 4 bits of byte 0.
214 
215  rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
216  rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
217 
218  // Convert from Montgomery.
220 
221  rsaz_1024_red2norm_avx2(result_norm, result);
222 
224 }
225 
226 #endif // RSAZ_ENABLED
_gevent_test_main.result
result
Definition: _gevent_test_main.py:96
OPENSSL_cleanse
#define OPENSSL_cleanse
Definition: boringssl_prefix_symbols.h:1864
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
exponent
int exponent
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1100
rsaz_1024_red2norm_avx2
#define rsaz_1024_red2norm_avx2
Definition: boringssl_prefix_symbols.h:3366
rsaz_1024_scatter5_avx2
#define rsaz_1024_scatter5_avx2
Definition: boringssl_prefix_symbols.h:3367
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
absl::hash_internal::k0
static const uint64_t k0
Definition: abseil-cpp/absl/hash/internal/city.cc:53
memcpy
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
MOD_EXP_CTIME_STORAGE_LEN
#define MOD_EXP_CTIME_STORAGE_LEN
Definition: third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/internal.h:200
rsaz_1024_mul_avx2
#define rsaz_1024_mul_avx2
Definition: boringssl_prefix_symbols.h:3364
rsaz_1024_sqr_avx2
#define rsaz_1024_sqr_avx2
Definition: boringssl_prefix_symbols.h:3368
uintptr_t
_W64 unsigned int uintptr_t
Definition: stdint-msvc2008.h:119
rsaz_1024_norm2red_avx2
#define rsaz_1024_norm2red_avx2
Definition: boringssl_prefix_symbols.h:3365
index
int index
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/protobuf.h:1184
R2
#define R2(a, b, c, d, k, s, t)
Definition: md4.c:128
RSAZ_1024_mod_exp_avx2
#define RSAZ_1024_mod_exp_avx2
Definition: boringssl_prefix_symbols.h:2077
rsaz_exp.h
mem.h
regress.m
m
Definition: regress/regress.py:25
absl::status_internal::storage
static ABSL_INTERNAL_ATOMIC_HOOK_ATTRIBUTES absl::base_internal::AtomicHook< StatusPayloadPrinter > storage
Definition: abseil-cpp/absl/status/status_payload_printer.cc:26
MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH
Definition: third_party/boringssl-with-bazel/src/crypto/fipsmodule/bn/internal.h:194
rsaz_1024_gather5_avx2
#define rsaz_1024_gather5_avx2
Definition: boringssl_prefix_symbols.h:3363
OPENSSL_STATIC_ASSERT
#define OPENSSL_STATIC_ASSERT(cond, msg)
Definition: type_check.h:75


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:07