poly1305_arm.c
Go to the documentation of this file.
1 /* Copyright (c) 2014, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // This implementation was taken from the public domain, neon2 version in
16 // SUPERCOP by D. J. Bernstein and Peter Schwabe.
17 
#include <openssl/poly1305.h>

#include <stdint.h>
#include <string.h>

#include "../internal.h"
#include "internal.h"
24 
25 
26 #if defined(OPENSSL_POLY1305_NEON)
27 
// fe1305x2 holds two field elements side by side. The C code in this file
// stores one element's five limbs at the even indices (0, 2, 4, 6, 8) and the
// other's at the odd indices (1, 3, 5, 7, 9). Limbs are kept in 26-bit pieces.
typedef struct {
  // Twelve words are reserved so the structure size suits alignment; only the
  // first ten are used.
  uint32_t v[12];
} fe1305x2;
31 
32 #define addmulmod openssl_poly1305_neon2_addmulmod
33 #define blocks openssl_poly1305_neon2_blocks
34 
35 extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y,
36  const fe1305x2 *c);
37 
38 extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const uint8_t *in,
39  size_t inlen);
40 
41 static void freeze(fe1305x2 *r) {
42  int i;
43 
44  uint32_t x0 = r->v[0];
45  uint32_t x1 = r->v[2];
46  uint32_t x2 = r->v[4];
47  uint32_t x3 = r->v[6];
48  uint32_t x4 = r->v[8];
49  uint32_t y0;
50  uint32_t y1;
51  uint32_t y2;
52  uint32_t y3;
53  uint32_t y4;
54  uint32_t swap;
55 
56  for (i = 0; i < 3; ++i) {
57  x1 += x0 >> 26;
58  x0 &= 0x3ffffff;
59  x2 += x1 >> 26;
60  x1 &= 0x3ffffff;
61  x3 += x2 >> 26;
62  x2 &= 0x3ffffff;
63  x4 += x3 >> 26;
64  x3 &= 0x3ffffff;
65  x0 += 5 * (x4 >> 26);
66  x4 &= 0x3ffffff;
67  }
68 
69  y0 = x0 + 5;
70  y1 = x1 + (y0 >> 26);
71  y0 &= 0x3ffffff;
72  y2 = x2 + (y1 >> 26);
73  y1 &= 0x3ffffff;
74  y3 = x3 + (y2 >> 26);
75  y2 &= 0x3ffffff;
76  y4 = x4 + (y3 >> 26);
77  y3 &= 0x3ffffff;
78  swap = -(y4 >> 26);
79  y4 &= 0x3ffffff;
80 
81  y0 ^= x0;
82  y1 ^= x1;
83  y2 ^= x2;
84  y3 ^= x3;
85  y4 ^= x4;
86 
87  y0 &= swap;
88  y1 &= swap;
89  y2 &= swap;
90  y3 &= swap;
91  y4 &= swap;
92 
93  y0 ^= x0;
94  y1 ^= x1;
95  y2 ^= x2;
96  y3 ^= x3;
97  y4 ^= x4;
98 
99  r->v[0] = y0;
100  r->v[2] = y1;
101  r->v[4] = y2;
102  r->v[6] = y3;
103  r->v[8] = y4;
104 }
105 
// store32 writes the four bytes of |v| to |out| in the host's byte order.
static void store32(uint8_t out[4], uint32_t v) {
  OPENSSL_memcpy(out, &v, sizeof(v));
}
107 
// load32 returns the four bytes at |t| as a host-order 32-bit word. Copying
// through OPENSSL_memcpy, rather than casting the byte pointer, is what keeps
// fe1305x2_frombytearray from breaking strict aliasing rules.
static uint32_t load32(const uint8_t t[4]) {
  uint32_t word;
  OPENSSL_memcpy(&word, t, 4);
  return word;
}
115 
116 static void fe1305x2_tobytearray(uint8_t r[16], fe1305x2 *x) {
117  uint32_t x0 = x->v[0];
118  uint32_t x1 = x->v[2];
119  uint32_t x2 = x->v[4];
120  uint32_t x3 = x->v[6];
121  uint32_t x4 = x->v[8];
122 
123  x1 += x0 >> 26;
124  x0 &= 0x3ffffff;
125  x2 += x1 >> 26;
126  x1 &= 0x3ffffff;
127  x3 += x2 >> 26;
128  x2 &= 0x3ffffff;
129  x4 += x3 >> 26;
130  x3 &= 0x3ffffff;
131 
132  store32(r, x0 + (x1 << 26));
133  store32(r + 4, (x1 >> 6) + (x2 << 20));
134  store32(r + 8, (x2 >> 12) + (x3 << 14));
135  store32(r + 12, (x3 >> 18) + (x4 << 8));
136 }
137 
138 static void fe1305x2_frombytearray(fe1305x2 *r, const uint8_t *x, size_t xlen) {
139  size_t i;
140  uint8_t t[17];
141 
142  for (i = 0; (i < 16) && (i < xlen); i++) {
143  t[i] = x[i];
144  }
145  xlen -= i;
146  x += i;
147  t[i++] = 1;
148  for (; i < 17; i++) {
149  t[i] = 0;
150  }
151 
152  r->v[0] = 0x3ffffff & load32(t);
153  r->v[2] = 0x3ffffff & (load32(t + 3) >> 2);
154  r->v[4] = 0x3ffffff & (load32(t + 6) >> 4);
155  r->v[6] = 0x3ffffff & (load32(t + 9) >> 6);
156  r->v[8] = load32(t + 13);
157 
158  if (xlen) {
159  for (i = 0; (i < 16) && (i < xlen); i++) {
160  t[i] = x[i];
161  }
162  t[i++] = 1;
163  for (; i < 17; i++) {
164  t[i] = 0;
165  }
166 
167  r->v[1] = 0x3ffffff & load32(t);
168  r->v[3] = 0x3ffffff & (load32(t + 3) >> 2);
169  r->v[5] = 0x3ffffff & (load32(t + 6) >> 4);
170  r->v[7] = 0x3ffffff & (load32(t + 9) >> 6);
171  r->v[9] = load32(t + 13);
172  } else {
173  r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
174  }
175 }
176 
177 static const alignas(16) fe1305x2 zero;
178 
179 struct poly1305_state_st {
180  uint8_t data[sizeof(fe1305x2[5]) + 128];
181  uint8_t buf[32];
182  size_t buf_used;
183  uint8_t key[16];
184 };
185 
187  sizeof(struct poly1305_state_st) + 63 <= sizeof(poly1305_state),
188  "poly1305_state isn't large enough to hold aligned poly1305_state_st.");
189 
190 void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) {
191  struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
192  fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
193  fe1305x2 *const h = r + 1;
194  fe1305x2 *const c = h + 1;
195  fe1305x2 *const precomp = c + 1;
196 
197  r->v[1] = r->v[0] = 0x3ffffff & load32(key);
198  r->v[3] = r->v[2] = 0x3ffff03 & (load32(key + 3) >> 2);
199  r->v[5] = r->v[4] = 0x3ffc0ff & (load32(key + 6) >> 4);
200  r->v[7] = r->v[6] = 0x3f03fff & (load32(key + 9) >> 6);
201  r->v[9] = r->v[8] = 0x00fffff & (load32(key + 12) >> 8);
202 
203  for (size_t j = 0; j < 10; j++) {
204  h->v[j] = 0; // XXX: should fast-forward a bit
205  }
206 
207  addmulmod(precomp, r, r, &zero); // precompute r^2
208  addmulmod(precomp + 1, precomp, precomp, &zero); // precompute r^4
209 
210  OPENSSL_memcpy(st->key, key + 16, 16);
211  st->buf_used = 0;
212 }
213 
214 void CRYPTO_poly1305_update_neon(poly1305_state *state, const uint8_t *in,
215  size_t in_len) {
216  struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
217  fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
218  fe1305x2 *const h = r + 1;
219  fe1305x2 *const c = h + 1;
220  fe1305x2 *const precomp = c + 1;
221 
222  if (st->buf_used) {
223  size_t todo = 32 - st->buf_used;
224  if (todo > in_len) {
225  todo = in_len;
226  }
227  for (size_t i = 0; i < todo; i++) {
228  st->buf[st->buf_used + i] = in[i];
229  }
230  st->buf_used += todo;
231  in_len -= todo;
232  in += todo;
233 
234  if (st->buf_used == sizeof(st->buf) && in_len) {
235  addmulmod(h, h, precomp, &zero);
236  fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
237  for (size_t i = 0; i < 10; i++) {
238  h->v[i] += c->v[i];
239  }
240  st->buf_used = 0;
241  }
242  }
243 
244  while (in_len > 32) {
245  size_t tlen = 1048576;
246  if (in_len < tlen) {
247  tlen = in_len;
248  }
249  tlen -= blocks(h, precomp, in, tlen);
250  in_len -= tlen;
251  in += tlen;
252  }
253 
254  if (in_len) {
255  for (size_t i = 0; i < in_len; i++) {
256  st->buf[i] = in[i];
257  }
258  st->buf_used = in_len;
259  }
260 }
261 
262 void CRYPTO_poly1305_finish_neon(poly1305_state *state, uint8_t mac[16]) {
263  struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
264  fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
265  fe1305x2 *const h = r + 1;
266  fe1305x2 *const c = h + 1;
267  fe1305x2 *const precomp = c + 1;
268 
269  addmulmod(h, h, precomp, &zero);
270 
271  if (st->buf_used > 16) {
272  fe1305x2_frombytearray(c, st->buf, st->buf_used);
273  precomp->v[1] = r->v[1];
274  precomp->v[3] = r->v[3];
275  precomp->v[5] = r->v[5];
276  precomp->v[7] = r->v[7];
277  precomp->v[9] = r->v[9];
278  addmulmod(h, h, precomp, c);
279  } else if (st->buf_used > 0) {
280  fe1305x2_frombytearray(c, st->buf, st->buf_used);
281  r->v[1] = 1;
282  r->v[3] = 0;
283  r->v[5] = 0;
284  r->v[7] = 0;
285  r->v[9] = 0;
286  addmulmod(h, h, r, c);
287  }
288 
289  h->v[0] += h->v[1];
290  h->v[2] += h->v[3];
291  h->v[4] += h->v[5];
292  h->v[6] += h->v[7];
293  h->v[8] += h->v[9];
294  freeze(h);
295 
296  fe1305x2_frombytearray(c, st->key, 16);
297  c->v[8] ^= (1 << 24);
298 
299  h->v[0] += c->v[0];
300  h->v[2] += c->v[2];
301  h->v[4] += c->v[4];
302  h->v[6] += c->v[6];
303  h->v[8] += c->v[8];
304  fe1305x2_tobytearray(mac, h);
305 }
306 
307 #endif // OPENSSL_POLY1305_NEON
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
absl::str_format_internal::LengthMod::j
@ j
y
const double y
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3611
internal.h
string.h
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
poly1305_state_st
Definition: poly1305.c:44
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
in
const char * in
Definition: third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc:391
c
void c(T a)
Definition: miscompile_with_no_unique_address_test.cc:40
poly1305.h
swap
#define swap(a, b)
Definition: qsort.h:111
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
OPENSSL_memcpy
static void * OPENSSL_memcpy(void *dst, const void *src, size_t n)
Definition: third_party/boringssl-with-bazel/src/crypto/internal.h:819
x
int x
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3610
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
poly1305_state_st::buf_used
size_t buf_used
Definition: poly1305.c:49
poly1305_state_st::key
uint8_t key[16]
Definition: poly1305.c:50
poly1305_state_st::buf
uint8_t buf[16]
Definition: poly1305.c:48
key
const char * key
Definition: hpack_parser_table.cc:164
absl::str_format_internal::LengthMod::t
@ t
fix_build_deps.r
r
Definition: fix_build_deps.py:491
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
poly1305_state
uint8_t poly1305_state[512]
Definition: poly1305.h:25
autogen_x86imm.tmp
tmp
Definition: autogen_x86imm.py:12
absl::str_format_internal::LengthMod::h
@ h
mkowners.todo
todo
Definition: mkowners.py:209
OPENSSL_STATIC_ASSERT
#define OPENSSL_STATIC_ASSERT(cond, msg)
Definition: type_check.h:75
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:44