sha1-altivec.c
Go to the documentation of this file.
1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to. The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code. The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  * notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  * notice, this list of conditions and the following disclaimer in the
29  * documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  * must display the following acknowledgement:
32  * "This product includes cryptographic software written by
33  * Eric Young (eay@cryptsoft.com)"
34  * The word 'cryptographic' can be left out if the rouines from the library
35  * being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  * the apps directory (application code) you must include an acknowledgement:
38  * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed. i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.] */
56 
57 // Altivec-optimized SHA1 in C. This is tested on ppc64le only.
58 //
59 // References:
60 // https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
61 // http://arctic.org/~dean/crypto/sha1.html
62 //
63 // This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
64 // optimisations were added on top.
65 
66 #include <openssl/sha.h>
67 
68 #if defined(OPENSSL_PPC64LE)
69 
70 #include <altivec.h>
71 
72 void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
73 
// Rotates the 32-bit value |a| left by |n| bit positions and returns the
// result. The count is taken modulo 32, so a count of 0 (or 32) returns |a|
// unchanged instead of shifting by the full width of the type, which C leaves
// undefined. Both shift counts below are therefore always in [0, 31].
static uint32_t rotate(uint32_t a, int n) {
  const unsigned left = (unsigned)n & 31u;
  const unsigned right = (32u - left) & 31u;
  return (a << left) | (a >> right);
}
75 
76 typedef vector unsigned int vec_uint32_t;
77 typedef vector unsigned char vec_uint8_t;
78 
79 // Vector constants
80 static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
81  11, 10, 9, 8, 15, 14, 13, 12};
82 
83 // Shift amounts for byte and bit shifts and rotations
84 static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
85  32, 32, 32, 32, 32, 32, 32, 32};
86 static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
87  96, 96, 96, 96, 96, 96, 96, 96};
88 
89 #define K_00_19 0x5a827999UL
90 #define K_20_39 0x6ed9eba1UL
91 #define K_40_59 0x8f1bbcdcUL
92 #define K_60_79 0xca62c1d6UL
93 
94 // Vector versions of the above.
95 static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
96 static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
97 static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
98 static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
99 
100 // vector message scheduling: compute message schedule for round i..i+3 where i
101 // is divisible by 4. We return the schedule w[i..i+3] as a vector. In
102 // addition, we also precompute sum w[i..+3] and an additive constant K. This
103 // is done to offload some computation of f() in the integer execution units.
104 //
105 // Byte shifting code below may not be correct for big-endian systems.
106 static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
107  vec_uint32_t k) {
108  const vector unsigned char unaligned_data =
109  vec_vsx_ld(0, (const unsigned char*) data);
110  const vec_uint32_t v = (vec_uint32_t) unaligned_data;
111  const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
112  vec_st(w + k, 0, pre_added);
113  return w;
114 }
115 
116 // Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
117 //
118 // w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
119 // w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
120 // w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
121 // w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
122 //
123 // w[ i] = w'[ i]
124 // w[i+1] = w'[i+1]
125 // w[i+2] = w'[i+2]
126 // w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
127 static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
128  vec_uint32_t minus_8, vec_uint32_t minus_12,
129  vec_uint32_t minus_16, vec_uint32_t k) {
130  const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
131  const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
132  const vec_uint32_t k_1_bit = vec_splat_u32(1);
133  const vec_uint32_t w_prime =
134  vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
135  const vec_uint32_t w =
136  w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
137  vec_st(w + k, 0, pre_added);
138  return w;
139 }
140 
141 // Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
142 // w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2
143 static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
144  vec_uint32_t minus_8, vec_uint32_t minus_16,
145  vec_uint32_t minus_28, vec_uint32_t minus_32,
146  vec_uint32_t k) {
147  const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
148  const vec_uint32_t k_2_bits = vec_splat_u32(2);
149  const vec_uint32_t w =
150  vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
151  vec_st(w + k, 0, pre_added);
152  return w;
153 }
154 
// Per-round boolean functions of the working variables b, c and d.
//
// As pointed out by Wei Dai <weidai@eskimo.com>, the function
//
//   #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
//
// can be simplified to the code in F_00_19. Wei attributes these
// optimisations to Peter Gutmann's SHS code, and he attributes it to Rich
// Schroeppel. F_40_59 uses another tweak, again from Wei Dai:
//
//   (x&a)|(y&a) -> (x|y)&a
//
// F_00_19: each bit of b selects the matching bit of c (b = 1) or d (b = 0).
// F_20_39: bitwise parity of b, c and d.
// F_40_59: bitwise majority of b, c and d.
// F_60_79: identical to F_20_39.
#define F_00_19(b, c, d) ((d) ^ ((b) & ((c) ^ (d))))
#define F_20_39(b, c, d) ((d) ^ (c) ^ (b))
#define F_40_59(b, c, d) ((((b) | (c)) & (d)) | ((b) & (c)))
#define F_60_79(b, c, d) F_20_39(b, c, d)
164 
// One SHA-1 round each. The macros expect an array |w| of 32-bit schedule
// words to be in scope at the point of use; the K constants were pre-added
// to those words during message scheduling, so no constant is added here.
//
// Each round writes the new leading working variable to |f| and replaces |b|
// with |b| rotated left by 30 bits. |a|, |c|, |d| and |e| are only read.
// Callers rename the variables from round to round by permuting the argument
// list rather than by moving values between variables.
#define BODY_00_19(i, a, b, c, d, e, f) \
  do { \
    (f) = (e) + w[i] + F_00_19((b), (c), (d)) + rotate((a), 5); \
    (b) = rotate((b), 30); \
  } while (0)

#define BODY_20_39(i, a, b, c, d, e, f) \
  do { \
    (f) = (e) + w[i] + F_20_39((b), (c), (d)) + rotate((a), 5); \
    (b) = rotate((b), 30); \
  } while (0)

#define BODY_40_59(i, a, b, c, d, e, f) \
  do { \
    (f) = (e) + w[i] + F_40_59((b), (c), (d)) + rotate((a), 5); \
    (b) = rotate((b), 30); \
  } while (0)

#define BODY_60_79(i, a, b, c, d, e, f) \
  do { \
    (f) = (e) + w[i] + F_60_79((b), (c), (d)) + rotate((a), 5); \
    (b) = rotate((b), 30); \
  } while (0)
189 
190 void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
191  uint32_t A, B, C, D, E, T;
192 
193  A = state[0];
194  B = state[1];
195  C = state[2];
196  D = state[3];
197  E = state[4];
198 
199  for (;;) {
200  vec_uint32_t vw[20];
201  const uint32_t *w = (const uint32_t *)&vw;
202 
203  vec_uint32_t k = K_00_19_x_4;
204  const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
205  BODY_00_19(0, A, B, C, D, E, T);
206  BODY_00_19(1, T, A, B, C, D, E);
207  BODY_00_19(2, E, T, A, B, C, D);
208  BODY_00_19(3, D, E, T, A, B, C);
209 
210  const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
211  BODY_00_19(4, C, D, E, T, A, B);
212  BODY_00_19(5, B, C, D, E, T, A);
213  BODY_00_19(6, A, B, C, D, E, T);
214  BODY_00_19(7, T, A, B, C, D, E);
215 
216  const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
217  BODY_00_19(8, E, T, A, B, C, D);
218  BODY_00_19(9, D, E, T, A, B, C);
219  BODY_00_19(10, C, D, E, T, A, B);
220  BODY_00_19(11, B, C, D, E, T, A);
221 
222  const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
223  BODY_00_19(12, A, B, C, D, E, T);
224  BODY_00_19(13, T, A, B, C, D, E);
225  BODY_00_19(14, E, T, A, B, C, D);
226  BODY_00_19(15, D, E, T, A, B, C);
227 
228  const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
229  BODY_00_19(16, C, D, E, T, A, B);
230  BODY_00_19(17, B, C, D, E, T, A);
231  BODY_00_19(18, A, B, C, D, E, T);
232  BODY_00_19(19, T, A, B, C, D, E);
233 
234  k = K_20_39_x_4;
235  const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
236  BODY_20_39(20, E, T, A, B, C, D);
237  BODY_20_39(21, D, E, T, A, B, C);
238  BODY_20_39(22, C, D, E, T, A, B);
239  BODY_20_39(23, B, C, D, E, T, A);
240 
241  const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
242  BODY_20_39(24, A, B, C, D, E, T);
243  BODY_20_39(25, T, A, B, C, D, E);
244  BODY_20_39(26, E, T, A, B, C, D);
245  BODY_20_39(27, D, E, T, A, B, C);
246 
247  const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
248  BODY_20_39(28, C, D, E, T, A, B);
249  BODY_20_39(29, B, C, D, E, T, A);
250  BODY_20_39(30, A, B, C, D, E, T);
251  BODY_20_39(31, T, A, B, C, D, E);
252 
253  const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
254  BODY_20_39(32, E, T, A, B, C, D);
255  BODY_20_39(33, D, E, T, A, B, C);
256  BODY_20_39(34, C, D, E, T, A, B);
257  BODY_20_39(35, B, C, D, E, T, A);
258 
259  const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
260  BODY_20_39(36, A, B, C, D, E, T);
261  BODY_20_39(37, T, A, B, C, D, E);
262  BODY_20_39(38, E, T, A, B, C, D);
263  BODY_20_39(39, D, E, T, A, B, C);
264 
265  k = K_40_59_x_4;
266  const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
267  BODY_40_59(40, C, D, E, T, A, B);
268  BODY_40_59(41, B, C, D, E, T, A);
269  BODY_40_59(42, A, B, C, D, E, T);
270  BODY_40_59(43, T, A, B, C, D, E);
271 
272  const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
273  BODY_40_59(44, E, T, A, B, C, D);
274  BODY_40_59(45, D, E, T, A, B, C);
275  BODY_40_59(46, C, D, E, T, A, B);
276  BODY_40_59(47, B, C, D, E, T, A);
277 
278  const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
279  BODY_40_59(48, A, B, C, D, E, T);
280  BODY_40_59(49, T, A, B, C, D, E);
281  BODY_40_59(50, E, T, A, B, C, D);
282  BODY_40_59(51, D, E, T, A, B, C);
283 
284  const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
285  BODY_40_59(52, C, D, E, T, A, B);
286  BODY_40_59(53, B, C, D, E, T, A);
287  BODY_40_59(54, A, B, C, D, E, T);
288  BODY_40_59(55, T, A, B, C, D, E);
289 
290  const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
291  BODY_40_59(56, E, T, A, B, C, D);
292  BODY_40_59(57, D, E, T, A, B, C);
293  BODY_40_59(58, C, D, E, T, A, B);
294  BODY_40_59(59, B, C, D, E, T, A);
295 
296  k = K_60_79_x_4;
297  const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
298  BODY_60_79(60, A, B, C, D, E, T);
299  BODY_60_79(61, T, A, B, C, D, E);
300  BODY_60_79(62, E, T, A, B, C, D);
301  BODY_60_79(63, D, E, T, A, B, C);
302 
303  const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
304  BODY_60_79(64, C, D, E, T, A, B);
305  BODY_60_79(65, B, C, D, E, T, A);
306  BODY_60_79(66, A, B, C, D, E, T);
307  BODY_60_79(67, T, A, B, C, D, E);
308 
309  const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
310  BODY_60_79(68, E, T, A, B, C, D);
311  BODY_60_79(69, D, E, T, A, B, C);
312  BODY_60_79(70, C, D, E, T, A, B);
313  BODY_60_79(71, B, C, D, E, T, A);
314 
315  const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
316  BODY_60_79(72, A, B, C, D, E, T);
317  BODY_60_79(73, T, A, B, C, D, E);
318  BODY_60_79(74, E, T, A, B, C, D);
319  BODY_60_79(75, D, E, T, A, B, C);
320 
321  // We don't use the last value
322  (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
323  BODY_60_79(76, C, D, E, T, A, B);
324  BODY_60_79(77, B, C, D, E, T, A);
325  BODY_60_79(78, A, B, C, D, E, T);
326  BODY_60_79(79, T, A, B, C, D, E);
327 
328  const uint32_t mask = 0xffffffffUL;
329  state[0] = (state[0] + E) & mask;
330  state[1] = (state[1] + T) & mask;
331  state[2] = (state[2] + A) & mask;
332  state[3] = (state[3] + B) & mask;
333  state[4] = (state[4] + C) & mask;
334 
335  data += 64;
336  if (--num == 0) {
337  break;
338  }
339 
340  A = state[0];
341  B = state[1];
342  C = state[2];
343  D = state[3];
344  E = state[4];
345  }
346 }
347 
348 #endif // OPENSSL_PPC64LE
349 
350 #undef K_00_19
351 #undef K_20_39
352 #undef K_40_59
353 #undef K_60_79
354 #undef F_00_19
355 #undef F_20_39
356 #undef F_40_59
357 #undef F_60_79
358 #undef BODY_00_19
359 #undef BODY_20_39
360 #undef BODY_40_59
361 #undef BODY_60_79
K_40_59
#define K_40_59
Definition: sha1.c:122
K_60_79
#define K_60_79
Definition: sha1.c:123
absl::FormatConversionChar::E
@ E
C
#define C(x)
Definition: abseil-cpp/absl/hash/internal/city_test.cc:49
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
setup.k
k
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
T
#define T(upbtypeconst, upbtype, ctype, default_value)
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
sha.h
sha1_block_data_order
#define sha1_block_data_order
Definition: boringssl_prefix_symbols.h:3371
A
#define A(T)
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
K_20_39
#define K_20_39
Definition: sha1.c:121
A
Definition: miscompile_with_no_unique_address_test.cc:23
absl::rotate
ForwardIterator rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last)
Definition: abseil-cpp/absl/algorithm/algorithm.h:148
xds_manager.num
num
Definition: xds_manager.py:56
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
BODY_60_79
#define BODY_60_79(i, a, b, c, d, e, f, xa, xb, xc, xd)
Definition: sha1.c:170
BODY_40_59
#define BODY_40_59(i, a, b, c, d, e, f, xa, xb, xc, xd)
Definition: sha1.c:163
K_00_19
#define K_00_19
Definition: sha1.c:120


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:12