17 #include "../../internal.h"
20 #if !defined(BORINGSSL_HAS_UINT128) && defined(OPENSSL_SSE2)
21 #include <emmintrin.h>
34 #if defined(BORINGSSL_HAS_UINT128)
53 uint128_t c0 = (a0 * (uint128_t)b0) ^ (
a1 * (uint128_t)b3) ^
54 (
a2 * (uint128_t)
b2) ^ (a3 * (uint128_t)
b1);
55 uint128_t
c1 = (a0 * (uint128_t)
b1) ^ (
a1 * (uint128_t)b0) ^
56 (
a2 * (uint128_t)b3) ^ (a3 * (uint128_t)
b2);
57 uint128_t
c2 = (a0 * (uint128_t)
b2) ^ (
a1 * (uint128_t)
b1) ^
58 (
a2 * (uint128_t)b0) ^ (a3 * (uint128_t)b3);
59 uint128_t c3 = (a0 * (uint128_t)b3) ^ (
a1 * (uint128_t)
b2) ^
60 (
a2 * (uint128_t)
b1) ^ (a3 * (uint128_t)b0);
67 uint128_t extra = (a0_mask &
b) ^ ((uint128_t)(a1_mask &
b) << 1) ^
68 ((uint128_t)(a2_mask &
b) << 2) ^
69 ((uint128_t)(a3_mask &
b) << 3);
82 #elif defined(OPENSSL_SSE2)
87 __m128i aa = _mm_setr_epi32(
a, 0,
a, 0);
88 __m128i bb = _mm_setr_epi32(
b, 0,
b, 0);
91 _mm_and_si128(aa, _mm_setr_epi32(0x11111111, 0, 0x11111111, 0));
93 _mm_and_si128(aa, _mm_setr_epi32(0x44444444, 0, 0x44444444, 0));
95 _mm_and_si128(bb, _mm_setr_epi32(0x11111111, 0, 0x22222222, 0));
97 _mm_and_si128(bb, _mm_setr_epi32(0x44444444, 0, 0x88888888, 0));
100 _mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));
102 _mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));
105 _mm_and_si128(aa, _mm_setr_epi32(0x22222222, 0, 0x22222222, 0));
107 _mm_and_si128(aa, _mm_setr_epi32(0x88888888, 0, 0x88888888, 0));
109 _mm_and_si128(bb, _mm_setr_epi32(0x88888888, 0, 0x11111111, 0));
111 _mm_and_si128(bb, _mm_setr_epi32(0x22222222, 0, 0x44444444, 0));
113 c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));
114 c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));
115 c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));
116 c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));
118 c0c1 = _mm_and_si128(
119 c0c1, _mm_setr_epi32(0x11111111, 0x11111111, 0x22222222, 0x22222222));
120 c2c3 = _mm_and_si128(
121 c2c3, _mm_setr_epi32(0x44444444, 0x44444444, 0x88888888, 0x88888888));
123 c0c1 = _mm_xor_si128(c0c1, c2c3);
125 c0c1 = _mm_xor_si128(c0c1, _mm_srli_si128(c0c1, 8));
139 mid = _mm_xor_si128(mid, lo);
140 mid = _mm_xor_si128(mid, hi);
141 __m128i
ret = _mm_unpacklo_epi64(lo, hi);
142 mid = _mm_slli_si128(mid, 4);
143 mid = _mm_and_si128(mid, _mm_setr_epi32(0, 0xffffffff, 0xffffffff, 0));
144 ret = _mm_xor_si128(
ret, mid);
149 #else // !BORINGSSL_HAS_UINT128 && !OPENSSL_SSE2
173 return (c0 &
UINT64_C(0x1111111111111111)) |
176 (c3 &
UINT64_C(0x8888888888888888));
189 *out_lo = lo ^ (mid << 32);
190 *out_hi = hi ^ (mid >> 32);
193 #endif // BORINGSSL_HAS_UINT128
206 Htable[0].
lo = Xi[1];
207 Htable[0].
hi = Xi[0];
213 Htable[0].
hi |= Htable[0].
lo >> 63;
218 Htable[0].
lo ^= carry & 1;
219 Htable[0].
hi ^= carry &
UINT64_C(0xc200000000000000);
252 r1 ^= (r0 << 63) ^ (r0 << 62) ^ (r0 << 57);