1909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * All rights reserved.
3909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
4909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * This package is an SSL implementation written
5909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * by Eric Young (eay@cryptsoft.com).
6909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * The implementation was written so as to conform with Netscapes SSL.
7909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
8909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * This library is free for commercial and non-commercial use as long as
9909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * the following conditions are aheared to.  The following conditions
10909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * apply to all code found in this distribution, be it the RC4, RSA,
11909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * included with this distribution is covered by the same copyright terms
13909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
15909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * Copyright remains Eric Young's, and as such any Copyright notices in
16909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * the code are not to be removed.
17909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * If this package is used in a product, Eric Young should be given attribution
18909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * as the author of the parts of the library used.
19909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * This can be in the form of a textual message at program startup or
20909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * in documentation (online or textual) provided with the package.
21909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
22909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * Redistribution and use in source and binary forms, with or without
23909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * modification, are permitted provided that the following conditions
24909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * are met:
25909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * 1. Redistributions of source code must retain the copyright
26909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    notice, this list of conditions and the following disclaimer.
27909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * 2. Redistributions in binary form must reproduce the above copyright
28909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    notice, this list of conditions and the following disclaimer in the
29909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    documentation and/or other materials provided with the distribution.
30909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * 3. All advertising materials mentioning features or use of this software
31909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    must display the following acknowledgement:
32909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    "This product includes cryptographic software written by
33909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *     Eric Young (eay@cryptsoft.com)"
34909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    The word 'cryptographic' can be left out if the rouines from the library
35909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    being used are not cryptographic related :-).
36909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * 4. If you include any Windows specific code (or a derivative thereof) from
37909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    the apps directory (application code) you must include an acknowledgement:
38909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
40909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * SUCH DAMAGE.
51909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
52909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * The licence and distribution terms for any publically available version or
53909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * derivative of this code cannot be changed.  i.e. this code cannot simply be
54909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * copied and put under another distribution licence
55909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * [including the GNU Public Licence.] */
56909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
57909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez/* Altivec-optimized SHA1 in C. This is tested on ppc64le only.
58909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
59909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * References:
60909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
61909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * http://arctic.org/~dean/crypto/sha1.html
62909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez *
63909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
64909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * optimisations were added on top. */
65909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
66909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#include <openssl/sha.h>
67909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
68909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#if defined(OPENSSL_PPC64LE)
69909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
70909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#include <altivec.h>
71909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
72909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdezvoid sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
73909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* rotate returns |a| rotated left by |n| bits.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (or any multiple of 32)
 * returns |a| unchanged. Without the reduction the right shift would be by the
 * full width of the type, which is undefined behaviour in C. For the constant
 * counts used by the round macros (5 and 30) the result is unchanged. */
static uint32_t rotate(uint32_t a, int n) {
  const unsigned left = (unsigned)n & 31u;
  const unsigned right = (32u - left) & 31u;
  return (a << left) | (a >> right);
}
75909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* 128-bit AltiVec vector types used throughout this file: four 32-bit lanes
 * and sixteen 8-bit lanes respectively. */
typedef vector unsigned int vec_uint32_t;
typedef vector unsigned char vec_uint8_t;

/* Vector constants */

/* vec_perm control vector that reverses the byte order inside each of the four
 * 32-bit words. sched_00_15 applies it to the message words it loads. */
static const vec_uint8_t k_swap_endianness = {3,  2,  1, 0, 7,  6,  5,  4,
                                              11, 10, 9, 8, 15, 14, 13, 12};

/* Shift amounts for byte and bit shifts and rotations.
 *
 * These are the control operands for vec_sro / vec_slo, which shift by whole
 * octets. The octet count sits above the low three bits of the control byte,
 * so a shift of N bytes is written as N * 8: 32 means 4 bytes and 96 means 12
 * bytes. The value is replicated into every byte of the vector. */
static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
                                      32, 32, 32, 32, 32, 32, 32, 32};
static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
                                       96, 96, 96, 96, 96, 96, 96, 96};

/* SHA-1 additive round constants, one per group of twenty rounds. */
#define K_00_19 0x5a827999UL
#define K_20_39 0x6ed9eba1UL
#define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL

/* Vector versions of the above: the constant replicated into all four lanes so
 * that it can be added to four schedule words at once. */
static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
99909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* Vector message scheduling.
 *
 * Each sched_* helper computes the message schedule for rounds i..i+3, where i
 * is divisible by 4, and returns the schedule words w[i..i+3] as a vector. In
 * addition, it stores w[i..i+3] with the additive round constant K already
 * added to |pre_added|. This is done to offload some of the per-round work
 * from the integer execution units.
 *
 * Byte shifting code below may not be correct for big-endian systems. */

/* sched_00_15 produces four schedule words directly from the message.
 *
 * It reads 16 bytes from |data|, reverses the byte order within each 32-bit
 * word and returns the result. The same words plus |k| are stored to
 * |pre_added|.
 *
 * |data| points into the caller's byte buffer and carries no alignment
 * guarantee, so it is read with vec_vsx_ld, which accepts any address.
 * Dereferencing it through a vector pointer would instead tell the compiler
 * that the address is 16-byte aligned. |pre_added| is written with vec_st and
 * is expected to be a properly aligned vector object. */
static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
                                vec_uint32_t k) {
  const vec_uint8_t raw = vec_vsx_ld(0, (const unsigned char *)data);
  const vec_uint32_t v = (vec_uint32_t)raw;
  const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
  vec_st(w + k, 0, pre_added);
  return w;
}
113909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* sched_16_31 computes w[i..i+3] for i in [16, 20, 24, 28].
 *
 * The standard recurrence makes w[i+3] depend on w[i], which belongs to the
 * very vector being computed. The work is therefore split in two steps. First
 * a provisional vector w' is formed with 0 standing in for the missing word:
 *
 *   w'[i  ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
 *   w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
 *   w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
 *   w'[i+3] = (     0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
 *
 * Then the last word is patched with the now-known first word:
 *
 *   w[i..i+2] = w'[i..i+2]
 *   w[i+3]    = w'[i+3] ^ (w'[i] <<< 1)
 *
 * |minus_N| holds the four schedule words starting at w[i-N]. The result is
 * returned, and result + |k| is stored to |pre_added|. */
static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
                                vec_uint32_t minus_8, vec_uint32_t minus_12,
                                vec_uint32_t minus_16, vec_uint32_t k) {
  const vec_uint32_t rot_by_1 = vec_splat_u32(1);

  /* Build the w[i-3..] and w[i-14..] columns from the neighbouring vectors.
   * The 4-byte octet shift leaves a zero word where w[i] would be needed. */
  const vec_uint32_t lag_3 = vec_sro(minus_4, k_4_bytes);
  const vec_uint32_t lag_14 = vec_sld((minus_12), (minus_16), 8);

  vec_uint32_t mixed = lag_3 ^ minus_8;
  mixed ^= lag_14;
  mixed ^= minus_16;
  const vec_uint32_t provisional = vec_rl(mixed, rot_by_1);

  /* Isolate one word of the provisional vector with a 12-byte octet shift,
   * rotate it once more and fold it in as the w[i+3] correction. */
  const vec_uint32_t correction =
      vec_rl(vec_slo(provisional, k_12_bytes), rot_by_1);
  const vec_uint32_t w = provisional ^ correction;

  vec_st(w + k, 0, pre_added);
  return w;
}
138909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* sched_32_79 computes w[i..i+3] for i in [32, 36, 40 ... 76] using
 *
 *   w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) <<< 2
 *
 * The nearest input is six words back, so all four lanes can be produced in
 * one step with no fix-up. |minus_N| holds the four schedule words starting at
 * w[i-N]; the w[i-6..] column is assembled from |minus_4| and |minus_8|. The
 * result is returned, and result + |k| is stored to |pre_added|. */
static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
                                vec_uint32_t minus_8, vec_uint32_t minus_16,
                                vec_uint32_t minus_28, vec_uint32_t minus_32,
                                vec_uint32_t k) {
  const vec_uint32_t rot_by_2 = vec_splat_u32(2);
  const vec_uint32_t lag_6 = vec_sld(minus_4, minus_8, 8);

  vec_uint32_t mixed = lag_6 ^ minus_16;
  mixed ^= minus_28;
  mixed ^= minus_32;

  const vec_uint32_t w = vec_rl(mixed, rot_by_2);
  vec_st(w + k, 0, pre_added);
  return w;
}
152909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
153909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
154909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * to the code in F_00_19. Wei attributes these optimisations to Peter
155909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
156909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * F(x,y,z) (((x) & (y))  |  ((~(x)) & (z))) I've just become aware of another
157909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez * tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a */
158909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
159909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
160909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
161909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#define F_60_79(b, c, d) F_20_39(b, c, d)
162909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* One SHA-1 round, shared by all four round groups.
 *
 * |fn| is the round function macro to apply to (b, c, d). The new working
 * value is written to |f| and |b| is rotated in place; the caller renames the
 * variables between rounds instead of moving values around.
 *
 * w[i] is read from a local array named |w| at the point of use. We pre-added
 * the K constants during message scheduling, so no constant is added here. */
#define SHA1_ROUND(fn, i, a, b, c, d, e, f)                \
  do {                                                     \
    (f) = w[i] + (e) + rotate((a), 5) + fn((b), (c), (d)); \
    (b) = rotate((b), 30);                                 \
  } while (0)

#define BODY_00_19(i, a, b, c, d, e, f) \
  SHA1_ROUND(F_00_19, i, a, b, c, d, e, f)

#define BODY_20_39(i, a, b, c, d, e, f) \
  SHA1_ROUND(F_20_39, i, a, b, c, d, e, f)

#define BODY_40_59(i, a, b, c, d, e, f) \
  SHA1_ROUND(F_40_59, i, a, b, c, d, e, f)

#define BODY_60_79(i, a, b, c, d, e, f) \
  SHA1_ROUND(F_60_79, i, a, b, c, d, e, f)
187909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
/* sha1_block_data_order runs the SHA-1 compression function over |num|
 * consecutive 64-byte blocks starting at |data| and updates the five 32-bit
 * chaining words in |state| in place.
 *
 * A call with num == 0 returns without reading |data| or touching |state|.
 *
 * For each block the message schedule is produced four words at a time by the
 * vector helpers and stored, with the round constant already added, into
 * |vw|. The scalar rounds read those words back through |w| as soon as they
 * are available, so scheduling and rounds are interleaved in groups of four.
 *
 * The rounds do not move values between A..E. Instead each BODY_* invocation
 * lists the six variables in a rotated order, with the last one receiving the
 * new value. After round 79 the working values sit in E, T, A, B, C. */
void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
  /* A counted loop: the block count is checked before each block is read, so
   * an empty input cannot run past the buffer. */
  for (; num > 0; num--, data += 64) {
    uint32_t A = state[0];
    uint32_t B = state[1];
    uint32_t C = state[2];
    uint32_t D = state[3];
    uint32_t E = state[4];
    uint32_t T;

    /* 20 vectors x 4 lanes = the 80 pre-added schedule words. The round
     * macros index this storage through the name |w|. */
    vec_uint32_t vw[20];
    const uint32_t *w = (const uint32_t *)&vw;

    /* Rounds 0..19. Words 16..19 already need the recurrence, but still use
     * the first round constant. */
    vec_uint32_t k = K_00_19_x_4;
    const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
    BODY_00_19(0, A, B, C, D, E, T);
    BODY_00_19(1, T, A, B, C, D, E);
    BODY_00_19(2, E, T, A, B, C, D);
    BODY_00_19(3, D, E, T, A, B, C);

    const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
    BODY_00_19(4, C, D, E, T, A, B);
    BODY_00_19(5, B, C, D, E, T, A);
    BODY_00_19(6, A, B, C, D, E, T);
    BODY_00_19(7, T, A, B, C, D, E);

    const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
    BODY_00_19(8, E, T, A, B, C, D);
    BODY_00_19(9, D, E, T, A, B, C);
    BODY_00_19(10, C, D, E, T, A, B);
    BODY_00_19(11, B, C, D, E, T, A);

    const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
    BODY_00_19(12, A, B, C, D, E, T);
    BODY_00_19(13, T, A, B, C, D, E);
    BODY_00_19(14, E, T, A, B, C, D);
    BODY_00_19(15, D, E, T, A, B, C);

    const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
    BODY_00_19(16, C, D, E, T, A, B);
    BODY_00_19(17, B, C, D, E, T, A);
    BODY_00_19(18, A, B, C, D, E, T);
    BODY_00_19(19, T, A, B, C, D, E);

    /* Rounds 20..39. */
    k = K_20_39_x_4;
    const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
    BODY_20_39(20, E, T, A, B, C, D);
    BODY_20_39(21, D, E, T, A, B, C);
    BODY_20_39(22, C, D, E, T, A, B);
    BODY_20_39(23, B, C, D, E, T, A);

    const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
    BODY_20_39(24, A, B, C, D, E, T);
    BODY_20_39(25, T, A, B, C, D, E);
    BODY_20_39(26, E, T, A, B, C, D);
    BODY_20_39(27, D, E, T, A, B, C);

    const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
    BODY_20_39(28, C, D, E, T, A, B);
    BODY_20_39(29, B, C, D, E, T, A);
    BODY_20_39(30, A, B, C, D, E, T);
    BODY_20_39(31, T, A, B, C, D, E);

    /* From word 32 on, the schedule uses the fix-up-free recurrence. */
    const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
    BODY_20_39(32, E, T, A, B, C, D);
    BODY_20_39(33, D, E, T, A, B, C);
    BODY_20_39(34, C, D, E, T, A, B);
    BODY_20_39(35, B, C, D, E, T, A);

    const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
    BODY_20_39(36, A, B, C, D, E, T);
    BODY_20_39(37, T, A, B, C, D, E);
    BODY_20_39(38, E, T, A, B, C, D);
    BODY_20_39(39, D, E, T, A, B, C);

    /* Rounds 40..59. */
    k = K_40_59_x_4;
    const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
    BODY_40_59(40, C, D, E, T, A, B);
    BODY_40_59(41, B, C, D, E, T, A);
    BODY_40_59(42, A, B, C, D, E, T);
    BODY_40_59(43, T, A, B, C, D, E);

    const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
    BODY_40_59(44, E, T, A, B, C, D);
    BODY_40_59(45, D, E, T, A, B, C);
    BODY_40_59(46, C, D, E, T, A, B);
    BODY_40_59(47, B, C, D, E, T, A);

    const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
    BODY_40_59(48, A, B, C, D, E, T);
    BODY_40_59(49, T, A, B, C, D, E);
    BODY_40_59(50, E, T, A, B, C, D);
    BODY_40_59(51, D, E, T, A, B, C);

    const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
    BODY_40_59(52, C, D, E, T, A, B);
    BODY_40_59(53, B, C, D, E, T, A);
    BODY_40_59(54, A, B, C, D, E, T);
    BODY_40_59(55, T, A, B, C, D, E);

    const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
    BODY_40_59(56, E, T, A, B, C, D);
    BODY_40_59(57, D, E, T, A, B, C);
    BODY_40_59(58, C, D, E, T, A, B);
    BODY_40_59(59, B, C, D, E, T, A);

    /* Rounds 60..79. */
    k = K_60_79_x_4;
    const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
    BODY_60_79(60, A, B, C, D, E, T);
    BODY_60_79(61, T, A, B, C, D, E);
    BODY_60_79(62, E, T, A, B, C, D);
    BODY_60_79(63, D, E, T, A, B, C);

    const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
    BODY_60_79(64, C, D, E, T, A, B);
    BODY_60_79(65, B, C, D, E, T, A);
    BODY_60_79(66, A, B, C, D, E, T);
    BODY_60_79(67, T, A, B, C, D, E);

    const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
    BODY_60_79(68, E, T, A, B, C, D);
    BODY_60_79(69, D, E, T, A, B, C);
    BODY_60_79(70, C, D, E, T, A, B);
    BODY_60_79(71, B, C, D, E, T, A);

    const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
    BODY_60_79(72, A, B, C, D, E, T);
    BODY_60_79(73, T, A, B, C, D, E);
    BODY_60_79(74, E, T, A, B, C, D);
    BODY_60_79(75, D, E, T, A, B, C);

    /* We don't use the returned vector of the last call; only its pre-added
     * words stored in vw[19] are consumed by the final four rounds. */
    (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
    BODY_60_79(76, C, D, E, T, A, B);
    BODY_60_79(77, B, C, D, E, T, A);
    BODY_60_79(78, A, B, C, D, E, T);
    BODY_60_79(79, T, A, B, C, D, E);

    /* Fold the working values back into the chaining state. uint32_t
     * arithmetic already wraps modulo 2^32, so no masking is needed. */
    state[0] += E;
    state[1] += T;
    state[2] += A;
    state[3] += B;
    state[4] += C;
  }
}
345909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez
346909b19f027eb0af12513f4d5589efdd67e34bd91Steven Valdez#endif  /* OPENSSL_PPC64LE */
347