1/* crypto/bn/bn_nist.c */
2/*
3 * Written by Nils Larsch for the OpenSSL project
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in
17 *    the documentation and/or other materials provided with the
18 *    distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 *    software must display the following acknowledgment:
22 *    "This product includes software developed by the OpenSSL Project
23 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 *    endorse or promote products derived from this software without
27 *    prior written permission. For written permission, please contact
28 *    openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 *    nor may "OpenSSL" appear in their names without prior written
32 *    permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 *    acknowledgment:
36 *    "This product includes software developed by the OpenSSL Project
37 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com).  This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include "bn_lcl.h"
60#include "cryptlib.h"
61
62
/*
 * Number of BN_BITS2-bit words needed to hold each NIST prime: the bit
 * length rounded up to a whole number of words.
 *
 * Every replacement list is wrapped in an outer pair of parentheses so the
 * macro behaves as a single operand wherever it is used; without them an
 * expression such as 'x / BN_NIST_192_TOP' would parse as
 * '(x / (192+BN_BITS2-1)) / BN_BITS2'.
 */
#define BN_NIST_192_TOP	((192+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_224_TOP	((224+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_256_TOP	((256+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_384_TOP	((384+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_521_TOP	((521+BN_BITS2-1)/BN_BITS2)
68
/*
 * pre-computed tables are "carry-less" values of modulus*(i+1)
 *
 * Words are stored least significant first.  Row i of a two-dimensional
 * _nist_p_NNN table holds (i+1)*p truncated to BN_NIST_NNN_TOP words; the
 * reduction routines in this file index the rows with 'carry-1' or
 * '-carry-1'.  _nist_p_521 is a single row holding p itself.
 * Each _nist_p_NNN_sqr array is wrapped in a BIGNUM by the matching
 * reduction routine and used as the exclusive upper bound of its fast
 * path (presumably p*p - the 192, 224 and 521 values were checked by hand).
 */
#if BN_BITS2 == 64
/* layout for 64-bit words */
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
	{0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL},
	{0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL},
	{0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFCULL,0xFFFFFFFFFFFFFFFFULL}
	};
static const BN_ULONG _nist_p_192_sqr[] = {
	0x0000000000000001ULL,0x0000000000000002ULL,0x0000000000000001ULL,
	0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL
	};
/*
 * 224 bits fill only half of the top 64-bit word, so the second row keeps
 * the bit that 2*p carries past bit 223 (top word 0x00000001FFFFFFFF).
 */
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
	{0x0000000000000001ULL,0xFFFFFFFF00000000ULL,
	 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL},
	{0x0000000000000002ULL,0xFFFFFFFE00000000ULL,
	 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFFULL} /* this one is "carry-full" */
	};
static const BN_ULONG _nist_p_224_sqr[] = {
	0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
	0xFFFFFFFFFFFFFFFFULL,0x0000000200000000ULL,
	0x0000000000000000ULL,0xFFFFFFFFFFFFFFFEULL,
	0xFFFFFFFFFFFFFFFFULL
	};
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
	{0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFF00000001ULL},
	{0xFFFFFFFFFFFFFFFEULL,0x00000001FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFE00000002ULL},
	{0xFFFFFFFFFFFFFFFDULL,0x00000002FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFD00000003ULL},
	{0xFFFFFFFFFFFFFFFCULL,0x00000003FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFC00000004ULL},
	{0xFFFFFFFFFFFFFFFBULL,0x00000004FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFB00000005ULL},
	};
static const BN_ULONG _nist_p_256_sqr[] = {
	0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
	0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFEULL,
	0x00000001FFFFFFFEULL,0x00000001FFFFFFFEULL,
	0xFFFFFFFE00000001ULL,0xFFFFFFFE00000002ULL
	};
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
	{0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,0xFFFFFFFFFFFFFFFEULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000002FFFFFFFDULL,0xFFFFFFFD00000000ULL,0xFFFFFFFFFFFFFFFCULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000003FFFFFFFCULL,0xFFFFFFFC00000000ULL,0xFFFFFFFFFFFFFFFBULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000004FFFFFFFBULL,0xFFFFFFFB00000000ULL,0xFFFFFFFFFFFFFFFAULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	};
static const BN_ULONG _nist_p_384_sqr[] = {
	0xFFFFFFFE00000001ULL,0x0000000200000000ULL,0xFFFFFFFE00000000ULL,
	0x0000000200000000ULL,0x0000000000000001ULL,0x0000000000000000ULL,
	0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL
	};
static const BN_ULONG _nist_p_521[] =
	{0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0x00000000000001FFULL};
static const BN_ULONG _nist_p_521_sqr[] = {
	0x0000000000000001ULL,0x0000000000000000ULL,0x0000000000000000ULL,
	0x0000000000000000ULL,0x0000000000000000ULL,0x0000000000000000ULL,
	0x0000000000000000ULL,0x0000000000000000ULL,0xFFFFFFFFFFFFFC00ULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0x000000000003FFFFULL
	};
#elif BN_BITS2 == 32
/* same values, laid out for 32-bit words */
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
	{0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
	};
static const BN_ULONG _nist_p_192_sqr[] = {
	0x00000001,0x00000000,0x00000002,0x00000000,0x00000001,0x00000000,
	0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
	};
/* 224 bits fill seven 32-bit words exactly, so both rows are "carry-less" */
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
	{0x00000001,0x00000000,0x00000000,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0x00000002,0x00000000,0x00000000,0xFFFFFFFE,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
	};
static const BN_ULONG _nist_p_224_sqr[] = {
	0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
	0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000002,
	0x00000000,0x00000000,0xFFFFFFFE,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF
	};
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
	{0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
	 0x00000000,0x00000000,0x00000001,0xFFFFFFFF},
	{0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0x00000001,
	 0x00000000,0x00000000,0x00000002,0xFFFFFFFE},
	{0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0x00000002,
	 0x00000000,0x00000000,0x00000003,0xFFFFFFFD},
	{0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0x00000003,
	 0x00000000,0x00000000,0x00000004,0xFFFFFFFC},
	{0xFFFFFFFB,0xFFFFFFFF,0xFFFFFFFF,0x00000004,
	 0x00000000,0x00000000,0x00000005,0xFFFFFFFB},
	};
static const BN_ULONG _nist_p_256_sqr[] = {
	0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0x00000001,
	0xFFFFFFFE,0x00000001,0xFFFFFFFE,0x00000001,
	0x00000001,0xFFFFFFFE,0x00000002,0xFFFFFFFE
	};
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
	{0xFFFFFFFF,0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFD,0x00000002,0x00000000,0xFFFFFFFD,0xFFFFFFFC,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFC,0x00000003,0x00000000,0xFFFFFFFC,0xFFFFFFFB,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFB,0x00000004,0x00000000,0xFFFFFFFB,0xFFFFFFFA,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	};
static const BN_ULONG _nist_p_384_sqr[] = {
	0x00000001,0xFFFFFFFE,0x00000000,0x00000002,0x00000000,0xFFFFFFFE,
	0x00000000,0x00000002,0x00000001,0x00000000,0x00000000,0x00000000,
	0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
	};
static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0x000001FF};
static const BN_ULONG _nist_p_521_sqr[] = {
	0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
	0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
	0x00000000,0x00000000,0x00000000,0x00000000,0xFFFFFC00,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0x0003FFFF
	};
#else
#error "unsupported BN_BITS2"
#endif
215
216
/*
 * Read-only BIGNUM wrappers around the first row of each modulus table
 * (for 521, around the single-row table).  These are the objects handed
 * out by the BN_get0_nist_prime_*() accessors and used as the modulus by
 * the BN_nist_mod_*() routines in this file.
 *
 * NOTE(review): the initializers are positional; presumably the fields are
 * word pointer, top, dmax, neg and flags - confirm against the BIGNUM
 * definition.  The casts drop the 'const' of the tables, so nothing may
 * write through these pointers.
 */
static const BIGNUM _bignum_nist_p_192 =
	{
	(BN_ULONG *)_nist_p_192[0],
	BN_NIST_192_TOP,
	BN_NIST_192_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_224 =
	{
	(BN_ULONG *)_nist_p_224[0],
	BN_NIST_224_TOP,
	BN_NIST_224_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_256 =
	{
	(BN_ULONG *)_nist_p_256[0],
	BN_NIST_256_TOP,
	BN_NIST_256_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_384 =
	{
	(BN_ULONG *)_nist_p_384[0],
	BN_NIST_384_TOP,
	BN_NIST_384_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_521 =
	{
	(BN_ULONG *)_nist_p_521,
	BN_NIST_521_TOP,
	BN_NIST_521_TOP,
	0,
	BN_FLG_STATIC_DATA
	};
261
262
263const BIGNUM *BN_get0_nist_prime_192(void)
264	{
265	return &_bignum_nist_p_192;
266	}
267
268const BIGNUM *BN_get0_nist_prime_224(void)
269	{
270	return &_bignum_nist_p_224;
271	}
272
273const BIGNUM *BN_get0_nist_prime_256(void)
274	{
275	return &_bignum_nist_p_256;
276	}
277
278const BIGNUM *BN_get0_nist_prime_384(void)
279	{
280	return &_bignum_nist_p_384;
281	}
282
283const BIGNUM *BN_get0_nist_prime_521(void)
284	{
285	return &_bignum_nist_p_521;
286	}
287
288
289static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
290	{
291	int i;
292	BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
293
294#ifdef BN_DEBUG
295	OPENSSL_assert(top <= max);
296#endif
297	for (i = (top); i != 0; i--)
298		*_tmp1++ = *_tmp2++;
299	for (i = (max) - (top); i != 0; i--)
300		*_tmp1++ = (BN_ULONG) 0;
301	}
302
303static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
304	{
305	int i;
306	BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
307	for (i = (top); i != 0; i--)
308		*_tmp1++ = *_tmp2++;
309	}
310
/*
 * Word/limb shuffling helpers used by the nist_set_*() macros.
 *
 *   bn_cp_64(to, n, from, m)   64-bit position n of 'to' = position m of
 *                              'from', or 0 when m is negative
 *   bn_64_set_0(to, n)         clear 64-bit position n of 'to'
 *   bn_cp_32(to, n, from, m)   same as bn_cp_64 for 32-bit positions
 *   bn_32_set_0(to, n)         clear 32-bit position n of 'to'
 *
 * The expansions deliberately carry their own trailing ';' (or are brace
 * blocks): the nist_set_*() macros invoke them without one, so that shape
 * must not change.  Macro parameters are parenthesized wherever they are
 * used as an operand, so expression arguments (e.g. 'c ? -1 : 0') are
 * evaluated as a unit.
 *
 * NIST_INT64, when defined, names a signed 64-bit type used as the
 * accumulator of the limb-wise reduction code.
 */
#if BN_BITS2 == 64
#define bn_cp_64(to, n, from, m)	(to)[n] = ((m)>=0)?((from)[m]):0;
#define bn_64_set_0(to, n)		(to)[n] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n overwrites the whole word with the low half, the following
 * odd n then ORs the high half in).
 * BN_MASK2l / BN_MASK2h are presumably the low / high 32-bit masks of a
 * word - confirm against their definition.
 */
#define bn_cp_32_naked(to, n, from, m)	(((n)&1)?((to)[(n)/2]|=((m)&1)?((from)[(m)/2]&BN_MASK2h):((from)[(m)/2]<<32))\
						:((to)[(n)/2] =((m)&1)?((from)[(m)/2]>>32):((from)[(m)/2]&BN_MASK2l)))
#define bn_32_set_0(to, n)		(((n)&1)?((to)[(n)/2]&=BN_MASK2l):((to)[(n)/2]=0));
#define bn_cp_32(to,n,from,m)		((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
# if defined(L_ENDIAN)
#  if defined(__arch64__)
#   define NIST_INT64 long
#  else
#   define NIST_INT64 long long
#  endif
# endif
#else
/* 32-bit words: a 64-bit position is simply two consecutive words */
#define bn_cp_64(to, n, from, m) \
	{ \
	bn_cp_32(to, (n)*2, from, (m)*2); \
	bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
	}
#define bn_64_set_0(to, n) \
	{ \
	bn_32_set_0(to, (n)*2); \
	bn_32_set_0(to, (n)*2+1); \
	}
#define bn_cp_32(to, n, from, m)	(to)[n] = ((m)>=0)?((from)[m]):0;
#define bn_32_set_0(to, n)		(to)[n] = (BN_ULONG)0;
# if defined(_WIN32) && !defined(__GNUC__)
#  define NIST_INT64 __int64
# elif defined(BN_LLONG)
#  define NIST_INT64 long long
# endif
#endif /* BN_BITS2 != 64 */
348
/*
 * Build a three-position (3 x 64 bit) value in 'to' from 'from'.
 * a1..a3 are 64-bit position numbers, most significant first: a3 lands in
 * position 0 of 'to' and a1 in position 2.  Each number is rebased by 3
 * before indexing 'from'; 0 rebases to a negative index, which bn_cp_64
 * turns into a zero position.
 * The expansion is a brace block and the inner bn_cp_64 invocations take
 * no ';' of their own.
 */
#define nist_set_192(to, from, a1, a2, a3) \
	{ \
	bn_cp_64(to, 0, from, (a3) - 3) \
	bn_cp_64(to, 1, from, (a2) - 3) \
	bn_cp_64(to, 2, from, (a1) - 3) \
	}
355
/*
 * Reduce 'a' modulo the built-in 192-bit NIST prime and store the result
 * in 'r'.
 *
 *   r      receives the result; may be the same BIGNUM as 'a', in which
 *          case the words of 'a' are overwritten in place
 *   a      value to reduce
 *   field  ignored: it is replaced by the built-in modulus below
 *   ctx    only handed to BN_nnmod() on the fallback path
 *
 * Returns 1 on success, 0 if 'r' could not be expanded or copied; on the
 * fallback path the result of BN_nnmod() is returned unchanged.
 */
int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int      top = a->top, i;
	int      carry;
	register BN_ULONG *r_d, *a_d = a->d;
	/* upper part of 'a', addressable as words or as unsigned ints */
	union	{
		BN_ULONG	bn[BN_NIST_192_TOP];
		unsigned int	ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
		} buf;
	BN_ULONG c_d[BN_NIST_192_TOP],
		*res;
	PTR_SIZE_INT mask;
	/* BIGNUM view of _nist_p_192_sqr: upper bound of the fast path */
	static const BIGNUM _bignum_nist_p_192_sqr = {
		(BN_ULONG *)_nist_p_192_sqr,
		sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
		sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
		0,BN_FLG_STATIC_DATA };

	field = &_bignum_nist_p_192; /* just to make sure */

	/* negative or too large input: use the generic reduction instead */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_192_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	/* a == p gives 0; a < p is already reduced and is just copied */
	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		return (r == a) ? 1 : (BN_copy(r ,a) != NULL);

	/* r_d starts out as the low BN_NIST_192_TOP words of 'a' */
	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_192_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
		}
	else
		r_d = a_d;

	/* buf = the words of 'a' above those, zero-padded to a full width */
	nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);

#if defined(NIST_INT64)
	/*
	 * Fold the upper part onto the lower one 32 bits at a time.  Each
	 * group sums everything that lands on one 32-bit limb of the result,
	 * stores the low 32 bits and keeps the rest in 'acc' for the next
	 * limb.  bp[k*2-6] and bp[k*2-5] address the two 32-bit halves of
	 * 64-bit position k (k = 3..5) of the input, rebased to 'buf'.
	 */
	{
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf.ui;

	acc  = rp[0];	acc += bp[3*2-6];
			acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc += bp[3*2-5];
			acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc += bp[3*2-6];
			acc += bp[4*2-6];
			acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[3*2-5];
			acc += bp[4*2-5];
			acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32;

	acc += rp[4];	acc += bp[4*2-6];
			acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[4*2-5];
			acc += bp[5*2-5]; rp[5] = (unsigned int)acc;

	/* whatever is left above the top limb is the overall carry */
	carry = (int)(acc>>32);
	}
#else
	/*
	 * Same folding done with three full-width additions; 'carry' sums
	 * the values returned by bn_add_words().
	 */
	{
	BN_ULONG t_d[BN_NIST_192_TOP];

	nist_set_192(t_d, buf.bn, 0, 3, 3);
	carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	nist_set_192(t_d, buf.bn, 4, 4, 0);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	nist_set_192(t_d, buf.bn, 5, 5, 5)
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	}
#endif
	/*
	 * A positive carry selects the matching pre-computed multiple of the
	 * modulus to subtract; 'carry' is then reused for the value returned
	 * by that subtraction.  With no carry it is forced to 1.
	 */
	if (carry > 0)
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
	else
		carry = 1;

	/*
	 * we need 'if (carry==0 || result>=modulus) result-=modulus;'
	 * as comparison implies subtraction, we can write
	 * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
	 * this is what happens below, but without explicit if:-) a.
	 */
	/* mask is all ones only when both the borrow and 'carry' are set */
	mask  = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	/* select r_d when mask is all ones, otherwise c_d (= r_d - p) */
	res   = (BN_ULONG *)
	 (((PTR_SIZE_INT)c_d&~mask) | ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_192_TOP);
	r->top = BN_NIST_192_TOP;
	bn_correct_top(r);

	return 1;
	}
462
/*
 * Signature shared by bn_add_words() and bn_sub_words(); the reduction
 * routines below pick one of the two at run time through a pointer of
 * this type.
 */
typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *,const BN_ULONG *,const BN_ULONG *,int);
464
/*
 * Build a seven-limb (7 x 32 bit) value in 'to' from 'from'.
 * a1..a7 are 32-bit limb numbers, most significant first: a7 lands in
 * limb 0 of 'to' and a1 in limb 6.  Each number is rebased by 7 before
 * indexing 'from'; 0 rebases to a negative index, which bn_cp_32 turns
 * into a zero limb.
 * The limbs are written in ascending order, as the 64-bit variant of
 * bn_cp_32 requires.
 */
#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
	{ \
	bn_cp_32(to, 0, from, (a7) - 7) \
	bn_cp_32(to, 1, from, (a6) - 7) \
	bn_cp_32(to, 2, from, (a5) - 7) \
	bn_cp_32(to, 3, from, (a4) - 7) \
	bn_cp_32(to, 4, from, (a3) - 7) \
	bn_cp_32(to, 5, from, (a2) - 7) \
	bn_cp_32(to, 6, from, (a1) - 7) \
	}
475
/*
 * Reduce 'a' modulo the built-in 224-bit NIST prime and store the result
 * in 'r'.
 *
 *   r      receives the result; may be the same BIGNUM as 'a', in which
 *          case the words of 'a' are overwritten in place
 *   a      value to reduce
 *   field  ignored: it is replaced by the built-in modulus below
 *   ctx    only handed to BN_nnmod() on the fallback path
 *
 * Returns 1 on success, 0 if 'r' could not be expanded or copied; on the
 * fallback path the result of BN_nnmod() is returned unchanged.
 */
int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int	top = a->top, i;
	int	carry;
	BN_ULONG *r_d, *a_d = a->d;
	BN_ULONG buf[BN_NIST_224_TOP],
		 c_d[BN_NIST_224_TOP],
		*res;
	PTR_SIZE_INT mask;
	/* lets the final step call bn_sub_words or bn_add_words via one pointer */
	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
	/* BIGNUM view of _nist_p_224_sqr: upper bound of the fast path */
	static const BIGNUM _bignum_nist_p_224_sqr = {
		(BN_ULONG *)_nist_p_224_sqr,
		sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
		sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
		0,BN_FLG_STATIC_DATA };


	field = &_bignum_nist_p_224; /* just to make sure */

	/* negative or too large input: use the generic reduction instead */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_224_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	/* a == p gives 0; a < p is already reduced and is just copied */
	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		return (r == a)? 1 : (BN_copy(r ,a) != NULL);

	/* r_d starts out as the low BN_NIST_224_TOP words of 'a' */
	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_224_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
		}
	else
		r_d = a_d;

#if BN_BITS2==64
	/* copy upper 256 bits of 448 bit number ... */
	nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
	/* ... and right shift by 32 to obtain upper 224 bits */
	nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8);
	/* truncate lower part to 224 bits too */
	r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
#else
	/* 224 bits are a whole number of 32-bit words: plain copy of the upper part */
	nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
#endif

#if defined(NIST_INT64) && BN_BITS2!=64
	/*
	 * Limb-wise folding with a signed 64-bit accumulator: each group adds
	 * and subtracts the upper limbs (bp[k-7] is limb k of the input) that
	 * land on one result limb, stores the low 32 bits and carries the
	 * rest - possibly negative - into the next group.
	 */
	{
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf;

	acc  = rp[0];	acc -= bp[7-7];
			acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc -= bp[8-7];
			acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc -= bp[9-7];
			acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[7-7];
			acc += bp[11-7];
			acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32;

	acc += rp[4];	acc += bp[8-7];
			acc += bp[12-7];
			acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[9-7];
			acc += bp[13-7];
			acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32;

	acc += rp[6];	acc += bp[10-7];
			acc -= bp[13-7]; rp[6] = (unsigned int)acc;

	/* signed overflow (carry > 0) or underflow (carry < 0) of the fold */
	carry = (int)(acc>>32);
	/* NOTE(review): never compiled - the enclosing #if requires BN_BITS2!=64 */
# if BN_BITS2==64
	rp[7] = carry;
# endif
	}
#else
	/*
	 * Same folding done with two full-width additions and two full-width
	 * subtractions; 'carry' nets the values returned by bn_add_words()
	 * and bn_sub_words().
	 */
	{
	BN_ULONG t_d[BN_NIST_224_TOP];

	nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
	carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);

#if BN_BITS2==64
	/*
	 * 224 bits end in the middle of the top word, so the overflow is read
	 * from that word's upper 32 bits, replacing the count gathered above.
	 */
	carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
#endif
	}
#endif
	/* default final step: trial subtraction of the modulus */
	u.f = bn_sub_words;
	if (carry > 0)
		{
		/* remove the matching pre-computed multiple of the modulus */
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_224[carry-1],BN_NIST_224_TOP);
#if BN_BITS2==64
		/* on 64-bit the indicator is the inverted bit 32 of the top word */
		carry=(int)(~(r_d[BN_NIST_224_TOP-1]>>32))&1;
#endif
		}
	else if (carry < 0)
		{
		/* it's a bit more complicated logic in this case.
		 * if bn_add_words yields no carry, then result
		 * has to be adjusted by unconditionally *adding*
		 * the modulus. but if it does, then result has
		 * to be compared to the modulus and conditionally
		 * adjusted by *subtracting* the latter. */
		carry = (int)bn_add_words(r_d,r_d,_nist_p_224[-carry-1],BN_NIST_224_TOP);
		/* pick bn_sub_words if the addition carried, bn_add_words if not */
		mask = 0-(PTR_SIZE_INT)carry;
		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
		 ((PTR_SIZE_INT)bn_add_words&~mask);
		}
	else
		carry = 1;

	/* otherwise it's effectively same as in BN_nist_mod_192... */
	/* c_d = r_d -/+ p; mask is all ones only if that call and 'carry' both returned non-zero */
	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	/* select r_d when mask is all ones, otherwise c_d */
	res   = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
	 ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_224_TOP);
	r->top = BN_NIST_224_TOP;
	bn_correct_top(r);

	return 1;
	}
617
/*
 * Build an eight-limb (8 x 32 bit) value in 'to' from 'from'.
 * a1..a8 are 32-bit limb numbers, most significant first: a8 lands in
 * limb 0 of 'to' and a1 in limb 7.  Each number is rebased by 8 before
 * indexing 'from'; 0 rebases to a negative index, which bn_cp_32 turns
 * into a zero limb.
 * The limbs are written in ascending order, as the 64-bit variant of
 * bn_cp_32 requires.
 */
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
	{ \
	bn_cp_32(to, 0, from, (a8) - 8) \
	bn_cp_32(to, 1, from, (a7) - 8) \
	bn_cp_32(to, 2, from, (a6) - 8) \
	bn_cp_32(to, 3, from, (a5) - 8) \
	bn_cp_32(to, 4, from, (a4) - 8) \
	bn_cp_32(to, 5, from, (a3) - 8) \
	bn_cp_32(to, 6, from, (a2) - 8) \
	bn_cp_32(to, 7, from, (a1) - 8) \
	}
629
/*
 * Reduce 'a' modulo the built-in 256-bit NIST prime and store the result
 * in 'r'.
 *
 *   r      receives the result; may be the same BIGNUM as 'a', in which
 *          case the words of 'a' are overwritten in place
 *   a      value to reduce
 *   field  ignored: it is replaced by the built-in modulus below
 *   ctx    only handed to BN_nnmod() on the fallback path
 *
 * Returns 1 on success, 0 if 'r' could not be expanded or copied; on the
 * fallback path the result of BN_nnmod() is returned unchanged.
 */
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int	i, top = a->top;
	int	carry = 0;
	register BN_ULONG *a_d = a->d, *r_d;
	/* upper part of 'a', addressable as words or as unsigned ints */
	union	{
		BN_ULONG bn[BN_NIST_256_TOP];
		unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
		} buf;
	BN_ULONG c_d[BN_NIST_256_TOP],
		*res;
	PTR_SIZE_INT mask;
	/* lets the final step call bn_sub_words or bn_add_words via one pointer */
	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
	/* BIGNUM view of _nist_p_256_sqr: upper bound of the fast path */
	static const BIGNUM _bignum_nist_p_256_sqr = {
		(BN_ULONG *)_nist_p_256_sqr,
		sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
		sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
		0,BN_FLG_STATIC_DATA };

	field = &_bignum_nist_p_256; /* just to make sure */

	/* negative or too large input: use the generic reduction instead */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_256_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	/* a == p gives 0; a < p is already reduced and is just copied */
	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		return (r == a)? 1 : (BN_copy(r ,a) != NULL);

	/* r_d starts out as the low BN_NIST_256_TOP words of 'a' */
	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_256_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
		}
	else
		r_d = a_d;

	/* buf = the words of 'a' above those, zero-padded to a full width */
	nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);

#if defined(NIST_INT64)
	/*
	 * Limb-wise folding with a signed 64-bit accumulator: each group adds
	 * and subtracts the upper limbs (bp[k-8] is limb k of the input) that
	 * land on one result limb, stores the low 32 bits and carries the
	 * rest - possibly negative - into the next group.  A limb listed
	 * twice is counted twice.
	 */
	{
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf.ui;

	acc = rp[0];	acc += bp[8-8];
			acc += bp[9-8];
			acc -= bp[11-8];
			acc -= bp[12-8];
			acc -= bp[13-8];
			acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc += bp[9-8];
			acc += bp[10-8];
			acc -= bp[12-8];
			acc -= bp[13-8];
			acc -= bp[14-8];
			acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc += bp[10-8];
			acc += bp[11-8];
			acc -= bp[13-8];
			acc -= bp[14-8];
			acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[11-8];
			acc += bp[11-8];
			acc += bp[12-8];
			acc += bp[12-8];
			acc += bp[13-8];
			acc -= bp[15-8];
			acc -= bp[8-8];
			acc -= bp[9-8];  rp[3] = (unsigned int)acc; acc >>= 32;

	acc += rp[4];	acc += bp[12-8];
			acc += bp[12-8];
			acc += bp[13-8];
			acc += bp[13-8];
			acc += bp[14-8];
			acc -= bp[9-8];
			acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[13-8];
			acc += bp[13-8];
			acc += bp[14-8];
			acc += bp[14-8];
			acc += bp[15-8];
			acc -= bp[10-8];
			acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32;

	acc += rp[6];	acc += bp[14-8];
			acc += bp[14-8];
			acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[14-8];
			acc += bp[13-8];
			acc -= bp[8-8];
			acc -= bp[9-8];  rp[6] = (unsigned int)acc; acc >>= 32;

	acc += rp[7];	acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[8 -8];
			acc -= bp[10-8];
			acc -= bp[11-8];
			acc -= bp[12-8];
			acc -= bp[13-8]; rp[7] = (unsigned int)acc;

	/* signed overflow (carry > 0) or underflow (carry < 0) of the fold */
	carry = (int)(acc>>32);
	}
#else
	/*
	 * Same folding done with full-width operations: the terms S1..S4 are
	 * added (S1 and S2 doubled) and D1..D4 subtracted; 'carry' nets the
	 * values returned by bn_add_words() and bn_sub_words().
	 */
	{
	BN_ULONG t_d[BN_NIST_256_TOP];

	/*S1*/
	nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
	/*S2*/
	nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
	/* t_d = S1 + S2 ... */
	carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
	/* left shift */
	/* ... doubled in place; the bit shifted out of the top word joins 'carry' */
		{
		register BN_ULONG *ap,t,c;
		ap = t_d;
		c=0;
		for (i = BN_NIST_256_TOP; i != 0; --i)
			{
			t= *ap;
			*(ap++)=((t<<1)|c)&BN_MASK2;
			c=(t & BN_TBIT)?1:0;
			}
		carry <<= 1;
		carry  |= c;
		}
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*S3*/
	nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*S4*/
	nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D1*/
	nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D2*/
	nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D3*/
	nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D4*/
	nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);

	}
#endif
	/* see BN_nist_mod_224 for explanation */
	/* default final step: trial subtraction of the modulus */
	u.f = bn_sub_words;
	if (carry > 0)
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_256[carry-1],BN_NIST_256_TOP);
	else if (carry < 0)
		{
		/* add back the matching multiple; then pick sub or add by its carry */
		carry = (int)bn_add_words(r_d,r_d,_nist_p_256[-carry-1],BN_NIST_256_TOP);
		mask = 0-(PTR_SIZE_INT)carry;
		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
		 ((PTR_SIZE_INT)bn_add_words&~mask);
		}
	else
		carry = 1;

	/* c_d = r_d -/+ p; mask is all ones only if that call and 'carry' both returned non-zero */
	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	/* select r_d when mask is all ones, otherwise c_d */
	res   = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
	 ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_256_TOP);
	r->top = BN_NIST_256_TOP;
	bn_correct_top(r);

	return 1;
	}
816
/*
 * Build a twelve-limb (12 x 32 bit) value in 'to' from 'from'.
 * a1..a12 are 32-bit limb numbers, most significant first: a12 lands in
 * limb 0 of 'to' and a1 in limb 11.  Each number is rebased by 12 before
 * indexing 'from'; 0 rebases to a negative index, which bn_cp_32 turns
 * into a zero limb.
 * The limbs are written in ascending order, as the 64-bit variant of
 * bn_cp_32 requires.
 */
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
	{ \
	bn_cp_32(to, 0, from,  (a12) - 12) \
	bn_cp_32(to, 1, from,  (a11) - 12) \
	bn_cp_32(to, 2, from,  (a10) - 12) \
	bn_cp_32(to, 3, from,  (a9) - 12)  \
	bn_cp_32(to, 4, from,  (a8) - 12)  \
	bn_cp_32(to, 5, from,  (a7) - 12)  \
	bn_cp_32(to, 6, from,  (a6) - 12)  \
	bn_cp_32(to, 7, from,  (a5) - 12)  \
	bn_cp_32(to, 8, from,  (a4) - 12)  \
	bn_cp_32(to, 9, from,  (a3) - 12)  \
	bn_cp_32(to, 10, from, (a2) - 12)  \
	bn_cp_32(to, 11, from, (a1) - 12)  \
	}
832
833int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
834	BN_CTX *ctx)
835	{
836	int	i, top = a->top;
837	int	carry = 0;
838	register BN_ULONG *r_d, *a_d = a->d;
839	union	{
840		BN_ULONG bn[BN_NIST_384_TOP];
841		unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
842		} buf;
843	BN_ULONG c_d[BN_NIST_384_TOP],
844		*res;
845	PTR_SIZE_INT mask;
846	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
847	static const BIGNUM _bignum_nist_p_384_sqr = {
848		(BN_ULONG *)_nist_p_384_sqr,
849		sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
850		sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
851		0,BN_FLG_STATIC_DATA };
852
853
854	field = &_bignum_nist_p_384; /* just to make sure */
855
856 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_384_sqr)>=0)
857		return BN_nnmod(r, a, field, ctx);
858
859	i = BN_ucmp(field, a);
860	if (i == 0)
861		{
862		BN_zero(r);
863		return 1;
864		}
865	else if (i > 0)
866		return (r == a)? 1 : (BN_copy(r ,a) != NULL);
867
868	if (r != a)
869		{
870		if (!bn_wexpand(r, BN_NIST_384_TOP))
871			return 0;
872		r_d = r->d;
873		nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
874		}
875	else
876		r_d = a_d;
877
878	nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
879
880#if defined(NIST_INT64)
881	{
882	NIST_INT64		acc;	/* accumulator */
883	unsigned int		*rp=(unsigned int *)r_d;
884	const unsigned int	*bp=(const unsigned int *)buf.ui;
885
886	acc = rp[0];	acc += bp[12-12];
887			acc += bp[21-12];
888			acc += bp[20-12];
889			acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32;
890
891	acc += rp[1];	acc += bp[13-12];
892			acc += bp[22-12];
893			acc += bp[23-12];
894			acc -= bp[12-12];
895			acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32;
896
897	acc += rp[2];	acc += bp[14-12];
898			acc += bp[23-12];
899			acc -= bp[13-12];
900			acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32;
901
902	acc += rp[3];	acc += bp[15-12];
903			acc += bp[12-12];
904			acc += bp[20-12];
905			acc += bp[21-12];
906			acc -= bp[14-12];
907			acc -= bp[22-12];
908			acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32;
909
910	acc += rp[4];	acc += bp[21-12];
911			acc += bp[21-12];
912			acc += bp[16-12];
913			acc += bp[13-12];
914			acc += bp[12-12];
915			acc += bp[20-12];
916			acc += bp[22-12];
917			acc -= bp[15-12];
918			acc -= bp[23-12];
919			acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32;
920
921	acc += rp[5];	acc += bp[22-12];
922			acc += bp[22-12];
923			acc += bp[17-12];
924			acc += bp[14-12];
925			acc += bp[13-12];
926			acc += bp[21-12];
927			acc += bp[23-12];
928			acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32;
929
930	acc += rp[6];	acc += bp[23-12];
931			acc += bp[23-12];
932			acc += bp[18-12];
933			acc += bp[15-12];
934			acc += bp[14-12];
935			acc += bp[22-12];
936			acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32;
937
938	acc += rp[7];	acc += bp[19-12];
939			acc += bp[16-12];
940			acc += bp[15-12];
941			acc += bp[23-12];
942			acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32;
943
944	acc += rp[8];	acc += bp[20-12];
945			acc += bp[17-12];
946			acc += bp[16-12];
947			acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32;
948
949	acc += rp[9];	acc += bp[21-12];
950			acc += bp[18-12];
951			acc += bp[17-12];
952			acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32;
953
954	acc += rp[10];	acc += bp[22-12];
955			acc += bp[19-12];
956			acc += bp[18-12];
957			acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32;
958
959	acc += rp[11];	acc += bp[23-12];
960			acc += bp[20-12];
961			acc += bp[19-12];
962			acc -= bp[22-12]; rp[11] = (unsigned int)acc;
963
964	carry = (int)(acc>>32);
965	}
966#else
967	{
968	BN_ULONG t_d[BN_NIST_384_TOP];
969
970	/*S1*/
971	nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
972		/* left shift */
973		{
974		register BN_ULONG *ap,t,c;
975		ap = t_d;
976		c=0;
977		for (i = 3; i != 0; --i)
978			{
979			t= *ap;
980			*(ap++)=((t<<1)|c)&BN_MASK2;
981			c=(t & BN_TBIT)?1:0;
982			}
983		*ap=c;
984		}
985	carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
986		t_d, BN_NIST_256_TOP);
987	/*S2 */
988	carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
989	/*S3*/
990	nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21);
991	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
992	/*S4*/
993	nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0);
994	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
995	/*S5*/
996	nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0);
997	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
998	/*S6*/
999	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20);
1000	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1001	/*D1*/
1002	nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23);
1003	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1004	/*D2*/
1005	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0);
1006	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1007	/*D3*/
1008	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0);
1009	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1010
1011	}
1012#endif
1013	/* see BN_nist_mod_224 for explanation */
1014	u.f = bn_sub_words;
1015	if (carry > 0)
1016		carry = (int)bn_sub_words(r_d,r_d,_nist_p_384[carry-1],BN_NIST_384_TOP);
1017	else if (carry < 0)
1018		{
1019		carry = (int)bn_add_words(r_d,r_d,_nist_p_384[-carry-1],BN_NIST_384_TOP);
1020		mask = 0-(PTR_SIZE_INT)carry;
1021		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
1022		 ((PTR_SIZE_INT)bn_add_words&~mask);
1023		}
1024	else
1025		carry = 1;
1026
1027	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
1028	mask &= 0-(PTR_SIZE_INT)carry;
1029	res   = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
1030	 ((PTR_SIZE_INT)r_d&mask));
1031	nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1032	r->top = BN_NIST_384_TOP;
1033	bn_correct_top(r);
1034
1035	return 1;
1036	}
1037
/*
 * Shift/mask helpers for splitting a value at bit position 521, which does
 * not fall on a word boundary:
 *   BN_NIST_521_RSHIFT   - 521 modulo the word size, i.e. how many bits of
 *                          a 521-bit value live in its most significant word;
 *   BN_NIST_521_LSHIFT   - the complementary bit count within that word;
 *   BN_NIST_521_TOP_MASK - keeps only the low BN_NIST_521_RSHIFT bits of a
 *                          word (assuming BN_MASK2 is the all-ones word mask).
 */
#define BN_NIST_521_RSHIFT	(521%BN_BITS2)
#define BN_NIST_521_LSHIFT	(BN_BITS2-BN_NIST_521_RSHIFT)
#define BN_NIST_521_TOP_MASK	((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
1041
1042int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1043	BN_CTX *ctx)
1044	{
1045	int	top = a->top, i;
1046	BN_ULONG *r_d, *a_d = a->d,
1047		 t_d[BN_NIST_521_TOP],
1048		 val,tmp,*res;
1049	PTR_SIZE_INT mask;
1050	static const BIGNUM _bignum_nist_p_521_sqr = {
1051		(BN_ULONG *)_nist_p_521_sqr,
1052		sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
1053		sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
1054		0,BN_FLG_STATIC_DATA };
1055
1056	field = &_bignum_nist_p_521; /* just to make sure */
1057
1058 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_521_sqr)>=0)
1059		return BN_nnmod(r, a, field, ctx);
1060
1061	i = BN_ucmp(field, a);
1062	if (i == 0)
1063		{
1064		BN_zero(r);
1065		return 1;
1066		}
1067	else if (i > 0)
1068		return (r == a)? 1 : (BN_copy(r ,a) != NULL);
1069
1070	if (r != a)
1071		{
1072		if (!bn_wexpand(r,BN_NIST_521_TOP))
1073			return 0;
1074		r_d = r->d;
1075		nist_cp_bn(r_d,a_d, BN_NIST_521_TOP);
1076		}
1077	else
1078		r_d = a_d;
1079
1080	/* upper 521 bits, copy ... */
1081	nist_cp_bn_0(t_d,a_d + (BN_NIST_521_TOP-1), top - (BN_NIST_521_TOP-1),BN_NIST_521_TOP);
1082	/* ... and right shift */
1083	for (val=t_d[0],i=0; i<BN_NIST_521_TOP-1; i++)
1084		{
1085		tmp = val>>BN_NIST_521_RSHIFT;
1086		val = t_d[i+1];
1087		t_d[i] = (tmp | val<<BN_NIST_521_LSHIFT) & BN_MASK2;
1088		}
1089	t_d[i] = val>>BN_NIST_521_RSHIFT;
1090	/* lower 521 bits */
1091	r_d[i] &= BN_NIST_521_TOP_MASK;
1092
1093	bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
1094	mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
1095	res  = (BN_ULONG *)(((PTR_SIZE_INT)t_d&~mask) |
1096	 ((PTR_SIZE_INT)r_d&mask));
1097	nist_cp_bn(r_d,res,BN_NIST_521_TOP);
1098	r->top = BN_NIST_521_TOP;
1099	bn_correct_top(r);
1100
1101	return 1;
1102	}
1103