1/* crypto/bn/bn_nist.c */
2/*
3 * Written by Nils Larsch for the OpenSSL project
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in
17 *    the documentation and/or other materials provided with the
18 *    distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 *    software must display the following acknowledgment:
22 *    "This product includes software developed by the OpenSSL Project
23 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 *    endorse or promote products derived from this software without
27 *    prior written permission. For written permission, please contact
28 *    openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 *    nor may "OpenSSL" appear in their names without prior written
32 *    permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 *    acknowledgment:
36 *    "This product includes software developed by the OpenSSL Project
37 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com).  This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include "bn_lcl.h"
60#include "cryptlib.h"
61
62
/*
 * Number of BN_ULONG words needed to hold a value of the given bit length,
 * i.e. the bit length rounded up to a whole number of BN_BITS2-bit words.
 * Each expansion is fully parenthesized so that the macros remain correct
 * when used as an operand of a higher-precedence operator (for example as
 * a divisor or as the right-hand side of '%').
 */
#define BN_NIST_192_TOP	((192+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_224_TOP	((224+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_256_TOP	((256+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_384_TOP	((384+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_521_TOP	((521+BN_BITS2-1)/BN_BITS2)
68
69/* pre-computed tables are "carry-less" values of modulus*(i+1) */
/*
 * Word-level constants for the five NIST primes, least significant word
 * first, provided once for 64-bit and once for 32-bit BN_ULONG.
 *
 * _nist_p_NNN[i] holds BN_NIST_NNN_TOP words of modulus*(i+1), so row 0 is
 * the modulus itself.  The reduction routines index these rows with their
 * accumulated carry in order to add or subtract a multiple of the modulus
 * in a single pass.  _nist_p_521 is a single row (the modulus only).
 *
 * _nist_p_NNN_sqr holds modulus^2; the reduction routines compare their
 * input against it and fall back to BN_nnmod() for anything not below it.
 */
#if BN_BITS2 == 64
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
	{0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL},
	{0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL},
	{0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFCULL,0xFFFFFFFFFFFFFFFFULL}
	};
static const BN_ULONG _nist_p_192_sqr[] = {
	0x0000000000000001ULL,0x0000000000000002ULL,0x0000000000000001ULL,
	0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL
	};
/* 224 bits do not fill the top 64-bit word, so row 1 keeps its carry bit */
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
	{0x0000000000000001ULL,0xFFFFFFFF00000000ULL,
	 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL},
	{0x0000000000000002ULL,0xFFFFFFFE00000000ULL,
	 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFFULL} /* this one is "carry-full" */
	};
static const BN_ULONG _nist_p_224_sqr[] = {
	0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
	0xFFFFFFFFFFFFFFFFULL,0x0000000200000000ULL,
	0x0000000000000000ULL,0xFFFFFFFFFFFFFFFEULL,
	0xFFFFFFFFFFFFFFFFULL
	};
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
	{0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFF00000001ULL},
	{0xFFFFFFFFFFFFFFFEULL,0x00000001FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFE00000002ULL},
	{0xFFFFFFFFFFFFFFFDULL,0x00000002FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFD00000003ULL},
	{0xFFFFFFFFFFFFFFFCULL,0x00000003FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFC00000004ULL},
	{0xFFFFFFFFFFFFFFFBULL,0x00000004FFFFFFFFULL,
	 0x0000000000000000ULL,0xFFFFFFFB00000005ULL},
	};
static const BN_ULONG _nist_p_256_sqr[] = {
	0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
	0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFEULL,
	0x00000001FFFFFFFEULL,0x00000001FFFFFFFEULL,
	0xFFFFFFFE00000001ULL,0xFFFFFFFE00000002ULL
	};
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
	{0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,0xFFFFFFFFFFFFFFFEULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000002FFFFFFFDULL,0xFFFFFFFD00000000ULL,0xFFFFFFFFFFFFFFFCULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000003FFFFFFFCULL,0xFFFFFFFC00000000ULL,0xFFFFFFFFFFFFFFFBULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	{0x00000004FFFFFFFBULL,0xFFFFFFFB00000000ULL,0xFFFFFFFFFFFFFFFAULL,
	 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
	};
static const BN_ULONG _nist_p_384_sqr[] = {
	0xFFFFFFFE00000001ULL,0x0000000200000000ULL,0xFFFFFFFE00000000ULL,
	0x0000000200000000ULL,0x0000000000000001ULL,0x0000000000000000ULL,
	0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL
	};
static const BN_ULONG _nist_p_521[] =
	{0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0x00000000000001FFULL};
static const BN_ULONG _nist_p_521_sqr[] = {
	0x0000000000000001ULL,0x0000000000000000ULL,0x0000000000000000ULL,
	0x0000000000000000ULL,0x0000000000000000ULL,0x0000000000000000ULL,
	0x0000000000000000ULL,0x0000000000000000ULL,0xFFFFFFFFFFFFFC00ULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
	0xFFFFFFFFFFFFFFFFULL,0x000000000003FFFFULL
	};
#elif BN_BITS2 == 32
/* same constants as above, split into 32-bit words */
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
	{0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
	};
static const BN_ULONG _nist_p_192_sqr[] = {
	0x00000001,0x00000000,0x00000002,0x00000000,0x00000001,0x00000000,
	0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
	};
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
	{0x00000001,0x00000000,0x00000000,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0x00000002,0x00000000,0x00000000,0xFFFFFFFE,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
	};
static const BN_ULONG _nist_p_224_sqr[] = {
	0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
	0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000002,
	0x00000000,0x00000000,0xFFFFFFFE,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF
	};
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
	{0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
	 0x00000000,0x00000000,0x00000001,0xFFFFFFFF},
	{0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0x00000001,
	 0x00000000,0x00000000,0x00000002,0xFFFFFFFE},
	{0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0x00000002,
	 0x00000000,0x00000000,0x00000003,0xFFFFFFFD},
	{0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0x00000003,
	 0x00000000,0x00000000,0x00000004,0xFFFFFFFC},
	{0xFFFFFFFB,0xFFFFFFFF,0xFFFFFFFF,0x00000004,
	 0x00000000,0x00000000,0x00000005,0xFFFFFFFB},
	};
static const BN_ULONG _nist_p_256_sqr[] = {
	0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0x00000001,
	0xFFFFFFFE,0x00000001,0xFFFFFFFE,0x00000001,
	0x00000001,0xFFFFFFFE,0x00000002,0xFFFFFFFE
	};
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
	{0xFFFFFFFF,0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFD,0x00000002,0x00000000,0xFFFFFFFD,0xFFFFFFFC,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFC,0x00000003,0x00000000,0xFFFFFFFC,0xFFFFFFFB,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	{0xFFFFFFFB,0x00000004,0x00000000,0xFFFFFFFB,0xFFFFFFFA,0xFFFFFFFF,
	 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
	};
static const BN_ULONG _nist_p_384_sqr[] = {
	0x00000001,0xFFFFFFFE,0x00000000,0x00000002,0x00000000,0xFFFFFFFE,
	0x00000000,0x00000002,0x00000001,0x00000000,0x00000000,0x00000000,
	0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
	};
static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0x000001FF};
static const BN_ULONG _nist_p_521_sqr[] = {
	0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
	0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
	0x00000000,0x00000000,0x00000000,0x00000000,0xFFFFFC00,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
	0xFFFFFFFF,0xFFFFFFFF,0x0003FFFF
	};
#else
/* only 32- and 64-bit BN_ULONG have tables */
#error "unsupported BN_BITS2"
#endif
215
216
/*
 * Read-only BIGNUM views of the five moduli.  Each one points at row 0 of
 * the corresponding word table (or at the single-row _nist_p_521), with the
 * cast dropping the table's const qualifier to match the BIGNUM pointer
 * member.  The initializers are positional: word pointer, two word counts
 * (both BN_NIST_NNN_TOP), 0, and BN_FLG_STATIC_DATA.
 * NOTE(review): presumably these map to d, top, dmax, neg and flags --
 * confirm against the BIGNUM declaration in the BN headers.
 */
static const BIGNUM _bignum_nist_p_192 =
	{
	(BN_ULONG *)_nist_p_192[0],
	BN_NIST_192_TOP,
	BN_NIST_192_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_224 =
	{
	(BN_ULONG *)_nist_p_224[0],
	BN_NIST_224_TOP,
	BN_NIST_224_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_256 =
	{
	(BN_ULONG *)_nist_p_256[0],
	BN_NIST_256_TOP,
	BN_NIST_256_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_384 =
	{
	(BN_ULONG *)_nist_p_384[0],
	BN_NIST_384_TOP,
	BN_NIST_384_TOP,
	0,
	BN_FLG_STATIC_DATA
	};

static const BIGNUM _bignum_nist_p_521 =
	{
	(BN_ULONG *)_nist_p_521,
	BN_NIST_521_TOP,
	BN_NIST_521_TOP,
	0,
	BN_FLG_STATIC_DATA
	};
261
262
263const BIGNUM *BN_get0_nist_prime_192(void)
264	{
265	return &_bignum_nist_p_192;
266	}
267
268const BIGNUM *BN_get0_nist_prime_224(void)
269	{
270	return &_bignum_nist_p_224;
271	}
272
273const BIGNUM *BN_get0_nist_prime_256(void)
274	{
275	return &_bignum_nist_p_256;
276	}
277
278const BIGNUM *BN_get0_nist_prime_384(void)
279	{
280	return &_bignum_nist_p_384;
281	}
282
283const BIGNUM *BN_get0_nist_prime_521(void)
284	{
285	return &_bignum_nist_p_521;
286	}
287
288
/*
 * Copy the first `top` words of src into dst, then zero-fill dst up to
 * (but not including) index `max`.  The caller must ensure top <= max;
 * this is only checked when BN_DEBUG is defined.
 */
static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
	{
	int pos = 0;

#ifdef BN_DEBUG
	OPENSSL_assert(top <= max);
#endif
	/* copied part */
	while (pos < top)
		{
		dst[pos] = src[pos];
		pos++;
		}
	/* zero padding for whatever is left below max */
	while (pos < max)
		{
		dst[pos] = 0;
		pos++;
		}
	}
301
/*
 * Copy `top` words from src to dst, lowest index first.  Nothing is
 * written when top is zero or negative.
 */
static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
	{
	int left = top;

	while (left > 0)
		{
		*dst++ = *src++;
		left--;
		}
	}
309
/*
 * Word-copy helpers used by the nist_set_NNN() macros.
 *
 *   bn_cp_64(to, n, from, m)  64-bit chunk n of to = chunk m of from,
 *                             or 0 when m is negative
 *   bn_cp_32(to, n, from, m)  same, for 32-bit chunks
 *   bn_64_set_0 / bn_32_set_0 clear chunk n of to
 *
 * Every macro supplies its own statement terminator (a trailing ';' or a
 * brace block); the nist_set_NNN() macros rely on that and invoke them
 * without a ';' in between, so the terminators must stay.
 *
 * Macro arguments are parenthesized wherever they are used as an operand,
 * so an argument expression containing low-precedence operators is
 * evaluated as a unit.
 *
 * NIST_INT64, when defined, names a signed 64-bit type; the reduction
 * routines use its presence to select their 32-bit-at-a-time accumulator
 * code.  On 64-bit BN_ULONG builds it is only defined for L_ENDIAN.
 */
#if BN_BITS2 == 64
#define bn_cp_64(to, n, from, m)	(to)[n] = ((m)>=0)?((from)[m]):0;
#define bn_64_set_0(to, n)		(to)[n] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n assigns the whole word, the following odd n ORs in or
 * masks the upper half)
 */
#define bn_cp_32_naked(to, n, from, m)	(((n)&1)?((to)[(n)/2]|=((m)&1)?((from)[(m)/2]&BN_MASK2h):((from)[(m)/2]<<32))\
						:((to)[(n)/2] =((m)&1)?((from)[(m)/2]>>32):((from)[(m)/2]&BN_MASK2l)))
#define bn_32_set_0(to, n)		(((n)&1)?((to)[(n)/2]&=BN_MASK2l):((to)[(n)/2]=0));
/* the terminating ';' comes from bn_32_set_0 in the ':' arm */
#define bn_cp_32(to,n,from,m)		((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
# if defined(L_ENDIAN)
#  if defined(__arch64__)
#   define NIST_INT64 long
#  else
#   define NIST_INT64 long long
#  endif
# endif
#else
/* 32-bit words: a 64-bit chunk is simply two consecutive 32-bit chunks */
#define bn_cp_64(to, n, from, m) \
	{ \
	bn_cp_32(to, (n)*2, from, (m)*2); \
	bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
	}
#define bn_64_set_0(to, n) \
	{ \
	bn_32_set_0(to, (n)*2); \
	bn_32_set_0(to, (n)*2+1); \
	}
#define bn_cp_32(to, n, from, m)	(to)[n] = ((m)>=0)?((from)[m]):0;
#define bn_32_set_0(to, n)		(to)[n] = (BN_ULONG)0;
# if defined(_WIN32) && !defined(__GNUC__)
#  define NIST_INT64 __int64
# elif defined(BN_LLONG)
#  define NIST_INT64 long long
# endif
#endif /* BN_BITS2 != 64 */
347
/*
 * nist_set_192(to, from, a1, a2, a3): assemble three 64-bit chunks of `to`
 * from chunks of `from`.  a1 selects the source for the most significant
 * chunk (index 2) and a3 the source for the least significant (index 0).
 * Each selector is rebased by -3 before indexing `from`; a selector of 0
 * therefore becomes negative, which bn_cp_64 turns into a zero chunk.
 * The bn_cp_64 invocations carry their own statement terminators, and the
 * expansion is a brace block, so a call site may omit the trailing ';'.
 */
#define nist_set_192(to, from, a1, a2, a3) \
	{ \
	bn_cp_64(to, 0, from, (a3) - 3) \
	bn_cp_64(to, 1, from, (a2) - 3) \
	bn_cp_64(to, 2, from, (a1) - 3) \
	}
354
/*
 * BN_nist_mod_192 - reduce a modulo the built-in 192-bit NIST prime and
 * store the result in r.
 *
 * The caller-supplied field is ignored: the pointer is overwritten with the
 * built-in modulus.  Negative inputs and inputs not below modulus^2 are
 * handed to BN_nnmod() (the only use of ctx).  An input equal to the
 * modulus yields zero and an input below it is copied through unchanged.
 * Everything else is reduced by adding rearranged copies of the high words
 * to the low 192 bits and then correcting with pre-computed multiples of
 * the modulus.
 *
 * r may be the same object as a; in that case the words of a are
 * overwritten in place even though a is declared const.
 *
 * Returns 1 on success, 0 if r could not be expanded or copied; on the
 * BN_nnmod() path that function's return value is passed through.
 */
int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int      top = a->top, i;
	int      carry;
	register BN_ULONG *r_d, *a_d = a->d;
	/* high words of a, addressable as BN_ULONGs or as 32-bit units */
	union	{
		BN_ULONG	bn[BN_NIST_192_TOP];
		unsigned int	ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
		} buf;
	BN_ULONG c_d[BN_NIST_192_TOP],
		*res;
	PTR_SIZE_INT mask;
	static const BIGNUM _bignum_nist_p_192_sqr = {
		(BN_ULONG *)_nist_p_192_sqr,
		sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
		sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
		0,BN_FLG_STATIC_DATA };

	field = &_bignum_nist_p_192; /* just to make sure */

	/* inputs outside [0, modulus^2) take the generic path */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_192_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		/* a is already reduced */
		return (r == a) ? 1 : (BN_copy(r ,a) != NULL);

	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_192_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
		}
	else
		r_d = a_d;

	/* buf = the words of a above the low 192 bits, zero-padded */
	nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);

#if defined(NIST_INT64)
	{
	/*
	 * Add the high words into r_d one 32-bit unit at a time; acc keeps
	 * the running sum and its bits above 32 carry into the next unit.
	 */
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf.ui;

	acc  = rp[0];	acc += bp[3*2-6];
			acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc += bp[3*2-5];
			acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc += bp[3*2-6];
			acc += bp[4*2-6];
			acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[3*2-5];
			acc += bp[4*2-5];
			acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32;

	acc += rp[4];	acc += bp[4*2-6];
			acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[4*2-5];
			acc += bp[5*2-5]; rp[5] = (unsigned int)acc;

	carry = (int)(acc>>32);
	}
#else
	{
	/* same sums, done as three whole-number additions */
	BN_ULONG t_d[BN_NIST_192_TOP];

	nist_set_192(t_d, buf.bn, 0, 3, 3);
	carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	nist_set_192(t_d, buf.bn, 4, 4, 0);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	nist_set_192(t_d, buf.bn, 5, 5, 5)
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
	}
#endif
	/*
	 * carry counts the overflows out of the top word; remove that many
	 * multiples of the modulus using the pre-computed table row, and keep
	 * the resulting borrow in carry.
	 */
	if (carry > 0)
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
	else
		carry = 1;

	/*
	 * we need 'if (carry==0 || result>=modulus) result-=modulus;'
	 * as comparison implies subtraction, we can write
	 * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
	 * this is what happens below, but without explicit if:-) a.
	 */
	/* mask is all ones only when the trial subtraction borrowed and carry!=0 */
	mask  = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	res   = c_d;
	/* pick r_d when mask is set, otherwise c_d (= r_d - modulus) */
	res   = (BN_ULONG *)
	 (((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_192_TOP);
	r->top = BN_NIST_192_TOP;
	bn_correct_top(r);

	return 1;
	}
462
/*
 * Pointer type matching the way bn_add_words and bn_sub_words are called
 * in this file; the reduction routines use it to pick one of the two at
 * run time.
 */
typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *,const BN_ULONG *,const BN_ULONG *,int);
464
/*
 * nist_set_224(to, from, a1..a7): assemble seven 32-bit chunks of `to`
 * from chunks of `from`.  a1 selects the source for the most significant
 * chunk (index 6) and a7 the source for the least significant (index 0).
 * Each selector is rebased by -7 before indexing `from`; a selector of 0
 * therefore becomes negative, which bn_cp_32 turns into a zero chunk.
 * The bn_cp_32 invocations carry their own statement terminators.
 */
#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
	{ \
	bn_cp_32(to, 0, from, (a7) - 7) \
	bn_cp_32(to, 1, from, (a6) - 7) \
	bn_cp_32(to, 2, from, (a5) - 7) \
	bn_cp_32(to, 3, from, (a4) - 7) \
	bn_cp_32(to, 4, from, (a3) - 7) \
	bn_cp_32(to, 5, from, (a2) - 7) \
	bn_cp_32(to, 6, from, (a1) - 7) \
	}
475
/*
 * BN_nist_mod_224 - reduce a modulo the built-in 224-bit NIST prime and
 * store the result in r.
 *
 * The caller-supplied field is ignored: the pointer is overwritten with the
 * built-in modulus.  Negative inputs and inputs not below modulus^2 are
 * handed to BN_nnmod() (the only use of ctx).  An input equal to the
 * modulus yields zero and an input below it is copied through unchanged.
 * Everything else is reduced by adding and subtracting rearranged copies
 * of the high 32-bit chunks, then correcting with pre-computed multiples
 * of the modulus.  Because terms are subtracted as well as added, the
 * running carry can be negative.
 *
 * With 64-bit words 224 bits do not fill the top word, so that build
 * realigns the high part by 32 bits and reads the carry from the upper
 * half of the top word.
 *
 * r may be the same object as a; in that case the words of a are
 * overwritten in place even though a is declared const.
 *
 * Returns 1 on success, 0 if r could not be expanded or copied; on the
 * BN_nnmod() path that function's return value is passed through.
 */
int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int	top = a->top, i;
	int	carry;
	BN_ULONG *r_d, *a_d = a->d;
	/* high part of a, addressable as BN_ULONGs or as 32-bit units */
	union	{
		BN_ULONG	bn[BN_NIST_224_TOP];
		unsigned int	ui[BN_NIST_224_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
		} buf;
	BN_ULONG c_d[BN_NIST_224_TOP],
		*res;
	PTR_SIZE_INT mask;
	/* lets the final add/sub routine be chosen by bit masking */
	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
	static const BIGNUM _bignum_nist_p_224_sqr = {
		(BN_ULONG *)_nist_p_224_sqr,
		sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
		sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
		0,BN_FLG_STATIC_DATA };


	field = &_bignum_nist_p_224; /* just to make sure */

	/* inputs outside [0, modulus^2) take the generic path */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_224_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		/* a is already reduced */
		return (r == a)? 1 : (BN_copy(r ,a) != NULL);

	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_224_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
		}
	else
		r_d = a_d;

#if BN_BITS2==64
	/* copy upper 256 bits of 448 bit number ... */
	nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
	/* ... and right shift by 32 to obtain upper 224 bits */
	nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
	/* truncate lower part to 224 bits too */
	r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
#else
	/* buf = the words of a above the low 224 bits, zero-padded */
	nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
#endif

#if defined(NIST_INT64) && BN_BITS2!=64
	{
	/*
	 * Signed accumulator: each 32-bit unit of the result is the old
	 * unit plus/minus selected high units; the arithmetic right shift
	 * propagates a positive or negative carry into the next unit.
	 */
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf.ui;

	acc  = rp[0];	acc -= bp[7-7];
			acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc -= bp[8-7];
			acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc -= bp[9-7];
			acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[7-7];
			acc += bp[11-7];
			acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32;

	acc += rp[4];	acc += bp[8-7];
			acc += bp[12-7];
			acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[9-7];
			acc += bp[13-7];
			acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32;

	acc += rp[6];	acc += bp[10-7];
			acc -= bp[13-7]; rp[6] = (unsigned int)acc;

	carry = (int)(acc>>32);
	/* NOTE(review): never compiled -- this branch requires BN_BITS2!=64 */
# if BN_BITS2==64
	rp[7] = carry;
# endif
	}
#else
	{
	/* same terms as whole-number operations: two additions, two subtractions */
	BN_ULONG t_d[BN_NIST_224_TOP];

	nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
	carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
	nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);

#if BN_BITS2==64
	/* the carry sits in the upper half of the top word, not in the word carries */
	carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
#endif
	}
#endif
	u.f = bn_sub_words;
	if (carry > 0)
		{
		/* remove carry multiples of the modulus; keep the borrow */
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_224[carry-1],BN_NIST_224_TOP);
#if BN_BITS2==64
		/* derive the borrow from bit 32 of the top word instead */
		carry=(int)(~(r_d[BN_NIST_224_TOP-1]>>32))&1;
#endif
		}
	else if (carry < 0)
		{
		/* it's a bit more complicated logic in this case.
		 * if bn_add_words yields no carry, then result
		 * has to be adjusted by unconditionally *adding*
		 * the modulus. but if it does, then result has
		 * to be compared to the modulus and conditionally
		 * adjusted by *subtracting* the latter. */
		carry = (int)bn_add_words(r_d,r_d,_nist_p_224[-carry-1],BN_NIST_224_TOP);
		mask = 0-(PTR_SIZE_INT)carry;
		/* u.f = carry ? bn_sub_words : bn_add_words, without a branch */
		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
		 ((PTR_SIZE_INT)bn_add_words&~mask);
		}
	else
		carry = 1;

	/* otherwise it's effectively same as in BN_nist_mod_192... */
	/* c_d = r_d -/+ modulus; the result is kept unless mask ends up all ones */
	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	res   = c_d;
	res   = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
	 ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_224_TOP);
	r->top = BN_NIST_224_TOP;
	bn_correct_top(r);

	return 1;
	}
621
/*
 * nist_set_256(to, from, a1..a8): assemble eight 32-bit chunks of `to`
 * from chunks of `from`.  a1 selects the source for the most significant
 * chunk (index 7) and a8 the source for the least significant (index 0).
 * Each selector is rebased by -8 before indexing `from`; a selector of 0
 * therefore becomes negative, which bn_cp_32 turns into a zero chunk.
 * The bn_cp_32 invocations carry their own statement terminators.
 */
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
	{ \
	bn_cp_32(to, 0, from, (a8) - 8) \
	bn_cp_32(to, 1, from, (a7) - 8) \
	bn_cp_32(to, 2, from, (a6) - 8) \
	bn_cp_32(to, 3, from, (a5) - 8) \
	bn_cp_32(to, 4, from, (a4) - 8) \
	bn_cp_32(to, 5, from, (a3) - 8) \
	bn_cp_32(to, 6, from, (a2) - 8) \
	bn_cp_32(to, 7, from, (a1) - 8) \
	}
633
/*
 * BN_nist_mod_256 - reduce a modulo the built-in 256-bit NIST prime and
 * store the result in r.
 *
 * The caller-supplied field is ignored: the pointer is overwritten with the
 * built-in modulus.  Negative inputs and inputs not below modulus^2 are
 * handed to BN_nnmod() (the only use of ctx).  An input equal to the
 * modulus yields zero and an input below it is copied through unchanged.
 * Everything else is reduced by adding and subtracting rearranged copies
 * of the high 32-bit chunks (some of them doubled), then correcting with
 * pre-computed multiples of the modulus.  The running carry may be
 * positive or negative.
 *
 * r may be the same object as a; in that case the words of a are
 * overwritten in place even though a is declared const.
 *
 * Returns 1 on success, 0 if r could not be expanded or copied; on the
 * BN_nnmod() path that function's return value is passed through.
 */
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
	BN_CTX *ctx)
	{
	int	i, top = a->top;
	int	carry = 0;
	register BN_ULONG *a_d = a->d, *r_d;
	/* high words of a, addressable as BN_ULONGs or as 32-bit units */
	union	{
		BN_ULONG bn[BN_NIST_256_TOP];
		unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
		} buf;
	BN_ULONG c_d[BN_NIST_256_TOP],
		*res;
	PTR_SIZE_INT mask;
	/* lets the final add/sub routine be chosen by bit masking */
	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
	static const BIGNUM _bignum_nist_p_256_sqr = {
		(BN_ULONG *)_nist_p_256_sqr,
		sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
		sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
		0,BN_FLG_STATIC_DATA };

	field = &_bignum_nist_p_256; /* just to make sure */

	/* inputs outside [0, modulus^2) take the generic path */
 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_256_sqr)>=0)
		return BN_nnmod(r, a, field, ctx);

	i = BN_ucmp(field, a);
	if (i == 0)
		{
		BN_zero(r);
		return 1;
		}
	else if (i > 0)
		/* a is already reduced */
		return (r == a)? 1 : (BN_copy(r ,a) != NULL);

	if (r != a)
		{
		if (!bn_wexpand(r, BN_NIST_256_TOP))
			return 0;
		r_d = r->d;
		nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
		}
	else
		r_d = a_d;

	/* buf = the words of a above the low 256 bits, zero-padded */
	nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);

#if defined(NIST_INT64)
	{
	/*
	 * Signed accumulator: each 32-bit unit of the result is the old
	 * unit plus/minus selected high units (a unit added twice is a
	 * doubled term); the arithmetic right shift propagates a positive
	 * or negative carry into the next unit.
	 */
	NIST_INT64		acc;	/* accumulator */
	unsigned int		*rp=(unsigned int *)r_d;
	const unsigned int	*bp=(const unsigned int *)buf.ui;

	acc = rp[0];	acc += bp[8-8];
			acc += bp[9-8];
			acc -= bp[11-8];
			acc -= bp[12-8];
			acc -= bp[13-8];
			acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32;

	acc += rp[1];	acc += bp[9-8];
			acc += bp[10-8];
			acc -= bp[12-8];
			acc -= bp[13-8];
			acc -= bp[14-8];
			acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32;

	acc += rp[2];	acc += bp[10-8];
			acc += bp[11-8];
			acc -= bp[13-8];
			acc -= bp[14-8];
			acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32;

	acc += rp[3];	acc += bp[11-8];
			acc += bp[11-8];
			acc += bp[12-8];
			acc += bp[12-8];
			acc += bp[13-8];
			acc -= bp[15-8];
			acc -= bp[8-8];
			acc -= bp[9-8];  rp[3] = (unsigned int)acc; acc >>= 32;

	acc += rp[4];	acc += bp[12-8];
			acc += bp[12-8];
			acc += bp[13-8];
			acc += bp[13-8];
			acc += bp[14-8];
			acc -= bp[9-8];
			acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32;

	acc += rp[5];	acc += bp[13-8];
			acc += bp[13-8];
			acc += bp[14-8];
			acc += bp[14-8];
			acc += bp[15-8];
			acc -= bp[10-8];
			acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32;

	acc += rp[6];	acc += bp[14-8];
			acc += bp[14-8];
			acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[14-8];
			acc += bp[13-8];
			acc -= bp[8-8];
			acc -= bp[9-8];  rp[6] = (unsigned int)acc; acc >>= 32;

	acc += rp[7];	acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[15-8];
			acc += bp[8 -8];
			acc -= bp[10-8];
			acc -= bp[11-8];
			acc -= bp[12-8];
			acc -= bp[13-8]; rp[7] = (unsigned int)acc;

	carry = (int)(acc>>32);
	}
#else
	{
	/*
	 * Whole-number variant: S1..S4 are added to r_d, D1..D4 are
	 * subtracted from it.  S1 and S2 are summed first and doubled by a
	 * one-bit left shift before being added.
	 */
	BN_ULONG t_d[BN_NIST_256_TOP];

	/*S1*/
	nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
	/*S2*/
	nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
	carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
	/* left shift */
		{
		register BN_ULONG *ap,t,c;
		ap = t_d;
		c=0;
		for (i = BN_NIST_256_TOP; i != 0; --i)
			{
			t= *ap;
			*(ap++)=((t<<1)|c)&BN_MASK2;
			c=(t & BN_TBIT)?1:0;
			}
		/* the carry of S1+S2 is doubled too, plus the bit shifted out */
		carry <<= 1;
		carry  |= c;
		}
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*S3*/
	nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*S4*/
	nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D1*/
	nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D2*/
	nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D3*/
	nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
	/*D4*/
	nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);

	}
#endif
	/* see BN_nist_mod_224 for explanation */
	u.f = bn_sub_words;
	if (carry > 0)
		/* remove carry multiples of the modulus; keep the borrow */
		carry = (int)bn_sub_words(r_d,r_d,_nist_p_256[carry-1],BN_NIST_256_TOP);
	else if (carry < 0)
		{
		/* add back -carry multiples of the modulus; keep the carry-out */
		carry = (int)bn_add_words(r_d,r_d,_nist_p_256[-carry-1],BN_NIST_256_TOP);
		mask = 0-(PTR_SIZE_INT)carry;
		/* u.f = carry ? bn_sub_words : bn_add_words, without a branch */
		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
		 ((PTR_SIZE_INT)bn_add_words&~mask);
		}
	else
		carry = 1;

	/* c_d = r_d -/+ modulus; the result is kept unless mask ends up all ones */
	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
	mask &= 0-(PTR_SIZE_INT)carry;
	res   = c_d;
	res   = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
	 ((PTR_SIZE_INT)r_d&mask));
	nist_cp_bn(r_d, res, BN_NIST_256_TOP);
	r->top = BN_NIST_256_TOP;
	bn_correct_top(r);

	return 1;
	}
821
/*
 * nist_set_384(to, from, a1..a12): assemble twelve 32-bit chunks of `to`
 * from chunks of `from`.  a1 selects the source for the most significant
 * chunk (index 11) and a12 the source for the least significant (index 0).
 * Each selector is rebased by -12 before indexing `from`; a selector of 0
 * therefore becomes negative, which bn_cp_32 turns into a zero chunk.
 * The bn_cp_32 invocations carry their own statement terminators.
 */
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
	{ \
	bn_cp_32(to, 0, from,  (a12) - 12) \
	bn_cp_32(to, 1, from,  (a11) - 12) \
	bn_cp_32(to, 2, from,  (a10) - 12) \
	bn_cp_32(to, 3, from,  (a9) - 12)  \
	bn_cp_32(to, 4, from,  (a8) - 12)  \
	bn_cp_32(to, 5, from,  (a7) - 12)  \
	bn_cp_32(to, 6, from,  (a6) - 12)  \
	bn_cp_32(to, 7, from,  (a5) - 12)  \
	bn_cp_32(to, 8, from,  (a4) - 12)  \
	bn_cp_32(to, 9, from,  (a3) - 12)  \
	bn_cp_32(to, 10, from, (a2) - 12)  \
	bn_cp_32(to, 11, from, (a1) - 12)  \
	}
837
838int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
839	BN_CTX *ctx)
840	{
841	int	i, top = a->top;
842	int	carry = 0;
843	register BN_ULONG *r_d, *a_d = a->d;
844	union	{
845		BN_ULONG bn[BN_NIST_384_TOP];
846		unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
847		} buf;
848	BN_ULONG c_d[BN_NIST_384_TOP],
849		*res;
850	PTR_SIZE_INT mask;
851	union { bn_addsub_f f; PTR_SIZE_INT p; } u;
852	static const BIGNUM _bignum_nist_p_384_sqr = {
853		(BN_ULONG *)_nist_p_384_sqr,
854		sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
855		sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
856		0,BN_FLG_STATIC_DATA };
857
858
859	field = &_bignum_nist_p_384; /* just to make sure */
860
861 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_384_sqr)>=0)
862		return BN_nnmod(r, a, field, ctx);
863
864	i = BN_ucmp(field, a);
865	if (i == 0)
866		{
867		BN_zero(r);
868		return 1;
869		}
870	else if (i > 0)
871		return (r == a)? 1 : (BN_copy(r ,a) != NULL);
872
873	if (r != a)
874		{
875		if (!bn_wexpand(r, BN_NIST_384_TOP))
876			return 0;
877		r_d = r->d;
878		nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
879		}
880	else
881		r_d = a_d;
882
883	nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
884
885#if defined(NIST_INT64)
886	{
887	NIST_INT64		acc;	/* accumulator */
888	unsigned int		*rp=(unsigned int *)r_d;
889	const unsigned int	*bp=(const unsigned int *)buf.ui;
890
891	acc = rp[0];	acc += bp[12-12];
892			acc += bp[21-12];
893			acc += bp[20-12];
894			acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32;
895
896	acc += rp[1];	acc += bp[13-12];
897			acc += bp[22-12];
898			acc += bp[23-12];
899			acc -= bp[12-12];
900			acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32;
901
902	acc += rp[2];	acc += bp[14-12];
903			acc += bp[23-12];
904			acc -= bp[13-12];
905			acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32;
906
907	acc += rp[3];	acc += bp[15-12];
908			acc += bp[12-12];
909			acc += bp[20-12];
910			acc += bp[21-12];
911			acc -= bp[14-12];
912			acc -= bp[22-12];
913			acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32;
914
915	acc += rp[4];	acc += bp[21-12];
916			acc += bp[21-12];
917			acc += bp[16-12];
918			acc += bp[13-12];
919			acc += bp[12-12];
920			acc += bp[20-12];
921			acc += bp[22-12];
922			acc -= bp[15-12];
923			acc -= bp[23-12];
924			acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32;
925
926	acc += rp[5];	acc += bp[22-12];
927			acc += bp[22-12];
928			acc += bp[17-12];
929			acc += bp[14-12];
930			acc += bp[13-12];
931			acc += bp[21-12];
932			acc += bp[23-12];
933			acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32;
934
935	acc += rp[6];	acc += bp[23-12];
936			acc += bp[23-12];
937			acc += bp[18-12];
938			acc += bp[15-12];
939			acc += bp[14-12];
940			acc += bp[22-12];
941			acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32;
942
943	acc += rp[7];	acc += bp[19-12];
944			acc += bp[16-12];
945			acc += bp[15-12];
946			acc += bp[23-12];
947			acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32;
948
949	acc += rp[8];	acc += bp[20-12];
950			acc += bp[17-12];
951			acc += bp[16-12];
952			acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32;
953
954	acc += rp[9];	acc += bp[21-12];
955			acc += bp[18-12];
956			acc += bp[17-12];
957			acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32;
958
959	acc += rp[10];	acc += bp[22-12];
960			acc += bp[19-12];
961			acc += bp[18-12];
962			acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32;
963
964	acc += rp[11];	acc += bp[23-12];
965			acc += bp[20-12];
966			acc += bp[19-12];
967			acc -= bp[22-12]; rp[11] = (unsigned int)acc;
968
969	carry = (int)(acc>>32);
970	}
971#else
972	{
973	BN_ULONG t_d[BN_NIST_384_TOP];
974
975	/*S1*/
976	nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
977		/* left shift */
978		{
979		register BN_ULONG *ap,t,c;
980		ap = t_d;
981		c=0;
982		for (i = 3; i != 0; --i)
983			{
984			t= *ap;
985			*(ap++)=((t<<1)|c)&BN_MASK2;
986			c=(t & BN_TBIT)?1:0;
987			}
988		*ap=c;
989		}
990	carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
991		t_d, BN_NIST_256_TOP);
992	/*S2 */
993	carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
994	/*S3*/
995	nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21);
996	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
997	/*S4*/
998	nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0);
999	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1000	/*S5*/
1001	nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0);
1002	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1003	/*S6*/
1004	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20);
1005	carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1006	/*D1*/
1007	nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23);
1008	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1009	/*D2*/
1010	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0);
1011	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1012	/*D3*/
1013	nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0);
1014	carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1015
1016	}
1017#endif
1018	/* see BN_nist_mod_224 for explanation */
1019	u.f = bn_sub_words;
1020	if (carry > 0)
1021		carry = (int)bn_sub_words(r_d,r_d,_nist_p_384[carry-1],BN_NIST_384_TOP);
1022	else if (carry < 0)
1023		{
1024		carry = (int)bn_add_words(r_d,r_d,_nist_p_384[-carry-1],BN_NIST_384_TOP);
1025		mask = 0-(PTR_SIZE_INT)carry;
1026		u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
1027		 ((PTR_SIZE_INT)bn_add_words&~mask);
1028		}
1029	else
1030		carry = 1;
1031
1032	mask  = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
1033	mask &= 0-(PTR_SIZE_INT)carry;
1034	res   = c_d;
1035	res   = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
1036	 ((PTR_SIZE_INT)r_d&mask));
1037	nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1038	r->top = BN_NIST_384_TOP;
1039	bn_correct_top(r);
1040
1041	return 1;
1042	}
1043
1044#define BN_NIST_521_RSHIFT	(521%BN_BITS2)
1045#define BN_NIST_521_LSHIFT	(BN_BITS2-BN_NIST_521_RSHIFT)
1046#define BN_NIST_521_TOP_MASK	((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
1047
1048int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1049	BN_CTX *ctx)
1050	{
1051	int	top = a->top, i;
1052	BN_ULONG *r_d, *a_d = a->d,
1053		 t_d[BN_NIST_521_TOP],
1054		 val,tmp,*res;
1055	PTR_SIZE_INT mask;
1056	static const BIGNUM _bignum_nist_p_521_sqr = {
1057		(BN_ULONG *)_nist_p_521_sqr,
1058		sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
1059		sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
1060		0,BN_FLG_STATIC_DATA };
1061
1062	field = &_bignum_nist_p_521; /* just to make sure */
1063
1064 	if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_521_sqr)>=0)
1065		return BN_nnmod(r, a, field, ctx);
1066
1067	i = BN_ucmp(field, a);
1068	if (i == 0)
1069		{
1070		BN_zero(r);
1071		return 1;
1072		}
1073	else if (i > 0)
1074		return (r == a)? 1 : (BN_copy(r ,a) != NULL);
1075
1076	if (r != a)
1077		{
1078		if (!bn_wexpand(r,BN_NIST_521_TOP))
1079			return 0;
1080		r_d = r->d;
1081		nist_cp_bn(r_d,a_d, BN_NIST_521_TOP);
1082		}
1083	else
1084		r_d = a_d;
1085
1086	/* upper 521 bits, copy ... */
1087	nist_cp_bn_0(t_d,a_d + (BN_NIST_521_TOP-1), top - (BN_NIST_521_TOP-1),BN_NIST_521_TOP);
1088	/* ... and right shift */
1089	for (val=t_d[0],i=0; i<BN_NIST_521_TOP-1; i++)
1090		{
1091		t_d[i] = ( val>>BN_NIST_521_RSHIFT |
1092			  (tmp=t_d[i+1])<<BN_NIST_521_LSHIFT ) & BN_MASK2;
1093		val=tmp;
1094		}
1095	t_d[i] = val>>BN_NIST_521_RSHIFT;
1096	/* lower 521 bits */
1097	r_d[i] &= BN_NIST_521_TOP_MASK;
1098
1099	bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
1100	mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
1101	res  = t_d;
1102	res  = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
1103	 ((PTR_SIZE_INT)r_d&mask));
1104	nist_cp_bn(r_d,res,BN_NIST_521_TOP);
1105	r->top = BN_NIST_521_TOP;
1106	bn_correct_top(r);
1107
1108	return 1;
1109	}
1110