1db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/*
2db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Copyright 2013 The Android Open Source Project
3db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
4db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Redistribution and use in source and binary forms, with or without
5db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * modification, are permitted provided that the following conditions are met:
6db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *     * Redistributions of source code must retain the above copyright
7db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       notice, this list of conditions and the following disclaimer.
8db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *     * Redistributions in binary form must reproduce the above copyright
9db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       notice, this list of conditions and the following disclaimer in the
10db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       documentation and/or other materials provided with the distribution.
11db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *     * Neither the name of Google Inc. nor the names of its contributors may
12db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       be used to endorse or promote products derived from this software
13db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       without specific prior written permission.
14db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
15db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
16db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */
26db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
27db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// This is an implementation of the P256 elliptic curve group. It's written to
28db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// be portable 32-bit, although it's still constant-time.
29db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root//
30db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// WARNING: Implementing these functions in a constant-time manner is far from
31db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root//          obvious. Be careful when touching this code.
32db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root//
33db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
34db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
35db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdint.h>
36db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdio.h>
37db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
38db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <string.h>
39db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdlib.h>
40db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
41db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include "mincrypt/p256.h"
42db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
43db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint8_t u8;
44db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint32_t u32;
45db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef int32_t s32;
46db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint64_t u64;
47db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
48db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Our field elements are represented as nine 32-bit limbs.
49db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
50db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The value of an felem (field element) is:
51db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
52db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
53db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * That is, each limb is alternately 29 or 28-bits wide in little-endian
54db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * order.
55db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
56db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This means that an felem hits 2**257, rather than 2**256 as we would like. A
57db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
58db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * when multiplying as terms end up one bit short of a limb which would require
59db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * much bit-shifting to correct.
60db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
61db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Finally, the values stored in an felem are in Montgomery form. So the value
62db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
63db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */
64db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef u32 limb;
65db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define NLIMBS 9
66db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef limb felem[NLIMBS];
67db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
68db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kBottom28Bits = 0xfffffff;
69db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kBottom29Bits = 0x1fffffff;
70db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
71db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
72db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28-bit words. */
73db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kOne = {
74db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    2, 0, 0, 0xffff800,
75db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
76db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0
77db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root};
78db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kZero = {0};
79db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kP = {
80db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
81db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0, 0, 0x200000, 0xf000000,
82db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xfffffff
83db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root};
84db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem k2P = {
85db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
86db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0, 0, 0x400000, 0xe000000,
87db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1fffffff
88db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root};
89db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* kPrecomputed contains precomputed values to aid the calculation of scalar
90db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * multiples of the base point, G. It's actually two, equal length, tables
91db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * concatenated.
92db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
93db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The first table contains (x,y) felem pairs for 16 multiples of the base
94db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * point, G.
95db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
96db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   Index  |  Index (binary) | Value
97db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       0  |           0000  | 0G (all zeros, omitted)
98db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       1  |           0001  | G
99db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       2  |           0010  | 2**64G
100db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       3  |           0011  | 2**64G + G
101db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       4  |           0100  | 2**128G
102db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       5  |           0101  | 2**128G + G
103db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       6  |           0110  | 2**128G + 2**64G
104db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       7  |           0111  | 2**128G + 2**64G + G
105db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       8  |           1000  | 2**192G
106db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *       9  |           1001  | 2**192G + G
107db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      10  |           1010  | 2**192G + 2**64G
108db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      11  |           1011  | 2**192G + 2**64G + G
109db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      12  |           1100  | 2**192G + 2**128G
110db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      13  |           1101  | 2**192G + 2**128G + G
111db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      14  |           1110  | 2**192G + 2**128G + 2**64G
112db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *      15  |           1111  | 2**192G + 2**128G + 2**64G + G
113db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
114db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The second table follows the same style, but the terms are 2**32G,
115db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 2**96G, 2**160G, 2**224G.
116db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
117db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This is ~2KB of data. */
118db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
119db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
120db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
121db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
122db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
123db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
124db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
125db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
126db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
127db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
128db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
129db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
130db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
131db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
132db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
133db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
134db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
135db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
136db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
137db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
138db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
139db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
140db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
141db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
142db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
143db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
144db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
145db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
146db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
147db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
148db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
149db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
150db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
151db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
152db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
153db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
154db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
155db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
156db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
157db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
158db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
159db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
160db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
161db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
162db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
163db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
164db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
165db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
166db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
167db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
168db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
169db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
170db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
171db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
172db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
173db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
174db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
175db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
176db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
177db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
178db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
179db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root};
180db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
181db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
182db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Field element operations: */
183db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
184db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* NON_ZERO_TO_ALL_ONES returns:
185db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   0xffffffff for 0 < x <= 2**31
186db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   0 for x == 0 or x > 2**31.
187db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
188db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * x must be a u32 or an equivalent type such as limb. */
189db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
190db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
191db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
192db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * which is a term at 2**257.
193db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
194db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
195db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. */
196db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_reduce_carry(felem inout, limb carry) {
197db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
198db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
199db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[0] += carry << 1;
200db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[3] += 0x10000000 & carry_mask;
201db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
202db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * previous line therefore this doesn't underflow. */
203db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[3] -= carry << 11;
204db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[4] += (0x20000000 - 1) & carry_mask;
205db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[5] += (0x10000000 - 1) & carry_mask;
206db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[6] += (0x20000000 - 1) & carry_mask;
207db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[6] -= carry << 22;
208db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* This may underflow if carry is non-zero but, if so, we'll fix it in the
209db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * next line. */
210db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[7] -= 1 & carry_mask;
211db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  inout[7] += carry << 25;
212db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
213db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
214db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_sum sets out = in+in2.
215db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
216db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry, in[i]+in2[i] must not overflow a 32-bit word.
217db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 */
218db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_sum(felem out, const felem in, const felem in2) {
219db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry = 0;
220db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
221db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
222db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0;; i++) {
223db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = in[i] + in2[i];
224db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
225db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 29;
226db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
227db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
228db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
229db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == NLIMBS)
230db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
231db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
232db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = in[i] + in2[i];
233db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
234db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 28;
235db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
236db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
237db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
238db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
239db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
240db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
241db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31m3 (((limb)1) << 31) - (((limb)1) << 3)
242db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30m2 (((limb)1) << 30) - (((limb)1) << 2)
243db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2)
244db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31m2 (((limb)1) << 31) - (((limb)1) << 2)
245db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2)
246db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2)
247db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
248db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* zero31 is 0 mod p. */
249db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem zero31 = { two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2 };
250db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
251db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_diff sets out = in-in2.
252db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
253db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
254db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
255db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
256db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_diff(felem out, const felem in, const felem in2) {
257db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry = 0;
258db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
259db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
260db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   for (i = 0;; i++) {
261db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = in[i] - in2[i];
262db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += zero31[i];
263db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
264db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 29;
265db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
266db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
267db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
268db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == NLIMBS)
269db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
270db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
271db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = in[i] - in2[i];
272db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += zero31[i];
273db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
274db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 28;
275db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
276db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
277db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
278db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
279db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
280db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
281db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
282db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * with the same 29,28,... bit positions as an felem.
283db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
284db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
285db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Since we just multiplied two Montgomery values together, the result is
286db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * x*y*R*R mod p. We wish to divide by R in order for the result also to be
287db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * in Montgomery form.
288db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
289db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: tmp[i] < 2**64
290db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 */
291db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_reduce_degree(felem out, u64 tmp[17]) {
292db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   /* The following table may be helpful when reading this code:
293db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    *
294db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    * Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
295db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    * Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
296db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    * Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
297db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    *   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285 */
298db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb tmp2[18], carry, x, xMask;
299db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
300db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
301db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
302db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * felem. So the top of an element of tmp might overlap with another
303db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * element two positions down. The following loop eliminates this
304db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * overlap. */
305db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[0] = (limb)(tmp[0] & kBottom29Bits);
306db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
307db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
308db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * and hint to the compiler that it can do a single-word shift by selecting
309db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * the right register rather than doing a double-word shift and truncating
310db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * afterwards. */
311db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[1] = ((limb) tmp[0]) >> 29;
312db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits;
313db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[1] += ((limb) tmp[1]) & kBottom28Bits;
314db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  carry = tmp2[1] >> 28;
315db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[1] &= kBottom28Bits;
316db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
317db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 2; i < 17; i++) {
318db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
319db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += ((limb)(tmp[i - 1])) >> 28;
320db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
321db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += ((limb) tmp[i]) & kBottom29Bits;
322db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += carry;
323db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = tmp2[i] >> 29;
324db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] &= kBottom29Bits;
325db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
326db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
327db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == 17)
328db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
329db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
330db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += ((limb)(tmp[i - 1])) >> 29;
331db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
332db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += ((limb) tmp[i]) & kBottom28Bits;
333db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] += carry;
334db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = tmp2[i] >> 28;
335db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] &= kBottom28Bits;
336db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
337db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
338db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25;
339db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[17] += ((limb)(tmp[16])) >> 29;
340db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[17] += (((limb)(tmp[16] >> 32)) << 3);
341db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp2[17] += carry;
342db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
343db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* Montgomery elimination of terms.
344db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   *
345db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * Since R is 2**257, we can divide by R with a bitwise shift if we can
346db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * ensure that the right-most 257 bits are all zero. We can make that true by
347db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * adding multiplies of p without affecting the value.
348db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   *
349db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * So we eliminate limbs from right to left. Since the bottom 29 bits of p
350db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
351db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * We can do that for 8 further limbs and then right shift to eliminate the
352db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * extra factor of R. */
353db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0;; i += 2) {
354db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 1] += tmp2[i] >> 29;
355db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    x = tmp2[i] & kBottom29Bits;
356db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    xMask = NON_ZERO_TO_ALL_ONES(x);
357db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i] = 0;
358db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
359db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* The bounds calculations for this loop are tricky. Each iteration of
360db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * the loop eliminates two words by adding values to words to their
361db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * right.
362db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
363db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * The following table contains the amounts added to each word (as an
364db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * offset from the value of i at the top of the loop). The amounts are
365db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * accounted for from the first and second half of the loop separately
366db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * and are written as, for example, 28 to mean a value <2**28.
367db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
368db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * Word:                   3   4   5   6   7   8   9   10
369db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * Added in top half:     28  11      29  21  29  28
370db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                        28  29
371db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                            29
372db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * Added in bottom half:      29  10      28  21  28   28
373db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                            29
374db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
375db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * The value that is currently offset 7 will be offset 5 for the next
376db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * iteration and then offset 3 for the iteration after that. Therefore
377db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * the total value added will be the values added at 7, 5 and 3.
378db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
379db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * The following table accumulates these values. The sums at the bottom
380db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * are written as, for example, 29+28, to mean a value < 2**29+2**28.
381db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
382db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * Word:                   3   4   5   6   7   8   9  10  11  12  13
383db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                        28  11  10  29  21  29  28  28  28  28  28
384db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                            29  28  11  28  29  28  29  28  29  28
385db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                    29  28  21  21  29  21  29  21
386db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                        10  29  28  21  28  21  28
387db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                        28  29  28  29  28  29  28
388db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                            11  10  29  10  29  10
389db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                            29  28  11  28  11
390db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                    29      29
391db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                        --------------------------------------------
392db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                30+ 31+ 30+ 31+ 30+
393db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                28+ 29+ 28+ 29+ 21+
394db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                21+ 28+ 21+ 28+ 10
395db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                10  21+ 10  21+
396db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *                                                    11      11
397db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     *
398db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * So the greatest amount is added to tmp2[10] and tmp2[12]. If
399db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * tmp2[10/12] has an initial value of <2**29, then the maximum value
400db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
401db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * as required. */
402db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 3] += (x << 10) & kBottom28Bits;
403db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 4] += (x >> 18);
404db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
405db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 6] += (x << 21) & kBottom29Bits;
406db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 7] += x >> 8;
407db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
408db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* At position 200, which is the starting bit position for word 7, we
409db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * have a factor of 0xf000000 = 2**28 - 2**24. */
410db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 7] += 0x10000000 & xMask;
411db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
412db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] += (x - 1) & xMask;
413db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 7] -= (x << 24) & kBottom28Bits;
414db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] -= x >> 4;
415db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
416db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] += 0x20000000 & xMask;
417db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] -= x;
418db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] += (x << 28) & kBottom29Bits;
419db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 9] += ((x >> 1) - 1) & xMask;
420db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
421db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i+1 == NLIMBS)
422db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
423db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 2] += tmp2[i + 1] >> 28;
424db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    x = tmp2[i + 1] & kBottom28Bits;
425db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    xMask = NON_ZERO_TO_ALL_ONES(x);
426db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 1] = 0;
427db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
428db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 4] += (x << 11) & kBottom29Bits;
429db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 5] += (x >> 18);
430db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
431db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 7] += (x << 21) & kBottom28Bits;
432db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] += x >> 7;
433db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
434db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* At position 199, which is the starting bit of the 8th word when
435db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * dealing with a context starting on an odd word, we have a factor of
436db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
437db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * word from i+1 is i+8. */
438db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] += 0x20000000 & xMask;
439db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 9] += (x - 1) & xMask;
440db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 8] -= (x << 25) & kBottom29Bits;
441db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 9] -= x >> 4;
442db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
443db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 9] += 0x10000000 & xMask;
444db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 9] -= x;
445db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    tmp2[i + 10] += (x - 1) & xMask;
446db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
447db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
448db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* We merge the right shift with a carry chain. The words above 2**257 have
449db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * widths of 28,29,... which we need to correct when copying them down.  */
450db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  carry = 0;
451db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 8; i++) {
452db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* The maximum value of tmp2[i + 9] occurs on the first iteration and
453db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
454db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root     * therefore safe. */
455db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = tmp2[i + 9];
456db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
457db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
458db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 29;
459db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
460db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
461db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
462db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] = tmp2[i + 9] >> 1;
463db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
464db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 28;
465db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
466db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
467db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
468db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  out[8] = tmp2[17];
469db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  out[8] += carry;
470db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  carry = out[8] >> 29;
471db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  out[8] &= kBottom29Bits;
472db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
473db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
474db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
475db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
476db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_square sets out=in*in.
477db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
478db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
479db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
480db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_square(felem out, const felem in) {
481db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  u64 tmp[17];
482db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
483db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[0] = ((u64) in[0]) * in[0];
484db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[1] = ((u64) in[0]) * (in[1] << 1);
485db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[2] = ((u64) in[0]) * (in[2] << 1) +
486db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[1]) * (in[1] << 1);
487db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[3] = ((u64) in[0]) * (in[3] << 1) +
488db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[1]) * (in[2] << 1);
489db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[4] = ((u64) in[0]) * (in[4] << 1) +
490db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[1]) * (in[3] << 2) + ((u64) in[2]) * in[2];
491db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[5] = ((u64) in[0]) * (in[5] << 1) + ((u64) in[1]) *
492db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in[4] << 1) + ((u64) in[2]) * (in[3] << 1);
493db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[6] = ((u64) in[0]) * (in[6] << 1) + ((u64) in[1]) *
494db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in[5] << 2) + ((u64) in[2]) * (in[4] << 1) +
495db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in[3] << 1);
496db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[7] = ((u64) in[0]) * (in[7] << 1) + ((u64) in[1]) *
497db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in[6] << 1) + ((u64) in[2]) * (in[5] << 1) +
498db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in[4] << 1);
499db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
500db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * which is < 2**64 as required. */
501db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[8] = ((u64) in[0]) * (in[8] << 1) + ((u64) in[1]) *
502db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in[7] << 2) + ((u64) in[2]) * (in[6] << 1) +
503db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in[5] << 2) + ((u64) in[4]) * in[4];
504db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[9] = ((u64) in[1]) * (in[8] << 1) + ((u64) in[2]) *
505db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in[7] << 1) + ((u64) in[3]) * (in[6] << 1) +
506db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[4]) * (in[5] << 1);
507db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[10] = ((u64) in[2]) * (in[8] << 1) + ((u64) in[3]) *
508db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in[7] << 2) + ((u64) in[4]) * (in[6] << 1) +
509db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[5]) * (in[5] << 1);
510db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[11] = ((u64) in[3]) * (in[8] << 1) + ((u64) in[4]) *
511db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in[7] << 1) + ((u64) in[5]) * (in[6] << 1);
512db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[12] = ((u64) in[4]) * (in[8] << 1) +
513db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[5]) * (in[7] << 2) + ((u64) in[6]) * in[6];
514db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[13] = ((u64) in[5]) * (in[8] << 1) +
515db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[6]) * (in[7] << 1);
516db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[14] = ((u64) in[6]) * (in[8] << 1) +
517db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[7]) * (in[7] << 1);
518db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[15] = ((u64) in[7]) * (in[8] << 1);
519db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[16] = ((u64) in[8]) * in[8];
520db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
521db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_degree(out, tmp);
522db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
523db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
524db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_mul sets out=in*in2.
525db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
526db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
527db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
528db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
529db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_mul(felem out, const felem in, const felem in2) {
530db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  u64 tmp[17];
531db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
532db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[0] = ((u64) in[0]) * in2[0];
533db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[1] = ((u64) in[0]) * (in2[1] << 0) +
534db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[1]) * (in2[0] << 0);
535db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[2] = ((u64) in[0]) * (in2[2] << 0) + ((u64) in[1]) *
536db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[1] << 1) + ((u64) in[2]) * (in2[0] << 0);
537db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[3] = ((u64) in[0]) * (in2[3] << 0) + ((u64) in[1]) *
538db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[2] << 0) + ((u64) in[2]) * (in2[1] << 0) +
539db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[0] << 0);
540db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[4] = ((u64) in[0]) * (in2[4] << 0) + ((u64) in[1]) *
541db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[3] << 1) + ((u64) in[2]) * (in2[2] << 0) +
542db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[1] << 1) +
543db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[4]) * (in2[0] << 0);
544db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[5] = ((u64) in[0]) * (in2[5] << 0) + ((u64) in[1]) *
545db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[4] << 0) + ((u64) in[2]) * (in2[3] << 0) +
546db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[2] << 0) + ((u64) in[4]) *
547db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[1] << 0) + ((u64) in[5]) * (in2[0] << 0);
548db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[6] = ((u64) in[0]) * (in2[6] << 0) + ((u64) in[1]) *
549db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[5] << 1) + ((u64) in[2]) * (in2[4] << 0) +
550db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[3] << 1) + ((u64) in[4]) *
551db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[2] << 0) + ((u64) in[5]) * (in2[1] << 1) +
552db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[6]) * (in2[0] << 0);
553db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[7] = ((u64) in[0]) * (in2[7] << 0) + ((u64) in[1]) *
554db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[6] << 0) + ((u64) in[2]) * (in2[5] << 0) +
555db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[4] << 0) + ((u64) in[4]) *
556db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[3] << 0) + ((u64) in[5]) * (in2[2] << 0) +
557db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[6]) * (in2[1] << 0) +
558db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[7]) * (in2[0] << 0);
559db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* tmp[8] has the greatest value but doesn't overflow. See logic in
560db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * felem_square. */
561db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[8] = ((u64) in[0]) * (in2[8] << 0) + ((u64) in[1]) *
562db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[7] << 1) + ((u64) in[2]) * (in2[6] << 0) +
563db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[3]) * (in2[5] << 1) + ((u64) in[4]) *
564db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[4] << 0) + ((u64) in[5]) * (in2[3] << 1) +
565db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[6]) * (in2[2] << 0) + ((u64) in[7]) *
566db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[1] << 1) + ((u64) in[8]) * (in2[0] << 0);
567db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[9] = ((u64) in[1]) * (in2[8] << 0) + ((u64) in[2]) *
568db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[7] << 0) + ((u64) in[3]) * (in2[6] << 0) +
569db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[4]) * (in2[5] << 0) + ((u64) in[5]) *
570db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           (in2[4] << 0) + ((u64) in[6]) * (in2[3] << 0) +
571db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[7]) * (in2[2] << 0) +
572db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root           ((u64) in[8]) * (in2[1] << 0);
573db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[10] = ((u64) in[2]) * (in2[8] << 0) + ((u64) in[3]) *
574db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[7] << 1) + ((u64) in[4]) * (in2[6] << 0) +
575db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[5]) * (in2[5] << 1) + ((u64) in[6]) *
576db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[4] << 0) + ((u64) in[7]) * (in2[3] << 1) +
577db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[8]) * (in2[2] << 0);
578db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[11] = ((u64) in[3]) * (in2[8] << 0) + ((u64) in[4]) *
579db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[7] << 0) + ((u64) in[5]) * (in2[6] << 0) +
580db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[6]) * (in2[5] << 0) + ((u64) in[7]) *
581db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[4] << 0) + ((u64) in[8]) * (in2[3] << 0);
582db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[12] = ((u64) in[4]) * (in2[8] << 0) + ((u64) in[5]) *
583db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[7] << 1) + ((u64) in[6]) * (in2[6] << 0) +
584db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[7]) * (in2[5] << 1) +
585db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[8]) * (in2[4] << 0);
586db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[13] = ((u64) in[5]) * (in2[8] << 0) + ((u64) in[6]) *
587db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[7] << 0) + ((u64) in[7]) * (in2[6] << 0) +
588db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[8]) * (in2[5] << 0);
589db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[14] = ((u64) in[6]) * (in2[8] << 0) + ((u64) in[7]) *
590db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (in2[7] << 1) + ((u64) in[8]) * (in2[6] << 0);
591db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[15] = ((u64) in[7]) * (in2[8] << 0) +
592db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            ((u64) in[8]) * (in2[7] << 0);
593db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  tmp[16] = ((u64) in[8]) * (in2[8] << 0);
594db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
595db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_degree(out, tmp);
596db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
597db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
598db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_assign(felem out, const felem in) {
599db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memcpy(out, in, sizeof(felem));
600db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
601db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
602db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_inv calculates |out| = |in|^{-1}
603db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
604db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Based on Fermat's Little Theorem:
605db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   a^p = a (mod p)
606db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   a^{p-1} = 1 (mod p)
607db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *   a^{p-2} = a^{-1} (mod p)
608db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */
609db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_inv(felem out, const felem in) {
610db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem ftmp, ftmp2;
611db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* each e_I will hold |in|^{2^I - 1} */
612db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem e2, e4, e8, e16, e32, e64;
613db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
614db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
615db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, in); /* 2^1 */
616db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
617db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e2, ftmp);
618db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^3 - 2^1 */
619db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^4 - 2^2 */
620db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
621db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e4, ftmp);
622db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^5 - 2^1 */
623db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^6 - 2^2 */
624db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^7 - 2^3 */
625db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp, ftmp); /* 2^8 - 2^4 */
626db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
627db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e8, ftmp);
628db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 8; i++) {
629db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp, ftmp);
630db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^16 - 2^8 */
631db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
632db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e16, ftmp);
633db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 16; i++) {
634db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp, ftmp);
635db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^32 - 2^16 */
636db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
637db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e32, ftmp);
638db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 32; i++) {
639db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp, ftmp);
640db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^64 - 2^32 */
641db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(e64, ftmp);
642db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
643db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 192; i++) {
644db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp, ftmp);
645db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^256 - 2^224 + 2^192 */
646db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
647db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
648db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 16; i++) {
649db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp2, ftmp2);
650db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^80 - 2^16 */
651db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
652db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 8; i++) {
653db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp2, ftmp2);
654db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^88 - 2^8 */
655db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
656db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 4; i++) {
657db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_square(ftmp2, ftmp2);
658db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } /* 2^92 - 2^4 */
659db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
660db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */
661db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */
662db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
663db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */
664db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */
665db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
666db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
667db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
668db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
669db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
670db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_3 sets out=3*out.
671db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
672db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
673db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
674db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_3(felem out) {
675db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry = 0;
676db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
677db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
678db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0;; i++) {
679db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] *= 3;
680db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
681db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 29;
682db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
683db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
684db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
685db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == NLIMBS)
686db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
687db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
688db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] *= 3;
689db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
690db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = out[i] >> 28;
691db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
692db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
693db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
694db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
695db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
696db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
697db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_4 sets out=4*out.
698db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
699db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
700db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
701db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_4(felem out) {
702db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry = 0, next_carry;
703db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
704db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
705db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0;; i++) {
706db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    next_carry = out[i] >> 27;
707db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] <<= 2;
708db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
709db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
710db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = next_carry + (out[i] >> 29);
711db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
712db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
713db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
714db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == NLIMBS)
715db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
716db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
717db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    next_carry = out[i] >> 26;
718db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] <<= 2;
719db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
720db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
721db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = next_carry + (out[i] >> 28);
722db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
723db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
724db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
725db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
726db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
727db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
728db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_8 sets out=8*out.
729db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
730db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
731db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */
732db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_8(felem out) {
733db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry = 0, next_carry;
734db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  unsigned i;
735db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
736db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0;; i++) {
737db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    next_carry = out[i] >> 26;
738db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] <<= 3;
739db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
740db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
741db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = next_carry + (out[i] >> 29);
742db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom29Bits;
743db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
744db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    i++;
745db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i == NLIMBS)
746db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      break;
747db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
748db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    next_carry = out[i] >> 25;
749db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] <<= 3;
750db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
751db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] += carry;
752db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = next_carry + (out[i] >> 28);
753db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] &= kBottom28Bits;
754db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
755db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
756db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_reduce_carry(out, carry);
757db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
758db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
759db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of
760db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * time depending on the value of |in|. */
761db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic char felem_is_zero_vartime(const felem in) {
762db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb carry;
763db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i;
764db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb tmp[NLIMBS];
765db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
766db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_assign(tmp, in);
767db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
768db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* First, reduce tmp to a minimal form. */
769db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  do {
770db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    carry = 0;
771db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (i = 0;; i++) {
772db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      tmp[i] += carry;
773db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      carry = tmp[i] >> 29;
774db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      tmp[i] &= kBottom29Bits;
775db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
776db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      i++;
777db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      if (i == NLIMBS)
778db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root        break;
779db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
780db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      tmp[i] += carry;
781db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      carry = tmp[i] >> 28;
782db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      tmp[i] &= kBottom28Bits;
783db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
784db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
785db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem_reduce_carry(tmp, carry);
786db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } while (carry);
787db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
788db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* tmp < 2**257, so the only possible zero values are 0, p and 2p. */
789db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  return memcmp(tmp, kZero, sizeof(tmp)) == 0 ||
790db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root         memcmp(tmp, kP, sizeof(tmp)) == 0 ||
791db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root         memcmp(tmp, k2P, sizeof(tmp)) == 0;
792db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
793db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
794db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
795db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Group operations:
796db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
797db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Elements of the elliptic curve group are represented in Jacobian
798db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
799db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Jacobian form. */
800db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
801db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
802db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
803db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l */
804db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_double(felem x_out, felem y_out, felem z_out, const felem x,
805db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                         const felem y, const felem z) {
806db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem delta, gamma, alpha, beta, tmp, tmp2;
807db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
808db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(delta, z);
809db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(gamma, y);
810db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(beta, x, gamma);
811db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
812db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(tmp, x, delta);
813db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp2, x, delta);
814db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(alpha, tmp, tmp2);
815db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_scalar_3(alpha);
816db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
817db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(tmp, y, z);
818db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(tmp, tmp);
819db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, tmp, gamma);
820db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(z_out, tmp, delta);
821db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
822db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_scalar_4(beta);
823db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(x_out, alpha);
824db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, beta);
825db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, beta);
826db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
827db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, beta, x_out);
828db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(tmp, alpha, tmp);
829db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(tmp2, gamma);
830db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_scalar_8(tmp2);
831db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, tmp, tmp2);
832db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
833db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
834db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
835db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * (i.e. the second point is affine.)
836db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
837db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
838db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
839db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Note that this function does not handle P+P, infinity+P nor P+infinity
840db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * correctly. */
841db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add_mixed(felem x_out, felem y_out, felem z_out,
842db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                            const felem x1, const felem y1, const felem z1,
843db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                            const felem x2, const felem y2) {
844db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
845db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
846db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z1z1, z1);
847db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(tmp, z1, z1);
848db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
849db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(u2, x2, z1z1);
850db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z1z1z1, z1, z1z1);
851db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(s2, y2, z1z1z1);
852db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(h, u2, x1);
853db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(i, h, h);
854db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(i, i);
855db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(j, h, i);
856db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(r, s2, y1);
857db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(r, r, r);
858db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(v, x1, i);
859db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
860db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z_out, tmp, h);
861db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(rr, r);
862db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, rr, j);
863db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
864db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
865db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
866db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, v, x_out);
867db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(y_out, tmp, r);
868db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(tmp, y1, j);
869db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
870db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
871db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
872db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
873db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
874db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
875db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
876db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
877db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Note that this function does not handle P+P, infinity+P nor P+infinity
878db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * correctly. */
879db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add(felem x_out, felem y_out, felem z_out, const felem x1,
880db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                      const felem y1, const felem z1, const felem x2,
881db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                      const felem y2, const felem z2) {
882db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
883db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
884db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z1z1, z1);
885db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z2z2, z2);
886db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(u1, x1, z2z2);
887db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
888db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(tmp, z1, z2);
889db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(tmp, tmp);
890db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, tmp, z1z1);
891db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, tmp, z2z2);
892db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
893db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z2z2z2, z2, z2z2);
894db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(s1, y1, z2z2z2);
895db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
896db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(u2, x2, z1z1);
897db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z1z1z1, z1, z1z1);
898db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(s2, y2, z1z1z1);
899db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(h, u2, u1);
900db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(i, h, h);
901db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(i, i);
902db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(j, h, i);
903db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(r, s2, s1);
904db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(r, r, r);
905db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(v, u1, i);
906db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
907db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z_out, tmp, h);
908db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(rr, r);
909db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, rr, j);
910db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
911db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
912db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
913db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, v, x_out);
914db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(y_out, tmp, r);
915db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(tmp, s1, j);
916db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
917db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
918db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
919db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
920db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
921db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *                                                        {x2,y2,z2}.
922db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
923db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
924db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
925db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This function handles the case where {x1,y1,z1}={x2,y2,z2}. */
926db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add_or_double_vartime(
927db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem x_out, felem y_out, felem z_out, const felem x1, const felem y1,
928db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    const felem z1, const felem x2, const felem y2, const felem z2) {
929db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
930db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  char x_equal, y_equal;
931db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
932db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z1z1, z1);
933db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z2z2, z2);
934db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(u1, x1, z2z2);
935db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
936db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(tmp, z1, z2);
937db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(tmp, tmp);
938db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, tmp, z1z1);
939db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, tmp, z2z2);
940db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
941db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z2z2z2, z2, z2z2);
942db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(s1, y1, z2z2z2);
943db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
944db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(u2, x2, z1z1);
945db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z1z1z1, z1, z1z1);
946db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(s2, y2, z1z1z1);
947db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(h, u2, u1);
948db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  x_equal = felem_is_zero_vartime(h);
949db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(i, h, h);
950db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(i, i);
951db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(j, h, i);
952db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(r, s2, s1);
953db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  y_equal = felem_is_zero_vartime(r);
954db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  if (x_equal && y_equal) {
955db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_double(x_out, y_out, z_out, x1, y1, z1);
956db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    return;
957db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
958db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_sum(r, r, r);
959db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(v, u1, i);
960db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
961db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z_out, tmp, h);
962db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(rr, r);
963db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, rr, j);
964db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
965db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(x_out, x_out, v);
966db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
967db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(tmp, v, x_out);
968db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(y_out, tmp, r);
969db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(tmp, s1, j);
970db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
971db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_diff(y_out, y_out, tmp);
972db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
973db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
974db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* copy_conditional sets out=in if mask = 0xffffffff in constant time.
975db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
976db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: mask is either 0 or 0xffffffff. */
977db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void copy_conditional(felem out, const felem in, limb mask) {
978db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i;
979db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
980db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < NLIMBS; i++) {
981db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    const limb tmp = mask & (in[i] ^ out[i]);
982db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    out[i] ^= tmp;
983db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
984db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
985db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
986db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* select_affine_point sets {out_x,out_y} to the index'th entry of table.
987db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: index < 16, table[0] must be zero. */
988db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void select_affine_point(felem out_x, felem out_y, const limb* table,
989db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                                limb index) {
990db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb i, j;
991db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
992db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(out_x, 0, sizeof(felem));
993db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(out_y, 0, sizeof(felem));
994db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
995db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 1; i < 16; i++) {
996db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    limb mask = i ^ index;
997db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask |= mask >> 2;
998db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask |= mask >> 1;
999db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask &= 1;
1000db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask--;
1001db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j < NLIMBS; j++, table++) {
1002db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out_x[j] |= *table & mask;
1003db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1004db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j < NLIMBS; j++, table++) {
1005db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out_y[j] |= *table & mask;
1006db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1007db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1008db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1009db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1010db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
1011db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * table. On entry: index < 16, table[0] must be zero. */
1012db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void select_jacobian_point(felem out_x, felem out_y, felem out_z,
1013db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                                  const limb* table, limb index) {
1014db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb i, j;
1015db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1016db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(out_x, 0, sizeof(felem));
1017db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(out_y, 0, sizeof(felem));
1018db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(out_z, 0, sizeof(felem));
1019db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1020db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* The implicit value at index 0 is all zero. We don't need to perform that
1021db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * iteration of the loop because we already set out_* to zero. */
1022db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  table += 3 * NLIMBS;
1023db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1024db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  // Hit all entries to obscure cache profiling.
1025db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 1; i < 16; i++) {
1026db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    limb mask = i ^ index;
1027db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask |= mask >> 2;
1028db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask |= mask >> 1;
1029db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask &= 1;
1030db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask--;
1031db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j < NLIMBS; j++, table++) {
1032db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out_x[j] |= *table & mask;
1033db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1034db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j < NLIMBS; j++, table++) {
1035db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out_y[j] |= *table & mask;
1036db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1037db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j < NLIMBS; j++, table++) {
1038db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out_z[j] |= *table & mask;
1039db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1040db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1041db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1042db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1043db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
1044db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * number. Note that the value of scalar must be less than the order of the
1045db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * group. */
1046db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void scalar_base_mult(felem nx, felem ny, felem nz,
1047db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                             const p256_int* scalar) {
1048db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i, j;
1049db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
1050db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  u32 table_offset;
1051db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1052db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem px, py;
1053db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem tx, ty, tz;
1054db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1055db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(nx, 0, sizeof(felem));
1056db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(ny, 0, sizeof(felem));
1057db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(nz, 0, sizeof(felem));
1058db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1059db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* The loop adds bits at positions 0, 64, 128 and 192, followed by
1060db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root   * positions 32,96,160 and 224 and does this 32 times. */
1061db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 32; i++) {
1062db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i) {
1063db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_double(nx, ny, nz, nx, ny, nz);
1064db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1065db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    table_offset = 0;
1066db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    for (j = 0; j <= 32; j += 32) {
1067db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      char bit0 = p256_get_bit(scalar, 31 - i + j);
1068db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      char bit1 = p256_get_bit(scalar, 95 - i + j);
1069db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      char bit2 = p256_get_bit(scalar, 159 - i + j);
1070db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      char bit3 = p256_get_bit(scalar, 223 - i + j);
1071db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
1072db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1073db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      select_affine_point(px, py, kPrecomputed + table_offset, index);
1074db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      table_offset += 30 * NLIMBS;
1075db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1076db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      /* Since scalar is less than the order of the group, we know that
1077db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
1078db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * below. */
1079db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
1080db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
1081db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * (a.k.a.  the point at infinity). We handle that situation by
1082db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * copying the point from the table. */
1083db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(nx, px, n_is_infinity_mask);
1084db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(ny, py, n_is_infinity_mask);
1085db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(nz, kOne, n_is_infinity_mask);
1086db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1087db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      /* Equally, the result is also wrong if the point from the table is
1088db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * zero, which happens when the index is zero. We handle that by
1089db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root       * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0. */
1090db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1091db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1092db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(nx, tx, mask);
1093db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(ny, ty, mask);
1094db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      copy_conditional(nz, tz, mask);
1095db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      /* If p was not zero, then n is now non-zero. */
1096db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      n_is_infinity_mask &= ~p_is_noninfinite_mask;
1097db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1098db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1099db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1100db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1101db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_to_affine converts a Jacobian point to an affine point. If the input
1102db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * is the point at infinity then it returns (0, 0) in constant time. */
1103db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_to_affine(felem x_out, felem y_out, const felem nx,
1104db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                            const felem ny, const felem nz) {
1105db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem z_inv, z_inv_sq;
1106db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_inv(z_inv, nz);
1107db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_square(z_inv_sq, z_inv);
1108db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(x_out, nx, z_inv_sq);
1109db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(z_inv, z_inv, z_inv_sq);
1110db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem_mul(y_out, ny, z_inv);
1111db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1112db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1113db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* scalar_base_mult sets {nx,ny,nz} = scalar*{x,y}. */
1114db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void scalar_mult(felem nx, felem ny, felem nz, const felem x,
1115db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                        const felem y, const p256_int* scalar) {
1116db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i;
1117db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem px, py, pz, tx, ty, tz;
1118db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem precomp[16][3];
1119db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
1120db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1121db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* We precompute 0,1,2,... times {x,y}. */
1122db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(precomp, 0, sizeof(felem) * 3);
1123db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memcpy(&precomp[1][0], x, sizeof(felem));
1124db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memcpy(&precomp[1][1], y, sizeof(felem));
1125db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memcpy(&precomp[1][2], kOne, sizeof(felem));
1126db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1127db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 2; i < 16; i += 2) {
1128db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_double(precomp[i][0], precomp[i][1], precomp[i][2],
1129db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
1130db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1131db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
1132db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root                    precomp[i][0], precomp[i][1], precomp[i][2], x, y);
1133db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1134db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1135db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(nx, 0, sizeof(felem));
1136db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(ny, 0, sizeof(felem));
1137db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  memset(nz, 0, sizeof(felem));
1138db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  n_is_infinity_mask = -1;
1139db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1140db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* We add in a window of four bits each iteration and do this 64 times. */
1141db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < 256; i += 4) {
1142db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if (i) {
1143db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_double(nx, ny, nz, nx, ny, nz);
1144db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_double(nx, ny, nz, nx, ny, nz);
1145db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_double(nx, ny, nz, nx, ny, nz);
1146db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      point_double(nx, ny, nz, nx, ny, nz);
1147db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1148db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1149db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    index = (p256_get_bit(scalar, 255 - i - 0) << 3) |
1150db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (p256_get_bit(scalar, 255 - i - 1) << 2) |
1151db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            (p256_get_bit(scalar, 255 - i - 2) << 1) |
1152db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root            p256_get_bit(scalar, 255 - i - 3);
1153db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1154db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* See the comments in scalar_base_mult about handling infinities. */
1155db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    select_jacobian_point(px, py, pz, precomp[0][0], index);
1156db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
1157db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(nx, px, n_is_infinity_mask);
1158db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(ny, py, n_is_infinity_mask);
1159db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(nz, pz, n_is_infinity_mask);
1160db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1161db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1162db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1163db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1164db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(nx, tx, mask);
1165db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(ny, ty, mask);
1166db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    copy_conditional(nz, tz, mask);
1167db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    n_is_infinity_mask &= ~p_is_noninfinite_mask;
1168db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1169db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1170db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1171db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define kRDigits {2, 0, 0, 0xfffffffe, 0xffffffff, 0xffffffff, 0xfffffffd, 1} // 2^257 mod p256.p
1172db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1173db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define kRInvDigits {0x80000000, 1, 0xffffffff, 0, 0x80000001, 0xfffffffe, 1, 0x7fffffff}  // 1 / 2^257 mod p256.p
1174db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1175db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const p256_int kR = { kRDigits };
1176db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const p256_int kRInv = { kRInvDigits };
1177db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1178db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* to_montgomery sets out = R*in. */
1179db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void to_montgomery(felem out, const p256_int* in) {
1180db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_int in_shifted;
1181db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i;
1182db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1183db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_init(&in_shifted);
1184db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_modmul(&SECP256r1_p, in, 0, &kR, &in_shifted);
1185db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1186db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = 0; i < NLIMBS; i++) {
1187db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if ((i & 1) == 0) {
1188db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out[i] = P256_DIGIT(&in_shifted, 0) & kBottom29Bits;
1189db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      p256_shr(&in_shifted, 29, &in_shifted);
1190db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    } else {
1191db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      out[i] = P256_DIGIT(&in_shifted, 0) & kBottom28Bits;
1192db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      p256_shr(&in_shifted, 28, &in_shifted);
1193db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1194db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1195db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1196db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_clear(&in_shifted);
1197db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1198db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1199db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* from_montgomery sets out=in/R. */
1200db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void from_montgomery(p256_int* out, const felem in) {
1201db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_int result, tmp;
1202db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  int i, top;
1203db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1204db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_init(&result);
1205db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_init(&tmp);
1206db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1207db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_add_d(&tmp, in[NLIMBS - 1], &result);
1208db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  for (i = NLIMBS - 2; i >= 0; i--) {
1209db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    if ((i & 1) == 0) {
1210db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      top = p256_shl(&result, 29, &tmp);
1211db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    } else {
1212db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root      top = p256_shl(&result, 28, &tmp);
1213db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    }
1214db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    top |= p256_add_d(&tmp, in[i], &result);
1215db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1216db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1217db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_modmul(&SECP256r1_p, &kRInv, top, &result, out);
1218db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1219db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_clear(&result);
1220db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  p256_clear(&tmp);
1221db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1222db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1223db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* p256_base_point_mul sets {out_x,out_y} = nG, where n is < the
1224db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * order of the group. */
1225db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootvoid p256_base_point_mul(const p256_int* n, p256_int* out_x, p256_int* out_y) {
1226db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem x, y, z;
1227db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1228db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  scalar_base_mult(x, y, z, n);
1229db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1230db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  {
1231db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    felem x_affine, y_affine;
1232db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1233db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_to_affine(x_affine, y_affine, x, y, z);
1234db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    from_montgomery(out_x, x_affine);
1235db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    from_montgomery(out_y, y_affine);
1236db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1237db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1238db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1239db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* p256_points_mul_vartime sets {out_x,out_y} = n1*G + n2*{in_x,in_y}, where
1240db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * n1 and n2 are < the order of the group.
1241db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root *
1242db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * As indicated by the name, this function operates in variable time. This
1243db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * is safe because it's used for signature validation which doesn't deal
1244db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * with secrets. */
1245db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootvoid p256_points_mul_vartime(
1246db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    const p256_int* n1, const p256_int* n2, const p256_int* in_x,
1247db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    const p256_int* in_y, p256_int* out_x, p256_int* out_y) {
1248db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  felem x1, y1, z1, x2, y2, z2, px, py;
1249db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1250db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  /* If both scalars are zero, then the result is the point at infinity. */
1251db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  if (p256_is_zero(n1) != 0 && p256_is_zero(n2) != 0) {
1252db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    p256_clear(out_x);
1253db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    p256_clear(out_y);
1254db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    return;
1255db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1256db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1257db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  to_montgomery(px, in_x);
1258db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  to_montgomery(py, in_y);
1259db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  scalar_base_mult(x1, y1, z1, n1);
1260db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  scalar_mult(x2, y2, z2, px, py, n2);
1261db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1262db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  if (p256_is_zero(n2) != 0) {
1263db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
1264db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root         * {x1,y1,z1}. */
1265db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } else if (p256_is_zero(n1) != 0) {
1266db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
1267db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root         * {x2,y2,z2}. */
1268db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    memcpy(x1, x2, sizeof(x2));
1269db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    memcpy(y1, y2, sizeof(y2));
1270db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    memcpy(z1, z2, sizeof(z2));
1271db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  } else {
1272db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
1273db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root    point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
1274db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  }
1275db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root
1276db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  point_to_affine(px, py, x1, y1, z1);
1277db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  from_montgomery(out_x, px);
1278db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root  from_montgomery(out_y, py);
1279db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}
1280