1db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* 2db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Copyright 2013 The Android Open Source Project 3db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 4db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Redistribution and use in source and binary forms, with or without 5db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * modification, are permitted provided that the following conditions are met: 6db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * * Redistributions of source code must retain the above copyright 7db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * notice, this list of conditions and the following disclaimer. 8db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * * Redistributions in binary form must reproduce the above copyright 9db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * notice, this list of conditions and the following disclaimer in the 10db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * documentation and/or other materials provided with the distribution. 11db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * * Neither the name of Google Inc. nor the names of its contributors may 12db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * be used to endorse or promote products derived from this software 13db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * without specific prior written permission. 14db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 15db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR 16db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 18db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 21db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 23db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 24db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */ 26db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 27db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// This is an implementation of the P256 elliptic curve group. It's written to 28db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// be portable 32-bit, although it's still constant-time. 29db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// 30db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// WARNING: Implementing these functions in a constant-time manner is far from 31db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// obvious. Be careful when touching this code. 32db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// 33db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background. 34db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 35db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdint.h> 36db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdio.h> 37db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 38db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <string.h> 39db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include <stdlib.h> 40db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 41db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#include "mincrypt/p256.h" 42db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 43db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint8_t u8; 44db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint32_t u32; 45db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef int32_t s32; 46db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef uint64_t u64; 47db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 48db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Our field elements are represented as nine 32-bit limbs. 49db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 50db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The value of an felem (field element) is: 51db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) 52db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 53db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * That is, each limb is alternately 29 or 28-bits wide in little-endian 54db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * order. 55db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 56db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This means that an felem hits 2**257, rather than 2**256 as we would like. A 57db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems 58db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * when multiplying as terms end up one bit short of a limb which would require 59db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * much bit-shifting to correct. 60db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 61db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Finally, the values stored in an felem are in Montgomery form. So the value 62db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257. 63db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */ 64db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef u32 limb; 65db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define NLIMBS 9 66db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Roottypedef limb felem[NLIMBS]; 67db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 68db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kBottom28Bits = 0xfffffff; 69db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kBottom29Bits = 0x1fffffff; 70db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 71db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and 72db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28-bit words. */ 73db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kOne = { 74db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 2, 0, 0, 0xffff800, 75db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 76db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0 77db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}; 78db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kZero = {0}; 79db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem kP = { 80db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 81db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0, 0, 0x200000, 0xf000000, 82db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xfffffff 83db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}; 84db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem k2P = { 85db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 86db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0, 0, 0x400000, 0xe000000, 87db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1fffffff 88db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}; 89db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* kPrecomputed contains precomputed values to aid the calculation of scalar 90db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * multiples of the base point, G. It's actually two, equal length, tables 91db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * concatenated. 92db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 93db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The first table contains (x,y) felem pairs for 16 multiples of the base 94db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * point, G. 95db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 96db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Index | Index (binary) | Value 97db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 0 | 0000 | 0G (all zeros, omitted) 98db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 1 | 0001 | G 99db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 2 | 0010 | 2**64G 100db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 3 | 0011 | 2**64G + G 101db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 4 | 0100 | 2**128G 102db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 5 | 0101 | 2**128G + G 103db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 6 | 0110 | 2**128G + 2**64G 104db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 7 | 0111 | 2**128G + 2**64G + G 105db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 8 | 1000 | 2**192G 106db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 9 | 1001 | 2**192G + G 107db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 10 | 1010 | 2**192G + 2**64G 108db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 11 | 1011 | 2**192G + 2**64G + G 109db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 12 | 1100 | 2**192G + 2**128G 110db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 13 | 1101 | 2**192G + 2**128G + G 111db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 14 | 1110 | 2**192G + 2**128G + 2**64G 112db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 15 | 1111 | 2**192G + 2**128G + 2**64G + G 113db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 114db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The second table follows the same style, but the terms are 2**32G, 115db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 2**96G, 2**160G, 2**224G. 116db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 117db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This is ~2KB of data. */ 118db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = { 119db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, 120db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, 121db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, 122db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, 123db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, 124db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, 125db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, 126db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, 127db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, 128db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, 129db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, 130db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, 131db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, 132db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, 133db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, 134db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, 135db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, 136db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, 137db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b, 138db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, 139db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, 140db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, 141db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, 142db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, 143db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, 144db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, 145db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, 146db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, 147db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, 148db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, 149db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, 150db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f, 151db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, 152db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, 153db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, 154db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, 155db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, 156db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f, 157db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, 158db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, 159db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, 160db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, 161db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, 162db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, 163db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, 164db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, 165db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, 166db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, 167db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, 168db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, 169db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, 170db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, 171db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, 172db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, 173db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, 174db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, 175db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d, 176db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4, 177db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, 178db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78, 179db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root}; 180db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 181db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 182db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Field element operations: */ 183db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 184db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* NON_ZERO_TO_ALL_ONES returns: 185db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 0xffffffff for 0 < x <= 2**31 186db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 0 for x == 0 or x > 2**31. 187db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 188db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * x must be a u32 or an equivalent type such as limb. */ 189db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1) 190db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 191db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_reduce_carry adds a multiple of p in order to cancel |carry|, 192db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * which is a term at 2**257. 193db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 194db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. 195db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. */ 196db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_reduce_carry(felem inout, limb carry) { 197db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); 198db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 199db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[0] += carry << 1; 200db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[3] += 0x10000000 & carry_mask; 201db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the 202db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * previous line therefore this doesn't underflow. */ 203db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[3] -= carry << 11; 204db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[4] += (0x20000000 - 1) & carry_mask; 205db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[5] += (0x10000000 - 1) & carry_mask; 206db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[6] += (0x20000000 - 1) & carry_mask; 207db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[6] -= carry << 22; 208db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* This may underflow if carry is non-zero but, if so, we'll fix it in the 209db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * next line. */ 210db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[7] -= 1 & carry_mask; 211db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root inout[7] += carry << 25; 212db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 213db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 214db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_sum sets out = in+in2. 215db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 216db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry, in[i]+in2[i] must not overflow a 32-bit word. 217db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 */ 218db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_sum(felem out, const felem in, const felem in2) { 219db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry = 0; 220db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 221db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 222db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 223db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = in[i] + in2[i]; 224db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 225db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 29; 226db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 227db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 228db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 229db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 230db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 231db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 232db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = in[i] + in2[i]; 233db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 234db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 28; 235db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 236db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 237db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 238db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 239db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 240db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 241db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31m3 (((limb)1) << 31) - (((limb)1) << 3) 242db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30m2 (((limb)1) << 30) - (((limb)1) << 2) 243db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2) 244db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31m2 (((limb)1) << 31) - (((limb)1) << 2) 245db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2) 246db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2) 247db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 248db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* zero31 is 0 mod p. */ 249db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const felem zero31 = { two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2 }; 250db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 251db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_diff sets out = in-in2. 252db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 253db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and 254db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. 255db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 256db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_diff(felem out, const felem in, const felem in2) { 257db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry = 0; 258db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 259db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 260db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 261db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = in[i] - in2[i]; 262db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += zero31[i]; 263db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 264db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 29; 265db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 266db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 267db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 268db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 269db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 270db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 271db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = in[i] - in2[i]; 272db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += zero31[i]; 273db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 274db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 28; 275db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 276db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 277db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 278db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 279db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 280db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 281db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words 282db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * with the same 29,28,... bit positions as an felem. 283db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 284db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The values in felems are in Montgomery form: x*R mod p where R = 2**257. 285db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Since we just multiplied two Montgomery values together, the result is 286db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * x*y*R*R mod p. We wish to divide by R in order for the result also to be 287db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * in Montgomery form. 288db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 289db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: tmp[i] < 2**64 290db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 */ 291db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_reduce_degree(felem out, u64 tmp[17]) { 292db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The following table may be helpful when reading this code: 293db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 294db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10... 295db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29 296db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285 297db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285 */ 298db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb tmp2[18], carry, x, xMask; 299db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 300db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 301db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* tmp contains 64-bit words with the same 29,28,29-bit positions as an 302db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * felem. So the top of an element of tmp might overlap with another 303db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * element two positions down. The following loop eliminates this 304db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * overlap. */ 305db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[0] = (limb)(tmp[0] & kBottom29Bits); 306db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 307db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try 308db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * and hint to the compiler that it can do a single-word shift by selecting 309db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * the right register rather than doing a double-word shift and truncating 310db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * afterwards. */ 311db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[1] = ((limb) tmp[0]) >> 29; 312db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits; 313db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[1] += ((limb) tmp[1]) & kBottom28Bits; 314db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = tmp2[1] >> 28; 315db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[1] &= kBottom28Bits; 316db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 317db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 2; i < 17; i++) { 318db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; 319db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += ((limb)(tmp[i - 1])) >> 28; 320db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits; 321db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += ((limb) tmp[i]) & kBottom29Bits; 322db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += carry; 323db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = tmp2[i] >> 29; 324db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] &= kBottom29Bits; 325db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 326db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 327db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == 17) 328db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 329db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; 330db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += ((limb)(tmp[i - 1])) >> 29; 331db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits; 332db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += ((limb) tmp[i]) & kBottom28Bits; 333db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] += carry; 334db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = tmp2[i] >> 28; 335db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] &= kBottom28Bits; 336db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 337db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 338db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25; 339db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[17] += ((limb)(tmp[16])) >> 29; 340db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[17] += (((limb)(tmp[16] >> 32)) << 3); 341db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[17] += carry; 342db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 343db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* Montgomery elimination of terms. 344db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 345db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Since R is 2**257, we can divide by R with a bitwise shift if we can 346db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * ensure that the right-most 257 bits are all zero. We can make that true by 347db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * adding multiplies of p without affecting the value. 348db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 349db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * So we eliminate limbs from right to left. Since the bottom 29 bits of p 350db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0. 351db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * We can do that for 8 further limbs and then right shift to eliminate the 352db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * extra factor of R. */ 353db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i += 2) { 354db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 1] += tmp2[i] >> 29; 355db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root x = tmp2[i] & kBottom29Bits; 356db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root xMask = NON_ZERO_TO_ALL_ONES(x); 357db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i] = 0; 358db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 359db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The bounds calculations for this loop are tricky. Each iteration of 360db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * the loop eliminates two words by adding values to words to their 361db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * right. 362db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 363db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The following table contains the amounts added to each word (as an 364db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * offset from the value of i at the top of the loop). The amounts are 365db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * accounted for from the first and second half of the loop separately 366db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * and are written as, for example, 28 to mean a value <2**28. 367db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 368db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Word: 3 4 5 6 7 8 9 10 369db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Added in top half: 28 11 29 21 29 28 370db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28 29 371db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 372db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Added in bottom half: 29 10 28 21 28 28 373db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 374db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 375db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The value that is currently offset 7 will be offset 5 for the next 376db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * iteration and then offset 3 for the iteration after that. Therefore 377db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * the total value added will be the values added at 7, 5 and 3. 378db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 379db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * The following table accumulates these values. The sums at the bottom 380db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * are written as, for example, 29+28, to mean a value < 2**29+2**28. 381db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 382db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Word: 3 4 5 6 7 8 9 10 11 12 13 383db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28 11 10 29 21 29 28 28 28 28 28 384db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 28 11 28 29 28 29 28 29 28 385db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 28 21 21 29 21 29 21 386db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 10 29 28 21 28 21 28 387db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28 29 28 29 28 29 28 388db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 11 10 29 10 29 10 389db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 28 11 28 11 390db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 29 29 391db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * -------------------------------------------- 392db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 30+ 31+ 30+ 31+ 30+ 393db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 28+ 29+ 28+ 29+ 21+ 394db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 21+ 28+ 21+ 28+ 10 395db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 10 21+ 10 21+ 396db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 11 11 397db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 398db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * So the greatest amount is added to tmp2[10] and tmp2[12]. If 399db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * tmp2[10/12] has an initial value of <2**29, then the maximum value 400db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32, 401db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * as required. */ 402db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 3] += (x << 10) & kBottom28Bits; 403db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 4] += (x >> 18); 404db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 405db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 6] += (x << 21) & kBottom29Bits; 406db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 7] += x >> 8; 407db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 408db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* At position 200, which is the starting bit position for word 7, we 409db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * have a factor of 0xf000000 = 2**28 - 2**24. */ 410db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 7] += 0x10000000 & xMask; 411db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */ 412db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] += (x - 1) & xMask; 413db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 7] -= (x << 24) & kBottom28Bits; 414db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] -= x >> 4; 415db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 416db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] += 0x20000000 & xMask; 417db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] -= x; 418db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] += (x << 28) & kBottom29Bits; 419db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 9] += ((x >> 1) - 1) & xMask; 420db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 421db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i+1 == NLIMBS) 422db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 423db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 2] += tmp2[i + 1] >> 28; 424db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root x = tmp2[i + 1] & kBottom28Bits; 425db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root xMask = NON_ZERO_TO_ALL_ONES(x); 426db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 1] = 0; 427db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 428db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 4] += (x << 11) & kBottom29Bits; 429db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 5] += (x >> 18); 430db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 431db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 7] += (x << 21) & kBottom28Bits; 432db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] += x >> 7; 433db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 434db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* At position 199, which is the starting bit of the 8th word when 435db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * dealing with a context starting on an odd word, we have a factor of 436db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th 437db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * word from i+1 is i+8. */ 438db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] += 0x20000000 & xMask; 439db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 9] += (x - 1) & xMask; 440db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 8] -= (x << 25) & kBottom29Bits; 441db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 9] -= x >> 4; 442db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 443db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 9] += 0x10000000 & xMask; 444db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 9] -= x; 445db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp2[i + 10] += (x - 1) & xMask; 446db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 447db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 448db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* We merge the right shift with a carry chain. The words above 2**257 have 449db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * widths of 28,29,... which we need to correct when copying them down. */ 450db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = 0; 451db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 8; i++) { 452db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The maximum value of tmp2[i + 9] occurs on the first iteration and 453db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is 454db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * therefore safe. */ 455db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = tmp2[i + 9]; 456db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 457db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += (tmp2[i + 10] << 28) & kBottom29Bits; 458db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 29; 459db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 460db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 461db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 462db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = tmp2[i + 9] >> 1; 463db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 464db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 28; 465db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 466db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 467db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 468db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[8] = tmp2[17]; 469db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[8] += carry; 470db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[8] >> 29; 471db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[8] &= kBottom29Bits; 472db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 473db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 474db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 475db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 476db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_square sets out=in*in. 477db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 478db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29. 479db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 480db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_square(felem out, const felem in) { 481db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root u64 tmp[17]; 482db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 483db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[0] = ((u64) in[0]) * in[0]; 484db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[1] = ((u64) in[0]) * (in[1] << 1); 485db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[2] = ((u64) in[0]) * (in[2] << 1) + 486db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[1]) * (in[1] << 1); 487db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[3] = ((u64) in[0]) * (in[3] << 1) + 488db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[1]) * (in[2] << 1); 489db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[4] = ((u64) in[0]) * (in[4] << 1) + 490db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[1]) * (in[3] << 2) + ((u64) in[2]) * in[2]; 491db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[5] = ((u64) in[0]) * (in[5] << 1) + ((u64) in[1]) * 492db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[4] << 1) + ((u64) in[2]) * (in[3] << 1); 493db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[6] = ((u64) in[0]) * (in[6] << 1) + ((u64) in[1]) * 494db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[5] << 2) + ((u64) in[2]) * (in[4] << 1) + 495db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in[3] << 1); 496db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[7] = ((u64) in[0]) * (in[7] << 1) + ((u64) in[1]) * 497db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[6] << 1) + ((u64) in[2]) * (in[5] << 1) + 498db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in[4] << 1); 499db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60, 500db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * which is < 2**64 as required. */ 501db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[8] = ((u64) in[0]) * (in[8] << 1) + ((u64) in[1]) * 502db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[7] << 2) + ((u64) in[2]) * (in[6] << 1) + 503db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in[5] << 2) + ((u64) in[4]) * in[4]; 504db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[9] = ((u64) in[1]) * (in[8] << 1) + ((u64) in[2]) * 505db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[7] << 1) + ((u64) in[3]) * (in[6] << 1) + 506db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[4]) * (in[5] << 1); 507db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[10] = ((u64) in[2]) * (in[8] << 1) + ((u64) in[3]) * 508db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[7] << 2) + ((u64) in[4]) * (in[6] << 1) + 509db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[5]) * (in[5] << 1); 510db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[11] = ((u64) in[3]) * (in[8] << 1) + ((u64) in[4]) * 511db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in[7] << 1) + ((u64) in[5]) * (in[6] << 1); 512db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[12] = ((u64) in[4]) * (in[8] << 1) + 513db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[5]) * (in[7] << 2) + ((u64) in[6]) * in[6]; 514db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[13] = ((u64) in[5]) * (in[8] << 1) + 515db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[6]) * (in[7] << 1); 516db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[14] = ((u64) in[6]) * (in[8] << 1) + 517db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[7]) * (in[7] << 1); 518db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[15] = ((u64) in[7]) * (in[8] << 1); 519db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[16] = ((u64) in[8]) * in[8]; 520db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 521db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_degree(out, tmp); 522db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 523db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 524db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_mul sets out=in*in2. 525db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 526db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and 527db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. 528db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 529db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_mul(felem out, const felem in, const felem in2) { 530db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root u64 tmp[17]; 531db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 532db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[0] = ((u64) in[0]) * in2[0]; 533db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[1] = ((u64) in[0]) * (in2[1] << 0) + 534db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[1]) * (in2[0] << 0); 535db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[2] = ((u64) in[0]) * (in2[2] << 0) + ((u64) in[1]) * 536db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[1] << 1) + ((u64) in[2]) * (in2[0] << 0); 537db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[3] = ((u64) in[0]) * (in2[3] << 0) + ((u64) in[1]) * 538db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[2] << 0) + ((u64) in[2]) * (in2[1] << 0) + 539db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[0] << 0); 540db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[4] = ((u64) in[0]) * (in2[4] << 0) + ((u64) in[1]) * 541db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[3] << 1) + ((u64) in[2]) * (in2[2] << 0) + 542db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[1] << 1) + 543db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[4]) * (in2[0] << 0); 544db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[5] = ((u64) in[0]) * (in2[5] << 0) + ((u64) in[1]) * 545db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[4] << 0) + ((u64) in[2]) * (in2[3] << 0) + 546db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[2] << 0) + ((u64) in[4]) * 547db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[1] << 0) + ((u64) in[5]) * (in2[0] << 0); 548db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[6] = ((u64) in[0]) * (in2[6] << 0) + ((u64) in[1]) * 549db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[5] << 1) + ((u64) in[2]) * (in2[4] << 0) + 550db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[3] << 1) + ((u64) in[4]) * 551db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[2] << 0) + ((u64) in[5]) * (in2[1] << 1) + 552db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[6]) * (in2[0] << 0); 553db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[7] = ((u64) in[0]) * (in2[7] << 0) + ((u64) in[1]) * 554db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[6] << 0) + ((u64) in[2]) * (in2[5] << 0) + 555db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[4] << 0) + ((u64) in[4]) * 556db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[3] << 0) + ((u64) in[5]) * (in2[2] << 0) + 557db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[6]) * (in2[1] << 0) + 558db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[7]) * (in2[0] << 0); 559db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* tmp[8] has the greatest value but doesn't overflow. See logic in 560db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * felem_square. */ 561db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[8] = ((u64) in[0]) * (in2[8] << 0) + ((u64) in[1]) * 562db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 1) + ((u64) in[2]) * (in2[6] << 0) + 563db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[3]) * (in2[5] << 1) + ((u64) in[4]) * 564db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[4] << 0) + ((u64) in[5]) * (in2[3] << 1) + 565db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[6]) * (in2[2] << 0) + ((u64) in[7]) * 566db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[1] << 1) + ((u64) in[8]) * (in2[0] << 0); 567db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[9] = ((u64) in[1]) * (in2[8] << 0) + ((u64) in[2]) * 568db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 0) + ((u64) in[3]) * (in2[6] << 0) + 569db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[4]) * (in2[5] << 0) + ((u64) in[5]) * 570db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[4] << 0) + ((u64) in[6]) * (in2[3] << 0) + 571db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[7]) * (in2[2] << 0) + 572db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[8]) * (in2[1] << 0); 573db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[10] = ((u64) in[2]) * (in2[8] << 0) + ((u64) in[3]) * 574db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 1) + ((u64) in[4]) * (in2[6] << 0) + 575db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[5]) * (in2[5] << 1) + ((u64) in[6]) * 576db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[4] << 0) + ((u64) in[7]) * (in2[3] << 1) + 577db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[8]) * (in2[2] << 0); 578db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[11] = ((u64) in[3]) * (in2[8] << 0) + ((u64) in[4]) * 579db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 0) + ((u64) in[5]) * (in2[6] << 0) + 580db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[6]) * (in2[5] << 0) + ((u64) in[7]) * 581db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[4] << 0) + ((u64) in[8]) * (in2[3] << 0); 582db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[12] = ((u64) in[4]) * (in2[8] << 0) + ((u64) in[5]) * 583db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 1) + ((u64) in[6]) * (in2[6] << 0) + 584db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[7]) * (in2[5] << 1) + 585db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[8]) * (in2[4] << 0); 586db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[13] = ((u64) in[5]) * (in2[8] << 0) + ((u64) in[6]) * 587db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 0) + ((u64) in[7]) * (in2[6] << 0) + 588db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[8]) * (in2[5] << 0); 589db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[14] = ((u64) in[6]) * (in2[8] << 0) + ((u64) in[7]) * 590db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (in2[7] << 1) + ((u64) in[8]) * (in2[6] << 0); 591db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[15] = ((u64) in[7]) * (in2[8] << 0) + 592db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root ((u64) in[8]) * (in2[7] << 0); 593db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[16] = ((u64) in[8]) * (in2[8] << 0); 594db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 595db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_degree(out, tmp); 596db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 597db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 598db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_assign(felem out, const felem in) { 599db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(out, in, sizeof(felem)); 600db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 601db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 602db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_inv calculates |out| = |in|^{-1} 603db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 604db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Based on Fermat's Little Theorem: 605db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * a^p = a (mod p) 606db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * a^{p-1} = 1 (mod p) 607db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * a^{p-2} = a^{-1} (mod p) 608db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root */ 609db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_inv(felem out, const felem in) { 610db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem ftmp, ftmp2; 611db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* each e_I will hold |in|^{2^I - 1} */ 612db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem e2, e4, e8, e16, e32, e64; 613db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 614db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 615db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, in); /* 2^1 */ 616db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */ 617db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e2, ftmp); 618db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^3 - 2^1 */ 619db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^4 - 2^2 */ 620db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */ 621db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e4, ftmp); 622db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^5 - 2^1 */ 623db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^6 - 2^2 */ 624db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^7 - 2^3 */ 625db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); /* 2^8 - 2^4 */ 626db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */ 627db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e8, ftmp); 628db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 8; i++) { 629db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); 630db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^16 - 2^8 */ 631db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */ 632db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e16, ftmp); 633db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 16; i++) { 634db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); 635db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^32 - 2^16 */ 636db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */ 637db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e32, ftmp); 638db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 32; i++) { 639db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); 640db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^64 - 2^32 */ 641db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(e64, ftmp); 642db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */ 643db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 192; i++) { 644db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp, ftmp); 645db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^256 - 2^224 + 2^192 */ 646db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 647db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */ 648db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 16; i++) { 649db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); 650db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^80 - 2^16 */ 651db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */ 652db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 8; i++) { 653db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); 654db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^88 - 2^8 */ 655db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */ 656db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 4; i++) { 657db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); 658db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } /* 2^92 - 2^4 */ 659db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */ 660db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */ 661db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */ 662db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */ 663db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */ 664db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */ 665db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */ 666db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 667db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */ 668db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 669db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 670db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_3 sets out=3*out. 671db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 672db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 673db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 674db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_3(felem out) { 675db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry = 0; 676db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 677db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 678db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 679db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] *= 3; 680db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 681db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 29; 682db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 683db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 684db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 685db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 686db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 687db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 688db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] *= 3; 689db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 690db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = out[i] >> 28; 691db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 692db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 693db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 694db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 695db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 696db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 697db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_4 sets out=4*out. 698db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 699db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 700db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 701db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_4(felem out) { 702db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry = 0, next_carry; 703db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 704db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 705db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 706db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root next_carry = out[i] >> 27; 707db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] <<= 2; 708db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 709db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 710db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = next_carry + (out[i] >> 29); 711db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 712db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 713db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 714db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 715db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 716db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 717db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root next_carry = out[i] >> 26; 718db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] <<= 2; 719db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 720db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 721db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = next_carry + (out[i] >> 28); 722db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 723db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 724db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 725db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 726db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 727db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 728db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_scalar_8 sets out=8*out. 729db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 730db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 731db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. */ 732db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void felem_scalar_8(felem out) { 733db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry = 0, next_carry; 734db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root unsigned i; 735db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 736db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 737db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root next_carry = out[i] >> 26; 738db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] <<= 3; 739db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 740db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 741db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = next_carry + (out[i] >> 29); 742db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom29Bits; 743db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 744db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 745db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 746db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 747db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 748db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root next_carry = out[i] >> 25; 749db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] <<= 3; 750db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 751db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] += carry; 752db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = next_carry + (out[i] >> 28); 753db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] &= kBottom28Bits; 754db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 755db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 756db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(out, carry); 757db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 758db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 759db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of 760db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * time depending on the value of |in|. */ 761db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic char felem_is_zero_vartime(const felem in) { 762db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb carry; 763db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i; 764db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb tmp[NLIMBS]; 765db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 766db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_assign(tmp, in); 767db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 768db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* First, reduce tmp to a minimal form. */ 769db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root do { 770db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = 0; 771db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0;; i++) { 772db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[i] += carry; 773db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = tmp[i] >> 29; 774db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[i] &= kBottom29Bits; 775db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 776db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root i++; 777db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i == NLIMBS) 778db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root break; 779db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 780db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[i] += carry; 781db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root carry = tmp[i] >> 28; 782db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root tmp[i] &= kBottom28Bits; 783db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 784db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 785db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_reduce_carry(tmp, carry); 786db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } while (carry); 787db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 788db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* tmp < 2**257, so the only possible zero values are 0, p and 2p. */ 789db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root return memcmp(tmp, kZero, sizeof(tmp)) == 0 || 790db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcmp(tmp, kP, sizeof(tmp)) == 0 || 791db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcmp(tmp, k2P, sizeof(tmp)) == 0; 792db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 793db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 794db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 795db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* Group operations: 796db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 797db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Elements of the elliptic curve group are represented in Jacobian 798db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in 799db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Jacobian form. */ 800db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 801db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}. 802db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 803db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l */ 804db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_double(felem x_out, felem y_out, felem z_out, const felem x, 805db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem y, const felem z) { 806db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem delta, gamma, alpha, beta, tmp, tmp2; 807db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 808db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(delta, z); 809db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(gamma, y); 810db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(beta, x, gamma); 811db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 812db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(tmp, x, delta); 813db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp2, x, delta); 814db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(alpha, tmp, tmp2); 815db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_scalar_3(alpha); 816db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 817db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(tmp, y, z); 818db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(tmp, tmp); 819db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, tmp, gamma); 820db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(z_out, tmp, delta); 821db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 822db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_scalar_4(beta); 823db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(x_out, alpha); 824db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, beta); 825db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, beta); 826db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 827db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, beta, x_out); 828db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(tmp, alpha, tmp); 829db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(tmp2, gamma); 830db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_scalar_8(tmp2); 831db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, tmp, tmp2); 832db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 833db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 834db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}. 835db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * (i.e. the second point is affine.) 836db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 837db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 838db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 839db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Note that this function does not handle P+P, infinity+P nor P+infinity 840db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * correctly. */ 841db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add_mixed(felem x_out, felem y_out, felem z_out, 842db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem x1, const felem y1, const felem z1, 843db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem x2, const felem y2) { 844db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp; 845db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 846db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z1z1, z1); 847db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(tmp, z1, z1); 848db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 849db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(u2, x2, z1z1); 850db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z1z1z1, z1, z1z1); 851db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(s2, y2, z1z1z1); 852db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(h, u2, x1); 853db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(i, h, h); 854db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(i, i); 855db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(j, h, i); 856db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(r, s2, y1); 857db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(r, r, r); 858db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(v, x1, i); 859db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 860db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z_out, tmp, h); 861db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(rr, r); 862db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, rr, j); 863db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 864db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 865db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 866db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, v, x_out); 867db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(y_out, tmp, r); 868db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(tmp, y1, j); 869db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 870db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 871db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 872db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 873db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}. 874db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 875db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 876db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 877db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * Note that this function does not handle P+P, infinity+P nor P+infinity 878db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * correctly. */ 879db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add(felem x_out, felem y_out, felem z_out, const felem x1, 880db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem y1, const felem z1, const felem x2, 881db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem y2, const felem z2) { 882db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; 883db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 884db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z1z1, z1); 885db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z2z2, z2); 886db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(u1, x1, z2z2); 887db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 888db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(tmp, z1, z2); 889db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(tmp, tmp); 890db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, tmp, z1z1); 891db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, tmp, z2z2); 892db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 893db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z2z2z2, z2, z2z2); 894db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(s1, y1, z2z2z2); 895db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 896db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(u2, x2, z1z1); 897db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z1z1z1, z1, z1z1); 898db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(s2, y2, z1z1z1); 899db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(h, u2, u1); 900db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(i, h, h); 901db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(i, i); 902db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(j, h, i); 903db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(r, s2, s1); 904db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(r, r, r); 905db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(v, u1, i); 906db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 907db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z_out, tmp, h); 908db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(rr, r); 909db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, rr, j); 910db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 911db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 912db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 913db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, v, x_out); 914db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(y_out, tmp, r); 915db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(tmp, s1, j); 916db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 917db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 918db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 919db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 920db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} + 921db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * {x2,y2,z2}. 922db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 923db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 924db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 925db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * This function handles the case where {x1,y1,z1}={x2,y2,z2}. */ 926db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_add_or_double_vartime( 927db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem x_out, felem y_out, felem z_out, const felem x1, const felem y1, 928db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem z1, const felem x2, const felem y2, const felem z2) { 929db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; 930db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root char x_equal, y_equal; 931db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 932db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z1z1, z1); 933db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z2z2, z2); 934db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(u1, x1, z2z2); 935db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 936db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(tmp, z1, z2); 937db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(tmp, tmp); 938db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, tmp, z1z1); 939db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, tmp, z2z2); 940db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 941db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z2z2z2, z2, z2z2); 942db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(s1, y1, z2z2z2); 943db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 944db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(u2, x2, z1z1); 945db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z1z1z1, z1, z1z1); 946db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(s2, y2, z1z1z1); 947db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(h, u2, u1); 948db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root x_equal = felem_is_zero_vartime(h); 949db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(i, h, h); 950db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(i, i); 951db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(j, h, i); 952db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(r, s2, s1); 953db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root y_equal = felem_is_zero_vartime(r); 954db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (x_equal && y_equal) { 955db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(x_out, y_out, z_out, x1, y1, z1); 956db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root return; 957db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 958db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_sum(r, r, r); 959db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(v, u1, i); 960db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 961db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z_out, tmp, h); 962db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(rr, r); 963db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, rr, j); 964db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 965db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(x_out, x_out, v); 966db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 967db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(tmp, v, x_out); 968db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(y_out, tmp, r); 969db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(tmp, s1, j); 970db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 971db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_diff(y_out, y_out, tmp); 972db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 973db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 974db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* copy_conditional sets out=in if mask = 0xffffffff in constant time. 975db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 976db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: mask is either 0 or 0xffffffff. */ 977db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void copy_conditional(felem out, const felem in, limb mask) { 978db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i; 979db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 980db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < NLIMBS; i++) { 981db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const limb tmp = mask & (in[i] ^ out[i]); 982db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] ^= tmp; 983db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 984db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 985db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 986db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* select_affine_point sets {out_x,out_y} to the index'th entry of table. 987db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * On entry: index < 16, table[0] must be zero. */ 988db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void select_affine_point(felem out_x, felem out_y, const limb* table, 989db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb index) { 990db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb i, j; 991db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 992db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(out_x, 0, sizeof(felem)); 993db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(out_y, 0, sizeof(felem)); 994db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 995db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 1; i < 16; i++) { 996db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb mask = i ^ index; 997db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask |= mask >> 2; 998db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask |= mask >> 1; 999db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask &= 1; 1000db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask--; 1001db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j < NLIMBS; j++, table++) { 1002db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out_x[j] |= *table & mask; 1003db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1004db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j < NLIMBS; j++, table++) { 1005db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out_y[j] |= *table & mask; 1006db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1007db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1008db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1009db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1010db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of 1011db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * table. On entry: index < 16, table[0] must be zero. */ 1012db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void select_jacobian_point(felem out_x, felem out_y, felem out_z, 1013db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const limb* table, limb index) { 1014db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb i, j; 1015db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1016db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(out_x, 0, sizeof(felem)); 1017db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(out_y, 0, sizeof(felem)); 1018db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(out_z, 0, sizeof(felem)); 1019db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1020db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The implicit value at index 0 is all zero. We don't need to perform that 1021db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * iteration of the loop because we already set out_* to zero. */ 1022db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root table += 3 * NLIMBS; 1023db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1024db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root // Hit all entries to obscure cache profiling. 1025db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 1; i < 16; i++) { 1026db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb mask = i ^ index; 1027db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask |= mask >> 2; 1028db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask |= mask >> 1; 1029db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask &= 1; 1030db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask--; 1031db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j < NLIMBS; j++, table++) { 1032db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out_x[j] |= *table & mask; 1033db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1034db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j < NLIMBS; j++, table++) { 1035db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out_y[j] |= *table & mask; 1036db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1037db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j < NLIMBS; j++, table++) { 1038db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out_z[j] |= *table & mask; 1039db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1040db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1041db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1042db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1043db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian 1044db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * number. Note that the value of scalar must be less than the order of the 1045db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * group. */ 1046db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void scalar_base_mult(felem nx, felem ny, felem nz, 1047db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const p256_int* scalar) { 1048db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i, j; 1049db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask; 1050db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root u32 table_offset; 1051db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1052db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem px, py; 1053db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem tx, ty, tz; 1054db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1055db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(nx, 0, sizeof(felem)); 1056db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(ny, 0, sizeof(felem)); 1057db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(nz, 0, sizeof(felem)); 1058db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1059db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The loop adds bits at positions 0, 64, 128 and 192, followed by 1060db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * positions 32,96,160 and 224 and does this 32 times. */ 1061db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 32; i++) { 1062db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i) { 1063db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(nx, ny, nz, nx, ny, nz); 1064db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1065db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root table_offset = 0; 1066db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (j = 0; j <= 32; j += 32) { 1067db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root char bit0 = p256_get_bit(scalar, 31 - i + j); 1068db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root char bit1 = p256_get_bit(scalar, 95 - i + j); 1069db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root char bit2 = p256_get_bit(scalar, 159 - i + j); 1070db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root char bit3 = p256_get_bit(scalar, 223 - i + j); 1071db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); 1072db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1073db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root select_affine_point(px, py, kPrecomputed + table_offset, index); 1074db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root table_offset += 30 * NLIMBS; 1075db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1076db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* Since scalar is less than the order of the group, we know that 1077db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle 1078db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * below. */ 1079db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); 1080db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero 1081db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * (a.k.a. the point at infinity). We handle that situation by 1082db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * copying the point from the table. */ 1083db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nx, px, n_is_infinity_mask); 1084db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(ny, py, n_is_infinity_mask); 1085db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nz, kOne, n_is_infinity_mask); 1086db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1087db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* Equally, the result is also wrong if the point from the table is 1088db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * zero, which happens when the index is zero. We handle that by 1089db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0. */ 1090db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); 1091db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask = p_is_noninfinite_mask & ~n_is_infinity_mask; 1092db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nx, tx, mask); 1093db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(ny, ty, mask); 1094db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nz, tz, mask); 1095db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* If p was not zero, then n is now non-zero. */ 1096db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root n_is_infinity_mask &= ~p_is_noninfinite_mask; 1097db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1098db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1099db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1100db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1101db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* point_to_affine converts a Jacobian point to an affine point. If the input 1102db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * is the point at infinity then it returns (0, 0) in constant time. */ 1103db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void point_to_affine(felem x_out, felem y_out, const felem nx, 1104db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem ny, const felem nz) { 1105db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem z_inv, z_inv_sq; 1106db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_inv(z_inv, nz); 1107db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_square(z_inv_sq, z_inv); 1108db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(x_out, nx, z_inv_sq); 1109db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(z_inv, z_inv, z_inv_sq); 1110db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem_mul(y_out, ny, z_inv); 1111db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1112db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1113db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* scalar_base_mult sets {nx,ny,nz} = scalar*{x,y}. */ 1114db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void scalar_mult(felem nx, felem ny, felem nz, const felem x, 1115db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const felem y, const p256_int* scalar) { 1116db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i; 1117db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem px, py, pz, tx, ty, tz; 1118db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem precomp[16][3]; 1119db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask; 1120db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1121db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* We precompute 0,1,2,... times {x,y}. */ 1122db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(precomp, 0, sizeof(felem) * 3); 1123db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(&precomp[1][0], x, sizeof(felem)); 1124db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(&precomp[1][1], y, sizeof(felem)); 1125db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(&precomp[1][2], kOne, sizeof(felem)); 1126db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1127db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 2; i < 16; i += 2) { 1128db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(precomp[i][0], precomp[i][1], precomp[i][2], 1129db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]); 1130db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1131db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2], 1132db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root precomp[i][0], precomp[i][1], precomp[i][2], x, y); 1133db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1134db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1135db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(nx, 0, sizeof(felem)); 1136db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(ny, 0, sizeof(felem)); 1137db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memset(nz, 0, sizeof(felem)); 1138db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root n_is_infinity_mask = -1; 1139db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1140db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* We add in a window of four bits each iteration and do this 64 times. */ 1141db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < 256; i += 4) { 1142db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (i) { 1143db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(nx, ny, nz, nx, ny, nz); 1144db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(nx, ny, nz, nx, ny, nz); 1145db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(nx, ny, nz, nx, ny, nz); 1146db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_double(nx, ny, nz, nx, ny, nz); 1147db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1148db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1149db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root index = (p256_get_bit(scalar, 255 - i - 0) << 3) | 1150db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (p256_get_bit(scalar, 255 - i - 1) << 2) | 1151db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root (p256_get_bit(scalar, 255 - i - 2) << 1) | 1152db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_get_bit(scalar, 255 - i - 3); 1153db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1154db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* See the comments in scalar_base_mult about handling infinities. */ 1155db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root select_jacobian_point(px, py, pz, precomp[0][0], index); 1156db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_add(tx, ty, tz, nx, ny, nz, px, py, pz); 1157db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nx, px, n_is_infinity_mask); 1158db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(ny, py, n_is_infinity_mask); 1159db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nz, pz, n_is_infinity_mask); 1160db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1161db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); 1162db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root mask = p_is_noninfinite_mask & ~n_is_infinity_mask; 1163db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1164db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nx, tx, mask); 1165db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(ny, ty, mask); 1166db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root copy_conditional(nz, tz, mask); 1167db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root n_is_infinity_mask &= ~p_is_noninfinite_mask; 1168db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1169db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1170db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1171db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define kRDigits {2, 0, 0, 0xfffffffe, 0xffffffff, 0xffffffff, 0xfffffffd, 1} // 2^257 mod p256.p 1172db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1173db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root#define kRInvDigits {0x80000000, 1, 0xffffffff, 0, 0x80000001, 0xfffffffe, 1, 0x7fffffff} // 1 / 2^257 mod p256.p 1174db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1175db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const p256_int kR = { kRDigits }; 1176db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic const p256_int kRInv = { kRInvDigits }; 1177db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1178db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* to_montgomery sets out = R*in. */ 1179db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void to_montgomery(felem out, const p256_int* in) { 1180db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_int in_shifted; 1181db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i; 1182db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1183db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_init(&in_shifted); 1184db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_modmul(&SECP256r1_p, in, 0, &kR, &in_shifted); 1185db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1186db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = 0; i < NLIMBS; i++) { 1187db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if ((i & 1) == 0) { 1188db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = P256_DIGIT(&in_shifted, 0) & kBottom29Bits; 1189db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_shr(&in_shifted, 29, &in_shifted); 1190db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } else { 1191db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root out[i] = P256_DIGIT(&in_shifted, 0) & kBottom28Bits; 1192db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_shr(&in_shifted, 28, &in_shifted); 1193db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1194db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1195db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1196db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_clear(&in_shifted); 1197db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1198db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1199db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* from_montgomery sets out=in/R. */ 1200db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootstatic void from_montgomery(p256_int* out, const felem in) { 1201db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_int result, tmp; 1202db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root int i, top; 1203db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1204db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_init(&result); 1205db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_init(&tmp); 1206db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1207db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_add_d(&tmp, in[NLIMBS - 1], &result); 1208db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root for (i = NLIMBS - 2; i >= 0; i--) { 1209db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if ((i & 1) == 0) { 1210db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root top = p256_shl(&result, 29, &tmp); 1211db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } else { 1212db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root top = p256_shl(&result, 28, &tmp); 1213db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1214db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root top |= p256_add_d(&tmp, in[i], &result); 1215db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1216db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1217db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_modmul(&SECP256r1_p, &kRInv, top, &result, out); 1218db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1219db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_clear(&result); 1220db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_clear(&tmp); 1221db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1222db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1223db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* p256_base_point_mul sets {out_x,out_y} = nG, where n is < the 1224db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * order of the group. */ 1225db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootvoid p256_base_point_mul(const p256_int* n, p256_int* out_x, p256_int* out_y) { 1226db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem x, y, z; 1227db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1228db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root scalar_base_mult(x, y, z, n); 1229db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1230db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root { 1231db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem x_affine, y_affine; 1232db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1233db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_to_affine(x_affine, y_affine, x, y, z); 1234db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root from_montgomery(out_x, x_affine); 1235db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root from_montgomery(out_y, y_affine); 1236db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1237db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1238db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1239db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root/* p256_points_mul_vartime sets {out_x,out_y} = n1*G + n2*{in_x,in_y}, where 1240db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * n1 and n2 are < the order of the group. 1241db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * 1242db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * As indicated by the name, this function operates in variable time. This 1243db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * is safe because it's used for signature validation which doesn't deal 1244db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * with secrets. */ 1245db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Rootvoid p256_points_mul_vartime( 1246db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const p256_int* n1, const p256_int* n2, const p256_int* in_x, 1247db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root const p256_int* in_y, p256_int* out_x, p256_int* out_y) { 1248db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root felem x1, y1, z1, x2, y2, z2, px, py; 1249db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1250db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* If both scalars are zero, then the result is the point at infinity. */ 1251db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (p256_is_zero(n1) != 0 && p256_is_zero(n2) != 0) { 1252db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_clear(out_x); 1253db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root p256_clear(out_y); 1254db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root return; 1255db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1256db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1257db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root to_montgomery(px, in_x); 1258db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root to_montgomery(py, in_y); 1259db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root scalar_base_mult(x1, y1, z1, n1); 1260db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root scalar_mult(x2, y2, z2, px, py, n2); 1261db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1262db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root if (p256_is_zero(n2) != 0) { 1263db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* If n2 == 0, then {x2,y2,z2} is zero and the result is just 1264db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * {x1,y1,z1}. */ 1265db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } else if (p256_is_zero(n1) != 0) { 1266db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* If n1 == 0, then {x1,y1,z1} is zero and the result is just 1267db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root * {x2,y2,z2}. */ 1268db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(x1, x2, sizeof(x2)); 1269db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(y1, y2, sizeof(y2)); 1270db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root memcpy(z1, z2, sizeof(z2)); 1271db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } else { 1272db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */ 1273db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2); 1274db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root } 1275db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root 1276db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root point_to_affine(px, py, x1, y1, z1); 1277db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root from_montgomery(out_x, px); 1278db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root from_montgomery(out_y, py); 1279db0850c3b637faaa7cbe1bab2e6c91ad2af35426Kenny Root} 1280