1/* Copyright (c) 2014, Intel Corporation. 2 * 3 * Permission to use, copy, modify, and/or distribute this software for any 4 * purpose with or without fee is hereby granted, provided that the above 5 * copyright notice and this permission notice appear in all copies. 6 * 7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 14 15/* Developers and authors: 16 * Shay Gueron (1, 2), and Vlad Krasnov (1) 17 * (1) Intel Corporation, Israel Development Center 18 * (2) University of Haifa 19 * Reference: 20 * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with 21 * 256 Bit Primes" */ 22 23#include <openssl/ec.h> 24 25#include <assert.h> 26#include <stdint.h> 27#include <string.h> 28 29#include <openssl/bn.h> 30#include <openssl/crypto.h> 31#include <openssl/err.h> 32 33#include "../bn/internal.h" 34#include "../internal.h" 35#include "internal.h" 36#include "p256-x86_64.h" 37 38 39#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \ 40 !defined(OPENSSL_SMALL) 41 42typedef P256_POINT_AFFINE PRECOMP256_ROW[64]; 43 44/* One converted into the Montgomery domain */ 45static const BN_ULONG ONE[P256_LIMBS] = { 46 TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000), 47 TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe), 48}; 49 50/* Precomputed tables for the default generator */ 51#include "p256-x86_64-table.h" 52 53/* Recode window to a signed digit, see util-64.c for details */ 54static unsigned booth_recode_w5(unsigned in) { 55 unsigned s, d; 56 57 s = ~((in >> 5) - 1); 58 d = (1 << 6) - in - 1; 59 d = (d & s) | (in & ~s); 60 d = (d >> 1) + (d & 1); 61 62 return (d << 1) + (s & 1); 63} 64 65static unsigned booth_recode_w7(unsigned in) { 66 unsigned s, d; 67 68 s = ~((in >> 7) - 1); 69 d = (1 << 8) - in - 1; 70 d = (d & s) | (in & ~s); 71 d = (d >> 1) + (d & 1); 72 73 return (d << 1) + (s & 1); 74} 75 76/* copy_conditional copies |src| to |dst| if |move| is one and leaves it as-is 77 * if |move| is zero. 78 * 79 * WARNING: this breaks the usual convention of constant-time functions 80 * returning masks. */ 81static void copy_conditional(BN_ULONG dst[P256_LIMBS], 82 const BN_ULONG src[P256_LIMBS], BN_ULONG move) { 83 BN_ULONG mask1 = ((BN_ULONG)0) - move; 84 BN_ULONG mask2 = ~mask1; 85 86 dst[0] = (src[0] & mask1) ^ (dst[0] & mask2); 87 dst[1] = (src[1] & mask1) ^ (dst[1] & mask2); 88 dst[2] = (src[2] & mask1) ^ (dst[2] & mask2); 89 dst[3] = (src[3] & mask1) ^ (dst[3] & mask2); 90 if (P256_LIMBS == 8) { 91 dst[4] = (src[4] & mask1) ^ (dst[4] & mask2); 92 dst[5] = (src[5] & mask1) ^ (dst[5] & mask2); 93 dst[6] = (src[6] & mask1) ^ (dst[6] & mask2); 94 dst[7] = (src[7] & mask1) ^ (dst[7] & mask2); 95 } 96} 97 98/* is_not_zero returns one iff in != 0 and zero otherwise. 99 * 100 * WARNING: this breaks the usual convention of constant-time functions 101 * returning masks. 102 * 103 * (define-fun is_not_zero ((in (_ BitVec 64))) (_ BitVec 64) 104 * (bvlshr (bvor in (bvsub #x0000000000000000 in)) #x000000000000003f) 105 * ) 106 * 107 * (declare-fun x () (_ BitVec 64)) 108 * 109 * (assert (and (= x #x0000000000000000) (= (is_not_zero x) #x0000000000000001))) 110 * (check-sat) 111 * 112 * (assert (and (not (= x #x0000000000000000)) (= (is_not_zero x) #x0000000000000000))) 113 * (check-sat) 114 * */ 115static BN_ULONG is_not_zero(BN_ULONG in) { 116 in |= (0 - in); 117 in >>= BN_BITS2 - 1; 118 return in; 119} 120 121/* ecp_nistz256_mod_inverse_mont sets |r| to (|in| * 2^-256)^-1 * 2^256 mod p. 122 * That is, |r| is the modular inverse of |in| for input and output in the 123 * Montgomery domain. */ 124static void ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS], 125 const BN_ULONG in[P256_LIMBS]) { 126 /* The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff 127 ffffffff 128 We use FLT and used poly-2 as exponent */ 129 BN_ULONG p2[P256_LIMBS]; 130 BN_ULONG p4[P256_LIMBS]; 131 BN_ULONG p8[P256_LIMBS]; 132 BN_ULONG p16[P256_LIMBS]; 133 BN_ULONG p32[P256_LIMBS]; 134 BN_ULONG res[P256_LIMBS]; 135 int i; 136 137 ecp_nistz256_sqr_mont(res, in); 138 ecp_nistz256_mul_mont(p2, res, in); /* 3*p */ 139 140 ecp_nistz256_sqr_mont(res, p2); 141 ecp_nistz256_sqr_mont(res, res); 142 ecp_nistz256_mul_mont(p4, res, p2); /* f*p */ 143 144 ecp_nistz256_sqr_mont(res, p4); 145 ecp_nistz256_sqr_mont(res, res); 146 ecp_nistz256_sqr_mont(res, res); 147 ecp_nistz256_sqr_mont(res, res); 148 ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */ 149 150 ecp_nistz256_sqr_mont(res, p8); 151 for (i = 0; i < 7; i++) { 152 ecp_nistz256_sqr_mont(res, res); 153 } 154 ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */ 155 156 ecp_nistz256_sqr_mont(res, p16); 157 for (i = 0; i < 15; i++) { 158 ecp_nistz256_sqr_mont(res, res); 159 } 160 ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */ 161 162 ecp_nistz256_sqr_mont(res, p32); 163 for (i = 0; i < 31; i++) { 164 ecp_nistz256_sqr_mont(res, res); 165 } 166 ecp_nistz256_mul_mont(res, res, in); 167 168 for (i = 0; i < 32 * 4; i++) { 169 ecp_nistz256_sqr_mont(res, res); 170 } 171 ecp_nistz256_mul_mont(res, res, p32); 172 173 for (i = 0; i < 32; i++) { 174 ecp_nistz256_sqr_mont(res, res); 175 } 176 ecp_nistz256_mul_mont(res, res, p32); 177 178 for (i = 0; i < 16; i++) { 179 ecp_nistz256_sqr_mont(res, res); 180 } 181 ecp_nistz256_mul_mont(res, res, p16); 182 183 for (i = 0; i < 8; i++) { 184 ecp_nistz256_sqr_mont(res, res); 185 } 186 ecp_nistz256_mul_mont(res, res, p8); 187 188 ecp_nistz256_sqr_mont(res, res); 189 ecp_nistz256_sqr_mont(res, res); 190 ecp_nistz256_sqr_mont(res, res); 191 ecp_nistz256_sqr_mont(res, res); 192 ecp_nistz256_mul_mont(res, res, p4); 193 194 ecp_nistz256_sqr_mont(res, res); 195 ecp_nistz256_sqr_mont(res, res); 196 ecp_nistz256_mul_mont(res, res, p2); 197 198 ecp_nistz256_sqr_mont(res, res); 199 ecp_nistz256_sqr_mont(res, res); 200 ecp_nistz256_mul_mont(r, res, in); 201} 202 203/* ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and 204 * returns one if it fits. Otherwise it returns zero. */ 205static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], 206 const BIGNUM *in) { 207 if (in->top > P256_LIMBS) { 208 return 0; 209 } 210 211 OPENSSL_memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS); 212 OPENSSL_memcpy(out, in->d, sizeof(BN_ULONG) * in->top); 213 return 1; 214} 215 216/* r = p * p_scalar */ 217static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r, 218 const EC_POINT *p, const BIGNUM *p_scalar, 219 BN_CTX *ctx) { 220 assert(p != NULL); 221 assert(p_scalar != NULL); 222 223 static const unsigned kWindowSize = 5; 224 static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1; 225 226 /* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should 227 * add no more than 63 bytes of overhead. Thus, |table| should require 228 * ~1599 ((96 * 16) + 63) bytes of stack space. */ 229 alignas(64) P256_POINT table[16]; 230 uint8_t p_str[33]; 231 232 233 int ret = 0; 234 BN_CTX *new_ctx = NULL; 235 int ctx_started = 0; 236 237 if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) { 238 if (ctx == NULL) { 239 new_ctx = BN_CTX_new(); 240 if (new_ctx == NULL) { 241 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); 242 goto err; 243 } 244 ctx = new_ctx; 245 } 246 BN_CTX_start(ctx); 247 ctx_started = 1; 248 BIGNUM *mod = BN_CTX_get(ctx); 249 if (mod == NULL) { 250 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); 251 goto err; 252 } 253 if (!BN_nnmod(mod, p_scalar, &group->order, ctx)) { 254 OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB); 255 goto err; 256 } 257 p_scalar = mod; 258 } 259 260 int j; 261 for (j = 0; j < p_scalar->top * BN_BYTES; j += BN_BYTES) { 262 BN_ULONG d = p_scalar->d[j / BN_BYTES]; 263 264 p_str[j + 0] = d & 0xff; 265 p_str[j + 1] = (d >> 8) & 0xff; 266 p_str[j + 2] = (d >> 16) & 0xff; 267 p_str[j + 3] = (d >>= 24) & 0xff; 268 if (BN_BYTES == 8) { 269 d >>= 8; 270 p_str[j + 4] = d & 0xff; 271 p_str[j + 5] = (d >> 8) & 0xff; 272 p_str[j + 6] = (d >> 16) & 0xff; 273 p_str[j + 7] = (d >> 24) & 0xff; 274 } 275 } 276 277 for (; j < 33; j++) { 278 p_str[j] = 0; 279 } 280 281 /* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is 282 * not stored. All other values are actually stored with an offset of -1 in 283 * table. */ 284 P256_POINT *row = table; 285 286 if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &p->X) || 287 !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) || 288 !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) { 289 OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE); 290 goto err; 291 } 292 293 ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]); 294 ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]); 295 ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]); 296 ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]); 297 ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]); 298 ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]); 299 ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]); 300 ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]); 301 ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]); 302 ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]); 303 ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]); 304 ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]); 305 ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]); 306 ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]); 307 ecp_nistz256_point_double(&row[16 - 1], &row[8 - 1]); 308 309 BN_ULONG tmp[P256_LIMBS]; 310 alignas(32) P256_POINT h; 311 unsigned index = 255; 312 unsigned wvalue = p_str[(index - 1) / 8]; 313 wvalue = (wvalue >> ((index - 1) % 8)) & kMask; 314 315 ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1); 316 317 while (index >= 5) { 318 if (index != 255) { 319 unsigned off = (index - 1) / 8; 320 321 wvalue = p_str[off] | p_str[off + 1] << 8; 322 wvalue = (wvalue >> ((index - 1) % 8)) & kMask; 323 324 wvalue = booth_recode_w5(wvalue); 325 326 ecp_nistz256_select_w5(&h, table, wvalue >> 1); 327 328 ecp_nistz256_neg(tmp, h.Y); 329 copy_conditional(h.Y, tmp, (wvalue & 1)); 330 331 ecp_nistz256_point_add(r, r, &h); 332 } 333 334 index -= kWindowSize; 335 336 ecp_nistz256_point_double(r, r); 337 ecp_nistz256_point_double(r, r); 338 ecp_nistz256_point_double(r, r); 339 ecp_nistz256_point_double(r, r); 340 ecp_nistz256_point_double(r, r); 341 } 342 343 /* Final window */ 344 wvalue = p_str[0]; 345 wvalue = (wvalue << 1) & kMask; 346 347 wvalue = booth_recode_w5(wvalue); 348 349 ecp_nistz256_select_w5(&h, table, wvalue >> 1); 350 351 ecp_nistz256_neg(tmp, h.Y); 352 copy_conditional(h.Y, tmp, wvalue & 1); 353 354 ecp_nistz256_point_add(r, r, &h); 355 356 ret = 1; 357 358err: 359 if (ctx_started) { 360 BN_CTX_end(ctx); 361 } 362 BN_CTX_free(new_ctx); 363 return ret; 364} 365 366static int ecp_nistz256_points_mul( 367 const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar, 368 const EC_POINT *p_, const BIGNUM *p_scalar, BN_CTX *ctx) { 369 assert((p_ != NULL) == (p_scalar != NULL)); 370 371 static const unsigned kWindowSize = 7; 372 static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1; 373 374 alignas(32) union { 375 P256_POINT p; 376 P256_POINT_AFFINE a; 377 } t, p; 378 379 int ret = 0; 380 BN_CTX *new_ctx = NULL; 381 int ctx_started = 0; 382 383 if (g_scalar != NULL) { 384 if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) { 385 if (ctx == NULL) { 386 new_ctx = BN_CTX_new(); 387 if (new_ctx == NULL) { 388 goto err; 389 } 390 ctx = new_ctx; 391 } 392 BN_CTX_start(ctx); 393 ctx_started = 1; 394 BIGNUM *tmp_scalar = BN_CTX_get(ctx); 395 if (tmp_scalar == NULL) { 396 goto err; 397 } 398 399 if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) { 400 OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB); 401 goto err; 402 } 403 g_scalar = tmp_scalar; 404 } 405 406 uint8_t p_str[33] = {0}; 407 int i; 408 for (i = 0; i < g_scalar->top * BN_BYTES; i += BN_BYTES) { 409 BN_ULONG d = g_scalar->d[i / BN_BYTES]; 410 411 p_str[i + 0] = d & 0xff; 412 p_str[i + 1] = (d >> 8) & 0xff; 413 p_str[i + 2] = (d >> 16) & 0xff; 414 p_str[i + 3] = (d >>= 24) & 0xff; 415 if (BN_BYTES == 8) { 416 d >>= 8; 417 p_str[i + 4] = d & 0xff; 418 p_str[i + 5] = (d >> 8) & 0xff; 419 p_str[i + 6] = (d >> 16) & 0xff; 420 p_str[i + 7] = (d >> 24) & 0xff; 421 } 422 } 423 424 for (; i < (int) sizeof(p_str); i++) { 425 p_str[i] = 0; 426 } 427 428 /* First window */ 429 unsigned wvalue = (p_str[0] << 1) & kMask; 430 unsigned index = kWindowSize; 431 432 wvalue = booth_recode_w7(wvalue); 433 434 const PRECOMP256_ROW *const precomputed_table = 435 (const PRECOMP256_ROW *)ecp_nistz256_precomputed; 436 ecp_nistz256_select_w7(&p.a, precomputed_table[0], wvalue >> 1); 437 438 ecp_nistz256_neg(p.p.Z, p.p.Y); 439 copy_conditional(p.p.Y, p.p.Z, wvalue & 1); 440 441 /* Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p| 442 * is infinity and |ONE| otherwise. |p| was computed from the table, so it 443 * is infinity iff |wvalue >> 1| is zero. */ 444 OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z)); 445 copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1)); 446 447 for (i = 1; i < 37; i++) { 448 unsigned off = (index - 1) / 8; 449 wvalue = p_str[off] | p_str[off + 1] << 8; 450 wvalue = (wvalue >> ((index - 1) % 8)) & kMask; 451 index += kWindowSize; 452 453 wvalue = booth_recode_w7(wvalue); 454 455 ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1); 456 457 ecp_nistz256_neg(t.p.Z, t.a.Y); 458 copy_conditional(t.a.Y, t.p.Z, wvalue & 1); 459 460 ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); 461 } 462 } 463 464 const int p_is_infinity = g_scalar == NULL; 465 if (p_scalar != NULL) { 466 P256_POINT *out = &t.p; 467 if (p_is_infinity) { 468 out = &p.p; 469 } 470 471 if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar, ctx)) { 472 goto err; 473 } 474 475 if (!p_is_infinity) { 476 ecp_nistz256_point_add(&p.p, &p.p, out); 477 } 478 } 479 480 /* Not constant-time, but we're only operating on the public output. */ 481 if (!bn_set_words(&r->X, p.p.X, P256_LIMBS) || 482 !bn_set_words(&r->Y, p.p.Y, P256_LIMBS) || 483 !bn_set_words(&r->Z, p.p.Z, P256_LIMBS)) { 484 return 0; 485 } 486 487 ret = 1; 488 489err: 490 if (ctx_started) { 491 BN_CTX_end(ctx); 492 } 493 BN_CTX_free(new_ctx); 494 return ret; 495} 496 497static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point, 498 BIGNUM *x, BIGNUM *y, BN_CTX *ctx) { 499 BN_ULONG z_inv2[P256_LIMBS]; 500 BN_ULONG z_inv3[P256_LIMBS]; 501 BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS]; 502 503 if (EC_POINT_is_at_infinity(group, point)) { 504 OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY); 505 return 0; 506 } 507 508 if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) || 509 !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) || 510 !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) { 511 OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE); 512 return 0; 513 } 514 515 ecp_nistz256_mod_inverse_mont(z_inv3, point_z); 516 ecp_nistz256_sqr_mont(z_inv2, z_inv3); 517 518 /* Instead of using |ecp_nistz256_from_mont| to convert the |x| coordinate 519 * and then calling |ecp_nistz256_from_mont| again to convert the |y| 520 * coordinate below, convert the common factor |z_inv2| once now, saving one 521 * reduction. */ 522 ecp_nistz256_from_mont(z_inv2, z_inv2); 523 524 if (x != NULL) { 525 BN_ULONG x_aff[P256_LIMBS]; 526 ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); 527 if (!bn_set_words(x, x_aff, P256_LIMBS)) { 528 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); 529 return 0; 530 } 531 } 532 533 if (y != NULL) { 534 BN_ULONG y_aff[P256_LIMBS]; 535 ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); 536 ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); 537 if (!bn_set_words(y, y_aff, P256_LIMBS)) { 538 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); 539 return 0; 540 } 541 } 542 543 return 1; 544} 545 546 547const EC_METHOD EC_GFp_nistz256_method = { 548 ec_GFp_mont_group_init, 549 ec_GFp_mont_group_finish, 550 ec_GFp_mont_group_copy, 551 ec_GFp_mont_group_set_curve, 552 ecp_nistz256_get_affine, 553 ecp_nistz256_points_mul, 554 ec_GFp_mont_field_mul, 555 ec_GFp_mont_field_sqr, 556 ec_GFp_mont_field_encode, 557 ec_GFp_mont_field_decode, 558}; 559 560#endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \ 561 !defined(OPENSSL_SMALL) */ 562