1d9e397b599b13d642138480a28c14db7a136bf0Adam Langley/***************************************************************************** 2d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 3d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* Copyright (c) 2012, Intel Corporation * 4d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 5d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* All rights reserved. * 6d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 7d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* Redistribution and use in source and binary forms, with or without * 8d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* modification, are permitted provided that the following conditions are * 9d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* met: * 10d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 11d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * Redistributions of source code must retain the above copyright * 12d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* notice, this list of conditions and the following disclaimer. * 13d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 14d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * Redistributions in binary form must reproduce the above copyright * 15d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* notice, this list of conditions and the following disclaimer in the * 16d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* documentation and/or other materials provided with the * 17d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* distribution. * 18d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 19d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * Neither the name of the Intel Corporation nor the names of its * 20d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* contributors may be used to endorse or promote products derived from * 21d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* this software without specific prior written permission. * 22d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 23d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 24d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * 25d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * 26d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * 27d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * 28d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * 29d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * 30d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * 31d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * 32d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * 33d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 34d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 35d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* * 36d9e397b599b13d642138480a28c14db7a136bf0Adam Langley****************************************************************************** 37d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* Developers and authors: * 38d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* Shay Gueron (1, 2), and Vlad Krasnov (1) * 39d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* (1) Intel Corporation, Israel Development Center, Haifa, Israel * 40d9e397b599b13d642138480a28c14db7a136bf0Adam Langley* (2) University of Haifa, Israel * 41d9e397b599b13d642138480a28c14db7a136bf0Adam Langley*****************************************************************************/ 42d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 43d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#include <openssl/base.h> 44d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 45d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) 46d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 47d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#include "rsaz_exp.h" 48d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 49d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#include <openssl/mem.h> 50d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 51d9e397b599b13d642138480a28c14db7a136bf0Adam Langley/* 52d9e397b599b13d642138480a28c14db7a136bf0Adam Langley * See crypto/bn/asm/rsaz-avx2.pl for further details. 53d9e397b599b13d642138480a28c14db7a136bf0Adam Langley */ 54d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_norm2red_avx2(void *red,const void *norm); 55d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_mul_avx2(void *ret,const void *a,const void *b,const void *n,BN_ULONG k); 56d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_sqr_avx2(void *ret,const void *a,const void *n,BN_ULONG k,int cnt); 57d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_scatter5_avx2(void *tbl,const void *val,int i); 58d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_gather5_avx2(void *val,const void *tbl,int i); 59d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_1024_red2norm_avx2(void *norm,const void *red); 60d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 61d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if defined(__GNUC__) 62d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# define ALIGN64 __attribute__((aligned(64))) 63d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#elif defined(_MSC_VER) 64d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# define ALIGN64 __declspec(align(64)) 65d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#elif defined(__SUNPRO_C) 66d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# define ALIGN64 67d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# pragma align 64(one,two80) 68d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#else 69d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# define ALIGN64 /* not fatal, might hurt performance a little */ 70d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif 71d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 72d9e397b599b13d642138480a28c14db7a136bf0Adam LangleyALIGN64 static const BN_ULONG one[40] = 73d9e397b599b13d642138480a28c14db7a136bf0Adam Langley {1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 74d9e397b599b13d642138480a28c14db7a136bf0Adam LangleyALIGN64 static const BN_ULONG two80[40] = 75d9e397b599b13d642138480a28c14db7a136bf0Adam Langley {0,0,1<<22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 76d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 77d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], 78d9e397b599b13d642138480a28c14db7a136bf0Adam Langley const BN_ULONG base_norm[16], const BN_ULONG exponent[16], 79d9e397b599b13d642138480a28c14db7a136bf0Adam Langley const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0) 80d9e397b599b13d642138480a28c14db7a136bf0Adam Langley{ 81d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char storage[320*3+32*9*16+64]; /* 5.5KB */ 82d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char *p_str = storage + (64-((size_t)storage%64)); 83d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char *a_inv, *m, *result, 84d9e397b599b13d642138480a28c14db7a136bf0Adam Langley *table_s = p_str+320*3, 85d9e397b599b13d642138480a28c14db7a136bf0Adam Langley *R2 = table_s; /* borrow */ 86d9e397b599b13d642138480a28c14db7a136bf0Adam Langley int index; 87d9e397b599b13d642138480a28c14db7a136bf0Adam Langley int wvalue; 88d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 89d9e397b599b13d642138480a28c14db7a136bf0Adam Langley if ((((size_t)p_str&4095)+320)>>12) { 90d9e397b599b13d642138480a28c14db7a136bf0Adam Langley result = p_str; 91d9e397b599b13d642138480a28c14db7a136bf0Adam Langley a_inv = p_str + 320; 92d9e397b599b13d642138480a28c14db7a136bf0Adam Langley m = p_str + 320*2; /* should not cross page */ 93d9e397b599b13d642138480a28c14db7a136bf0Adam Langley } else { 94d9e397b599b13d642138480a28c14db7a136bf0Adam Langley m = p_str; /* should not cross page */ 95d9e397b599b13d642138480a28c14db7a136bf0Adam Langley result = p_str + 320; 96d9e397b599b13d642138480a28c14db7a136bf0Adam Langley a_inv = p_str + 320*2; 97d9e397b599b13d642138480a28c14db7a136bf0Adam Langley } 98d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 99d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_norm2red_avx2(m, m_norm); 100d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_norm2red_avx2(a_inv, base_norm); 101d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_norm2red_avx2(R2, RR); 102d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 103d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(R2, R2, R2, m, k0); 104d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(R2, R2, two80, m, k0); 105d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 106d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[0] = 1 */ 107d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, R2, one, m, k0); 108d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[1] = a_inv^1 */ 109d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); 110d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 111d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,0); 112d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,a_inv,1); 113d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 114d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[2] = a_inv^2 */ 115d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); 116d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,2); 117d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if 0 118d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* this is almost 2x smaller and less than 1% slower */ 119d9e397b599b13d642138480a28c14db7a136bf0Adam Langley for (index=3; index<32; index++) { 120d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 121d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,index); 122d9e397b599b13d642138480a28c14db7a136bf0Adam Langley } 123d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#else 124d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[4] = a_inv^4 */ 125d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 126d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,4); 127d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[8] = a_inv^8 */ 128d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 129d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,8); 130d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[16] = a_inv^16 */ 131d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 132d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,16); 133d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[17] = a_inv^17 */ 134d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 135d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,17); 136d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 137d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[3] */ 138d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,2); 139d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 140d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,3); 141d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[6] */ 142d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 143d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,6); 144d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[12] */ 145d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 146d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,12); 147d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[24] */ 148d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 149d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,24); 150d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[25] */ 151d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 152d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,25); 153d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 154d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[5] */ 155d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,4); 156d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 157d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,5); 158d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[10] */ 159d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 160d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,10); 161d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[20] */ 162d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 163d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,20); 164d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[21] */ 165d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 166d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,21); 167d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 168d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[7] */ 169d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,6); 170d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 171d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,7); 172d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[14] */ 173d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 174d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,14); 175d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[28] */ 176d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 177d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,28); 178d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[29] */ 179d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 180d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,29); 181d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 182d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[9] */ 183d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,8); 184d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 185d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,9); 186d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[18] */ 187d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 188d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,18); 189d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[19] */ 190d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 191d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,19); 192d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 193d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[11] */ 194d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,10); 195d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 196d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,11); 197d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[22] */ 198d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 199d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,22); 200d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[23] */ 201d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 202d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,23); 203d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 204d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[13] */ 205d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,12); 206d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 207d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,13); 208d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[26] */ 209d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 210d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,26); 211d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[27] */ 212d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 213d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,27); 214d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 215d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[15] */ 216d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,14); 217d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result,result,a_inv,m,k0); 218d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,15); 219d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[30] */ 220d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 1); 221d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,30); 222d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[31] */ 223d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 224d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_scatter5_avx2(table_s,result,31); 225d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif 226d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 227d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* load first window */ 228d9e397b599b13d642138480a28c14db7a136bf0Adam Langley p_str = (unsigned char*)exponent; 229d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = p_str[127] >> 3; 230d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(result,table_s,wvalue); 231d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 232d9e397b599b13d642138480a28c14db7a136bf0Adam Langley index = 1014; 233d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 234d9e397b599b13d642138480a28c14db7a136bf0Adam Langley while(index > -1) { /* loop for the remaining 127 windows */ 235d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 236d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 5); 237d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 238d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = *((unsigned short*)&p_str[index/8]); 239d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = (wvalue>> (index%8)) & 31; 240d9e397b599b13d642138480a28c14db7a136bf0Adam Langley index-=5; 241d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 242d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(a_inv,table_s,wvalue); /* borrow a_inv */ 243d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 244d9e397b599b13d642138480a28c14db7a136bf0Adam Langley } 245d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 246d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* square four times */ 247d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_sqr_avx2(result, result, m, k0, 4); 248d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 249d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = p_str[0] & 15; 250d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 251d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_gather5_avx2(a_inv,table_s,wvalue); /* borrow a_inv */ 252d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 253d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 254d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* from Montgomery */ 255d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_mul_avx2(result, result, one, m, k0); 256d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 257d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_1024_red2norm_avx2(result_norm, result); 258d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 259d9e397b599b13d642138480a28c14db7a136bf0Adam Langley OPENSSL_cleanse(storage,sizeof(storage)); 260d9e397b599b13d642138480a28c14db7a136bf0Adam Langley} 261d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 262d9e397b599b13d642138480a28c14db7a136bf0Adam Langley/* 263d9e397b599b13d642138480a28c14db7a136bf0Adam Langley * See crypto/bn/rsaz-x86_64.pl for further details. 264d9e397b599b13d642138480a28c14db7a136bf0Adam Langley */ 265d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_mul(void *ret,const void *a,const void *b,const void *n,BN_ULONG k); 266d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_mul_scatter4(void *ret,const void *a,const void *n,BN_ULONG k,const void *tbl,unsigned int power); 267d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_mul_gather4(void *ret,const void *a,const void *tbl,const void *n,BN_ULONG k,unsigned int power); 268d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_mul_by_one(void *ret,const void *a,const void *n,BN_ULONG k); 269d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_sqr(void *ret,const void *a,const void *n,BN_ULONG k,int cnt); 270d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); 271d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); 272d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 273d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyvoid RSAZ_512_mod_exp(BN_ULONG result[8], 274d9e397b599b13d642138480a28c14db7a136bf0Adam Langley const BN_ULONG base[8], const BN_ULONG exponent[8], 275d9e397b599b13d642138480a28c14db7a136bf0Adam Langley const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) 276d9e397b599b13d642138480a28c14db7a136bf0Adam Langley{ 277d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char storage[16*8*8+64*2+64]; /* 1.2KB */ 278d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char *table = storage + (64-((size_t)storage%64)); 279d9e397b599b13d642138480a28c14db7a136bf0Adam Langley BN_ULONG *a_inv = (BN_ULONG *)(table+16*8*8), 280d9e397b599b13d642138480a28c14db7a136bf0Adam Langley *temp = (BN_ULONG *)(table+16*8*8+8*8); 281d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned char *p_str = (unsigned char*)exponent; 282d9e397b599b13d642138480a28c14db7a136bf0Adam Langley int index; 283d9e397b599b13d642138480a28c14db7a136bf0Adam Langley unsigned int wvalue; 284d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 285d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table[0] = 1_inv */ 286d9e397b599b13d642138480a28c14db7a136bf0Adam Langley temp[0] = 0-m[0]; temp[1] = ~m[1]; 287d9e397b599b13d642138480a28c14db7a136bf0Adam Langley temp[2] = ~m[2]; temp[3] = ~m[3]; 288d9e397b599b13d642138480a28c14db7a136bf0Adam Langley temp[4] = ~m[4]; temp[5] = ~m[5]; 289d9e397b599b13d642138480a28c14db7a136bf0Adam Langley temp[6] = ~m[6]; temp[7] = ~m[7]; 290d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_scatter4(table, temp, 0); 291d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 292d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table [1] = a_inv^1 */ 293d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul(a_inv, base, RR, m, k0); 294d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_scatter4(table, a_inv, 1); 295d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 296d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* table [2] = a_inv^2 */ 297d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_sqr(temp, a_inv, m, k0, 1); 298d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_scatter4(table, temp, 2); 299d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 300d9e397b599b13d642138480a28c14db7a136bf0Adam Langley for (index=3; index<16; index++) 301d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); 302d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 303d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* load first window */ 304d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = p_str[63]; 305d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 306d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_gather4(temp, table, wvalue>>4); 307d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_sqr(temp, temp, m, k0, 4); 308d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue&0xf); 309d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 310d9e397b599b13d642138480a28c14db7a136bf0Adam Langley for (index=62; index>=0; index--) { 311d9e397b599b13d642138480a28c14db7a136bf0Adam Langley wvalue = p_str[index]; 312d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 313d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_sqr(temp, temp, m, k0, 4); 314d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue>>4); 315d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 316d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_sqr(temp, temp, m, k0, 4); 317d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue&0x0f); 318d9e397b599b13d642138480a28c14db7a136bf0Adam Langley } 319d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 320d9e397b599b13d642138480a28c14db7a136bf0Adam Langley /* from Montgomery */ 321d9e397b599b13d642138480a28c14db7a136bf0Adam Langley rsaz_512_mul_by_one(result, temp, m, k0); 322d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 323d9e397b599b13d642138480a28c14db7a136bf0Adam Langley OPENSSL_cleanse(storage,sizeof(storage)); 324d9e397b599b13d642138480a28c14db7a136bf0Adam Langley} 325d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 326d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif /* OPENSSL_X86_64 */ 327