1/* 2 * Cryptographic API. 3 * 4 * Glue code for the SHA512 Secure Hash Algorithm assembler 5 * implementation using supplemental SSE3 / AVX / AVX2 instructions. 6 * 7 * This file is based on sha512_generic.c 8 * 9 * Copyright (C) 2013 Intel Corporation 10 * Author: Tim Chen <tim.c.chen@linux.intel.com> 11 * 12 * This program is free software; you can redistribute it and/or modify it 13 * under the terms of the GNU General Public License as published by the Free 14 * Software Foundation; either version 2 of the License, or (at your option) 15 * any later version. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 * SOFTWARE. 25 * 26 */ 27 28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 29 30#include <crypto/internal/hash.h> 31#include <linux/init.h> 32#include <linux/module.h> 33#include <linux/mm.h> 34#include <linux/cryptohash.h> 35#include <linux/types.h> 36#include <crypto/sha.h> 37#include <asm/byteorder.h> 38#include <asm/i387.h> 39#include <asm/xcr.h> 40#include <asm/xsave.h> 41 42#include <linux/string.h> 43 44asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, 45 u64 rounds); 46#ifdef CONFIG_AS_AVX 47asmlinkage void sha512_transform_avx(const char *data, u64 *digest, 48 u64 rounds); 49#endif 50#ifdef CONFIG_AS_AVX2 51asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, 52 u64 rounds); 53#endif 54 55static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); 56 57 58static int sha512_ssse3_init(struct shash_desc *desc) 59{ 60 struct sha512_state *sctx = shash_desc_ctx(desc); 61 62 sctx->state[0] = SHA512_H0; 63 sctx->state[1] = SHA512_H1; 64 sctx->state[2] = SHA512_H2; 65 sctx->state[3] = SHA512_H3; 66 sctx->state[4] = SHA512_H4; 67 sctx->state[5] = SHA512_H5; 68 sctx->state[6] = SHA512_H6; 69 sctx->state[7] = SHA512_H7; 70 sctx->count[0] = sctx->count[1] = 0; 71 72 return 0; 73} 74 75static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, 76 unsigned int len, unsigned int partial) 77{ 78 struct sha512_state *sctx = shash_desc_ctx(desc); 79 unsigned int done = 0; 80 81 sctx->count[0] += len; 82 if (sctx->count[0] < len) 83 sctx->count[1]++; 84 85 if (partial) { 86 done = SHA512_BLOCK_SIZE - partial; 87 memcpy(sctx->buf + partial, data, done); 88 sha512_transform_asm(sctx->buf, sctx->state, 1); 89 } 90 91 if (len - done >= SHA512_BLOCK_SIZE) { 92 const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; 93 94 sha512_transform_asm(data + done, sctx->state, (u64) rounds); 95 96 done += rounds * SHA512_BLOCK_SIZE; 97 } 98 99 memcpy(sctx->buf, data + done, len - done); 100 101 return 0; 102} 103 104static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, 105 unsigned int len) 106{ 107 struct sha512_state *sctx = shash_desc_ctx(desc); 108 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; 109 int res; 110 111 /* Handle the fast case right here */ 112 if (partial + len < SHA512_BLOCK_SIZE) { 113 sctx->count[0] += len; 114 if (sctx->count[0] < len) 115 sctx->count[1]++; 116 memcpy(sctx->buf + partial, data, len); 117 118 return 0; 119 } 120 121 if (!irq_fpu_usable()) { 122 res = crypto_sha512_update(desc, data, len); 123 } else { 124 kernel_fpu_begin(); 125 res = __sha512_ssse3_update(desc, data, len, partial); 126 kernel_fpu_end(); 127 } 128 129 return res; 130} 131 132 133/* Add padding and return the message digest. */ 134static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) 135{ 136 struct sha512_state *sctx = shash_desc_ctx(desc); 137 unsigned int i, index, padlen; 138 __be64 *dst = (__be64 *)out; 139 __be64 bits[2]; 140 static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; 141 142 /* save number of bits */ 143 bits[1] = cpu_to_be64(sctx->count[0] << 3); 144 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); 145 146 /* Pad out to 112 mod 128 and append length */ 147 index = sctx->count[0] & 0x7f; 148 padlen = (index < 112) ? (112 - index) : ((128+112) - index); 149 150 if (!irq_fpu_usable()) { 151 crypto_sha512_update(desc, padding, padlen); 152 crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); 153 } else { 154 kernel_fpu_begin(); 155 /* We need to fill a whole block for __sha512_ssse3_update() */ 156 if (padlen <= 112) { 157 sctx->count[0] += padlen; 158 if (sctx->count[0] < padlen) 159 sctx->count[1]++; 160 memcpy(sctx->buf + index, padding, padlen); 161 } else { 162 __sha512_ssse3_update(desc, padding, padlen, index); 163 } 164 __sha512_ssse3_update(desc, (const u8 *)&bits, 165 sizeof(bits), 112); 166 kernel_fpu_end(); 167 } 168 169 /* Store state in digest */ 170 for (i = 0; i < 8; i++) 171 dst[i] = cpu_to_be64(sctx->state[i]); 172 173 /* Wipe context */ 174 memset(sctx, 0, sizeof(*sctx)); 175 176 return 0; 177} 178 179static int sha512_ssse3_export(struct shash_desc *desc, void *out) 180{ 181 struct sha512_state *sctx = shash_desc_ctx(desc); 182 183 memcpy(out, sctx, sizeof(*sctx)); 184 185 return 0; 186} 187 188static int sha512_ssse3_import(struct shash_desc *desc, const void *in) 189{ 190 struct sha512_state *sctx = shash_desc_ctx(desc); 191 192 memcpy(sctx, in, sizeof(*sctx)); 193 194 return 0; 195} 196 197static int sha384_ssse3_init(struct shash_desc *desc) 198{ 199 struct sha512_state *sctx = shash_desc_ctx(desc); 200 201 sctx->state[0] = SHA384_H0; 202 sctx->state[1] = SHA384_H1; 203 sctx->state[2] = SHA384_H2; 204 sctx->state[3] = SHA384_H3; 205 sctx->state[4] = SHA384_H4; 206 sctx->state[5] = SHA384_H5; 207 sctx->state[6] = SHA384_H6; 208 sctx->state[7] = SHA384_H7; 209 210 sctx->count[0] = sctx->count[1] = 0; 211 212 return 0; 213} 214 215static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) 216{ 217 u8 D[SHA512_DIGEST_SIZE]; 218 219 sha512_ssse3_final(desc, D); 220 221 memcpy(hash, D, SHA384_DIGEST_SIZE); 222 memset(D, 0, SHA512_DIGEST_SIZE); 223 224 return 0; 225} 226 227static struct shash_alg algs[] = { { 228 .digestsize = SHA512_DIGEST_SIZE, 229 .init = sha512_ssse3_init, 230 .update = sha512_ssse3_update, 231 .final = sha512_ssse3_final, 232 .export = sha512_ssse3_export, 233 .import = sha512_ssse3_import, 234 .descsize = sizeof(struct sha512_state), 235 .statesize = sizeof(struct sha512_state), 236 .base = { 237 .cra_name = "sha512", 238 .cra_driver_name = "sha512-ssse3", 239 .cra_priority = 150, 240 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 241 .cra_blocksize = SHA512_BLOCK_SIZE, 242 .cra_module = THIS_MODULE, 243 } 244}, { 245 .digestsize = SHA384_DIGEST_SIZE, 246 .init = sha384_ssse3_init, 247 .update = sha512_ssse3_update, 248 .final = sha384_ssse3_final, 249 .export = sha512_ssse3_export, 250 .import = sha512_ssse3_import, 251 .descsize = sizeof(struct sha512_state), 252 .statesize = sizeof(struct sha512_state), 253 .base = { 254 .cra_name = "sha384", 255 .cra_driver_name = "sha384-ssse3", 256 .cra_priority = 150, 257 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 258 .cra_blocksize = SHA384_BLOCK_SIZE, 259 .cra_module = THIS_MODULE, 260 } 261} }; 262 263#ifdef CONFIG_AS_AVX 264static bool __init avx_usable(void) 265{ 266 u64 xcr0; 267 268 if (!cpu_has_avx || !cpu_has_osxsave) 269 return false; 270 271 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 272 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { 273 pr_info("AVX detected but unusable.\n"); 274 275 return false; 276 } 277 278 return true; 279} 280#endif 281 282static int __init sha512_ssse3_mod_init(void) 283{ 284 /* test for SSSE3 first */ 285 if (cpu_has_ssse3) 286 sha512_transform_asm = sha512_transform_ssse3; 287 288#ifdef CONFIG_AS_AVX 289 /* allow AVX to override SSSE3, it's a little faster */ 290 if (avx_usable()) { 291#ifdef CONFIG_AS_AVX2 292 if (boot_cpu_has(X86_FEATURE_AVX2)) 293 sha512_transform_asm = sha512_transform_rorx; 294 else 295#endif 296 sha512_transform_asm = sha512_transform_avx; 297 } 298#endif 299 300 if (sha512_transform_asm) { 301#ifdef CONFIG_AS_AVX 302 if (sha512_transform_asm == sha512_transform_avx) 303 pr_info("Using AVX optimized SHA-512 implementation\n"); 304#ifdef CONFIG_AS_AVX2 305 else if (sha512_transform_asm == sha512_transform_rorx) 306 pr_info("Using AVX2 optimized SHA-512 implementation\n"); 307#endif 308 else 309#endif 310 pr_info("Using SSSE3 optimized SHA-512 implementation\n"); 311 return crypto_register_shashes(algs, ARRAY_SIZE(algs)); 312 } 313 pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 314 315 return -ENODEV; 316} 317 318static void __exit sha512_ssse3_mod_fini(void) 319{ 320 crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); 321} 322 323module_init(sha512_ssse3_mod_init); 324module_exit(sha512_ssse3_mod_fini); 325 326MODULE_LICENSE("GPL"); 327MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); 328 329MODULE_ALIAS("sha512"); 330MODULE_ALIAS("sha384"); 331