1/*-
2 * Copyright 2009 Colin Percival
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * This file was originally written by Colin Percival as part of the Tarsnap
27 * online backup system.
28 */
29#include "scrypt_platform.h"
30
31#include <sys/types.h>
32#include <sys/mman.h>
33
34#include <emmintrin.h>
35#include <errno.h>
36#include <stdint.h>
37#include <stdlib.h>
38#include <string.h>
39
40#ifdef USE_OPENSSL_PBKDF2
41#include <openssl/evp.h>
42#else
43#include "sha256.h"
44#endif
45#include "sysendian.h"
46
47#include "crypto_scrypt.h"
48
49static void blkcpy(void *, void *, size_t);
50static void blkxor(void *, void *, size_t);
51static void salsa20_8(__m128i *);
52static void blockmix_salsa8(__m128i *, __m128i *, __m128i *, size_t);
53static uint64_t integerify(void *, size_t);
54static void smix(uint8_t *, size_t, uint64_t, void *, void *);
55
56static void
57blkcpy(void * dest, void * src, size_t len)
58{
59	__m128i * D = dest;
60	__m128i * S = src;
61	size_t L = len / 16;
62	size_t i;
63
64	for (i = 0; i < L; i++)
65		D[i] = S[i];
66}
67
68static void
69blkxor(void * dest, void * src, size_t len)
70{
71	__m128i * D = dest;
72	__m128i * S = src;
73	size_t L = len / 16;
74	size_t i;
75
76	for (i = 0; i < L; i++)
77		D[i] = _mm_xor_si128(D[i], S[i]);
78}
79
80/**
81 * salsa20_8(B):
82 * Apply the salsa20/8 core to the provided block.
83 */
84static void
85salsa20_8(__m128i B[4])
86{
87	__m128i X0, X1, X2, X3;
88	__m128i T;
89	size_t i;
90
91	X0 = B[0];
92	X1 = B[1];
93	X2 = B[2];
94	X3 = B[3];
95
96	for (i = 0; i < 8; i += 2) {
97		/* Operate on "columns". */
98		T = _mm_add_epi32(X0, X3);
99		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
100		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
101		T = _mm_add_epi32(X1, X0);
102		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
103		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
104		T = _mm_add_epi32(X2, X1);
105		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
106		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
107		T = _mm_add_epi32(X3, X2);
108		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
109		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
110
111		/* Rearrange data. */
112		X1 = _mm_shuffle_epi32(X1, 0x93);
113		X2 = _mm_shuffle_epi32(X2, 0x4E);
114		X3 = _mm_shuffle_epi32(X3, 0x39);
115
116		/* Operate on "rows". */
117		T = _mm_add_epi32(X0, X1);
118		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
119		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
120		T = _mm_add_epi32(X3, X0);
121		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
122		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
123		T = _mm_add_epi32(X2, X3);
124		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
125		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
126		T = _mm_add_epi32(X1, X2);
127		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
128		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
129
130		/* Rearrange data. */
131		X1 = _mm_shuffle_epi32(X1, 0x39);
132		X2 = _mm_shuffle_epi32(X2, 0x4E);
133		X3 = _mm_shuffle_epi32(X3, 0x93);
134	}
135
136	B[0] = _mm_add_epi32(B[0], X0);
137	B[1] = _mm_add_epi32(B[1], X1);
138	B[2] = _mm_add_epi32(B[2], X2);
139	B[3] = _mm_add_epi32(B[3], X3);
140}
141
142/**
143 * blockmix_salsa8(Bin, Bout, X, r):
144 * Compute Bout = BlockMix_{salsa20/8, r}(Bin).  The input Bin must be 128r
145 * bytes in length; the output Bout must also be the same size.  The
146 * temporary space X must be 64 bytes.
147 */
148static void
149blockmix_salsa8(__m128i * Bin, __m128i * Bout, __m128i * X, size_t r)
150{
151	size_t i;
152
153	/* 1: X <-- B_{2r - 1} */
154	blkcpy(X, &Bin[8 * r - 4], 64);
155
156	/* 2: for i = 0 to 2r - 1 do */
157	for (i = 0; i < r; i++) {
158		/* 3: X <-- H(X \xor B_i) */
159		blkxor(X, &Bin[i * 8], 64);
160		salsa20_8(X);
161
162		/* 4: Y_i <-- X */
163		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
164		blkcpy(&Bout[i * 4], X, 64);
165
166		/* 3: X <-- H(X \xor B_i) */
167		blkxor(X, &Bin[i * 8 + 4], 64);
168		salsa20_8(X);
169
170		/* 4: Y_i <-- X */
171		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
172		blkcpy(&Bout[(r + i) * 4], X, 64);
173	}
174}
175
176/**
177 * integerify(B, r):
178 * Return the result of parsing B_{2r-1} as a little-endian integer.
179 */
180static uint64_t
181integerify(void * B, size_t r)
182{
183	uint32_t * X = (void *)((uintptr_t)(B) + (2 * r - 1) * 64);
184
185	return (((uint64_t)(X[13]) << 32) + X[0]);
186}
187
188/**
189 * smix(B, r, N, V, XY):
190 * Compute B = SMix_r(B, N).  The input B must be 128r bytes in length;
191 * the temporary storage V must be 128rN bytes in length; the temporary
192 * storage XY must be 256r + 64 bytes in length.  The value N must be a
193 * power of 2 greater than 1.  The arrays B, V, and XY must be aligned to a
194 * multiple of 64 bytes.
195 */
196static void
197smix(uint8_t * B, size_t r, uint64_t N, void * V, void * XY)
198{
199	__m128i * X = XY;
200	__m128i * Y = (void *)((uintptr_t)(XY) + 128 * r);
201	__m128i * Z = (void *)((uintptr_t)(XY) + 256 * r);
202	uint32_t * X32 = (void *)X;
203	uint64_t i, j;
204	size_t k;
205
206	/* 1: X <-- B */
207	for (k = 0; k < 2 * r; k++) {
208		for (i = 0; i < 16; i++) {
209			X32[k * 16 + i] =
210			    le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
211		}
212	}
213
214	/* 2: for i = 0 to N - 1 do */
215	for (i = 0; i < N; i += 2) {
216		/* 3: V_i <-- X */
217		blkcpy((void *)((uintptr_t)(V) + i * 128 * r), X, 128 * r);
218
219		/* 4: X <-- H(X) */
220		blockmix_salsa8(X, Y, Z, r);
221
222		/* 3: V_i <-- X */
223		blkcpy((void *)((uintptr_t)(V) + (i + 1) * 128 * r),
224		    Y, 128 * r);
225
226		/* 4: X <-- H(X) */
227		blockmix_salsa8(Y, X, Z, r);
228	}
229
230	/* 6: for i = 0 to N - 1 do */
231	for (i = 0; i < N; i += 2) {
232		/* 7: j <-- Integerify(X) mod N */
233		j = integerify(X, r) & (N - 1);
234
235		/* 8: X <-- H(X \xor V_j) */
236		blkxor(X, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
237		blockmix_salsa8(X, Y, Z, r);
238
239		/* 7: j <-- Integerify(X) mod N */
240		j = integerify(Y, r) & (N - 1);
241
242		/* 8: X <-- H(X \xor V_j) */
243		blkxor(Y, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
244		blockmix_salsa8(Y, X, Z, r);
245	}
246
247	/* 10: B' <-- X */
248	for (k = 0; k < 2 * r; k++) {
249		for (i = 0; i < 16; i++) {
250			le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
251			    X32[k * 16 + i]);
252		}
253	}
254}
255
256/**
257 * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
258 * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
259 * p, buflen) and write the result into buf.  The parameters r, p, and buflen
260 * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
261 * must be a power of 2 greater than 1.
262 *
263 * Return 0 on success; or -1 on error.
264 */
265int
266crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
267    const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
268    uint8_t * buf, size_t buflen)
269{
270	void * B0, * V0, * XY0;
271	uint8_t * B;
272	uint32_t * V;
273	uint32_t * XY;
274	uint32_t i;
275
276	/* Sanity-check parameters. */
277#if SIZE_MAX > UINT32_MAX
278	if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
279		errno = EFBIG;
280		goto err0;
281	}
282#endif
283	if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) {
284		errno = EFBIG;
285		goto err0;
286	}
287	if (((N & (N - 1)) != 0) || (N == 0)) {
288		errno = EINVAL;
289		goto err0;
290	}
291	if ((r > SIZE_MAX / 128 / p) ||
292#if SIZE_MAX / 256 <= UINT32_MAX
293	    (r > (SIZE_MAX - 64) / 256) ||
294#endif
295	    (N > SIZE_MAX / 128 / r)) {
296		errno = ENOMEM;
297		goto err0;
298	}
299
300	/* Allocate memory. */
301#ifdef HAVE_POSIX_MEMALIGN
302	if ((errno = posix_memalign(&B0, 64, 128 * r * p)) != 0)
303		goto err0;
304	B = (uint8_t *)(B0);
305	if ((errno = posix_memalign(&XY0, 64, 256 * r + 64)) != 0)
306		goto err1;
307	XY = (uint32_t *)(XY0);
308#ifndef MAP_ANON
309	if ((errno = posix_memalign(&V0, 64, 128 * r * N)) != 0)
310		goto err2;
311	V = (uint32_t *)(V0);
312#endif
313#else
314	if ((B0 = malloc(128 * r * p + 63)) == NULL)
315		goto err0;
316	B = (uint8_t *)(((uintptr_t)(B0) + 63) & ~ (uintptr_t)(63));
317	if ((XY0 = malloc(256 * r + 64 + 63)) == NULL)
318		goto err1;
319	XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~ (uintptr_t)(63));
320#ifndef MAP_ANON
321	if ((V0 = malloc(128 * r * N + 63)) == NULL)
322		goto err2;
323	V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~ (uintptr_t)(63));
324#endif
325#endif
326#ifdef MAP_ANON
327	if ((V0 = mmap(NULL, 128 * r * N, PROT_READ | PROT_WRITE,
328#ifdef MAP_NOCORE
329	    MAP_ANON | MAP_PRIVATE | MAP_NOCORE,
330#else
331	    MAP_ANON | MAP_PRIVATE,
332#endif
333	    -1, 0)) == MAP_FAILED)
334		goto err2;
335	V = (uint32_t *)(V0);
336#endif
337
338	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
339#ifdef USE_OPENSSL_PBKDF2
340	PKCS5_PBKDF2_HMAC((const char *)passwd, passwdlen, salt, saltlen, 1, EVP_sha256(), p * 128 * r, B);
341#else
342	PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, p * 128 * r);
343#endif
344
345	/* 2: for i = 0 to p - 1 do */
346	for (i = 0; i < p; i++) {
347		/* 3: B_i <-- MF(B_i, N) */
348		smix(&B[i * 128 * r], r, N, V, XY);
349	}
350
351	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
352#ifdef USE_OPENSSL_PBKDF2
353	PKCS5_PBKDF2_HMAC((const char *)passwd, passwdlen, B, p * 128 * r, 1, EVP_sha256(), buflen, buf);
354#else
355	PBKDF2_SHA256(passwd, passwdlen, B, p * 128 * r, 1, buf, buflen);
356#endif
357
358	/* Free memory. */
359#ifdef MAP_ANON
360	if (munmap(V0, 128 * r * N))
361		goto err2;
362#else
363	free(V0);
364#endif
365	free(XY0);
366	free(B0);
367
368	/* Success! */
369	return (0);
370
371err2:
372	free(XY0);
373err1:
374	free(B0);
375err0:
376	/* Failure! */
377	return (-1);
378}
379