1/*
2 * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
3 *
4 * Based on the public domain implementation of small noncryptographic PRNG
5 * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27#include "utils.h"
28#include "utils-prng.h"
29
30#if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) && defined(__SSE2__)
31#include <xmmintrin.h>
32#endif
33
34void smallprng_srand_r (smallprng_t *x, uint32_t seed)
35{
36    uint32_t i;
37    x->a = 0xf1ea5eed, x->b = x->c = x->d = seed;
38    for (i = 0; i < 20; ++i)
39        smallprng_rand_r (x);
40}
41
42/*
43 * Set a 32-bit seed for PRNG
44 *
45 * LCG is used here for generating independent seeds for different
46 * smallprng instances (in the case if smallprng is also used for
47 * generating these seeds, "Big Crush" test from TestU01 detects
48 * some problems in the glued 'prng_rand_128_r' output data).
49 * Actually we might be even better using some cryptographic
50 * hash for this purpose, but LCG seems to be also enough for
51 * passing "Big Crush".
52 */
53void prng_srand_r (prng_t *x, uint32_t seed)
54{
55#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
56    int i;
57    prng_rand_128_data_t dummy;
58    smallprng_srand_r (&x->p0, seed);
59    x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed;
60    x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345);
61    x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345);
62    x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345);
63    x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345);
64    for (i = 0; i < 20; ++i)
65        prng_rand_128_r (x, &dummy);
66#else
67    smallprng_srand_r (&x->p0, seed);
68    smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345));
69    smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345));
70    smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345));
71    smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345));
72#endif
73}
74
75static force_inline void
76store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned)
77{
78#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
79    if (aligned)
80    {
81        *(uint8x16 *)addr = d->vb;
82        return;
83    }
84    else
85    {
86#ifdef __SSE2__
87        /* workaround for http://gcc.gnu.org/PR55614 */
88        _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d));
89        return;
90#endif
91    }
92#endif
93    /* we could try something better for unaligned writes (packed attribute),
94     * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */
95    memcpy (addr, d, 16);
96}
97
98/*
99 * Helper function and the actual code for "prng_randmemset_r" function
100 */
101static force_inline void
102randmemset_internal (prng_t                  *prng,
103                     uint8_t                 *buf,
104                     size_t                   size,
105                     prng_randmemset_flags_t  flags,
106                     int                      aligned)
107{
108    prng_t local_prng = *prng;
109    prng_rand_128_data_t randdata;
110    size_t i;
111
112    while (size >= 16)
113    {
114        prng_rand_128_data_t t;
115        if (flags == 0)
116        {
117            prng_rand_128_r (&local_prng, &randdata);
118        }
119        else
120        {
121            prng_rand_128_r (&local_prng, &t);
122            prng_rand_128_r (&local_prng, &randdata);
123#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
124            if (flags & RANDMEMSET_MORE_FF)
125            {
126                const uint8x16 const_C0 =
127                {
128                    0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
129                    0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0
130                };
131                randdata.vb |= (t.vb >= const_C0);
132            }
133            if (flags & RANDMEMSET_MORE_00)
134            {
135                const uint8x16 const_40 =
136                {
137                    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
138                    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40
139                };
140                randdata.vb &= (t.vb >= const_40);
141            }
142            if (flags & RANDMEMSET_MORE_FFFFFFFF)
143            {
144                const uint32x4 const_C0000000 =
145                {
146                    0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000
147                };
148                randdata.vw |= ((t.vw << 30) >= const_C0000000);
149            }
150            if (flags & RANDMEMSET_MORE_00000000)
151            {
152                const uint32x4 const_40000000 =
153                {
154                    0x40000000, 0x40000000, 0x40000000, 0x40000000
155                };
156                randdata.vw &= ((t.vw << 30) >= const_40000000);
157            }
158#else
159            #define PROCESS_ONE_LANE(i)                                       \
160                if (flags & RANDMEMSET_MORE_FF)                               \
161                {                                                             \
162                    uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \
163                    mask_ff |= mask_ff >> 1;                                  \
164                    mask_ff |= mask_ff >> 2;                                  \
165                    mask_ff |= mask_ff >> 4;                                  \
166                    randdata.w[i] |= mask_ff;                                 \
167                }                                                             \
168                if (flags & RANDMEMSET_MORE_00)                               \
169                {                                                             \
170                    uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \
171                    mask_00 |= mask_00 >> 1;                                  \
172                    mask_00 |= mask_00 >> 2;                                  \
173                    mask_00 |= mask_00 >> 4;                                  \
174                    randdata.w[i] &= mask_00;                                 \
175                }                                                             \
176                if (flags & RANDMEMSET_MORE_FFFFFFFF)                         \
177                {                                                             \
178                    int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) &     \
179                                       0x80000000;                            \
180                    randdata.w[i] |= mask_ff >> 31;                           \
181                }                                                             \
182                if (flags & RANDMEMSET_MORE_00000000)                         \
183                {                                                             \
184                    int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) &     \
185                                       0x80000000;                            \
186                    randdata.w[i] &= mask_00 >> 31;                           \
187                }
188
189            PROCESS_ONE_LANE (0)
190            PROCESS_ONE_LANE (1)
191            PROCESS_ONE_LANE (2)
192            PROCESS_ONE_LANE (3)
193#endif
194        }
195        if (is_little_endian ())
196        {
197            store_rand_128_data (buf, &randdata, aligned);
198            buf += 16;
199        }
200        else
201        {
202#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
203            const uint8x16 bswap_shufflemask =
204            {
205                3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
206            };
207            randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask);
208            store_rand_128_data (buf, &randdata, aligned);
209            buf += 16;
210#else
211            uint8_t t1, t2, t3, t4;
212            #define STORE_ONE_LANE(i)                                         \
213                t1 = randdata.b[i * 4 + 3];                                   \
214                t2 = randdata.b[i * 4 + 2];                                   \
215                t3 = randdata.b[i * 4 + 1];                                   \
216                t4 = randdata.b[i * 4 + 0];                                   \
217                *buf++ = t1;                                                  \
218                *buf++ = t2;                                                  \
219                *buf++ = t3;                                                  \
220                *buf++ = t4;
221
222            STORE_ONE_LANE (0)
223            STORE_ONE_LANE (1)
224            STORE_ONE_LANE (2)
225            STORE_ONE_LANE (3)
226#endif
227        }
228        size -= 16;
229    }
230    i = 0;
231    while (i < size)
232    {
233        uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF;
234        if (flags != 0)
235        {
236            uint8_t t = prng_rand_r (&local_prng) & 0xFF;
237            if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0))
238                randbyte = 0xFF;
239            if ((flags & RANDMEMSET_MORE_00) && (t < 0x40))
240                randbyte = 0x00;
241            if (i % 4 == 0 && i + 4 <= size)
242            {
243                t = prng_rand_r (&local_prng) & 0xFF;
244                if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0))
245                {
246                    memset(&buf[i], 0xFF, 4);
247                    i += 4;
248                    continue;
249                }
250                if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40))
251                {
252                    memset(&buf[i], 0x00, 4);
253                    i += 4;
254                    continue;
255                }
256            }
257        }
258        buf[i] = randbyte;
259        i++;
260    }
261    *prng = local_prng;
262}
263
264/*
265 * Fill memory buffer with random data. Flags argument may be used
266 * to tweak some statistics properties:
267 *    RANDMEMSET_MORE_00        - set ~25% of bytes to 0x00
268 *    RANDMEMSET_MORE_FF        - set ~25% of bytes to 0xFF
269 *    RANDMEMSET_MORE_00000000  - ~25% chance for 00000000 4-byte clusters
270 *    RANDMEMSET_MORE_FFFFFFFF  - ~25% chance for FFFFFFFF 4-byte clusters
271 */
272void prng_randmemset_r (prng_t                  *prng,
273                        void                    *voidbuf,
274                        size_t                   size,
275                        prng_randmemset_flags_t  flags)
276{
277    uint8_t *buf = (uint8_t *)voidbuf;
278    if ((uintptr_t)buf & 15)
279    {
280        /* unaligned buffer */
281        if (flags == 0)
282            randmemset_internal (prng, buf, size, 0, 0);
283        else if (flags == RANDMEMSET_MORE_00_AND_FF)
284            randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0);
285        else
286            randmemset_internal (prng, buf, size, flags, 0);
287    }
288    else
289    {
290        /* aligned buffer */
291        if (flags == 0)
292            randmemset_internal (prng, buf, size, 0, 1);
293        else if (flags == RANDMEMSET_MORE_00_AND_FF)
294            randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1);
295        else
296            randmemset_internal (prng, buf, size, flags, 1);
297    }
298}
299