1/* 2 * This file derives from SFMT 1.3.3 3 * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was 4 * released under the terms of the following license: 5 * 6 * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima 7 * University. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions are 11 * met: 12 * 13 * * Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * * Redistributions in binary form must reproduce the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer in the documentation and/or other materials provided 18 * with the distribution. 19 * * Neither the name of the Hiroshima University nor the names of 20 * its contributors may be used to endorse or promote products 21 * derived from this software without specific prior written 22 * permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 */ 36/** 37 * @file SFMT-sse2.h 38 * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 39 * 40 * @author Mutsuo Saito (Hiroshima University) 41 * @author Makoto Matsumoto (Hiroshima University) 42 * 43 * @note We assume LITTLE ENDIAN in this file 44 * 45 * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima 46 * University. All rights reserved. 47 * 48 * The new BSD License is applied to this software, see LICENSE.txt 49 */ 50 51#ifndef SFMT_SSE2_H 52#define SFMT_SSE2_H 53 54/** 55 * This function represents the recursion formula. 56 * @param a a 128-bit part of the interal state array 57 * @param b a 128-bit part of the interal state array 58 * @param c a 128-bit part of the interal state array 59 * @param d a 128-bit part of the interal state array 60 * @param mask 128-bit mask 61 * @return output 62 */ 63JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, 64 __m128i c, __m128i d, __m128i mask) { 65 __m128i v, x, y, z; 66 67 x = _mm_load_si128(a); 68 y = _mm_srli_epi32(*b, SR1); 69 z = _mm_srli_si128(c, SR2); 70 v = _mm_slli_epi32(d, SL1); 71 z = _mm_xor_si128(z, x); 72 z = _mm_xor_si128(z, v); 73 x = _mm_slli_si128(x, SL2); 74 y = _mm_and_si128(y, mask); 75 z = _mm_xor_si128(z, x); 76 z = _mm_xor_si128(z, y); 77 return z; 78} 79 80/** 81 * This function fills the internal state array with pseudorandom 82 * integers. 83 */ 84JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { 85 int i; 86 __m128i r, r1, r2, mask; 87 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); 88 89 r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); 90 r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); 91 for (i = 0; i < N - POS1; i++) { 92 r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, 93 mask); 94 _mm_store_si128(&ctx->sfmt[i].si, r); 95 r1 = r2; 96 r2 = r; 97 } 98 for (; i < N; i++) { 99 r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2, 100 mask); 101 _mm_store_si128(&ctx->sfmt[i].si, r); 102 r1 = r2; 103 r2 = r; 104 } 105} 106 107/** 108 * This function fills the user-specified array with pseudorandom 109 * integers. 110 * 111 * @param array an 128-bit array to be filled by pseudorandom numbers. 112 * @param size number of 128-bit pesudorandom numbers to be generated. 113 */ 114JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { 115 int i, j; 116 __m128i r, r1, r2, mask; 117 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); 118 119 r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); 120 r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); 121 for (i = 0; i < N - POS1; i++) { 122 r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, 123 mask); 124 _mm_store_si128(&array[i].si, r); 125 r1 = r2; 126 r2 = r; 127 } 128 for (; i < N; i++) { 129 r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, 130 mask); 131 _mm_store_si128(&array[i].si, r); 132 r1 = r2; 133 r2 = r; 134 } 135 /* main loop */ 136 for (; i < size - N; i++) { 137 r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, 138 mask); 139 _mm_store_si128(&array[i].si, r); 140 r1 = r2; 141 r2 = r; 142 } 143 for (j = 0; j < 2 * N - size; j++) { 144 r = _mm_load_si128(&array[j + size - N].si); 145 _mm_store_si128(&ctx->sfmt[j].si, r); 146 } 147 for (; i < size; i++) { 148 r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, 149 mask); 150 _mm_store_si128(&array[i].si, r); 151 _mm_store_si128(&ctx->sfmt[j++].si, r); 152 r1 = r2; 153 r2 = r; 154 } 155} 156 157#endif 158