11919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah/* 2ecff6554d3c3be05df7416c780ddec219da72a3dStefan Krah * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 31919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 41919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * Redistribution and use in source and binary forms, with or without 51919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * modification, are permitted provided that the following conditions 61919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * are met: 71919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 81919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 1. Redistributions of source code must retain the above copyright 91919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * notice, this list of conditions and the following disclaimer. 101919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 111919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 2. Redistributions in binary form must reproduce the above copyright 121919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * notice, this list of conditions and the following disclaimer in the 131919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * documentation and/or other materials provided with the distribution. 141919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * 151919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 161919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 171919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 181919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 191919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 201919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 211919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 221919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 231919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 241919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 251919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * SUCH DAMAGE. 261919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah */ 271919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 281919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 291919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include "mpdecimal.h" 301919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include <stdio.h> 311919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include <assert.h> 321919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include "bits.h" 331919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include "numbertheory.h" 341919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include "umodarith.h" 351919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#include "difradix2.h" 361919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 371919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 381919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah/* Bignum: The actual transform routine (decimation in frequency). */ 391919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 401919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 411919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah/* 421919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * Generate index pairs (x, bitreverse(x)) and carry out the permutation. 431919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * n must be a power of two. 441919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational", 451919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * Chapter 1.14.4. [http://www.jjj.de/fxt/] 461919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah */ 471919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krahstatic inline void 481919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krahbitreverse_permute(mpd_uint_t a[], mpd_size_t n) 491919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah{ 501919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_size_t x = 0; 511919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_size_t r = 0; 521919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_uint_t t; 531919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 541919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah do { /* Invariant: r = bitreverse(x) */ 551919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah if (r > x) { 561919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah t = a[x]; 571919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[x] = a[r]; 581919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[r] = t; 591919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 601919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah /* Flip trailing consecutive 1 bits and the first zero bit 611919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah * that absorbs a possible carry. */ 621919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah x += 1; 631919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah /* Mirror the operation on r: Flip n_trailing_zeros(x)+1 641919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah high bits of r. */ 651919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah r ^= (n - (n >> (mpd_bsf(x)+1))); 661919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah /* The loop invariant is preserved. */ 671919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } while (x < n); 681919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah} 691919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 701919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 711919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah/* Fast Number Theoretic Transform, decimation in frequency. */ 721919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krahvoid 731919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krahfnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams) 741919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah{ 751919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_uint_t *wtable = tparams->wtable; 761919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_uint_t umod; 771919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#ifdef PPRO 781919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah double dmod; 791919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah uint32_t dinvmod[3]; 801919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah#endif 811919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_uint_t u0, u1, v0, v1; 821919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_uint_t w, w0, w1, wstep; 831919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_size_t m, mhalf; 841919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mpd_size_t j, r; 851919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 861919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 871919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah assert(ispower2(n)); 881919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah assert(n >= 4); 891919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 901919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah SETMODULUS(tparams->modnum); 911919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 921919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah /* m == n */ 931919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mhalf = n / 2; 941919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah for (j = 0; j < mhalf; j += 2) { 951919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 961919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah w0 = wtable[j]; 971919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah w1 = wtable[j+1]; 981919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 991919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u0 = a[j]; 1001919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = a[j+mhalf]; 1011919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1021919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u1 = a[j+1]; 1031919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = a[j+1+mhalf]; 1041919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1051919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[j] = addmod(u0, v0, umod); 1061919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = submod(u0, v0, umod); 1071919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1081919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[j+1] = addmod(u1, v1, umod); 1091919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = submod(u1, v1, umod); 1101919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1111919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah MULMOD2(&v0, w0, &v1, w1); 1121919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1131919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[j+mhalf] = v0; 1141919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[j+1+mhalf] = v1; 1151919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1161919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 1171919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1181919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah wstep = 2; 1191919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah for (m = n/2; m >= 2; m>>=1, wstep<<=1) { 1201919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1211919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah mhalf = m / 2; 1221919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1231919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah /* j == 0 */ 1241919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah for (r = 0; r < n; r += 2*m) { 1251919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1261919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u0 = a[r]; 1271919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = a[r+mhalf]; 1281919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1291919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u1 = a[m+r]; 1301919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = a[m+r+mhalf]; 1311919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1321919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[r] = addmod(u0, v0, umod); 1331919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = submod(u0, v0, umod); 1341919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1351919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[m+r] = addmod(u1, v1, umod); 1361919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = submod(u1, v1, umod); 1371919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1381919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[r+mhalf] = v0; 1391919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[m+r+mhalf] = v1; 1401919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 1411919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1421919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah for (j = 1; j < mhalf; j++) { 1431919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1441919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah w = wtable[j*wstep]; 1451919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1461919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah for (r = 0; r < n; r += 2*m) { 1471919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1481919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u0 = a[r+j]; 1491919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = a[r+j+mhalf]; 1501919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1511919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah u1 = a[m+r+j]; 1521919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = a[m+r+j+mhalf]; 1531919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1541919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[r+j] = addmod(u0, v0, umod); 1551919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v0 = submod(u0, v0, umod); 1561919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1571919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[m+r+j] = addmod(u1, v1, umod); 1581919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah v1 = submod(u1, v1, umod); 1591919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1601919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah MULMOD2C(&v0, &v1, w); 1611919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1621919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[r+j+mhalf] = v0; 1631919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah a[m+r+j+mhalf] = v1; 1641919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 1651919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1661919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 1671919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1681919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah } 1691919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1701919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah bitreverse_permute(a, n); 1711919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah} 1721919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 1731919b7e72bc43315b32f38a6f5f01e8c717907f4Stefan Krah 174