1/* 2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29#include "mpdecimal.h" 30#include <stdio.h> 31#include "bits.h" 32#include "constants.h" 33#include "fnt.h" 34#include "fourstep.h" 35#include "numbertheory.h" 36#include "sixstep.h" 37#include "umodarith.h" 38#include "convolute.h" 39 40 41/* Bignum: Fast convolution using the Number Theoretic Transform. Used for 42 the multiplication of very large coefficients. */ 43 44 45/* Convolute the data in c1 and c2. Result is in c1. */ 46int 47fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) 48{ 49 int (*fnt)(mpd_uint_t *, mpd_size_t, int); 50 int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 51#ifdef PPRO 52 double dmod; 53 uint32_t dinvmod[3]; 54#endif 55 mpd_uint_t n_inv, umod; 56 mpd_size_t i; 57 58 59 SETMODULUS(modnum); 60 n_inv = POWMOD(n, (umod-2)); 61 62 if (ispower2(n)) { 63 if (n > SIX_STEP_THRESHOLD) { 64 fnt = six_step_fnt; 65 inv_fnt = inv_six_step_fnt; 66 } 67 else { 68 fnt = std_fnt; 69 inv_fnt = std_inv_fnt; 70 } 71 } 72 else { 73 fnt = four_step_fnt; 74 inv_fnt = inv_four_step_fnt; 75 } 76 77 if (!fnt(c1, n, modnum)) { 78 return 0; 79 } 80 if (!fnt(c2, n, modnum)) { 81 return 0; 82 } 83 for (i = 0; i < n-1; i += 2) { 84 mpd_uint_t x0 = c1[i]; 85 mpd_uint_t y0 = c2[i]; 86 mpd_uint_t x1 = c1[i+1]; 87 mpd_uint_t y1 = c2[i+1]; 88 MULMOD2(&x0, y0, &x1, y1); 89 c1[i] = x0; 90 c1[i+1] = x1; 91 } 92 93 if (!inv_fnt(c1, n, modnum)) { 94 return 0; 95 } 96 for (i = 0; i < n-3; i += 4) { 97 mpd_uint_t x0 = c1[i]; 98 mpd_uint_t x1 = c1[i+1]; 99 mpd_uint_t x2 = c1[i+2]; 100 mpd_uint_t x3 = c1[i+3]; 101 MULMOD2C(&x0, &x1, n_inv); 102 MULMOD2C(&x2, &x3, n_inv); 103 c1[i] = x0; 104 c1[i+1] = x1; 105 c1[i+2] = x2; 106 c1[i+3] = x3; 107 } 108 109 return 1; 110} 111 112/* Autoconvolute the data in c1. Result is in c1. */ 113int 114fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) 115{ 116 int (*fnt)(mpd_uint_t *, mpd_size_t, int); 117 int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 118#ifdef PPRO 119 double dmod; 120 uint32_t dinvmod[3]; 121#endif 122 mpd_uint_t n_inv, umod; 123 mpd_size_t i; 124 125 126 SETMODULUS(modnum); 127 n_inv = POWMOD(n, (umod-2)); 128 129 if (ispower2(n)) { 130 if (n > SIX_STEP_THRESHOLD) { 131 fnt = six_step_fnt; 132 inv_fnt = inv_six_step_fnt; 133 } 134 else { 135 fnt = std_fnt; 136 inv_fnt = std_inv_fnt; 137 } 138 } 139 else { 140 fnt = four_step_fnt; 141 inv_fnt = inv_four_step_fnt; 142 } 143 144 if (!fnt(c1, n, modnum)) { 145 return 0; 146 } 147 for (i = 0; i < n-1; i += 2) { 148 mpd_uint_t x0 = c1[i]; 149 mpd_uint_t x1 = c1[i+1]; 150 MULMOD2(&x0, x0, &x1, x1); 151 c1[i] = x0; 152 c1[i+1] = x1; 153 } 154 155 if (!inv_fnt(c1, n, modnum)) { 156 return 0; 157 } 158 for (i = 0; i < n-3; i += 4) { 159 mpd_uint_t x0 = c1[i]; 160 mpd_uint_t x1 = c1[i+1]; 161 mpd_uint_t x2 = c1[i+2]; 162 mpd_uint_t x3 = c1[i+3]; 163 MULMOD2C(&x0, &x1, n_inv); 164 MULMOD2C(&x2, &x3, n_inv); 165 c1[i] = x0; 166 c1[i+1] = x1; 167 c1[i+2] = x2; 168 c1[i+3] = x3; 169 } 170 171 return 1; 172} 173 174 175