1package org.bouncycastle.math.ec;
2
3import org.bouncycastle.util.Arrays;
4
5import java.math.BigInteger;
6
7class LongArray implements Cloneable
8{
9//    private static long DEINTERLEAVE_MASK = 0x5555555555555555L;
10
11    /*
12     * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits.
13     * In a binary field, this operation is the same as squaring an 8 bit number.
14     *
15     * NOTE: All entries are positive so sign-extension is not an issue.
16     */
17    private static final short[] INTERLEAVE2_TABLE = new short[]
18    {
19        0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
20        0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
21        0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
22        0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
23        0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
24        0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
25        0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
26        0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
27        0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
28        0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
29        0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
30        0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
31        0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
32        0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
33        0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
34        0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
35        0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
36        0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
37        0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
38        0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
39        0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
40        0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
41        0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
42        0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
43        0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
44        0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
45        0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
46        0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
47        0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
48        0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
49        0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
50        0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
51    };
52
53    /*
54     * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits.
55     */
56    private static final int[] INTERLEAVE3_TABLE = new  int[]
57    {
58        0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049,
59        0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249,
60        0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049,
61        0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249,
62        0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049,
63        0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249,
64        0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049,
65        0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249,
66        0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049,
67        0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249,
68        0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049,
69        0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249,
70        0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049,
71        0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249,
72        0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049,
73        0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249
74    };
75
76    /*
77     * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits.
78     */
79    private static final int[] INTERLEAVE4_TABLE = new int[]
80    {
81        0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111,
82        0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111,
83        0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111,
84        0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111,
85        0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
86        0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111,
87        0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111,
88        0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111,
89        0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111,
90        0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
91        0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111,
92        0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111,
93        0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111,
94        0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111,
95        0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
96        0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111,
97        0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111,
98        0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111,
99        0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111,
100        0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
101        0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111,
102        0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111,
103        0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111,
104        0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111,
105        0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
106        0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111,
107        0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111,
108        0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111,
109        0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111,
110        0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
111        0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111,
112        0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111
113    };
114
115    /*
116     * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits.
117     */
118    private static final int[] INTERLEAVE5_TABLE = new int[] {
119        0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421,
120        0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421,
121        0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421,
122        0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421,
123        0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421,
124        0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421,
125        0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421,
126        0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421,
127        0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421,
128        0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421,
129        0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421,
130        0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421,
131        0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421,
132        0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421,
133        0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421,
134        0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421
135    };
136
137    /*
138     * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits.
139     */
140    private static final long[] INTERLEAVE7_TABLE = new long[]
141    {
142        0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L,
143        0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L,
144        0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L,
145        0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L,
146        0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L,
147        0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L,
148        0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L,
149        0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L,
150        0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L,
151        0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L,
152        0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L,
153        0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L,
154        0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L,
155        0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L,
156        0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L,
157        0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L,
158        0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L,
159        0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L,
160        0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L,
161        0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L,
162        0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L,
163        0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L,
164        0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L,
165        0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L,
166        0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L,
167        0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L,
168        0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L,
169        0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L,
170        0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L,
171        0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L,
172        0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L,
173        0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L,
174        0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L,
175        0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L,
176        0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L,
177        0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L,
178        0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L,
179        0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L,
180        0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L,
181        0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L,
182        0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L,
183        0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L,
184        0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L,
185        0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L,
186        0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L,
187        0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L,
188        0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L,
189        0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L,
190        0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L,
191        0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L,
192        0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L,
193        0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L,
194        0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L,
195        0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L,
196        0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L,
197        0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L,
198        0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L,
199        0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L,
200        0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L,
201        0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L,
202        0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L,
203        0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L,
204        0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L,
205        0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L,
206        0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L,
207        0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L,
208        0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L,
209        0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L,
210        0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L,
211        0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L,
212        0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L,
213        0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L,
214        0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L,
215        0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L,
216        0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L,
217        0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L,
218        0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L,
219        0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L,
220        0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L,
221        0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L,
222        0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L,
223        0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L,
224        0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L,
225        0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L,
226        0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L,
227        0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L,
228        0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L,
229        0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L,
230        0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L,
231        0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L,
232        0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L,
233        0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L,
234        0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L,
235        0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L,
236        0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L,
237        0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L,
238        0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L,
239        0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L,
240        0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L,
241        0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L,
242        0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L,
243        0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L,
244        0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L,
245        0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L,
246        0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L,
247        0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L,
248        0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L,
249        0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L,
250        0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L,
251        0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L,
252        0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L,
253        0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L,
254        0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L,
255        0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L,
256        0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L,
257        0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L,
258        0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L,
259        0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L,
260        0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L,
261        0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L,
262        0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L,
263        0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L,
264        0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L,
265        0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L,
266        0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L,
267        0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L,
268        0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L,
269        0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L
270    };
271
272    // For toString(); must have length 64
273    private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000";
274
275    final static byte[] bitLengths =
276    {
277        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
278        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
279        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
280        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
281        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
282        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
283        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
284        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
285        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
286        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
287        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
288        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
289        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
290        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
291        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
292        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
293    };
294
295    // TODO make m fixed for the LongArray, and hence compute T once and for all
296
297    private long[] m_ints;
298
299    public LongArray(int intLen)
300    {
301        m_ints = new long[intLen];
302    }
303
304    public LongArray(long[] ints)
305    {
306        m_ints = ints;
307    }
308
309    public LongArray(long[] ints, int off, int len)
310    {
311        if (off == 0 && len == ints.length)
312        {
313            m_ints = ints;
314        }
315        else
316        {
317            m_ints = new long[len];
318            System.arraycopy(ints, off, m_ints, 0, len);
319        }
320    }
321
322    public LongArray(BigInteger bigInt)
323    {
324        if (bigInt == null || bigInt.signum() < 0)
325        {
326            throw new IllegalArgumentException("invalid F2m field value");
327        }
328
329        if (bigInt.signum() == 0)
330        {
331            m_ints = new long[] { 0L };
332            return;
333        }
334
335        byte[] barr = bigInt.toByteArray();
336        int barrLen = barr.length;
337        int barrStart = 0;
338        if (barr[0] == 0)
339        {
340            // First byte is 0 to enforce highest (=sign) bit is zero.
341            // In this case ignore barr[0].
342            barrLen--;
343            barrStart = 1;
344        }
345        int intLen = (barrLen + 7) / 8;
346        m_ints = new long[intLen];
347
348        int iarrJ = intLen - 1;
349        int rem = barrLen % 8 + barrStart;
350        long temp = 0;
351        int barrI = barrStart;
352        if (barrStart < rem)
353        {
354            for (; barrI < rem; barrI++)
355            {
356                temp <<= 8;
357                int barrBarrI = barr[barrI] & 0xFF;
358                temp |= barrBarrI;
359            }
360            m_ints[iarrJ--] = temp;
361        }
362
363        for (; iarrJ >= 0; iarrJ--)
364        {
365            temp = 0;
366            for (int i = 0; i < 8; i++)
367            {
368                temp <<= 8;
369                int barrBarrI = barr[barrI++] & 0xFF;
370                temp |= barrBarrI;
371            }
372            m_ints[iarrJ] = temp;
373        }
374    }
375
376    public boolean isOne()
377    {
378        long[] a = m_ints;
379        if (a[0] != 1L)
380        {
381            return false;
382        }
383        for (int i = 1; i < a.length; ++i)
384        {
385            if (a[i] != 0L)
386            {
387                return false;
388            }
389        }
390        return true;
391    }
392
393    public boolean isZero()
394    {
395        long[] a = m_ints;
396        for (int i = 0; i < a.length; ++i)
397        {
398            if (a[i] != 0L)
399            {
400                return false;
401            }
402        }
403        return true;
404    }
405
406    public int getUsedLength()
407    {
408        return getUsedLengthFrom(m_ints.length);
409    }
410
411    public int getUsedLengthFrom(int from)
412    {
413        long[] a = m_ints;
414        from = Math.min(from, a.length);
415
416        if (from < 1)
417        {
418            return 0;
419        }
420
421        // Check if first element will act as sentinel
422        if (a[0] != 0)
423        {
424            while (a[--from] == 0)
425            {
426            }
427            return from + 1;
428        }
429
430        do
431        {
432            if (a[--from] != 0)
433            {
434                return from + 1;
435            }
436        }
437        while (from > 0);
438
439        return 0;
440    }
441
442    public int degree()
443    {
444        int i = m_ints.length;
445        long w;
446        do
447        {
448            if (i == 0)
449            {
450                return 0;
451            }
452            w = m_ints[--i];
453        }
454        while (w == 0);
455
456        return (i << 6) + bitLength(w);
457    }
458
459    private int degreeFrom(int limit)
460    {
461        int i = (limit + 62) >>> 6;
462        long w;
463        do
464        {
465            if (i == 0)
466            {
467                return 0;
468            }
469            w = m_ints[--i];
470        }
471        while (w == 0);
472
473        return (i << 6) + bitLength(w);
474    }
475
476//    private int lowestCoefficient()
477//    {
478//        for (int i = 0; i < m_ints.length; ++i)
479//        {
480//            long mi = m_ints[i];
481//            if (mi != 0)
482//            {
483//                int j = 0;
484//                while ((mi & 0xFFL) == 0)
485//                {
486//                    j += 8;
487//                    mi >>>= 8;
488//                }
489//                while ((mi & 1L) == 0)
490//                {
491//                    ++j;
492//                    mi >>>= 1;
493//                }
494//                return (i << 6) + j;
495//            }
496//        }
497//        return -1;
498//    }
499
500    private static int bitLength(long w)
501    {
502        int u = (int)(w >>> 32), b;
503        if (u == 0)
504        {
505            u = (int)w;
506            b = 0;
507        }
508        else
509        {
510            b = 32;
511        }
512
513        int t = u >>> 16, k;
514        if (t == 0)
515        {
516            t = u >>> 8;
517            k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t];
518        }
519        else
520        {
521            int v = t >>> 8;
522            k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v];
523        }
524
525        return b + k;
526    }
527
528    private long[] resizedInts(int newLen)
529    {
530        long[] newInts = new long[newLen];
531        System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen));
532        return newInts;
533    }
534
535    public BigInteger toBigInteger()
536    {
537        int usedLen = getUsedLength();
538        if (usedLen == 0)
539        {
540            return ECConstants.ZERO;
541        }
542
543        long highestInt = m_ints[usedLen - 1];
544        byte[] temp = new byte[8];
545        int barrI = 0;
546        boolean trailingZeroBytesDone = false;
547        for (int j = 7; j >= 0; j--)
548        {
549            byte thisByte = (byte)(highestInt >>> (8 * j));
550            if (trailingZeroBytesDone || (thisByte != 0))
551            {
552                trailingZeroBytesDone = true;
553                temp[barrI++] = thisByte;
554            }
555        }
556
557        int barrLen = 8 * (usedLen - 1) + barrI;
558        byte[] barr = new byte[barrLen];
559        for (int j = 0; j < barrI; j++)
560        {
561            barr[j] = temp[j];
562        }
563        // Highest value int is done now
564
565        for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--)
566        {
567            long mi = m_ints[iarrJ];
568            for (int j = 7; j >= 0; j--)
569            {
570                barr[barrI++] = (byte)(mi >>> (8 * j));
571            }
572        }
573        return new BigInteger(1, barr);
574    }
575
576//    private static long shiftUp(long[] x, int xOff, int count)
577//    {
578//        long prev = 0;
579//        for (int i = 0; i < count; ++i)
580//        {
581//            long next = x[xOff + i];
582//            x[xOff + i] = (next << 1) | prev;
583//            prev = next >>> 63;
584//        }
585//        return prev;
586//    }
587
588    private static long shiftUp(long[] x, int xOff, int count, int shift)
589    {
590        int shiftInv = 64 - shift;
591        long prev = 0;
592        for (int i = 0; i < count; ++i)
593        {
594            long next = x[xOff + i];
595            x[xOff + i] = (next << shift) | prev;
596            prev = next >>> shiftInv;
597        }
598        return prev;
599    }
600
601    private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)
602    {
603        int shiftInv = 64 - shift;
604        long prev = 0;
605        for (int i = 0; i < count; ++i)
606        {
607            long next = x[xOff + i];
608            z[zOff + i] = (next << shift) | prev;
609            prev = next >>> shiftInv;
610        }
611        return prev;
612    }
613
614    public LongArray addOne()
615    {
616        if (m_ints.length == 0)
617        {
618            return new LongArray(new long[]{ 1L });
619        }
620
621        int resultLen = Math.max(1, getUsedLength());
622        long[] ints = resizedInts(resultLen);
623        ints[0] ^= 1L;
624        return new LongArray(ints);
625    }
626
627//    private void addShiftedByBits(LongArray other, int bits)
628//    {
629//        int words = bits >>> 6;
630//        int shift = bits & 0x3F;
631//
632//        if (shift == 0)
633//        {
634//            addShiftedByWords(other, words);
635//            return;
636//        }
637//
638//        int otherUsedLen = other.getUsedLength();
639//        if (otherUsedLen == 0)
640//        {
641//            return;
642//        }
643//
644//        int minLen = otherUsedLen + words + 1;
645//        if (minLen > m_ints.length)
646//        {
647//            m_ints = resizedInts(minLen);
648//        }
649//
650//        long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift);
651//        m_ints[otherUsedLen + words] ^= carry;
652//    }
653
654    private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)
655    {
656        int otherLen = (otherDegree + 63) >>> 6;
657
658        int words = bits >>> 6;
659        int shift = bits & 0x3F;
660
661        if (shift == 0)
662        {
663            add(m_ints, words, other.m_ints, 0, otherLen);
664            return;
665        }
666
667        long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift);
668        if (carry != 0L)
669        {
670            m_ints[otherLen + words] ^= carry;
671        }
672    }
673
674    private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)
675    {
676        int shiftInv = 64 - shift;
677        long prev = 0;
678        for (int i = 0; i < count; ++i)
679        {
680            long next = y[yOff + i];
681            x[xOff + i] ^= (next << shift) | prev;
682            prev = next >>> shiftInv;
683        }
684        return prev;
685    }
686
687    private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)
688    {
689        int shiftInv = 64 - shift;
690        long prev = 0;
691        int i = count;
692        while (--i >= 0)
693        {
694            long next = y[yOff + i];
695            x[xOff + i] ^= (next >>> shift) | prev;
696            prev = next << shiftInv;
697        }
698        return prev;
699    }
700
701    public void addShiftedByWords(LongArray other, int words)
702    {
703        int otherUsedLen = other.getUsedLength();
704        if (otherUsedLen == 0)
705        {
706            return;
707        }
708
709        int minLen = otherUsedLen + words;
710        if (minLen > m_ints.length)
711        {
712            m_ints = resizedInts(minLen);
713        }
714
715        add(m_ints, words, other.m_ints, 0, otherUsedLen);
716    }
717
718    private static void add(long[] x, int xOff, long[] y, int yOff, int count)
719    {
720        for (int i = 0; i < count; ++i)
721        {
722            x[xOff + i] ^= y[yOff + i];
723        }
724    }
725
726    private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)
727    {
728        for (int i = 0; i < count; ++i)
729        {
730            z[zOff + i] = x[xOff + i] ^ y[yOff + i];
731        }
732    }
733
734    private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)
735    {
736        for (int i = 0; i < count; ++i)
737        {
738            x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i];
739        }
740    }
741
742    private static void distribute(long[] x, int src, int dst1, int dst2, int count)
743    {
744        for (int i = 0; i < count; ++i)
745        {
746            long v = x[src + i];
747            x[dst1 + i] ^= v;
748            x[dst2 + i] ^= v;
749        }
750    }
751
752    public int getLength()
753    {
754        return m_ints.length;
755    }
756
757    private static void flipWord(long[] buf, int off, int bit, long word)
758    {
759        int n = off + (bit >>> 6);
760        int shift = bit & 0x3F;
761        if (shift == 0)
762        {
763            buf[n] ^= word;
764        }
765        else
766        {
767            buf[n] ^= word << shift;
768            word >>>= (64 - shift);
769            if (word != 0)
770            {
771                buf[++n] ^= word;
772            }
773        }
774    }
775
776//    private static long getWord(long[] buf, int off, int len, int bit)
777//    {
778//        int n = off + (bit >>> 6);
779//        int shift = bit & 0x3F;
780//        if (shift == 0)
781//        {
782//            return buf[n];
783//        }
784//        long result = buf[n] >>> shift;
785//        if (++n < len)
786//        {
787//            result |= buf[n] << (64 - shift);
788//        }
789//        return result;
790//    }
791
792    public boolean testBitZero()
793    {
794        return m_ints.length > 0 && (m_ints[0] & 1L) != 0;
795    }
796
797    private static boolean testBit(long[] buf, int off, int n)
798    {
799        // theInt = n / 64
800        int theInt = n >>> 6;
801        // theBit = n % 64
802        int theBit = n & 0x3F;
803        long tester = 1L << theBit;
804        return (buf[off + theInt] & tester) != 0;
805    }
806
807    private static void flipBit(long[] buf, int off, int n)
808    {
809        // theInt = n / 64
810        int theInt = n >>> 6;
811        // theBit = n % 64
812        int theBit = n & 0x3F;
813        long flipper = 1L << theBit;
814        buf[off + theInt] ^= flipper;
815    }
816
817//    private static void setBit(long[] buf, int off, int n)
818//    {
819//        // theInt = n / 64
820//        int theInt = n >>> 6;
821//        // theBit = n % 64
822//        int theBit = n & 0x3F;
823//        long setter = 1L << theBit;
824//        buf[off + theInt] |= setter;
825//    }
826//
827//    private static void clearBit(long[] buf, int off, int n)
828//    {
829//        // theInt = n / 64
830//        int theInt = n >>> 6;
831//        // theBit = n % 64
832//        int theBit = n & 0x3F;
833//        long setter = 1L << theBit;
834//        buf[off + theInt] &= ~setter;
835//    }
836
837    private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)
838    {
839        if ((a & 1L) != 0L)
840        {
841            add(c, cOff, b, 0, bLen);
842        }
843        int k = 1;
844        while ((a >>>= 1) != 0L)
845        {
846            if ((a & 1L) != 0L)
847            {
848                long carry = addShiftedUp(c, cOff, b, 0, bLen, k);
849                if (carry != 0L)
850                {
851                    c[cOff + bLen] ^= carry;
852                }
853            }
854            ++k;
855        }
856    }
857
858    public LongArray modMultiplyLD(LongArray other, int m, int[] ks)
859    {
860        /*
861         * Find out the degree of each argument and handle the zero cases
862         */
863        int aDeg = degree();
864        if (aDeg == 0)
865        {
866            return this;
867        }
868        int bDeg = other.degree();
869        if (bDeg == 0)
870        {
871            return other;
872        }
873
874        /*
875         * Swap if necessary so that A is the smaller argument
876         */
877        LongArray A = this, B = other;
878        if (aDeg > bDeg)
879        {
880            A = other; B = this;
881            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
882        }
883
884        /*
885         * Establish the word lengths of the arguments and result
886         */
887        int aLen = (aDeg + 63) >>> 6;
888        int bLen = (bDeg + 63) >>> 6;
889        int cLen = (aDeg + bDeg + 62) >>> 6;
890
891        if (aLen == 1)
892        {
893            long a0 = A.m_ints[0];
894            if (a0 == 1L)
895            {
896                return B;
897            }
898
899            /*
900             * Fast path for small A, with performance dependent only on the number of set bits
901             */
902            long[] c0 = new long[cLen];
903            multiplyWord(a0, B.m_ints, bLen, c0, 0);
904
905            /*
906             * Reduce the raw answer against the reduction coefficients
907             */
908            return reduceResult(c0, 0, cLen, m, ks);
909        }
910
911        /*
912         * Determine if B will get bigger during shifting
913         */
914        int bMax = (bDeg + 7 + 63) >>> 6;
915
916        /*
917         * Lookup table for the offset of each B in the tables
918         */
919        int[] ti = new int[16];
920
921        /*
922         * Precompute table of all 4-bit products of B
923         */
924        long[] T0 = new long[bMax << 4];
925        int tOff = bMax;
926        ti[1] = tOff;
927        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
928        for (int i = 2; i < 16; ++i)
929        {
930            ti[i] = (tOff += bMax);
931            if ((i & 1) == 0)
932            {
933                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
934            }
935            else
936            {
937                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
938            }
939        }
940
941        /*
942         * Second table with all 4-bit products of B shifted 4 bits
943         */
944        long[] T1 = new long[T0.length];
945        shiftUp(T0, 0, T1, 0, T0.length, 4);
946//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
947
948        long[] a = A.m_ints;
949        long[] c = new long[cLen];
950
951        int MASK = 0xF;
952
953        /*
954         * Lopez-Dahab algorithm
955         */
956
957        for (int k = 56; k >= 0; k -= 8)
958        {
959            for (int j = 1; j < aLen; j += 2)
960            {
961                int aVal = (int)(a[j] >>> k);
962                int u = aVal & MASK;
963                int v = (aVal >>> 4) & MASK;
964                addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax);
965            }
966            shiftUp(c, 0, cLen, 8);
967        }
968
969        for (int k = 56; k >= 0; k -= 8)
970        {
971            for (int j = 0; j < aLen; j += 2)
972            {
973                int aVal = (int)(a[j] >>> k);
974                int u = aVal & MASK;
975                int v = (aVal >>> 4) & MASK;
976                addBoth(c, j, T0, ti[u], T1, ti[v], bMax);
977            }
978            if (k > 0)
979            {
980                shiftUp(c, 0, cLen, 8);
981            }
982        }
983
984        /*
985         * Finally the raw answer is collected, reduce it against the reduction coefficients
986         */
987        return reduceResult(c, 0, cLen, m, ks);
988    }
989
990    public LongArray modMultiply(LongArray other, int m, int[] ks)
991    {
992        /*
993         * Find out the degree of each argument and handle the zero cases
994         */
995        int aDeg = degree();
996        if (aDeg == 0)
997        {
998            return this;
999        }
1000        int bDeg = other.degree();
1001        if (bDeg == 0)
1002        {
1003            return other;
1004        }
1005
1006        /*
1007         * Swap if necessary so that A is the smaller argument
1008         */
1009        LongArray A = this, B = other;
1010        if (aDeg > bDeg)
1011        {
1012            A = other; B = this;
1013            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1014        }
1015
1016        /*
1017         * Establish the word lengths of the arguments and result
1018         */
1019        int aLen = (aDeg + 63) >>> 6;
1020        int bLen = (bDeg + 63) >>> 6;
1021        int cLen = (aDeg + bDeg + 62) >>> 6;
1022
1023        if (aLen == 1)
1024        {
1025            long a0 = A.m_ints[0];
1026            if (a0 == 1L)
1027            {
1028                return B;
1029            }
1030
1031            /*
1032             * Fast path for small A, with performance dependent only on the number of set bits
1033             */
1034            long[] c0 = new long[cLen];
1035            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1036
1037            /*
1038             * Reduce the raw answer against the reduction coefficients
1039             */
1040            return reduceResult(c0, 0, cLen, m, ks);
1041        }
1042
1043        /*
1044         * Determine if B will get bigger during shifting
1045         */
1046        int bMax = (bDeg + 7 + 63) >>> 6;
1047
1048        /*
1049         * Lookup table for the offset of each B in the tables
1050         */
1051        int[] ti = new int[16];
1052
1053        /*
1054         * Precompute table of all 4-bit products of B
1055         */
1056        long[] T0 = new long[bMax << 4];
1057        int tOff = bMax;
1058        ti[1] = tOff;
1059        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1060        for (int i = 2; i < 16; ++i)
1061        {
1062            ti[i] = (tOff += bMax);
1063            if ((i & 1) == 0)
1064            {
1065                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1066            }
1067            else
1068            {
1069                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1070            }
1071        }
1072
1073        /*
1074         * Second table with all 4-bit products of B shifted 4 bits
1075         */
1076        long[] T1 = new long[T0.length];
1077        shiftUp(T0, 0, T1, 0, T0.length, 4);
1078//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1079
1080        long[] a = A.m_ints;
1081        long[] c = new long[cLen << 3];
1082
1083        int MASK = 0xF;
1084
1085        /*
1086         * Lopez-Dahab (Modified) algorithm
1087         */
1088
1089        for (int aPos = 0; aPos < aLen; ++aPos)
1090        {
1091            long aVal = a[aPos];
1092            int cOff = aPos;
1093            for (;;)
1094            {
1095                int u = (int)aVal & MASK;
1096                aVal >>>= 4;
1097                int v = (int)aVal & MASK;
1098                addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1099                aVal >>>= 4;
1100                if (aVal == 0L)
1101                {
1102                    break;
1103                }
1104                cOff += cLen;
1105            }
1106        }
1107
1108        {
1109            int cOff = c.length;
1110            while ((cOff -= cLen) != 0)
1111            {
1112                addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1113            }
1114        }
1115
1116        /*
1117         * Finally the raw answer is collected, reduce it against the reduction coefficients
1118         */
1119        return reduceResult(c, 0, cLen, m, ks);
1120    }
1121
1122    public LongArray modMultiplyAlt(LongArray other, int m, int[] ks)
1123    {
1124        /*
1125         * Find out the degree of each argument and handle the zero cases
1126         */
1127        int aDeg = degree();
1128        if (aDeg == 0)
1129        {
1130            return this;
1131        }
1132        int bDeg = other.degree();
1133        if (bDeg == 0)
1134        {
1135            return other;
1136        }
1137
1138        /*
1139         * Swap if necessary so that A is the smaller argument
1140         */
1141        LongArray A = this, B = other;
1142        if (aDeg > bDeg)
1143        {
1144            A = other; B = this;
1145            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1146        }
1147
1148        /*
1149         * Establish the word lengths of the arguments and result
1150         */
1151        int aLen = (aDeg + 63) >>> 6;
1152        int bLen = (bDeg + 63) >>> 6;
1153        int cLen = (aDeg + bDeg + 62) >>> 6;
1154
1155        if (aLen == 1)
1156        {
1157            long a0 = A.m_ints[0];
1158            if (a0 == 1L)
1159            {
1160                return B;
1161            }
1162
1163            /*
1164             * Fast path for small A, with performance dependent only on the number of set bits
1165             */
1166            long[] c0 = new long[cLen];
1167            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1168
1169            /*
1170             * Reduce the raw answer against the reduction coefficients
1171             */
1172            return reduceResult(c0, 0, cLen, m, ks);
1173        }
1174
1175        // NOTE: This works, but is slower than width 4 processing
1176//        if (aLen == 2)
1177//        {
1178//            /*
1179//             * Use common-multiplicand optimization to save ~1/4 of the adds
1180//             */
1181//            long a1 = A.m_ints[0], a2 = A.m_ints[1];
1182//            long aa = a1 & a2; a1 ^= aa; a2 ^= aa;
1183//
1184//            long[] b = B.m_ints;
1185//            long[] c = new long[cLen];
1186//            multiplyWord(aa, b, bLen, c, 1);
1187//            add(c, 0, c, 1, cLen - 1);
1188//            multiplyWord(a1, b, bLen, c, 0);
1189//            multiplyWord(a2, b, bLen, c, 1);
1190//
1191//            /*
1192//             * Reduce the raw answer against the reduction coefficients
1193//             */
1194//            return reduceResult(c, 0, cLen, m, ks);
1195//        }
1196
1197        /*
1198         * Determine the parameters of the interleaved window algorithm: the 'width' in bits to
1199         * process together, the number of evaluation 'positions' implied by that width, and the
1200         * 'top' position at which the regular window algorithm stops.
1201         */
1202        int width, positions, top, banks;
1203
1204        // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto
1205//        width = 1; positions = 64; top = 64; banks = 4;
1206//        width = 2; positions = 32; top = 64; banks = 4;
1207//        width = 3; positions = 21; top = 63; banks = 3;
1208        width = 4; positions = 16; top = 64; banks = 8;
1209//        width = 5; positions = 13; top = 65; banks = 7;
1210//        width = 7; positions = 9; top = 63; banks = 9;
1211//        width = 8; positions = 8; top = 64; banks = 8;
1212
1213        /*
1214         * Determine if B will get bigger during shifting
1215         */
1216        int shifts = top < 64 ? positions : positions - 1;
1217        int bMax = (bDeg + shifts + 63) >>> 6;
1218
1219        int bTotal = bMax * banks, stride = width * banks;
1220
1221        /*
1222         * Create a single temporary buffer, with an offset table to find the positions of things in it
1223         */
1224        int[] ci = new int[1 << width];
1225        int cTotal = aLen;
1226        {
1227            ci[0] = cTotal;
1228            cTotal += bTotal;
1229            ci[1] = cTotal;
1230            for (int i = 2; i < ci.length; ++i)
1231            {
1232                cTotal += cLen;
1233                ci[i] = cTotal;
1234            }
1235            cTotal += cLen;
1236        }
1237        // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen'
1238        ++cTotal;
1239
1240        long[] c = new long[cTotal];
1241
1242        // Prepare A in interleaved form, according to the chosen width
1243        interleave(A.m_ints, 0, c, 0, aLen, width);
1244
1245        // Make a working copy of B, since we will be shifting it
1246        {
1247            int bOff = aLen;
1248            System.arraycopy(B.m_ints, 0, c, bOff, bLen);
1249            for (int bank = 1; bank < banks; ++bank)
1250            {
1251                shiftUp(c, aLen, c, bOff += bMax, bMax, bank);
1252            }
1253        }
1254
1255        /*
1256         * The main loop analyzes the interleaved windows in A, and for each non-zero window
1257         * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is
1258         * breadth-first, checking the lowest window in each word, then looping again for the
1259         * next higher window position.
1260         */
1261        int MASK = (1 << width) - 1;
1262
1263        int k = 0;
1264        for (;;)
1265        {
1266            int aPos = 0;
1267            do
1268            {
1269                long aVal = c[aPos] >>> k;
1270                int bank = 0, bOff = aLen;
1271                for (;;)
1272                {
1273                    int index = (int)(aVal) & MASK;
1274                    if (index != 0)
1275                    {
1276                        /*
1277                         * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in
1278                         * interleaved form, the bits represent the current B shifted by 0, 'positions',
1279                         * 'positions' * 2, ..., 'positions' * ('width' - 1)
1280                         */
1281                        add(c, aPos + ci[index], c, bOff, bMax);
1282                    }
1283                    if (++bank == banks)
1284                    {
1285                        break;
1286                    }
1287                    bOff += bMax;
1288                    aVal >>>= width;
1289                }
1290            }
1291            while (++aPos < aLen);
1292
1293            if ((k += stride) >= top)
1294            {
1295                if (k >= 64)
1296                {
1297                    break;
1298                }
1299
1300                /*
1301                 * Adjustment for window setups with top == 63, the final bit (if any) is processed
1302                 * as the top-bit of a window
1303                 */
1304                k = 64 - width;
1305                MASK &= MASK << (top - k);
1306            }
1307
1308            /*
1309             * After each position has been checked for all words of A, B is shifted up 1 place
1310             */
1311            shiftUp(c, aLen, bTotal, banks);
1312        }
1313
1314        int ciPos = ci.length;
1315        while (--ciPos > 1)
1316        {
1317            if ((ciPos & 1L) == 0L)
1318            {
1319                /*
1320                 * For even numbers, shift contents and add to the half-position
1321                 */
1322                addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions);
1323            }
1324            else
1325            {
1326                /*
1327                 * For odd numbers, 'distribute' contents to the result and the next-lowest position
1328                 */
1329                distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen);
1330            }
1331        }
1332
1333        /*
1334         * Finally the raw answer is collected, reduce it against the reduction coefficients
1335         */
1336        return reduceResult(c, ci[1], cLen, m, ks);
1337    }
1338
1339    public LongArray modReduce(int m, int[] ks)
1340    {
1341        long[] buf = Arrays.clone(m_ints);
1342        int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1343        return new LongArray(buf, 0, rLen);
1344    }
1345
1346    public LongArray multiply(LongArray other, int m, int[] ks)
1347    {
1348        /*
1349         * Find out the degree of each argument and handle the zero cases
1350         */
1351        int aDeg = degree();
1352        if (aDeg == 0)
1353        {
1354            return this;
1355        }
1356        int bDeg = other.degree();
1357        if (bDeg == 0)
1358        {
1359            return other;
1360        }
1361
1362        /*
1363         * Swap if necessary so that A is the smaller argument
1364         */
1365        LongArray A = this, B = other;
1366        if (aDeg > bDeg)
1367        {
1368            A = other; B = this;
1369            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1370        }
1371
1372        /*
1373         * Establish the word lengths of the arguments and result
1374         */
1375        int aLen = (aDeg + 63) >>> 6;
1376        int bLen = (bDeg + 63) >>> 6;
1377        int cLen = (aDeg + bDeg + 62) >>> 6;
1378
1379        if (aLen == 1)
1380        {
1381            long a0 = A.m_ints[0];
1382            if (a0 == 1L)
1383            {
1384                return B;
1385            }
1386
1387            /*
1388             * Fast path for small A, with performance dependent only on the number of set bits
1389             */
1390            long[] c0 = new long[cLen];
1391            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1392
1393            /*
1394             * Reduce the raw answer against the reduction coefficients
1395             */
1396//            return reduceResult(c0, 0, cLen, m, ks);
1397            return new LongArray(c0, 0, cLen);
1398        }
1399
1400        /*
1401         * Determine if B will get bigger during shifting
1402         */
1403        int bMax = (bDeg + 7 + 63) >>> 6;
1404
1405        /*
1406         * Lookup table for the offset of each B in the tables
1407         */
1408        int[] ti = new int[16];
1409
1410        /*
1411         * Precompute table of all 4-bit products of B
1412         */
1413        long[] T0 = new long[bMax << 4];
1414        int tOff = bMax;
1415        ti[1] = tOff;
1416        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1417        for (int i = 2; i < 16; ++i)
1418        {
1419            ti[i] = (tOff += bMax);
1420            if ((i & 1) == 0)
1421            {
1422                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1423            }
1424            else
1425            {
1426                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1427            }
1428        }
1429
1430        /*
1431         * Second table with all 4-bit products of B shifted 4 bits
1432         */
1433        long[] T1 = new long[T0.length];
1434        shiftUp(T0, 0, T1, 0, T0.length, 4);
1435//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1436
1437        long[] a = A.m_ints;
1438        long[] c = new long[cLen << 3];
1439
1440        int MASK = 0xF;
1441
1442        /*
1443         * Lopez-Dahab (Modified) algorithm
1444         */
1445
1446        for (int aPos = 0; aPos < aLen; ++aPos)
1447        {
1448            long aVal = a[aPos];
1449            int cOff = aPos;
1450            for (;;)
1451            {
1452                int u = (int)aVal & MASK;
1453                aVal >>>= 4;
1454                int v = (int)aVal & MASK;
1455                addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1456                aVal >>>= 4;
1457                if (aVal == 0L)
1458                {
1459                    break;
1460                }
1461                cOff += cLen;
1462            }
1463        }
1464
1465        {
1466            int cOff = c.length;
1467            while ((cOff -= cLen) != 0)
1468            {
1469                addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1470            }
1471        }
1472
1473        /*
1474         * Finally the raw answer is collected, reduce it against the reduction coefficients
1475         */
1476//        return reduceResult(c, 0, cLen, m, ks);
1477        return new LongArray(c, 0, cLen);
1478    }
1479
1480    public void reduce(int m, int[] ks)
1481    {
1482        long[] buf = m_ints;
1483        int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1484        if (rLen < buf.length)
1485        {
1486            m_ints = new long[rLen];
1487            System.arraycopy(buf, 0, m_ints, 0, rLen);
1488        }
1489    }
1490
1491    private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks)
1492    {
1493        int rLen = reduceInPlace(buf, off, len, m, ks);
1494        return new LongArray(buf, off, rLen);
1495    }
1496
1497//    private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1498//    {
1499//        for (int i = 0; i < count; ++i)
1500//        {
1501//            z[zOff + i] = deInterleave(x[zOff + i], rounds);
1502//        }
1503//    }
1504//
1505//    private static long deInterleave(long x, int rounds)
1506//    {
1507//        while (--rounds >= 0)
1508//        {
1509//            x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32);
1510//        }
1511//        return x;
1512//    }
1513//
1514//    private static long deInterleave32(long x)
1515//    {
1516//        x = (x | (x >>> 1)) & 0x3333333333333333L;
1517//        x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL;
1518//        x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL;
1519//        x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL;
1520//        x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL;
1521//        return x;
1522//    }
1523
1524    private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks)
1525    {
1526        int mLen = (m + 63) >>> 6;
1527        if (len < mLen)
1528        {
1529            return len;
1530        }
1531
1532        int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree?
1533        int excessBits = (len << 6) - numBits;
1534        while (excessBits >= 64)
1535        {
1536            --len;
1537            excessBits -= 64;
1538        }
1539
1540        int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0;
1541        int wordWiseLimit = Math.max(m, kMax + 64);
1542        int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6;
1543        if (vectorableWords > 1)
1544        {
1545            int vectorWiseWords = len - vectorableWords;
1546            reduceVectorWise(buf, off, len, vectorWiseWords, m, ks);
1547            while (len > vectorWiseWords)
1548            {
1549                buf[off + --len] = 0L;
1550            }
1551            numBits = vectorWiseWords << 6;
1552        }
1553
1554        if (numBits > wordWiseLimit)
1555        {
1556            reduceWordWise(buf, off, len, wordWiseLimit, m, ks);
1557            numBits = wordWiseLimit;
1558        }
1559
1560        if (numBits > m)
1561        {
1562            reduceBitWise(buf, off, numBits, m, ks);
1563        }
1564
1565        return mLen;
1566    }
1567
1568    private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)
1569    {
1570        while (--bitlength >= m)
1571        {
1572            if (testBit(buf, off, bitlength))
1573            {
1574                reduceBit(buf, off, bitlength, m, ks);
1575            }
1576        }
1577    }
1578
1579    private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks)
1580    {
1581        flipBit(buf, off, bit);
1582        int n = bit - m;
1583        int j = ks.length;
1584        while (--j >= 0)
1585        {
1586            flipBit(buf, off, ks[j] + n);
1587        }
1588        flipBit(buf, off, n);
1589    }
1590
1591    private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)
1592    {
1593        int toPos = toBit >>> 6;
1594
1595        while (--len > toPos)
1596        {
1597            long word = buf[off + len];
1598            if (word != 0)
1599            {
1600                buf[off + len] = 0;
1601                reduceWord(buf, off, (len << 6), word, m, ks);
1602            }
1603        }
1604
1605        {
1606            int partial = toBit & 0x3F;
1607            long word = buf[off + toPos] >>> partial;
1608            if (word != 0)
1609            {
1610                buf[off + toPos] ^= word << partial;
1611                reduceWord(buf, off, toBit, word, m, ks);
1612            }
1613        }
1614    }
1615
1616    private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)
1617    {
1618        int offset = bit - m;
1619        int j = ks.length;
1620        while (--j >= 0)
1621        {
1622            flipWord(buf, off, offset + ks[j], word);
1623        }
1624        flipWord(buf, off, offset, word);
1625    }
1626
1627    private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)
1628    {
1629        /*
1630         * NOTE: It's important we go from highest coefficient to lowest, because for the highest
1631         * one (only) we allow the ranges to partially overlap, and therefore any changes must take
1632         * effect for the subsequent lower coefficients.
1633         */
1634        int baseBit = (words << 6) - m;
1635        int j = ks.length;
1636        while (--j >= 0)
1637        {
1638            flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]);
1639        }
1640        flipVector(buf, off, buf, off + words, len - words, baseBit);
1641    }
1642
1643    private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)
1644    {
1645        xOff += bits >>> 6;
1646        bits &= 0x3F;
1647
1648        if (bits == 0)
1649        {
1650            add(x, xOff, y, yOff, yLen);
1651        }
1652        else
1653        {
1654            long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits);
1655            x[xOff] ^= carry;
1656        }
1657    }
1658
1659    public LongArray modSquare(int m, int[] ks)
1660    {
1661        int len = getUsedLength();
1662        if (len == 0)
1663        {
1664            return this;
1665        }
1666
1667        int _2len = len << 1;
1668        long[] r = new long[_2len];
1669
1670        int pos = 0;
1671        while (pos < _2len)
1672        {
1673            long mi = m_ints[pos >>> 1];
1674            r[pos++] = interleave2_32to64((int)mi);
1675            r[pos++] = interleave2_32to64((int)(mi >>> 32));
1676        }
1677
1678        return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks));
1679    }
1680
1681    public LongArray modSquareN(int n, int m, int[] ks)
1682    {
1683        int len = getUsedLength();
1684        if (len == 0)
1685        {
1686            return this;
1687        }
1688
1689        int mLen = (m + 63) >>> 6;
1690        long[] r = new long[mLen << 1];
1691        System.arraycopy(m_ints, 0, r, 0, len);
1692
1693        while (--n >= 0)
1694        {
1695            squareInPlace(r, len, m, ks);
1696            len = reduceInPlace(r, 0, r.length, m, ks);
1697        }
1698
1699        return new LongArray(r, 0, len);
1700    }
1701
1702    public LongArray square(int m, int[] ks)
1703    {
1704        int len = getUsedLength();
1705        if (len == 0)
1706        {
1707            return this;
1708        }
1709
1710        int _2len = len << 1;
1711        long[] r = new long[_2len];
1712
1713        int pos = 0;
1714        while (pos < _2len)
1715        {
1716            long mi = m_ints[pos >>> 1];
1717            r[pos++] = interleave2_32to64((int)mi);
1718            r[pos++] = interleave2_32to64((int)(mi >>> 32));
1719        }
1720
1721        return new LongArray(r, 0, r.length);
1722    }
1723
1724    private static void squareInPlace(long[] x, int xLen, int m, int[] ks)
1725    {
1726        int pos = xLen << 1;
1727        while (--xLen >= 0)
1728        {
1729            long xVal = x[xLen];
1730            x[--pos] = interleave2_32to64((int)(xVal >>> 32));
1731            x[--pos] = interleave2_32to64((int)xVal);
1732        }
1733    }
1734
1735    private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)
1736    {
1737        switch (width)
1738        {
1739        case 3:
1740            interleave3(x, xOff, z, zOff, count);
1741            break;
1742        case 5:
1743            interleave5(x, xOff, z, zOff, count);
1744            break;
1745        case 7:
1746            interleave7(x, xOff, z, zOff, count);
1747            break;
1748        default:
1749            interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1);
1750            break;
1751        }
1752    }
1753
1754    private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count)
1755    {
1756        for (int i = 0; i < count; ++i)
1757        {
1758            z[zOff + i] = interleave3(x[xOff + i]);
1759        }
1760    }
1761
1762    private static long interleave3(long x)
1763    {
1764        long z = x & (1L << 63);
1765        return z
1766            | interleave3_21to63((int)x & 0x1FFFFF)
1767            | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1
1768            | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2;
1769
1770//        int zPos = 0, wPos = 0, xPos = 0;
1771//        for (;;)
1772//        {
1773//            z |= ((x >>> xPos) & 1L) << zPos;
1774//            if (++zPos == 63)
1775//            {
1776//                String sz2 = Long.toBinaryString(z);
1777//                return z;
1778//            }
1779//            if ((xPos += 21) >= 63)
1780//            {
1781//                xPos = ++wPos;
1782//            }
1783//        }
1784    }
1785
1786    private static long interleave3_21to63(int x)
1787    {
1788        int r00 = INTERLEAVE3_TABLE[x & 0x7F];
1789        int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F];
1790        int r42 = INTERLEAVE3_TABLE[x >>> 14];
1791        return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL);
1792    }
1793
1794    private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count)
1795    {
1796        for (int i = 0; i < count; ++i)
1797        {
1798            z[zOff + i] = interleave5(x[xOff + i]);
1799        }
1800    }
1801
1802    private static long interleave5(long x)
1803    {
1804        return interleave3_13to65((int)x & 0x1FFF)
1805            | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1
1806            | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2
1807            | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3
1808            | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4;
1809
1810//        long z = 0;
1811//        int zPos = 0, wPos = 0, xPos = 0;
1812//        for (;;)
1813//        {
1814//            z |= ((x >>> xPos) & 1L) << zPos;
1815//            if (++zPos == 64)
1816//            {
1817//                return z;
1818//            }
1819//            if ((xPos += 13) >= 64)
1820//            {
1821//                xPos = ++wPos;
1822//            }
1823//        }
1824    }
1825
1826    private static long interleave3_13to65(int x)
1827    {
1828        int r00 = INTERLEAVE5_TABLE[x & 0x7F];
1829        int r35 = INTERLEAVE5_TABLE[x >>> 7];
1830        return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL);
1831    }
1832
1833    private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count)
1834    {
1835        for (int i = 0; i < count; ++i)
1836        {
1837            z[zOff + i] = interleave7(x[xOff + i]);
1838        }
1839    }
1840
1841    private static long interleave7(long x)
1842    {
1843        long z = x & (1L << 63);
1844        return z
1845            | INTERLEAVE7_TABLE[(int)x & 0x1FF]
1846            | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1
1847            | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2
1848            | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3
1849            | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4
1850            | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5
1851            | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6;
1852
1853//        int zPos = 0, wPos = 0, xPos = 0;
1854//        for (;;)
1855//        {
1856//            z |= ((x >>> xPos) & 1L) << zPos;
1857//            if (++zPos == 63)
1858//            {
1859//                return z;
1860//            }
1861//            if ((xPos += 9) >= 63)
1862//            {
1863//                xPos = ++wPos;
1864//            }
1865//        }
1866    }
1867
1868    private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1869    {
1870        for (int i = 0; i < count; ++i)
1871        {
1872            z[zOff + i] = interleave2_n(x[xOff + i], rounds);
1873        }
1874    }
1875
1876    private static long interleave2_n(long x, int rounds)
1877    {
1878        while (rounds > 1)
1879        {
1880            rounds -= 2;
1881            x = interleave4_16to64((int)x & 0xFFFF)
1882                | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1
1883                | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2
1884                | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3;
1885        }
1886        if (rounds > 0)
1887        {
1888            x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1;
1889        }
1890        return x;
1891    }
1892
1893    private static long interleave4_16to64(int x)
1894    {
1895        int r00 = INTERLEAVE4_TABLE[x & 0xFF];
1896        int r32 = INTERLEAVE4_TABLE[x >>> 8];
1897        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1898    }
1899
1900    private static long interleave2_32to64(int x)
1901    {
1902        int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16;
1903        int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16;
1904        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1905    }
1906
1907//    private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks)
1908//    {
1909//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1910//        int scale = 1;
1911//
1912//        int numTerms = n;
1913//        while (numTerms > 1)
1914//        {
1915//            if ((numTerms & 1) != 0)
1916//            {
1917//                t3 = t3.modMultiply(t1, m, ks);
1918//                t1 = t1.modSquareN(scale, m, ks);
1919//            }
1920//
1921//            LongArray t2 = t1.modSquareN(scale, m, ks);
1922//            t1 = t1.modMultiply(t2, m, ks);
1923//            numTerms >>>= 1; scale <<= 1;
1924//        }
1925//
1926//        return t3.modMultiply(t1, m, ks);
1927//    }
1928//
1929//    private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks)
1930//    {
1931//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1932//        int scale = 1;
1933//
1934//        int numTerms = n;
1935//        while (numTerms > 1)
1936//        {
1937//            boolean m03 = numTerms % 3 == 0;
1938//            boolean m14 = !m03 && (numTerms & 1) != 0;
1939//
1940//            if (m14)
1941//            {
1942//                t3 = t3.modMultiply(t1, m, ks);
1943//                t1 = t1.modSquareN(scale, m, ks);
1944//            }
1945//
1946//            LongArray t2 = t1.modSquareN(scale, m, ks);
1947//            t1 = t1.modMultiply(t2, m, ks);
1948//
1949//            if (m03)
1950//            {
1951//                t2 = t2.modSquareN(scale, m, ks);
1952//                t1 = t1.modMultiply(t2, m, ks);
1953//                numTerms /= 3; scale *= 3;
1954//            }
1955//            else
1956//            {
1957//                numTerms >>>= 1; scale <<= 1;
1958//            }
1959//        }
1960//
1961//        return t3.modMultiply(t1, m, ks);
1962//    }
1963//
1964//    private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks)
1965//    {
1966//        LongArray t1 = B, t4 = new LongArray(new long[]{ 1L });
1967//        int scale = 1;
1968//
1969//        int numTerms = n;
1970//        while (numTerms > 1)
1971//        {
1972//            if (numTerms % 5 == 0)
1973//            {
1974////                t1 = expItohTsujii23(t1, 5, m, ks);
1975//
1976//                LongArray t3 = t1;
1977//                t1 = t1.modSquareN(scale, m, ks);
1978//
1979//                LongArray t2 = t1.modSquareN(scale, m, ks);
1980//                t1 = t1.modMultiply(t2, m, ks);
1981//                t2 = t1.modSquareN(scale << 1, m, ks);
1982//                t1 = t1.modMultiply(t2, m, ks);
1983//
1984//                t1 = t1.modMultiply(t3, m, ks);
1985//
1986//                numTerms /= 5; scale *= 5;
1987//                continue;
1988//            }
1989//
1990//            boolean m03 = numTerms % 3 == 0;
1991//            boolean m14 = !m03 && (numTerms & 1) != 0;
1992//
1993//            if (m14)
1994//            {
1995//                t4 = t4.modMultiply(t1, m, ks);
1996//                t1 = t1.modSquareN(scale, m, ks);
1997//            }
1998//
1999//            LongArray t2 = t1.modSquareN(scale, m, ks);
2000//            t1 = t1.modMultiply(t2, m, ks);
2001//
2002//            if (m03)
2003//            {
2004//                t2 = t2.modSquareN(scale, m, ks);
2005//                t1 = t1.modMultiply(t2, m, ks);
2006//                numTerms /= 3; scale *= 3;
2007//            }
2008//            else
2009//            {
2010//                numTerms >>>= 1; scale <<= 1;
2011//            }
2012//        }
2013//
2014//        return t4.modMultiply(t1, m, ks);
2015//    }
2016
2017    public LongArray modInverse(int m, int[] ks)
2018    {
2019        /*
2020         * Fermat's Little Theorem
2021         */
2022//        LongArray A = this;
2023//        LongArray B = A.modSquare(m, ks);
2024//        LongArray R0 = B, R1 = B;
2025//        for (int i = 2; i < m; ++i)
2026//        {
2027//            R1 = R1.modSquare(m, ks);
2028//            R0 = R0.modMultiply(R1, m, ks);
2029//        }
2030//
2031//        return R0;
2032
2033        /*
2034         * Itoh-Tsujii
2035         */
2036//        LongArray B = modSquare(m, ks);
2037//        switch (m)
2038//        {
2039//        case 409:
2040//            return expItohTsujii23(B, m - 1, m, ks);
2041//        case 571:
2042//            return expItohTsujii235(B, m - 1, m, ks);
2043//        case 163:
2044//        case 233:
2045//        case 283:
2046//        default:
2047//            return expItohTsujii2(B, m - 1, m, ks);
2048//        }
2049
2050        /*
2051         * Inversion in F2m using the extended Euclidean algorithm
2052         *
2053         * Input: A nonzero polynomial a(z) of degree at most m-1
2054         * Output: a(z)^(-1) mod f(z)
2055         */
2056        int uzDegree = degree();
2057        if (uzDegree == 0)
2058        {
2059            throw new IllegalStateException();
2060        }
2061        if (uzDegree == 1)
2062        {
2063            return this;
2064        }
2065
2066        // u(z) := a(z)
2067        LongArray uz = (LongArray)clone();
2068
2069        int t = (m + 63) >>> 6;
2070
2071        // v(z) := f(z)
2072        LongArray vz = new LongArray(t);
2073        reduceBit(vz.m_ints, 0, m, m, ks);
2074
2075        // g1(z) := 1, g2(z) := 0
2076        LongArray g1z = new LongArray(t);
2077        g1z.m_ints[0] = 1L;
2078        LongArray g2z = new LongArray(t);
2079
2080        int[] uvDeg = new int[]{ uzDegree, m + 1 };
2081        LongArray[] uv = new LongArray[]{ uz, vz };
2082
2083        int[] ggDeg = new int[]{ 1, 0 };
2084        LongArray[] gg = new LongArray[]{ g1z, g2z };
2085
2086        int b = 1;
2087        int duv1 = uvDeg[b];
2088        int dgg1 = ggDeg[b];
2089        int j = duv1 - uvDeg[1 - b];
2090
2091        for (;;)
2092        {
2093            if (j < 0)
2094            {
2095                j = -j;
2096                uvDeg[b] = duv1;
2097                ggDeg[b] = dgg1;
2098                b = 1 - b;
2099                duv1 = uvDeg[b];
2100                dgg1 = ggDeg[b];
2101            }
2102
2103            uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j);
2104
2105            int duv2 = uv[b].degreeFrom(duv1);
2106            if (duv2 == 0)
2107            {
2108                return gg[1 - b];
2109            }
2110
2111            {
2112                int dgg2 = ggDeg[1 - b];
2113                gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j);
2114                dgg2 += j;
2115
2116                if (dgg2 > dgg1)
2117                {
2118                    dgg1 = dgg2;
2119                }
2120                else if (dgg2 == dgg1)
2121                {
2122                    dgg1 = gg[b].degreeFrom(dgg1);
2123                }
2124            }
2125
2126            j += (duv2 - duv1);
2127            duv1 = duv2;
2128        }
2129    }
2130
2131    public boolean equals(Object o)
2132    {
2133        if (!(o instanceof LongArray))
2134        {
2135            return false;
2136        }
2137        LongArray other = (LongArray) o;
2138        int usedLen = getUsedLength();
2139        if (other.getUsedLength() != usedLen)
2140        {
2141            return false;
2142        }
2143        for (int i = 0; i < usedLen; i++)
2144        {
2145            if (m_ints[i] != other.m_ints[i])
2146            {
2147                return false;
2148            }
2149        }
2150        return true;
2151    }
2152
2153    public int hashCode()
2154    {
2155        int usedLen = getUsedLength();
2156        int hash = 1;
2157        for (int i = 0; i < usedLen; i++)
2158        {
2159            long mi = m_ints[i];
2160            hash *= 31;
2161            hash ^= (int)mi;
2162            hash *= 31;
2163            hash ^= (int)(mi >>> 32);
2164        }
2165        return hash;
2166    }
2167
2168    public Object clone()
2169    {
2170        return new LongArray(Arrays.clone(m_ints));
2171    }
2172
2173    public String toString()
2174    {
2175        int i = getUsedLength();
2176        if (i == 0)
2177        {
2178            return "0";
2179        }
2180
2181        StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i]));
2182        while (--i >= 0)
2183        {
2184            String s = Long.toBinaryString(m_ints[i]);
2185
2186            // Add leading zeroes, except for highest significant word
2187            int len = s.length();
2188            if (len < 64)
2189            {
2190                sb.append(ZEROES.substring(len));
2191            }
2192
2193            sb.append(s);
2194        }
2195        return sb.toString();
2196    }
2197}