1package org.bouncycastle.math.ec;
2
3import org.bouncycastle.util.Arrays;
4
5import java.math.BigInteger;
6
7class LongArray
8{
9//    private static long DEINTERLEAVE_MASK = 0x5555555555555555L;
10
11    /*
12     * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits.
13     * In a binary field, this operation is the same as squaring an 8 bit number.
14     */
15    private static final int[] INTERLEAVE2_TABLE = new int[]
16    {
17        0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
18        0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
19        0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
20        0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
21        0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
22        0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
23        0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
24        0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
25        0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
26        0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
27        0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
28        0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
29        0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
30        0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
31        0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
32        0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
33        0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
34        0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
35        0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
36        0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
37        0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
38        0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
39        0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
40        0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
41        0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
42        0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
43        0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
44        0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
45        0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
46        0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
47        0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
48        0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
49    };
50
51    /*
52     * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits.
53     */
54    private static final int[] INTERLEAVE3_TABLE = new  int[]
55    {
56        0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049,
57        0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249,
58        0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049,
59        0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249,
60        0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049,
61        0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249,
62        0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049,
63        0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249,
64        0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049,
65        0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249,
66        0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049,
67        0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249,
68        0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049,
69        0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249,
70        0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049,
71        0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249
72    };
73
74    /*
75     * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits.
76     */
77    private static final int[] INTERLEAVE4_TABLE = new int[]
78    {
79        0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111,
80        0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111,
81        0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111,
82        0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111,
83        0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
84        0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111,
85        0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111,
86        0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111,
87        0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111,
88        0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
89        0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111,
90        0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111,
91        0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111,
92        0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111,
93        0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
94        0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111,
95        0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111,
96        0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111,
97        0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111,
98        0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
99        0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111,
100        0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111,
101        0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111,
102        0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111,
103        0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
104        0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111,
105        0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111,
106        0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111,
107        0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111,
108        0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
109        0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111,
110        0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111
111    };
112
113    /*
114     * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits.
115     */
116    private static final int[] INTERLEAVE5_TABLE = new int[] {
117        0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421,
118        0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421,
119        0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421,
120        0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421,
121        0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421,
122        0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421,
123        0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421,
124        0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421,
125        0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421,
126        0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421,
127        0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421,
128        0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421,
129        0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421,
130        0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421,
131        0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421,
132        0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421
133    };
134
135    /*
136     * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits.
137     */
138    private static final long[] INTERLEAVE7_TABLE = new long[]
139    {
140        0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L,
141        0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L,
142        0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L,
143        0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L,
144        0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L,
145        0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L,
146        0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L,
147        0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L,
148        0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L,
149        0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L,
150        0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L,
151        0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L,
152        0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L,
153        0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L,
154        0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L,
155        0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L,
156        0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L,
157        0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L,
158        0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L,
159        0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L,
160        0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L,
161        0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L,
162        0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L,
163        0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L,
164        0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L,
165        0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L,
166        0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L,
167        0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L,
168        0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L,
169        0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L,
170        0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L,
171        0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L,
172        0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L,
173        0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L,
174        0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L,
175        0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L,
176        0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L,
177        0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L,
178        0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L,
179        0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L,
180        0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L,
181        0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L,
182        0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L,
183        0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L,
184        0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L,
185        0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L,
186        0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L,
187        0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L,
188        0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L,
189        0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L,
190        0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L,
191        0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L,
192        0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L,
193        0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L,
194        0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L,
195        0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L,
196        0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L,
197        0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L,
198        0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L,
199        0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L,
200        0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L,
201        0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L,
202        0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L,
203        0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L,
204        0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L,
205        0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L,
206        0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L,
207        0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L,
208        0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L,
209        0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L,
210        0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L,
211        0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L,
212        0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L,
213        0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L,
214        0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L,
215        0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L,
216        0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L,
217        0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L,
218        0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L,
219        0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L,
220        0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L,
221        0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L,
222        0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L,
223        0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L,
224        0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L,
225        0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L,
226        0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L,
227        0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L,
228        0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L,
229        0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L,
230        0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L,
231        0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L,
232        0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L,
233        0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L,
234        0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L,
235        0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L,
236        0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L,
237        0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L,
238        0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L,
239        0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L,
240        0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L,
241        0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L,
242        0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L,
243        0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L,
244        0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L,
245        0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L,
246        0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L,
247        0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L,
248        0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L,
249        0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L,
250        0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L,
251        0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L,
252        0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L,
253        0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L,
254        0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L,
255        0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L,
256        0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L,
257        0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L,
258        0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L,
259        0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L,
260        0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L,
261        0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L,
262        0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L,
263        0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L,
264        0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L,
265        0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L,
266        0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L,
267        0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L
268    };
269
270    // For toString(); must have length 64
271    private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000";
272
273    final static byte[] bitLengths =
274    {
275        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
276        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
277        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
278        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
279        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
280        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
281        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
282        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
283        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
284        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
285        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
286        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
287        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
288        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
289        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
290        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
291    };
292
293    // TODO make m fixed for the LongArray, and hence compute T once and for all
294
295    private long[] m_ints;
296
297    public LongArray(int intLen)
298    {
299        m_ints = new long[intLen];
300    }
301
302    public LongArray(long[] ints)
303    {
304        m_ints = ints;
305    }
306
307    public LongArray(long[] ints, int off, int len)
308    {
309        if (off == 0 && len == ints.length)
310        {
311            m_ints = ints;
312        }
313        else
314        {
315            m_ints = new long[len];
316            System.arraycopy(ints, off, m_ints, 0, len);
317        }
318    }
319
320    public LongArray(BigInteger bigInt)
321    {
322        if (bigInt == null || bigInt.signum() < 0)
323        {
324            throw new IllegalArgumentException("invalid F2m field value");
325        }
326
327        if (bigInt.signum() == 0)
328        {
329            m_ints = new long[] { 0L };
330            return;
331        }
332
333        byte[] barr = bigInt.toByteArray();
334        int barrLen = barr.length;
335        int barrStart = 0;
336        if (barr[0] == 0)
337        {
338            // First byte is 0 to enforce highest (=sign) bit is zero.
339            // In this case ignore barr[0].
340            barrLen--;
341            barrStart = 1;
342        }
343        int intLen = (barrLen + 7) / 8;
344        m_ints = new long[intLen];
345
346        int iarrJ = intLen - 1;
347        int rem = barrLen % 8 + barrStart;
348        long temp = 0;
349        int barrI = barrStart;
350        if (barrStart < rem)
351        {
352            for (; barrI < rem; barrI++)
353            {
354                temp <<= 8;
355                int barrBarrI = barr[barrI] & 0xFF;
356                temp |= barrBarrI;
357            }
358            m_ints[iarrJ--] = temp;
359        }
360
361        for (; iarrJ >= 0; iarrJ--)
362        {
363            temp = 0;
364            for (int i = 0; i < 8; i++)
365            {
366                temp <<= 8;
367                int barrBarrI = barr[barrI++] & 0xFF;
368                temp |= barrBarrI;
369            }
370            m_ints[iarrJ] = temp;
371        }
372    }
373
374    public boolean isOne()
375    {
376        long[] a = m_ints;
377        if (a[0] != 1L)
378        {
379            return false;
380        }
381        for (int i = 1; i < a.length; ++i)
382        {
383            if (a[i] != 0L)
384            {
385                return false;
386            }
387        }
388        return true;
389    }
390
391    public boolean isZero()
392    {
393        long[] a = m_ints;
394        for (int i = 0; i < a.length; ++i)
395        {
396            if (a[i] != 0L)
397            {
398                return false;
399            }
400        }
401        return true;
402    }
403
404    public int getUsedLength()
405    {
406        return getUsedLengthFrom(m_ints.length);
407    }
408
409    public int getUsedLengthFrom(int from)
410    {
411        long[] a = m_ints;
412        from = Math.min(from, a.length);
413
414        if (from < 1)
415        {
416            return 0;
417        }
418
419        // Check if first element will act as sentinel
420        if (a[0] != 0)
421        {
422            while (a[--from] == 0)
423            {
424            }
425            return from + 1;
426        }
427
428        do
429        {
430            if (a[--from] != 0)
431            {
432                return from + 1;
433            }
434        }
435        while (from > 0);
436
437        return 0;
438    }
439
440    public int degree()
441    {
442        int i = m_ints.length;
443        long w;
444        do
445        {
446            if (i == 0)
447            {
448                return 0;
449            }
450            w = m_ints[--i];
451        }
452        while (w == 0);
453
454        return (i << 6) + bitLength(w);
455    }
456
457    private int degreeFrom(int limit)
458    {
459        int i = (limit + 62) >>> 6;
460        long w;
461        do
462        {
463            if (i == 0)
464            {
465                return 0;
466            }
467            w = m_ints[--i];
468        }
469        while (w == 0);
470
471        return (i << 6) + bitLength(w);
472    }
473
474//    private int lowestCoefficient()
475//    {
476//        for (int i = 0; i < m_ints.length; ++i)
477//        {
478//            long mi = m_ints[i];
479//            if (mi != 0)
480//            {
481//                int j = 0;
482//                while ((mi & 0xFFL) == 0)
483//                {
484//                    j += 8;
485//                    mi >>>= 8;
486//                }
487//                while ((mi & 1L) == 0)
488//                {
489//                    ++j;
490//                    mi >>>= 1;
491//                }
492//                return (i << 6) + j;
493//            }
494//        }
495//        return -1;
496//    }
497
498    private static int bitLength(long w)
499    {
500        int u = (int)(w >>> 32), b;
501        if (u == 0)
502        {
503            u = (int)w;
504            b = 0;
505        }
506        else
507        {
508            b = 32;
509        }
510
511        int t = u >>> 16, k;
512        if (t == 0)
513        {
514            t = u >>> 8;
515            k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t];
516        }
517        else
518        {
519            int v = t >>> 8;
520            k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v];
521        }
522
523        return b + k;
524    }
525
526    private long[] resizedInts(int newLen)
527    {
528        long[] newInts = new long[newLen];
529        System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen));
530        return newInts;
531    }
532
533    public BigInteger toBigInteger()
534    {
535        int usedLen = getUsedLength();
536        if (usedLen == 0)
537        {
538            return ECConstants.ZERO;
539        }
540
541        long highestInt = m_ints[usedLen - 1];
542        byte[] temp = new byte[8];
543        int barrI = 0;
544        boolean trailingZeroBytesDone = false;
545        for (int j = 7; j >= 0; j--)
546        {
547            byte thisByte = (byte)(highestInt >>> (8 * j));
548            if (trailingZeroBytesDone || (thisByte != 0))
549            {
550                trailingZeroBytesDone = true;
551                temp[barrI++] = thisByte;
552            }
553        }
554
555        int barrLen = 8 * (usedLen - 1) + barrI;
556        byte[] barr = new byte[barrLen];
557        for (int j = 0; j < barrI; j++)
558        {
559            barr[j] = temp[j];
560        }
561        // Highest value int is done now
562
563        for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--)
564        {
565            long mi = m_ints[iarrJ];
566            for (int j = 7; j >= 0; j--)
567            {
568                barr[barrI++] = (byte)(mi >>> (8 * j));
569            }
570        }
571        return new BigInteger(1, barr);
572    }
573
574//    private static long shiftUp(long[] x, int xOff, int count)
575//    {
576//        long prev = 0;
577//        for (int i = 0; i < count; ++i)
578//        {
579//            long next = x[xOff + i];
580//            x[xOff + i] = (next << 1) | prev;
581//            prev = next >>> 63;
582//        }
583//        return prev;
584//    }
585
586    private static long shiftUp(long[] x, int xOff, int count, int shift)
587    {
588        int shiftInv = 64 - shift;
589        long prev = 0;
590        for (int i = 0; i < count; ++i)
591        {
592            long next = x[xOff + i];
593            x[xOff + i] = (next << shift) | prev;
594            prev = next >>> shiftInv;
595        }
596        return prev;
597    }
598
599    private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)
600    {
601        int shiftInv = 64 - shift;
602        long prev = 0;
603        for (int i = 0; i < count; ++i)
604        {
605            long next = x[xOff + i];
606            z[zOff + i] = (next << shift) | prev;
607            prev = next >>> shiftInv;
608        }
609        return prev;
610    }
611
612    public LongArray addOne()
613    {
614        if (m_ints.length == 0)
615        {
616            return new LongArray(new long[]{ 1L });
617        }
618
619        int resultLen = Math.max(1, getUsedLength());
620        long[] ints = resizedInts(resultLen);
621        ints[0] ^= 1L;
622        return new LongArray(ints);
623    }
624
625//    private void addShiftedByBits(LongArray other, int bits)
626//    {
627//        int words = bits >>> 6;
628//        int shift = bits & 0x3F;
629//
630//        if (shift == 0)
631//        {
632//            addShiftedByWords(other, words);
633//            return;
634//        }
635//
636//        int otherUsedLen = other.getUsedLength();
637//        if (otherUsedLen == 0)
638//        {
639//            return;
640//        }
641//
642//        int minLen = otherUsedLen + words + 1;
643//        if (minLen > m_ints.length)
644//        {
645//            m_ints = resizedInts(minLen);
646//        }
647//
648//        long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift);
649//        m_ints[otherUsedLen + words] ^= carry;
650//    }
651
652    private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)
653    {
654        int otherLen = (otherDegree + 63) >>> 6;
655
656        int words = bits >>> 6;
657        int shift = bits & 0x3F;
658
659        if (shift == 0)
660        {
661            add(m_ints, words, other.m_ints, 0, otherLen);
662            return;
663        }
664
665        long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift);
666        if (carry != 0L)
667        {
668            m_ints[otherLen + words] ^= carry;
669        }
670    }
671
672    private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)
673    {
674        int shiftInv = 64 - shift;
675        long prev = 0;
676        for (int i = 0; i < count; ++i)
677        {
678            long next = y[yOff + i];
679            x[xOff + i] ^= (next << shift) | prev;
680            prev = next >>> shiftInv;
681        }
682        return prev;
683    }
684
685    private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)
686    {
687        int shiftInv = 64 - shift;
688        long prev = 0;
689        int i = count;
690        while (--i >= 0)
691        {
692            long next = y[yOff + i];
693            x[xOff + i] ^= (next >>> shift) | prev;
694            prev = next << shiftInv;
695        }
696        return prev;
697    }
698
699    public void addShiftedByWords(LongArray other, int words)
700    {
701        int otherUsedLen = other.getUsedLength();
702        if (otherUsedLen == 0)
703        {
704            return;
705        }
706
707        int minLen = otherUsedLen + words;
708        if (minLen > m_ints.length)
709        {
710            m_ints = resizedInts(minLen);
711        }
712
713        add(m_ints, words, other.m_ints, 0, otherUsedLen);
714    }
715
716    private static void add(long[] x, int xOff, long[] y, int yOff, int count)
717    {
718        for (int i = 0; i < count; ++i)
719        {
720            x[xOff + i] ^= y[yOff + i];
721        }
722    }
723
724    private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)
725    {
726        for (int i = 0; i < count; ++i)
727        {
728            z[zOff + i] = x[xOff + i] ^ y[yOff + i];
729        }
730    }
731
732    private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)
733    {
734        for (int i = 0; i < count; ++i)
735        {
736            x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i];
737        }
738    }
739
740    private static void distribute(long[] x, int src, int dst1, int dst2, int count)
741    {
742        for (int i = 0; i < count; ++i)
743        {
744            long v = x[src + i];
745            x[dst1 + i] ^= v;
746            x[dst2 + i] ^= v;
747        }
748    }
749
750    public int getLength()
751    {
752        return m_ints.length;
753    }
754
755    private static void flipWord(long[] buf, int off, int bit, long word)
756    {
757        int n = off + (bit >>> 6);
758        int shift = bit & 0x3F;
759        if (shift == 0)
760        {
761            buf[n] ^= word;
762        }
763        else
764        {
765            buf[n] ^= word << shift;
766            word >>>= (64 - shift);
767            if (word != 0)
768            {
769                buf[++n] ^= word;
770            }
771        }
772    }
773
774//    private static long getWord(long[] buf, int off, int len, int bit)
775//    {
776//        int n = off + (bit >>> 6);
777//        int shift = bit & 0x3F;
778//        if (shift == 0)
779//        {
780//            return buf[n];
781//        }
782//        long result = buf[n] >>> shift;
783//        if (++n < len)
784//        {
785//            result |= buf[n] << (64 - shift);
786//        }
787//        return result;
788//    }
789
790    public boolean testBitZero()
791    {
792        return m_ints.length > 0 && (m_ints[0] & 1L) != 0;
793    }
794
795    private static boolean testBit(long[] buf, int off, int n)
796    {
797        // theInt = n / 64
798        int theInt = n >>> 6;
799        // theBit = n % 64
800        int theBit = n & 0x3F;
801        long tester = 1L << theBit;
802        return (buf[off + theInt] & tester) != 0;
803    }
804
805    private static void flipBit(long[] buf, int off, int n)
806    {
807        // theInt = n / 64
808        int theInt = n >>> 6;
809        // theBit = n % 64
810        int theBit = n & 0x3F;
811        long flipper = 1L << theBit;
812        buf[off + theInt] ^= flipper;
813    }
814
815//    private static void setBit(long[] buf, int off, int n)
816//    {
817//        // theInt = n / 64
818//        int theInt = n >>> 6;
819//        // theBit = n % 64
820//        int theBit = n & 0x3F;
821//        long setter = 1L << theBit;
822//        buf[off + theInt] |= setter;
823//    }
824//
825//    private static void clearBit(long[] buf, int off, int n)
826//    {
827//        // theInt = n / 64
828//        int theInt = n >>> 6;
829//        // theBit = n % 64
830//        int theBit = n & 0x3F;
831//        long setter = 1L << theBit;
832//        buf[off + theInt] &= ~setter;
833//    }
834
835    private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)
836    {
837        if ((a & 1L) != 0L)
838        {
839            add(c, cOff, b, 0, bLen);
840        }
841        int k = 1;
842        while ((a >>>= 1) != 0L)
843        {
844            if ((a & 1L) != 0L)
845            {
846                long carry = addShiftedUp(c, cOff, b, 0, bLen, k);
847                if (carry != 0L)
848                {
849                    c[cOff + bLen] ^= carry;
850                }
851            }
852            ++k;
853        }
854    }
855
856    public LongArray modMultiplyLD(LongArray other, int m, int[] ks)
857    {
858        /*
859         * Find out the degree of each argument and handle the zero cases
860         */
861        int aDeg = degree();
862        if (aDeg == 0)
863        {
864            return this;
865        }
866        int bDeg = other.degree();
867        if (bDeg == 0)
868        {
869            return other;
870        }
871
872        /*
873         * Swap if necessary so that A is the smaller argument
874         */
875        LongArray A = this, B = other;
876        if (aDeg > bDeg)
877        {
878            A = other; B = this;
879            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
880        }
881
882        /*
883         * Establish the word lengths of the arguments and result
884         */
885        int aLen = (aDeg + 63) >>> 6;
886        int bLen = (bDeg + 63) >>> 6;
887        int cLen = (aDeg + bDeg + 62) >>> 6;
888
889        if (aLen == 1)
890        {
891            long a0 = A.m_ints[0];
892            if (a0 == 1L)
893            {
894                return B;
895            }
896
897            /*
898             * Fast path for small A, with performance dependent only on the number of set bits
899             */
900            long[] c0 = new long[cLen];
901            multiplyWord(a0, B.m_ints, bLen, c0, 0);
902
903            /*
904             * Reduce the raw answer against the reduction coefficients
905             */
906            return reduceResult(c0, 0, cLen, m, ks);
907        }
908
909        /*
910         * Determine if B will get bigger during shifting
911         */
912        int bMax = (bDeg + 7 + 63) >>> 6;
913
914        /*
915         * Lookup table for the offset of each B in the tables
916         */
917        int[] ti = new int[16];
918
919        /*
920         * Precompute table of all 4-bit products of B
921         */
922        long[] T0 = new long[bMax << 4];
923        int tOff = bMax;
924        ti[1] = tOff;
925        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
926        for (int i = 2; i < 16; ++i)
927        {
928            ti[i] = (tOff += bMax);
929            if ((i & 1) == 0)
930            {
931                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
932            }
933            else
934            {
935                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
936            }
937        }
938
939        /*
940         * Second table with all 4-bit products of B shifted 4 bits
941         */
942        long[] T1 = new long[T0.length];
943        shiftUp(T0, 0, T1, 0, T0.length, 4);
944//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
945
946        long[] a = A.m_ints;
947        long[] c = new long[cLen];
948
949        int MASK = 0xF;
950
951        /*
952         * Lopez-Dahab algorithm
953         */
954
955        for (int k = 56; k >= 0; k -= 8)
956        {
957            for (int j = 1; j < aLen; j += 2)
958            {
959                int aVal = (int)(a[j] >>> k);
960                int u = aVal & MASK;
961                int v = (aVal >>> 4) & MASK;
962                addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax);
963            }
964            shiftUp(c, 0, cLen, 8);
965        }
966
967        for (int k = 56; k >= 0; k -= 8)
968        {
969            for (int j = 0; j < aLen; j += 2)
970            {
971                int aVal = (int)(a[j] >>> k);
972                int u = aVal & MASK;
973                int v = (aVal >>> 4) & MASK;
974                addBoth(c, j, T0, ti[u], T1, ti[v], bMax);
975            }
976            if (k > 0)
977            {
978                shiftUp(c, 0, cLen, 8);
979            }
980        }
981
982        /*
983         * Finally the raw answer is collected, reduce it against the reduction coefficients
984         */
985        return reduceResult(c, 0, cLen, m, ks);
986    }
987
988    public LongArray modMultiply(LongArray other, int m, int[] ks)
989    {
990        /*
991         * Find out the degree of each argument and handle the zero cases
992         */
993        int aDeg = degree();
994        if (aDeg == 0)
995        {
996            return this;
997        }
998        int bDeg = other.degree();
999        if (bDeg == 0)
1000        {
1001            return other;
1002        }
1003
1004        /*
1005         * Swap if necessary so that A is the smaller argument
1006         */
1007        LongArray A = this, B = other;
1008        if (aDeg > bDeg)
1009        {
1010            A = other; B = this;
1011            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1012        }
1013
1014        /*
1015         * Establish the word lengths of the arguments and result
1016         */
1017        int aLen = (aDeg + 63) >>> 6;
1018        int bLen = (bDeg + 63) >>> 6;
1019        int cLen = (aDeg + bDeg + 62) >>> 6;
1020
1021        if (aLen == 1)
1022        {
1023            long a0 = A.m_ints[0];
1024            if (a0 == 1L)
1025            {
1026                return B;
1027            }
1028
1029            /*
1030             * Fast path for small A, with performance dependent only on the number of set bits
1031             */
1032            long[] c0 = new long[cLen];
1033            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1034
1035            /*
1036             * Reduce the raw answer against the reduction coefficients
1037             */
1038            return reduceResult(c0, 0, cLen, m, ks);
1039        }
1040
1041        /*
1042         * Determine if B will get bigger during shifting
1043         */
1044        int bMax = (bDeg + 7 + 63) >>> 6;
1045
1046        /*
1047         * Lookup table for the offset of each B in the tables
1048         */
1049        int[] ti = new int[16];
1050
1051        /*
1052         * Precompute table of all 4-bit products of B
1053         */
1054        long[] T0 = new long[bMax << 4];
1055        int tOff = bMax;
1056        ti[1] = tOff;
1057        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1058        for (int i = 2; i < 16; ++i)
1059        {
1060            ti[i] = (tOff += bMax);
1061            if ((i & 1) == 0)
1062            {
1063                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1064            }
1065            else
1066            {
1067                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1068            }
1069        }
1070
1071        /*
1072         * Second table with all 4-bit products of B shifted 4 bits
1073         */
1074        long[] T1 = new long[T0.length];
1075        shiftUp(T0, 0, T1, 0, T0.length, 4);
1076//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1077
1078        long[] a = A.m_ints;
1079        long[] c = new long[cLen << 3];
1080
1081        int MASK = 0xF;
1082
1083        /*
1084         * Lopez-Dahab (Modified) algorithm
1085         */
1086
1087        for (int aPos = 0; aPos < aLen; ++aPos)
1088        {
1089            long aVal = a[aPos];
1090            int cOff = aPos;
1091            for (;;)
1092            {
1093                int u = (int)aVal & MASK;
1094                aVal >>>= 4;
1095                int v = (int)aVal & MASK;
1096                addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1097                aVal >>>= 4;
1098                if (aVal == 0L)
1099                {
1100                    break;
1101                }
1102                cOff += cLen;
1103            }
1104        }
1105
1106        {
1107            int cOff = c.length;
1108            while ((cOff -= cLen) != 0)
1109            {
1110                addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1111            }
1112        }
1113
1114        /*
1115         * Finally the raw answer is collected, reduce it against the reduction coefficients
1116         */
1117        return reduceResult(c, 0, cLen, m, ks);
1118    }
1119
1120    public LongArray modMultiplyAlt(LongArray other, int m, int[] ks)
1121    {
1122        /*
1123         * Find out the degree of each argument and handle the zero cases
1124         */
1125        int aDeg = degree();
1126        if (aDeg == 0)
1127        {
1128            return this;
1129        }
1130        int bDeg = other.degree();
1131        if (bDeg == 0)
1132        {
1133            return other;
1134        }
1135
1136        /*
1137         * Swap if necessary so that A is the smaller argument
1138         */
1139        LongArray A = this, B = other;
1140        if (aDeg > bDeg)
1141        {
1142            A = other; B = this;
1143            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1144        }
1145
1146        /*
1147         * Establish the word lengths of the arguments and result
1148         */
1149        int aLen = (aDeg + 63) >>> 6;
1150        int bLen = (bDeg + 63) >>> 6;
1151        int cLen = (aDeg + bDeg + 62) >>> 6;
1152
1153        if (aLen == 1)
1154        {
1155            long a0 = A.m_ints[0];
1156            if (a0 == 1L)
1157            {
1158                return B;
1159            }
1160
1161            /*
1162             * Fast path for small A, with performance dependent only on the number of set bits
1163             */
1164            long[] c0 = new long[cLen];
1165            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1166
1167            /*
1168             * Reduce the raw answer against the reduction coefficients
1169             */
1170            return reduceResult(c0, 0, cLen, m, ks);
1171        }
1172
1173        // NOTE: This works, but is slower than width 4 processing
1174//        if (aLen == 2)
1175//        {
1176//            /*
1177//             * Use common-multiplicand optimization to save ~1/4 of the adds
1178//             */
1179//            long a1 = A.m_ints[0], a2 = A.m_ints[1];
1180//            long aa = a1 & a2; a1 ^= aa; a2 ^= aa;
1181//
1182//            long[] b = B.m_ints;
1183//            long[] c = new long[cLen];
1184//            multiplyWord(aa, b, bLen, c, 1);
1185//            add(c, 0, c, 1, cLen - 1);
1186//            multiplyWord(a1, b, bLen, c, 0);
1187//            multiplyWord(a2, b, bLen, c, 1);
1188//
1189//            /*
1190//             * Reduce the raw answer against the reduction coefficients
1191//             */
1192//            return reduceResult(c, 0, cLen, m, ks);
1193//        }
1194
1195        /*
1196         * Determine the parameters of the interleaved window algorithm: the 'width' in bits to
1197         * process together, the number of evaluation 'positions' implied by that width, and the
1198         * 'top' position at which the regular window algorithm stops.
1199         */
1200        int width, positions, top, banks;
1201
1202        // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto
1203//        width = 1; positions = 64; top = 64; banks = 4;
1204//        width = 2; positions = 32; top = 64; banks = 4;
1205//        width = 3; positions = 21; top = 63; banks = 3;
1206        width = 4; positions = 16; top = 64; banks = 8;
1207//        width = 5; positions = 13; top = 65; banks = 7;
1208//        width = 7; positions = 9; top = 63; banks = 9;
1209//        width = 8; positions = 8; top = 64; banks = 8;
1210
1211        /*
1212         * Determine if B will get bigger during shifting
1213         */
1214        int shifts = top < 64 ? positions : positions - 1;
1215        int bMax = (bDeg + shifts + 63) >>> 6;
1216
1217        int bTotal = bMax * banks, stride = width * banks;
1218
1219        /*
1220         * Create a single temporary buffer, with an offset table to find the positions of things in it
1221         */
1222        int[] ci = new int[1 << width];
1223        int cTotal = aLen;
1224        {
1225            ci[0] = cTotal;
1226            cTotal += bTotal;
1227            ci[1] = cTotal;
1228            for (int i = 2; i < ci.length; ++i)
1229            {
1230                cTotal += cLen;
1231                ci[i] = cTotal;
1232            }
1233            cTotal += cLen;
1234        }
1235        // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen'
1236        ++cTotal;
1237
1238        long[] c = new long[cTotal];
1239
1240        // Prepare A in interleaved form, according to the chosen width
1241        interleave(A.m_ints, 0, c, 0, aLen, width);
1242
1243        // Make a working copy of B, since we will be shifting it
1244        {
1245            int bOff = aLen;
1246            System.arraycopy(B.m_ints, 0, c, bOff, bLen);
1247            for (int bank = 1; bank < banks; ++bank)
1248            {
1249                shiftUp(c, aLen, c, bOff += bMax, bMax, bank);
1250            }
1251        }
1252
1253        /*
1254         * The main loop analyzes the interleaved windows in A, and for each non-zero window
1255         * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is
1256         * breadth-first, checking the lowest window in each word, then looping again for the
1257         * next higher window position.
1258         */
1259        int MASK = (1 << width) - 1;
1260
1261        int k = 0;
1262        for (;;)
1263        {
1264            int aPos = 0;
1265            do
1266            {
1267                long aVal = c[aPos] >>> k;
1268                int bank = 0, bOff = aLen;
1269                for (;;)
1270                {
1271                    int index = (int)(aVal) & MASK;
1272                    if (index != 0)
1273                    {
1274                        /*
1275                         * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in
1276                         * interleaved form, the bits represent the current B shifted by 0, 'positions',
1277                         * 'positions' * 2, ..., 'positions' * ('width' - 1)
1278                         */
1279                        add(c, aPos + ci[index], c, bOff, bMax);
1280                    }
1281                    if (++bank == banks)
1282                    {
1283                        break;
1284                    }
1285                    bOff += bMax;
1286                    aVal >>>= width;
1287                }
1288            }
1289            while (++aPos < aLen);
1290
1291            if ((k += stride) >= top)
1292            {
1293                if (k >= 64)
1294                {
1295                    break;
1296                }
1297
1298                /*
1299                 * Adjustment for window setups with top == 63, the final bit (if any) is processed
1300                 * as the top-bit of a window
1301                 */
1302                k = 64 - width;
1303                MASK &= MASK << (top - k);
1304            }
1305
1306            /*
1307             * After each position has been checked for all words of A, B is shifted up 1 place
1308             */
1309            shiftUp(c, aLen, bTotal, banks);
1310        }
1311
1312        int ciPos = ci.length;
1313        while (--ciPos > 1)
1314        {
1315            if ((ciPos & 1L) == 0L)
1316            {
1317                /*
1318                 * For even numbers, shift contents and add to the half-position
1319                 */
1320                addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions);
1321            }
1322            else
1323            {
1324                /*
1325                 * For odd numbers, 'distribute' contents to the result and the next-lowest position
1326                 */
1327                distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen);
1328            }
1329        }
1330
1331        /*
1332         * Finally the raw answer is collected, reduce it against the reduction coefficients
1333         */
1334        return reduceResult(c, ci[1], cLen, m, ks);
1335    }
1336
1337    public LongArray modReduce(int m, int[] ks)
1338    {
1339        long[] buf = Arrays.clone(m_ints);
1340        int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1341        return new LongArray(buf, 0, rLen);
1342    }
1343
1344    public LongArray multiply(LongArray other, int m, int[] ks)
1345    {
1346        /*
1347         * Find out the degree of each argument and handle the zero cases
1348         */
1349        int aDeg = degree();
1350        if (aDeg == 0)
1351        {
1352            return this;
1353        }
1354        int bDeg = other.degree();
1355        if (bDeg == 0)
1356        {
1357            return other;
1358        }
1359
1360        /*
1361         * Swap if necessary so that A is the smaller argument
1362         */
1363        LongArray A = this, B = other;
1364        if (aDeg > bDeg)
1365        {
1366            A = other; B = this;
1367            int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1368        }
1369
1370        /*
1371         * Establish the word lengths of the arguments and result
1372         */
1373        int aLen = (aDeg + 63) >>> 6;
1374        int bLen = (bDeg + 63) >>> 6;
1375        int cLen = (aDeg + bDeg + 62) >>> 6;
1376
1377        if (aLen == 1)
1378        {
1379            long a0 = A.m_ints[0];
1380            if (a0 == 1L)
1381            {
1382                return B;
1383            }
1384
1385            /*
1386             * Fast path for small A, with performance dependent only on the number of set bits
1387             */
1388            long[] c0 = new long[cLen];
1389            multiplyWord(a0, B.m_ints, bLen, c0, 0);
1390
1391            /*
1392             * Reduce the raw answer against the reduction coefficients
1393             */
1394//            return reduceResult(c0, 0, cLen, m, ks);
1395            return new LongArray(c0, 0, cLen);
1396        }
1397
1398        /*
1399         * Determine if B will get bigger during shifting
1400         */
1401        int bMax = (bDeg + 7 + 63) >>> 6;
1402
1403        /*
1404         * Lookup table for the offset of each B in the tables
1405         */
1406        int[] ti = new int[16];
1407
1408        /*
1409         * Precompute table of all 4-bit products of B
1410         */
1411        long[] T0 = new long[bMax << 4];
1412        int tOff = bMax;
1413        ti[1] = tOff;
1414        System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1415        for (int i = 2; i < 16; ++i)
1416        {
1417            ti[i] = (tOff += bMax);
1418            if ((i & 1) == 0)
1419            {
1420                shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1421            }
1422            else
1423            {
1424                add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1425            }
1426        }
1427
1428        /*
1429         * Second table with all 4-bit products of B shifted 4 bits
1430         */
1431        long[] T1 = new long[T0.length];
1432        shiftUp(T0, 0, T1, 0, T0.length, 4);
1433//        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1434
1435        long[] a = A.m_ints;
1436        long[] c = new long[cLen << 3];
1437
1438        int MASK = 0xF;
1439
1440        /*
1441         * Lopez-Dahab (Modified) algorithm
1442         */
1443
1444        for (int aPos = 0; aPos < aLen; ++aPos)
1445        {
1446            long aVal = a[aPos];
1447            int cOff = aPos;
1448            for (;;)
1449            {
1450                int u = (int)aVal & MASK;
1451                aVal >>>= 4;
1452                int v = (int)aVal & MASK;
1453                addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1454                aVal >>>= 4;
1455                if (aVal == 0L)
1456                {
1457                    break;
1458                }
1459                cOff += cLen;
1460            }
1461        }
1462
1463        {
1464            int cOff = c.length;
1465            while ((cOff -= cLen) != 0)
1466            {
1467                addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1468            }
1469        }
1470
1471        /*
1472         * Finally the raw answer is collected, reduce it against the reduction coefficients
1473         */
1474//        return reduceResult(c, 0, cLen, m, ks);
1475        return new LongArray(c, 0, cLen);
1476    }
1477
1478    public void reduce(int m, int[] ks)
1479    {
1480        long[] buf = m_ints;
1481        int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1482        if (rLen < buf.length)
1483        {
1484            m_ints = new long[rLen];
1485            System.arraycopy(buf, 0, m_ints, 0, rLen);
1486        }
1487    }
1488
1489    private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks)
1490    {
1491        int rLen = reduceInPlace(buf, off, len, m, ks);
1492        return new LongArray(buf, off, rLen);
1493    }
1494
1495//    private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1496//    {
1497//        for (int i = 0; i < count; ++i)
1498//        {
1499//            z[zOff + i] = deInterleave(x[zOff + i], rounds);
1500//        }
1501//    }
1502//
1503//    private static long deInterleave(long x, int rounds)
1504//    {
1505//        while (--rounds >= 0)
1506//        {
1507//            x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32);
1508//        }
1509//        return x;
1510//    }
1511//
1512//    private static long deInterleave32(long x)
1513//    {
1514//        x = (x | (x >>> 1)) & 0x3333333333333333L;
1515//        x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL;
1516//        x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL;
1517//        x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL;
1518//        x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL;
1519//        return x;
1520//    }
1521
1522    private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks)
1523    {
1524        int mLen = (m + 63) >>> 6;
1525        if (len < mLen)
1526        {
1527            return len;
1528        }
1529
1530        int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree?
1531        int excessBits = (len << 6) - numBits;
1532        while (excessBits >= 64)
1533        {
1534            --len;
1535            excessBits -= 64;
1536        }
1537
1538        int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0;
1539        int wordWiseLimit = Math.max(m, kMax + 64);
1540        int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6;
1541        if (vectorableWords > 1)
1542        {
1543            int vectorWiseWords = len - vectorableWords;
1544            reduceVectorWise(buf, off, len, vectorWiseWords, m, ks);
1545            while (len > vectorWiseWords)
1546            {
1547                buf[off + --len] = 0L;
1548            }
1549            numBits = vectorWiseWords << 6;
1550        }
1551
1552        if (numBits > wordWiseLimit)
1553        {
1554            reduceWordWise(buf, off, len, wordWiseLimit, m, ks);
1555            numBits = wordWiseLimit;
1556        }
1557
1558        if (numBits > m)
1559        {
1560            reduceBitWise(buf, off, numBits, m, ks);
1561        }
1562
1563        return mLen;
1564    }
1565
1566    private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)
1567    {
1568        while (--bitlength >= m)
1569        {
1570            if (testBit(buf, off, bitlength))
1571            {
1572                reduceBit(buf, off, bitlength, m, ks);
1573            }
1574        }
1575    }
1576
1577    private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks)
1578    {
1579        flipBit(buf, off, bit);
1580        int n = bit - m;
1581        int j = ks.length;
1582        while (--j >= 0)
1583        {
1584            flipBit(buf, off, ks[j] + n);
1585        }
1586        flipBit(buf, off, n);
1587    }
1588
1589    private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)
1590    {
1591        int toPos = toBit >>> 6;
1592
1593        while (--len > toPos)
1594        {
1595            long word = buf[off + len];
1596            if (word != 0)
1597            {
1598                buf[off + len] = 0;
1599                reduceWord(buf, off, (len << 6), word, m, ks);
1600            }
1601        }
1602
1603        {
1604            int partial = toBit & 0x3F;
1605            long word = buf[off + toPos] >>> partial;
1606            if (word != 0)
1607            {
1608                buf[off + toPos] ^= word << partial;
1609                reduceWord(buf, off, toBit, word, m, ks);
1610            }
1611        }
1612    }
1613
1614    private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)
1615    {
1616        int offset = bit - m;
1617        int j = ks.length;
1618        while (--j >= 0)
1619        {
1620            flipWord(buf, off, offset + ks[j], word);
1621        }
1622        flipWord(buf, off, offset, word);
1623    }
1624
1625    private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)
1626    {
1627        /*
1628         * NOTE: It's important we go from highest coefficient to lowest, because for the highest
1629         * one (only) we allow the ranges to partially overlap, and therefore any changes must take
1630         * effect for the subsequent lower coefficients.
1631         */
1632        int baseBit = (words << 6) - m;
1633        int j = ks.length;
1634        while (--j >= 0)
1635        {
1636            flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]);
1637        }
1638        flipVector(buf, off, buf, off + words, len - words, baseBit);
1639    }
1640
1641    private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)
1642    {
1643        xOff += bits >>> 6;
1644        bits &= 0x3F;
1645
1646        if (bits == 0)
1647        {
1648            add(x, xOff, y, yOff, yLen);
1649        }
1650        else
1651        {
1652            long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits);
1653            x[xOff] ^= carry;
1654        }
1655    }
1656
1657    public LongArray modSquare(int m, int[] ks)
1658    {
1659        int len = getUsedLength();
1660        if (len == 0)
1661        {
1662            return this;
1663        }
1664
1665        int _2len = len << 1;
1666        long[] r = new long[_2len];
1667
1668        int pos = 0;
1669        while (pos < _2len)
1670        {
1671            long mi = m_ints[pos >>> 1];
1672            r[pos++] = interleave2_32to64((int)mi);
1673            r[pos++] = interleave2_32to64((int)(mi >>> 32));
1674        }
1675
1676        return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks));
1677    }
1678
1679    public LongArray modSquareN(int n, int m, int[] ks)
1680    {
1681        int len = getUsedLength();
1682        if (len == 0)
1683        {
1684            return this;
1685        }
1686
1687        int mLen = (m + 63) >>> 6;
1688        long[] r = new long[mLen << 1];
1689        System.arraycopy(m_ints, 0, r, 0, len);
1690
1691        while (--n >= 0)
1692        {
1693            squareInPlace(r, len, m, ks);
1694            len = reduceInPlace(r, 0, r.length, m, ks);
1695        }
1696
1697        return new LongArray(r, 0, len);
1698    }
1699
1700    public LongArray square(int m, int[] ks)
1701    {
1702        int len = getUsedLength();
1703        if (len == 0)
1704        {
1705            return this;
1706        }
1707
1708        int _2len = len << 1;
1709        long[] r = new long[_2len];
1710
1711        int pos = 0;
1712        while (pos < _2len)
1713        {
1714            long mi = m_ints[pos >>> 1];
1715            r[pos++] = interleave2_32to64((int)mi);
1716            r[pos++] = interleave2_32to64((int)(mi >>> 32));
1717        }
1718
1719        return new LongArray(r, 0, r.length);
1720    }
1721
1722    private static void squareInPlace(long[] x, int xLen, int m, int[] ks)
1723    {
1724        int pos = xLen << 1;
1725        while (--xLen >= 0)
1726        {
1727            long xVal = x[xLen];
1728            x[--pos] = interleave2_32to64((int)(xVal >>> 32));
1729            x[--pos] = interleave2_32to64((int)xVal);
1730        }
1731    }
1732
1733    private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)
1734    {
1735        switch (width)
1736        {
1737        case 3:
1738            interleave3(x, xOff, z, zOff, count);
1739            break;
1740        case 5:
1741            interleave5(x, xOff, z, zOff, count);
1742            break;
1743        case 7:
1744            interleave7(x, xOff, z, zOff, count);
1745            break;
1746        default:
1747            interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1);
1748            break;
1749        }
1750    }
1751
1752    private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count)
1753    {
1754        for (int i = 0; i < count; ++i)
1755        {
1756            z[zOff + i] = interleave3(x[xOff + i]);
1757        }
1758    }
1759
1760    private static long interleave3(long x)
1761    {
1762        long z = x & (1L << 63);
1763        return z
1764            | interleave3_21to63((int)x & 0x1FFFFF)
1765            | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1
1766            | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2;
1767
1768//        int zPos = 0, wPos = 0, xPos = 0;
1769//        for (;;)
1770//        {
1771//            z |= ((x >>> xPos) & 1L) << zPos;
1772//            if (++zPos == 63)
1773//            {
1774//                String sz2 = Long.toBinaryString(z);
1775//                return z;
1776//            }
1777//            if ((xPos += 21) >= 63)
1778//            {
1779//                xPos = ++wPos;
1780//            }
1781//        }
1782    }
1783
1784    private static long interleave3_21to63(int x)
1785    {
1786        int r00 = INTERLEAVE3_TABLE[x & 0x7F];
1787        int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F];
1788        int r42 = INTERLEAVE3_TABLE[x >>> 14];
1789        return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL);
1790    }
1791
1792    private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count)
1793    {
1794        for (int i = 0; i < count; ++i)
1795        {
1796            z[zOff + i] = interleave5(x[xOff + i]);
1797        }
1798    }
1799
1800    private static long interleave5(long x)
1801    {
1802        return interleave3_13to65((int)x & 0x1FFF)
1803            | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1
1804            | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2
1805            | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3
1806            | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4;
1807
1808//        long z = 0;
1809//        int zPos = 0, wPos = 0, xPos = 0;
1810//        for (;;)
1811//        {
1812//            z |= ((x >>> xPos) & 1L) << zPos;
1813//            if (++zPos == 64)
1814//            {
1815//                return z;
1816//            }
1817//            if ((xPos += 13) >= 64)
1818//            {
1819//                xPos = ++wPos;
1820//            }
1821//        }
1822    }
1823
1824    private static long interleave3_13to65(int x)
1825    {
1826        int r00 = INTERLEAVE5_TABLE[x & 0x7F];
1827        int r35 = INTERLEAVE5_TABLE[x >>> 7];
1828        return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL);
1829    }
1830
1831    private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count)
1832    {
1833        for (int i = 0; i < count; ++i)
1834        {
1835            z[zOff + i] = interleave7(x[xOff + i]);
1836        }
1837    }
1838
1839    private static long interleave7(long x)
1840    {
1841        long z = x & (1L << 63);
1842        return z
1843            | INTERLEAVE7_TABLE[(int)x & 0x1FF]
1844            | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1
1845            | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2
1846            | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3
1847            | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4
1848            | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5
1849            | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6;
1850
1851//        int zPos = 0, wPos = 0, xPos = 0;
1852//        for (;;)
1853//        {
1854//            z |= ((x >>> xPos) & 1L) << zPos;
1855//            if (++zPos == 63)
1856//            {
1857//                return z;
1858//            }
1859//            if ((xPos += 9) >= 63)
1860//            {
1861//                xPos = ++wPos;
1862//            }
1863//        }
1864    }
1865
1866    private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1867    {
1868        for (int i = 0; i < count; ++i)
1869        {
1870            z[zOff + i] = interleave2_n(x[xOff + i], rounds);
1871        }
1872    }
1873
1874    private static long interleave2_n(long x, int rounds)
1875    {
1876        while (rounds > 1)
1877        {
1878            rounds -= 2;
1879            x = interleave4_16to64((int)x & 0xFFFF)
1880                | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1
1881                | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2
1882                | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3;
1883        }
1884        if (rounds > 0)
1885        {
1886            x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1;
1887        }
1888        return x;
1889    }
1890
1891    private static long interleave4_16to64(int x)
1892    {
1893        int r00 = INTERLEAVE4_TABLE[x & 0xFF];
1894        int r32 = INTERLEAVE4_TABLE[x >>> 8];
1895        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1896    }
1897
1898    private static long interleave2_32to64(int x)
1899    {
1900        int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16;
1901        int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16;
1902        return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1903    }
1904
1905//    private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks)
1906//    {
1907//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1908//        int scale = 1;
1909//
1910//        int numTerms = n;
1911//        while (numTerms > 1)
1912//        {
1913//            if ((numTerms & 1) != 0)
1914//            {
1915//                t3 = t3.modMultiply(t1, m, ks);
1916//                t1 = t1.modSquareN(scale, m, ks);
1917//            }
1918//
1919//            LongArray t2 = t1.modSquareN(scale, m, ks);
1920//            t1 = t1.modMultiply(t2, m, ks);
1921//            numTerms >>>= 1; scale <<= 1;
1922//        }
1923//
1924//        return t3.modMultiply(t1, m, ks);
1925//    }
1926//
1927//    private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks)
1928//    {
1929//        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1930//        int scale = 1;
1931//
1932//        int numTerms = n;
1933//        while (numTerms > 1)
1934//        {
1935//            boolean m03 = numTerms % 3 == 0;
1936//            boolean m14 = !m03 && (numTerms & 1) != 0;
1937//
1938//            if (m14)
1939//            {
1940//                t3 = t3.modMultiply(t1, m, ks);
1941//                t1 = t1.modSquareN(scale, m, ks);
1942//            }
1943//
1944//            LongArray t2 = t1.modSquareN(scale, m, ks);
1945//            t1 = t1.modMultiply(t2, m, ks);
1946//
1947//            if (m03)
1948//            {
1949//                t2 = t2.modSquareN(scale, m, ks);
1950//                t1 = t1.modMultiply(t2, m, ks);
1951//                numTerms /= 3; scale *= 3;
1952//            }
1953//            else
1954//            {
1955//                numTerms >>>= 1; scale <<= 1;
1956//            }
1957//        }
1958//
1959//        return t3.modMultiply(t1, m, ks);
1960//    }
1961//
1962//    private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks)
1963//    {
1964//        LongArray t1 = B, t4 = new LongArray(new long[]{ 1L });
1965//        int scale = 1;
1966//
1967//        int numTerms = n;
1968//        while (numTerms > 1)
1969//        {
1970//            if (numTerms % 5 == 0)
1971//            {
1972////                t1 = expItohTsujii23(t1, 5, m, ks);
1973//
1974//                LongArray t3 = t1;
1975//                t1 = t1.modSquareN(scale, m, ks);
1976//
1977//                LongArray t2 = t1.modSquareN(scale, m, ks);
1978//                t1 = t1.modMultiply(t2, m, ks);
1979//                t2 = t1.modSquareN(scale << 1, m, ks);
1980//                t1 = t1.modMultiply(t2, m, ks);
1981//
1982//                t1 = t1.modMultiply(t3, m, ks);
1983//
1984//                numTerms /= 5; scale *= 5;
1985//                continue;
1986//            }
1987//
1988//            boolean m03 = numTerms % 3 == 0;
1989//            boolean m14 = !m03 && (numTerms & 1) != 0;
1990//
1991//            if (m14)
1992//            {
1993//                t4 = t4.modMultiply(t1, m, ks);
1994//                t1 = t1.modSquareN(scale, m, ks);
1995//            }
1996//
1997//            LongArray t2 = t1.modSquareN(scale, m, ks);
1998//            t1 = t1.modMultiply(t2, m, ks);
1999//
2000//            if (m03)
2001//            {
2002//                t2 = t2.modSquareN(scale, m, ks);
2003//                t1 = t1.modMultiply(t2, m, ks);
2004//                numTerms /= 3; scale *= 3;
2005//            }
2006//            else
2007//            {
2008//                numTerms >>>= 1; scale <<= 1;
2009//            }
2010//        }
2011//
2012//        return t4.modMultiply(t1, m, ks);
2013//    }
2014
2015    public LongArray modInverse(int m, int[] ks)
2016    {
2017        /*
2018         * Fermat's Little Theorem
2019         */
2020//        LongArray A = this;
2021//        LongArray B = A.modSquare(m, ks);
2022//        LongArray R0 = B, R1 = B;
2023//        for (int i = 2; i < m; ++i)
2024//        {
2025//            R1 = R1.modSquare(m, ks);
2026//            R0 = R0.modMultiply(R1, m, ks);
2027//        }
2028//
2029//        return R0;
2030
2031        /*
2032         * Itoh-Tsujii
2033         */
2034//        LongArray B = modSquare(m, ks);
2035//        switch (m)
2036//        {
2037//        case 409:
2038//            return expItohTsujii23(B, m - 1, m, ks);
2039//        case 571:
2040//            return expItohTsujii235(B, m - 1, m, ks);
2041//        case 163:
2042//        case 233:
2043//        case 283:
2044//        default:
2045//            return expItohTsujii2(B, m - 1, m, ks);
2046//        }
2047
2048        /*
2049         * Inversion in F2m using the extended Euclidean algorithm
2050         *
2051         * Input: A nonzero polynomial a(z) of degree at most m-1
2052         * Output: a(z)^(-1) mod f(z)
2053         */
2054        int uzDegree = degree();
2055        if (uzDegree == 0)
2056        {
2057            throw new IllegalStateException();
2058        }
2059        if (uzDegree == 1)
2060        {
2061            return this;
2062        }
2063
2064        // u(z) := a(z)
2065        LongArray uz = (LongArray)clone();
2066
2067        int t = (m + 63) >>> 6;
2068
2069        // v(z) := f(z)
2070        LongArray vz = new LongArray(t);
2071        reduceBit(vz.m_ints, 0, m, m, ks);
2072
2073        // g1(z) := 1, g2(z) := 0
2074        LongArray g1z = new LongArray(t);
2075        g1z.m_ints[0] = 1L;
2076        LongArray g2z = new LongArray(t);
2077
2078        int[] uvDeg = new int[]{ uzDegree, m + 1 };
2079        LongArray[] uv = new LongArray[]{ uz, vz };
2080
2081        int[] ggDeg = new int[]{ 1, 0 };
2082        LongArray[] gg = new LongArray[]{ g1z, g2z };
2083
2084        int b = 1;
2085        int duv1 = uvDeg[b];
2086        int dgg1 = ggDeg[b];
2087        int j = duv1 - uvDeg[1 - b];
2088
2089        for (;;)
2090        {
2091            if (j < 0)
2092            {
2093                j = -j;
2094                uvDeg[b] = duv1;
2095                ggDeg[b] = dgg1;
2096                b = 1 - b;
2097                duv1 = uvDeg[b];
2098                dgg1 = ggDeg[b];
2099            }
2100
2101            uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j);
2102
2103            int duv2 = uv[b].degreeFrom(duv1);
2104            if (duv2 == 0)
2105            {
2106                return gg[1 - b];
2107            }
2108
2109            {
2110                int dgg2 = ggDeg[1 - b];
2111                gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j);
2112                dgg2 += j;
2113
2114                if (dgg2 > dgg1)
2115                {
2116                    dgg1 = dgg2;
2117                }
2118                else if (dgg2 == dgg1)
2119                {
2120                    dgg1 = gg[b].degreeFrom(dgg1);
2121                }
2122            }
2123
2124            j += (duv2 - duv1);
2125            duv1 = duv2;
2126        }
2127    }
2128
2129    public boolean equals(Object o)
2130    {
2131        if (!(o instanceof LongArray))
2132        {
2133            return false;
2134        }
2135        LongArray other = (LongArray) o;
2136        int usedLen = getUsedLength();
2137        if (other.getUsedLength() != usedLen)
2138        {
2139            return false;
2140        }
2141        for (int i = 0; i < usedLen; i++)
2142        {
2143            if (m_ints[i] != other.m_ints[i])
2144            {
2145                return false;
2146            }
2147        }
2148        return true;
2149    }
2150
2151    public int hashCode()
2152    {
2153        int usedLen = getUsedLength();
2154        int hash = 1;
2155        for (int i = 0; i < usedLen; i++)
2156        {
2157            long mi = m_ints[i];
2158            hash *= 31;
2159            hash ^= (int)mi;
2160            hash *= 31;
2161            hash ^= (int)(mi >>> 32);
2162        }
2163        return hash;
2164    }
2165
2166    public Object clone()
2167    {
2168        return new LongArray(Arrays.clone(m_ints));
2169    }
2170
2171    public String toString()
2172    {
2173        int i = getUsedLength();
2174        if (i == 0)
2175        {
2176            return "0";
2177        }
2178
2179        StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i]));
2180        while (--i >= 0)
2181        {
2182            String s = Long.toBinaryString(m_ints[i]);
2183
2184            // Add leading zeroes, except for highest significant word
2185            int len = s.length();
2186            if (len < 64)
2187            {
2188                sb.append(ZEROES.substring(len));
2189            }
2190
2191            sb.append(s);
2192        }
2193        return sb.toString();
2194    }
2195}