1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*
4   This file is part of MemCheck, a heavyweight Valgrind tool for
5   detecting memory errors.
6
7   Copyright (C) 2012-2017  Florian Krohm
8
9   This program is free software; you can redistribute it and/or
10   modify it under the terms of the GNU General Public License as
11   published by the Free Software Foundation; either version 2 of the
12   License, or (at your option) any later version.
13
14   This program is distributed in the hope that it will be useful, but
15   WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17   General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software
21   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22   02111-1307, USA.
23
24   The GNU General Public License is contained in the file COPYING.
25*/
26
27#include <stdio.h>   // fprintf
28#include <assert.h>  // assert
29#if defined(__APPLE__)
30#include <machine/endian.h>
31#define __BYTE_ORDER    BYTE_ORDER
32#define __LITTLE_ENDIAN LITTLE_ENDIAN
33#elif defined(__sun)
34#define __LITTLE_ENDIAN 1234
35#define __BIG_ENDIAN    4321
36#  if defined(_LITTLE_ENDIAN)
37#  define __BYTE_ORDER    __LITTLE_ENDIAN
38#  else
39#  define __BYTE_ORDER    __BIG_ENDIAN
40#  endif
41#else
42#include <endian.h>
43#endif
44#include <inttypes.h>
45#include "vbits.h"
46#include "vtest.h"
47
48
49/* Return the bits of V if they fit into 64-bit. If V has fewer than
50   64 bits, the bit pattern is zero-extended to the left. */
51static uint64_t
52get_bits64(vbits_t v)
53{
54   switch (v.num_bits) {
55   case 1:  return v.bits.u32;
56   case 8:  return v.bits.u8;
57   case 16: return v.bits.u16;
58   case 32: return v.bits.u32;
59   case 64: return v.bits.u64;
60   case 128:
61   case 256:
62      /* fall through */
63   default:
64      panic(__func__);
65   }
66}
67
68void
69print_vbits(FILE *fp, vbits_t v)
70{
71   switch (v.num_bits) {
72   case 1:   fprintf(fp, "%08x",   v.bits.u32); break;
73   case 8:   fprintf(fp, "%02x",   v.bits.u8);  break;
74   case 16:  fprintf(fp, "%04x",   v.bits.u16); break;
75   case 32:  fprintf(fp, "%08x",   v.bits.u32); break;
76   case 64:  fprintf(fp, "%016"PRIx64, v.bits.u64); break;
77   case 128:
78      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
79         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
80         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
81      } else {
82         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
83         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
84      }
85      break;
86   case 256:
87      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
88         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
89         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
90         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
91         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
92      } else {
93         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
94         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
95         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
96         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
97      }
98      break;
99   default:
100      panic(__func__);
101   }
102}
103
104
105/* Return a value where all bits are set to undefined. */
106vbits_t
107undefined_vbits(unsigned num_bits)
108{
109   vbits_t new = { .num_bits = num_bits };
110
111   switch (num_bits) {
112   case   1: new.bits.u32 = 0x01;   break;
113   case   8: new.bits.u8  = 0xff;   break;
114   case  16: new.bits.u16 = 0xffff; break;
115   case  32: new.bits.u32 = ~0;     break;
116   case  64: new.bits.u64 = ~0ull;  break;
117   case 128: new.bits.u128[0] = ~0ull;
118             new.bits.u128[1] = ~0ull;
119             break;
120   case 256: new.bits.u256[0] = ~0ull;
121             new.bits.u256[1] = ~0ull;
122             new.bits.u256[2] = ~0ull;
123             new.bits.u256[3] = ~0ull;
124             break;
125   default:
126      panic(__func__);
127   }
128   return new;
129}
130
131/* The following routines named undefined_vbits_BxE() return a 128-bit
132 * vector with E elements each of size bits.  If any of the bits in an
133 * element is undefined, then return a value where all bits in that
134 * element are undefined.
135 */
136vbits_t
137undefined_vbits_BxE(unsigned int bits, unsigned int elements, vbits_t v)
138{
139   vbits_t new = { .num_bits = v.num_bits };
140   uint64_t mask = ~0ull >> (64 - bits);
141   int i, j;
142
143   assert ((elements % 2) == 0);
144   assert (bits <= 64);
145
146   for (i = 0; i<2; i++) {
147      new.bits.u128[i] = 0ull;
148
149      for (j = 0; j<elements/2; j++) {
150         if ((v.bits.u128[i] & (mask << (j*bits))) != 0)
151            new.bits.u128[i] |= (mask << (j*bits));
152      }
153   }
154   return new;
155}
156
157/* The following routines named undefined_vbits_BxE_rotate() return a 128-bit
158 * vector with E elements each of size bits.  The bits in v are rotated
159 * left by the amounts in the corresponding element of val. Specified rotate
160 * amount field is assumed to be at most 8-bits wide.
161 */
162vbits_t
163undefined_vbits_BxE_rotate(unsigned int bits, unsigned int elements,
164                           vbits_t v, value_t val)
165{
166   vbits_t new = { .num_bits = v.num_bits };
167   uint64_t mask = ~0ull >> (64 - bits);
168   uint64_t const shift_mask = 0xFF;
169   uint64_t element;
170   int i, j;
171   signed char shift;
172   assert ((elements % 2) == 0);
173   assert (bits <= 64);
174
175   for (i = 0; i<2; i++) {
176      new.bits.u128[i] = 0ull;
177
178      for (j = 0; j<elements/2; j++) {
179         element = (v.bits.u128[i] >> (j*bits)) & mask;
180         shift = (int)((val.u128[i] >> (j*bits)) & shift_mask);
181
182         if (shift < 0) {
183            /* right shift */
184            new.bits.u128[i] = element >> -shift;
185
186            /* OR in the bits shifted out into the top of the element */
187            new.bits.u128[i] |= element << (bits + shift);
188         } else {
189            /* left shift */
190            /* upper bits from shift */
191            new.bits.u128[i] = element << shift;
192
193            /* OR in the bits shifted out into the bottom of the element */
194            new.bits.u128[i] |= element >> (bits - shift);
195         }
196      }
197   }
198   return new;
199}
200
201/* Only the even elements of the input are used by the Iop*/
202vbits_t
203undefined_vbits_128_even_element(unsigned int bits, unsigned int elements,
204                                 vbits_t v)
205{
206   int i;
207   uint64_t mask;
208   unsigned int const element_width = 128/elements;
209   vbits_t new = { .num_bits = v.num_bits };
210
211   assert ((elements % 2) == 0);
212   assert (bits <= 64);
213
214   /* Create a 128-bit mask with the bits in the even numbered
215    * elements are all ones.
216    */
217   mask = ~0ull >> (64 - bits);
218
219   for (i = 2; i < elements/2; i=i+2) {
220      mask |= mask << (i * element_width);
221   }
222
223   new.bits.u128[0] = mask & v.bits.u128[0];
224   new.bits.u128[1] = mask & v.bits.u128[1];
225
226   return new;
227}
228
229/* Concatenate bit i from each byte j.  Place concatenated 8 bit value into
230 * byte i of the result.  Do for all i from 0 to 7 and j from 0 to 7 of each
231 * 64-bit element.
232 */
233vbits_t
234undefined_vbits_64x2_transpose(vbits_t v)
235{
236   vbits_t new = { .num_bits = v.num_bits };
237   unsigned int bit, byte, element;
238   uint64_t value, new_value, select_bit;
239
240   for (element = 0; element < 2; element++) {
241      value = v.bits.u128[element];
242      new_value = 0;
243      for (byte = 0; byte < 8; byte++) {
244         for (bit = 0; bit < 8; bit++) {
245            select_bit = 1ULL & (value >> (bit + 8*byte));
246            new_value |= select_bit << (bit*8 + byte);
247         }
248      }
249      new.bits.u128[element] = new_value;
250   }
251   return new;
252}
253
/* The routine takes a 256-bit vector value stored across the two 128-bit
 * source operands src1 and src2.  The size of each element in the input is
 * src_num_bits.  The elements are narrowed to result_num_bits and packed
 * into the result.  If saturate is True, then the all the result bits are
 * set to 1 if the source element can not be represented in result_num_bits.
 *
 * Parameters:
 *   src_num_bits    - width of each source element (must be <= 64)
 *   result_num_bits - width of each narrowed element (< src_num_bits, < 64)
 *   src1_v/src2_v   - vbit patterns of the two 128-bit source operands
 *   src1_value/src2_value - concrete source values (currently unused here)
 *   saturate        - narrow with saturation when true
 */
vbits_t
undefined_vbits_Narrow256_AtoB(unsigned int src_num_bits,
                               unsigned int result_num_bits,
                               vbits_t src1_v, value_t src1_value,
                               vbits_t src2_v, value_t src2_value,
                               bool saturate)
{

   vbits_t new = { .num_bits = src1_v.num_bits };
   unsigned int i;
   uint64_t vbits, new_value;
   uint64_t const src_mask = ~0x0ULL >> (64 - src_num_bits);
   uint64_t const result_mask = ~0x0ULL >> (64 - result_num_bits);
   /* NOTE(review): for src_num_bits < 64 this evaluates to 0, so the
    * narrowing loops below would not execute; presumably this was meant
    * to be 64/src_num_bits.  Only the 64-bit-input case has been
    * exercised (see the note below) -- confirm before relying on other
    * widths. */
   unsigned int num_elements_per_64_bits = src_num_bits/64;
   unsigned int shift;

   /*
    * NOTE:  POWER PPC
    *   the saturated value is 0xFFFF for the vbit is in one of the lower
    *   32-bits of the source.  The saturated result is 0xFFFF0000 if the
    *   vbit is in the upper 32-bits of the source.  Not sure what
    *   the saturated result is in general for a B-bit result.
    *
    *  ONLY TESTED FOR 64 bit input, 32 bit result
    */
   uint64_t const saturated_result = 0xFFFFULL;

   /* Source elements are split between the two source operands */

   assert(src_num_bits <= 64);
   assert(result_num_bits < 64);
   assert(result_num_bits < src_num_bits);

   /* Narrow the elements from src1 to the upper 64-bits of result.
    * Do each of the 64 bit values that make up a u128
    */
   /* First the elements held in src1's low 64-bit chunk. */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in B-bits, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is upper half of the source */
               new_value |= saturated_result << ( shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   /* Then the elements held in src1's high 64-bit chunk; they land above
    * the ones already narrowed from the low chunk. */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i + (num_elements_per_64_bits
                                     * result_num_bits);
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);

            else
               new_value |= saturated_result << shift;

         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }
   /* Store src1's narrowed elements into the more significant 64-bit
    * chunk of the result (u128[1] on little endian hosts). */
   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[1] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[0] = (new_value << 32) | (new_value >> 32);

   new_value = 0;
   /* Narrow the elements from src2 to the lower 64-bits of result.
    * Do each of the 64 bit values that make up a u128
    */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits =  src2_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   /* And src2's high 64-bit chunk; note the shift is computed inline here
    * rather than via the 'shift' variable used above. */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is upper half of the source */
               new_value |= saturated_result << (result_num_bits * i
                                                 + result_num_bits/2
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
            else
               new_value |= saturated_result << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));

         } else {
            new_value |= (vbits & result_mask) << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
         }
      }
   }
   /* Store src2's narrowed elements into the less significant 64-bit
    * chunk of the result. */
   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[0] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[1] = (new_value << 32) | (new_value >> 32);

   return new;
}
403
404/* Return a value where all bits are set to defined. */
405vbits_t
406defined_vbits(unsigned num_bits)
407{
408   vbits_t new = { .num_bits = num_bits };
409
410   switch (num_bits) {
411   case   1: new.bits.u32 = 0x0; break;
412   case   8: new.bits.u8  = 0x0; break;
413   case  16: new.bits.u16 = 0x0; break;
414   case  32: new.bits.u32 = 0x0; break;
415   case  64: new.bits.u64 = 0x0; break;
416   case 128: new.bits.u128[0] = 0x0;
417             new.bits.u128[1] = 0x0;
418             break;
419   case 256: new.bits.u256[0] = 0x0;
420             new.bits.u256[1] = 0x0;
421             new.bits.u256[2] = 0x0;
422             new.bits.u256[3] = 0x0;
423             break;
424   default:
425      panic(__func__);
426   }
427   return new;
428}
429
430
431/* Return 1, if equal. */
432int
433equal_vbits(vbits_t v1, vbits_t v2)
434{
435   assert(v1.num_bits == v2.num_bits);
436
437   switch (v1.num_bits) {
438   case 1:   return v1.bits.u32 == v2.bits.u32;
439   case 8:   return v1.bits.u8  == v2.bits.u8;
440   case 16:  return v1.bits.u16 == v2.bits.u16;
441   case 32:  return v1.bits.u32 == v2.bits.u32;
442   case 64:  return v1.bits.u64 == v2.bits.u64;
443   case 128: return v1.bits.u128[0] == v2.bits.u128[0] &&
444                    v1.bits.u128[1] == v2.bits.u128[1];
445   case 256: return v1.bits.u256[0] == v2.bits.u256[0] &&
446                    v1.bits.u256[1] == v2.bits.u256[1] &&
447                    v1.bits.u256[2] == v2.bits.u256[2] &&
448                    v1.bits.u256[3] == v2.bits.u256[3];
449   default:
450      panic(__func__);
451   }
452}
453
454
455/* Truncate the bit pattern in V1 to NUM_BITS bits */
456vbits_t
457truncate_vbits(vbits_t v, unsigned num_bits)
458{
459   assert(num_bits <= v.num_bits);
460
461   if (num_bits == v.num_bits) return v;
462
463   vbits_t new = { .num_bits = num_bits };
464
465   if (num_bits <= 64) {
466      uint64_t bits;
467
468      if (v.num_bits <= 64)
469         bits = get_bits64(v);
470      else if (v.num_bits == 128)
471         if (__BYTE_ORDER == __LITTLE_ENDIAN)
472            bits = v.bits.u128[0];
473         else
474            bits = v.bits.u128[1];
475      else if (v.num_bits == 256)
476         if (__BYTE_ORDER == __LITTLE_ENDIAN)
477            bits = v.bits.u256[0];
478         else
479            bits = v.bits.u256[3];
480      else
481         panic(__func__);
482
483      switch (num_bits) {
484      case 1:   new.bits.u32 = bits & 0x01;   break;
485      case 8:   new.bits.u8  = bits & 0xff;   break;
486      case 16:  new.bits.u16 = bits & 0xffff; break;
487      case 32:  new.bits.u32 = bits & ~0u;    break;
488      case 64:  new.bits.u64 = bits & ~0ll;   break;
489      default:
490         panic(__func__);
491      }
492      return new;
493   }
494
495   if (num_bits == 128) {
496      assert(v.num_bits == 256);
497      /* From 256 bits to 128 */
498      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
499         new.bits.u128[0] = v.bits.u256[0];
500         new.bits.u128[1] = v.bits.u256[1];
501      } else {
502         new.bits.u128[0] = v.bits.u256[2];
503         new.bits.u128[1] = v.bits.u256[3];
504      }
505      return new;
506   }
507
508   /* Cannot truncate to 256 bits from something larger */
509   panic(__func__);
510}
511
512
/* Helper function to compute left_vbits */
static uint64_t
left64(uint64_t x)
{
   /* left(x) = x | -x: smear the lowest set bit of X into every
      higher position.  Unsigned negation is well defined and equals
      ~x + 1. */
   return x | -x;
}
520
521
/* Apply the left() operation (see left64: smear the lowest set bit
   upwards) to V and extend the result to NUM_BITS bits.  Bits above
   the smeared pattern are filled with the pattern's MSB. */
vbits_t
left_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      /* Small case: smear within 64 bits, then widen. */
      uint64_t bits = left64(get_bits64(v));

      switch (num_bits) {
      case 8:   new.bits.u8  = bits & 0xff;   break;
      case 16:  new.bits.u16 = bits & 0xffff; break;
      case 32:  new.bits.u32 = bits & ~0u;    break;
      case 64:  new.bits.u64 = bits & ~0ll;   break;
      case 128:
         /* Place the smeared value in the least significant chunk and
            sign-extend into the other chunk. */
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         } else {
            new.bits.u128[1] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
         break;
      case 256:
         /* Same idea with four 64-bit chunks. */
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
               new.bits.u256[3] = ~0ull;
            } else {
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
               new.bits.u256[3] = 0;
            }
         } else {
            new.bits.u256[3] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[0] = ~0ull;
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
            } else {
               new.bits.u256[0] = 0;
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
            }
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      /* If the more significant chunk is non-zero, the smear starts
         there and the less significant chunk passes through unchanged;
         otherwise smear the low chunk and extend its MSB upward. */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (v.bits.u128[1] != 0) {
            new.bits.u128[0] = v.bits.u128[0];
            new.bits.u128[1] = left64(v.bits.u128[1]);
         } else {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            if (new.bits.u128[0] & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         }
      } else {
         if (v.bits.u128[0] != 0) {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            new.bits.u128[1] = v.bits.u128[1];
         } else {
            new.bits.u128[1] = left64(v.bits.u128[1]);
            if (new.bits.u128[1] & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
      }
      if (num_bits == 128) return new;

      assert(num_bits == 256);

      /* Widen the 128-bit result to 256 bits, extending with the MSB
         of the 128-bit pattern. */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         uint64_t b1 = new.bits.u128[1];
         uint64_t b0 = new.bits.u128[0];

         new.bits.u256[0] = b0;
         new.bits.u256[1] = b1;

         if (new.bits.u256[1] & (1ull << 63)) {  // MSB is set
            new.bits.u256[2] = ~0ull;
            new.bits.u256[3] = ~0ull;
         } else {
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      } else {
         uint64_t b1 = new.bits.u128[0];
         uint64_t b0 = new.bits.u128[1];

         new.bits.u256[2] = b0;
         new.bits.u256[3] = b1;

         if (new.bits.u256[2] & (1ull << 63)) {  // MSB is set
            new.bits.u256[0] = ~0ull;
            new.bits.u256[1] = ~0ull;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
         }
      }
      return new;
   }

   /* 256-bit inputs are not handled. */
   panic(__func__);
}
649
650
651vbits_t
652or_vbits(vbits_t v1, vbits_t v2)
653{
654   assert(v1.num_bits == v2.num_bits);
655
656   vbits_t new = { .num_bits = v1.num_bits };
657
658   switch (v1.num_bits) {
659   case 8:   new.bits.u8  = v1.bits.u8  | v2.bits.u8;  break;
660   case 16:  new.bits.u16 = v1.bits.u16 | v2.bits.u16; break;
661   case 32:  new.bits.u32 = v1.bits.u32 | v2.bits.u32; break;
662   case 64:  new.bits.u64 = v1.bits.u64 | v2.bits.u64; break;
663   case 128: new.bits.u128[0] = v1.bits.u128[0] | v2.bits.u128[0];
664             new.bits.u128[1] = v1.bits.u128[1] | v2.bits.u128[1];
665             break;
666   case 256: new.bits.u256[0] = v1.bits.u256[0] | v2.bits.u256[0];
667             new.bits.u256[1] = v1.bits.u256[1] | v2.bits.u256[1];
668             new.bits.u256[2] = v1.bits.u256[2] | v2.bits.u256[2];
669             new.bits.u256[3] = v1.bits.u256[3] | v2.bits.u256[3];
670             break;
671   default:
672      panic(__func__);
673   }
674
675   return new;
676}
677
678
679vbits_t
680and_vbits(vbits_t v1, vbits_t v2)
681{
682   assert(v1.num_bits == v2.num_bits);
683
684   vbits_t new = { .num_bits = v1.num_bits };
685
686   switch (v1.num_bits) {
687   case 8:   new.bits.u8  = v1.bits.u8  & v2.bits.u8;  break;
688   case 16:  new.bits.u16 = v1.bits.u16 & v2.bits.u16; break;
689   case 32:  new.bits.u32 = v1.bits.u32 & v2.bits.u32; break;
690   case 64:  new.bits.u64 = v1.bits.u64 & v2.bits.u64; break;
691   case 128: new.bits.u128[0] = v1.bits.u128[0] & v2.bits.u128[0];
692             new.bits.u128[1] = v1.bits.u128[1] & v2.bits.u128[1];
693             break;
694   case 256: new.bits.u256[0] = v1.bits.u256[0] & v2.bits.u256[0];
695             new.bits.u256[1] = v1.bits.u256[1] & v2.bits.u256[1];
696             new.bits.u256[2] = v1.bits.u256[2] & v2.bits.u256[2];
697             new.bits.u256[3] = v1.bits.u256[3] & v2.bits.u256[3];
698             break;
699   default:
700      panic(__func__);
701   }
702
703   return new;
704}
705
706
707vbits_t
708concat_vbits(vbits_t v1, vbits_t v2)
709{
710   assert(v1.num_bits == v2.num_bits);
711
712   vbits_t new = { .num_bits = v1.num_bits * 2 };
713
714   switch (v1.num_bits) {
715   case 8:   new.bits.u16 = v1.bits.u8;
716             new.bits.u16 = (new.bits.u16 << 8)  | v2.bits.u8;  break;
717   case 16:  new.bits.u32 = v1.bits.u16;
718             new.bits.u32 = (new.bits.u32 << 16) | v2.bits.u16; break;
719   case 32:  new.bits.u64 = v1.bits.u32;
720             new.bits.u64 = (new.bits.u64 << 32) | v2.bits.u32; break;
721   case 64:
722      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
723         new.bits.u128[0] = v2.bits.u64;
724         new.bits.u128[1] = v1.bits.u64;
725      } else {
726         new.bits.u128[0] = v1.bits.u64;
727         new.bits.u128[1] = v2.bits.u64;
728      }
729      break;
730   case 128:
731      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
732         new.bits.u256[0] = v2.bits.u128[0];
733         new.bits.u256[1] = v2.bits.u128[1];
734         new.bits.u256[2] = v1.bits.u128[0];
735         new.bits.u256[3] = v1.bits.u128[1];
736      } else {
737         new.bits.u256[0] = v1.bits.u128[0];
738         new.bits.u256[1] = v1.bits.u128[1];
739         new.bits.u256[2] = v2.bits.u128[0];
740         new.bits.u256[3] = v2.bits.u128[1];
741      }
742      break;
743   case 256: /* Fall through */
744   default:
745      panic(__func__);
746   }
747
748   return new;
749}
750
751
752vbits_t
753upper_vbits(vbits_t v)
754{
755   vbits_t new = { .num_bits = v.num_bits / 2 };
756
757   switch (v.num_bits) {
758   case 16:  new.bits.u8  = v.bits.u16 >> 8;  break;
759   case 32:  new.bits.u16 = v.bits.u32 >> 16; break;
760   case 64:  new.bits.u32 = v.bits.u64 >> 32; break;
761   case 128:
762      if (__BYTE_ORDER == __LITTLE_ENDIAN)
763         new.bits.u64 = v.bits.u128[1];
764      else
765         new.bits.u64 = v.bits.u128[0];
766      break;
767   case 256:
768      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
769         new.bits.u128[0] = v.bits.u256[2];
770         new.bits.u128[1] = v.bits.u256[3];
771      } else {
772         new.bits.u128[0] = v.bits.u256[0];
773         new.bits.u128[1] = v.bits.u256[1];
774      }
775      break;
776   case 8:
777   default:
778      panic(__func__);
779   }
780
781   return new;
782}
783
784
785vbits_t
786zextend_vbits(vbits_t v, unsigned num_bits)
787{
788   assert(num_bits >= v.num_bits);
789
790   if (num_bits == v.num_bits) return v;
791
792   vbits_t new = { .num_bits = num_bits };
793
794   if (v.num_bits <= 64) {
795      uint64_t bits = get_bits64(v);
796
797      switch (num_bits) {
798      case 8:   new.bits.u8  = bits; break;
799      case 16:  new.bits.u16 = bits; break;
800      case 32:  new.bits.u32 = bits; break;
801      case 64:  new.bits.u64 = bits; break;
802      case 128:
803         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
804            new.bits.u128[0] = bits;
805            new.bits.u128[1] = 0;
806         } else {
807            new.bits.u128[0] = 0;
808            new.bits.u128[1] = bits;
809         }
810         break;
811      case 256:
812         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
813            new.bits.u256[0] = bits;
814            new.bits.u256[1] = 0;
815            new.bits.u256[2] = 0;
816            new.bits.u256[3] = 0;
817         } else {
818            new.bits.u256[0] = 0;
819            new.bits.u256[1] = 0;
820            new.bits.u256[2] = 0;
821            new.bits.u256[3] = bits;
822         }
823         break;
824      default:
825         panic(__func__);
826      }
827      return new;
828   }
829
830   if (v.num_bits == 128) {
831      assert(num_bits == 256);
832
833      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
834         new.bits.u256[0] = v.bits.u128[0];
835         new.bits.u256[1] = v.bits.u128[1];
836         new.bits.u256[2] = 0;
837         new.bits.u256[3] = 0;
838      } else {
839         new.bits.u256[0] = 0;
840         new.bits.u256[1] = 0;
841         new.bits.u256[2] = v.bits.u128[1];
842         new.bits.u256[3] = v.bits.u128[0];
843      }
844      return new;
845   }
846
847   /* Cannot zero-extend a 256-bit value to something larger */
848   panic(__func__);
849}
850
851
852vbits_t
853sextend_vbits(vbits_t v, unsigned num_bits)
854{
855   assert(num_bits >= v.num_bits);
856
857   int sextend = 0;
858
859   switch (v.num_bits) {
860   case 8:   if (v.bits.u8  == 0x80)             sextend = 1; break;
861   case 16:  if (v.bits.u16 == 0x8000)           sextend = 1; break;
862   case 32:  if (v.bits.u32 == 0x80000000)       sextend = 1; break;
863   case 64:  if (v.bits.u64 == (1ull << 63))     sextend = 1; break;
864   case 128: if (v.bits.u128[1] == (1ull << 63)) sextend = 1; break;
865   case 256: if (v.bits.u256[3] == (1ull << 63)) sextend = 1; break;
866
867   default:
868      panic(__func__);
869   }
870
871   return sextend ? left_vbits(v, num_bits) : zextend_vbits(v, num_bits);
872}
873
874
875vbits_t
876onehot_vbits(unsigned bitno, unsigned num_bits)
877{
878   assert(bitno < num_bits);
879
880   vbits_t new = { .num_bits = num_bits };
881
882   switch (num_bits) {
883   case 1:   new.bits.u32 = 1    << bitno; break;
884   case 8:   new.bits.u8  = 1    << bitno; break;
885   case 16:  new.bits.u16 = 1    << bitno; break;
886   case 32:  new.bits.u32 = 1u   << bitno; break;
887   case 64:  new.bits.u64 = 1ull << bitno; break;
888   case 128:
889      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
890         if (bitno < 64) {
891            new.bits.u128[0] = 1ull << bitno;
892            new.bits.u128[1] = 0;
893         } else {
894            new.bits.u128[0] = 0;
895            new.bits.u128[1] = 1ull << (bitno - 64);
896         }
897      } else {
898         if (bitno < 64) {
899            new.bits.u128[0] = 0;
900            new.bits.u128[1] = 1ull << bitno;
901         } else {
902            new.bits.u128[0] = 1ull << (bitno - 64);
903            new.bits.u128[1] = 0;
904         }
905      }
906      break;
907   case 256:
908      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
909         if (bitno < 64) {
910            new.bits.u256[0] = 1ull << bitno;
911            new.bits.u256[1] = 0;
912            new.bits.u256[2] = 0;
913            new.bits.u256[3] = 0;
914         } else if (bitno < 128) {
915            new.bits.u256[0] = 0;
916            new.bits.u256[1] = 1ull << (bitno - 64);
917            new.bits.u256[2] = 0;
918            new.bits.u256[3] = 0;
919         } else if (bitno < 192) {
920            new.bits.u256[0] = 0;
921            new.bits.u256[1] = 0;
922            new.bits.u256[2] = 1ull << (bitno - 128);
923            new.bits.u256[3] = 0;
924         } else {
925            new.bits.u256[0] = 0;
926            new.bits.u256[1] = 0;
927            new.bits.u256[2] = 0;
928            new.bits.u256[3] = 1ull << (bitno - 192);
929         }
930      } else {
931         if (bitno < 64) {
932            new.bits.u256[0] = 0;
933            new.bits.u256[1] = 0;
934            new.bits.u256[2] = 0;
935            new.bits.u256[3] = 1ull << bitno;
936         } else if (bitno < 128) {
937            new.bits.u256[0] = 0;
938            new.bits.u256[1] = 0;
939            new.bits.u256[2] = 1ull << (bitno - 64);
940            new.bits.u256[3] = 0;
941         } else if (bitno < 192) {
942            new.bits.u256[0] = 0;
943            new.bits.u256[1] = 1ull << (bitno - 128);
944            new.bits.u256[2] = 0;
945            new.bits.u256[3] = 0;
946         } else {
947            new.bits.u256[0] = 1ull << (bitno - 192);
948            new.bits.u256[1] = 0;
949            new.bits.u256[2] = 0;
950            new.bits.u256[3] = 0;
951         }
952      }
953      break;
954   default:
955      panic(__func__);
956   }
957   return new;
958}
959
960
961int
962completely_defined_vbits(vbits_t v)
963{
964   return equal_vbits(v, defined_vbits(v.num_bits));
965}
966
967
968vbits_t
969shl_vbits(vbits_t v, unsigned shift_amount)
970{
971   assert(shift_amount < v.num_bits);
972
973   vbits_t new = v;
974
975   switch (v.num_bits) {
976   case 8:  new.bits.u8  <<= shift_amount; break;
977   case 16: new.bits.u16 <<= shift_amount; break;
978   case 32: new.bits.u32 <<= shift_amount; break;
979   case 64: new.bits.u64 <<= shift_amount; break;
980   case 128: /* fall through */
981   case 256: /* fall through */
982   default:
983      panic(__func__);
984   }
985
986   return new;
987}
988
989
990vbits_t
991shr_vbits(vbits_t v, unsigned shift_amount)
992{
993   assert(shift_amount < v.num_bits);
994
995   vbits_t new = v;
996
997   switch (v.num_bits) {
998   case 8:  new.bits.u8  >>= shift_amount; break;
999   case 16: new.bits.u16 >>= shift_amount; break;
1000   case 32: new.bits.u32 >>= shift_amount; break;
1001   case 64: new.bits.u64 >>= shift_amount; break;
1002   case 128: /* fall through */
1003   case 256: /* fall through */
1004   default:
1005      panic(__func__);
1006   }
1007
1008   return new;
1009}
1010
1011
1012vbits_t
1013sar_vbits(vbits_t v, unsigned shift_amount)
1014{
1015   assert(shift_amount < v.num_bits);
1016
1017   vbits_t new = v;
1018   int msb;
1019
1020   switch (v.num_bits) {
1021   case 8:
1022      new.bits.u8  >>= shift_amount;
1023      msb = (v.bits.u8 & 0x80) != 0;
1024      break;
1025   case 16:
1026      new.bits.u16 >>= shift_amount;
1027      msb = (v.bits.u16 & 0x8000) != 0;
1028      break;
1029   case 32:
1030      new.bits.u32 >>= shift_amount;
1031      msb = (v.bits.u32 & (1u << 31)) != 0;
1032      break;
1033   case 64:
1034      new.bits.u64 >>= shift_amount;
1035      msb = (v.bits.u64 & (1ull << 63)) != 0;
1036      break;
1037   case 128: /* fall through */
1038   case 256: /* fall through */
1039   default:
1040      panic(__func__);
1041   }
1042
1043   if (msb)
1044      new = left_vbits(new, new.num_bits);
1045   return new;
1046}
1047
1048/* Return a value for the POWER Iop_CmpORD class iops */
1049vbits_t
1050cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits)
1051{
1052   vbits_t new = { .num_bits = v1_num_bits };
1053
1054   /* Size of values being compared must be the same */
1055   assert( v1_num_bits == v2_num_bits);
1056
1057   /* Comparison only produces 32-bit or 64-bit value where
1058    * the lower 3 bits are set to indicate, less than, equal and greater than.
1059    */
1060   switch (v1_num_bits) {
1061   case 32:
1062      new.bits.u32 = 0xE;
1063      break;
1064
1065   case 64:
1066      new.bits.u64 = 0xE;
1067      break;
1068
1069   default:
1070      panic(__func__);
1071   }
1072
1073   return new;
1074}
1075