1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29#include <cmath>
30
31#include "simulator-aarch64.h"
32
33namespace vixl {
34namespace aarch64 {
35
36template <>
37double Simulator::FPDefaultNaN<double>() {
38  return kFP64DefaultNaN;
39}
40
41
42template <>
43float Simulator::FPDefaultNaN<float>() {
44  return kFP32DefaultNaN;
45}
46
47// See FPRound for a description of this function.
48static inline double FPRoundToDouble(int64_t sign,
49                                     int64_t exponent,
50                                     uint64_t mantissa,
51                                     FPRounding round_mode) {
52  int64_t bits =
53      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
54                                                                 exponent,
55                                                                 mantissa,
56                                                                 round_mode);
57  return RawbitsToDouble(bits);
58}
59
60
61// See FPRound for a description of this function.
62static inline float FPRoundToFloat(int64_t sign,
63                                   int64_t exponent,
64                                   uint64_t mantissa,
65                                   FPRounding round_mode) {
66  int32_t bits =
67      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
68                                                               exponent,
69                                                               mantissa,
70                                                               round_mode);
71  return RawbitsToFloat(bits);
72}
73
74
75// See FPRound for a description of this function.
76static inline float16 FPRoundToFloat16(int64_t sign,
77                                       int64_t exponent,
78                                       uint64_t mantissa,
79                                       FPRounding round_mode) {
80  return FPRound<float16,
81                 kFloat16ExponentBits,
82                 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
83}
84
85
86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
87  if (src >= 0) {
88    return UFixedToDouble(src, fbits, round);
89  } else if (src == INT64_MIN) {
90    return -UFixedToDouble(src, fbits, round);
91  } else {
92    return -UFixedToDouble(-src, fbits, round);
93  }
94}
95
96
97double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
98  // An input of 0 is a special case because the result is effectively
99  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
100  if (src == 0) {
101    return 0.0;
102  }
103
104  // Calculate the exponent. The highest significant bit will have the value
105  // 2^exponent.
106  const int highest_significant_bit = 63 - CountLeadingZeros(src);
107  const int64_t exponent = highest_significant_bit - fbits;
108
109  return FPRoundToDouble(0, exponent, src, round);
110}
111
112
113float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
114  if (src >= 0) {
115    return UFixedToFloat(src, fbits, round);
116  } else if (src == INT64_MIN) {
117    return -UFixedToFloat(src, fbits, round);
118  } else {
119    return -UFixedToFloat(-src, fbits, round);
120  }
121}
122
123
124float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
125  // An input of 0 is a special case because the result is effectively
126  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
127  if (src == 0) {
128    return 0.0f;
129  }
130
131  // Calculate the exponent. The highest significant bit will have the value
132  // 2^exponent.
133  const int highest_significant_bit = 63 - CountLeadingZeros(src);
134  const int32_t exponent = highest_significant_bit - fbits;
135
136  return FPRoundToFloat(0, exponent, src, round);
137}
138
139
140double Simulator::FPToDouble(float value) {
141  switch (std::fpclassify(value)) {
142    case FP_NAN: {
143      if (IsSignallingNaN(value)) {
144        FPProcessException();
145      }
146      if (ReadDN()) return kFP64DefaultNaN;
147
148      // Convert NaNs as the processor would:
149      //  - The sign is propagated.
150      //  - The payload (mantissa) is transferred entirely, except that the top
151      //    bit is forced to '1', making the result a quiet NaN. The unused
152      //    (low-order) payload bits are set to 0.
153      uint32_t raw = FloatToRawbits(value);
154
155      uint64_t sign = raw >> 31;
156      uint64_t exponent = (1 << 11) - 1;
157      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
158      payload <<= (52 - 23);           // The unused low-order bits should be 0.
159      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
160
161      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
162    }
163
164    case FP_ZERO:
165    case FP_NORMAL:
166    case FP_SUBNORMAL:
167    case FP_INFINITE: {
168      // All other inputs are preserved in a standard cast, because every value
169      // representable using an IEEE-754 float is also representable using an
170      // IEEE-754 double.
171      return static_cast<double>(value);
172    }
173  }
174
175  VIXL_UNREACHABLE();
176  return static_cast<double>(value);
177}
178
179
180float Simulator::FPToFloat(float16 value) {
181  uint32_t sign = value >> 15;
182  uint32_t exponent =
183      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
184                                kFloat16MantissaBits,
185                                value);
186  uint32_t mantissa =
187      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
188
189  switch (Float16Classify(value)) {
190    case FP_ZERO:
191      return (sign == 0) ? 0.0f : -0.0f;
192
193    case FP_INFINITE:
194      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
195
196    case FP_SUBNORMAL: {
197      // Calculate shift required to put mantissa into the most-significant bits
198      // of the destination mantissa.
199      int shift = CountLeadingZeros(mantissa << (32 - 10));
200
201      // Shift mantissa and discard implicit '1'.
202      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
203      mantissa &= (1 << kFloatMantissaBits) - 1;
204
205      // Adjust the exponent for the shift applied, and rebias.
206      exponent = exponent - shift + (-15 + 127);
207      break;
208    }
209
210    case FP_NAN:
211      if (IsSignallingNaN(value)) {
212        FPProcessException();
213      }
214      if (ReadDN()) return kFP32DefaultNaN;
215
216      // Convert NaNs as the processor would:
217      //  - The sign is propagated.
218      //  - The payload (mantissa) is transferred entirely, except that the top
219      //    bit is forced to '1', making the result a quiet NaN. The unused
220      //    (low-order) payload bits are set to 0.
221      exponent = (1 << kFloatExponentBits) - 1;
222
223      // Increase bits in mantissa, making low-order bits 0.
224      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
225      mantissa |= 1 << 22;  // Force a quiet NaN.
226      break;
227
228    case FP_NORMAL:
229      // Increase bits in mantissa, making low-order bits 0.
230      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
231
232      // Change exponent bias.
233      exponent += (-15 + 127);
234      break;
235
236    default:
237      VIXL_UNREACHABLE();
238  }
239  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
240                        mantissa);
241}
242
243
244float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
245  // Only the FPTieEven rounding mode is implemented.
246  VIXL_ASSERT(round_mode == FPTieEven);
247  USE(round_mode);
248
249  uint32_t raw = FloatToRawbits(value);
250  int32_t sign = raw >> 31;
251  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
252  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
253
254  switch (std::fpclassify(value)) {
255    case FP_NAN: {
256      if (IsSignallingNaN(value)) {
257        FPProcessException();
258      }
259      if (ReadDN()) return kFP16DefaultNaN;
260
261      // Convert NaNs as the processor would:
262      //  - The sign is propagated.
263      //  - The payload (mantissa) is transferred as much as possible, except
264      //    that the top bit is forced to '1', making the result a quiet NaN.
265      float16 result =
266          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
268      result |= (1 << 9);  // Force a quiet NaN;
269      return result;
270    }
271
272    case FP_ZERO:
273      return (sign == 0) ? 0 : 0x8000;
274
275    case FP_INFINITE:
276      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277
278    case FP_NORMAL:
279    case FP_SUBNORMAL: {
280      // Convert float-to-half as the processor would, assuming that FPCR.FZ
281      // (flush-to-zero) is not set.
282
283      // Add the implicit '1' bit to the mantissa.
284      mantissa += (1 << 23);
285      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286    }
287  }
288
289  VIXL_UNREACHABLE();
290  return 0;
291}
292
293
294float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
295  // Only the FPTieEven rounding mode is implemented.
296  VIXL_ASSERT(round_mode == FPTieEven);
297  USE(round_mode);
298
299  uint64_t raw = DoubleToRawbits(value);
300  int32_t sign = raw >> 63;
301  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
302  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
303
304  switch (std::fpclassify(value)) {
305    case FP_NAN: {
306      if (IsSignallingNaN(value)) {
307        FPProcessException();
308      }
309      if (ReadDN()) return kFP16DefaultNaN;
310
311      // Convert NaNs as the processor would:
312      //  - The sign is propagated.
313      //  - The payload (mantissa) is transferred as much as possible, except
314      //    that the top bit is forced to '1', making the result a quiet NaN.
315      float16 result =
316          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
317      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
318      result |= (1 << 9);  // Force a quiet NaN;
319      return result;
320    }
321
322    case FP_ZERO:
323      return (sign == 0) ? 0 : 0x8000;
324
325    case FP_INFINITE:
326      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
327
328    case FP_NORMAL:
329    case FP_SUBNORMAL: {
330      // Convert double-to-half as the processor would, assuming that FPCR.FZ
331      // (flush-to-zero) is not set.
332
333      // Add the implicit '1' bit to the mantissa.
334      mantissa += (UINT64_C(1) << 52);
335      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
336    }
337  }
338
339  VIXL_UNREACHABLE();
340  return 0;
341}
342
343
344float Simulator::FPToFloat(double value, FPRounding round_mode) {
345  // Only the FPTieEven rounding mode is implemented.
346  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
347  USE(round_mode);
348
349  switch (std::fpclassify(value)) {
350    case FP_NAN: {
351      if (IsSignallingNaN(value)) {
352        FPProcessException();
353      }
354      if (ReadDN()) return kFP32DefaultNaN;
355
356      // Convert NaNs as the processor would:
357      //  - The sign is propagated.
358      //  - The payload (mantissa) is transferred as much as possible, except
359      //    that the top bit is forced to '1', making the result a quiet NaN.
360      uint64_t raw = DoubleToRawbits(value);
361
362      uint32_t sign = raw >> 63;
363      uint32_t exponent = (1 << 8) - 1;
364      uint32_t payload =
365          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
366      payload |= (1 << 22);  // Force a quiet NaN.
367
368      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
369    }
370
371    case FP_ZERO:
372    case FP_INFINITE: {
373      // In a C++ cast, any value representable in the target type will be
374      // unchanged. This is always the case for +/-0.0 and infinities.
375      return static_cast<float>(value);
376    }
377
378    case FP_NORMAL:
379    case FP_SUBNORMAL: {
380      // Convert double-to-float as the processor would, assuming that FPCR.FZ
381      // (flush-to-zero) is not set.
382      uint64_t raw = DoubleToRawbits(value);
383      // Extract the IEEE-754 double components.
384      uint32_t sign = raw >> 63;
385      // Extract the exponent and remove the IEEE-754 encoding bias.
386      int32_t exponent =
387          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
388      // Extract the mantissa and add the implicit '1' bit.
389      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
390      if (std::fpclassify(value) == FP_NORMAL) {
391        mantissa |= (UINT64_C(1) << 52);
392      }
393      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
394    }
395  }
396
397  VIXL_UNREACHABLE();
398  return value;
399}
400
401
402void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
403  dst.ClearForWrite(vform);
404  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
405    dst.ReadUintFromMem(vform, i, addr);
406    addr += LaneSizeInBytesFromFormat(vform);
407  }
408}
409
410
411void Simulator::ld1(VectorFormat vform,
412                    LogicVRegister dst,
413                    int index,
414                    uint64_t addr) {
415  dst.ReadUintFromMem(vform, index, addr);
416}
417
418
419void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
420  dst.ClearForWrite(vform);
421  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
422    dst.ReadUintFromMem(vform, i, addr);
423  }
424}
425
426
427void Simulator::ld2(VectorFormat vform,
428                    LogicVRegister dst1,
429                    LogicVRegister dst2,
430                    uint64_t addr1) {
431  dst1.ClearForWrite(vform);
432  dst2.ClearForWrite(vform);
433  int esize = LaneSizeInBytesFromFormat(vform);
434  uint64_t addr2 = addr1 + esize;
435  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
436    dst1.ReadUintFromMem(vform, i, addr1);
437    dst2.ReadUintFromMem(vform, i, addr2);
438    addr1 += 2 * esize;
439    addr2 += 2 * esize;
440  }
441}
442
443
444void Simulator::ld2(VectorFormat vform,
445                    LogicVRegister dst1,
446                    LogicVRegister dst2,
447                    int index,
448                    uint64_t addr1) {
449  dst1.ClearForWrite(vform);
450  dst2.ClearForWrite(vform);
451  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
452  dst1.ReadUintFromMem(vform, index, addr1);
453  dst2.ReadUintFromMem(vform, index, addr2);
454}
455
456
457void Simulator::ld2r(VectorFormat vform,
458                     LogicVRegister dst1,
459                     LogicVRegister dst2,
460                     uint64_t addr) {
461  dst1.ClearForWrite(vform);
462  dst2.ClearForWrite(vform);
463  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
464  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465    dst1.ReadUintFromMem(vform, i, addr);
466    dst2.ReadUintFromMem(vform, i, addr2);
467  }
468}
469
470
471void Simulator::ld3(VectorFormat vform,
472                    LogicVRegister dst1,
473                    LogicVRegister dst2,
474                    LogicVRegister dst3,
475                    uint64_t addr1) {
476  dst1.ClearForWrite(vform);
477  dst2.ClearForWrite(vform);
478  dst3.ClearForWrite(vform);
479  int esize = LaneSizeInBytesFromFormat(vform);
480  uint64_t addr2 = addr1 + esize;
481  uint64_t addr3 = addr2 + esize;
482  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
483    dst1.ReadUintFromMem(vform, i, addr1);
484    dst2.ReadUintFromMem(vform, i, addr2);
485    dst3.ReadUintFromMem(vform, i, addr3);
486    addr1 += 3 * esize;
487    addr2 += 3 * esize;
488    addr3 += 3 * esize;
489  }
490}
491
492
493void Simulator::ld3(VectorFormat vform,
494                    LogicVRegister dst1,
495                    LogicVRegister dst2,
496                    LogicVRegister dst3,
497                    int index,
498                    uint64_t addr1) {
499  dst1.ClearForWrite(vform);
500  dst2.ClearForWrite(vform);
501  dst3.ClearForWrite(vform);
502  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
503  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
504  dst1.ReadUintFromMem(vform, index, addr1);
505  dst2.ReadUintFromMem(vform, index, addr2);
506  dst3.ReadUintFromMem(vform, index, addr3);
507}
508
509
510void Simulator::ld3r(VectorFormat vform,
511                     LogicVRegister dst1,
512                     LogicVRegister dst2,
513                     LogicVRegister dst3,
514                     uint64_t addr) {
515  dst1.ClearForWrite(vform);
516  dst2.ClearForWrite(vform);
517  dst3.ClearForWrite(vform);
518  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
519  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
520  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
521    dst1.ReadUintFromMem(vform, i, addr);
522    dst2.ReadUintFromMem(vform, i, addr2);
523    dst3.ReadUintFromMem(vform, i, addr3);
524  }
525}
526
527
528void Simulator::ld4(VectorFormat vform,
529                    LogicVRegister dst1,
530                    LogicVRegister dst2,
531                    LogicVRegister dst3,
532                    LogicVRegister dst4,
533                    uint64_t addr1) {
534  dst1.ClearForWrite(vform);
535  dst2.ClearForWrite(vform);
536  dst3.ClearForWrite(vform);
537  dst4.ClearForWrite(vform);
538  int esize = LaneSizeInBytesFromFormat(vform);
539  uint64_t addr2 = addr1 + esize;
540  uint64_t addr3 = addr2 + esize;
541  uint64_t addr4 = addr3 + esize;
542  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
543    dst1.ReadUintFromMem(vform, i, addr1);
544    dst2.ReadUintFromMem(vform, i, addr2);
545    dst3.ReadUintFromMem(vform, i, addr3);
546    dst4.ReadUintFromMem(vform, i, addr4);
547    addr1 += 4 * esize;
548    addr2 += 4 * esize;
549    addr3 += 4 * esize;
550    addr4 += 4 * esize;
551  }
552}
553
554
555void Simulator::ld4(VectorFormat vform,
556                    LogicVRegister dst1,
557                    LogicVRegister dst2,
558                    LogicVRegister dst3,
559                    LogicVRegister dst4,
560                    int index,
561                    uint64_t addr1) {
562  dst1.ClearForWrite(vform);
563  dst2.ClearForWrite(vform);
564  dst3.ClearForWrite(vform);
565  dst4.ClearForWrite(vform);
566  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
567  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
568  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
569  dst1.ReadUintFromMem(vform, index, addr1);
570  dst2.ReadUintFromMem(vform, index, addr2);
571  dst3.ReadUintFromMem(vform, index, addr3);
572  dst4.ReadUintFromMem(vform, index, addr4);
573}
574
575
576void Simulator::ld4r(VectorFormat vform,
577                     LogicVRegister dst1,
578                     LogicVRegister dst2,
579                     LogicVRegister dst3,
580                     LogicVRegister dst4,
581                     uint64_t addr) {
582  dst1.ClearForWrite(vform);
583  dst2.ClearForWrite(vform);
584  dst3.ClearForWrite(vform);
585  dst4.ClearForWrite(vform);
586  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
587  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
588  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
589  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590    dst1.ReadUintFromMem(vform, i, addr);
591    dst2.ReadUintFromMem(vform, i, addr2);
592    dst3.ReadUintFromMem(vform, i, addr3);
593    dst4.ReadUintFromMem(vform, i, addr4);
594  }
595}
596
597
598void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
599  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
600    src.WriteUintToMem(vform, i, addr);
601    addr += LaneSizeInBytesFromFormat(vform);
602  }
603}
604
605
606void Simulator::st1(VectorFormat vform,
607                    LogicVRegister src,
608                    int index,
609                    uint64_t addr) {
610  src.WriteUintToMem(vform, index, addr);
611}
612
613
614void Simulator::st2(VectorFormat vform,
615                    LogicVRegister dst,
616                    LogicVRegister dst2,
617                    uint64_t addr) {
618  int esize = LaneSizeInBytesFromFormat(vform);
619  uint64_t addr2 = addr + esize;
620  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
621    dst.WriteUintToMem(vform, i, addr);
622    dst2.WriteUintToMem(vform, i, addr2);
623    addr += 2 * esize;
624    addr2 += 2 * esize;
625  }
626}
627
628
629void Simulator::st2(VectorFormat vform,
630                    LogicVRegister dst,
631                    LogicVRegister dst2,
632                    int index,
633                    uint64_t addr) {
634  int esize = LaneSizeInBytesFromFormat(vform);
635  dst.WriteUintToMem(vform, index, addr);
636  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
637}
638
639
640void Simulator::st3(VectorFormat vform,
641                    LogicVRegister dst,
642                    LogicVRegister dst2,
643                    LogicVRegister dst3,
644                    uint64_t addr) {
645  int esize = LaneSizeInBytesFromFormat(vform);
646  uint64_t addr2 = addr + esize;
647  uint64_t addr3 = addr2 + esize;
648  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
649    dst.WriteUintToMem(vform, i, addr);
650    dst2.WriteUintToMem(vform, i, addr2);
651    dst3.WriteUintToMem(vform, i, addr3);
652    addr += 3 * esize;
653    addr2 += 3 * esize;
654    addr3 += 3 * esize;
655  }
656}
657
658
659void Simulator::st3(VectorFormat vform,
660                    LogicVRegister dst,
661                    LogicVRegister dst2,
662                    LogicVRegister dst3,
663                    int index,
664                    uint64_t addr) {
665  int esize = LaneSizeInBytesFromFormat(vform);
666  dst.WriteUintToMem(vform, index, addr);
667  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
668  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
669}
670
671
672void Simulator::st4(VectorFormat vform,
673                    LogicVRegister dst,
674                    LogicVRegister dst2,
675                    LogicVRegister dst3,
676                    LogicVRegister dst4,
677                    uint64_t addr) {
678  int esize = LaneSizeInBytesFromFormat(vform);
679  uint64_t addr2 = addr + esize;
680  uint64_t addr3 = addr2 + esize;
681  uint64_t addr4 = addr3 + esize;
682  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
683    dst.WriteUintToMem(vform, i, addr);
684    dst2.WriteUintToMem(vform, i, addr2);
685    dst3.WriteUintToMem(vform, i, addr3);
686    dst4.WriteUintToMem(vform, i, addr4);
687    addr += 4 * esize;
688    addr2 += 4 * esize;
689    addr3 += 4 * esize;
690    addr4 += 4 * esize;
691  }
692}
693
694
695void Simulator::st4(VectorFormat vform,
696                    LogicVRegister dst,
697                    LogicVRegister dst2,
698                    LogicVRegister dst3,
699                    LogicVRegister dst4,
700                    int index,
701                    uint64_t addr) {
702  int esize = LaneSizeInBytesFromFormat(vform);
703  dst.WriteUintToMem(vform, index, addr);
704  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
705  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
706  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
707}
708
709
710LogicVRegister Simulator::cmp(VectorFormat vform,
711                              LogicVRegister dst,
712                              const LogicVRegister& src1,
713                              const LogicVRegister& src2,
714                              Condition cond) {
715  dst.ClearForWrite(vform);
716  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
717    int64_t sa = src1.Int(vform, i);
718    int64_t sb = src2.Int(vform, i);
719    uint64_t ua = src1.Uint(vform, i);
720    uint64_t ub = src2.Uint(vform, i);
721    bool result = false;
722    switch (cond) {
723      case eq:
724        result = (ua == ub);
725        break;
726      case ge:
727        result = (sa >= sb);
728        break;
729      case gt:
730        result = (sa > sb);
731        break;
732      case hi:
733        result = (ua > ub);
734        break;
735      case hs:
736        result = (ua >= ub);
737        break;
738      case lt:
739        result = (sa < sb);
740        break;
741      case le:
742        result = (sa <= sb);
743        break;
744      default:
745        VIXL_UNREACHABLE();
746        break;
747    }
748    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
749  }
750  return dst;
751}
752
753
754LogicVRegister Simulator::cmp(VectorFormat vform,
755                              LogicVRegister dst,
756                              const LogicVRegister& src1,
757                              int imm,
758                              Condition cond) {
759  SimVRegister temp;
760  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
761  return cmp(vform, dst, src1, imm_reg, cond);
762}
763
764
765LogicVRegister Simulator::cmptst(VectorFormat vform,
766                                 LogicVRegister dst,
767                                 const LogicVRegister& src1,
768                                 const LogicVRegister& src2) {
769  dst.ClearForWrite(vform);
770  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
771    uint64_t ua = src1.Uint(vform, i);
772    uint64_t ub = src2.Uint(vform, i);
773    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
774  }
775  return dst;
776}
777
778
779LogicVRegister Simulator::add(VectorFormat vform,
780                              LogicVRegister dst,
781                              const LogicVRegister& src1,
782                              const LogicVRegister& src2) {
783  int lane_size = LaneSizeInBitsFromFormat(vform);
784  dst.ClearForWrite(vform);
785  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
786    // Test for unsigned saturation.
787    uint64_t ua = src1.UintLeftJustified(vform, i);
788    uint64_t ub = src2.UintLeftJustified(vform, i);
789    uint64_t ur = ua + ub;
790    if (ur < ua) {
791      dst.SetUnsignedSat(i, true);
792    }
793
794    // Test for signed saturation.
795    bool pos_a = (ua >> 63) == 0;
796    bool pos_b = (ub >> 63) == 0;
797    bool pos_r = (ur >> 63) == 0;
798    // If the signs of the operands are the same, but different from the result,
799    // there was an overflow.
800    if ((pos_a == pos_b) && (pos_a != pos_r)) {
801      dst.SetSignedSat(i, pos_a);
802    }
803
804    dst.SetInt(vform, i, ur >> (64 - lane_size));
805  }
806  return dst;
807}
808
809
810LogicVRegister Simulator::addp(VectorFormat vform,
811                               LogicVRegister dst,
812                               const LogicVRegister& src1,
813                               const LogicVRegister& src2) {
814  SimVRegister temp1, temp2;
815  uzp1(vform, temp1, src1, src2);
816  uzp2(vform, temp2, src1, src2);
817  add(vform, dst, temp1, temp2);
818  return dst;
819}
820
821
822LogicVRegister Simulator::mla(VectorFormat vform,
823                              LogicVRegister dst,
824                              const LogicVRegister& src1,
825                              const LogicVRegister& src2) {
826  SimVRegister temp;
827  mul(vform, temp, src1, src2);
828  add(vform, dst, dst, temp);
829  return dst;
830}
831
832
833LogicVRegister Simulator::mls(VectorFormat vform,
834                              LogicVRegister dst,
835                              const LogicVRegister& src1,
836                              const LogicVRegister& src2) {
837  SimVRegister temp;
838  mul(vform, temp, src1, src2);
839  sub(vform, dst, dst, temp);
840  return dst;
841}
842
843
844LogicVRegister Simulator::mul(VectorFormat vform,
845                              LogicVRegister dst,
846                              const LogicVRegister& src1,
847                              const LogicVRegister& src2) {
848  dst.ClearForWrite(vform);
849  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
850    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
851  }
852  return dst;
853}
854
855
856LogicVRegister Simulator::mul(VectorFormat vform,
857                              LogicVRegister dst,
858                              const LogicVRegister& src1,
859                              const LogicVRegister& src2,
860                              int index) {
861  SimVRegister temp;
862  VectorFormat indexform = VectorFormatFillQ(vform);
863  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
864}
865
866
867LogicVRegister Simulator::mla(VectorFormat vform,
868                              LogicVRegister dst,
869                              const LogicVRegister& src1,
870                              const LogicVRegister& src2,
871                              int index) {
872  SimVRegister temp;
873  VectorFormat indexform = VectorFormatFillQ(vform);
874  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
875}
876
877
878LogicVRegister Simulator::mls(VectorFormat vform,
879                              LogicVRegister dst,
880                              const LogicVRegister& src1,
881                              const LogicVRegister& src2,
882                              int index) {
883  SimVRegister temp;
884  VectorFormat indexform = VectorFormatFillQ(vform);
885  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
886}
887
888
889LogicVRegister Simulator::smull(VectorFormat vform,
890                                LogicVRegister dst,
891                                const LogicVRegister& src1,
892                                const LogicVRegister& src2,
893                                int index) {
894  SimVRegister temp;
895  VectorFormat indexform =
896      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
897  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
898}
899
900
901LogicVRegister Simulator::smull2(VectorFormat vform,
902                                 LogicVRegister dst,
903                                 const LogicVRegister& src1,
904                                 const LogicVRegister& src2,
905                                 int index) {
906  SimVRegister temp;
907  VectorFormat indexform =
908      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
909  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
910}
911
912
913LogicVRegister Simulator::umull(VectorFormat vform,
914                                LogicVRegister dst,
915                                const LogicVRegister& src1,
916                                const LogicVRegister& src2,
917                                int index) {
918  SimVRegister temp;
919  VectorFormat indexform =
920      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
921  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
922}
923
924
925LogicVRegister Simulator::umull2(VectorFormat vform,
926                                 LogicVRegister dst,
927                                 const LogicVRegister& src1,
928                                 const LogicVRegister& src2,
929                                 int index) {
930  SimVRegister temp;
931  VectorFormat indexform =
932      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
933  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
934}
935
936
937LogicVRegister Simulator::smlal(VectorFormat vform,
938                                LogicVRegister dst,
939                                const LogicVRegister& src1,
940                                const LogicVRegister& src2,
941                                int index) {
942  SimVRegister temp;
943  VectorFormat indexform =
944      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
945  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
946}
947
948
949LogicVRegister Simulator::smlal2(VectorFormat vform,
950                                 LogicVRegister dst,
951                                 const LogicVRegister& src1,
952                                 const LogicVRegister& src2,
953                                 int index) {
954  SimVRegister temp;
955  VectorFormat indexform =
956      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
957  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
958}
959
960
961LogicVRegister Simulator::umlal(VectorFormat vform,
962                                LogicVRegister dst,
963                                const LogicVRegister& src1,
964                                const LogicVRegister& src2,
965                                int index) {
966  SimVRegister temp;
967  VectorFormat indexform =
968      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
969  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
970}
971
972
973LogicVRegister Simulator::umlal2(VectorFormat vform,
974                                 LogicVRegister dst,
975                                 const LogicVRegister& src1,
976                                 const LogicVRegister& src2,
977                                 int index) {
978  SimVRegister temp;
979  VectorFormat indexform =
980      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
981  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
982}
983
984
985LogicVRegister Simulator::smlsl(VectorFormat vform,
986                                LogicVRegister dst,
987                                const LogicVRegister& src1,
988                                const LogicVRegister& src2,
989                                int index) {
990  SimVRegister temp;
991  VectorFormat indexform =
992      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
993  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
994}
995
996
997LogicVRegister Simulator::smlsl2(VectorFormat vform,
998                                 LogicVRegister dst,
999                                 const LogicVRegister& src1,
1000                                 const LogicVRegister& src2,
1001                                 int index) {
1002  SimVRegister temp;
1003  VectorFormat indexform =
1004      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1005  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1006}
1007
1008
1009LogicVRegister Simulator::umlsl(VectorFormat vform,
1010                                LogicVRegister dst,
1011                                const LogicVRegister& src1,
1012                                const LogicVRegister& src2,
1013                                int index) {
1014  SimVRegister temp;
1015  VectorFormat indexform =
1016      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1017  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1018}
1019
1020
1021LogicVRegister Simulator::umlsl2(VectorFormat vform,
1022                                 LogicVRegister dst,
1023                                 const LogicVRegister& src1,
1024                                 const LogicVRegister& src2,
1025                                 int index) {
1026  SimVRegister temp;
1027  VectorFormat indexform =
1028      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1029  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1030}
1031
1032
1033LogicVRegister Simulator::sqdmull(VectorFormat vform,
1034                                  LogicVRegister dst,
1035                                  const LogicVRegister& src1,
1036                                  const LogicVRegister& src2,
1037                                  int index) {
1038  SimVRegister temp;
1039  VectorFormat indexform =
1040      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1041  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1042}
1043
1044
1045LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1046                                   LogicVRegister dst,
1047                                   const LogicVRegister& src1,
1048                                   const LogicVRegister& src2,
1049                                   int index) {
1050  SimVRegister temp;
1051  VectorFormat indexform =
1052      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1053  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1054}
1055
1056
1057LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1058                                  LogicVRegister dst,
1059                                  const LogicVRegister& src1,
1060                                  const LogicVRegister& src2,
1061                                  int index) {
1062  SimVRegister temp;
1063  VectorFormat indexform =
1064      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1065  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1066}
1067
1068
1069LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1070                                   LogicVRegister dst,
1071                                   const LogicVRegister& src1,
1072                                   const LogicVRegister& src2,
1073                                   int index) {
1074  SimVRegister temp;
1075  VectorFormat indexform =
1076      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1077  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1078}
1079
1080
1081LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1082                                  LogicVRegister dst,
1083                                  const LogicVRegister& src1,
1084                                  const LogicVRegister& src2,
1085                                  int index) {
1086  SimVRegister temp;
1087  VectorFormat indexform =
1088      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1089  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1090}
1091
1092
1093LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1094                                   LogicVRegister dst,
1095                                   const LogicVRegister& src1,
1096                                   const LogicVRegister& src2,
1097                                   int index) {
1098  SimVRegister temp;
1099  VectorFormat indexform =
1100      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1101  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1102}
1103
1104
1105LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1106                                  LogicVRegister dst,
1107                                  const LogicVRegister& src1,
1108                                  const LogicVRegister& src2,
1109                                  int index) {
1110  SimVRegister temp;
1111  VectorFormat indexform = VectorFormatFillQ(vform);
1112  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1113}
1114
1115
1116LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1117                                   LogicVRegister dst,
1118                                   const LogicVRegister& src1,
1119                                   const LogicVRegister& src2,
1120                                   int index) {
1121  SimVRegister temp;
1122  VectorFormat indexform = VectorFormatFillQ(vform);
1123  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1124}
1125
1126
1127uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1128  uint16_t result = 0;
1129  uint16_t extended_op2 = op2;
1130  for (int i = 0; i < 8; ++i) {
1131    if ((op1 >> i) & 1) {
1132      result = result ^ (extended_op2 << i);
1133    }
1134  }
1135  return result;
1136}
1137
1138
1139LogicVRegister Simulator::pmul(VectorFormat vform,
1140                               LogicVRegister dst,
1141                               const LogicVRegister& src1,
1142                               const LogicVRegister& src2) {
1143  dst.ClearForWrite(vform);
1144  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145    dst.SetUint(vform,
1146                i,
1147                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1148  }
1149  return dst;
1150}
1151
1152
1153LogicVRegister Simulator::pmull(VectorFormat vform,
1154                                LogicVRegister dst,
1155                                const LogicVRegister& src1,
1156                                const LogicVRegister& src2) {
1157  VectorFormat vform_src = VectorFormatHalfWidth(vform);
1158  dst.ClearForWrite(vform);
1159  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1160    dst.SetUint(vform,
1161                i,
1162                PolynomialMult(src1.Uint(vform_src, i),
1163                               src2.Uint(vform_src, i)));
1164  }
1165  return dst;
1166}
1167
1168
1169LogicVRegister Simulator::pmull2(VectorFormat vform,
1170                                 LogicVRegister dst,
1171                                 const LogicVRegister& src1,
1172                                 const LogicVRegister& src2) {
1173  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1174  dst.ClearForWrite(vform);
1175  int lane_count = LaneCountFromFormat(vform);
1176  for (int i = 0; i < lane_count; i++) {
1177    dst.SetUint(vform,
1178                i,
1179                PolynomialMult(src1.Uint(vform_src, lane_count + i),
1180                               src2.Uint(vform_src, lane_count + i)));
1181  }
1182  return dst;
1183}
1184
1185
1186LogicVRegister Simulator::sub(VectorFormat vform,
1187                              LogicVRegister dst,
1188                              const LogicVRegister& src1,
1189                              const LogicVRegister& src2) {
1190  int lane_size = LaneSizeInBitsFromFormat(vform);
1191  dst.ClearForWrite(vform);
1192  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193    // Test for unsigned saturation.
1194    uint64_t ua = src1.UintLeftJustified(vform, i);
1195    uint64_t ub = src2.UintLeftJustified(vform, i);
1196    uint64_t ur = ua - ub;
1197    if (ub > ua) {
1198      dst.SetUnsignedSat(i, false);
1199    }
1200
1201    // Test for signed saturation.
1202    bool pos_a = (ua >> 63) == 0;
1203    bool pos_b = (ub >> 63) == 0;
1204    bool pos_r = (ur >> 63) == 0;
1205    // If the signs of the operands are different, and the sign of the first
1206    // operand doesn't match the result, there was an overflow.
1207    if ((pos_a != pos_b) && (pos_a != pos_r)) {
1208      dst.SetSignedSat(i, pos_a);
1209    }
1210
1211    dst.SetInt(vform, i, ur >> (64 - lane_size));
1212  }
1213  return dst;
1214}
1215
1216
1217LogicVRegister Simulator::and_(VectorFormat vform,
1218                               LogicVRegister dst,
1219                               const LogicVRegister& src1,
1220                               const LogicVRegister& src2) {
1221  dst.ClearForWrite(vform);
1222  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1223    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1224  }
1225  return dst;
1226}
1227
1228
1229LogicVRegister Simulator::orr(VectorFormat vform,
1230                              LogicVRegister dst,
1231                              const LogicVRegister& src1,
1232                              const LogicVRegister& src2) {
1233  dst.ClearForWrite(vform);
1234  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1235    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1236  }
1237  return dst;
1238}
1239
1240
1241LogicVRegister Simulator::orn(VectorFormat vform,
1242                              LogicVRegister dst,
1243                              const LogicVRegister& src1,
1244                              const LogicVRegister& src2) {
1245  dst.ClearForWrite(vform);
1246  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1247    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1248  }
1249  return dst;
1250}
1251
1252
1253LogicVRegister Simulator::eor(VectorFormat vform,
1254                              LogicVRegister dst,
1255                              const LogicVRegister& src1,
1256                              const LogicVRegister& src2) {
1257  dst.ClearForWrite(vform);
1258  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1259    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1260  }
1261  return dst;
1262}
1263
1264
1265LogicVRegister Simulator::bic(VectorFormat vform,
1266                              LogicVRegister dst,
1267                              const LogicVRegister& src1,
1268                              const LogicVRegister& src2) {
1269  dst.ClearForWrite(vform);
1270  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1271    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1272  }
1273  return dst;
1274}
1275
1276
1277LogicVRegister Simulator::bic(VectorFormat vform,
1278                              LogicVRegister dst,
1279                              const LogicVRegister& src,
1280                              uint64_t imm) {
1281  uint64_t result[16];
1282  int laneCount = LaneCountFromFormat(vform);
1283  for (int i = 0; i < laneCount; ++i) {
1284    result[i] = src.Uint(vform, i) & ~imm;
1285  }
1286  dst.ClearForWrite(vform);
1287  for (int i = 0; i < laneCount; ++i) {
1288    dst.SetUint(vform, i, result[i]);
1289  }
1290  return dst;
1291}
1292
1293
1294LogicVRegister Simulator::bif(VectorFormat vform,
1295                              LogicVRegister dst,
1296                              const LogicVRegister& src1,
1297                              const LogicVRegister& src2) {
1298  dst.ClearForWrite(vform);
1299  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1300    uint64_t operand1 = dst.Uint(vform, i);
1301    uint64_t operand2 = ~src2.Uint(vform, i);
1302    uint64_t operand3 = src1.Uint(vform, i);
1303    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1304    dst.SetUint(vform, i, result);
1305  }
1306  return dst;
1307}
1308
1309
1310LogicVRegister Simulator::bit(VectorFormat vform,
1311                              LogicVRegister dst,
1312                              const LogicVRegister& src1,
1313                              const LogicVRegister& src2) {
1314  dst.ClearForWrite(vform);
1315  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316    uint64_t operand1 = dst.Uint(vform, i);
1317    uint64_t operand2 = src2.Uint(vform, i);
1318    uint64_t operand3 = src1.Uint(vform, i);
1319    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1320    dst.SetUint(vform, i, result);
1321  }
1322  return dst;
1323}
1324
1325
1326LogicVRegister Simulator::bsl(VectorFormat vform,
1327                              LogicVRegister dst,
1328                              const LogicVRegister& src1,
1329                              const LogicVRegister& src2) {
1330  dst.ClearForWrite(vform);
1331  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1332    uint64_t operand1 = src2.Uint(vform, i);
1333    uint64_t operand2 = dst.Uint(vform, i);
1334    uint64_t operand3 = src1.Uint(vform, i);
1335    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1336    dst.SetUint(vform, i, result);
1337  }
1338  return dst;
1339}
1340
1341
1342LogicVRegister Simulator::sminmax(VectorFormat vform,
1343                                  LogicVRegister dst,
1344                                  const LogicVRegister& src1,
1345                                  const LogicVRegister& src2,
1346                                  bool max) {
1347  dst.ClearForWrite(vform);
1348  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1349    int64_t src1_val = src1.Int(vform, i);
1350    int64_t src2_val = src2.Int(vform, i);
1351    int64_t dst_val;
1352    if (max) {
1353      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1354    } else {
1355      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1356    }
1357    dst.SetInt(vform, i, dst_val);
1358  }
1359  return dst;
1360}
1361
1362
1363LogicVRegister Simulator::smax(VectorFormat vform,
1364                               LogicVRegister dst,
1365                               const LogicVRegister& src1,
1366                               const LogicVRegister& src2) {
1367  return sminmax(vform, dst, src1, src2, true);
1368}
1369
1370
1371LogicVRegister Simulator::smin(VectorFormat vform,
1372                               LogicVRegister dst,
1373                               const LogicVRegister& src1,
1374                               const LogicVRegister& src2) {
1375  return sminmax(vform, dst, src1, src2, false);
1376}
1377
1378
1379LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1380                                   LogicVRegister dst,
1381                                   const LogicVRegister& src1,
1382                                   const LogicVRegister& src2,
1383                                   bool max) {
1384  int lanes = LaneCountFromFormat(vform);
1385  int64_t result[kMaxLanesPerVector];
1386  const LogicVRegister* src = &src1;
1387  for (int j = 0; j < 2; j++) {
1388    for (int i = 0; i < lanes; i += 2) {
1389      int64_t first_val = src->Int(vform, i);
1390      int64_t second_val = src->Int(vform, i + 1);
1391      int64_t dst_val;
1392      if (max) {
1393        dst_val = (first_val > second_val) ? first_val : second_val;
1394      } else {
1395        dst_val = (first_val < second_val) ? first_val : second_val;
1396      }
1397      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1398      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1399    }
1400    src = &src2;
1401  }
1402  dst.SetIntArray(vform, result);
1403  return dst;
1404}
1405
1406
1407LogicVRegister Simulator::smaxp(VectorFormat vform,
1408                                LogicVRegister dst,
1409                                const LogicVRegister& src1,
1410                                const LogicVRegister& src2) {
1411  return sminmaxp(vform, dst, src1, src2, true);
1412}
1413
1414
1415LogicVRegister Simulator::sminp(VectorFormat vform,
1416                                LogicVRegister dst,
1417                                const LogicVRegister& src1,
1418                                const LogicVRegister& src2) {
1419  return sminmaxp(vform, dst, src1, src2, false);
1420}
1421
1422
1423LogicVRegister Simulator::addp(VectorFormat vform,
1424                               LogicVRegister dst,
1425                               const LogicVRegister& src) {
1426  VIXL_ASSERT(vform == kFormatD);
1427
1428  uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1429  dst.ClearForWrite(vform);
1430  dst.SetUint(vform, 0, dst_val);
1431  return dst;
1432}
1433
1434
1435LogicVRegister Simulator::addv(VectorFormat vform,
1436                               LogicVRegister dst,
1437                               const LogicVRegister& src) {
1438  VectorFormat vform_dst =
1439      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1440
1441
1442  int64_t dst_val = 0;
1443  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1444    dst_val += src.Int(vform, i);
1445  }
1446
1447  dst.ClearForWrite(vform_dst);
1448  dst.SetInt(vform_dst, 0, dst_val);
1449  return dst;
1450}
1451
1452
1453LogicVRegister Simulator::saddlv(VectorFormat vform,
1454                                 LogicVRegister dst,
1455                                 const LogicVRegister& src) {
1456  VectorFormat vform_dst =
1457      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1458
1459  int64_t dst_val = 0;
1460  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1461    dst_val += src.Int(vform, i);
1462  }
1463
1464  dst.ClearForWrite(vform_dst);
1465  dst.SetInt(vform_dst, 0, dst_val);
1466  return dst;
1467}
1468
1469
1470LogicVRegister Simulator::uaddlv(VectorFormat vform,
1471                                 LogicVRegister dst,
1472                                 const LogicVRegister& src) {
1473  VectorFormat vform_dst =
1474      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1475
1476  uint64_t dst_val = 0;
1477  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1478    dst_val += src.Uint(vform, i);
1479  }
1480
1481  dst.ClearForWrite(vform_dst);
1482  dst.SetUint(vform_dst, 0, dst_val);
1483  return dst;
1484}
1485
1486
1487LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1488                                   LogicVRegister dst,
1489                                   const LogicVRegister& src,
1490                                   bool max) {
1491  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1492  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1493    int64_t src_val = src.Int(vform, i);
1494    if (max) {
1495      dst_val = (src_val > dst_val) ? src_val : dst_val;
1496    } else {
1497      dst_val = (src_val < dst_val) ? src_val : dst_val;
1498    }
1499  }
1500  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1501  dst.SetInt(vform, 0, dst_val);
1502  return dst;
1503}
1504
1505
1506LogicVRegister Simulator::smaxv(VectorFormat vform,
1507                                LogicVRegister dst,
1508                                const LogicVRegister& src) {
1509  sminmaxv(vform, dst, src, true);
1510  return dst;
1511}
1512
1513
1514LogicVRegister Simulator::sminv(VectorFormat vform,
1515                                LogicVRegister dst,
1516                                const LogicVRegister& src) {
1517  sminmaxv(vform, dst, src, false);
1518  return dst;
1519}
1520
1521
1522LogicVRegister Simulator::uminmax(VectorFormat vform,
1523                                  LogicVRegister dst,
1524                                  const LogicVRegister& src1,
1525                                  const LogicVRegister& src2,
1526                                  bool max) {
1527  dst.ClearForWrite(vform);
1528  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1529    uint64_t src1_val = src1.Uint(vform, i);
1530    uint64_t src2_val = src2.Uint(vform, i);
1531    uint64_t dst_val;
1532    if (max) {
1533      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1534    } else {
1535      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1536    }
1537    dst.SetUint(vform, i, dst_val);
1538  }
1539  return dst;
1540}
1541
1542
1543LogicVRegister Simulator::umax(VectorFormat vform,
1544                               LogicVRegister dst,
1545                               const LogicVRegister& src1,
1546                               const LogicVRegister& src2) {
1547  return uminmax(vform, dst, src1, src2, true);
1548}
1549
1550
1551LogicVRegister Simulator::umin(VectorFormat vform,
1552                               LogicVRegister dst,
1553                               const LogicVRegister& src1,
1554                               const LogicVRegister& src2) {
1555  return uminmax(vform, dst, src1, src2, false);
1556}
1557
1558
1559LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1560                                   LogicVRegister dst,
1561                                   const LogicVRegister& src1,
1562                                   const LogicVRegister& src2,
1563                                   bool max) {
1564  int lanes = LaneCountFromFormat(vform);
1565  uint64_t result[kMaxLanesPerVector];
1566  const LogicVRegister* src = &src1;
1567  for (int j = 0; j < 2; j++) {
1568    for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1569      uint64_t first_val = src->Uint(vform, i);
1570      uint64_t second_val = src->Uint(vform, i + 1);
1571      uint64_t dst_val;
1572      if (max) {
1573        dst_val = (first_val > second_val) ? first_val : second_val;
1574      } else {
1575        dst_val = (first_val < second_val) ? first_val : second_val;
1576      }
1577      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1578      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1579    }
1580    src = &src2;
1581  }
1582  dst.SetUintArray(vform, result);
1583  return dst;
1584}
1585
1586
1587LogicVRegister Simulator::umaxp(VectorFormat vform,
1588                                LogicVRegister dst,
1589                                const LogicVRegister& src1,
1590                                const LogicVRegister& src2) {
1591  return uminmaxp(vform, dst, src1, src2, true);
1592}
1593
1594
1595LogicVRegister Simulator::uminp(VectorFormat vform,
1596                                LogicVRegister dst,
1597                                const LogicVRegister& src1,
1598                                const LogicVRegister& src2) {
1599  return uminmaxp(vform, dst, src1, src2, false);
1600}
1601
1602
1603LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1604                                   LogicVRegister dst,
1605                                   const LogicVRegister& src,
1606                                   bool max) {
1607  uint64_t dst_val = max ? 0 : UINT64_MAX;
1608  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1609    uint64_t src_val = src.Uint(vform, i);
1610    if (max) {
1611      dst_val = (src_val > dst_val) ? src_val : dst_val;
1612    } else {
1613      dst_val = (src_val < dst_val) ? src_val : dst_val;
1614    }
1615  }
1616  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1617  dst.SetUint(vform, 0, dst_val);
1618  return dst;
1619}
1620
1621
1622LogicVRegister Simulator::umaxv(VectorFormat vform,
1623                                LogicVRegister dst,
1624                                const LogicVRegister& src) {
1625  uminmaxv(vform, dst, src, true);
1626  return dst;
1627}
1628
1629
1630LogicVRegister Simulator::uminv(VectorFormat vform,
1631                                LogicVRegister dst,
1632                                const LogicVRegister& src) {
1633  uminmaxv(vform, dst, src, false);
1634  return dst;
1635}
1636
1637
1638LogicVRegister Simulator::shl(VectorFormat vform,
1639                              LogicVRegister dst,
1640                              const LogicVRegister& src,
1641                              int shift) {
1642  VIXL_ASSERT(shift >= 0);
1643  SimVRegister temp;
1644  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1645  return ushl(vform, dst, src, shiftreg);
1646}
1647
1648
1649LogicVRegister Simulator::sshll(VectorFormat vform,
1650                                LogicVRegister dst,
1651                                const LogicVRegister& src,
1652                                int shift) {
1653  VIXL_ASSERT(shift >= 0);
1654  SimVRegister temp1, temp2;
1655  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1656  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1657  return sshl(vform, dst, extendedreg, shiftreg);
1658}
1659
1660
1661LogicVRegister Simulator::sshll2(VectorFormat vform,
1662                                 LogicVRegister dst,
1663                                 const LogicVRegister& src,
1664                                 int shift) {
1665  VIXL_ASSERT(shift >= 0);
1666  SimVRegister temp1, temp2;
1667  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1668  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1669  return sshl(vform, dst, extendedreg, shiftreg);
1670}
1671
1672
1673LogicVRegister Simulator::shll(VectorFormat vform,
1674                               LogicVRegister dst,
1675                               const LogicVRegister& src) {
1676  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1677  return sshll(vform, dst, src, shift);
1678}
1679
1680
1681LogicVRegister Simulator::shll2(VectorFormat vform,
1682                                LogicVRegister dst,
1683                                const LogicVRegister& src) {
1684  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1685  return sshll2(vform, dst, src, shift);
1686}
1687
1688
1689LogicVRegister Simulator::ushll(VectorFormat vform,
1690                                LogicVRegister dst,
1691                                const LogicVRegister& src,
1692                                int shift) {
1693  VIXL_ASSERT(shift >= 0);
1694  SimVRegister temp1, temp2;
1695  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1696  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1697  return ushl(vform, dst, extendedreg, shiftreg);
1698}
1699
1700
1701LogicVRegister Simulator::ushll2(VectorFormat vform,
1702                                 LogicVRegister dst,
1703                                 const LogicVRegister& src,
1704                                 int shift) {
1705  VIXL_ASSERT(shift >= 0);
1706  SimVRegister temp1, temp2;
1707  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1708  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1709  return ushl(vform, dst, extendedreg, shiftreg);
1710}
1711
1712
1713LogicVRegister Simulator::sli(VectorFormat vform,
1714                              LogicVRegister dst,
1715                              const LogicVRegister& src,
1716                              int shift) {
1717  dst.ClearForWrite(vform);
1718  int laneCount = LaneCountFromFormat(vform);
1719  for (int i = 0; i < laneCount; i++) {
1720    uint64_t src_lane = src.Uint(vform, i);
1721    uint64_t dst_lane = dst.Uint(vform, i);
1722    uint64_t shifted = src_lane << shift;
1723    uint64_t mask = MaxUintFromFormat(vform) << shift;
1724    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1725  }
1726  return dst;
1727}
1728
1729
1730LogicVRegister Simulator::sqshl(VectorFormat vform,
1731                                LogicVRegister dst,
1732                                const LogicVRegister& src,
1733                                int shift) {
1734  VIXL_ASSERT(shift >= 0);
1735  SimVRegister temp;
1736  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1737  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1738}
1739
1740
1741LogicVRegister Simulator::uqshl(VectorFormat vform,
1742                                LogicVRegister dst,
1743                                const LogicVRegister& src,
1744                                int shift) {
1745  VIXL_ASSERT(shift >= 0);
1746  SimVRegister temp;
1747  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1748  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1749}
1750
1751
1752LogicVRegister Simulator::sqshlu(VectorFormat vform,
1753                                 LogicVRegister dst,
1754                                 const LogicVRegister& src,
1755                                 int shift) {
1756  VIXL_ASSERT(shift >= 0);
1757  SimVRegister temp;
1758  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1759  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1760}
1761
1762
1763LogicVRegister Simulator::sri(VectorFormat vform,
1764                              LogicVRegister dst,
1765                              const LogicVRegister& src,
1766                              int shift) {
1767  dst.ClearForWrite(vform);
1768  int laneCount = LaneCountFromFormat(vform);
1769  VIXL_ASSERT((shift > 0) &&
1770              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1771  for (int i = 0; i < laneCount; i++) {
1772    uint64_t src_lane = src.Uint(vform, i);
1773    uint64_t dst_lane = dst.Uint(vform, i);
1774    uint64_t shifted;
1775    uint64_t mask;
1776    if (shift == 64) {
1777      shifted = 0;
1778      mask = 0;
1779    } else {
1780      shifted = src_lane >> shift;
1781      mask = MaxUintFromFormat(vform) >> shift;
1782    }
1783    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1784  }
1785  return dst;
1786}
1787
1788
1789LogicVRegister Simulator::ushr(VectorFormat vform,
1790                               LogicVRegister dst,
1791                               const LogicVRegister& src,
1792                               int shift) {
1793  VIXL_ASSERT(shift >= 0);
1794  SimVRegister temp;
1795  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1796  return ushl(vform, dst, src, shiftreg);
1797}
1798
1799
1800LogicVRegister Simulator::sshr(VectorFormat vform,
1801                               LogicVRegister dst,
1802                               const LogicVRegister& src,
1803                               int shift) {
1804  VIXL_ASSERT(shift >= 0);
1805  SimVRegister temp;
1806  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1807  return sshl(vform, dst, src, shiftreg);
1808}
1809
1810
1811LogicVRegister Simulator::ssra(VectorFormat vform,
1812                               LogicVRegister dst,
1813                               const LogicVRegister& src,
1814                               int shift) {
1815  SimVRegister temp;
1816  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1817  return add(vform, dst, dst, shifted_reg);
1818}
1819
1820
1821LogicVRegister Simulator::usra(VectorFormat vform,
1822                               LogicVRegister dst,
1823                               const LogicVRegister& src,
1824                               int shift) {
1825  SimVRegister temp;
1826  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1827  return add(vform, dst, dst, shifted_reg);
1828}
1829
1830
1831LogicVRegister Simulator::srsra(VectorFormat vform,
1832                                LogicVRegister dst,
1833                                const LogicVRegister& src,
1834                                int shift) {
1835  SimVRegister temp;
1836  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1837  return add(vform, dst, dst, shifted_reg);
1838}
1839
1840
1841LogicVRegister Simulator::ursra(VectorFormat vform,
1842                                LogicVRegister dst,
1843                                const LogicVRegister& src,
1844                                int shift) {
1845  SimVRegister temp;
1846  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1847  return add(vform, dst, dst, shifted_reg);
1848}
1849
1850
1851LogicVRegister Simulator::cls(VectorFormat vform,
1852                              LogicVRegister dst,
1853                              const LogicVRegister& src) {
1854  uint64_t result[16];
1855  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1856  int laneCount = LaneCountFromFormat(vform);
1857  for (int i = 0; i < laneCount; i++) {
1858    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1859  }
1860
1861  dst.ClearForWrite(vform);
1862  for (int i = 0; i < laneCount; ++i) {
1863    dst.SetUint(vform, i, result[i]);
1864  }
1865  return dst;
1866}
1867
1868
1869LogicVRegister Simulator::clz(VectorFormat vform,
1870                              LogicVRegister dst,
1871                              const LogicVRegister& src) {
1872  uint64_t result[16];
1873  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1874  int laneCount = LaneCountFromFormat(vform);
1875  for (int i = 0; i < laneCount; i++) {
1876    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1877  }
1878
1879  dst.ClearForWrite(vform);
1880  for (int i = 0; i < laneCount; ++i) {
1881    dst.SetUint(vform, i, result[i]);
1882  }
1883  return dst;
1884}
1885
1886
1887LogicVRegister Simulator::cnt(VectorFormat vform,
1888                              LogicVRegister dst,
1889                              const LogicVRegister& src) {
1890  uint64_t result[16];
1891  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1892  int laneCount = LaneCountFromFormat(vform);
1893  for (int i = 0; i < laneCount; i++) {
1894    uint64_t value = src.Uint(vform, i);
1895    result[i] = 0;
1896    for (int j = 0; j < laneSizeInBits; j++) {
1897      result[i] += (value & 1);
1898      value >>= 1;
1899    }
1900  }
1901
1902  dst.ClearForWrite(vform);
1903  for (int i = 0; i < laneCount; ++i) {
1904    dst.SetUint(vform, i, result[i]);
1905  }
1906  return dst;
1907}
1908
1909
1910LogicVRegister Simulator::sshl(VectorFormat vform,
1911                               LogicVRegister dst,
1912                               const LogicVRegister& src1,
1913                               const LogicVRegister& src2) {
1914  dst.ClearForWrite(vform);
1915  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1916    int8_t shift_val = src2.Int(vform, i);
1917    int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1918
1919    // Set signed saturation state.
1920    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1921      dst.SetSignedSat(i, lj_src_val >= 0);
1922    }
1923
1924    // Set unsigned saturation state.
1925    if (lj_src_val < 0) {
1926      dst.SetUnsignedSat(i, false);
1927    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1928               (lj_src_val != 0)) {
1929      dst.SetUnsignedSat(i, true);
1930    }
1931
1932    int64_t src_val = src1.Int(vform, i);
1933    bool src_is_negative = src_val < 0;
1934    if (shift_val > 63) {
1935      dst.SetInt(vform, i, 0);
1936    } else if (shift_val < -63) {
1937      dst.SetRounding(i, src_is_negative);
1938      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1939    } else {
1940      // Use unsigned types for shifts, as behaviour is undefined for signed
1941      // lhs.
1942      uint64_t usrc_val = static_cast<uint64_t>(src_val);
1943
1944      if (shift_val < 0) {
1945        // Convert to right shift.
1946        shift_val = -shift_val;
1947
1948        // Set rounding state by testing most-significant bit shifted out.
1949        // Rounding only needed on right shifts.
1950        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1951          dst.SetRounding(i, true);
1952        }
1953
1954        usrc_val >>= shift_val;
1955
1956        if (src_is_negative) {
1957          // Simulate sign-extension.
1958          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1959        }
1960      } else {
1961        usrc_val <<= shift_val;
1962      }
1963      dst.SetUint(vform, i, usrc_val);
1964    }
1965  }
1966  return dst;
1967}
1968
1969
1970LogicVRegister Simulator::ushl(VectorFormat vform,
1971                               LogicVRegister dst,
1972                               const LogicVRegister& src1,
1973                               const LogicVRegister& src2) {
1974  dst.ClearForWrite(vform);
1975  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1976    int8_t shift_val = src2.Int(vform, i);
1977    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1978
1979    // Set saturation state.
1980    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1981      dst.SetUnsignedSat(i, true);
1982    }
1983
1984    uint64_t src_val = src1.Uint(vform, i);
1985    if ((shift_val > 63) || (shift_val < -64)) {
1986      dst.SetUint(vform, i, 0);
1987    } else {
1988      if (shift_val < 0) {
1989        // Set rounding state. Rounding only needed on right shifts.
1990        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1991          dst.SetRounding(i, true);
1992        }
1993
1994        if (shift_val == -64) {
1995          src_val = 0;
1996        } else {
1997          src_val >>= -shift_val;
1998        }
1999      } else {
2000        src_val <<= shift_val;
2001      }
2002      dst.SetUint(vform, i, src_val);
2003    }
2004  }
2005  return dst;
2006}
2007
2008
2009LogicVRegister Simulator::neg(VectorFormat vform,
2010                              LogicVRegister dst,
2011                              const LogicVRegister& src) {
2012  dst.ClearForWrite(vform);
2013  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2014    // Test for signed saturation.
2015    int64_t sa = src.Int(vform, i);
2016    if (sa == MinIntFromFormat(vform)) {
2017      dst.SetSignedSat(i, true);
2018    }
2019    dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2020  }
2021  return dst;
2022}
2023
2024
2025LogicVRegister Simulator::suqadd(VectorFormat vform,
2026                                 LogicVRegister dst,
2027                                 const LogicVRegister& src) {
2028  dst.ClearForWrite(vform);
2029  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2030    int64_t sa = dst.IntLeftJustified(vform, i);
2031    uint64_t ub = src.UintLeftJustified(vform, i);
2032    uint64_t ur = sa + ub;
2033
2034    int64_t sr;
2035    memcpy(&sr, &ur, sizeof(sr));
2036    if (sr < sa) {  // Test for signed positive saturation.
2037      dst.SetInt(vform, i, MaxIntFromFormat(vform));
2038    } else {
2039      dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
2040    }
2041  }
2042  return dst;
2043}
2044
2045
2046LogicVRegister Simulator::usqadd(VectorFormat vform,
2047                                 LogicVRegister dst,
2048                                 const LogicVRegister& src) {
2049  dst.ClearForWrite(vform);
2050  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051    uint64_t ua = dst.UintLeftJustified(vform, i);
2052    int64_t sb = src.IntLeftJustified(vform, i);
2053    uint64_t ur = ua + sb;
2054
2055    if ((sb > 0) && (ur <= ua)) {
2056      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2057    } else if ((sb < 0) && (ur >= ua)) {
2058      dst.SetUint(vform, i, 0);  // Negative saturation.
2059    } else {
2060      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2061    }
2062  }
2063  return dst;
2064}
2065
2066
2067LogicVRegister Simulator::abs(VectorFormat vform,
2068                              LogicVRegister dst,
2069                              const LogicVRegister& src) {
2070  dst.ClearForWrite(vform);
2071  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072    // Test for signed saturation.
2073    int64_t sa = src.Int(vform, i);
2074    if (sa == MinIntFromFormat(vform)) {
2075      dst.SetSignedSat(i, true);
2076    }
2077    if (sa < 0) {
2078      dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079    } else {
2080      dst.SetInt(vform, i, sa);
2081    }
2082  }
2083  return dst;
2084}
2085
2086
2087LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2088                                        LogicVRegister dst,
2089                                        bool dstIsSigned,
2090                                        const LogicVRegister& src,
2091                                        bool srcIsSigned) {
2092  bool upperhalf = false;
2093  VectorFormat srcform = kFormatUndefined;
2094  int64_t ssrc[8];
2095  uint64_t usrc[8];
2096
2097  switch (dstform) {
2098    case kFormat8B:
2099      upperhalf = false;
2100      srcform = kFormat8H;
2101      break;
2102    case kFormat16B:
2103      upperhalf = true;
2104      srcform = kFormat8H;
2105      break;
2106    case kFormat4H:
2107      upperhalf = false;
2108      srcform = kFormat4S;
2109      break;
2110    case kFormat8H:
2111      upperhalf = true;
2112      srcform = kFormat4S;
2113      break;
2114    case kFormat2S:
2115      upperhalf = false;
2116      srcform = kFormat2D;
2117      break;
2118    case kFormat4S:
2119      upperhalf = true;
2120      srcform = kFormat2D;
2121      break;
2122    case kFormatB:
2123      upperhalf = false;
2124      srcform = kFormatH;
2125      break;
2126    case kFormatH:
2127      upperhalf = false;
2128      srcform = kFormatS;
2129      break;
2130    case kFormatS:
2131      upperhalf = false;
2132      srcform = kFormatD;
2133      break;
2134    default:
2135      VIXL_UNIMPLEMENTED();
2136  }
2137
2138  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2139    ssrc[i] = src.Int(srcform, i);
2140    usrc[i] = src.Uint(srcform, i);
2141  }
2142
2143  int offset;
2144  if (upperhalf) {
2145    offset = LaneCountFromFormat(dstform) / 2;
2146  } else {
2147    offset = 0;
2148    dst.ClearForWrite(dstform);
2149  }
2150
2151  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2152    // Test for signed saturation
2153    if (ssrc[i] > MaxIntFromFormat(dstform)) {
2154      dst.SetSignedSat(offset + i, true);
2155    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2156      dst.SetSignedSat(offset + i, false);
2157    }
2158
2159    // Test for unsigned saturation
2160    if (srcIsSigned) {
2161      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2162        dst.SetUnsignedSat(offset + i, true);
2163      } else if (ssrc[i] < 0) {
2164        dst.SetUnsignedSat(offset + i, false);
2165      }
2166    } else {
2167      if (usrc[i] > MaxUintFromFormat(dstform)) {
2168        dst.SetUnsignedSat(offset + i, true);
2169      }
2170    }
2171
2172    int64_t result;
2173    if (srcIsSigned) {
2174      result = ssrc[i] & MaxUintFromFormat(dstform);
2175    } else {
2176      result = usrc[i] & MaxUintFromFormat(dstform);
2177    }
2178
2179    if (dstIsSigned) {
2180      dst.SetInt(dstform, offset + i, result);
2181    } else {
2182      dst.SetUint(dstform, offset + i, result);
2183    }
2184  }
2185  return dst;
2186}
2187
2188
2189LogicVRegister Simulator::xtn(VectorFormat vform,
2190                              LogicVRegister dst,
2191                              const LogicVRegister& src) {
2192  return extractnarrow(vform, dst, true, src, true);
2193}
2194
2195
2196LogicVRegister Simulator::sqxtn(VectorFormat vform,
2197                                LogicVRegister dst,
2198                                const LogicVRegister& src) {
2199  return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2200}
2201
2202
2203LogicVRegister Simulator::sqxtun(VectorFormat vform,
2204                                 LogicVRegister dst,
2205                                 const LogicVRegister& src) {
2206  return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2207}
2208
2209
2210LogicVRegister Simulator::uqxtn(VectorFormat vform,
2211                                LogicVRegister dst,
2212                                const LogicVRegister& src) {
2213  return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2214}
2215
2216
2217LogicVRegister Simulator::absdiff(VectorFormat vform,
2218                                  LogicVRegister dst,
2219                                  const LogicVRegister& src1,
2220                                  const LogicVRegister& src2,
2221                                  bool issigned) {
2222  dst.ClearForWrite(vform);
2223  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2224    if (issigned) {
2225      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2226      sr = sr > 0 ? sr : -sr;
2227      dst.SetInt(vform, i, sr);
2228    } else {
2229      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2230      sr = sr > 0 ? sr : -sr;
2231      dst.SetUint(vform, i, sr);
2232    }
2233  }
2234  return dst;
2235}
2236
2237
2238LogicVRegister Simulator::saba(VectorFormat vform,
2239                               LogicVRegister dst,
2240                               const LogicVRegister& src1,
2241                               const LogicVRegister& src2) {
2242  SimVRegister temp;
2243  dst.ClearForWrite(vform);
2244  absdiff(vform, temp, src1, src2, true);
2245  add(vform, dst, dst, temp);
2246  return dst;
2247}
2248
2249
2250LogicVRegister Simulator::uaba(VectorFormat vform,
2251                               LogicVRegister dst,
2252                               const LogicVRegister& src1,
2253                               const LogicVRegister& src2) {
2254  SimVRegister temp;
2255  dst.ClearForWrite(vform);
2256  absdiff(vform, temp, src1, src2, false);
2257  add(vform, dst, dst, temp);
2258  return dst;
2259}
2260
2261
2262LogicVRegister Simulator::not_(VectorFormat vform,
2263                               LogicVRegister dst,
2264                               const LogicVRegister& src) {
2265  dst.ClearForWrite(vform);
2266  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2267    dst.SetUint(vform, i, ~src.Uint(vform, i));
2268  }
2269  return dst;
2270}
2271
2272
2273LogicVRegister Simulator::rbit(VectorFormat vform,
2274                               LogicVRegister dst,
2275                               const LogicVRegister& src) {
2276  uint64_t result[16];
2277  int laneCount = LaneCountFromFormat(vform);
2278  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2279  uint64_t reversed_value;
2280  uint64_t value;
2281  for (int i = 0; i < laneCount; i++) {
2282    value = src.Uint(vform, i);
2283    reversed_value = 0;
2284    for (int j = 0; j < laneSizeInBits; j++) {
2285      reversed_value = (reversed_value << 1) | (value & 1);
2286      value >>= 1;
2287    }
2288    result[i] = reversed_value;
2289  }
2290
2291  dst.ClearForWrite(vform);
2292  for (int i = 0; i < laneCount; ++i) {
2293    dst.SetUint(vform, i, result[i]);
2294  }
2295  return dst;
2296}
2297
2298
2299LogicVRegister Simulator::rev(VectorFormat vform,
2300                              LogicVRegister dst,
2301                              const LogicVRegister& src,
2302                              int revSize) {
2303  uint64_t result[16];
2304  int laneCount = LaneCountFromFormat(vform);
2305  int laneSize = LaneSizeInBytesFromFormat(vform);
2306  int lanesPerLoop = revSize / laneSize;
2307  for (int i = 0; i < laneCount; i += lanesPerLoop) {
2308    for (int j = 0; j < lanesPerLoop; j++) {
2309      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2310    }
2311  }
2312  dst.ClearForWrite(vform);
2313  for (int i = 0; i < laneCount; ++i) {
2314    dst.SetUint(vform, i, result[i]);
2315  }
2316  return dst;
2317}
2318
2319
2320LogicVRegister Simulator::rev16(VectorFormat vform,
2321                                LogicVRegister dst,
2322                                const LogicVRegister& src) {
2323  return rev(vform, dst, src, 2);
2324}
2325
2326
2327LogicVRegister Simulator::rev32(VectorFormat vform,
2328                                LogicVRegister dst,
2329                                const LogicVRegister& src) {
2330  return rev(vform, dst, src, 4);
2331}
2332
2333
2334LogicVRegister Simulator::rev64(VectorFormat vform,
2335                                LogicVRegister dst,
2336                                const LogicVRegister& src) {
2337  return rev(vform, dst, src, 8);
2338}
2339
2340
2341LogicVRegister Simulator::addlp(VectorFormat vform,
2342                                LogicVRegister dst,
2343                                const LogicVRegister& src,
2344                                bool is_signed,
2345                                bool do_accumulate) {
2346  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2347  VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2348  VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
2349
2350  uint64_t result[8];
2351  int lane_count = LaneCountFromFormat(vform);
2352  for (int i = 0; i < lane_count; i++) {
2353    if (is_signed) {
2354      result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2355                                        src.Int(vformsrc, 2 * i + 1));
2356    } else {
2357      result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2358    }
2359  }
2360
2361  dst.ClearForWrite(vform);
2362  for (int i = 0; i < lane_count; ++i) {
2363    if (do_accumulate) {
2364      result[i] += dst.Uint(vform, i);
2365    }
2366    dst.SetUint(vform, i, result[i]);
2367  }
2368
2369  return dst;
2370}
2371
2372
2373LogicVRegister Simulator::saddlp(VectorFormat vform,
2374                                 LogicVRegister dst,
2375                                 const LogicVRegister& src) {
2376  return addlp(vform, dst, src, true, false);
2377}
2378
2379
2380LogicVRegister Simulator::uaddlp(VectorFormat vform,
2381                                 LogicVRegister dst,
2382                                 const LogicVRegister& src) {
2383  return addlp(vform, dst, src, false, false);
2384}
2385
2386
2387LogicVRegister Simulator::sadalp(VectorFormat vform,
2388                                 LogicVRegister dst,
2389                                 const LogicVRegister& src) {
2390  return addlp(vform, dst, src, true, true);
2391}
2392
2393
2394LogicVRegister Simulator::uadalp(VectorFormat vform,
2395                                 LogicVRegister dst,
2396                                 const LogicVRegister& src) {
2397  return addlp(vform, dst, src, false, true);
2398}
2399
2400
2401LogicVRegister Simulator::ext(VectorFormat vform,
2402                              LogicVRegister dst,
2403                              const LogicVRegister& src1,
2404                              const LogicVRegister& src2,
2405                              int index) {
2406  uint8_t result[16];
2407  int laneCount = LaneCountFromFormat(vform);
2408  for (int i = 0; i < laneCount - index; ++i) {
2409    result[i] = src1.Uint(vform, i + index);
2410  }
2411  for (int i = 0; i < index; ++i) {
2412    result[laneCount - index + i] = src2.Uint(vform, i);
2413  }
2414  dst.ClearForWrite(vform);
2415  for (int i = 0; i < laneCount; ++i) {
2416    dst.SetUint(vform, i, result[i]);
2417  }
2418  return dst;
2419}
2420
2421
2422LogicVRegister Simulator::dup_element(VectorFormat vform,
2423                                      LogicVRegister dst,
2424                                      const LogicVRegister& src,
2425                                      int src_index) {
2426  int laneCount = LaneCountFromFormat(vform);
2427  uint64_t value = src.Uint(vform, src_index);
2428  dst.ClearForWrite(vform);
2429  for (int i = 0; i < laneCount; ++i) {
2430    dst.SetUint(vform, i, value);
2431  }
2432  return dst;
2433}
2434
2435
2436LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2437                                        LogicVRegister dst,
2438                                        uint64_t imm) {
2439  int laneCount = LaneCountFromFormat(vform);
2440  uint64_t value = imm & MaxUintFromFormat(vform);
2441  dst.ClearForWrite(vform);
2442  for (int i = 0; i < laneCount; ++i) {
2443    dst.SetUint(vform, i, value);
2444  }
2445  return dst;
2446}
2447
2448
2449LogicVRegister Simulator::ins_element(VectorFormat vform,
2450                                      LogicVRegister dst,
2451                                      int dst_index,
2452                                      const LogicVRegister& src,
2453                                      int src_index) {
2454  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2455  return dst;
2456}
2457
2458
2459LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2460                                        LogicVRegister dst,
2461                                        int dst_index,
2462                                        uint64_t imm) {
2463  uint64_t value = imm & MaxUintFromFormat(vform);
2464  dst.SetUint(vform, dst_index, value);
2465  return dst;
2466}
2467
2468
2469LogicVRegister Simulator::movi(VectorFormat vform,
2470                               LogicVRegister dst,
2471                               uint64_t imm) {
2472  int laneCount = LaneCountFromFormat(vform);
2473  dst.ClearForWrite(vform);
2474  for (int i = 0; i < laneCount; ++i) {
2475    dst.SetUint(vform, i, imm);
2476  }
2477  return dst;
2478}
2479
2480
2481LogicVRegister Simulator::mvni(VectorFormat vform,
2482                               LogicVRegister dst,
2483                               uint64_t imm) {
2484  int laneCount = LaneCountFromFormat(vform);
2485  dst.ClearForWrite(vform);
2486  for (int i = 0; i < laneCount; ++i) {
2487    dst.SetUint(vform, i, ~imm);
2488  }
2489  return dst;
2490}
2491
2492
2493LogicVRegister Simulator::orr(VectorFormat vform,
2494                              LogicVRegister dst,
2495                              const LogicVRegister& src,
2496                              uint64_t imm) {
2497  uint64_t result[16];
2498  int laneCount = LaneCountFromFormat(vform);
2499  for (int i = 0; i < laneCount; ++i) {
2500    result[i] = src.Uint(vform, i) | imm;
2501  }
2502  dst.ClearForWrite(vform);
2503  for (int i = 0; i < laneCount; ++i) {
2504    dst.SetUint(vform, i, result[i]);
2505  }
2506  return dst;
2507}
2508
2509
2510LogicVRegister Simulator::uxtl(VectorFormat vform,
2511                               LogicVRegister dst,
2512                               const LogicVRegister& src) {
2513  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2514
2515  dst.ClearForWrite(vform);
2516  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2517    dst.SetUint(vform, i, src.Uint(vform_half, i));
2518  }
2519  return dst;
2520}
2521
2522
2523LogicVRegister Simulator::sxtl(VectorFormat vform,
2524                               LogicVRegister dst,
2525                               const LogicVRegister& src) {
2526  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2527
2528  dst.ClearForWrite(vform);
2529  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2530    dst.SetInt(vform, i, src.Int(vform_half, i));
2531  }
2532  return dst;
2533}
2534
2535
2536LogicVRegister Simulator::uxtl2(VectorFormat vform,
2537                                LogicVRegister dst,
2538                                const LogicVRegister& src) {
2539  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2540  int lane_count = LaneCountFromFormat(vform);
2541
2542  dst.ClearForWrite(vform);
2543  for (int i = 0; i < lane_count; i++) {
2544    dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2545  }
2546  return dst;
2547}
2548
2549
2550LogicVRegister Simulator::sxtl2(VectorFormat vform,
2551                                LogicVRegister dst,
2552                                const LogicVRegister& src) {
2553  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2554  int lane_count = LaneCountFromFormat(vform);
2555
2556  dst.ClearForWrite(vform);
2557  for (int i = 0; i < lane_count; i++) {
2558    dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2559  }
2560  return dst;
2561}
2562
2563
2564LogicVRegister Simulator::shrn(VectorFormat vform,
2565                               LogicVRegister dst,
2566                               const LogicVRegister& src,
2567                               int shift) {
2568  SimVRegister temp;
2569  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2570  VectorFormat vform_dst = vform;
2571  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2572  return extractnarrow(vform_dst, dst, false, shifted_src, false);
2573}
2574
2575
2576LogicVRegister Simulator::shrn2(VectorFormat vform,
2577                                LogicVRegister dst,
2578                                const LogicVRegister& src,
2579                                int shift) {
2580  SimVRegister temp;
2581  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2582  VectorFormat vformdst = vform;
2583  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2584  return extractnarrow(vformdst, dst, false, shifted_src, false);
2585}
2586
2587
2588LogicVRegister Simulator::rshrn(VectorFormat vform,
2589                                LogicVRegister dst,
2590                                const LogicVRegister& src,
2591                                int shift) {
2592  SimVRegister temp;
2593  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2594  VectorFormat vformdst = vform;
2595  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2596  return extractnarrow(vformdst, dst, false, shifted_src, false);
2597}
2598
2599
2600LogicVRegister Simulator::rshrn2(VectorFormat vform,
2601                                 LogicVRegister dst,
2602                                 const LogicVRegister& src,
2603                                 int shift) {
2604  SimVRegister temp;
2605  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2606  VectorFormat vformdst = vform;
2607  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2608  return extractnarrow(vformdst, dst, false, shifted_src, false);
2609}
2610
2611
2612LogicVRegister Simulator::Table(VectorFormat vform,
2613                                LogicVRegister dst,
2614                                const LogicVRegister& ind,
2615                                bool zero_out_of_bounds,
2616                                const LogicVRegister* tab1,
2617                                const LogicVRegister* tab2,
2618                                const LogicVRegister* tab3,
2619                                const LogicVRegister* tab4) {
2620  VIXL_ASSERT(tab1 != NULL);
2621  const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2622  uint64_t result[kMaxLanesPerVector];
2623  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2624    result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2625  }
2626  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2627    uint64_t j = ind.Uint(vform, i);
2628    int tab_idx = static_cast<int>(j >> 4);
2629    int j_idx = static_cast<int>(j & 15);
2630    if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2631      result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2632    }
2633  }
2634  dst.SetUintArray(vform, result);
2635  return dst;
2636}
2637
2638
2639LogicVRegister Simulator::tbl(VectorFormat vform,
2640                              LogicVRegister dst,
2641                              const LogicVRegister& tab,
2642                              const LogicVRegister& ind) {
2643  return Table(vform, dst, ind, true, &tab);
2644}
2645
2646
2647LogicVRegister Simulator::tbl(VectorFormat vform,
2648                              LogicVRegister dst,
2649                              const LogicVRegister& tab,
2650                              const LogicVRegister& tab2,
2651                              const LogicVRegister& ind) {
2652  return Table(vform, dst, ind, true, &tab, &tab2);
2653}
2654
2655
2656LogicVRegister Simulator::tbl(VectorFormat vform,
2657                              LogicVRegister dst,
2658                              const LogicVRegister& tab,
2659                              const LogicVRegister& tab2,
2660                              const LogicVRegister& tab3,
2661                              const LogicVRegister& ind) {
2662  return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2663}
2664
2665
2666LogicVRegister Simulator::tbl(VectorFormat vform,
2667                              LogicVRegister dst,
2668                              const LogicVRegister& tab,
2669                              const LogicVRegister& tab2,
2670                              const LogicVRegister& tab3,
2671                              const LogicVRegister& tab4,
2672                              const LogicVRegister& ind) {
2673  return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2674}
2675
2676
2677LogicVRegister Simulator::tbx(VectorFormat vform,
2678                              LogicVRegister dst,
2679                              const LogicVRegister& tab,
2680                              const LogicVRegister& ind) {
2681  return Table(vform, dst, ind, false, &tab);
2682}
2683
2684
2685LogicVRegister Simulator::tbx(VectorFormat vform,
2686                              LogicVRegister dst,
2687                              const LogicVRegister& tab,
2688                              const LogicVRegister& tab2,
2689                              const LogicVRegister& ind) {
2690  return Table(vform, dst, ind, false, &tab, &tab2);
2691}
2692
2693
2694LogicVRegister Simulator::tbx(VectorFormat vform,
2695                              LogicVRegister dst,
2696                              const LogicVRegister& tab,
2697                              const LogicVRegister& tab2,
2698                              const LogicVRegister& tab3,
2699                              const LogicVRegister& ind) {
2700  return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2701}
2702
2703
2704LogicVRegister Simulator::tbx(VectorFormat vform,
2705                              LogicVRegister dst,
2706                              const LogicVRegister& tab,
2707                              const LogicVRegister& tab2,
2708                              const LogicVRegister& tab3,
2709                              const LogicVRegister& tab4,
2710                              const LogicVRegister& ind) {
2711  return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2712}
2713
2714
2715LogicVRegister Simulator::uqshrn(VectorFormat vform,
2716                                 LogicVRegister dst,
2717                                 const LogicVRegister& src,
2718                                 int shift) {
2719  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2720}
2721
2722
2723LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2724                                  LogicVRegister dst,
2725                                  const LogicVRegister& src,
2726                                  int shift) {
2727  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2728}
2729
2730
2731LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2732                                  LogicVRegister dst,
2733                                  const LogicVRegister& src,
2734                                  int shift) {
2735  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2736}
2737
2738
2739LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2740                                   LogicVRegister dst,
2741                                   const LogicVRegister& src,
2742                                   int shift) {
2743  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2744}
2745
2746
2747LogicVRegister Simulator::sqshrn(VectorFormat vform,
2748                                 LogicVRegister dst,
2749                                 const LogicVRegister& src,
2750                                 int shift) {
2751  SimVRegister temp;
2752  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2753  VectorFormat vformdst = vform;
2754  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2755  return sqxtn(vformdst, dst, shifted_src);
2756}
2757
2758
2759LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2760                                  LogicVRegister dst,
2761                                  const LogicVRegister& src,
2762                                  int shift) {
2763  SimVRegister temp;
2764  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2765  VectorFormat vformdst = vform;
2766  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2767  return sqxtn(vformdst, dst, shifted_src);
2768}
2769
2770
2771LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2772                                  LogicVRegister dst,
2773                                  const LogicVRegister& src,
2774                                  int shift) {
2775  SimVRegister temp;
2776  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2777  VectorFormat vformdst = vform;
2778  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2779  return sqxtn(vformdst, dst, shifted_src);
2780}
2781
2782
2783LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2784                                   LogicVRegister dst,
2785                                   const LogicVRegister& src,
2786                                   int shift) {
2787  SimVRegister temp;
2788  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2789  VectorFormat vformdst = vform;
2790  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2791  return sqxtn(vformdst, dst, shifted_src);
2792}
2793
2794
2795LogicVRegister Simulator::sqshrun(VectorFormat vform,
2796                                  LogicVRegister dst,
2797                                  const LogicVRegister& src,
2798                                  int shift) {
2799  SimVRegister temp;
2800  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2801  VectorFormat vformdst = vform;
2802  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2803  return sqxtun(vformdst, dst, shifted_src);
2804}
2805
2806
2807LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2808                                   LogicVRegister dst,
2809                                   const LogicVRegister& src,
2810                                   int shift) {
2811  SimVRegister temp;
2812  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2813  VectorFormat vformdst = vform;
2814  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2815  return sqxtun(vformdst, dst, shifted_src);
2816}
2817
2818
2819LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2820                                   LogicVRegister dst,
2821                                   const LogicVRegister& src,
2822                                   int shift) {
2823  SimVRegister temp;
2824  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2825  VectorFormat vformdst = vform;
2826  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2827  return sqxtun(vformdst, dst, shifted_src);
2828}
2829
2830
2831LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2832                                    LogicVRegister dst,
2833                                    const LogicVRegister& src,
2834                                    int shift) {
2835  SimVRegister temp;
2836  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2837  VectorFormat vformdst = vform;
2838  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2839  return sqxtun(vformdst, dst, shifted_src);
2840}
2841
2842
2843LogicVRegister Simulator::uaddl(VectorFormat vform,
2844                                LogicVRegister dst,
2845                                const LogicVRegister& src1,
2846                                const LogicVRegister& src2) {
2847  SimVRegister temp1, temp2;
2848  uxtl(vform, temp1, src1);
2849  uxtl(vform, temp2, src2);
2850  add(vform, dst, temp1, temp2);
2851  return dst;
2852}
2853
2854
2855LogicVRegister Simulator::uaddl2(VectorFormat vform,
2856                                 LogicVRegister dst,
2857                                 const LogicVRegister& src1,
2858                                 const LogicVRegister& src2) {
2859  SimVRegister temp1, temp2;
2860  uxtl2(vform, temp1, src1);
2861  uxtl2(vform, temp2, src2);
2862  add(vform, dst, temp1, temp2);
2863  return dst;
2864}
2865
2866
2867LogicVRegister Simulator::uaddw(VectorFormat vform,
2868                                LogicVRegister dst,
2869                                const LogicVRegister& src1,
2870                                const LogicVRegister& src2) {
2871  SimVRegister temp;
2872  uxtl(vform, temp, src2);
2873  add(vform, dst, src1, temp);
2874  return dst;
2875}
2876
2877
2878LogicVRegister Simulator::uaddw2(VectorFormat vform,
2879                                 LogicVRegister dst,
2880                                 const LogicVRegister& src1,
2881                                 const LogicVRegister& src2) {
2882  SimVRegister temp;
2883  uxtl2(vform, temp, src2);
2884  add(vform, dst, src1, temp);
2885  return dst;
2886}
2887
2888
2889LogicVRegister Simulator::saddl(VectorFormat vform,
2890                                LogicVRegister dst,
2891                                const LogicVRegister& src1,
2892                                const LogicVRegister& src2) {
2893  SimVRegister temp1, temp2;
2894  sxtl(vform, temp1, src1);
2895  sxtl(vform, temp2, src2);
2896  add(vform, dst, temp1, temp2);
2897  return dst;
2898}
2899
2900
2901LogicVRegister Simulator::saddl2(VectorFormat vform,
2902                                 LogicVRegister dst,
2903                                 const LogicVRegister& src1,
2904                                 const LogicVRegister& src2) {
2905  SimVRegister temp1, temp2;
2906  sxtl2(vform, temp1, src1);
2907  sxtl2(vform, temp2, src2);
2908  add(vform, dst, temp1, temp2);
2909  return dst;
2910}
2911
2912
2913LogicVRegister Simulator::saddw(VectorFormat vform,
2914                                LogicVRegister dst,
2915                                const LogicVRegister& src1,
2916                                const LogicVRegister& src2) {
2917  SimVRegister temp;
2918  sxtl(vform, temp, src2);
2919  add(vform, dst, src1, temp);
2920  return dst;
2921}
2922
2923
2924LogicVRegister Simulator::saddw2(VectorFormat vform,
2925                                 LogicVRegister dst,
2926                                 const LogicVRegister& src1,
2927                                 const LogicVRegister& src2) {
2928  SimVRegister temp;
2929  sxtl2(vform, temp, src2);
2930  add(vform, dst, src1, temp);
2931  return dst;
2932}
2933
2934
2935LogicVRegister Simulator::usubl(VectorFormat vform,
2936                                LogicVRegister dst,
2937                                const LogicVRegister& src1,
2938                                const LogicVRegister& src2) {
2939  SimVRegister temp1, temp2;
2940  uxtl(vform, temp1, src1);
2941  uxtl(vform, temp2, src2);
2942  sub(vform, dst, temp1, temp2);
2943  return dst;
2944}
2945
2946
2947LogicVRegister Simulator::usubl2(VectorFormat vform,
2948                                 LogicVRegister dst,
2949                                 const LogicVRegister& src1,
2950                                 const LogicVRegister& src2) {
2951  SimVRegister temp1, temp2;
2952  uxtl2(vform, temp1, src1);
2953  uxtl2(vform, temp2, src2);
2954  sub(vform, dst, temp1, temp2);
2955  return dst;
2956}
2957
2958
2959LogicVRegister Simulator::usubw(VectorFormat vform,
2960                                LogicVRegister dst,
2961                                const LogicVRegister& src1,
2962                                const LogicVRegister& src2) {
2963  SimVRegister temp;
2964  uxtl(vform, temp, src2);
2965  sub(vform, dst, src1, temp);
2966  return dst;
2967}
2968
2969
2970LogicVRegister Simulator::usubw2(VectorFormat vform,
2971                                 LogicVRegister dst,
2972                                 const LogicVRegister& src1,
2973                                 const LogicVRegister& src2) {
2974  SimVRegister temp;
2975  uxtl2(vform, temp, src2);
2976  sub(vform, dst, src1, temp);
2977  return dst;
2978}
2979
2980
2981LogicVRegister Simulator::ssubl(VectorFormat vform,
2982                                LogicVRegister dst,
2983                                const LogicVRegister& src1,
2984                                const LogicVRegister& src2) {
2985  SimVRegister temp1, temp2;
2986  sxtl(vform, temp1, src1);
2987  sxtl(vform, temp2, src2);
2988  sub(vform, dst, temp1, temp2);
2989  return dst;
2990}
2991
2992
2993LogicVRegister Simulator::ssubl2(VectorFormat vform,
2994                                 LogicVRegister dst,
2995                                 const LogicVRegister& src1,
2996                                 const LogicVRegister& src2) {
2997  SimVRegister temp1, temp2;
2998  sxtl2(vform, temp1, src1);
2999  sxtl2(vform, temp2, src2);
3000  sub(vform, dst, temp1, temp2);
3001  return dst;
3002}
3003
3004
3005LogicVRegister Simulator::ssubw(VectorFormat vform,
3006                                LogicVRegister dst,
3007                                const LogicVRegister& src1,
3008                                const LogicVRegister& src2) {
3009  SimVRegister temp;
3010  sxtl(vform, temp, src2);
3011  sub(vform, dst, src1, temp);
3012  return dst;
3013}
3014
3015
3016LogicVRegister Simulator::ssubw2(VectorFormat vform,
3017                                 LogicVRegister dst,
3018                                 const LogicVRegister& src1,
3019                                 const LogicVRegister& src2) {
3020  SimVRegister temp;
3021  sxtl2(vform, temp, src2);
3022  sub(vform, dst, src1, temp);
3023  return dst;
3024}
3025
3026
3027LogicVRegister Simulator::uabal(VectorFormat vform,
3028                                LogicVRegister dst,
3029                                const LogicVRegister& src1,
3030                                const LogicVRegister& src2) {
3031  SimVRegister temp1, temp2;
3032  uxtl(vform, temp1, src1);
3033  uxtl(vform, temp2, src2);
3034  uaba(vform, dst, temp1, temp2);
3035  return dst;
3036}
3037
3038
3039LogicVRegister Simulator::uabal2(VectorFormat vform,
3040                                 LogicVRegister dst,
3041                                 const LogicVRegister& src1,
3042                                 const LogicVRegister& src2) {
3043  SimVRegister temp1, temp2;
3044  uxtl2(vform, temp1, src1);
3045  uxtl2(vform, temp2, src2);
3046  uaba(vform, dst, temp1, temp2);
3047  return dst;
3048}
3049
3050
3051LogicVRegister Simulator::sabal(VectorFormat vform,
3052                                LogicVRegister dst,
3053                                const LogicVRegister& src1,
3054                                const LogicVRegister& src2) {
3055  SimVRegister temp1, temp2;
3056  sxtl(vform, temp1, src1);
3057  sxtl(vform, temp2, src2);
3058  saba(vform, dst, temp1, temp2);
3059  return dst;
3060}
3061
3062
3063LogicVRegister Simulator::sabal2(VectorFormat vform,
3064                                 LogicVRegister dst,
3065                                 const LogicVRegister& src1,
3066                                 const LogicVRegister& src2) {
3067  SimVRegister temp1, temp2;
3068  sxtl2(vform, temp1, src1);
3069  sxtl2(vform, temp2, src2);
3070  saba(vform, dst, temp1, temp2);
3071  return dst;
3072}
3073
3074
3075LogicVRegister Simulator::uabdl(VectorFormat vform,
3076                                LogicVRegister dst,
3077                                const LogicVRegister& src1,
3078                                const LogicVRegister& src2) {
3079  SimVRegister temp1, temp2;
3080  uxtl(vform, temp1, src1);
3081  uxtl(vform, temp2, src2);
3082  absdiff(vform, dst, temp1, temp2, false);
3083  return dst;
3084}
3085
3086
3087LogicVRegister Simulator::uabdl2(VectorFormat vform,
3088                                 LogicVRegister dst,
3089                                 const LogicVRegister& src1,
3090                                 const LogicVRegister& src2) {
3091  SimVRegister temp1, temp2;
3092  uxtl2(vform, temp1, src1);
3093  uxtl2(vform, temp2, src2);
3094  absdiff(vform, dst, temp1, temp2, false);
3095  return dst;
3096}
3097
3098
3099LogicVRegister Simulator::sabdl(VectorFormat vform,
3100                                LogicVRegister dst,
3101                                const LogicVRegister& src1,
3102                                const LogicVRegister& src2) {
3103  SimVRegister temp1, temp2;
3104  sxtl(vform, temp1, src1);
3105  sxtl(vform, temp2, src2);
3106  absdiff(vform, dst, temp1, temp2, true);
3107  return dst;
3108}
3109
3110
3111LogicVRegister Simulator::sabdl2(VectorFormat vform,
3112                                 LogicVRegister dst,
3113                                 const LogicVRegister& src1,
3114                                 const LogicVRegister& src2) {
3115  SimVRegister temp1, temp2;
3116  sxtl2(vform, temp1, src1);
3117  sxtl2(vform, temp2, src2);
3118  absdiff(vform, dst, temp1, temp2, true);
3119  return dst;
3120}
3121
3122
3123LogicVRegister Simulator::umull(VectorFormat vform,
3124                                LogicVRegister dst,
3125                                const LogicVRegister& src1,
3126                                const LogicVRegister& src2) {
3127  SimVRegister temp1, temp2;
3128  uxtl(vform, temp1, src1);
3129  uxtl(vform, temp2, src2);
3130  mul(vform, dst, temp1, temp2);
3131  return dst;
3132}
3133
3134
3135LogicVRegister Simulator::umull2(VectorFormat vform,
3136                                 LogicVRegister dst,
3137                                 const LogicVRegister& src1,
3138                                 const LogicVRegister& src2) {
3139  SimVRegister temp1, temp2;
3140  uxtl2(vform, temp1, src1);
3141  uxtl2(vform, temp2, src2);
3142  mul(vform, dst, temp1, temp2);
3143  return dst;
3144}
3145
3146
3147LogicVRegister Simulator::smull(VectorFormat vform,
3148                                LogicVRegister dst,
3149                                const LogicVRegister& src1,
3150                                const LogicVRegister& src2) {
3151  SimVRegister temp1, temp2;
3152  sxtl(vform, temp1, src1);
3153  sxtl(vform, temp2, src2);
3154  mul(vform, dst, temp1, temp2);
3155  return dst;
3156}
3157
3158
3159LogicVRegister Simulator::smull2(VectorFormat vform,
3160                                 LogicVRegister dst,
3161                                 const LogicVRegister& src1,
3162                                 const LogicVRegister& src2) {
3163  SimVRegister temp1, temp2;
3164  sxtl2(vform, temp1, src1);
3165  sxtl2(vform, temp2, src2);
3166  mul(vform, dst, temp1, temp2);
3167  return dst;
3168}
3169
3170
3171LogicVRegister Simulator::umlsl(VectorFormat vform,
3172                                LogicVRegister dst,
3173                                const LogicVRegister& src1,
3174                                const LogicVRegister& src2) {
3175  SimVRegister temp1, temp2;
3176  uxtl(vform, temp1, src1);
3177  uxtl(vform, temp2, src2);
3178  mls(vform, dst, temp1, temp2);
3179  return dst;
3180}
3181
3182
3183LogicVRegister Simulator::umlsl2(VectorFormat vform,
3184                                 LogicVRegister dst,
3185                                 const LogicVRegister& src1,
3186                                 const LogicVRegister& src2) {
3187  SimVRegister temp1, temp2;
3188  uxtl2(vform, temp1, src1);
3189  uxtl2(vform, temp2, src2);
3190  mls(vform, dst, temp1, temp2);
3191  return dst;
3192}
3193
3194
3195LogicVRegister Simulator::smlsl(VectorFormat vform,
3196                                LogicVRegister dst,
3197                                const LogicVRegister& src1,
3198                                const LogicVRegister& src2) {
3199  SimVRegister temp1, temp2;
3200  sxtl(vform, temp1, src1);
3201  sxtl(vform, temp2, src2);
3202  mls(vform, dst, temp1, temp2);
3203  return dst;
3204}
3205
3206
3207LogicVRegister Simulator::smlsl2(VectorFormat vform,
3208                                 LogicVRegister dst,
3209                                 const LogicVRegister& src1,
3210                                 const LogicVRegister& src2) {
3211  SimVRegister temp1, temp2;
3212  sxtl2(vform, temp1, src1);
3213  sxtl2(vform, temp2, src2);
3214  mls(vform, dst, temp1, temp2);
3215  return dst;
3216}
3217
3218
3219LogicVRegister Simulator::umlal(VectorFormat vform,
3220                                LogicVRegister dst,
3221                                const LogicVRegister& src1,
3222                                const LogicVRegister& src2) {
3223  SimVRegister temp1, temp2;
3224  uxtl(vform, temp1, src1);
3225  uxtl(vform, temp2, src2);
3226  mla(vform, dst, temp1, temp2);
3227  return dst;
3228}
3229
3230
3231LogicVRegister Simulator::umlal2(VectorFormat vform,
3232                                 LogicVRegister dst,
3233                                 const LogicVRegister& src1,
3234                                 const LogicVRegister& src2) {
3235  SimVRegister temp1, temp2;
3236  uxtl2(vform, temp1, src1);
3237  uxtl2(vform, temp2, src2);
3238  mla(vform, dst, temp1, temp2);
3239  return dst;
3240}
3241
3242
3243LogicVRegister Simulator::smlal(VectorFormat vform,
3244                                LogicVRegister dst,
3245                                const LogicVRegister& src1,
3246                                const LogicVRegister& src2) {
3247  SimVRegister temp1, temp2;
3248  sxtl(vform, temp1, src1);
3249  sxtl(vform, temp2, src2);
3250  mla(vform, dst, temp1, temp2);
3251  return dst;
3252}
3253
3254
3255LogicVRegister Simulator::smlal2(VectorFormat vform,
3256                                 LogicVRegister dst,
3257                                 const LogicVRegister& src1,
3258                                 const LogicVRegister& src2) {
3259  SimVRegister temp1, temp2;
3260  sxtl2(vform, temp1, src1);
3261  sxtl2(vform, temp2, src2);
3262  mla(vform, dst, temp1, temp2);
3263  return dst;
3264}
3265
3266
3267LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3268                                  LogicVRegister dst,
3269                                  const LogicVRegister& src1,
3270                                  const LogicVRegister& src2) {
3271  SimVRegister temp;
3272  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3273  return add(vform, dst, dst, product).SignedSaturate(vform);
3274}
3275
3276
3277LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3278                                   LogicVRegister dst,
3279                                   const LogicVRegister& src1,
3280                                   const LogicVRegister& src2) {
3281  SimVRegister temp;
3282  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3283  return add(vform, dst, dst, product).SignedSaturate(vform);
3284}
3285
3286
3287LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3288                                  LogicVRegister dst,
3289                                  const LogicVRegister& src1,
3290                                  const LogicVRegister& src2) {
3291  SimVRegister temp;
3292  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3293  return sub(vform, dst, dst, product).SignedSaturate(vform);
3294}
3295
3296
3297LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3298                                   LogicVRegister dst,
3299                                   const LogicVRegister& src1,
3300                                   const LogicVRegister& src2) {
3301  SimVRegister temp;
3302  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3303  return sub(vform, dst, dst, product).SignedSaturate(vform);
3304}
3305
3306
3307LogicVRegister Simulator::sqdmull(VectorFormat vform,
3308                                  LogicVRegister dst,
3309                                  const LogicVRegister& src1,
3310                                  const LogicVRegister& src2) {
3311  SimVRegister temp;
3312  LogicVRegister product = smull(vform, temp, src1, src2);
3313  return add(vform, dst, product, product).SignedSaturate(vform);
3314}
3315
3316
3317LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3318                                   LogicVRegister dst,
3319                                   const LogicVRegister& src1,
3320                                   const LogicVRegister& src2) {
3321  SimVRegister temp;
3322  LogicVRegister product = smull2(vform, temp, src1, src2);
3323  return add(vform, dst, product, product).SignedSaturate(vform);
3324}
3325
3326
3327LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3328                                   LogicVRegister dst,
3329                                   const LogicVRegister& src1,
3330                                   const LogicVRegister& src2,
3331                                   bool round) {
3332  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3333  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3334  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3335
3336  int esize = LaneSizeInBitsFromFormat(vform);
3337  int round_const = round ? (1 << (esize - 2)) : 0;
3338  int64_t product;
3339
3340  dst.ClearForWrite(vform);
3341  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3342    product = src1.Int(vform, i) * src2.Int(vform, i);
3343    product += round_const;
3344    product = product >> (esize - 1);
3345
3346    if (product > MaxIntFromFormat(vform)) {
3347      product = MaxIntFromFormat(vform);
3348    } else if (product < MinIntFromFormat(vform)) {
3349      product = MinIntFromFormat(vform);
3350    }
3351    dst.SetInt(vform, i, product);
3352  }
3353  return dst;
3354}
3355
3356
3357LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3358                                  LogicVRegister dst,
3359                                  const LogicVRegister& src1,
3360                                  const LogicVRegister& src2) {
3361  return sqrdmulh(vform, dst, src1, src2, false);
3362}
3363
3364
3365LogicVRegister Simulator::addhn(VectorFormat vform,
3366                                LogicVRegister dst,
3367                                const LogicVRegister& src1,
3368                                const LogicVRegister& src2) {
3369  SimVRegister temp;
3370  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3371  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3372  return dst;
3373}
3374
3375
3376LogicVRegister Simulator::addhn2(VectorFormat vform,
3377                                 LogicVRegister dst,
3378                                 const LogicVRegister& src1,
3379                                 const LogicVRegister& src2) {
3380  SimVRegister temp;
3381  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3382  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3383  return dst;
3384}
3385
3386
3387LogicVRegister Simulator::raddhn(VectorFormat vform,
3388                                 LogicVRegister dst,
3389                                 const LogicVRegister& src1,
3390                                 const LogicVRegister& src2) {
3391  SimVRegister temp;
3392  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3393  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3394  return dst;
3395}
3396
3397
3398LogicVRegister Simulator::raddhn2(VectorFormat vform,
3399                                  LogicVRegister dst,
3400                                  const LogicVRegister& src1,
3401                                  const LogicVRegister& src2) {
3402  SimVRegister temp;
3403  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3404  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3405  return dst;
3406}
3407
3408
3409LogicVRegister Simulator::subhn(VectorFormat vform,
3410                                LogicVRegister dst,
3411                                const LogicVRegister& src1,
3412                                const LogicVRegister& src2) {
3413  SimVRegister temp;
3414  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3415  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3416  return dst;
3417}
3418
3419
3420LogicVRegister Simulator::subhn2(VectorFormat vform,
3421                                 LogicVRegister dst,
3422                                 const LogicVRegister& src1,
3423                                 const LogicVRegister& src2) {
3424  SimVRegister temp;
3425  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3426  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3427  return dst;
3428}
3429
3430
3431LogicVRegister Simulator::rsubhn(VectorFormat vform,
3432                                 LogicVRegister dst,
3433                                 const LogicVRegister& src1,
3434                                 const LogicVRegister& src2) {
3435  SimVRegister temp;
3436  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3437  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3438  return dst;
3439}
3440
3441
3442LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3443                                  LogicVRegister dst,
3444                                  const LogicVRegister& src1,
3445                                  const LogicVRegister& src2) {
3446  SimVRegister temp;
3447  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3448  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3449  return dst;
3450}
3451
3452
3453LogicVRegister Simulator::trn1(VectorFormat vform,
3454                               LogicVRegister dst,
3455                               const LogicVRegister& src1,
3456                               const LogicVRegister& src2) {
3457  uint64_t result[16];
3458  int laneCount = LaneCountFromFormat(vform);
3459  int pairs = laneCount / 2;
3460  for (int i = 0; i < pairs; ++i) {
3461    result[2 * i] = src1.Uint(vform, 2 * i);
3462    result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3463  }
3464
3465  dst.ClearForWrite(vform);
3466  for (int i = 0; i < laneCount; ++i) {
3467    dst.SetUint(vform, i, result[i]);
3468  }
3469  return dst;
3470}
3471
3472
3473LogicVRegister Simulator::trn2(VectorFormat vform,
3474                               LogicVRegister dst,
3475                               const LogicVRegister& src1,
3476                               const LogicVRegister& src2) {
3477  uint64_t result[16];
3478  int laneCount = LaneCountFromFormat(vform);
3479  int pairs = laneCount / 2;
3480  for (int i = 0; i < pairs; ++i) {
3481    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3482    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3483  }
3484
3485  dst.ClearForWrite(vform);
3486  for (int i = 0; i < laneCount; ++i) {
3487    dst.SetUint(vform, i, result[i]);
3488  }
3489  return dst;
3490}
3491
3492
3493LogicVRegister Simulator::zip1(VectorFormat vform,
3494                               LogicVRegister dst,
3495                               const LogicVRegister& src1,
3496                               const LogicVRegister& src2) {
3497  uint64_t result[16];
3498  int laneCount = LaneCountFromFormat(vform);
3499  int pairs = laneCount / 2;
3500  for (int i = 0; i < pairs; ++i) {
3501    result[2 * i] = src1.Uint(vform, i);
3502    result[(2 * i) + 1] = src2.Uint(vform, i);
3503  }
3504
3505  dst.ClearForWrite(vform);
3506  for (int i = 0; i < laneCount; ++i) {
3507    dst.SetUint(vform, i, result[i]);
3508  }
3509  return dst;
3510}
3511
3512
3513LogicVRegister Simulator::zip2(VectorFormat vform,
3514                               LogicVRegister dst,
3515                               const LogicVRegister& src1,
3516                               const LogicVRegister& src2) {
3517  uint64_t result[16];
3518  int laneCount = LaneCountFromFormat(vform);
3519  int pairs = laneCount / 2;
3520  for (int i = 0; i < pairs; ++i) {
3521    result[2 * i] = src1.Uint(vform, pairs + i);
3522    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3523  }
3524
3525  dst.ClearForWrite(vform);
3526  for (int i = 0; i < laneCount; ++i) {
3527    dst.SetUint(vform, i, result[i]);
3528  }
3529  return dst;
3530}
3531
3532
3533LogicVRegister Simulator::uzp1(VectorFormat vform,
3534                               LogicVRegister dst,
3535                               const LogicVRegister& src1,
3536                               const LogicVRegister& src2) {
3537  uint64_t result[32];
3538  int laneCount = LaneCountFromFormat(vform);
3539  for (int i = 0; i < laneCount; ++i) {
3540    result[i] = src1.Uint(vform, i);
3541    result[laneCount + i] = src2.Uint(vform, i);
3542  }
3543
3544  dst.ClearForWrite(vform);
3545  for (int i = 0; i < laneCount; ++i) {
3546    dst.SetUint(vform, i, result[2 * i]);
3547  }
3548  return dst;
3549}
3550
3551
3552LogicVRegister Simulator::uzp2(VectorFormat vform,
3553                               LogicVRegister dst,
3554                               const LogicVRegister& src1,
3555                               const LogicVRegister& src2) {
3556  uint64_t result[32];
3557  int laneCount = LaneCountFromFormat(vform);
3558  for (int i = 0; i < laneCount; ++i) {
3559    result[i] = src1.Uint(vform, i);
3560    result[laneCount + i] = src2.Uint(vform, i);
3561  }
3562
3563  dst.ClearForWrite(vform);
3564  for (int i = 0; i < laneCount; ++i) {
3565    dst.SetUint(vform, i, result[(2 * i) + 1]);
3566  }
3567  return dst;
3568}
3569
3570
3571template <typename T>
3572T Simulator::FPAdd(T op1, T op2) {
3573  T result = FPProcessNaNs(op1, op2);
3574  if (std::isnan(result)) return result;
3575
3576  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3577    // inf + -inf returns the default NaN.
3578    FPProcessException();
3579    return FPDefaultNaN<T>();
3580  } else {
3581    // Other cases should be handled by standard arithmetic.
3582    return op1 + op2;
3583  }
3584}
3585
3586
3587template <typename T>
3588T Simulator::FPSub(T op1, T op2) {
3589  // NaNs should be handled elsewhere.
3590  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3591
3592  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3593    // inf - inf returns the default NaN.
3594    FPProcessException();
3595    return FPDefaultNaN<T>();
3596  } else {
3597    // Other cases should be handled by standard arithmetic.
3598    return op1 - op2;
3599  }
3600}
3601
3602
3603template <typename T>
3604T Simulator::FPMul(T op1, T op2) {
3605  // NaNs should be handled elsewhere.
3606  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3607
3608  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3609    // inf * 0.0 returns the default NaN.
3610    FPProcessException();
3611    return FPDefaultNaN<T>();
3612  } else {
3613    // Other cases should be handled by standard arithmetic.
3614    return op1 * op2;
3615  }
3616}
3617
3618
3619template <typename T>
3620T Simulator::FPMulx(T op1, T op2) {
3621  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3622    // inf * 0.0 returns +/-2.0.
3623    T two = 2.0;
3624    return copysign(1.0, op1) * copysign(1.0, op2) * two;
3625  }
3626  return FPMul(op1, op2);
3627}
3628
3629
// Fused multiply-add: computes a + (op1 * op2).
//
// NaN operands are resolved with FPProcessNaNs3. Operations that would
// themselves generate a NaN (inf * 0.0, or an infinite addend combined with an
// infinite product of opposite sign) raise an FP exception and return the
// default NaN; such a generated NaN takes precedence over a quiet NaN
// propagated from `a`. Zero results get their sign fixed up explicitly rather
// than trusting the host fma.
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs are taken with copysign so that signed zeros and infinities are
  // classified correctly.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
  bool operation_generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (std::isnan(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
  }

  // No special case applies: defer to the fused multiply-add helper.
  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!std::isnan(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}
3675
3676
3677template <typename T>
3678T Simulator::FPDiv(T op1, T op2) {
3679  // NaNs should be handled elsewhere.
3680  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3681
3682  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3683    // inf / inf and 0.0 / 0.0 return the default NaN.
3684    FPProcessException();
3685    return FPDefaultNaN<T>();
3686  } else {
3687    if (op2 == 0.0) {
3688      FPProcessException();
3689      if (!std::isnan(op1)) {
3690        double op1_sign = copysign(1.0, op1);
3691        double op2_sign = copysign(1.0, op2);
3692        return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3693      }
3694    }
3695
3696    // Other cases should be handled by standard arithmetic.
3697    return op1 / op2;
3698  }
3699}
3700
3701
3702template <typename T>
3703T Simulator::FPSqrt(T op) {
3704  if (std::isnan(op)) {
3705    return FPProcessNaN(op);
3706  } else if (op < 0.0) {
3707    FPProcessException();
3708    return FPDefaultNaN<T>();
3709  } else {
3710    return sqrt(op);
3711  }
3712}
3713
3714
3715template <typename T>
3716T Simulator::FPMax(T a, T b) {
3717  T result = FPProcessNaNs(a, b);
3718  if (std::isnan(result)) return result;
3719
3720  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3721    // a and b are zero, and the sign differs: return +0.0.
3722    return 0.0;
3723  } else {
3724    return (a > b) ? a : b;
3725  }
3726}
3727
3728
3729template <typename T>
3730T Simulator::FPMaxNM(T a, T b) {
3731  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3732    a = kFP64NegativeInfinity;
3733  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3734    b = kFP64NegativeInfinity;
3735  }
3736
3737  T result = FPProcessNaNs(a, b);
3738  return std::isnan(result) ? result : FPMax(a, b);
3739}
3740
3741
3742template <typename T>
3743T Simulator::FPMin(T a, T b) {
3744  T result = FPProcessNaNs(a, b);
3745  if (std::isnan(result)) return result;
3746
3747  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3748    // a and b are zero, and the sign differs: return -0.0.
3749    return -0.0;
3750  } else {
3751    return (a < b) ? a : b;
3752  }
3753}
3754
3755
3756template <typename T>
3757T Simulator::FPMinNM(T a, T b) {
3758  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3759    a = kFP64PositiveInfinity;
3760  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3761    b = kFP64PositiveInfinity;
3762  }
3763
3764  T result = FPProcessNaNs(a, b);
3765  return std::isnan(result) ? result : FPMin(a, b);
3766}
3767
3768
3769template <typename T>
3770T Simulator::FPRecipStepFused(T op1, T op2) {
3771  const T two = 2.0;
3772  if ((std::isinf(op1) && (op2 == 0.0)) ||
3773      ((op1 == 0.0) && (std::isinf(op2)))) {
3774    return two;
3775  } else if (std::isinf(op1) || std::isinf(op2)) {
3776    // Return +inf if signs match, otherwise -inf.
3777    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3778                                          : kFP64NegativeInfinity;
3779  } else {
3780    return FusedMultiplyAdd(op1, op2, two);
3781  }
3782}
3783
3784
3785template <typename T>
3786T Simulator::FPRSqrtStepFused(T op1, T op2) {
3787  const T one_point_five = 1.5;
3788  const T two = 2.0;
3789
3790  if ((std::isinf(op1) && (op2 == 0.0)) ||
3791      ((op1 == 0.0) && (std::isinf(op2)))) {
3792    return one_point_five;
3793  } else if (std::isinf(op1) || std::isinf(op2)) {
3794    // Return +inf if signs match, otherwise -inf.
3795    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3796                                          : kFP64NegativeInfinity;
3797  } else {
3798    // The multiply-add-halve operation must be fully fused, so avoid interim
3799    // rounding by checking which operand can be losslessly divided by two
3800    // before doing the multiply-add.
3801    if (std::isnormal(op1 / two)) {
3802      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3803    } else if (std::isnormal(op2 / two)) {
3804      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3805    } else {
3806      // Neither operand is normal after halving: the result is dominated by
3807      // the addition term, so just return that.
3808      return one_point_five;
3809    }
3810  }
3811}
3812
3813
// Rounds `value` to an integral value, returned as a double, using
// `round_mode`.
//
// Zeros and infinities are returned unchanged and NaNs are passed through
// FPProcessNaN. Otherwise the function starts from floor(value) and adjusts it
// according to the rounding mode; `error` is the (non-negative) distance from
// that floor to `value`. Negative inputs that round to zero yield -0.0.
// Rounding modes without a case below hit VIXL_UNIMPLEMENTED().
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    return value;
  } else if (std::isnan(value)) {
    return FPProcessNaN(value);
  }

  double int_result = std::floor(value);
  double error = value - int_result;
  switch (round_mode) {
    case FPTieAway: {
      // Take care of correctly handling the range ]-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 < value) && (value < 0.0)) {
        int_result = -0.0;

      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is positive, round up.
        int_result++;
      }
      break;
    }
    case FPTieEven: {
      // Take care of correctly handling the range [-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 <= value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is odd, round up.
      } else if ((error > 0.5) ||
                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
        int_result++;
      }
      break;
    }
    case FPZero: {
      // If value>0 then we take floor(value)
      // otherwise, ceil(value).
      if (value < 0) {
        int_result = ceil(value);
      }
      break;
    }
    case FPNegativeInfinity: {
      // We always use floor(value).
      break;
    }
    case FPPositiveInfinity: {
      // Take care of correctly handling the range ]-1.0, -0.0], which must
      // yield -0.0.
      if ((-1.0 < value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is non-zero, round up.
      } else if (error > 0.0) {
        int_result++;
      }
      break;
    }
    default:
      VIXL_UNIMPLEMENTED();
  }
  return int_result;
}
3881
3882
3883int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3884  value = FPRoundInt(value, rmode);
3885  if (value >= kWMaxInt) {
3886    return kWMaxInt;
3887  } else if (value < kWMinInt) {
3888    return kWMinInt;
3889  }
3890  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3891}
3892
3893
3894int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3895  value = FPRoundInt(value, rmode);
3896  if (value >= kXMaxInt) {
3897    return kXMaxInt;
3898  } else if (value < kXMinInt) {
3899    return kXMinInt;
3900  }
3901  return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3902}
3903
3904
3905uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3906  value = FPRoundInt(value, rmode);
3907  if (value >= kWMaxUInt) {
3908    return kWMaxUInt;
3909  } else if (value < 0.0) {
3910    return 0;
3911  }
3912  return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3913}
3914
3915
3916uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3917  value = FPRoundInt(value, rmode);
3918  if (value >= kXMaxUInt) {
3919    return kXMaxUInt;
3920  } else if (value < 0.0) {
3921    return 0;
3922  }
3923  return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3924}
3925
3926
// Defines, for every entry of NEON_FP3SAME_LIST, a pair of Simulator methods
// named FN:
//  - a template <typename T> version that applies the scalar helper OP to each
//    pair of lanes of src1 and src2 and stores the result in dst. When PROCNAN
//    is true, the lanes first go through FPProcessNaNs and OP is only called
//    if that result is not a NaN; otherwise OP is called directly.
//  - a non-template version that dispatches to FN<float> when the lane size is
//    kSRegSize and to FN<double> otherwise (asserting the size is kDRegSize).
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    dst.ClearForWrite(vform);                                    \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
      T op1 = src1.Float<T>(i);                                  \
      T op2 = src2.Float<T>(i);                                  \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(op1, op2);                        \
        if (!std::isnan(result)) {                               \
          result = OP(op1, op2);                                 \
        }                                                        \
      } else {                                                   \
        result = OP(op1, op2);                                   \
      }                                                          \
      dst.SetFloat(i, result);                                   \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3965
3966
3967LogicVRegister Simulator::fnmul(VectorFormat vform,
3968                                LogicVRegister dst,
3969                                const LogicVRegister& src1,
3970                                const LogicVRegister& src2) {
3971  SimVRegister temp;
3972  LogicVRegister product = fmul(vform, temp, src1, src2);
3973  return fneg(vform, dst, product);
3974}
3975
3976
3977template <typename T>
3978LogicVRegister Simulator::frecps(VectorFormat vform,
3979                                 LogicVRegister dst,
3980                                 const LogicVRegister& src1,
3981                                 const LogicVRegister& src2) {
3982  dst.ClearForWrite(vform);
3983  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3984    T op1 = -src1.Float<T>(i);
3985    T op2 = src2.Float<T>(i);
3986    T result = FPProcessNaNs(op1, op2);
3987    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3988  }
3989  return dst;
3990}
3991
3992
3993LogicVRegister Simulator::frecps(VectorFormat vform,
3994                                 LogicVRegister dst,
3995                                 const LogicVRegister& src1,
3996                                 const LogicVRegister& src2) {
3997  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3998    frecps<float>(vform, dst, src1, src2);
3999  } else {
4000    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4001    frecps<double>(vform, dst, src1, src2);
4002  }
4003  return dst;
4004}
4005
4006
4007template <typename T>
4008LogicVRegister Simulator::frsqrts(VectorFormat vform,
4009                                  LogicVRegister dst,
4010                                  const LogicVRegister& src1,
4011                                  const LogicVRegister& src2) {
4012  dst.ClearForWrite(vform);
4013  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4014    T op1 = -src1.Float<T>(i);
4015    T op2 = src2.Float<T>(i);
4016    T result = FPProcessNaNs(op1, op2);
4017    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
4018  }
4019  return dst;
4020}
4021
4022
4023LogicVRegister Simulator::frsqrts(VectorFormat vform,
4024                                  LogicVRegister dst,
4025                                  const LogicVRegister& src1,
4026                                  const LogicVRegister& src2) {
4027  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4028    frsqrts<float>(vform, dst, src1, src2);
4029  } else {
4030    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4031    frsqrts<double>(vform, dst, src1, src2);
4032  }
4033  return dst;
4034}
4035
4036
4037template <typename T>
4038LogicVRegister Simulator::fcmp(VectorFormat vform,
4039                               LogicVRegister dst,
4040                               const LogicVRegister& src1,
4041                               const LogicVRegister& src2,
4042                               Condition cond) {
4043  dst.ClearForWrite(vform);
4044  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4045    bool result = false;
4046    T op1 = src1.Float<T>(i);
4047    T op2 = src2.Float<T>(i);
4048    T nan_result = FPProcessNaNs(op1, op2);
4049    if (!std::isnan(nan_result)) {
4050      switch (cond) {
4051        case eq:
4052          result = (op1 == op2);
4053          break;
4054        case ge:
4055          result = (op1 >= op2);
4056          break;
4057        case gt:
4058          result = (op1 > op2);
4059          break;
4060        case le:
4061          result = (op1 <= op2);
4062          break;
4063        case lt:
4064          result = (op1 < op2);
4065          break;
4066        default:
4067          VIXL_UNREACHABLE();
4068          break;
4069      }
4070    }
4071    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4072  }
4073  return dst;
4074}
4075
4076
4077LogicVRegister Simulator::fcmp(VectorFormat vform,
4078                               LogicVRegister dst,
4079                               const LogicVRegister& src1,
4080                               const LogicVRegister& src2,
4081                               Condition cond) {
4082  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4083    fcmp<float>(vform, dst, src1, src2, cond);
4084  } else {
4085    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4086    fcmp<double>(vform, dst, src1, src2, cond);
4087  }
4088  return dst;
4089}
4090
4091
4092LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4093                                    LogicVRegister dst,
4094                                    const LogicVRegister& src,
4095                                    Condition cond) {
4096  SimVRegister temp;
4097  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4098    LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4099    fcmp<float>(vform, dst, src, zero_reg, cond);
4100  } else {
4101    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4102    LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4103    fcmp<double>(vform, dst, src, zero_reg, cond);
4104  }
4105  return dst;
4106}
4107
4108
4109LogicVRegister Simulator::fabscmp(VectorFormat vform,
4110                                  LogicVRegister dst,
4111                                  const LogicVRegister& src1,
4112                                  const LogicVRegister& src2,
4113                                  Condition cond) {
4114  SimVRegister temp1, temp2;
4115  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4116    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4117    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4118    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4119  } else {
4120    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4121    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4122    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4123    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4124  }
4125  return dst;
4126}
4127
4128
4129template <typename T>
4130LogicVRegister Simulator::fmla(VectorFormat vform,
4131                               LogicVRegister dst,
4132                               const LogicVRegister& src1,
4133                               const LogicVRegister& src2) {
4134  dst.ClearForWrite(vform);
4135  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4136    T op1 = src1.Float<T>(i);
4137    T op2 = src2.Float<T>(i);
4138    T acc = dst.Float<T>(i);
4139    T result = FPMulAdd(acc, op1, op2);
4140    dst.SetFloat(i, result);
4141  }
4142  return dst;
4143}
4144
4145
4146LogicVRegister Simulator::fmla(VectorFormat vform,
4147                               LogicVRegister dst,
4148                               const LogicVRegister& src1,
4149                               const LogicVRegister& src2) {
4150  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4151    fmla<float>(vform, dst, src1, src2);
4152  } else {
4153    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4154    fmla<double>(vform, dst, src1, src2);
4155  }
4156  return dst;
4157}
4158
4159
4160template <typename T>
4161LogicVRegister Simulator::fmls(VectorFormat vform,
4162                               LogicVRegister dst,
4163                               const LogicVRegister& src1,
4164                               const LogicVRegister& src2) {
4165  dst.ClearForWrite(vform);
4166  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4167    T op1 = -src1.Float<T>(i);
4168    T op2 = src2.Float<T>(i);
4169    T acc = dst.Float<T>(i);
4170    T result = FPMulAdd(acc, op1, op2);
4171    dst.SetFloat(i, result);
4172  }
4173  return dst;
4174}
4175
4176
4177LogicVRegister Simulator::fmls(VectorFormat vform,
4178                               LogicVRegister dst,
4179                               const LogicVRegister& src1,
4180                               const LogicVRegister& src2) {
4181  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4182    fmls<float>(vform, dst, src1, src2);
4183  } else {
4184    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4185    fmls<double>(vform, dst, src1, src2);
4186  }
4187  return dst;
4188}
4189
4190
4191template <typename T>
4192LogicVRegister Simulator::fneg(VectorFormat vform,
4193                               LogicVRegister dst,
4194                               const LogicVRegister& src) {
4195  dst.ClearForWrite(vform);
4196  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4197    T op = src.Float<T>(i);
4198    op = -op;
4199    dst.SetFloat(i, op);
4200  }
4201  return dst;
4202}
4203
4204
4205LogicVRegister Simulator::fneg(VectorFormat vform,
4206                               LogicVRegister dst,
4207                               const LogicVRegister& src) {
4208  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4209    fneg<float>(vform, dst, src);
4210  } else {
4211    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4212    fneg<double>(vform, dst, src);
4213  }
4214  return dst;
4215}
4216
4217
4218template <typename T>
4219LogicVRegister Simulator::fabs_(VectorFormat vform,
4220                                LogicVRegister dst,
4221                                const LogicVRegister& src) {
4222  dst.ClearForWrite(vform);
4223  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4224    T op = src.Float<T>(i);
4225    if (copysign(1.0, op) < 0.0) {
4226      op = -op;
4227    }
4228    dst.SetFloat(i, op);
4229  }
4230  return dst;
4231}
4232
4233
4234LogicVRegister Simulator::fabs_(VectorFormat vform,
4235                                LogicVRegister dst,
4236                                const LogicVRegister& src) {
4237  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4238    fabs_<float>(vform, dst, src);
4239  } else {
4240    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4241    fabs_<double>(vform, dst, src);
4242  }
4243  return dst;
4244}
4245
4246
4247LogicVRegister Simulator::fabd(VectorFormat vform,
4248                               LogicVRegister dst,
4249                               const LogicVRegister& src1,
4250                               const LogicVRegister& src2) {
4251  SimVRegister temp;
4252  fsub(vform, temp, src1, src2);
4253  fabs_(vform, dst, temp);
4254  return dst;
4255}
4256
4257
4258LogicVRegister Simulator::fsqrt(VectorFormat vform,
4259                                LogicVRegister dst,
4260                                const LogicVRegister& src) {
4261  dst.ClearForWrite(vform);
4262  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4263    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4264      float result = FPSqrt(src.Float<float>(i));
4265      dst.SetFloat(i, result);
4266    }
4267  } else {
4268    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4269    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4270      double result = FPSqrt(src.Float<double>(i));
4271      dst.SetFloat(i, result);
4272    }
4273  }
4274  return dst;
4275}
4276
4277
// Defines, for every entry of NEON_FPPAIRWISE_LIST, two overloads of the
// pairwise Simulator method FNP:
//  - a vector form that de-interleaves src1:src2 with uzp1/uzp2 into two
//    scratch registers and combines them lane-wise with FN.
//  - a scalar form (vform is kFormatS or kFormatD) that applies the scalar
//    helper OP to lanes 0 and 1 of src, stores the result in lane 0 of dst and
//    then calls ClearForWrite on dst.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                           \
  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
                                LogicVRegister dst,                   \
                                const LogicVRegister& src1,           \
                                const LogicVRegister& src2) {         \
    SimVRegister temp1, temp2;                                        \
    uzp1(vform, temp1, src1, src2);                                   \
    uzp2(vform, temp2, src1, src2);                                   \
    FN(vform, dst, temp1, temp2);                                     \
    return dst;                                                       \
  }                                                                   \
                                                                      \
  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
                                LogicVRegister dst,                   \
                                const LogicVRegister& src) {          \
    if (vform == kFormatS) {                                          \
      float result = OP(src.Float<float>(0), src.Float<float>(1));    \
      dst.SetFloat(0, result);                                        \
    } else {                                                          \
      VIXL_ASSERT(vform == kFormatD);                                 \
      double result = OP(src.Float<double>(0), src.Float<double>(1)); \
      dst.SetFloat(0, result);                                        \
    }                                                                 \
    dst.ClearForWrite(vform);                                         \
    return dst;                                                       \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4306
4307
4308LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4309                                   LogicVRegister dst,
4310                                   const LogicVRegister& src,
4311                                   FPMinMaxOp Op) {
4312  VIXL_ASSERT(vform == kFormat4S);
4313  USE(vform);
4314  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4315  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4316  float result = (this->*Op)(result1, result2);
4317  dst.ClearForWrite(kFormatS);
4318  dst.SetFloat<float>(0, result);
4319  return dst;
4320}
4321
4322
4323LogicVRegister Simulator::fmaxv(VectorFormat vform,
4324                                LogicVRegister dst,
4325                                const LogicVRegister& src) {
4326  return fminmaxv(vform, dst, src, &Simulator::FPMax);
4327}
4328
4329
4330LogicVRegister Simulator::fminv(VectorFormat vform,
4331                                LogicVRegister dst,
4332                                const LogicVRegister& src) {
4333  return fminmaxv(vform, dst, src, &Simulator::FPMin);
4334}
4335
4336
4337LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4338                                  LogicVRegister dst,
4339                                  const LogicVRegister& src) {
4340  return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4341}
4342
4343
4344LogicVRegister Simulator::fminnmv(VectorFormat vform,
4345                                  LogicVRegister dst,
4346                                  const LogicVRegister& src) {
4347  return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4348}
4349
4350
4351LogicVRegister Simulator::fmul(VectorFormat vform,
4352                               LogicVRegister dst,
4353                               const LogicVRegister& src1,
4354                               const LogicVRegister& src2,
4355                               int index) {
4356  dst.ClearForWrite(vform);
4357  SimVRegister temp;
4358  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4359    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4360    fmul<float>(vform, dst, src1, index_reg);
4361
4362  } else {
4363    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4364    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4365    fmul<double>(vform, dst, src1, index_reg);
4366  }
4367  return dst;
4368}
4369
4370
4371LogicVRegister Simulator::fmla(VectorFormat vform,
4372                               LogicVRegister dst,
4373                               const LogicVRegister& src1,
4374                               const LogicVRegister& src2,
4375                               int index) {
4376  dst.ClearForWrite(vform);
4377  SimVRegister temp;
4378  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4379    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4380    fmla<float>(vform, dst, src1, index_reg);
4381
4382  } else {
4383    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4384    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4385    fmla<double>(vform, dst, src1, index_reg);
4386  }
4387  return dst;
4388}
4389
4390
4391LogicVRegister Simulator::fmls(VectorFormat vform,
4392                               LogicVRegister dst,
4393                               const LogicVRegister& src1,
4394                               const LogicVRegister& src2,
4395                               int index) {
4396  dst.ClearForWrite(vform);
4397  SimVRegister temp;
4398  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4399    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4400    fmls<float>(vform, dst, src1, index_reg);
4401
4402  } else {
4403    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4404    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4405    fmls<double>(vform, dst, src1, index_reg);
4406  }
4407  return dst;
4408}
4409
4410
4411LogicVRegister Simulator::fmulx(VectorFormat vform,
4412                                LogicVRegister dst,
4413                                const LogicVRegister& src1,
4414                                const LogicVRegister& src2,
4415                                int index) {
4416  dst.ClearForWrite(vform);
4417  SimVRegister temp;
4418  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4419    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4420    fmulx<float>(vform, dst, src1, index_reg);
4421
4422  } else {
4423    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4424    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4425    fmulx<double>(vform, dst, src1, index_reg);
4426  }
4427  return dst;
4428}
4429
4430
4431LogicVRegister Simulator::frint(VectorFormat vform,
4432                                LogicVRegister dst,
4433                                const LogicVRegister& src,
4434                                FPRounding rounding_mode,
4435                                bool inexact_exception) {
4436  dst.ClearForWrite(vform);
4437  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4438    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4439      float input = src.Float<float>(i);
4440      float rounded = FPRoundInt(input, rounding_mode);
4441      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4442        FPProcessException();
4443      }
4444      dst.SetFloat<float>(i, rounded);
4445    }
4446  } else {
4447    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4448    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4449      double input = src.Float<double>(i);
4450      double rounded = FPRoundInt(input, rounding_mode);
4451      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4452        FPProcessException();
4453      }
4454      dst.SetFloat<double>(i, rounded);
4455    }
4456  }
4457  return dst;
4458}
4459
4460
4461LogicVRegister Simulator::fcvts(VectorFormat vform,
4462                                LogicVRegister dst,
4463                                const LogicVRegister& src,
4464                                FPRounding rounding_mode,
4465                                int fbits) {
4466  dst.ClearForWrite(vform);
4467  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4468    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4469      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4470      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4471    }
4472  } else {
4473    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4474    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4475      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4476      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4477    }
4478  }
4479  return dst;
4480}
4481
4482
4483LogicVRegister Simulator::fcvtu(VectorFormat vform,
4484                                LogicVRegister dst,
4485                                const LogicVRegister& src,
4486                                FPRounding rounding_mode,
4487                                int fbits) {
4488  dst.ClearForWrite(vform);
4489  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4490    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4491      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4492      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4493    }
4494  } else {
4495    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4496    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4497      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4498      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4499    }
4500  }
4501  return dst;
4502}
4503
4504
4505LogicVRegister Simulator::fcvtl(VectorFormat vform,
4506                                LogicVRegister dst,
4507                                const LogicVRegister& src) {
4508  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4509    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4510      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4511    }
4512  } else {
4513    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4514    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4515      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4516    }
4517  }
4518  return dst;
4519}
4520
4521
4522LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4523                                 LogicVRegister dst,
4524                                 const LogicVRegister& src) {
4525  int lane_count = LaneCountFromFormat(vform);
4526  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4527    for (int i = 0; i < lane_count; i++) {
4528      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4529    }
4530  } else {
4531    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4532    for (int i = 0; i < lane_count; i++) {
4533      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4534    }
4535  }
4536  return dst;
4537}
4538
4539
4540LogicVRegister Simulator::fcvtn(VectorFormat vform,
4541                                LogicVRegister dst,
4542                                const LogicVRegister& src) {
4543  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4544    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4545      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4546    }
4547  } else {
4548    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4549    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4550      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4551    }
4552  }
4553  return dst;
4554}
4555
4556
4557LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4558                                 LogicVRegister dst,
4559                                 const LogicVRegister& src) {
4560  int lane_count = LaneCountFromFormat(vform) / 2;
4561  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4562    for (int i = lane_count - 1; i >= 0; i--) {
4563      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4564    }
4565  } else {
4566    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4567    for (int i = lane_count - 1; i >= 0; i--) {
4568      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4569    }
4570  }
4571  return dst;
4572}
4573
4574
4575LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4576                                 LogicVRegister dst,
4577                                 const LogicVRegister& src) {
4578  dst.ClearForWrite(vform);
4579  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4580  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4581    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4582  }
4583  return dst;
4584}
4585
4586
4587LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4588                                  LogicVRegister dst,
4589                                  const LogicVRegister& src) {
4590  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4591  int lane_count = LaneCountFromFormat(vform) / 2;
4592  for (int i = lane_count - 1; i >= 0; i--) {
4593    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4594  }
4595  return dst;
4596}
4597
4598
4599// Based on reference C function recip_sqrt_estimate from ARM ARM.
4600double Simulator::recip_sqrt_estimate(double a) {
4601  int q0, q1, s;
4602  double r;
4603  if (a < 0.5) {
4604    q0 = static_cast<int>(a * 512.0);
4605    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4606  } else {
4607    q1 = static_cast<int>(a * 256.0);
4608    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4609  }
4610  s = static_cast<int>(256.0 * r + 0.5);
4611  return static_cast<double>(s) / 256.0;
4612}
4613
4614
// Shorthand for ExtractUnsignedBitfield64(start_bit, end_bit, val).
// Callers in this file pass the higher bit index first, e.g.
// Bits(fraction, 51, 44), so start_bit is the most significant bit of the
// field and end_bit the least significant.
static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
}
4618
4619
// Floating-point reciprocal square root estimate of `op`, for T = float or
// double.
//
// Special cases, in the order they are tested:
//   - NaN: the result of FPProcessNaN(op).
//   - Zero: infinity carrying the sign of the zero.
//   - Any other negative value (including -infinity): FPProcessException() is
//     called and the default NaN is returned.
//   - +infinity: 0.0.
// Otherwise the operand is unpacked, scaled so that recip_sqrt_estimate() can
// be applied to it, and the estimate is repacked with a recomputed exponent.
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
  if (std::isnan(op)) {
    return FPProcessNaN(op);
  } else if (op == 0.0) {
    // copysign distinguishes -0.0 from +0.0, which compare equal.
    if (copysign(1.0, op) < 0.0) {
      return kFP64NegativeInfinity;
    } else {
      return kFP64PositiveInfinity;
    }
  } else if (copysign(1.0, op) < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else if (std::isinf(op)) {
    return 0.0;
  } else {
    uint64_t fraction;
    int exp, result_exp;

    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
      exp = FloatExp(op);
      fraction = FloatMantissa(op);
      // Move the single-precision mantissa up so that its top bit sits at bit
      // 51, the position tested and sliced below for both precisions.
      fraction <<= 29;
    } else {
      exp = DoubleExp(op);
      fraction = DoubleMantissa(op);
    }

    if (exp == 0) {
      // Zero exponent field: shift the fraction left, decrementing exp, until
      // bit 51 is set, then shift once more so that bit is dropped.
      while (Bits(fraction, 51, 51) == 0) {
        fraction = Bits(fraction, 50, 0) << 1;
        exp -= 1;
      }
      fraction = Bits(fraction, 50, 0) << 1;
    }

    // Build the value handed to recip_sqrt_estimate() from the top eight
    // fraction bits only. The exponent field is 1022 when exp is even and 1021
    // when it is odd (assuming DoublePack(sign, exp, mantissa) assembles an
    // IEEE-754 double, that is [0.5, 1.0) and [0.25, 0.5) respectively).
    double scaled;
    if (Bits(exp, 0, 0) == 0) {
      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
    } else {
      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
    }

    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
      result_exp = (380 - exp) / 2;
    } else {
      result_exp = (3068 - exp) / 2;
    }

    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));

    // Repack: sign 0, the recomputed exponent, and the mantissa bits of the
    // estimate (only the top 23 of them for single precision).
    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
      return FloatPack(0, exp_bits, est_bits);
    } else {
      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
    }
  }
}
4680
4681
4682LogicVRegister Simulator::frsqrte(VectorFormat vform,
4683                                  LogicVRegister dst,
4684                                  const LogicVRegister& src) {
4685  dst.ClearForWrite(vform);
4686  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4687    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4688      float input = src.Float<float>(i);
4689      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4690    }
4691  } else {
4692    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4693    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4694      double input = src.Float<double>(i);
4695      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4696    }
4697  }
4698  return dst;
4699}
4700
4701template <typename T>
4702T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4703  uint32_t sign;
4704
4705  if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4706    sign = FloatSign(op);
4707  } else {
4708    sign = DoubleSign(op);
4709  }
4710
4711  if (std::isnan(op)) {
4712    return FPProcessNaN(op);
4713  } else if (std::isinf(op)) {
4714    return (sign == 1) ? -0.0 : 0.0;
4715  } else if (op == 0.0) {
4716    FPProcessException();  // FPExc_DivideByZero exception.
4717    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4718  } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
4719              (std::fabs(op) < std::pow(2.0, -128.0))) ||
4720             ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
4721              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4722    bool overflow_to_inf = false;
4723    switch (rounding) {
4724      case FPTieEven:
4725        overflow_to_inf = true;
4726        break;
4727      case FPPositiveInfinity:
4728        overflow_to_inf = (sign == 0);
4729        break;
4730      case FPNegativeInfinity:
4731        overflow_to_inf = (sign == 1);
4732        break;
4733      case FPZero:
4734        overflow_to_inf = false;
4735        break;
4736      default:
4737        break;
4738    }
4739    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4740    if (overflow_to_inf) {
4741      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4742    } else {
4743      // Return FPMaxNormal(sign).
4744      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4745        return FloatPack(sign, 0xfe, 0x07fffff);
4746      } else {
4747        return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
4748      }
4749    }
4750  } else {
4751    uint64_t fraction;
4752    int exp, result_exp;
4753    uint32_t sign;
4754
4755    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4756      sign = FloatSign(op);
4757      exp = FloatExp(op);
4758      fraction = FloatMantissa(op);
4759      fraction <<= 29;
4760    } else {
4761      sign = DoubleSign(op);
4762      exp = DoubleExp(op);
4763      fraction = DoubleMantissa(op);
4764    }
4765
4766    if (exp == 0) {
4767      if (Bits(fraction, 51, 51) == 0) {
4768        exp -= 1;
4769        fraction = Bits(fraction, 49, 0) << 2;
4770      } else {
4771        fraction = Bits(fraction, 50, 0) << 1;
4772      }
4773    }
4774
4775    double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4776
4777    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4778      result_exp = (253 - exp);        // In range 253-254 = -1 to 253+1 = 254.
4779    } else {
4780      result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
4781    }
4782
4783    double estimate = recip_estimate(scaled);
4784
4785    fraction = DoubleMantissa(estimate);
4786    if (result_exp == 0) {
4787      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4788    } else if (result_exp == -1) {
4789      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4790      result_exp = 0;
4791    }
4792    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4793      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4794      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4795      return FloatPack(sign, exp_bits, frac_bits);
4796    } else {
4797      return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4798    }
4799  }
4800}
4801
4802
4803LogicVRegister Simulator::frecpe(VectorFormat vform,
4804                                 LogicVRegister dst,
4805                                 const LogicVRegister& src,
4806                                 FPRounding round) {
4807  dst.ClearForWrite(vform);
4808  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4809    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4810      float input = src.Float<float>(i);
4811      dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4812    }
4813  } else {
4814    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4815    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4816      double input = src.Float<double>(i);
4817      dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4818    }
4819  }
4820  return dst;
4821}
4822
4823
4824LogicVRegister Simulator::ursqrte(VectorFormat vform,
4825                                  LogicVRegister dst,
4826                                  const LogicVRegister& src) {
4827  dst.ClearForWrite(vform);
4828  uint64_t operand;
4829  uint32_t result;
4830  double dp_operand, dp_result;
4831  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4832    operand = src.Uint(vform, i);
4833    if (operand <= 0x3FFFFFFF) {
4834      result = 0xFFFFFFFF;
4835    } else {
4836      dp_operand = operand * std::pow(2.0, -32);
4837      dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4838      result = static_cast<uint32_t>(dp_result);
4839    }
4840    dst.SetUint(vform, i, result);
4841  }
4842  return dst;
4843}
4844
4845
4846// Based on reference C function recip_estimate from ARM ARM.
4847double Simulator::recip_estimate(double a) {
4848  int q, s;
4849  double r;
4850  q = static_cast<int>(a * 512.0);
4851  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4852  s = static_cast<int>(256.0 * r + 0.5);
4853  return static_cast<double>(s) / 256.0;
4854}
4855
4856
4857LogicVRegister Simulator::urecpe(VectorFormat vform,
4858                                 LogicVRegister dst,
4859                                 const LogicVRegister& src) {
4860  dst.ClearForWrite(vform);
4861  uint64_t operand;
4862  uint32_t result;
4863  double dp_operand, dp_result;
4864  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4865    operand = src.Uint(vform, i);
4866    if (operand <= 0x7FFFFFFF) {
4867      result = 0xFFFFFFFF;
4868    } else {
4869      dp_operand = operand * std::pow(2.0, -32);
4870      dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4871      result = static_cast<uint32_t>(dp_result);
4872    }
4873    dst.SetUint(vform, i, result);
4874  }
4875  return dst;
4876}
4877
4878template <typename T>
4879LogicVRegister Simulator::frecpx(VectorFormat vform,
4880                                 LogicVRegister dst,
4881                                 const LogicVRegister& src) {
4882  dst.ClearForWrite(vform);
4883  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4884    T op = src.Float<T>(i);
4885    T result;
4886    if (std::isnan(op)) {
4887      result = FPProcessNaN(op);
4888    } else {
4889      int exp;
4890      uint32_t sign;
4891      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4892        sign = FloatSign(op);
4893        exp = FloatExp(op);
4894        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4895        result = FloatPack(sign, exp, 0);
4896      } else {
4897        sign = DoubleSign(op);
4898        exp = DoubleExp(op);
4899        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4900        result = DoublePack(sign, exp, 0);
4901      }
4902    }
4903    dst.SetFloat(i, result);
4904  }
4905  return dst;
4906}
4907
4908
4909LogicVRegister Simulator::frecpx(VectorFormat vform,
4910                                 LogicVRegister dst,
4911                                 const LogicVRegister& src) {
4912  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4913    frecpx<float>(vform, dst, src);
4914  } else {
4915    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4916    frecpx<double>(vform, dst, src);
4917  }
4918  return dst;
4919}
4920
4921LogicVRegister Simulator::scvtf(VectorFormat vform,
4922                                LogicVRegister dst,
4923                                const LogicVRegister& src,
4924                                int fbits,
4925                                FPRounding round) {
4926  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4927    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4928      float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4929      dst.SetFloat<float>(i, result);
4930    } else {
4931      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4932      double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4933      dst.SetFloat<double>(i, result);
4934    }
4935  }
4936  return dst;
4937}
4938
4939
4940LogicVRegister Simulator::ucvtf(VectorFormat vform,
4941                                LogicVRegister dst,
4942                                const LogicVRegister& src,
4943                                int fbits,
4944                                FPRounding round) {
4945  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4946    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4947      float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4948      dst.SetFloat<float>(i, result);
4949    } else {
4950      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4951      double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4952      dst.SetFloat<double>(i, result);
4953    }
4954  }
4955  return dst;
4956}
4957
4958
4959}  // namespace aarch64
4960}  // namespace vixl
4961
4962#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
4963