logic-aarch64.cc revision b953ea8255b36e27834f17941429cd17af12f6f2
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29#include <cmath>
30
31#include "simulator-aarch64.h"
32
33namespace vixl {
34namespace aarch64 {
35
36template <>
37double Simulator::FPDefaultNaN<double>() {
38  return kFP64DefaultNaN;
39}
40
41
42template <>
43float Simulator::FPDefaultNaN<float>() {
44  return kFP32DefaultNaN;
45}
46
47// See FPRound for a description of this function.
48static inline double FPRoundToDouble(int64_t sign,
49                                     int64_t exponent,
50                                     uint64_t mantissa,
51                                     FPRounding round_mode) {
52  int64_t bits =
53      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
54                                                                 exponent,
55                                                                 mantissa,
56                                                                 round_mode);
57  return RawbitsToDouble(bits);
58}
59
60
61// See FPRound for a description of this function.
62static inline float FPRoundToFloat(int64_t sign,
63                                   int64_t exponent,
64                                   uint64_t mantissa,
65                                   FPRounding round_mode) {
66  int32_t bits =
67      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
68                                                               exponent,
69                                                               mantissa,
70                                                               round_mode);
71  return RawbitsToFloat(bits);
72}
73
74
75// See FPRound for a description of this function.
76static inline float16 FPRoundToFloat16(int64_t sign,
77                                       int64_t exponent,
78                                       uint64_t mantissa,
79                                       FPRounding round_mode) {
80  return FPRound<float16,
81                 kFloat16ExponentBits,
82                 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
83}
84
85
86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
87  if (src >= 0) {
88    return UFixedToDouble(src, fbits, round);
89  } else {
90    // This works for all negative values, including INT64_MIN.
91    return -UFixedToDouble(-src, fbits, round);
92  }
93}
94
95
96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
97  // An input of 0 is a special case because the result is effectively
98  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
99  if (src == 0) {
100    return 0.0;
101  }
102
103  // Calculate the exponent. The highest significant bit will have the value
104  // 2^exponent.
105  const int highest_significant_bit = 63 - CountLeadingZeros(src);
106  const int64_t exponent = highest_significant_bit - fbits;
107
108  return FPRoundToDouble(0, exponent, src, round);
109}
110
111
112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
113  if (src >= 0) {
114    return UFixedToFloat(src, fbits, round);
115  } else {
116    // This works for all negative values, including INT64_MIN.
117    return -UFixedToFloat(-src, fbits, round);
118  }
119}
120
121
122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
123  // An input of 0 is a special case because the result is effectively
124  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
125  if (src == 0) {
126    return 0.0f;
127  }
128
129  // Calculate the exponent. The highest significant bit will have the value
130  // 2^exponent.
131  const int highest_significant_bit = 63 - CountLeadingZeros(src);
132  const int32_t exponent = highest_significant_bit - fbits;
133
134  return FPRoundToFloat(0, exponent, src, round);
135}
136
137
138double Simulator::FPToDouble(float value) {
139  switch (std::fpclassify(value)) {
140    case FP_NAN: {
141      if (IsSignallingNaN(value)) {
142        FPProcessException();
143      }
144      if (ReadDN()) return kFP64DefaultNaN;
145
146      // Convert NaNs as the processor would:
147      //  - The sign is propagated.
148      //  - The payload (mantissa) is transferred entirely, except that the top
149      //    bit is forced to '1', making the result a quiet NaN. The unused
150      //    (low-order) payload bits are set to 0.
151      uint32_t raw = FloatToRawbits(value);
152
153      uint64_t sign = raw >> 31;
154      uint64_t exponent = (1 << 11) - 1;
155      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
156      payload <<= (52 - 23);           // The unused low-order bits should be 0.
157      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
158
159      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
160    }
161
162    case FP_ZERO:
163    case FP_NORMAL:
164    case FP_SUBNORMAL:
165    case FP_INFINITE: {
166      // All other inputs are preserved in a standard cast, because every value
167      // representable using an IEEE-754 float is also representable using an
168      // IEEE-754 double.
169      return static_cast<double>(value);
170    }
171  }
172
173  VIXL_UNREACHABLE();
174  return static_cast<double>(value);
175}
176
177
178float Simulator::FPToFloat(float16 value) {
179  uint32_t sign = value >> 15;
180  uint32_t exponent =
181      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
182                                kFloat16MantissaBits,
183                                value);
184  uint32_t mantissa =
185      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
186
187  switch (Float16Classify(value)) {
188    case FP_ZERO:
189      return (sign == 0) ? 0.0f : -0.0f;
190
191    case FP_INFINITE:
192      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
193
194    case FP_SUBNORMAL: {
195      // Calculate shift required to put mantissa into the most-significant bits
196      // of the destination mantissa.
197      int shift = CountLeadingZeros(mantissa << (32 - 10));
198
199      // Shift mantissa and discard implicit '1'.
200      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
201      mantissa &= (1 << kFloatMantissaBits) - 1;
202
203      // Adjust the exponent for the shift applied, and rebias.
204      exponent = exponent - shift + (-15 + 127);
205      break;
206    }
207
208    case FP_NAN:
209      if (IsSignallingNaN(value)) {
210        FPProcessException();
211      }
212      if (ReadDN()) return kFP32DefaultNaN;
213
214      // Convert NaNs as the processor would:
215      //  - The sign is propagated.
216      //  - The payload (mantissa) is transferred entirely, except that the top
217      //    bit is forced to '1', making the result a quiet NaN. The unused
218      //    (low-order) payload bits are set to 0.
219      exponent = (1 << kFloatExponentBits) - 1;
220
221      // Increase bits in mantissa, making low-order bits 0.
222      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
223      mantissa |= 1 << 22;  // Force a quiet NaN.
224      break;
225
226    case FP_NORMAL:
227      // Increase bits in mantissa, making low-order bits 0.
228      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
229
230      // Change exponent bias.
231      exponent += (-15 + 127);
232      break;
233
234    default:
235      VIXL_UNREACHABLE();
236  }
237  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
238                        mantissa);
239}
240
241
242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
243  // Only the FPTieEven rounding mode is implemented.
244  VIXL_ASSERT(round_mode == FPTieEven);
245  USE(round_mode);
246
247  uint32_t raw = FloatToRawbits(value);
248  int32_t sign = raw >> 31;
249  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
250  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
251
252  switch (std::fpclassify(value)) {
253    case FP_NAN: {
254      if (IsSignallingNaN(value)) {
255        FPProcessException();
256      }
257      if (ReadDN()) return kFP16DefaultNaN;
258
259      // Convert NaNs as the processor would:
260      //  - The sign is propagated.
261      //  - The payload (mantissa) is transferred as much as possible, except
262      //    that the top bit is forced to '1', making the result a quiet NaN.
263      float16 result =
264          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
265      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
266      result |= (1 << 9);  // Force a quiet NaN;
267      return result;
268    }
269
270    case FP_ZERO:
271      return (sign == 0) ? 0 : 0x8000;
272
273    case FP_INFINITE:
274      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
275
276    case FP_NORMAL:
277    case FP_SUBNORMAL: {
278      // Convert float-to-half as the processor would, assuming that FPCR.FZ
279      // (flush-to-zero) is not set.
280
281      // Add the implicit '1' bit to the mantissa.
282      mantissa += (1 << 23);
283      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
284    }
285  }
286
287  VIXL_UNREACHABLE();
288  return 0;
289}
290
291
292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
293  // Only the FPTieEven rounding mode is implemented.
294  VIXL_ASSERT(round_mode == FPTieEven);
295  USE(round_mode);
296
297  uint64_t raw = DoubleToRawbits(value);
298  int32_t sign = raw >> 63;
299  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
300  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
301
302  switch (std::fpclassify(value)) {
303    case FP_NAN: {
304      if (IsSignallingNaN(value)) {
305        FPProcessException();
306      }
307      if (ReadDN()) return kFP16DefaultNaN;
308
309      // Convert NaNs as the processor would:
310      //  - The sign is propagated.
311      //  - The payload (mantissa) is transferred as much as possible, except
312      //    that the top bit is forced to '1', making the result a quiet NaN.
313      float16 result =
314          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
315      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
316      result |= (1 << 9);  // Force a quiet NaN;
317      return result;
318    }
319
320    case FP_ZERO:
321      return (sign == 0) ? 0 : 0x8000;
322
323    case FP_INFINITE:
324      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
325
326    case FP_NORMAL:
327    case FP_SUBNORMAL: {
328      // Convert double-to-half as the processor would, assuming that FPCR.FZ
329      // (flush-to-zero) is not set.
330
331      // Add the implicit '1' bit to the mantissa.
332      mantissa += (UINT64_C(1) << 52);
333      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
334    }
335  }
336
337  VIXL_UNREACHABLE();
338  return 0;
339}
340
341
342float Simulator::FPToFloat(double value, FPRounding round_mode) {
343  // Only the FPTieEven rounding mode is implemented.
344  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
345  USE(round_mode);
346
347  switch (std::fpclassify(value)) {
348    case FP_NAN: {
349      if (IsSignallingNaN(value)) {
350        FPProcessException();
351      }
352      if (ReadDN()) return kFP32DefaultNaN;
353
354      // Convert NaNs as the processor would:
355      //  - The sign is propagated.
356      //  - The payload (mantissa) is transferred as much as possible, except
357      //    that the top bit is forced to '1', making the result a quiet NaN.
358      uint64_t raw = DoubleToRawbits(value);
359
360      uint32_t sign = raw >> 63;
361      uint32_t exponent = (1 << 8) - 1;
362      uint32_t payload =
363          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
364      payload |= (1 << 22);  // Force a quiet NaN.
365
366      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
367    }
368
369    case FP_ZERO:
370    case FP_INFINITE: {
371      // In a C++ cast, any value representable in the target type will be
372      // unchanged. This is always the case for +/-0.0 and infinities.
373      return static_cast<float>(value);
374    }
375
376    case FP_NORMAL:
377    case FP_SUBNORMAL: {
378      // Convert double-to-float as the processor would, assuming that FPCR.FZ
379      // (flush-to-zero) is not set.
380      uint64_t raw = DoubleToRawbits(value);
381      // Extract the IEEE-754 double components.
382      uint32_t sign = raw >> 63;
383      // Extract the exponent and remove the IEEE-754 encoding bias.
384      int32_t exponent =
385          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
386      // Extract the mantissa and add the implicit '1' bit.
387      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
388      if (std::fpclassify(value) == FP_NORMAL) {
389        mantissa |= (UINT64_C(1) << 52);
390      }
391      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
392    }
393  }
394
395  VIXL_UNREACHABLE();
396  return value;
397}
398
399
400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
401  dst.ClearForWrite(vform);
402  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403    dst.ReadUintFromMem(vform, i, addr);
404    addr += LaneSizeInBytesFromFormat(vform);
405  }
406}
407
408
409void Simulator::ld1(VectorFormat vform,
410                    LogicVRegister dst,
411                    int index,
412                    uint64_t addr) {
413  dst.ReadUintFromMem(vform, index, addr);
414}
415
416
417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
418  dst.ClearForWrite(vform);
419  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
420    dst.ReadUintFromMem(vform, i, addr);
421  }
422}
423
424
425void Simulator::ld2(VectorFormat vform,
426                    LogicVRegister dst1,
427                    LogicVRegister dst2,
428                    uint64_t addr1) {
429  dst1.ClearForWrite(vform);
430  dst2.ClearForWrite(vform);
431  int esize = LaneSizeInBytesFromFormat(vform);
432  uint64_t addr2 = addr1 + esize;
433  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
434    dst1.ReadUintFromMem(vform, i, addr1);
435    dst2.ReadUintFromMem(vform, i, addr2);
436    addr1 += 2 * esize;
437    addr2 += 2 * esize;
438  }
439}
440
441
442void Simulator::ld2(VectorFormat vform,
443                    LogicVRegister dst1,
444                    LogicVRegister dst2,
445                    int index,
446                    uint64_t addr1) {
447  dst1.ClearForWrite(vform);
448  dst2.ClearForWrite(vform);
449  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
450  dst1.ReadUintFromMem(vform, index, addr1);
451  dst2.ReadUintFromMem(vform, index, addr2);
452}
453
454
455void Simulator::ld2r(VectorFormat vform,
456                     LogicVRegister dst1,
457                     LogicVRegister dst2,
458                     uint64_t addr) {
459  dst1.ClearForWrite(vform);
460  dst2.ClearForWrite(vform);
461  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
462  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
463    dst1.ReadUintFromMem(vform, i, addr);
464    dst2.ReadUintFromMem(vform, i, addr2);
465  }
466}
467
468
469void Simulator::ld3(VectorFormat vform,
470                    LogicVRegister dst1,
471                    LogicVRegister dst2,
472                    LogicVRegister dst3,
473                    uint64_t addr1) {
474  dst1.ClearForWrite(vform);
475  dst2.ClearForWrite(vform);
476  dst3.ClearForWrite(vform);
477  int esize = LaneSizeInBytesFromFormat(vform);
478  uint64_t addr2 = addr1 + esize;
479  uint64_t addr3 = addr2 + esize;
480  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
481    dst1.ReadUintFromMem(vform, i, addr1);
482    dst2.ReadUintFromMem(vform, i, addr2);
483    dst3.ReadUintFromMem(vform, i, addr3);
484    addr1 += 3 * esize;
485    addr2 += 3 * esize;
486    addr3 += 3 * esize;
487  }
488}
489
490
491void Simulator::ld3(VectorFormat vform,
492                    LogicVRegister dst1,
493                    LogicVRegister dst2,
494                    LogicVRegister dst3,
495                    int index,
496                    uint64_t addr1) {
497  dst1.ClearForWrite(vform);
498  dst2.ClearForWrite(vform);
499  dst3.ClearForWrite(vform);
500  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
501  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
502  dst1.ReadUintFromMem(vform, index, addr1);
503  dst2.ReadUintFromMem(vform, index, addr2);
504  dst3.ReadUintFromMem(vform, index, addr3);
505}
506
507
508void Simulator::ld3r(VectorFormat vform,
509                     LogicVRegister dst1,
510                     LogicVRegister dst2,
511                     LogicVRegister dst3,
512                     uint64_t addr) {
513  dst1.ClearForWrite(vform);
514  dst2.ClearForWrite(vform);
515  dst3.ClearForWrite(vform);
516  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
517  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
518  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519    dst1.ReadUintFromMem(vform, i, addr);
520    dst2.ReadUintFromMem(vform, i, addr2);
521    dst3.ReadUintFromMem(vform, i, addr3);
522  }
523}
524
525
526void Simulator::ld4(VectorFormat vform,
527                    LogicVRegister dst1,
528                    LogicVRegister dst2,
529                    LogicVRegister dst3,
530                    LogicVRegister dst4,
531                    uint64_t addr1) {
532  dst1.ClearForWrite(vform);
533  dst2.ClearForWrite(vform);
534  dst3.ClearForWrite(vform);
535  dst4.ClearForWrite(vform);
536  int esize = LaneSizeInBytesFromFormat(vform);
537  uint64_t addr2 = addr1 + esize;
538  uint64_t addr3 = addr2 + esize;
539  uint64_t addr4 = addr3 + esize;
540  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
541    dst1.ReadUintFromMem(vform, i, addr1);
542    dst2.ReadUintFromMem(vform, i, addr2);
543    dst3.ReadUintFromMem(vform, i, addr3);
544    dst4.ReadUintFromMem(vform, i, addr4);
545    addr1 += 4 * esize;
546    addr2 += 4 * esize;
547    addr3 += 4 * esize;
548    addr4 += 4 * esize;
549  }
550}
551
552
553void Simulator::ld4(VectorFormat vform,
554                    LogicVRegister dst1,
555                    LogicVRegister dst2,
556                    LogicVRegister dst3,
557                    LogicVRegister dst4,
558                    int index,
559                    uint64_t addr1) {
560  dst1.ClearForWrite(vform);
561  dst2.ClearForWrite(vform);
562  dst3.ClearForWrite(vform);
563  dst4.ClearForWrite(vform);
564  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
565  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
566  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
567  dst1.ReadUintFromMem(vform, index, addr1);
568  dst2.ReadUintFromMem(vform, index, addr2);
569  dst3.ReadUintFromMem(vform, index, addr3);
570  dst4.ReadUintFromMem(vform, index, addr4);
571}
572
573
574void Simulator::ld4r(VectorFormat vform,
575                     LogicVRegister dst1,
576                     LogicVRegister dst2,
577                     LogicVRegister dst3,
578                     LogicVRegister dst4,
579                     uint64_t addr) {
580  dst1.ClearForWrite(vform);
581  dst2.ClearForWrite(vform);
582  dst3.ClearForWrite(vform);
583  dst4.ClearForWrite(vform);
584  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
585  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
586  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
587  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
588    dst1.ReadUintFromMem(vform, i, addr);
589    dst2.ReadUintFromMem(vform, i, addr2);
590    dst3.ReadUintFromMem(vform, i, addr3);
591    dst4.ReadUintFromMem(vform, i, addr4);
592  }
593}
594
595
596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
597  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
598    src.WriteUintToMem(vform, i, addr);
599    addr += LaneSizeInBytesFromFormat(vform);
600  }
601}
602
603
604void Simulator::st1(VectorFormat vform,
605                    LogicVRegister src,
606                    int index,
607                    uint64_t addr) {
608  src.WriteUintToMem(vform, index, addr);
609}
610
611
612void Simulator::st2(VectorFormat vform,
613                    LogicVRegister dst,
614                    LogicVRegister dst2,
615                    uint64_t addr) {
616  int esize = LaneSizeInBytesFromFormat(vform);
617  uint64_t addr2 = addr + esize;
618  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
619    dst.WriteUintToMem(vform, i, addr);
620    dst2.WriteUintToMem(vform, i, addr2);
621    addr += 2 * esize;
622    addr2 += 2 * esize;
623  }
624}
625
626
627void Simulator::st2(VectorFormat vform,
628                    LogicVRegister dst,
629                    LogicVRegister dst2,
630                    int index,
631                    uint64_t addr) {
632  int esize = LaneSizeInBytesFromFormat(vform);
633  dst.WriteUintToMem(vform, index, addr);
634  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
635}
636
637
638void Simulator::st3(VectorFormat vform,
639                    LogicVRegister dst,
640                    LogicVRegister dst2,
641                    LogicVRegister dst3,
642                    uint64_t addr) {
643  int esize = LaneSizeInBytesFromFormat(vform);
644  uint64_t addr2 = addr + esize;
645  uint64_t addr3 = addr2 + esize;
646  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
647    dst.WriteUintToMem(vform, i, addr);
648    dst2.WriteUintToMem(vform, i, addr2);
649    dst3.WriteUintToMem(vform, i, addr3);
650    addr += 3 * esize;
651    addr2 += 3 * esize;
652    addr3 += 3 * esize;
653  }
654}
655
656
657void Simulator::st3(VectorFormat vform,
658                    LogicVRegister dst,
659                    LogicVRegister dst2,
660                    LogicVRegister dst3,
661                    int index,
662                    uint64_t addr) {
663  int esize = LaneSizeInBytesFromFormat(vform);
664  dst.WriteUintToMem(vform, index, addr);
665  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
666  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
667}
668
669
670void Simulator::st4(VectorFormat vform,
671                    LogicVRegister dst,
672                    LogicVRegister dst2,
673                    LogicVRegister dst3,
674                    LogicVRegister dst4,
675                    uint64_t addr) {
676  int esize = LaneSizeInBytesFromFormat(vform);
677  uint64_t addr2 = addr + esize;
678  uint64_t addr3 = addr2 + esize;
679  uint64_t addr4 = addr3 + esize;
680  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
681    dst.WriteUintToMem(vform, i, addr);
682    dst2.WriteUintToMem(vform, i, addr2);
683    dst3.WriteUintToMem(vform, i, addr3);
684    dst4.WriteUintToMem(vform, i, addr4);
685    addr += 4 * esize;
686    addr2 += 4 * esize;
687    addr3 += 4 * esize;
688    addr4 += 4 * esize;
689  }
690}
691
692
693void Simulator::st4(VectorFormat vform,
694                    LogicVRegister dst,
695                    LogicVRegister dst2,
696                    LogicVRegister dst3,
697                    LogicVRegister dst4,
698                    int index,
699                    uint64_t addr) {
700  int esize = LaneSizeInBytesFromFormat(vform);
701  dst.WriteUintToMem(vform, index, addr);
702  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
703  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
704  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
705}
706
707
708LogicVRegister Simulator::cmp(VectorFormat vform,
709                              LogicVRegister dst,
710                              const LogicVRegister& src1,
711                              const LogicVRegister& src2,
712                              Condition cond) {
713  dst.ClearForWrite(vform);
714  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
715    int64_t sa = src1.Int(vform, i);
716    int64_t sb = src2.Int(vform, i);
717    uint64_t ua = src1.Uint(vform, i);
718    uint64_t ub = src2.Uint(vform, i);
719    bool result = false;
720    switch (cond) {
721      case eq:
722        result = (ua == ub);
723        break;
724      case ge:
725        result = (sa >= sb);
726        break;
727      case gt:
728        result = (sa > sb);
729        break;
730      case hi:
731        result = (ua > ub);
732        break;
733      case hs:
734        result = (ua >= ub);
735        break;
736      case lt:
737        result = (sa < sb);
738        break;
739      case le:
740        result = (sa <= sb);
741        break;
742      default:
743        VIXL_UNREACHABLE();
744        break;
745    }
746    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
747  }
748  return dst;
749}
750
751
752LogicVRegister Simulator::cmp(VectorFormat vform,
753                              LogicVRegister dst,
754                              const LogicVRegister& src1,
755                              int imm,
756                              Condition cond) {
757  SimVRegister temp;
758  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
759  return cmp(vform, dst, src1, imm_reg, cond);
760}
761
762
763LogicVRegister Simulator::cmptst(VectorFormat vform,
764                                 LogicVRegister dst,
765                                 const LogicVRegister& src1,
766                                 const LogicVRegister& src2) {
767  dst.ClearForWrite(vform);
768  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
769    uint64_t ua = src1.Uint(vform, i);
770    uint64_t ub = src2.Uint(vform, i);
771    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
772  }
773  return dst;
774}
775
776
777LogicVRegister Simulator::add(VectorFormat vform,
778                              LogicVRegister dst,
779                              const LogicVRegister& src1,
780                              const LogicVRegister& src2) {
781  dst.ClearForWrite(vform);
782  // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
783  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
784    // Test for unsigned saturation.
785    uint64_t ua = src1.UintLeftJustified(vform, i);
786    uint64_t ub = src2.UintLeftJustified(vform, i);
787    uint64_t ur = ua + ub;
788    if (ur < ua) {
789      dst.SetUnsignedSat(i, true);
790    }
791
792    // Test for signed saturation.
793    int64_t sa = src1.IntLeftJustified(vform, i);
794    int64_t sb = src2.IntLeftJustified(vform, i);
795    int64_t sr = sa + sb;
796    // If the signs of the operands are the same, but different from the result,
797    // there was an overflow.
798    if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
799      dst.SetSignedSat(i, sa >= 0);
800    }
801
802    dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
803  }
804  return dst;
805}
806
807
808LogicVRegister Simulator::addp(VectorFormat vform,
809                               LogicVRegister dst,
810                               const LogicVRegister& src1,
811                               const LogicVRegister& src2) {
812  SimVRegister temp1, temp2;
813  uzp1(vform, temp1, src1, src2);
814  uzp2(vform, temp2, src1, src2);
815  add(vform, dst, temp1, temp2);
816  return dst;
817}
818
819
820LogicVRegister Simulator::mla(VectorFormat vform,
821                              LogicVRegister dst,
822                              const LogicVRegister& src1,
823                              const LogicVRegister& src2) {
824  SimVRegister temp;
825  mul(vform, temp, src1, src2);
826  add(vform, dst, dst, temp);
827  return dst;
828}
829
830
831LogicVRegister Simulator::mls(VectorFormat vform,
832                              LogicVRegister dst,
833                              const LogicVRegister& src1,
834                              const LogicVRegister& src2) {
835  SimVRegister temp;
836  mul(vform, temp, src1, src2);
837  sub(vform, dst, dst, temp);
838  return dst;
839}
840
841
842LogicVRegister Simulator::mul(VectorFormat vform,
843                              LogicVRegister dst,
844                              const LogicVRegister& src1,
845                              const LogicVRegister& src2) {
846  dst.ClearForWrite(vform);
847  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
848    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
849  }
850  return dst;
851}
852
853
854LogicVRegister Simulator::mul(VectorFormat vform,
855                              LogicVRegister dst,
856                              const LogicVRegister& src1,
857                              const LogicVRegister& src2,
858                              int index) {
859  SimVRegister temp;
860  VectorFormat indexform = VectorFormatFillQ(vform);
861  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
862}
863
864
865LogicVRegister Simulator::mla(VectorFormat vform,
866                              LogicVRegister dst,
867                              const LogicVRegister& src1,
868                              const LogicVRegister& src2,
869                              int index) {
870  SimVRegister temp;
871  VectorFormat indexform = VectorFormatFillQ(vform);
872  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
873}
874
875
876LogicVRegister Simulator::mls(VectorFormat vform,
877                              LogicVRegister dst,
878                              const LogicVRegister& src1,
879                              const LogicVRegister& src2,
880                              int index) {
881  SimVRegister temp;
882  VectorFormat indexform = VectorFormatFillQ(vform);
883  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
884}
885
886
887LogicVRegister Simulator::smull(VectorFormat vform,
888                                LogicVRegister dst,
889                                const LogicVRegister& src1,
890                                const LogicVRegister& src2,
891                                int index) {
892  SimVRegister temp;
893  VectorFormat indexform =
894      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
896}
897
898
899LogicVRegister Simulator::smull2(VectorFormat vform,
900                                 LogicVRegister dst,
901                                 const LogicVRegister& src1,
902                                 const LogicVRegister& src2,
903                                 int index) {
904  SimVRegister temp;
905  VectorFormat indexform =
906      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
907  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
908}
909
910
911LogicVRegister Simulator::umull(VectorFormat vform,
912                                LogicVRegister dst,
913                                const LogicVRegister& src1,
914                                const LogicVRegister& src2,
915                                int index) {
916  SimVRegister temp;
917  VectorFormat indexform =
918      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
919  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
920}
921
922
923LogicVRegister Simulator::umull2(VectorFormat vform,
924                                 LogicVRegister dst,
925                                 const LogicVRegister& src1,
926                                 const LogicVRegister& src2,
927                                 int index) {
928  SimVRegister temp;
929  VectorFormat indexform =
930      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
931  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
932}
933
934
935LogicVRegister Simulator::smlal(VectorFormat vform,
936                                LogicVRegister dst,
937                                const LogicVRegister& src1,
938                                const LogicVRegister& src2,
939                                int index) {
940  SimVRegister temp;
941  VectorFormat indexform =
942      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
943  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
944}
945
946
947LogicVRegister Simulator::smlal2(VectorFormat vform,
948                                 LogicVRegister dst,
949                                 const LogicVRegister& src1,
950                                 const LogicVRegister& src2,
951                                 int index) {
952  SimVRegister temp;
953  VectorFormat indexform =
954      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
955  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
956}
957
958
959LogicVRegister Simulator::umlal(VectorFormat vform,
960                                LogicVRegister dst,
961                                const LogicVRegister& src1,
962                                const LogicVRegister& src2,
963                                int index) {
964  SimVRegister temp;
965  VectorFormat indexform =
966      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
967  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
968}
969
970
971LogicVRegister Simulator::umlal2(VectorFormat vform,
972                                 LogicVRegister dst,
973                                 const LogicVRegister& src1,
974                                 const LogicVRegister& src2,
975                                 int index) {
976  SimVRegister temp;
977  VectorFormat indexform =
978      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
979  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
980}
981
982
983LogicVRegister Simulator::smlsl(VectorFormat vform,
984                                LogicVRegister dst,
985                                const LogicVRegister& src1,
986                                const LogicVRegister& src2,
987                                int index) {
988  SimVRegister temp;
989  VectorFormat indexform =
990      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
991  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
992}
993
994
995LogicVRegister Simulator::smlsl2(VectorFormat vform,
996                                 LogicVRegister dst,
997                                 const LogicVRegister& src1,
998                                 const LogicVRegister& src2,
999                                 int index) {
1000  SimVRegister temp;
1001  VectorFormat indexform =
1002      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1003  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1004}
1005
1006
1007LogicVRegister Simulator::umlsl(VectorFormat vform,
1008                                LogicVRegister dst,
1009                                const LogicVRegister& src1,
1010                                const LogicVRegister& src2,
1011                                int index) {
1012  SimVRegister temp;
1013  VectorFormat indexform =
1014      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1015  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1016}
1017
1018
1019LogicVRegister Simulator::umlsl2(VectorFormat vform,
1020                                 LogicVRegister dst,
1021                                 const LogicVRegister& src1,
1022                                 const LogicVRegister& src2,
1023                                 int index) {
1024  SimVRegister temp;
1025  VectorFormat indexform =
1026      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1027  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1028}
1029
1030
1031LogicVRegister Simulator::sqdmull(VectorFormat vform,
1032                                  LogicVRegister dst,
1033                                  const LogicVRegister& src1,
1034                                  const LogicVRegister& src2,
1035                                  int index) {
1036  SimVRegister temp;
1037  VectorFormat indexform =
1038      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1039  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1040}
1041
1042
1043LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1044                                   LogicVRegister dst,
1045                                   const LogicVRegister& src1,
1046                                   const LogicVRegister& src2,
1047                                   int index) {
1048  SimVRegister temp;
1049  VectorFormat indexform =
1050      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1051  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1052}
1053
1054
1055LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1056                                  LogicVRegister dst,
1057                                  const LogicVRegister& src1,
1058                                  const LogicVRegister& src2,
1059                                  int index) {
1060  SimVRegister temp;
1061  VectorFormat indexform =
1062      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1063  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1064}
1065
1066
1067LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1068                                   LogicVRegister dst,
1069                                   const LogicVRegister& src1,
1070                                   const LogicVRegister& src2,
1071                                   int index) {
1072  SimVRegister temp;
1073  VectorFormat indexform =
1074      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1075  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1076}
1077
1078
1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1080                                  LogicVRegister dst,
1081                                  const LogicVRegister& src1,
1082                                  const LogicVRegister& src2,
1083                                  int index) {
1084  SimVRegister temp;
1085  VectorFormat indexform =
1086      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1087  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1088}
1089
1090
1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1092                                   LogicVRegister dst,
1093                                   const LogicVRegister& src1,
1094                                   const LogicVRegister& src2,
1095                                   int index) {
1096  SimVRegister temp;
1097  VectorFormat indexform =
1098      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1099  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1100}
1101
1102
1103LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1104                                  LogicVRegister dst,
1105                                  const LogicVRegister& src1,
1106                                  const LogicVRegister& src2,
1107                                  int index) {
1108  SimVRegister temp;
1109  VectorFormat indexform = VectorFormatFillQ(vform);
1110  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1111}
1112
1113
1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1115                                   LogicVRegister dst,
1116                                   const LogicVRegister& src1,
1117                                   const LogicVRegister& src2,
1118                                   int index) {
1119  SimVRegister temp;
1120  VectorFormat indexform = VectorFormatFillQ(vform);
1121  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1122}
1123
1124
1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1126  uint16_t result = 0;
1127  uint16_t extended_op2 = op2;
1128  for (int i = 0; i < 8; ++i) {
1129    if ((op1 >> i) & 1) {
1130      result = result ^ (extended_op2 << i);
1131    }
1132  }
1133  return result;
1134}
1135
1136
1137LogicVRegister Simulator::pmul(VectorFormat vform,
1138                               LogicVRegister dst,
1139                               const LogicVRegister& src1,
1140                               const LogicVRegister& src2) {
1141  dst.ClearForWrite(vform);
1142  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1143    dst.SetUint(vform,
1144                i,
1145                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1146  }
1147  return dst;
1148}
1149
1150
1151LogicVRegister Simulator::pmull(VectorFormat vform,
1152                                LogicVRegister dst,
1153                                const LogicVRegister& src1,
1154                                const LogicVRegister& src2) {
1155  VectorFormat vform_src = VectorFormatHalfWidth(vform);
1156  dst.ClearForWrite(vform);
1157  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1158    dst.SetUint(vform,
1159                i,
1160                PolynomialMult(src1.Uint(vform_src, i),
1161                               src2.Uint(vform_src, i)));
1162  }
1163  return dst;
1164}
1165
1166
1167LogicVRegister Simulator::pmull2(VectorFormat vform,
1168                                 LogicVRegister dst,
1169                                 const LogicVRegister& src1,
1170                                 const LogicVRegister& src2) {
1171  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1172  dst.ClearForWrite(vform);
1173  int lane_count = LaneCountFromFormat(vform);
1174  for (int i = 0; i < lane_count; i++) {
1175    dst.SetUint(vform,
1176                i,
1177                PolynomialMult(src1.Uint(vform_src, lane_count + i),
1178                               src2.Uint(vform_src, lane_count + i)));
1179  }
1180  return dst;
1181}
1182
1183
1184LogicVRegister Simulator::sub(VectorFormat vform,
1185                              LogicVRegister dst,
1186                              const LogicVRegister& src1,
1187                              const LogicVRegister& src2) {
1188  dst.ClearForWrite(vform);
1189  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1190    // Test for unsigned saturation.
1191    if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1192      dst.SetUnsignedSat(i, false);
1193    }
1194
1195    // Test for signed saturation.
1196    int64_t sa = src1.IntLeftJustified(vform, i);
1197    int64_t sb = src2.IntLeftJustified(vform, i);
1198    int64_t sr = sa - sb;
1199    // If the signs of the operands are different, and the sign of the first
1200    // operand doesn't match the result, there was an overflow.
1201    if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1202      dst.SetSignedSat(i, sr < 0);
1203    }
1204
1205    dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1206  }
1207  return dst;
1208}
1209
1210
1211LogicVRegister Simulator::and_(VectorFormat vform,
1212                               LogicVRegister dst,
1213                               const LogicVRegister& src1,
1214                               const LogicVRegister& src2) {
1215  dst.ClearForWrite(vform);
1216  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1217    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1218  }
1219  return dst;
1220}
1221
1222
1223LogicVRegister Simulator::orr(VectorFormat vform,
1224                              LogicVRegister dst,
1225                              const LogicVRegister& src1,
1226                              const LogicVRegister& src2) {
1227  dst.ClearForWrite(vform);
1228  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1229    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1230  }
1231  return dst;
1232}
1233
1234
1235LogicVRegister Simulator::orn(VectorFormat vform,
1236                              LogicVRegister dst,
1237                              const LogicVRegister& src1,
1238                              const LogicVRegister& src2) {
1239  dst.ClearForWrite(vform);
1240  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1241    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1242  }
1243  return dst;
1244}
1245
1246
1247LogicVRegister Simulator::eor(VectorFormat vform,
1248                              LogicVRegister dst,
1249                              const LogicVRegister& src1,
1250                              const LogicVRegister& src2) {
1251  dst.ClearForWrite(vform);
1252  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1253    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1254  }
1255  return dst;
1256}
1257
1258
1259LogicVRegister Simulator::bic(VectorFormat vform,
1260                              LogicVRegister dst,
1261                              const LogicVRegister& src1,
1262                              const LogicVRegister& src2) {
1263  dst.ClearForWrite(vform);
1264  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1265    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1266  }
1267  return dst;
1268}
1269
1270
1271LogicVRegister Simulator::bic(VectorFormat vform,
1272                              LogicVRegister dst,
1273                              const LogicVRegister& src,
1274                              uint64_t imm) {
1275  uint64_t result[16];
1276  int laneCount = LaneCountFromFormat(vform);
1277  for (int i = 0; i < laneCount; ++i) {
1278    result[i] = src.Uint(vform, i) & ~imm;
1279  }
1280  dst.ClearForWrite(vform);
1281  for (int i = 0; i < laneCount; ++i) {
1282    dst.SetUint(vform, i, result[i]);
1283  }
1284  return dst;
1285}
1286
1287
1288LogicVRegister Simulator::bif(VectorFormat vform,
1289                              LogicVRegister dst,
1290                              const LogicVRegister& src1,
1291                              const LogicVRegister& src2) {
1292  dst.ClearForWrite(vform);
1293  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1294    uint64_t operand1 = dst.Uint(vform, i);
1295    uint64_t operand2 = ~src2.Uint(vform, i);
1296    uint64_t operand3 = src1.Uint(vform, i);
1297    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1298    dst.SetUint(vform, i, result);
1299  }
1300  return dst;
1301}
1302
1303
1304LogicVRegister Simulator::bit(VectorFormat vform,
1305                              LogicVRegister dst,
1306                              const LogicVRegister& src1,
1307                              const LogicVRegister& src2) {
1308  dst.ClearForWrite(vform);
1309  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1310    uint64_t operand1 = dst.Uint(vform, i);
1311    uint64_t operand2 = src2.Uint(vform, i);
1312    uint64_t operand3 = src1.Uint(vform, i);
1313    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1314    dst.SetUint(vform, i, result);
1315  }
1316  return dst;
1317}
1318
1319
1320LogicVRegister Simulator::bsl(VectorFormat vform,
1321                              LogicVRegister dst,
1322                              const LogicVRegister& src1,
1323                              const LogicVRegister& src2) {
1324  dst.ClearForWrite(vform);
1325  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1326    uint64_t operand1 = src2.Uint(vform, i);
1327    uint64_t operand2 = dst.Uint(vform, i);
1328    uint64_t operand3 = src1.Uint(vform, i);
1329    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1330    dst.SetUint(vform, i, result);
1331  }
1332  return dst;
1333}
1334
1335
1336LogicVRegister Simulator::sminmax(VectorFormat vform,
1337                                  LogicVRegister dst,
1338                                  const LogicVRegister& src1,
1339                                  const LogicVRegister& src2,
1340                                  bool max) {
1341  dst.ClearForWrite(vform);
1342  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1343    int64_t src1_val = src1.Int(vform, i);
1344    int64_t src2_val = src2.Int(vform, i);
1345    int64_t dst_val;
1346    if (max) {
1347      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1348    } else {
1349      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1350    }
1351    dst.SetInt(vform, i, dst_val);
1352  }
1353  return dst;
1354}
1355
1356
1357LogicVRegister Simulator::smax(VectorFormat vform,
1358                               LogicVRegister dst,
1359                               const LogicVRegister& src1,
1360                               const LogicVRegister& src2) {
1361  return sminmax(vform, dst, src1, src2, true);
1362}
1363
1364
1365LogicVRegister Simulator::smin(VectorFormat vform,
1366                               LogicVRegister dst,
1367                               const LogicVRegister& src1,
1368                               const LogicVRegister& src2) {
1369  return sminmax(vform, dst, src1, src2, false);
1370}
1371
1372
1373LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1374                                   LogicVRegister dst,
1375                                   const LogicVRegister& src1,
1376                                   const LogicVRegister& src2,
1377                                   bool max) {
1378  int lanes = LaneCountFromFormat(vform);
1379  int64_t result[kMaxLanesPerVector];
1380  const LogicVRegister* src = &src1;
1381  for (int j = 0; j < 2; j++) {
1382    for (int i = 0; i < lanes; i += 2) {
1383      int64_t first_val = src->Int(vform, i);
1384      int64_t second_val = src->Int(vform, i + 1);
1385      int64_t dst_val;
1386      if (max) {
1387        dst_val = (first_val > second_val) ? first_val : second_val;
1388      } else {
1389        dst_val = (first_val < second_val) ? first_val : second_val;
1390      }
1391      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1392      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1393    }
1394    src = &src2;
1395  }
1396  dst.SetIntArray(vform, result);
1397  return dst;
1398}
1399
1400
1401LogicVRegister Simulator::smaxp(VectorFormat vform,
1402                                LogicVRegister dst,
1403                                const LogicVRegister& src1,
1404                                const LogicVRegister& src2) {
1405  return sminmaxp(vform, dst, src1, src2, true);
1406}
1407
1408
1409LogicVRegister Simulator::sminp(VectorFormat vform,
1410                                LogicVRegister dst,
1411                                const LogicVRegister& src1,
1412                                const LogicVRegister& src2) {
1413  return sminmaxp(vform, dst, src1, src2, false);
1414}
1415
1416
1417LogicVRegister Simulator::addp(VectorFormat vform,
1418                               LogicVRegister dst,
1419                               const LogicVRegister& src) {
1420  VIXL_ASSERT(vform == kFormatD);
1421
1422  int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1423  dst.ClearForWrite(vform);
1424  dst.SetInt(vform, 0, dst_val);
1425  return dst;
1426}
1427
1428
1429LogicVRegister Simulator::addv(VectorFormat vform,
1430                               LogicVRegister dst,
1431                               const LogicVRegister& src) {
1432  VectorFormat vform_dst =
1433      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1434
1435
1436  int64_t dst_val = 0;
1437  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1438    dst_val += src.Int(vform, i);
1439  }
1440
1441  dst.ClearForWrite(vform_dst);
1442  dst.SetInt(vform_dst, 0, dst_val);
1443  return dst;
1444}
1445
1446
1447LogicVRegister Simulator::saddlv(VectorFormat vform,
1448                                 LogicVRegister dst,
1449                                 const LogicVRegister& src) {
1450  VectorFormat vform_dst =
1451      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1452
1453  int64_t dst_val = 0;
1454  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1455    dst_val += src.Int(vform, i);
1456  }
1457
1458  dst.ClearForWrite(vform_dst);
1459  dst.SetInt(vform_dst, 0, dst_val);
1460  return dst;
1461}
1462
1463
1464LogicVRegister Simulator::uaddlv(VectorFormat vform,
1465                                 LogicVRegister dst,
1466                                 const LogicVRegister& src) {
1467  VectorFormat vform_dst =
1468      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1469
1470  uint64_t dst_val = 0;
1471  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1472    dst_val += src.Uint(vform, i);
1473  }
1474
1475  dst.ClearForWrite(vform_dst);
1476  dst.SetUint(vform_dst, 0, dst_val);
1477  return dst;
1478}
1479
1480
1481LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1482                                   LogicVRegister dst,
1483                                   const LogicVRegister& src,
1484                                   bool max) {
1485  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1486  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1487    int64_t src_val = src.Int(vform, i);
1488    if (max) {
1489      dst_val = (src_val > dst_val) ? src_val : dst_val;
1490    } else {
1491      dst_val = (src_val < dst_val) ? src_val : dst_val;
1492    }
1493  }
1494  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1495  dst.SetInt(vform, 0, dst_val);
1496  return dst;
1497}
1498
1499
1500LogicVRegister Simulator::smaxv(VectorFormat vform,
1501                                LogicVRegister dst,
1502                                const LogicVRegister& src) {
1503  sminmaxv(vform, dst, src, true);
1504  return dst;
1505}
1506
1507
1508LogicVRegister Simulator::sminv(VectorFormat vform,
1509                                LogicVRegister dst,
1510                                const LogicVRegister& src) {
1511  sminmaxv(vform, dst, src, false);
1512  return dst;
1513}
1514
1515
1516LogicVRegister Simulator::uminmax(VectorFormat vform,
1517                                  LogicVRegister dst,
1518                                  const LogicVRegister& src1,
1519                                  const LogicVRegister& src2,
1520                                  bool max) {
1521  dst.ClearForWrite(vform);
1522  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1523    uint64_t src1_val = src1.Uint(vform, i);
1524    uint64_t src2_val = src2.Uint(vform, i);
1525    uint64_t dst_val;
1526    if (max) {
1527      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1528    } else {
1529      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1530    }
1531    dst.SetUint(vform, i, dst_val);
1532  }
1533  return dst;
1534}
1535
1536
1537LogicVRegister Simulator::umax(VectorFormat vform,
1538                               LogicVRegister dst,
1539                               const LogicVRegister& src1,
1540                               const LogicVRegister& src2) {
1541  return uminmax(vform, dst, src1, src2, true);
1542}
1543
1544
1545LogicVRegister Simulator::umin(VectorFormat vform,
1546                               LogicVRegister dst,
1547                               const LogicVRegister& src1,
1548                               const LogicVRegister& src2) {
1549  return uminmax(vform, dst, src1, src2, false);
1550}
1551
1552
1553LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1554                                   LogicVRegister dst,
1555                                   const LogicVRegister& src1,
1556                                   const LogicVRegister& src2,
1557                                   bool max) {
1558  int lanes = LaneCountFromFormat(vform);
1559  uint64_t result[kMaxLanesPerVector];
1560  const LogicVRegister* src = &src1;
1561  for (int j = 0; j < 2; j++) {
1562    for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1563      uint64_t first_val = src->Uint(vform, i);
1564      uint64_t second_val = src->Uint(vform, i + 1);
1565      uint64_t dst_val;
1566      if (max) {
1567        dst_val = (first_val > second_val) ? first_val : second_val;
1568      } else {
1569        dst_val = (first_val < second_val) ? first_val : second_val;
1570      }
1571      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1572      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1573    }
1574    src = &src2;
1575  }
1576  dst.SetUintArray(vform, result);
1577  return dst;
1578}
1579
1580
1581LogicVRegister Simulator::umaxp(VectorFormat vform,
1582                                LogicVRegister dst,
1583                                const LogicVRegister& src1,
1584                                const LogicVRegister& src2) {
1585  return uminmaxp(vform, dst, src1, src2, true);
1586}
1587
1588
1589LogicVRegister Simulator::uminp(VectorFormat vform,
1590                                LogicVRegister dst,
1591                                const LogicVRegister& src1,
1592                                const LogicVRegister& src2) {
1593  return uminmaxp(vform, dst, src1, src2, false);
1594}
1595
1596
1597LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1598                                   LogicVRegister dst,
1599                                   const LogicVRegister& src,
1600                                   bool max) {
1601  uint64_t dst_val = max ? 0 : UINT64_MAX;
1602  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1603    uint64_t src_val = src.Uint(vform, i);
1604    dst.SetUint(vform, i, i);
1605    if (max) {
1606      dst_val = (src_val > dst_val) ? src_val : dst_val;
1607    } else {
1608      dst_val = (src_val < dst_val) ? src_val : dst_val;
1609    }
1610  }
1611  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1612  dst.SetUint(vform, 0, dst_val);
1613  return dst;
1614}
1615
1616
1617LogicVRegister Simulator::umaxv(VectorFormat vform,
1618                                LogicVRegister dst,
1619                                const LogicVRegister& src) {
1620  uminmaxv(vform, dst, src, true);
1621  return dst;
1622}
1623
1624
1625LogicVRegister Simulator::uminv(VectorFormat vform,
1626                                LogicVRegister dst,
1627                                const LogicVRegister& src) {
1628  uminmaxv(vform, dst, src, false);
1629  return dst;
1630}
1631
1632
1633LogicVRegister Simulator::shl(VectorFormat vform,
1634                              LogicVRegister dst,
1635                              const LogicVRegister& src,
1636                              int shift) {
1637  VIXL_ASSERT(shift >= 0);
1638  SimVRegister temp;
1639  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1640  return ushl(vform, dst, src, shiftreg);
1641}
1642
1643
1644LogicVRegister Simulator::sshll(VectorFormat vform,
1645                                LogicVRegister dst,
1646                                const LogicVRegister& src,
1647                                int shift) {
1648  VIXL_ASSERT(shift >= 0);
1649  SimVRegister temp1, temp2;
1650  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1651  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1652  return sshl(vform, dst, extendedreg, shiftreg);
1653}
1654
1655
1656LogicVRegister Simulator::sshll2(VectorFormat vform,
1657                                 LogicVRegister dst,
1658                                 const LogicVRegister& src,
1659                                 int shift) {
1660  VIXL_ASSERT(shift >= 0);
1661  SimVRegister temp1, temp2;
1662  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1663  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1664  return sshl(vform, dst, extendedreg, shiftreg);
1665}
1666
1667
1668LogicVRegister Simulator::shll(VectorFormat vform,
1669                               LogicVRegister dst,
1670                               const LogicVRegister& src) {
1671  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1672  return sshll(vform, dst, src, shift);
1673}
1674
1675
1676LogicVRegister Simulator::shll2(VectorFormat vform,
1677                                LogicVRegister dst,
1678                                const LogicVRegister& src) {
1679  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1680  return sshll2(vform, dst, src, shift);
1681}
1682
1683
1684LogicVRegister Simulator::ushll(VectorFormat vform,
1685                                LogicVRegister dst,
1686                                const LogicVRegister& src,
1687                                int shift) {
1688  VIXL_ASSERT(shift >= 0);
1689  SimVRegister temp1, temp2;
1690  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1691  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1692  return ushl(vform, dst, extendedreg, shiftreg);
1693}
1694
1695
1696LogicVRegister Simulator::ushll2(VectorFormat vform,
1697                                 LogicVRegister dst,
1698                                 const LogicVRegister& src,
1699                                 int shift) {
1700  VIXL_ASSERT(shift >= 0);
1701  SimVRegister temp1, temp2;
1702  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1703  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1704  return ushl(vform, dst, extendedreg, shiftreg);
1705}
1706
1707
1708LogicVRegister Simulator::sli(VectorFormat vform,
1709                              LogicVRegister dst,
1710                              const LogicVRegister& src,
1711                              int shift) {
1712  dst.ClearForWrite(vform);
1713  int laneCount = LaneCountFromFormat(vform);
1714  for (int i = 0; i < laneCount; i++) {
1715    uint64_t src_lane = src.Uint(vform, i);
1716    uint64_t dst_lane = dst.Uint(vform, i);
1717    uint64_t shifted = src_lane << shift;
1718    uint64_t mask = MaxUintFromFormat(vform) << shift;
1719    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1720  }
1721  return dst;
1722}
1723
1724
1725LogicVRegister Simulator::sqshl(VectorFormat vform,
1726                                LogicVRegister dst,
1727                                const LogicVRegister& src,
1728                                int shift) {
1729  VIXL_ASSERT(shift >= 0);
1730  SimVRegister temp;
1731  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1732  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1733}
1734
1735
1736LogicVRegister Simulator::uqshl(VectorFormat vform,
1737                                LogicVRegister dst,
1738                                const LogicVRegister& src,
1739                                int shift) {
1740  VIXL_ASSERT(shift >= 0);
1741  SimVRegister temp;
1742  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1743  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1744}
1745
1746
1747LogicVRegister Simulator::sqshlu(VectorFormat vform,
1748                                 LogicVRegister dst,
1749                                 const LogicVRegister& src,
1750                                 int shift) {
1751  VIXL_ASSERT(shift >= 0);
1752  SimVRegister temp;
1753  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1754  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1755}
1756
1757
1758LogicVRegister Simulator::sri(VectorFormat vform,
1759                              LogicVRegister dst,
1760                              const LogicVRegister& src,
1761                              int shift) {
1762  dst.ClearForWrite(vform);
1763  int laneCount = LaneCountFromFormat(vform);
1764  VIXL_ASSERT((shift > 0) &&
1765              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1766  for (int i = 0; i < laneCount; i++) {
1767    uint64_t src_lane = src.Uint(vform, i);
1768    uint64_t dst_lane = dst.Uint(vform, i);
1769    uint64_t shifted;
1770    uint64_t mask;
1771    if (shift == 64) {
1772      shifted = 0;
1773      mask = 0;
1774    } else {
1775      shifted = src_lane >> shift;
1776      mask = MaxUintFromFormat(vform) >> shift;
1777    }
1778    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1779  }
1780  return dst;
1781}
1782
1783
1784LogicVRegister Simulator::ushr(VectorFormat vform,
1785                               LogicVRegister dst,
1786                               const LogicVRegister& src,
1787                               int shift) {
1788  VIXL_ASSERT(shift >= 0);
1789  SimVRegister temp;
1790  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1791  return ushl(vform, dst, src, shiftreg);
1792}
1793
1794
1795LogicVRegister Simulator::sshr(VectorFormat vform,
1796                               LogicVRegister dst,
1797                               const LogicVRegister& src,
1798                               int shift) {
1799  VIXL_ASSERT(shift >= 0);
1800  SimVRegister temp;
1801  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1802  return sshl(vform, dst, src, shiftreg);
1803}
1804
1805
1806LogicVRegister Simulator::ssra(VectorFormat vform,
1807                               LogicVRegister dst,
1808                               const LogicVRegister& src,
1809                               int shift) {
1810  SimVRegister temp;
1811  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1812  return add(vform, dst, dst, shifted_reg);
1813}
1814
1815
1816LogicVRegister Simulator::usra(VectorFormat vform,
1817                               LogicVRegister dst,
1818                               const LogicVRegister& src,
1819                               int shift) {
1820  SimVRegister temp;
1821  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1822  return add(vform, dst, dst, shifted_reg);
1823}
1824
1825
1826LogicVRegister Simulator::srsra(VectorFormat vform,
1827                                LogicVRegister dst,
1828                                const LogicVRegister& src,
1829                                int shift) {
1830  SimVRegister temp;
1831  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1832  return add(vform, dst, dst, shifted_reg);
1833}
1834
1835
1836LogicVRegister Simulator::ursra(VectorFormat vform,
1837                                LogicVRegister dst,
1838                                const LogicVRegister& src,
1839                                int shift) {
1840  SimVRegister temp;
1841  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1842  return add(vform, dst, dst, shifted_reg);
1843}
1844
1845
1846LogicVRegister Simulator::cls(VectorFormat vform,
1847                              LogicVRegister dst,
1848                              const LogicVRegister& src) {
1849  uint64_t result[16];
1850  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1851  int laneCount = LaneCountFromFormat(vform);
1852  for (int i = 0; i < laneCount; i++) {
1853    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1854  }
1855
1856  dst.ClearForWrite(vform);
1857  for (int i = 0; i < laneCount; ++i) {
1858    dst.SetUint(vform, i, result[i]);
1859  }
1860  return dst;
1861}
1862
1863
1864LogicVRegister Simulator::clz(VectorFormat vform,
1865                              LogicVRegister dst,
1866                              const LogicVRegister& src) {
1867  uint64_t result[16];
1868  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1869  int laneCount = LaneCountFromFormat(vform);
1870  for (int i = 0; i < laneCount; i++) {
1871    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1872  }
1873
1874  dst.ClearForWrite(vform);
1875  for (int i = 0; i < laneCount; ++i) {
1876    dst.SetUint(vform, i, result[i]);
1877  }
1878  return dst;
1879}
1880
1881
1882LogicVRegister Simulator::cnt(VectorFormat vform,
1883                              LogicVRegister dst,
1884                              const LogicVRegister& src) {
1885  uint64_t result[16];
1886  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1887  int laneCount = LaneCountFromFormat(vform);
1888  for (int i = 0; i < laneCount; i++) {
1889    uint64_t value = src.Uint(vform, i);
1890    result[i] = 0;
1891    for (int j = 0; j < laneSizeInBits; j++) {
1892      result[i] += (value & 1);
1893      value >>= 1;
1894    }
1895  }
1896
1897  dst.ClearForWrite(vform);
1898  for (int i = 0; i < laneCount; ++i) {
1899    dst.SetUint(vform, i, result[i]);
1900  }
1901  return dst;
1902}
1903
1904
1905LogicVRegister Simulator::sshl(VectorFormat vform,
1906                               LogicVRegister dst,
1907                               const LogicVRegister& src1,
1908                               const LogicVRegister& src2) {
1909  dst.ClearForWrite(vform);
1910  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1911    int8_t shift_val = src2.Int(vform, i);
1912    int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1913
1914    // Set signed saturation state.
1915    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1916      dst.SetSignedSat(i, lj_src_val >= 0);
1917    }
1918
1919    // Set unsigned saturation state.
1920    if (lj_src_val < 0) {
1921      dst.SetUnsignedSat(i, false);
1922    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1923               (lj_src_val != 0)) {
1924      dst.SetUnsignedSat(i, true);
1925    }
1926
1927    int64_t src_val = src1.Int(vform, i);
1928    if (shift_val > 63) {
1929      dst.SetInt(vform, i, 0);
1930    } else if (shift_val < -63) {
1931      dst.SetRounding(i, src_val < 0);
1932      dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1933    } else {
1934      if (shift_val < 0) {
1935        // Set rounding state. Rounding only needed on right shifts.
1936        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1937          dst.SetRounding(i, true);
1938        }
1939        src_val >>= -shift_val;
1940      } else {
1941        src_val <<= shift_val;
1942      }
1943      dst.SetInt(vform, i, src_val);
1944    }
1945  }
1946  return dst;
1947}
1948
1949
1950LogicVRegister Simulator::ushl(VectorFormat vform,
1951                               LogicVRegister dst,
1952                               const LogicVRegister& src1,
1953                               const LogicVRegister& src2) {
1954  dst.ClearForWrite(vform);
1955  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1956    int8_t shift_val = src2.Int(vform, i);
1957    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1958
1959    // Set saturation state.
1960    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1961      dst.SetUnsignedSat(i, true);
1962    }
1963
1964    uint64_t src_val = src1.Uint(vform, i);
1965    if ((shift_val > 63) || (shift_val < -64)) {
1966      dst.SetUint(vform, i, 0);
1967    } else {
1968      if (shift_val < 0) {
1969        // Set rounding state. Rounding only needed on right shifts.
1970        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1971          dst.SetRounding(i, true);
1972        }
1973
1974        if (shift_val == -64) {
1975          src_val = 0;
1976        } else {
1977          src_val >>= -shift_val;
1978        }
1979      } else {
1980        src_val <<= shift_val;
1981      }
1982      dst.SetUint(vform, i, src_val);
1983    }
1984  }
1985  return dst;
1986}
1987
1988
1989LogicVRegister Simulator::neg(VectorFormat vform,
1990                              LogicVRegister dst,
1991                              const LogicVRegister& src) {
1992  dst.ClearForWrite(vform);
1993  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1994    // Test for signed saturation.
1995    int64_t sa = src.Int(vform, i);
1996    if (sa == MinIntFromFormat(vform)) {
1997      dst.SetSignedSat(i, true);
1998    }
1999    dst.SetInt(vform, i, -sa);
2000  }
2001  return dst;
2002}
2003
2004
2005LogicVRegister Simulator::suqadd(VectorFormat vform,
2006                                 LogicVRegister dst,
2007                                 const LogicVRegister& src) {
2008  dst.ClearForWrite(vform);
2009  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2010    int64_t sa = dst.IntLeftJustified(vform, i);
2011    uint64_t ub = src.UintLeftJustified(vform, i);
2012    int64_t sr = sa + ub;
2013
2014    if (sr < sa) {  // Test for signed positive saturation.
2015      dst.SetInt(vform, i, MaxIntFromFormat(vform));
2016    } else {
2017      dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
2018    }
2019  }
2020  return dst;
2021}
2022
2023
2024LogicVRegister Simulator::usqadd(VectorFormat vform,
2025                                 LogicVRegister dst,
2026                                 const LogicVRegister& src) {
2027  dst.ClearForWrite(vform);
2028  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029    uint64_t ua = dst.UintLeftJustified(vform, i);
2030    int64_t sb = src.IntLeftJustified(vform, i);
2031    uint64_t ur = ua + sb;
2032
2033    if ((sb > 0) && (ur <= ua)) {
2034      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2035    } else if ((sb < 0) && (ur >= ua)) {
2036      dst.SetUint(vform, i, 0);  // Negative saturation.
2037    } else {
2038      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2039    }
2040  }
2041  return dst;
2042}
2043
2044
2045LogicVRegister Simulator::abs(VectorFormat vform,
2046                              LogicVRegister dst,
2047                              const LogicVRegister& src) {
2048  dst.ClearForWrite(vform);
2049  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2050    // Test for signed saturation.
2051    int64_t sa = src.Int(vform, i);
2052    if (sa == MinIntFromFormat(vform)) {
2053      dst.SetSignedSat(i, true);
2054    }
2055    if (sa < 0) {
2056      dst.SetInt(vform, i, -sa);
2057    } else {
2058      dst.SetInt(vform, i, sa);
2059    }
2060  }
2061  return dst;
2062}
2063
2064
2065LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2066                                        LogicVRegister dst,
2067                                        bool dstIsSigned,
2068                                        const LogicVRegister& src,
2069                                        bool srcIsSigned) {
2070  bool upperhalf = false;
2071  VectorFormat srcform = kFormatUndefined;
2072  int64_t ssrc[8];
2073  uint64_t usrc[8];
2074
2075  switch (dstform) {
2076    case kFormat8B:
2077      upperhalf = false;
2078      srcform = kFormat8H;
2079      break;
2080    case kFormat16B:
2081      upperhalf = true;
2082      srcform = kFormat8H;
2083      break;
2084    case kFormat4H:
2085      upperhalf = false;
2086      srcform = kFormat4S;
2087      break;
2088    case kFormat8H:
2089      upperhalf = true;
2090      srcform = kFormat4S;
2091      break;
2092    case kFormat2S:
2093      upperhalf = false;
2094      srcform = kFormat2D;
2095      break;
2096    case kFormat4S:
2097      upperhalf = true;
2098      srcform = kFormat2D;
2099      break;
2100    case kFormatB:
2101      upperhalf = false;
2102      srcform = kFormatH;
2103      break;
2104    case kFormatH:
2105      upperhalf = false;
2106      srcform = kFormatS;
2107      break;
2108    case kFormatS:
2109      upperhalf = false;
2110      srcform = kFormatD;
2111      break;
2112    default:
2113      VIXL_UNIMPLEMENTED();
2114  }
2115
2116  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2117    ssrc[i] = src.Int(srcform, i);
2118    usrc[i] = src.Uint(srcform, i);
2119  }
2120
2121  int offset;
2122  if (upperhalf) {
2123    offset = LaneCountFromFormat(dstform) / 2;
2124  } else {
2125    offset = 0;
2126    dst.ClearForWrite(dstform);
2127  }
2128
2129  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2130    // Test for signed saturation
2131    if (ssrc[i] > MaxIntFromFormat(dstform)) {
2132      dst.SetSignedSat(offset + i, true);
2133    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2134      dst.SetSignedSat(offset + i, false);
2135    }
2136
2137    // Test for unsigned saturation
2138    if (srcIsSigned) {
2139      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2140        dst.SetUnsignedSat(offset + i, true);
2141      } else if (ssrc[i] < 0) {
2142        dst.SetUnsignedSat(offset + i, false);
2143      }
2144    } else {
2145      if (usrc[i] > MaxUintFromFormat(dstform)) {
2146        dst.SetUnsignedSat(offset + i, true);
2147      }
2148    }
2149
2150    int64_t result;
2151    if (srcIsSigned) {
2152      result = ssrc[i] & MaxUintFromFormat(dstform);
2153    } else {
2154      result = usrc[i] & MaxUintFromFormat(dstform);
2155    }
2156
2157    if (dstIsSigned) {
2158      dst.SetInt(dstform, offset + i, result);
2159    } else {
2160      dst.SetUint(dstform, offset + i, result);
2161    }
2162  }
2163  return dst;
2164}
2165
2166
2167LogicVRegister Simulator::xtn(VectorFormat vform,
2168                              LogicVRegister dst,
2169                              const LogicVRegister& src) {
2170  return extractnarrow(vform, dst, true, src, true);
2171}
2172
2173
2174LogicVRegister Simulator::sqxtn(VectorFormat vform,
2175                                LogicVRegister dst,
2176                                const LogicVRegister& src) {
2177  return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2178}
2179
2180
2181LogicVRegister Simulator::sqxtun(VectorFormat vform,
2182                                 LogicVRegister dst,
2183                                 const LogicVRegister& src) {
2184  return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2185}
2186
2187
2188LogicVRegister Simulator::uqxtn(VectorFormat vform,
2189                                LogicVRegister dst,
2190                                const LogicVRegister& src) {
2191  return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2192}
2193
2194
2195LogicVRegister Simulator::absdiff(VectorFormat vform,
2196                                  LogicVRegister dst,
2197                                  const LogicVRegister& src1,
2198                                  const LogicVRegister& src2,
2199                                  bool issigned) {
2200  dst.ClearForWrite(vform);
2201  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2202    if (issigned) {
2203      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2204      sr = sr > 0 ? sr : -sr;
2205      dst.SetInt(vform, i, sr);
2206    } else {
2207      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2208      sr = sr > 0 ? sr : -sr;
2209      dst.SetUint(vform, i, sr);
2210    }
2211  }
2212  return dst;
2213}
2214
2215
2216LogicVRegister Simulator::saba(VectorFormat vform,
2217                               LogicVRegister dst,
2218                               const LogicVRegister& src1,
2219                               const LogicVRegister& src2) {
2220  SimVRegister temp;
2221  dst.ClearForWrite(vform);
2222  absdiff(vform, temp, src1, src2, true);
2223  add(vform, dst, dst, temp);
2224  return dst;
2225}
2226
2227
2228LogicVRegister Simulator::uaba(VectorFormat vform,
2229                               LogicVRegister dst,
2230                               const LogicVRegister& src1,
2231                               const LogicVRegister& src2) {
2232  SimVRegister temp;
2233  dst.ClearForWrite(vform);
2234  absdiff(vform, temp, src1, src2, false);
2235  add(vform, dst, dst, temp);
2236  return dst;
2237}
2238
2239
2240LogicVRegister Simulator::not_(VectorFormat vform,
2241                               LogicVRegister dst,
2242                               const LogicVRegister& src) {
2243  dst.ClearForWrite(vform);
2244  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2245    dst.SetUint(vform, i, ~src.Uint(vform, i));
2246  }
2247  return dst;
2248}
2249
2250
2251LogicVRegister Simulator::rbit(VectorFormat vform,
2252                               LogicVRegister dst,
2253                               const LogicVRegister& src) {
2254  uint64_t result[16];
2255  int laneCount = LaneCountFromFormat(vform);
2256  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2257  uint64_t reversed_value;
2258  uint64_t value;
2259  for (int i = 0; i < laneCount; i++) {
2260    value = src.Uint(vform, i);
2261    reversed_value = 0;
2262    for (int j = 0; j < laneSizeInBits; j++) {
2263      reversed_value = (reversed_value << 1) | (value & 1);
2264      value >>= 1;
2265    }
2266    result[i] = reversed_value;
2267  }
2268
2269  dst.ClearForWrite(vform);
2270  for (int i = 0; i < laneCount; ++i) {
2271    dst.SetUint(vform, i, result[i]);
2272  }
2273  return dst;
2274}
2275
2276
2277LogicVRegister Simulator::rev(VectorFormat vform,
2278                              LogicVRegister dst,
2279                              const LogicVRegister& src,
2280                              int revSize) {
2281  uint64_t result[16];
2282  int laneCount = LaneCountFromFormat(vform);
2283  int laneSize = LaneSizeInBytesFromFormat(vform);
2284  int lanesPerLoop = revSize / laneSize;
2285  for (int i = 0; i < laneCount; i += lanesPerLoop) {
2286    for (int j = 0; j < lanesPerLoop; j++) {
2287      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2288    }
2289  }
2290  dst.ClearForWrite(vform);
2291  for (int i = 0; i < laneCount; ++i) {
2292    dst.SetUint(vform, i, result[i]);
2293  }
2294  return dst;
2295}
2296
2297
2298LogicVRegister Simulator::rev16(VectorFormat vform,
2299                                LogicVRegister dst,
2300                                const LogicVRegister& src) {
2301  return rev(vform, dst, src, 2);
2302}
2303
2304
2305LogicVRegister Simulator::rev32(VectorFormat vform,
2306                                LogicVRegister dst,
2307                                const LogicVRegister& src) {
2308  return rev(vform, dst, src, 4);
2309}
2310
2311
2312LogicVRegister Simulator::rev64(VectorFormat vform,
2313                                LogicVRegister dst,
2314                                const LogicVRegister& src) {
2315  return rev(vform, dst, src, 8);
2316}
2317
2318
2319LogicVRegister Simulator::addlp(VectorFormat vform,
2320                                LogicVRegister dst,
2321                                const LogicVRegister& src,
2322                                bool is_signed,
2323                                bool do_accumulate) {
2324  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2325
2326  int64_t sr[16];
2327  uint64_t ur[16];
2328
2329  int laneCount = LaneCountFromFormat(vform);
2330  for (int i = 0; i < laneCount; ++i) {
2331    if (is_signed) {
2332      sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2333    } else {
2334      ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2335    }
2336  }
2337
2338  dst.ClearForWrite(vform);
2339  for (int i = 0; i < laneCount; ++i) {
2340    if (do_accumulate) {
2341      if (is_signed) {
2342        dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2343      } else {
2344        dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2345      }
2346    } else {
2347      if (is_signed) {
2348        dst.SetInt(vform, i, sr[i]);
2349      } else {
2350        dst.SetUint(vform, i, ur[i]);
2351      }
2352    }
2353  }
2354
2355  return dst;
2356}
2357
2358
2359LogicVRegister Simulator::saddlp(VectorFormat vform,
2360                                 LogicVRegister dst,
2361                                 const LogicVRegister& src) {
2362  return addlp(vform, dst, src, true, false);
2363}
2364
2365
2366LogicVRegister Simulator::uaddlp(VectorFormat vform,
2367                                 LogicVRegister dst,
2368                                 const LogicVRegister& src) {
2369  return addlp(vform, dst, src, false, false);
2370}
2371
2372
2373LogicVRegister Simulator::sadalp(VectorFormat vform,
2374                                 LogicVRegister dst,
2375                                 const LogicVRegister& src) {
2376  return addlp(vform, dst, src, true, true);
2377}
2378
2379
2380LogicVRegister Simulator::uadalp(VectorFormat vform,
2381                                 LogicVRegister dst,
2382                                 const LogicVRegister& src) {
2383  return addlp(vform, dst, src, false, true);
2384}
2385
2386
2387LogicVRegister Simulator::ext(VectorFormat vform,
2388                              LogicVRegister dst,
2389                              const LogicVRegister& src1,
2390                              const LogicVRegister& src2,
2391                              int index) {
2392  uint8_t result[16];
2393  int laneCount = LaneCountFromFormat(vform);
2394  for (int i = 0; i < laneCount - index; ++i) {
2395    result[i] = src1.Uint(vform, i + index);
2396  }
2397  for (int i = 0; i < index; ++i) {
2398    result[laneCount - index + i] = src2.Uint(vform, i);
2399  }
2400  dst.ClearForWrite(vform);
2401  for (int i = 0; i < laneCount; ++i) {
2402    dst.SetUint(vform, i, result[i]);
2403  }
2404  return dst;
2405}
2406
2407
2408LogicVRegister Simulator::dup_element(VectorFormat vform,
2409                                      LogicVRegister dst,
2410                                      const LogicVRegister& src,
2411                                      int src_index) {
2412  int laneCount = LaneCountFromFormat(vform);
2413  uint64_t value = src.Uint(vform, src_index);
2414  dst.ClearForWrite(vform);
2415  for (int i = 0; i < laneCount; ++i) {
2416    dst.SetUint(vform, i, value);
2417  }
2418  return dst;
2419}
2420
2421
2422LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2423                                        LogicVRegister dst,
2424                                        uint64_t imm) {
2425  int laneCount = LaneCountFromFormat(vform);
2426  uint64_t value = imm & MaxUintFromFormat(vform);
2427  dst.ClearForWrite(vform);
2428  for (int i = 0; i < laneCount; ++i) {
2429    dst.SetUint(vform, i, value);
2430  }
2431  return dst;
2432}
2433
2434
2435LogicVRegister Simulator::ins_element(VectorFormat vform,
2436                                      LogicVRegister dst,
2437                                      int dst_index,
2438                                      const LogicVRegister& src,
2439                                      int src_index) {
2440  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2441  return dst;
2442}
2443
2444
2445LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2446                                        LogicVRegister dst,
2447                                        int dst_index,
2448                                        uint64_t imm) {
2449  uint64_t value = imm & MaxUintFromFormat(vform);
2450  dst.SetUint(vform, dst_index, value);
2451  return dst;
2452}
2453
2454
2455LogicVRegister Simulator::movi(VectorFormat vform,
2456                               LogicVRegister dst,
2457                               uint64_t imm) {
2458  int laneCount = LaneCountFromFormat(vform);
2459  dst.ClearForWrite(vform);
2460  for (int i = 0; i < laneCount; ++i) {
2461    dst.SetUint(vform, i, imm);
2462  }
2463  return dst;
2464}
2465
2466
2467LogicVRegister Simulator::mvni(VectorFormat vform,
2468                               LogicVRegister dst,
2469                               uint64_t imm) {
2470  int laneCount = LaneCountFromFormat(vform);
2471  dst.ClearForWrite(vform);
2472  for (int i = 0; i < laneCount; ++i) {
2473    dst.SetUint(vform, i, ~imm);
2474  }
2475  return dst;
2476}
2477
2478
2479LogicVRegister Simulator::orr(VectorFormat vform,
2480                              LogicVRegister dst,
2481                              const LogicVRegister& src,
2482                              uint64_t imm) {
2483  uint64_t result[16];
2484  int laneCount = LaneCountFromFormat(vform);
2485  for (int i = 0; i < laneCount; ++i) {
2486    result[i] = src.Uint(vform, i) | imm;
2487  }
2488  dst.ClearForWrite(vform);
2489  for (int i = 0; i < laneCount; ++i) {
2490    dst.SetUint(vform, i, result[i]);
2491  }
2492  return dst;
2493}
2494
2495
2496LogicVRegister Simulator::uxtl(VectorFormat vform,
2497                               LogicVRegister dst,
2498                               const LogicVRegister& src) {
2499  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2500
2501  dst.ClearForWrite(vform);
2502  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2503    dst.SetUint(vform, i, src.Uint(vform_half, i));
2504  }
2505  return dst;
2506}
2507
2508
2509LogicVRegister Simulator::sxtl(VectorFormat vform,
2510                               LogicVRegister dst,
2511                               const LogicVRegister& src) {
2512  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2513
2514  dst.ClearForWrite(vform);
2515  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2516    dst.SetInt(vform, i, src.Int(vform_half, i));
2517  }
2518  return dst;
2519}
2520
2521
2522LogicVRegister Simulator::uxtl2(VectorFormat vform,
2523                                LogicVRegister dst,
2524                                const LogicVRegister& src) {
2525  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2526  int lane_count = LaneCountFromFormat(vform);
2527
2528  dst.ClearForWrite(vform);
2529  for (int i = 0; i < lane_count; i++) {
2530    dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2531  }
2532  return dst;
2533}
2534
2535
2536LogicVRegister Simulator::sxtl2(VectorFormat vform,
2537                                LogicVRegister dst,
2538                                const LogicVRegister& src) {
2539  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2540  int lane_count = LaneCountFromFormat(vform);
2541
2542  dst.ClearForWrite(vform);
2543  for (int i = 0; i < lane_count; i++) {
2544    dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2545  }
2546  return dst;
2547}
2548
2549
2550LogicVRegister Simulator::shrn(VectorFormat vform,
2551                               LogicVRegister dst,
2552                               const LogicVRegister& src,
2553                               int shift) {
2554  SimVRegister temp;
2555  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2556  VectorFormat vform_dst = vform;
2557  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2558  return extractnarrow(vform_dst, dst, false, shifted_src, false);
2559}
2560
2561
2562LogicVRegister Simulator::shrn2(VectorFormat vform,
2563                                LogicVRegister dst,
2564                                const LogicVRegister& src,
2565                                int shift) {
2566  SimVRegister temp;
2567  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2568  VectorFormat vformdst = vform;
2569  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2570  return extractnarrow(vformdst, dst, false, shifted_src, false);
2571}
2572
2573
2574LogicVRegister Simulator::rshrn(VectorFormat vform,
2575                                LogicVRegister dst,
2576                                const LogicVRegister& src,
2577                                int shift) {
2578  SimVRegister temp;
2579  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2580  VectorFormat vformdst = vform;
2581  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2582  return extractnarrow(vformdst, dst, false, shifted_src, false);
2583}
2584
2585
2586LogicVRegister Simulator::rshrn2(VectorFormat vform,
2587                                 LogicVRegister dst,
2588                                 const LogicVRegister& src,
2589                                 int shift) {
2590  SimVRegister temp;
2591  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2592  VectorFormat vformdst = vform;
2593  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2594  return extractnarrow(vformdst, dst, false, shifted_src, false);
2595}
2596
2597
2598LogicVRegister Simulator::Table(VectorFormat vform,
2599                                LogicVRegister dst,
2600                                const LogicVRegister& ind,
2601                                bool zero_out_of_bounds,
2602                                const LogicVRegister* tab1,
2603                                const LogicVRegister* tab2,
2604                                const LogicVRegister* tab3,
2605                                const LogicVRegister* tab4) {
2606  VIXL_ASSERT(tab1 != NULL);
2607  const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2608  uint64_t result[kMaxLanesPerVector];
2609  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2610    result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2611  }
2612  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2613    uint64_t j = ind.Uint(vform, i);
2614    int tab_idx = static_cast<int>(j >> 4);
2615    int j_idx = static_cast<int>(j & 15);
2616    if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2617      result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2618    }
2619  }
2620  dst.SetUintArray(vform, result);
2621  return dst;
2622}
2623
2624
2625LogicVRegister Simulator::tbl(VectorFormat vform,
2626                              LogicVRegister dst,
2627                              const LogicVRegister& tab,
2628                              const LogicVRegister& ind) {
2629  return Table(vform, dst, ind, true, &tab);
2630}
2631
2632
2633LogicVRegister Simulator::tbl(VectorFormat vform,
2634                              LogicVRegister dst,
2635                              const LogicVRegister& tab,
2636                              const LogicVRegister& tab2,
2637                              const LogicVRegister& ind) {
2638  return Table(vform, dst, ind, true, &tab, &tab2);
2639}
2640
2641
2642LogicVRegister Simulator::tbl(VectorFormat vform,
2643                              LogicVRegister dst,
2644                              const LogicVRegister& tab,
2645                              const LogicVRegister& tab2,
2646                              const LogicVRegister& tab3,
2647                              const LogicVRegister& ind) {
2648  return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2649}
2650
2651
2652LogicVRegister Simulator::tbl(VectorFormat vform,
2653                              LogicVRegister dst,
2654                              const LogicVRegister& tab,
2655                              const LogicVRegister& tab2,
2656                              const LogicVRegister& tab3,
2657                              const LogicVRegister& tab4,
2658                              const LogicVRegister& ind) {
2659  return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2660}
2661
2662
2663LogicVRegister Simulator::tbx(VectorFormat vform,
2664                              LogicVRegister dst,
2665                              const LogicVRegister& tab,
2666                              const LogicVRegister& ind) {
2667  return Table(vform, dst, ind, false, &tab);
2668}
2669
2670
2671LogicVRegister Simulator::tbx(VectorFormat vform,
2672                              LogicVRegister dst,
2673                              const LogicVRegister& tab,
2674                              const LogicVRegister& tab2,
2675                              const LogicVRegister& ind) {
2676  return Table(vform, dst, ind, false, &tab, &tab2);
2677}
2678
2679
2680LogicVRegister Simulator::tbx(VectorFormat vform,
2681                              LogicVRegister dst,
2682                              const LogicVRegister& tab,
2683                              const LogicVRegister& tab2,
2684                              const LogicVRegister& tab3,
2685                              const LogicVRegister& ind) {
2686  return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2687}
2688
2689
2690LogicVRegister Simulator::tbx(VectorFormat vform,
2691                              LogicVRegister dst,
2692                              const LogicVRegister& tab,
2693                              const LogicVRegister& tab2,
2694                              const LogicVRegister& tab3,
2695                              const LogicVRegister& tab4,
2696                              const LogicVRegister& ind) {
2697  return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2698}
2699
2700
2701LogicVRegister Simulator::uqshrn(VectorFormat vform,
2702                                 LogicVRegister dst,
2703                                 const LogicVRegister& src,
2704                                 int shift) {
2705  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2706}
2707
2708
2709LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2710                                  LogicVRegister dst,
2711                                  const LogicVRegister& src,
2712                                  int shift) {
2713  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2714}
2715
2716
2717LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2718                                  LogicVRegister dst,
2719                                  const LogicVRegister& src,
2720                                  int shift) {
2721  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2722}
2723
2724
2725LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2726                                   LogicVRegister dst,
2727                                   const LogicVRegister& src,
2728                                   int shift) {
2729  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2730}
2731
2732
2733LogicVRegister Simulator::sqshrn(VectorFormat vform,
2734                                 LogicVRegister dst,
2735                                 const LogicVRegister& src,
2736                                 int shift) {
2737  SimVRegister temp;
2738  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2739  VectorFormat vformdst = vform;
2740  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2741  return sqxtn(vformdst, dst, shifted_src);
2742}
2743
2744
2745LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2746                                  LogicVRegister dst,
2747                                  const LogicVRegister& src,
2748                                  int shift) {
2749  SimVRegister temp;
2750  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2751  VectorFormat vformdst = vform;
2752  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2753  return sqxtn(vformdst, dst, shifted_src);
2754}
2755
2756
2757LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2758                                  LogicVRegister dst,
2759                                  const LogicVRegister& src,
2760                                  int shift) {
2761  SimVRegister temp;
2762  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2763  VectorFormat vformdst = vform;
2764  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2765  return sqxtn(vformdst, dst, shifted_src);
2766}
2767
2768
2769LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2770                                   LogicVRegister dst,
2771                                   const LogicVRegister& src,
2772                                   int shift) {
2773  SimVRegister temp;
2774  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2775  VectorFormat vformdst = vform;
2776  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2777  return sqxtn(vformdst, dst, shifted_src);
2778}
2779
2780
2781LogicVRegister Simulator::sqshrun(VectorFormat vform,
2782                                  LogicVRegister dst,
2783                                  const LogicVRegister& src,
2784                                  int shift) {
2785  SimVRegister temp;
2786  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2787  VectorFormat vformdst = vform;
2788  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2789  return sqxtun(vformdst, dst, shifted_src);
2790}
2791
2792
2793LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2794                                   LogicVRegister dst,
2795                                   const LogicVRegister& src,
2796                                   int shift) {
2797  SimVRegister temp;
2798  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2799  VectorFormat vformdst = vform;
2800  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2801  return sqxtun(vformdst, dst, shifted_src);
2802}
2803
2804
2805LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2806                                   LogicVRegister dst,
2807                                   const LogicVRegister& src,
2808                                   int shift) {
2809  SimVRegister temp;
2810  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2811  VectorFormat vformdst = vform;
2812  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2813  return sqxtun(vformdst, dst, shifted_src);
2814}
2815
2816
2817LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2818                                    LogicVRegister dst,
2819                                    const LogicVRegister& src,
2820                                    int shift) {
2821  SimVRegister temp;
2822  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2823  VectorFormat vformdst = vform;
2824  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2825  return sqxtun(vformdst, dst, shifted_src);
2826}
2827
2828
2829LogicVRegister Simulator::uaddl(VectorFormat vform,
2830                                LogicVRegister dst,
2831                                const LogicVRegister& src1,
2832                                const LogicVRegister& src2) {
2833  SimVRegister temp1, temp2;
2834  uxtl(vform, temp1, src1);
2835  uxtl(vform, temp2, src2);
2836  add(vform, dst, temp1, temp2);
2837  return dst;
2838}
2839
2840
2841LogicVRegister Simulator::uaddl2(VectorFormat vform,
2842                                 LogicVRegister dst,
2843                                 const LogicVRegister& src1,
2844                                 const LogicVRegister& src2) {
2845  SimVRegister temp1, temp2;
2846  uxtl2(vform, temp1, src1);
2847  uxtl2(vform, temp2, src2);
2848  add(vform, dst, temp1, temp2);
2849  return dst;
2850}
2851
2852
2853LogicVRegister Simulator::uaddw(VectorFormat vform,
2854                                LogicVRegister dst,
2855                                const LogicVRegister& src1,
2856                                const LogicVRegister& src2) {
2857  SimVRegister temp;
2858  uxtl(vform, temp, src2);
2859  add(vform, dst, src1, temp);
2860  return dst;
2861}
2862
2863
2864LogicVRegister Simulator::uaddw2(VectorFormat vform,
2865                                 LogicVRegister dst,
2866                                 const LogicVRegister& src1,
2867                                 const LogicVRegister& src2) {
2868  SimVRegister temp;
2869  uxtl2(vform, temp, src2);
2870  add(vform, dst, src1, temp);
2871  return dst;
2872}
2873
2874
2875LogicVRegister Simulator::saddl(VectorFormat vform,
2876                                LogicVRegister dst,
2877                                const LogicVRegister& src1,
2878                                const LogicVRegister& src2) {
2879  SimVRegister temp1, temp2;
2880  sxtl(vform, temp1, src1);
2881  sxtl(vform, temp2, src2);
2882  add(vform, dst, temp1, temp2);
2883  return dst;
2884}
2885
2886
2887LogicVRegister Simulator::saddl2(VectorFormat vform,
2888                                 LogicVRegister dst,
2889                                 const LogicVRegister& src1,
2890                                 const LogicVRegister& src2) {
2891  SimVRegister temp1, temp2;
2892  sxtl2(vform, temp1, src1);
2893  sxtl2(vform, temp2, src2);
2894  add(vform, dst, temp1, temp2);
2895  return dst;
2896}
2897
2898
2899LogicVRegister Simulator::saddw(VectorFormat vform,
2900                                LogicVRegister dst,
2901                                const LogicVRegister& src1,
2902                                const LogicVRegister& src2) {
2903  SimVRegister temp;
2904  sxtl(vform, temp, src2);
2905  add(vform, dst, src1, temp);
2906  return dst;
2907}
2908
2909
2910LogicVRegister Simulator::saddw2(VectorFormat vform,
2911                                 LogicVRegister dst,
2912                                 const LogicVRegister& src1,
2913                                 const LogicVRegister& src2) {
2914  SimVRegister temp;
2915  sxtl2(vform, temp, src2);
2916  add(vform, dst, src1, temp);
2917  return dst;
2918}
2919
2920
2921LogicVRegister Simulator::usubl(VectorFormat vform,
2922                                LogicVRegister dst,
2923                                const LogicVRegister& src1,
2924                                const LogicVRegister& src2) {
2925  SimVRegister temp1, temp2;
2926  uxtl(vform, temp1, src1);
2927  uxtl(vform, temp2, src2);
2928  sub(vform, dst, temp1, temp2);
2929  return dst;
2930}
2931
2932
2933LogicVRegister Simulator::usubl2(VectorFormat vform,
2934                                 LogicVRegister dst,
2935                                 const LogicVRegister& src1,
2936                                 const LogicVRegister& src2) {
2937  SimVRegister temp1, temp2;
2938  uxtl2(vform, temp1, src1);
2939  uxtl2(vform, temp2, src2);
2940  sub(vform, dst, temp1, temp2);
2941  return dst;
2942}
2943
2944
2945LogicVRegister Simulator::usubw(VectorFormat vform,
2946                                LogicVRegister dst,
2947                                const LogicVRegister& src1,
2948                                const LogicVRegister& src2) {
2949  SimVRegister temp;
2950  uxtl(vform, temp, src2);
2951  sub(vform, dst, src1, temp);
2952  return dst;
2953}
2954
2955
2956LogicVRegister Simulator::usubw2(VectorFormat vform,
2957                                 LogicVRegister dst,
2958                                 const LogicVRegister& src1,
2959                                 const LogicVRegister& src2) {
2960  SimVRegister temp;
2961  uxtl2(vform, temp, src2);
2962  sub(vform, dst, src1, temp);
2963  return dst;
2964}
2965
2966
2967LogicVRegister Simulator::ssubl(VectorFormat vform,
2968                                LogicVRegister dst,
2969                                const LogicVRegister& src1,
2970                                const LogicVRegister& src2) {
2971  SimVRegister temp1, temp2;
2972  sxtl(vform, temp1, src1);
2973  sxtl(vform, temp2, src2);
2974  sub(vform, dst, temp1, temp2);
2975  return dst;
2976}
2977
2978
2979LogicVRegister Simulator::ssubl2(VectorFormat vform,
2980                                 LogicVRegister dst,
2981                                 const LogicVRegister& src1,
2982                                 const LogicVRegister& src2) {
2983  SimVRegister temp1, temp2;
2984  sxtl2(vform, temp1, src1);
2985  sxtl2(vform, temp2, src2);
2986  sub(vform, dst, temp1, temp2);
2987  return dst;
2988}
2989
2990
2991LogicVRegister Simulator::ssubw(VectorFormat vform,
2992                                LogicVRegister dst,
2993                                const LogicVRegister& src1,
2994                                const LogicVRegister& src2) {
2995  SimVRegister temp;
2996  sxtl(vform, temp, src2);
2997  sub(vform, dst, src1, temp);
2998  return dst;
2999}
3000
3001
3002LogicVRegister Simulator::ssubw2(VectorFormat vform,
3003                                 LogicVRegister dst,
3004                                 const LogicVRegister& src1,
3005                                 const LogicVRegister& src2) {
3006  SimVRegister temp;
3007  sxtl2(vform, temp, src2);
3008  sub(vform, dst, src1, temp);
3009  return dst;
3010}
3011
3012
3013LogicVRegister Simulator::uabal(VectorFormat vform,
3014                                LogicVRegister dst,
3015                                const LogicVRegister& src1,
3016                                const LogicVRegister& src2) {
3017  SimVRegister temp1, temp2;
3018  uxtl(vform, temp1, src1);
3019  uxtl(vform, temp2, src2);
3020  uaba(vform, dst, temp1, temp2);
3021  return dst;
3022}
3023
3024
3025LogicVRegister Simulator::uabal2(VectorFormat vform,
3026                                 LogicVRegister dst,
3027                                 const LogicVRegister& src1,
3028                                 const LogicVRegister& src2) {
3029  SimVRegister temp1, temp2;
3030  uxtl2(vform, temp1, src1);
3031  uxtl2(vform, temp2, src2);
3032  uaba(vform, dst, temp1, temp2);
3033  return dst;
3034}
3035
3036
3037LogicVRegister Simulator::sabal(VectorFormat vform,
3038                                LogicVRegister dst,
3039                                const LogicVRegister& src1,
3040                                const LogicVRegister& src2) {
3041  SimVRegister temp1, temp2;
3042  sxtl(vform, temp1, src1);
3043  sxtl(vform, temp2, src2);
3044  saba(vform, dst, temp1, temp2);
3045  return dst;
3046}
3047
3048
3049LogicVRegister Simulator::sabal2(VectorFormat vform,
3050                                 LogicVRegister dst,
3051                                 const LogicVRegister& src1,
3052                                 const LogicVRegister& src2) {
3053  SimVRegister temp1, temp2;
3054  sxtl2(vform, temp1, src1);
3055  sxtl2(vform, temp2, src2);
3056  saba(vform, dst, temp1, temp2);
3057  return dst;
3058}
3059
3060
3061LogicVRegister Simulator::uabdl(VectorFormat vform,
3062                                LogicVRegister dst,
3063                                const LogicVRegister& src1,
3064                                const LogicVRegister& src2) {
3065  SimVRegister temp1, temp2;
3066  uxtl(vform, temp1, src1);
3067  uxtl(vform, temp2, src2);
3068  absdiff(vform, dst, temp1, temp2, false);
3069  return dst;
3070}
3071
3072
3073LogicVRegister Simulator::uabdl2(VectorFormat vform,
3074                                 LogicVRegister dst,
3075                                 const LogicVRegister& src1,
3076                                 const LogicVRegister& src2) {
3077  SimVRegister temp1, temp2;
3078  uxtl2(vform, temp1, src1);
3079  uxtl2(vform, temp2, src2);
3080  absdiff(vform, dst, temp1, temp2, false);
3081  return dst;
3082}
3083
3084
3085LogicVRegister Simulator::sabdl(VectorFormat vform,
3086                                LogicVRegister dst,
3087                                const LogicVRegister& src1,
3088                                const LogicVRegister& src2) {
3089  SimVRegister temp1, temp2;
3090  sxtl(vform, temp1, src1);
3091  sxtl(vform, temp2, src2);
3092  absdiff(vform, dst, temp1, temp2, true);
3093  return dst;
3094}
3095
3096
3097LogicVRegister Simulator::sabdl2(VectorFormat vform,
3098                                 LogicVRegister dst,
3099                                 const LogicVRegister& src1,
3100                                 const LogicVRegister& src2) {
3101  SimVRegister temp1, temp2;
3102  sxtl2(vform, temp1, src1);
3103  sxtl2(vform, temp2, src2);
3104  absdiff(vform, dst, temp1, temp2, true);
3105  return dst;
3106}
3107
3108
3109LogicVRegister Simulator::umull(VectorFormat vform,
3110                                LogicVRegister dst,
3111                                const LogicVRegister& src1,
3112                                const LogicVRegister& src2) {
3113  SimVRegister temp1, temp2;
3114  uxtl(vform, temp1, src1);
3115  uxtl(vform, temp2, src2);
3116  mul(vform, dst, temp1, temp2);
3117  return dst;
3118}
3119
3120
3121LogicVRegister Simulator::umull2(VectorFormat vform,
3122                                 LogicVRegister dst,
3123                                 const LogicVRegister& src1,
3124                                 const LogicVRegister& src2) {
3125  SimVRegister temp1, temp2;
3126  uxtl2(vform, temp1, src1);
3127  uxtl2(vform, temp2, src2);
3128  mul(vform, dst, temp1, temp2);
3129  return dst;
3130}
3131
3132
3133LogicVRegister Simulator::smull(VectorFormat vform,
3134                                LogicVRegister dst,
3135                                const LogicVRegister& src1,
3136                                const LogicVRegister& src2) {
3137  SimVRegister temp1, temp2;
3138  sxtl(vform, temp1, src1);
3139  sxtl(vform, temp2, src2);
3140  mul(vform, dst, temp1, temp2);
3141  return dst;
3142}
3143
3144
3145LogicVRegister Simulator::smull2(VectorFormat vform,
3146                                 LogicVRegister dst,
3147                                 const LogicVRegister& src1,
3148                                 const LogicVRegister& src2) {
3149  SimVRegister temp1, temp2;
3150  sxtl2(vform, temp1, src1);
3151  sxtl2(vform, temp2, src2);
3152  mul(vform, dst, temp1, temp2);
3153  return dst;
3154}
3155
3156
3157LogicVRegister Simulator::umlsl(VectorFormat vform,
3158                                LogicVRegister dst,
3159                                const LogicVRegister& src1,
3160                                const LogicVRegister& src2) {
3161  SimVRegister temp1, temp2;
3162  uxtl(vform, temp1, src1);
3163  uxtl(vform, temp2, src2);
3164  mls(vform, dst, temp1, temp2);
3165  return dst;
3166}
3167
3168
3169LogicVRegister Simulator::umlsl2(VectorFormat vform,
3170                                 LogicVRegister dst,
3171                                 const LogicVRegister& src1,
3172                                 const LogicVRegister& src2) {
3173  SimVRegister temp1, temp2;
3174  uxtl2(vform, temp1, src1);
3175  uxtl2(vform, temp2, src2);
3176  mls(vform, dst, temp1, temp2);
3177  return dst;
3178}
3179
3180
3181LogicVRegister Simulator::smlsl(VectorFormat vform,
3182                                LogicVRegister dst,
3183                                const LogicVRegister& src1,
3184                                const LogicVRegister& src2) {
3185  SimVRegister temp1, temp2;
3186  sxtl(vform, temp1, src1);
3187  sxtl(vform, temp2, src2);
3188  mls(vform, dst, temp1, temp2);
3189  return dst;
3190}
3191
3192
3193LogicVRegister Simulator::smlsl2(VectorFormat vform,
3194                                 LogicVRegister dst,
3195                                 const LogicVRegister& src1,
3196                                 const LogicVRegister& src2) {
3197  SimVRegister temp1, temp2;
3198  sxtl2(vform, temp1, src1);
3199  sxtl2(vform, temp2, src2);
3200  mls(vform, dst, temp1, temp2);
3201  return dst;
3202}
3203
3204
3205LogicVRegister Simulator::umlal(VectorFormat vform,
3206                                LogicVRegister dst,
3207                                const LogicVRegister& src1,
3208                                const LogicVRegister& src2) {
3209  SimVRegister temp1, temp2;
3210  uxtl(vform, temp1, src1);
3211  uxtl(vform, temp2, src2);
3212  mla(vform, dst, temp1, temp2);
3213  return dst;
3214}
3215
3216
3217LogicVRegister Simulator::umlal2(VectorFormat vform,
3218                                 LogicVRegister dst,
3219                                 const LogicVRegister& src1,
3220                                 const LogicVRegister& src2) {
3221  SimVRegister temp1, temp2;
3222  uxtl2(vform, temp1, src1);
3223  uxtl2(vform, temp2, src2);
3224  mla(vform, dst, temp1, temp2);
3225  return dst;
3226}
3227
3228
3229LogicVRegister Simulator::smlal(VectorFormat vform,
3230                                LogicVRegister dst,
3231                                const LogicVRegister& src1,
3232                                const LogicVRegister& src2) {
3233  SimVRegister temp1, temp2;
3234  sxtl(vform, temp1, src1);
3235  sxtl(vform, temp2, src2);
3236  mla(vform, dst, temp1, temp2);
3237  return dst;
3238}
3239
3240
3241LogicVRegister Simulator::smlal2(VectorFormat vform,
3242                                 LogicVRegister dst,
3243                                 const LogicVRegister& src1,
3244                                 const LogicVRegister& src2) {
3245  SimVRegister temp1, temp2;
3246  sxtl2(vform, temp1, src1);
3247  sxtl2(vform, temp2, src2);
3248  mla(vform, dst, temp1, temp2);
3249  return dst;
3250}
3251
3252
3253LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3254                                  LogicVRegister dst,
3255                                  const LogicVRegister& src1,
3256                                  const LogicVRegister& src2) {
3257  SimVRegister temp;
3258  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3259  return add(vform, dst, dst, product).SignedSaturate(vform);
3260}
3261
3262
3263LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3264                                   LogicVRegister dst,
3265                                   const LogicVRegister& src1,
3266                                   const LogicVRegister& src2) {
3267  SimVRegister temp;
3268  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3269  return add(vform, dst, dst, product).SignedSaturate(vform);
3270}
3271
3272
3273LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3274                                  LogicVRegister dst,
3275                                  const LogicVRegister& src1,
3276                                  const LogicVRegister& src2) {
3277  SimVRegister temp;
3278  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3279  return sub(vform, dst, dst, product).SignedSaturate(vform);
3280}
3281
3282
3283LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3284                                   LogicVRegister dst,
3285                                   const LogicVRegister& src1,
3286                                   const LogicVRegister& src2) {
3287  SimVRegister temp;
3288  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3289  return sub(vform, dst, dst, product).SignedSaturate(vform);
3290}
3291
3292
3293LogicVRegister Simulator::sqdmull(VectorFormat vform,
3294                                  LogicVRegister dst,
3295                                  const LogicVRegister& src1,
3296                                  const LogicVRegister& src2) {
3297  SimVRegister temp;
3298  LogicVRegister product = smull(vform, temp, src1, src2);
3299  return add(vform, dst, product, product).SignedSaturate(vform);
3300}
3301
3302
3303LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3304                                   LogicVRegister dst,
3305                                   const LogicVRegister& src1,
3306                                   const LogicVRegister& src2) {
3307  SimVRegister temp;
3308  LogicVRegister product = smull2(vform, temp, src1, src2);
3309  return add(vform, dst, product, product).SignedSaturate(vform);
3310}
3311
3312
3313LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3314                                   LogicVRegister dst,
3315                                   const LogicVRegister& src1,
3316                                   const LogicVRegister& src2,
3317                                   bool round) {
3318  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3319  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3320  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3321
3322  int esize = LaneSizeInBitsFromFormat(vform);
3323  int round_const = round ? (1 << (esize - 2)) : 0;
3324  int64_t product;
3325
3326  dst.ClearForWrite(vform);
3327  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3328    product = src1.Int(vform, i) * src2.Int(vform, i);
3329    product += round_const;
3330    product = product >> (esize - 1);
3331
3332    if (product > MaxIntFromFormat(vform)) {
3333      product = MaxIntFromFormat(vform);
3334    } else if (product < MinIntFromFormat(vform)) {
3335      product = MinIntFromFormat(vform);
3336    }
3337    dst.SetInt(vform, i, product);
3338  }
3339  return dst;
3340}
3341
3342
3343LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3344                                  LogicVRegister dst,
3345                                  const LogicVRegister& src1,
3346                                  const LogicVRegister& src2) {
3347  return sqrdmulh(vform, dst, src1, src2, false);
3348}
3349
3350
3351LogicVRegister Simulator::addhn(VectorFormat vform,
3352                                LogicVRegister dst,
3353                                const LogicVRegister& src1,
3354                                const LogicVRegister& src2) {
3355  SimVRegister temp;
3356  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3357  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3358  return dst;
3359}
3360
3361
3362LogicVRegister Simulator::addhn2(VectorFormat vform,
3363                                 LogicVRegister dst,
3364                                 const LogicVRegister& src1,
3365                                 const LogicVRegister& src2) {
3366  SimVRegister temp;
3367  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3368  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3369  return dst;
3370}
3371
3372
3373LogicVRegister Simulator::raddhn(VectorFormat vform,
3374                                 LogicVRegister dst,
3375                                 const LogicVRegister& src1,
3376                                 const LogicVRegister& src2) {
3377  SimVRegister temp;
3378  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3379  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3380  return dst;
3381}
3382
3383
3384LogicVRegister Simulator::raddhn2(VectorFormat vform,
3385                                  LogicVRegister dst,
3386                                  const LogicVRegister& src1,
3387                                  const LogicVRegister& src2) {
3388  SimVRegister temp;
3389  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3390  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3391  return dst;
3392}
3393
3394
3395LogicVRegister Simulator::subhn(VectorFormat vform,
3396                                LogicVRegister dst,
3397                                const LogicVRegister& src1,
3398                                const LogicVRegister& src2) {
3399  SimVRegister temp;
3400  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3401  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3402  return dst;
3403}
3404
3405
3406LogicVRegister Simulator::subhn2(VectorFormat vform,
3407                                 LogicVRegister dst,
3408                                 const LogicVRegister& src1,
3409                                 const LogicVRegister& src2) {
3410  SimVRegister temp;
3411  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3412  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3413  return dst;
3414}
3415
3416
3417LogicVRegister Simulator::rsubhn(VectorFormat vform,
3418                                 LogicVRegister dst,
3419                                 const LogicVRegister& src1,
3420                                 const LogicVRegister& src2) {
3421  SimVRegister temp;
3422  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3423  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3424  return dst;
3425}
3426
3427
3428LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3429                                  LogicVRegister dst,
3430                                  const LogicVRegister& src1,
3431                                  const LogicVRegister& src2) {
3432  SimVRegister temp;
3433  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3434  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3435  return dst;
3436}
3437
3438
3439LogicVRegister Simulator::trn1(VectorFormat vform,
3440                               LogicVRegister dst,
3441                               const LogicVRegister& src1,
3442                               const LogicVRegister& src2) {
3443  uint64_t result[16];
3444  int laneCount = LaneCountFromFormat(vform);
3445  int pairs = laneCount / 2;
3446  for (int i = 0; i < pairs; ++i) {
3447    result[2 * i] = src1.Uint(vform, 2 * i);
3448    result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3449  }
3450
3451  dst.ClearForWrite(vform);
3452  for (int i = 0; i < laneCount; ++i) {
3453    dst.SetUint(vform, i, result[i]);
3454  }
3455  return dst;
3456}
3457
3458
3459LogicVRegister Simulator::trn2(VectorFormat vform,
3460                               LogicVRegister dst,
3461                               const LogicVRegister& src1,
3462                               const LogicVRegister& src2) {
3463  uint64_t result[16];
3464  int laneCount = LaneCountFromFormat(vform);
3465  int pairs = laneCount / 2;
3466  for (int i = 0; i < pairs; ++i) {
3467    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3468    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3469  }
3470
3471  dst.ClearForWrite(vform);
3472  for (int i = 0; i < laneCount; ++i) {
3473    dst.SetUint(vform, i, result[i]);
3474  }
3475  return dst;
3476}
3477
3478
3479LogicVRegister Simulator::zip1(VectorFormat vform,
3480                               LogicVRegister dst,
3481                               const LogicVRegister& src1,
3482                               const LogicVRegister& src2) {
3483  uint64_t result[16];
3484  int laneCount = LaneCountFromFormat(vform);
3485  int pairs = laneCount / 2;
3486  for (int i = 0; i < pairs; ++i) {
3487    result[2 * i] = src1.Uint(vform, i);
3488    result[(2 * i) + 1] = src2.Uint(vform, i);
3489  }
3490
3491  dst.ClearForWrite(vform);
3492  for (int i = 0; i < laneCount; ++i) {
3493    dst.SetUint(vform, i, result[i]);
3494  }
3495  return dst;
3496}
3497
3498
3499LogicVRegister Simulator::zip2(VectorFormat vform,
3500                               LogicVRegister dst,
3501                               const LogicVRegister& src1,
3502                               const LogicVRegister& src2) {
3503  uint64_t result[16];
3504  int laneCount = LaneCountFromFormat(vform);
3505  int pairs = laneCount / 2;
3506  for (int i = 0; i < pairs; ++i) {
3507    result[2 * i] = src1.Uint(vform, pairs + i);
3508    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3509  }
3510
3511  dst.ClearForWrite(vform);
3512  for (int i = 0; i < laneCount; ++i) {
3513    dst.SetUint(vform, i, result[i]);
3514  }
3515  return dst;
3516}
3517
3518
3519LogicVRegister Simulator::uzp1(VectorFormat vform,
3520                               LogicVRegister dst,
3521                               const LogicVRegister& src1,
3522                               const LogicVRegister& src2) {
3523  uint64_t result[32];
3524  int laneCount = LaneCountFromFormat(vform);
3525  for (int i = 0; i < laneCount; ++i) {
3526    result[i] = src1.Uint(vform, i);
3527    result[laneCount + i] = src2.Uint(vform, i);
3528  }
3529
3530  dst.ClearForWrite(vform);
3531  for (int i = 0; i < laneCount; ++i) {
3532    dst.SetUint(vform, i, result[2 * i]);
3533  }
3534  return dst;
3535}
3536
3537
3538LogicVRegister Simulator::uzp2(VectorFormat vform,
3539                               LogicVRegister dst,
3540                               const LogicVRegister& src1,
3541                               const LogicVRegister& src2) {
3542  uint64_t result[32];
3543  int laneCount = LaneCountFromFormat(vform);
3544  for (int i = 0; i < laneCount; ++i) {
3545    result[i] = src1.Uint(vform, i);
3546    result[laneCount + i] = src2.Uint(vform, i);
3547  }
3548
3549  dst.ClearForWrite(vform);
3550  for (int i = 0; i < laneCount; ++i) {
3551    dst.SetUint(vform, i, result[(2 * i) + 1]);
3552  }
3553  return dst;
3554}
3555
3556
3557template <typename T>
3558T Simulator::FPAdd(T op1, T op2) {
3559  T result = FPProcessNaNs(op1, op2);
3560  if (std::isnan(result)) return result;
3561
3562  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3563    // inf + -inf returns the default NaN.
3564    FPProcessException();
3565    return FPDefaultNaN<T>();
3566  } else {
3567    // Other cases should be handled by standard arithmetic.
3568    return op1 + op2;
3569  }
3570}
3571
3572
3573template <typename T>
3574T Simulator::FPSub(T op1, T op2) {
3575  // NaNs should be handled elsewhere.
3576  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3577
3578  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3579    // inf - inf returns the default NaN.
3580    FPProcessException();
3581    return FPDefaultNaN<T>();
3582  } else {
3583    // Other cases should be handled by standard arithmetic.
3584    return op1 - op2;
3585  }
3586}
3587
3588
3589template <typename T>
3590T Simulator::FPMul(T op1, T op2) {
3591  // NaNs should be handled elsewhere.
3592  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3593
3594  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3595    // inf * 0.0 returns the default NaN.
3596    FPProcessException();
3597    return FPDefaultNaN<T>();
3598  } else {
3599    // Other cases should be handled by standard arithmetic.
3600    return op1 * op2;
3601  }
3602}
3603
3604
3605template <typename T>
3606T Simulator::FPMulx(T op1, T op2) {
3607  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3608    // inf * 0.0 returns +/-2.0.
3609    T two = 2.0;
3610    return copysign(1.0, op1) * copysign(1.0, op2) * two;
3611  }
3612  return FPMul(op1, op2);
3613}
3614
3615
3616template <typename T>
3617T Simulator::FPMulAdd(T a, T op1, T op2) {
3618  T result = FPProcessNaNs3(a, op1, op2);
3619
3620  T sign_a = copysign(1.0, a);
3621  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3622  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3623  bool operation_generates_nan =
3624      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
3625      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
3626      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
3627
3628  if (std::isnan(result)) {
3629    // Generated NaNs override quiet NaNs propagated from a.
3630    if (operation_generates_nan && IsQuietNaN(a)) {
3631      FPProcessException();
3632      return FPDefaultNaN<T>();
3633    } else {
3634      return result;
3635    }
3636  }
3637
3638  // If the operation would produce a NaN, return the default NaN.
3639  if (operation_generates_nan) {
3640    FPProcessException();
3641    return FPDefaultNaN<T>();
3642  }
3643
3644  // Work around broken fma implementations for exact zero results: The sign of
3645  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3646  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3647    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3648  }
3649
3650  result = FusedMultiplyAdd(op1, op2, a);
3651  VIXL_ASSERT(!std::isnan(result));
3652
3653  // Work around broken fma implementations for rounded zero results: If a is
3654  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3655  if ((a == 0.0) && (result == 0.0)) {
3656    return copysign(0.0, sign_prod);
3657  }
3658
3659  return result;
3660}
3661
3662
3663template <typename T>
3664T Simulator::FPDiv(T op1, T op2) {
3665  // NaNs should be handled elsewhere.
3666  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3667
3668  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3669    // inf / inf and 0.0 / 0.0 return the default NaN.
3670    FPProcessException();
3671    return FPDefaultNaN<T>();
3672  } else {
3673    if (op2 == 0.0) FPProcessException();
3674
3675    // Other cases should be handled by standard arithmetic.
3676    return op1 / op2;
3677  }
3678}
3679
3680
3681template <typename T>
3682T Simulator::FPSqrt(T op) {
3683  if (std::isnan(op)) {
3684    return FPProcessNaN(op);
3685  } else if (op < 0.0) {
3686    FPProcessException();
3687    return FPDefaultNaN<T>();
3688  } else {
3689    return sqrt(op);
3690  }
3691}
3692
3693
3694template <typename T>
3695T Simulator::FPMax(T a, T b) {
3696  T result = FPProcessNaNs(a, b);
3697  if (std::isnan(result)) return result;
3698
3699  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3700    // a and b are zero, and the sign differs: return +0.0.
3701    return 0.0;
3702  } else {
3703    return (a > b) ? a : b;
3704  }
3705}
3706
3707
3708template <typename T>
3709T Simulator::FPMaxNM(T a, T b) {
3710  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3711    a = kFP64NegativeInfinity;
3712  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3713    b = kFP64NegativeInfinity;
3714  }
3715
3716  T result = FPProcessNaNs(a, b);
3717  return std::isnan(result) ? result : FPMax(a, b);
3718}
3719
3720
3721template <typename T>
3722T Simulator::FPMin(T a, T b) {
3723  T result = FPProcessNaNs(a, b);
3724  if (std::isnan(result)) return result;
3725
3726  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3727    // a and b are zero, and the sign differs: return -0.0.
3728    return -0.0;
3729  } else {
3730    return (a < b) ? a : b;
3731  }
3732}
3733
3734
3735template <typename T>
3736T Simulator::FPMinNM(T a, T b) {
3737  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3738    a = kFP64PositiveInfinity;
3739  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3740    b = kFP64PositiveInfinity;
3741  }
3742
3743  T result = FPProcessNaNs(a, b);
3744  return std::isnan(result) ? result : FPMin(a, b);
3745}
3746
3747
3748template <typename T>
3749T Simulator::FPRecipStepFused(T op1, T op2) {
3750  const T two = 2.0;
3751  if ((std::isinf(op1) && (op2 == 0.0)) ||
3752      ((op1 == 0.0) && (std::isinf(op2)))) {
3753    return two;
3754  } else if (std::isinf(op1) || std::isinf(op2)) {
3755    // Return +inf if signs match, otherwise -inf.
3756    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3757                                          : kFP64NegativeInfinity;
3758  } else {
3759    return FusedMultiplyAdd(op1, op2, two);
3760  }
3761}
3762
3763
3764template <typename T>
3765T Simulator::FPRSqrtStepFused(T op1, T op2) {
3766  const T one_point_five = 1.5;
3767  const T two = 2.0;
3768
3769  if ((std::isinf(op1) && (op2 == 0.0)) ||
3770      ((op1 == 0.0) && (std::isinf(op2)))) {
3771    return one_point_five;
3772  } else if (std::isinf(op1) || std::isinf(op2)) {
3773    // Return +inf if signs match, otherwise -inf.
3774    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3775                                          : kFP64NegativeInfinity;
3776  } else {
3777    // The multiply-add-halve operation must be fully fused, so avoid interim
3778    // rounding by checking which operand can be losslessly divided by two
3779    // before doing the multiply-add.
3780    if (std::isnormal(op1 / two)) {
3781      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3782    } else if (std::isnormal(op2 / two)) {
3783      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3784    } else {
3785      // Neither operand is normal after halving: the result is dominated by
3786      // the addition term, so just return that.
3787      return one_point_five;
3788    }
3789  }
3790}
3791
3792
3793double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3794  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3795      (value == kFP64NegativeInfinity)) {
3796    return value;
3797  } else if (std::isnan(value)) {
3798    return FPProcessNaN(value);
3799  }
3800
3801  double int_result = std::floor(value);
3802  double error = value - int_result;
3803  switch (round_mode) {
3804    case FPTieAway: {
3805      // Take care of correctly handling the range ]-0.5, -0.0], which must
3806      // yield -0.0.
3807      if ((-0.5 < value) && (value < 0.0)) {
3808        int_result = -0.0;
3809
3810      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3811        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3812        // result is positive, round up.
3813        int_result++;
3814      }
3815      break;
3816    }
3817    case FPTieEven: {
3818      // Take care of correctly handling the range [-0.5, -0.0], which must
3819      // yield -0.0.
3820      if ((-0.5 <= value) && (value < 0.0)) {
3821        int_result = -0.0;
3822
3823        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3824        // result is odd, round up.
3825      } else if ((error > 0.5) ||
3826                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3827        int_result++;
3828      }
3829      break;
3830    }
3831    case FPZero: {
3832      // If value>0 then we take floor(value)
3833      // otherwise, ceil(value).
3834      if (value < 0) {
3835        int_result = ceil(value);
3836      }
3837      break;
3838    }
3839    case FPNegativeInfinity: {
3840      // We always use floor(value).
3841      break;
3842    }
3843    case FPPositiveInfinity: {
3844      // Take care of correctly handling the range ]-1.0, -0.0], which must
3845      // yield -0.0.
3846      if ((-1.0 < value) && (value < 0.0)) {
3847        int_result = -0.0;
3848
3849        // If the error is non-zero, round up.
3850      } else if (error > 0.0) {
3851        int_result++;
3852      }
3853      break;
3854    }
3855    default:
3856      VIXL_UNIMPLEMENTED();
3857  }
3858  return int_result;
3859}
3860
3861
3862int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3863  value = FPRoundInt(value, rmode);
3864  if (value >= kWMaxInt) {
3865    return kWMaxInt;
3866  } else if (value < kWMinInt) {
3867    return kWMinInt;
3868  }
3869  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3870}
3871
3872
3873int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3874  value = FPRoundInt(value, rmode);
3875  if (value >= kXMaxInt) {
3876    return kXMaxInt;
3877  } else if (value < kXMinInt) {
3878    return kXMinInt;
3879  }
3880  return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3881}
3882
3883
3884uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3885  value = FPRoundInt(value, rmode);
3886  if (value >= kWMaxUInt) {
3887    return kWMaxUInt;
3888  } else if (value < 0.0) {
3889    return 0;
3890  }
3891  return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3892}
3893
3894
3895uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3896  value = FPRoundInt(value, rmode);
3897  if (value >= kXMaxUInt) {
3898    return kXMaxUInt;
3899  } else if (value < 0.0) {
3900    return 0;
3901  }
3902  return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3903}
3904
3905
3906#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
3907  template <typename T>                                          \
3908  LogicVRegister Simulator::FN(VectorFormat vform,               \
3909                               LogicVRegister dst,               \
3910                               const LogicVRegister& src1,       \
3911                               const LogicVRegister& src2) {     \
3912    dst.ClearForWrite(vform);                                    \
3913    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
3914      T op1 = src1.Float<T>(i);                                  \
3915      T op2 = src2.Float<T>(i);                                  \
3916      T result;                                                  \
3917      if (PROCNAN) {                                             \
3918        result = FPProcessNaNs(op1, op2);                        \
3919        if (!std::isnan(result)) {                               \
3920          result = OP(op1, op2);                                 \
3921        }                                                        \
3922      } else {                                                   \
3923        result = OP(op1, op2);                                   \
3924      }                                                          \
3925      dst.SetFloat(i, result);                                   \
3926    }                                                            \
3927    return dst;                                                  \
3928  }                                                              \
3929                                                                 \
3930  LogicVRegister Simulator::FN(VectorFormat vform,               \
3931                               LogicVRegister dst,               \
3932                               const LogicVRegister& src1,       \
3933                               const LogicVRegister& src2) {     \
3934    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
3935      FN<float>(vform, dst, src1, src2);                         \
3936    } else {                                                     \
3937      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
3938      FN<double>(vform, dst, src1, src2);                        \
3939    }                                                            \
3940    return dst;                                                  \
3941  }
3942NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3943#undef DEFINE_NEON_FP_VECTOR_OP
3944
3945
3946LogicVRegister Simulator::fnmul(VectorFormat vform,
3947                                LogicVRegister dst,
3948                                const LogicVRegister& src1,
3949                                const LogicVRegister& src2) {
3950  SimVRegister temp;
3951  LogicVRegister product = fmul(vform, temp, src1, src2);
3952  return fneg(vform, dst, product);
3953}
3954
3955
3956template <typename T>
3957LogicVRegister Simulator::frecps(VectorFormat vform,
3958                                 LogicVRegister dst,
3959                                 const LogicVRegister& src1,
3960                                 const LogicVRegister& src2) {
3961  dst.ClearForWrite(vform);
3962  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3963    T op1 = -src1.Float<T>(i);
3964    T op2 = src2.Float<T>(i);
3965    T result = FPProcessNaNs(op1, op2);
3966    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3967  }
3968  return dst;
3969}
3970
3971
3972LogicVRegister Simulator::frecps(VectorFormat vform,
3973                                 LogicVRegister dst,
3974                                 const LogicVRegister& src1,
3975                                 const LogicVRegister& src2) {
3976  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3977    frecps<float>(vform, dst, src1, src2);
3978  } else {
3979    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3980    frecps<double>(vform, dst, src1, src2);
3981  }
3982  return dst;
3983}
3984
3985
3986template <typename T>
3987LogicVRegister Simulator::frsqrts(VectorFormat vform,
3988                                  LogicVRegister dst,
3989                                  const LogicVRegister& src1,
3990                                  const LogicVRegister& src2) {
3991  dst.ClearForWrite(vform);
3992  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3993    T op1 = -src1.Float<T>(i);
3994    T op2 = src2.Float<T>(i);
3995    T result = FPProcessNaNs(op1, op2);
3996    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3997  }
3998  return dst;
3999}
4000
4001
4002LogicVRegister Simulator::frsqrts(VectorFormat vform,
4003                                  LogicVRegister dst,
4004                                  const LogicVRegister& src1,
4005                                  const LogicVRegister& src2) {
4006  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4007    frsqrts<float>(vform, dst, src1, src2);
4008  } else {
4009    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4010    frsqrts<double>(vform, dst, src1, src2);
4011  }
4012  return dst;
4013}
4014
4015
4016template <typename T>
4017LogicVRegister Simulator::fcmp(VectorFormat vform,
4018                               LogicVRegister dst,
4019                               const LogicVRegister& src1,
4020                               const LogicVRegister& src2,
4021                               Condition cond) {
4022  dst.ClearForWrite(vform);
4023  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4024    bool result = false;
4025    T op1 = src1.Float<T>(i);
4026    T op2 = src2.Float<T>(i);
4027    T nan_result = FPProcessNaNs(op1, op2);
4028    if (!std::isnan(nan_result)) {
4029      switch (cond) {
4030        case eq:
4031          result = (op1 == op2);
4032          break;
4033        case ge:
4034          result = (op1 >= op2);
4035          break;
4036        case gt:
4037          result = (op1 > op2);
4038          break;
4039        case le:
4040          result = (op1 <= op2);
4041          break;
4042        case lt:
4043          result = (op1 < op2);
4044          break;
4045        default:
4046          VIXL_UNREACHABLE();
4047          break;
4048      }
4049    }
4050    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4051  }
4052  return dst;
4053}
4054
4055
4056LogicVRegister Simulator::fcmp(VectorFormat vform,
4057                               LogicVRegister dst,
4058                               const LogicVRegister& src1,
4059                               const LogicVRegister& src2,
4060                               Condition cond) {
4061  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4062    fcmp<float>(vform, dst, src1, src2, cond);
4063  } else {
4064    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4065    fcmp<double>(vform, dst, src1, src2, cond);
4066  }
4067  return dst;
4068}
4069
4070
4071LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4072                                    LogicVRegister dst,
4073                                    const LogicVRegister& src,
4074                                    Condition cond) {
4075  SimVRegister temp;
4076  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4077    LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4078    fcmp<float>(vform, dst, src, zero_reg, cond);
4079  } else {
4080    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4081    LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4082    fcmp<double>(vform, dst, src, zero_reg, cond);
4083  }
4084  return dst;
4085}
4086
4087
4088LogicVRegister Simulator::fabscmp(VectorFormat vform,
4089                                  LogicVRegister dst,
4090                                  const LogicVRegister& src1,
4091                                  const LogicVRegister& src2,
4092                                  Condition cond) {
4093  SimVRegister temp1, temp2;
4094  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4095    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4096    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4097    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4098  } else {
4099    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4100    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4101    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4102    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4103  }
4104  return dst;
4105}
4106
4107
4108template <typename T>
4109LogicVRegister Simulator::fmla(VectorFormat vform,
4110                               LogicVRegister dst,
4111                               const LogicVRegister& src1,
4112                               const LogicVRegister& src2) {
4113  dst.ClearForWrite(vform);
4114  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4115    T op1 = src1.Float<T>(i);
4116    T op2 = src2.Float<T>(i);
4117    T acc = dst.Float<T>(i);
4118    T result = FPMulAdd(acc, op1, op2);
4119    dst.SetFloat(i, result);
4120  }
4121  return dst;
4122}
4123
4124
4125LogicVRegister Simulator::fmla(VectorFormat vform,
4126                               LogicVRegister dst,
4127                               const LogicVRegister& src1,
4128                               const LogicVRegister& src2) {
4129  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4130    fmla<float>(vform, dst, src1, src2);
4131  } else {
4132    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4133    fmla<double>(vform, dst, src1, src2);
4134  }
4135  return dst;
4136}
4137
4138
4139template <typename T>
4140LogicVRegister Simulator::fmls(VectorFormat vform,
4141                               LogicVRegister dst,
4142                               const LogicVRegister& src1,
4143                               const LogicVRegister& src2) {
4144  dst.ClearForWrite(vform);
4145  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4146    T op1 = -src1.Float<T>(i);
4147    T op2 = src2.Float<T>(i);
4148    T acc = dst.Float<T>(i);
4149    T result = FPMulAdd(acc, op1, op2);
4150    dst.SetFloat(i, result);
4151  }
4152  return dst;
4153}
4154
4155
4156LogicVRegister Simulator::fmls(VectorFormat vform,
4157                               LogicVRegister dst,
4158                               const LogicVRegister& src1,
4159                               const LogicVRegister& src2) {
4160  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4161    fmls<float>(vform, dst, src1, src2);
4162  } else {
4163    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4164    fmls<double>(vform, dst, src1, src2);
4165  }
4166  return dst;
4167}
4168
4169
4170template <typename T>
4171LogicVRegister Simulator::fneg(VectorFormat vform,
4172                               LogicVRegister dst,
4173                               const LogicVRegister& src) {
4174  dst.ClearForWrite(vform);
4175  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4176    T op = src.Float<T>(i);
4177    op = -op;
4178    dst.SetFloat(i, op);
4179  }
4180  return dst;
4181}
4182
4183
4184LogicVRegister Simulator::fneg(VectorFormat vform,
4185                               LogicVRegister dst,
4186                               const LogicVRegister& src) {
4187  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4188    fneg<float>(vform, dst, src);
4189  } else {
4190    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4191    fneg<double>(vform, dst, src);
4192  }
4193  return dst;
4194}
4195
4196
4197template <typename T>
4198LogicVRegister Simulator::fabs_(VectorFormat vform,
4199                                LogicVRegister dst,
4200                                const LogicVRegister& src) {
4201  dst.ClearForWrite(vform);
4202  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4203    T op = src.Float<T>(i);
4204    if (copysign(1.0, op) < 0.0) {
4205      op = -op;
4206    }
4207    dst.SetFloat(i, op);
4208  }
4209  return dst;
4210}
4211
4212
4213LogicVRegister Simulator::fabs_(VectorFormat vform,
4214                                LogicVRegister dst,
4215                                const LogicVRegister& src) {
4216  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4217    fabs_<float>(vform, dst, src);
4218  } else {
4219    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4220    fabs_<double>(vform, dst, src);
4221  }
4222  return dst;
4223}
4224
4225
4226LogicVRegister Simulator::fabd(VectorFormat vform,
4227                               LogicVRegister dst,
4228                               const LogicVRegister& src1,
4229                               const LogicVRegister& src2) {
4230  SimVRegister temp;
4231  fsub(vform, temp, src1, src2);
4232  fabs_(vform, dst, temp);
4233  return dst;
4234}
4235
4236
4237LogicVRegister Simulator::fsqrt(VectorFormat vform,
4238                                LogicVRegister dst,
4239                                const LogicVRegister& src) {
4240  dst.ClearForWrite(vform);
4241  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4242    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4243      float result = FPSqrt(src.Float<float>(i));
4244      dst.SetFloat(i, result);
4245    }
4246  } else {
4247    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4248    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4249      double result = FPSqrt(src.Float<double>(i));
4250      dst.SetFloat(i, result);
4251    }
4252  }
4253  return dst;
4254}
4255
4256
4257#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                           \
4258  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4259                                LogicVRegister dst,                   \
4260                                const LogicVRegister& src1,           \
4261                                const LogicVRegister& src2) {         \
4262    SimVRegister temp1, temp2;                                        \
4263    uzp1(vform, temp1, src1, src2);                                   \
4264    uzp2(vform, temp2, src1, src2);                                   \
4265    FN(vform, dst, temp1, temp2);                                     \
4266    return dst;                                                       \
4267  }                                                                   \
4268                                                                      \
4269  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4270                                LogicVRegister dst,                   \
4271                                const LogicVRegister& src) {          \
4272    if (vform == kFormatS) {                                          \
4273      float result = OP(src.Float<float>(0), src.Float<float>(1));    \
4274      dst.SetFloat(0, result);                                        \
4275    } else {                                                          \
4276      VIXL_ASSERT(vform == kFormatD);                                 \
4277      double result = OP(src.Float<double>(0), src.Float<double>(1)); \
4278      dst.SetFloat(0, result);                                        \
4279    }                                                                 \
4280    dst.ClearForWrite(vform);                                         \
4281    return dst;                                                       \
4282  }
4283NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
4284#undef DEFINE_NEON_FP_PAIR_OP
4285
4286
4287LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4288                                   LogicVRegister dst,
4289                                   const LogicVRegister& src,
4290                                   FPMinMaxOp Op) {
4291  VIXL_ASSERT(vform == kFormat4S);
4292  USE(vform);
4293  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4294  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4295  float result = (this->*Op)(result1, result2);
4296  dst.ClearForWrite(kFormatS);
4297  dst.SetFloat<float>(0, result);
4298  return dst;
4299}
4300
4301
4302LogicVRegister Simulator::fmaxv(VectorFormat vform,
4303                                LogicVRegister dst,
4304                                const LogicVRegister& src) {
4305  return fminmaxv(vform, dst, src, &Simulator::FPMax);
4306}
4307
4308
4309LogicVRegister Simulator::fminv(VectorFormat vform,
4310                                LogicVRegister dst,
4311                                const LogicVRegister& src) {
4312  return fminmaxv(vform, dst, src, &Simulator::FPMin);
4313}
4314
4315
4316LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4317                                  LogicVRegister dst,
4318                                  const LogicVRegister& src) {
4319  return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4320}
4321
4322
4323LogicVRegister Simulator::fminnmv(VectorFormat vform,
4324                                  LogicVRegister dst,
4325                                  const LogicVRegister& src) {
4326  return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4327}
4328
4329
4330LogicVRegister Simulator::fmul(VectorFormat vform,
4331                               LogicVRegister dst,
4332                               const LogicVRegister& src1,
4333                               const LogicVRegister& src2,
4334                               int index) {
4335  dst.ClearForWrite(vform);
4336  SimVRegister temp;
4337  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4338    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4339    fmul<float>(vform, dst, src1, index_reg);
4340
4341  } else {
4342    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4343    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4344    fmul<double>(vform, dst, src1, index_reg);
4345  }
4346  return dst;
4347}
4348
4349
4350LogicVRegister Simulator::fmla(VectorFormat vform,
4351                               LogicVRegister dst,
4352                               const LogicVRegister& src1,
4353                               const LogicVRegister& src2,
4354                               int index) {
4355  dst.ClearForWrite(vform);
4356  SimVRegister temp;
4357  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4358    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4359    fmla<float>(vform, dst, src1, index_reg);
4360
4361  } else {
4362    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4363    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4364    fmla<double>(vform, dst, src1, index_reg);
4365  }
4366  return dst;
4367}
4368
4369
4370LogicVRegister Simulator::fmls(VectorFormat vform,
4371                               LogicVRegister dst,
4372                               const LogicVRegister& src1,
4373                               const LogicVRegister& src2,
4374                               int index) {
4375  dst.ClearForWrite(vform);
4376  SimVRegister temp;
4377  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4378    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4379    fmls<float>(vform, dst, src1, index_reg);
4380
4381  } else {
4382    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4383    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4384    fmls<double>(vform, dst, src1, index_reg);
4385  }
4386  return dst;
4387}
4388
4389
4390LogicVRegister Simulator::fmulx(VectorFormat vform,
4391                                LogicVRegister dst,
4392                                const LogicVRegister& src1,
4393                                const LogicVRegister& src2,
4394                                int index) {
4395  dst.ClearForWrite(vform);
4396  SimVRegister temp;
4397  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4398    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4399    fmulx<float>(vform, dst, src1, index_reg);
4400
4401  } else {
4402    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4403    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4404    fmulx<double>(vform, dst, src1, index_reg);
4405  }
4406  return dst;
4407}
4408
4409
4410LogicVRegister Simulator::frint(VectorFormat vform,
4411                                LogicVRegister dst,
4412                                const LogicVRegister& src,
4413                                FPRounding rounding_mode,
4414                                bool inexact_exception) {
4415  dst.ClearForWrite(vform);
4416  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4417    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4418      float input = src.Float<float>(i);
4419      float rounded = FPRoundInt(input, rounding_mode);
4420      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4421        FPProcessException();
4422      }
4423      dst.SetFloat<float>(i, rounded);
4424    }
4425  } else {
4426    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4427    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4428      double input = src.Float<double>(i);
4429      double rounded = FPRoundInt(input, rounding_mode);
4430      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4431        FPProcessException();
4432      }
4433      dst.SetFloat<double>(i, rounded);
4434    }
4435  }
4436  return dst;
4437}
4438
4439
4440LogicVRegister Simulator::fcvts(VectorFormat vform,
4441                                LogicVRegister dst,
4442                                const LogicVRegister& src,
4443                                FPRounding rounding_mode,
4444                                int fbits) {
4445  dst.ClearForWrite(vform);
4446  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4447    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4448      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4449      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4450    }
4451  } else {
4452    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4453    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4454      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4455      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4456    }
4457  }
4458  return dst;
4459}
4460
4461
4462LogicVRegister Simulator::fcvtu(VectorFormat vform,
4463                                LogicVRegister dst,
4464                                const LogicVRegister& src,
4465                                FPRounding rounding_mode,
4466                                int fbits) {
4467  dst.ClearForWrite(vform);
4468  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4469    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4470      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4471      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4472    }
4473  } else {
4474    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4475    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4476      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4477      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4478    }
4479  }
4480  return dst;
4481}
4482
4483
4484LogicVRegister Simulator::fcvtl(VectorFormat vform,
4485                                LogicVRegister dst,
4486                                const LogicVRegister& src) {
4487  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4488    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4489      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4490    }
4491  } else {
4492    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4493    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4494      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4495    }
4496  }
4497  return dst;
4498}
4499
4500
4501LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4502                                 LogicVRegister dst,
4503                                 const LogicVRegister& src) {
4504  int lane_count = LaneCountFromFormat(vform);
4505  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4506    for (int i = 0; i < lane_count; i++) {
4507      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4508    }
4509  } else {
4510    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4511    for (int i = 0; i < lane_count; i++) {
4512      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4513    }
4514  }
4515  return dst;
4516}
4517
4518
4519LogicVRegister Simulator::fcvtn(VectorFormat vform,
4520                                LogicVRegister dst,
4521                                const LogicVRegister& src) {
4522  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4523    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4524      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4525    }
4526  } else {
4527    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4528    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4529      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4530    }
4531  }
4532  return dst;
4533}
4534
4535
4536LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4537                                 LogicVRegister dst,
4538                                 const LogicVRegister& src) {
4539  int lane_count = LaneCountFromFormat(vform) / 2;
4540  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4541    for (int i = lane_count - 1; i >= 0; i--) {
4542      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4543    }
4544  } else {
4545    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4546    for (int i = lane_count - 1; i >= 0; i--) {
4547      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4548    }
4549  }
4550  return dst;
4551}
4552
4553
4554LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4555                                 LogicVRegister dst,
4556                                 const LogicVRegister& src) {
4557  dst.ClearForWrite(vform);
4558  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4559  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4560    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4561  }
4562  return dst;
4563}
4564
4565
4566LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4567                                  LogicVRegister dst,
4568                                  const LogicVRegister& src) {
4569  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4570  int lane_count = LaneCountFromFormat(vform) / 2;
4571  for (int i = lane_count - 1; i >= 0; i--) {
4572    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4573  }
4574  return dst;
4575}
4576
4577
4578// Based on reference C function recip_sqrt_estimate from ARM ARM.
4579double Simulator::recip_sqrt_estimate(double a) {
4580  int q0, q1, s;
4581  double r;
4582  if (a < 0.5) {
4583    q0 = static_cast<int>(a * 512.0);
4584    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4585  } else {
4586    q1 = static_cast<int>(a * 256.0);
4587    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4588  }
4589  s = static_cast<int>(256.0 * r + 0.5);
4590  return static_cast<double>(s) / 256.0;
4591}
4592
4593
4594static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4595  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
4596}
4597
4598
4599template <typename T>
4600T Simulator::FPRecipSqrtEstimate(T op) {
4601  if (std::isnan(op)) {
4602    return FPProcessNaN(op);
4603  } else if (op == 0.0) {
4604    if (copysign(1.0, op) < 0.0) {
4605      return kFP64NegativeInfinity;
4606    } else {
4607      return kFP64PositiveInfinity;
4608    }
4609  } else if (copysign(1.0, op) < 0.0) {
4610    FPProcessException();
4611    return FPDefaultNaN<T>();
4612  } else if (std::isinf(op)) {
4613    return 0.0;
4614  } else {
4615    uint64_t fraction;
4616    int exp, result_exp;
4617
4618    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4619      exp = FloatExp(op);
4620      fraction = FloatMantissa(op);
4621      fraction <<= 29;
4622    } else {
4623      exp = DoubleExp(op);
4624      fraction = DoubleMantissa(op);
4625    }
4626
4627    if (exp == 0) {
4628      while (Bits(fraction, 51, 51) == 0) {
4629        fraction = Bits(fraction, 50, 0) << 1;
4630        exp -= 1;
4631      }
4632      fraction = Bits(fraction, 50, 0) << 1;
4633    }
4634
4635    double scaled;
4636    if (Bits(exp, 0, 0) == 0) {
4637      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4638    } else {
4639      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
4640    }
4641
4642    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4643      result_exp = (380 - exp) / 2;
4644    } else {
4645      result_exp = (3068 - exp) / 2;
4646    }
4647
4648    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
4649
4650    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4651      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4652      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4653      return FloatPack(0, exp_bits, est_bits);
4654    } else {
4655      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4656    }
4657  }
4658}
4659
4660
4661LogicVRegister Simulator::frsqrte(VectorFormat vform,
4662                                  LogicVRegister dst,
4663                                  const LogicVRegister& src) {
4664  dst.ClearForWrite(vform);
4665  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4666    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4667      float input = src.Float<float>(i);
4668      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4669    }
4670  } else {
4671    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4672    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4673      double input = src.Float<double>(i);
4674      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4675    }
4676  }
4677  return dst;
4678}
4679
4680template <typename T>
4681T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4682  uint32_t sign;
4683
4684  if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4685    sign = FloatSign(op);
4686  } else {
4687    sign = DoubleSign(op);
4688  }
4689
4690  if (std::isnan(op)) {
4691    return FPProcessNaN(op);
4692  } else if (std::isinf(op)) {
4693    return (sign == 1) ? -0.0 : 0.0;
4694  } else if (op == 0.0) {
4695    FPProcessException();  // FPExc_DivideByZero exception.
4696    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4697  } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
4698              (std::fabs(op) < std::pow(2.0, -128.0))) ||
4699             ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
4700              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4701    bool overflow_to_inf = false;
4702    switch (rounding) {
4703      case FPTieEven:
4704        overflow_to_inf = true;
4705        break;
4706      case FPPositiveInfinity:
4707        overflow_to_inf = (sign == 0);
4708        break;
4709      case FPNegativeInfinity:
4710        overflow_to_inf = (sign == 1);
4711        break;
4712      case FPZero:
4713        overflow_to_inf = false;
4714        break;
4715      default:
4716        break;
4717    }
4718    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4719    if (overflow_to_inf) {
4720      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4721    } else {
4722      // Return FPMaxNormal(sign).
4723      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4724        return FloatPack(sign, 0xfe, 0x07fffff);
4725      } else {
4726        return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
4727      }
4728    }
4729  } else {
4730    uint64_t fraction;
4731    int exp, result_exp;
4732    uint32_t sign;
4733
4734    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4735      sign = FloatSign(op);
4736      exp = FloatExp(op);
4737      fraction = FloatMantissa(op);
4738      fraction <<= 29;
4739    } else {
4740      sign = DoubleSign(op);
4741      exp = DoubleExp(op);
4742      fraction = DoubleMantissa(op);
4743    }
4744
4745    if (exp == 0) {
4746      if (Bits(fraction, 51, 51) == 0) {
4747        exp -= 1;
4748        fraction = Bits(fraction, 49, 0) << 2;
4749      } else {
4750        fraction = Bits(fraction, 50, 0) << 1;
4751      }
4752    }
4753
4754    double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4755
4756    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4757      result_exp = (253 - exp);        // In range 253-254 = -1 to 253+1 = 254.
4758    } else {
4759      result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
4760    }
4761
4762    double estimate = recip_estimate(scaled);
4763
4764    fraction = DoubleMantissa(estimate);
4765    if (result_exp == 0) {
4766      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4767    } else if (result_exp == -1) {
4768      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4769      result_exp = 0;
4770    }
4771    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4772      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4773      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4774      return FloatPack(sign, exp_bits, frac_bits);
4775    } else {
4776      return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4777    }
4778  }
4779}
4780
4781
4782LogicVRegister Simulator::frecpe(VectorFormat vform,
4783                                 LogicVRegister dst,
4784                                 const LogicVRegister& src,
4785                                 FPRounding round) {
4786  dst.ClearForWrite(vform);
4787  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4788    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4789      float input = src.Float<float>(i);
4790      dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4791    }
4792  } else {
4793    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4794    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4795      double input = src.Float<double>(i);
4796      dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4797    }
4798  }
4799  return dst;
4800}
4801
4802
4803LogicVRegister Simulator::ursqrte(VectorFormat vform,
4804                                  LogicVRegister dst,
4805                                  const LogicVRegister& src) {
4806  dst.ClearForWrite(vform);
4807  uint64_t operand;
4808  uint32_t result;
4809  double dp_operand, dp_result;
4810  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4811    operand = src.Uint(vform, i);
4812    if (operand <= 0x3FFFFFFF) {
4813      result = 0xFFFFFFFF;
4814    } else {
4815      dp_operand = operand * std::pow(2.0, -32);
4816      dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4817      result = static_cast<uint32_t>(dp_result);
4818    }
4819    dst.SetUint(vform, i, result);
4820  }
4821  return dst;
4822}
4823
4824
4825// Based on reference C function recip_estimate from ARM ARM.
4826double Simulator::recip_estimate(double a) {
4827  int q, s;
4828  double r;
4829  q = static_cast<int>(a * 512.0);
4830  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4831  s = static_cast<int>(256.0 * r + 0.5);
4832  return static_cast<double>(s) / 256.0;
4833}
4834
4835
4836LogicVRegister Simulator::urecpe(VectorFormat vform,
4837                                 LogicVRegister dst,
4838                                 const LogicVRegister& src) {
4839  dst.ClearForWrite(vform);
4840  uint64_t operand;
4841  uint32_t result;
4842  double dp_operand, dp_result;
4843  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4844    operand = src.Uint(vform, i);
4845    if (operand <= 0x7FFFFFFF) {
4846      result = 0xFFFFFFFF;
4847    } else {
4848      dp_operand = operand * std::pow(2.0, -32);
4849      dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4850      result = static_cast<uint32_t>(dp_result);
4851    }
4852    dst.SetUint(vform, i, result);
4853  }
4854  return dst;
4855}
4856
4857template <typename T>
4858LogicVRegister Simulator::frecpx(VectorFormat vform,
4859                                 LogicVRegister dst,
4860                                 const LogicVRegister& src) {
4861  dst.ClearForWrite(vform);
4862  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4863    T op = src.Float<T>(i);
4864    T result;
4865    if (std::isnan(op)) {
4866      result = FPProcessNaN(op);
4867    } else {
4868      int exp;
4869      uint32_t sign;
4870      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4871        sign = FloatSign(op);
4872        exp = FloatExp(op);
4873        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4874        result = FloatPack(sign, exp, 0);
4875      } else {
4876        sign = DoubleSign(op);
4877        exp = DoubleExp(op);
4878        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4879        result = DoublePack(sign, exp, 0);
4880      }
4881    }
4882    dst.SetFloat(i, result);
4883  }
4884  return dst;
4885}
4886
4887
4888LogicVRegister Simulator::frecpx(VectorFormat vform,
4889                                 LogicVRegister dst,
4890                                 const LogicVRegister& src) {
4891  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4892    frecpx<float>(vform, dst, src);
4893  } else {
4894    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4895    frecpx<double>(vform, dst, src);
4896  }
4897  return dst;
4898}
4899
4900LogicVRegister Simulator::scvtf(VectorFormat vform,
4901                                LogicVRegister dst,
4902                                const LogicVRegister& src,
4903                                int fbits,
4904                                FPRounding round) {
4905  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4906    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4907      float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4908      dst.SetFloat<float>(i, result);
4909    } else {
4910      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4911      double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4912      dst.SetFloat<double>(i, result);
4913    }
4914  }
4915  return dst;
4916}
4917
4918
4919LogicVRegister Simulator::ucvtf(VectorFormat vform,
4920                                LogicVRegister dst,
4921                                const LogicVRegister& src,
4922                                int fbits,
4923                                FPRounding round) {
4924  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4925    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4926      float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4927      dst.SetFloat<float>(i, result);
4928    } else {
4929      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4930      double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4931      dst.SetFloat<double>(i, result);
4932    }
4933  }
4934  return dst;
4935}
4936
4937
4938}  // namespace aarch64
4939}  // namespace vixl
4940
4941#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
4942