logic-aarch64.cc revision 1e1277e629b68c96a1d7b953c2c6f90c7a44cdb7
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29#include <cmath>
30
31#include "simulator-aarch64.h"
32
33namespace vixl {
34namespace aarch64 {
35
36template <>
37double Simulator::FPDefaultNaN<double>() {
38  return kFP64DefaultNaN;
39}
40
41
42template <>
43float Simulator::FPDefaultNaN<float>() {
44  return kFP32DefaultNaN;
45}
46
47// See FPRound for a description of this function.
48static inline double FPRoundToDouble(int64_t sign,
49                                     int64_t exponent,
50                                     uint64_t mantissa,
51                                     FPRounding round_mode) {
52  int64_t bits =
53      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
54                                                                 exponent,
55                                                                 mantissa,
56                                                                 round_mode);
57  return RawbitsToDouble(bits);
58}
59
60
61// See FPRound for a description of this function.
62static inline float FPRoundToFloat(int64_t sign,
63                                   int64_t exponent,
64                                   uint64_t mantissa,
65                                   FPRounding round_mode) {
66  int32_t bits =
67      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
68                                                               exponent,
69                                                               mantissa,
70                                                               round_mode);
71  return RawbitsToFloat(bits);
72}
73
74
75// See FPRound for a description of this function.
76static inline float16 FPRoundToFloat16(int64_t sign,
77                                       int64_t exponent,
78                                       uint64_t mantissa,
79                                       FPRounding round_mode) {
80  return FPRound<float16,
81                 kFloat16ExponentBits,
82                 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
83}
84
85
86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
87  if (src >= 0) {
88    return UFixedToDouble(src, fbits, round);
89  } else {
90    // This works for all negative values, including INT64_MIN.
91    return -UFixedToDouble(-src, fbits, round);
92  }
93}
94
95
96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
97  // An input of 0 is a special case because the result is effectively
98  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
99  if (src == 0) {
100    return 0.0;
101  }
102
103  // Calculate the exponent. The highest significant bit will have the value
104  // 2^exponent.
105  const int highest_significant_bit = 63 - CountLeadingZeros(src);
106  const int64_t exponent = highest_significant_bit - fbits;
107
108  return FPRoundToDouble(0, exponent, src, round);
109}
110
111
112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
113  if (src >= 0) {
114    return UFixedToFloat(src, fbits, round);
115  } else {
116    // This works for all negative values, including INT64_MIN.
117    return -UFixedToFloat(-src, fbits, round);
118  }
119}
120
121
122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
123  // An input of 0 is a special case because the result is effectively
124  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
125  if (src == 0) {
126    return 0.0f;
127  }
128
129  // Calculate the exponent. The highest significant bit will have the value
130  // 2^exponent.
131  const int highest_significant_bit = 63 - CountLeadingZeros(src);
132  const int32_t exponent = highest_significant_bit - fbits;
133
134  return FPRoundToFloat(0, exponent, src, round);
135}
136
137
138double Simulator::FPToDouble(float value) {
139  switch (std::fpclassify(value)) {
140    case FP_NAN: {
141      if (IsSignallingNaN(value)) {
142        FPProcessException();
143      }
144      if (ReadDN()) return kFP64DefaultNaN;
145
146      // Convert NaNs as the processor would:
147      //  - The sign is propagated.
148      //  - The payload (mantissa) is transferred entirely, except that the top
149      //    bit is forced to '1', making the result a quiet NaN. The unused
150      //    (low-order) payload bits are set to 0.
151      uint32_t raw = FloatToRawbits(value);
152
153      uint64_t sign = raw >> 31;
154      uint64_t exponent = (1 << 11) - 1;
155      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
156      payload <<= (52 - 23);           // The unused low-order bits should be 0.
157      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
158
159      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
160    }
161
162    case FP_ZERO:
163    case FP_NORMAL:
164    case FP_SUBNORMAL:
165    case FP_INFINITE: {
166      // All other inputs are preserved in a standard cast, because every value
167      // representable using an IEEE-754 float is also representable using an
168      // IEEE-754 double.
169      return static_cast<double>(value);
170    }
171  }
172
173  VIXL_UNREACHABLE();
174  return static_cast<double>(value);
175}
176
177
178float Simulator::FPToFloat(float16 value) {
179  uint32_t sign = value >> 15;
180  uint32_t exponent =
181      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
182                                kFloat16MantissaBits,
183                                value);
184  uint32_t mantissa =
185      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
186
187  switch (Float16Classify(value)) {
188    case FP_ZERO:
189      return (sign == 0) ? 0.0f : -0.0f;
190
191    case FP_INFINITE:
192      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
193
194    case FP_SUBNORMAL: {
195      // Calculate shift required to put mantissa into the most-significant bits
196      // of the destination mantissa.
197      int shift = CountLeadingZeros(mantissa << (32 - 10));
198
199      // Shift mantissa and discard implicit '1'.
200      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
201      mantissa &= (1 << kFloatMantissaBits) - 1;
202
203      // Adjust the exponent for the shift applied, and rebias.
204      exponent = exponent - shift + (-15 + 127);
205      break;
206    }
207
208    case FP_NAN:
209      if (IsSignallingNaN(value)) {
210        FPProcessException();
211      }
212      if (ReadDN()) return kFP32DefaultNaN;
213
214      // Convert NaNs as the processor would:
215      //  - The sign is propagated.
216      //  - The payload (mantissa) is transferred entirely, except that the top
217      //    bit is forced to '1', making the result a quiet NaN. The unused
218      //    (low-order) payload bits are set to 0.
219      exponent = (1 << kFloatExponentBits) - 1;
220
221      // Increase bits in mantissa, making low-order bits 0.
222      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
223      mantissa |= 1 << 22;  // Force a quiet NaN.
224      break;
225
226    case FP_NORMAL:
227      // Increase bits in mantissa, making low-order bits 0.
228      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
229
230      // Change exponent bias.
231      exponent += (-15 + 127);
232      break;
233
234    default:
235      VIXL_UNREACHABLE();
236  }
237  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
238                        mantissa);
239}
240
241
242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
243  // Only the FPTieEven rounding mode is implemented.
244  VIXL_ASSERT(round_mode == FPTieEven);
245  USE(round_mode);
246
247  uint32_t raw = FloatToRawbits(value);
248  int32_t sign = raw >> 31;
249  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
250  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
251
252  switch (std::fpclassify(value)) {
253    case FP_NAN: {
254      if (IsSignallingNaN(value)) {
255        FPProcessException();
256      }
257      if (ReadDN()) return kFP16DefaultNaN;
258
259      // Convert NaNs as the processor would:
260      //  - The sign is propagated.
261      //  - The payload (mantissa) is transferred as much as possible, except
262      //    that the top bit is forced to '1', making the result a quiet NaN.
263      float16 result =
264          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
265      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
266      result |= (1 << 9);  // Force a quiet NaN;
267      return result;
268    }
269
270    case FP_ZERO:
271      return (sign == 0) ? 0 : 0x8000;
272
273    case FP_INFINITE:
274      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
275
276    case FP_NORMAL:
277    case FP_SUBNORMAL: {
278      // Convert float-to-half as the processor would, assuming that FPCR.FZ
279      // (flush-to-zero) is not set.
280
281      // Add the implicit '1' bit to the mantissa.
282      mantissa += (1 << 23);
283      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
284    }
285  }
286
287  VIXL_UNREACHABLE();
288  return 0;
289}
290
291
292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
293  // Only the FPTieEven rounding mode is implemented.
294  VIXL_ASSERT(round_mode == FPTieEven);
295  USE(round_mode);
296
297  uint64_t raw = DoubleToRawbits(value);
298  int32_t sign = raw >> 63;
299  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
300  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
301
302  switch (std::fpclassify(value)) {
303    case FP_NAN: {
304      if (IsSignallingNaN(value)) {
305        FPProcessException();
306      }
307      if (ReadDN()) return kFP16DefaultNaN;
308
309      // Convert NaNs as the processor would:
310      //  - The sign is propagated.
311      //  - The payload (mantissa) is transferred as much as possible, except
312      //    that the top bit is forced to '1', making the result a quiet NaN.
313      float16 result =
314          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
315      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
316      result |= (1 << 9);  // Force a quiet NaN;
317      return result;
318    }
319
320    case FP_ZERO:
321      return (sign == 0) ? 0 : 0x8000;
322
323    case FP_INFINITE:
324      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
325
326    case FP_NORMAL:
327    case FP_SUBNORMAL: {
328      // Convert double-to-half as the processor would, assuming that FPCR.FZ
329      // (flush-to-zero) is not set.
330
331      // Add the implicit '1' bit to the mantissa.
332      mantissa += (UINT64_C(1) << 52);
333      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
334    }
335  }
336
337  VIXL_UNREACHABLE();
338  return 0;
339}
340
341
342float Simulator::FPToFloat(double value, FPRounding round_mode) {
343  // Only the FPTieEven rounding mode is implemented.
344  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
345  USE(round_mode);
346
347  switch (std::fpclassify(value)) {
348    case FP_NAN: {
349      if (IsSignallingNaN(value)) {
350        FPProcessException();
351      }
352      if (ReadDN()) return kFP32DefaultNaN;
353
354      // Convert NaNs as the processor would:
355      //  - The sign is propagated.
356      //  - The payload (mantissa) is transferred as much as possible, except
357      //    that the top bit is forced to '1', making the result a quiet NaN.
358      uint64_t raw = DoubleToRawbits(value);
359
360      uint32_t sign = raw >> 63;
361      uint32_t exponent = (1 << 8) - 1;
362      uint32_t payload =
363          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
364      payload |= (1 << 22);  // Force a quiet NaN.
365
366      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
367    }
368
369    case FP_ZERO:
370    case FP_INFINITE: {
371      // In a C++ cast, any value representable in the target type will be
372      // unchanged. This is always the case for +/-0.0 and infinities.
373      return static_cast<float>(value);
374    }
375
376    case FP_NORMAL:
377    case FP_SUBNORMAL: {
378      // Convert double-to-float as the processor would, assuming that FPCR.FZ
379      // (flush-to-zero) is not set.
380      uint64_t raw = DoubleToRawbits(value);
381      // Extract the IEEE-754 double components.
382      uint32_t sign = raw >> 63;
383      // Extract the exponent and remove the IEEE-754 encoding bias.
384      int32_t exponent =
385          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
386      // Extract the mantissa and add the implicit '1' bit.
387      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
388      if (std::fpclassify(value) == FP_NORMAL) {
389        mantissa |= (UINT64_C(1) << 52);
390      }
391      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
392    }
393  }
394
395  VIXL_UNREACHABLE();
396  return value;
397}
398
399
400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
401  dst.ClearForWrite(vform);
402  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403    dst.ReadUintFromMem(vform, i, addr);
404    addr += LaneSizeInBytesFromFormat(vform);
405  }
406}
407
408
409void Simulator::ld1(VectorFormat vform,
410                    LogicVRegister dst,
411                    int index,
412                    uint64_t addr) {
413  dst.ReadUintFromMem(vform, index, addr);
414}
415
416
417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
418  dst.ClearForWrite(vform);
419  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
420    dst.ReadUintFromMem(vform, i, addr);
421  }
422}
423
424
425void Simulator::ld2(VectorFormat vform,
426                    LogicVRegister dst1,
427                    LogicVRegister dst2,
428                    uint64_t addr1) {
429  dst1.ClearForWrite(vform);
430  dst2.ClearForWrite(vform);
431  int esize = LaneSizeInBytesFromFormat(vform);
432  uint64_t addr2 = addr1 + esize;
433  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
434    dst1.ReadUintFromMem(vform, i, addr1);
435    dst2.ReadUintFromMem(vform, i, addr2);
436    addr1 += 2 * esize;
437    addr2 += 2 * esize;
438  }
439}
440
441
442void Simulator::ld2(VectorFormat vform,
443                    LogicVRegister dst1,
444                    LogicVRegister dst2,
445                    int index,
446                    uint64_t addr1) {
447  dst1.ClearForWrite(vform);
448  dst2.ClearForWrite(vform);
449  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
450  dst1.ReadUintFromMem(vform, index, addr1);
451  dst2.ReadUintFromMem(vform, index, addr2);
452}
453
454
455void Simulator::ld2r(VectorFormat vform,
456                     LogicVRegister dst1,
457                     LogicVRegister dst2,
458                     uint64_t addr) {
459  dst1.ClearForWrite(vform);
460  dst2.ClearForWrite(vform);
461  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
462  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
463    dst1.ReadUintFromMem(vform, i, addr);
464    dst2.ReadUintFromMem(vform, i, addr2);
465  }
466}
467
468
469void Simulator::ld3(VectorFormat vform,
470                    LogicVRegister dst1,
471                    LogicVRegister dst2,
472                    LogicVRegister dst3,
473                    uint64_t addr1) {
474  dst1.ClearForWrite(vform);
475  dst2.ClearForWrite(vform);
476  dst3.ClearForWrite(vform);
477  int esize = LaneSizeInBytesFromFormat(vform);
478  uint64_t addr2 = addr1 + esize;
479  uint64_t addr3 = addr2 + esize;
480  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
481    dst1.ReadUintFromMem(vform, i, addr1);
482    dst2.ReadUintFromMem(vform, i, addr2);
483    dst3.ReadUintFromMem(vform, i, addr3);
484    addr1 += 3 * esize;
485    addr2 += 3 * esize;
486    addr3 += 3 * esize;
487  }
488}
489
490
491void Simulator::ld3(VectorFormat vform,
492                    LogicVRegister dst1,
493                    LogicVRegister dst2,
494                    LogicVRegister dst3,
495                    int index,
496                    uint64_t addr1) {
497  dst1.ClearForWrite(vform);
498  dst2.ClearForWrite(vform);
499  dst3.ClearForWrite(vform);
500  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
501  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
502  dst1.ReadUintFromMem(vform, index, addr1);
503  dst2.ReadUintFromMem(vform, index, addr2);
504  dst3.ReadUintFromMem(vform, index, addr3);
505}
506
507
508void Simulator::ld3r(VectorFormat vform,
509                     LogicVRegister dst1,
510                     LogicVRegister dst2,
511                     LogicVRegister dst3,
512                     uint64_t addr) {
513  dst1.ClearForWrite(vform);
514  dst2.ClearForWrite(vform);
515  dst3.ClearForWrite(vform);
516  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
517  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
518  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519    dst1.ReadUintFromMem(vform, i, addr);
520    dst2.ReadUintFromMem(vform, i, addr2);
521    dst3.ReadUintFromMem(vform, i, addr3);
522  }
523}
524
525
526void Simulator::ld4(VectorFormat vform,
527                    LogicVRegister dst1,
528                    LogicVRegister dst2,
529                    LogicVRegister dst3,
530                    LogicVRegister dst4,
531                    uint64_t addr1) {
532  dst1.ClearForWrite(vform);
533  dst2.ClearForWrite(vform);
534  dst3.ClearForWrite(vform);
535  dst4.ClearForWrite(vform);
536  int esize = LaneSizeInBytesFromFormat(vform);
537  uint64_t addr2 = addr1 + esize;
538  uint64_t addr3 = addr2 + esize;
539  uint64_t addr4 = addr3 + esize;
540  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
541    dst1.ReadUintFromMem(vform, i, addr1);
542    dst2.ReadUintFromMem(vform, i, addr2);
543    dst3.ReadUintFromMem(vform, i, addr3);
544    dst4.ReadUintFromMem(vform, i, addr4);
545    addr1 += 4 * esize;
546    addr2 += 4 * esize;
547    addr3 += 4 * esize;
548    addr4 += 4 * esize;
549  }
550}
551
552
553void Simulator::ld4(VectorFormat vform,
554                    LogicVRegister dst1,
555                    LogicVRegister dst2,
556                    LogicVRegister dst3,
557                    LogicVRegister dst4,
558                    int index,
559                    uint64_t addr1) {
560  dst1.ClearForWrite(vform);
561  dst2.ClearForWrite(vform);
562  dst3.ClearForWrite(vform);
563  dst4.ClearForWrite(vform);
564  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
565  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
566  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
567  dst1.ReadUintFromMem(vform, index, addr1);
568  dst2.ReadUintFromMem(vform, index, addr2);
569  dst3.ReadUintFromMem(vform, index, addr3);
570  dst4.ReadUintFromMem(vform, index, addr4);
571}
572
573
574void Simulator::ld4r(VectorFormat vform,
575                     LogicVRegister dst1,
576                     LogicVRegister dst2,
577                     LogicVRegister dst3,
578                     LogicVRegister dst4,
579                     uint64_t addr) {
580  dst1.ClearForWrite(vform);
581  dst2.ClearForWrite(vform);
582  dst3.ClearForWrite(vform);
583  dst4.ClearForWrite(vform);
584  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
585  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
586  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
587  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
588    dst1.ReadUintFromMem(vform, i, addr);
589    dst2.ReadUintFromMem(vform, i, addr2);
590    dst3.ReadUintFromMem(vform, i, addr3);
591    dst4.ReadUintFromMem(vform, i, addr4);
592  }
593}
594
595
596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
597  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
598    src.WriteUintToMem(vform, i, addr);
599    addr += LaneSizeInBytesFromFormat(vform);
600  }
601}
602
603
604void Simulator::st1(VectorFormat vform,
605                    LogicVRegister src,
606                    int index,
607                    uint64_t addr) {
608  src.WriteUintToMem(vform, index, addr);
609}
610
611
612void Simulator::st2(VectorFormat vform,
613                    LogicVRegister dst,
614                    LogicVRegister dst2,
615                    uint64_t addr) {
616  int esize = LaneSizeInBytesFromFormat(vform);
617  uint64_t addr2 = addr + esize;
618  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
619    dst.WriteUintToMem(vform, i, addr);
620    dst2.WriteUintToMem(vform, i, addr2);
621    addr += 2 * esize;
622    addr2 += 2 * esize;
623  }
624}
625
626
627void Simulator::st2(VectorFormat vform,
628                    LogicVRegister dst,
629                    LogicVRegister dst2,
630                    int index,
631                    uint64_t addr) {
632  int esize = LaneSizeInBytesFromFormat(vform);
633  dst.WriteUintToMem(vform, index, addr);
634  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
635}
636
637
638void Simulator::st3(VectorFormat vform,
639                    LogicVRegister dst,
640                    LogicVRegister dst2,
641                    LogicVRegister dst3,
642                    uint64_t addr) {
643  int esize = LaneSizeInBytesFromFormat(vform);
644  uint64_t addr2 = addr + esize;
645  uint64_t addr3 = addr2 + esize;
646  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
647    dst.WriteUintToMem(vform, i, addr);
648    dst2.WriteUintToMem(vform, i, addr2);
649    dst3.WriteUintToMem(vform, i, addr3);
650    addr += 3 * esize;
651    addr2 += 3 * esize;
652    addr3 += 3 * esize;
653  }
654}
655
656
657void Simulator::st3(VectorFormat vform,
658                    LogicVRegister dst,
659                    LogicVRegister dst2,
660                    LogicVRegister dst3,
661                    int index,
662                    uint64_t addr) {
663  int esize = LaneSizeInBytesFromFormat(vform);
664  dst.WriteUintToMem(vform, index, addr);
665  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
666  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
667}
668
669
670void Simulator::st4(VectorFormat vform,
671                    LogicVRegister dst,
672                    LogicVRegister dst2,
673                    LogicVRegister dst3,
674                    LogicVRegister dst4,
675                    uint64_t addr) {
676  int esize = LaneSizeInBytesFromFormat(vform);
677  uint64_t addr2 = addr + esize;
678  uint64_t addr3 = addr2 + esize;
679  uint64_t addr4 = addr3 + esize;
680  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
681    dst.WriteUintToMem(vform, i, addr);
682    dst2.WriteUintToMem(vform, i, addr2);
683    dst3.WriteUintToMem(vform, i, addr3);
684    dst4.WriteUintToMem(vform, i, addr4);
685    addr += 4 * esize;
686    addr2 += 4 * esize;
687    addr3 += 4 * esize;
688    addr4 += 4 * esize;
689  }
690}
691
692
693void Simulator::st4(VectorFormat vform,
694                    LogicVRegister dst,
695                    LogicVRegister dst2,
696                    LogicVRegister dst3,
697                    LogicVRegister dst4,
698                    int index,
699                    uint64_t addr) {
700  int esize = LaneSizeInBytesFromFormat(vform);
701  dst.WriteUintToMem(vform, index, addr);
702  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
703  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
704  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
705}
706
707
708LogicVRegister Simulator::cmp(VectorFormat vform,
709                              LogicVRegister dst,
710                              const LogicVRegister& src1,
711                              const LogicVRegister& src2,
712                              Condition cond) {
713  dst.ClearForWrite(vform);
714  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
715    int64_t sa = src1.Int(vform, i);
716    int64_t sb = src2.Int(vform, i);
717    uint64_t ua = src1.Uint(vform, i);
718    uint64_t ub = src2.Uint(vform, i);
719    bool result = false;
720    switch (cond) {
721      case eq:
722        result = (ua == ub);
723        break;
724      case ge:
725        result = (sa >= sb);
726        break;
727      case gt:
728        result = (sa > sb);
729        break;
730      case hi:
731        result = (ua > ub);
732        break;
733      case hs:
734        result = (ua >= ub);
735        break;
736      case lt:
737        result = (sa < sb);
738        break;
739      case le:
740        result = (sa <= sb);
741        break;
742      default:
743        VIXL_UNREACHABLE();
744        break;
745    }
746    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
747  }
748  return dst;
749}
750
751
752LogicVRegister Simulator::cmp(VectorFormat vform,
753                              LogicVRegister dst,
754                              const LogicVRegister& src1,
755                              int imm,
756                              Condition cond) {
757  SimVRegister temp;
758  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
759  return cmp(vform, dst, src1, imm_reg, cond);
760}
761
762
763LogicVRegister Simulator::cmptst(VectorFormat vform,
764                                 LogicVRegister dst,
765                                 const LogicVRegister& src1,
766                                 const LogicVRegister& src2) {
767  dst.ClearForWrite(vform);
768  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
769    uint64_t ua = src1.Uint(vform, i);
770    uint64_t ub = src2.Uint(vform, i);
771    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
772  }
773  return dst;
774}
775
776
777LogicVRegister Simulator::add(VectorFormat vform,
778                              LogicVRegister dst,
779                              const LogicVRegister& src1,
780                              const LogicVRegister& src2) {
781  dst.ClearForWrite(vform);
782  // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
783  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
784    // Test for unsigned saturation.
785    uint64_t ua = src1.UintLeftJustified(vform, i);
786    uint64_t ub = src2.UintLeftJustified(vform, i);
787    uint64_t ur = ua + ub;
788    if (ur < ua) {
789      dst.SetUnsignedSat(i, true);
790    }
791
792    // Test for signed saturation.
793    int64_t sa = src1.IntLeftJustified(vform, i);
794    int64_t sb = src2.IntLeftJustified(vform, i);
795    int64_t sr = sa + sb;
796    // If the signs of the operands are the same, but different from the result,
797    // there was an overflow.
798    if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
799      dst.SetSignedSat(i, sa >= 0);
800    }
801
802    dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
803  }
804  return dst;
805}
806
807
808LogicVRegister Simulator::addp(VectorFormat vform,
809                               LogicVRegister dst,
810                               const LogicVRegister& src1,
811                               const LogicVRegister& src2) {
812  SimVRegister temp1, temp2;
813  uzp1(vform, temp1, src1, src2);
814  uzp2(vform, temp2, src1, src2);
815  add(vform, dst, temp1, temp2);
816  return dst;
817}
818
819
820LogicVRegister Simulator::mla(VectorFormat vform,
821                              LogicVRegister dst,
822                              const LogicVRegister& src1,
823                              const LogicVRegister& src2) {
824  SimVRegister temp;
825  mul(vform, temp, src1, src2);
826  add(vform, dst, dst, temp);
827  return dst;
828}
829
830
831LogicVRegister Simulator::mls(VectorFormat vform,
832                              LogicVRegister dst,
833                              const LogicVRegister& src1,
834                              const LogicVRegister& src2) {
835  SimVRegister temp;
836  mul(vform, temp, src1, src2);
837  sub(vform, dst, dst, temp);
838  return dst;
839}
840
841
842LogicVRegister Simulator::mul(VectorFormat vform,
843                              LogicVRegister dst,
844                              const LogicVRegister& src1,
845                              const LogicVRegister& src2) {
846  dst.ClearForWrite(vform);
847  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
848    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
849  }
850  return dst;
851}
852
853
854LogicVRegister Simulator::mul(VectorFormat vform,
855                              LogicVRegister dst,
856                              const LogicVRegister& src1,
857                              const LogicVRegister& src2,
858                              int index) {
859  SimVRegister temp;
860  VectorFormat indexform = VectorFormatFillQ(vform);
861  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
862}
863
864
865LogicVRegister Simulator::mla(VectorFormat vform,
866                              LogicVRegister dst,
867                              const LogicVRegister& src1,
868                              const LogicVRegister& src2,
869                              int index) {
870  SimVRegister temp;
871  VectorFormat indexform = VectorFormatFillQ(vform);
872  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
873}
874
875
876LogicVRegister Simulator::mls(VectorFormat vform,
877                              LogicVRegister dst,
878                              const LogicVRegister& src1,
879                              const LogicVRegister& src2,
880                              int index) {
881  SimVRegister temp;
882  VectorFormat indexform = VectorFormatFillQ(vform);
883  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
884}
885
886
887LogicVRegister Simulator::smull(VectorFormat vform,
888                                LogicVRegister dst,
889                                const LogicVRegister& src1,
890                                const LogicVRegister& src2,
891                                int index) {
892  SimVRegister temp;
893  VectorFormat indexform =
894      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
896}
897
898
899LogicVRegister Simulator::smull2(VectorFormat vform,
900                                 LogicVRegister dst,
901                                 const LogicVRegister& src1,
902                                 const LogicVRegister& src2,
903                                 int index) {
904  SimVRegister temp;
905  VectorFormat indexform =
906      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
907  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
908}
909
910
911LogicVRegister Simulator::umull(VectorFormat vform,
912                                LogicVRegister dst,
913                                const LogicVRegister& src1,
914                                const LogicVRegister& src2,
915                                int index) {
916  SimVRegister temp;
917  VectorFormat indexform =
918      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
919  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
920}
921
922
923LogicVRegister Simulator::umull2(VectorFormat vform,
924                                 LogicVRegister dst,
925                                 const LogicVRegister& src1,
926                                 const LogicVRegister& src2,
927                                 int index) {
928  SimVRegister temp;
929  VectorFormat indexform =
930      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
931  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
932}
933
934
935LogicVRegister Simulator::smlal(VectorFormat vform,
936                                LogicVRegister dst,
937                                const LogicVRegister& src1,
938                                const LogicVRegister& src2,
939                                int index) {
940  SimVRegister temp;
941  VectorFormat indexform =
942      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
943  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
944}
945
946
947LogicVRegister Simulator::smlal2(VectorFormat vform,
948                                 LogicVRegister dst,
949                                 const LogicVRegister& src1,
950                                 const LogicVRegister& src2,
951                                 int index) {
952  SimVRegister temp;
953  VectorFormat indexform =
954      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
955  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
956}
957
958
959LogicVRegister Simulator::umlal(VectorFormat vform,
960                                LogicVRegister dst,
961                                const LogicVRegister& src1,
962                                const LogicVRegister& src2,
963                                int index) {
964  SimVRegister temp;
965  VectorFormat indexform =
966      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
967  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
968}
969
970
971LogicVRegister Simulator::umlal2(VectorFormat vform,
972                                 LogicVRegister dst,
973                                 const LogicVRegister& src1,
974                                 const LogicVRegister& src2,
975                                 int index) {
976  SimVRegister temp;
977  VectorFormat indexform =
978      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
979  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
980}
981
982
983LogicVRegister Simulator::smlsl(VectorFormat vform,
984                                LogicVRegister dst,
985                                const LogicVRegister& src1,
986                                const LogicVRegister& src2,
987                                int index) {
988  SimVRegister temp;
989  VectorFormat indexform =
990      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
991  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
992}
993
994
995LogicVRegister Simulator::smlsl2(VectorFormat vform,
996                                 LogicVRegister dst,
997                                 const LogicVRegister& src1,
998                                 const LogicVRegister& src2,
999                                 int index) {
1000  SimVRegister temp;
1001  VectorFormat indexform =
1002      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1003  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1004}
1005
1006
1007LogicVRegister Simulator::umlsl(VectorFormat vform,
1008                                LogicVRegister dst,
1009                                const LogicVRegister& src1,
1010                                const LogicVRegister& src2,
1011                                int index) {
1012  SimVRegister temp;
1013  VectorFormat indexform =
1014      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1015  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1016}
1017
1018
1019LogicVRegister Simulator::umlsl2(VectorFormat vform,
1020                                 LogicVRegister dst,
1021                                 const LogicVRegister& src1,
1022                                 const LogicVRegister& src2,
1023                                 int index) {
1024  SimVRegister temp;
1025  VectorFormat indexform =
1026      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1027  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1028}
1029
1030
1031LogicVRegister Simulator::sqdmull(VectorFormat vform,
1032                                  LogicVRegister dst,
1033                                  const LogicVRegister& src1,
1034                                  const LogicVRegister& src2,
1035                                  int index) {
1036  SimVRegister temp;
1037  VectorFormat indexform =
1038      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1039  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1040}
1041
1042
1043LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1044                                   LogicVRegister dst,
1045                                   const LogicVRegister& src1,
1046                                   const LogicVRegister& src2,
1047                                   int index) {
1048  SimVRegister temp;
1049  VectorFormat indexform =
1050      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1051  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1052}
1053
1054
1055LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1056                                  LogicVRegister dst,
1057                                  const LogicVRegister& src1,
1058                                  const LogicVRegister& src2,
1059                                  int index) {
1060  SimVRegister temp;
1061  VectorFormat indexform =
1062      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1063  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1064}
1065
1066
1067LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1068                                   LogicVRegister dst,
1069                                   const LogicVRegister& src1,
1070                                   const LogicVRegister& src2,
1071                                   int index) {
1072  SimVRegister temp;
1073  VectorFormat indexform =
1074      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1075  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1076}
1077
1078
1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1080                                  LogicVRegister dst,
1081                                  const LogicVRegister& src1,
1082                                  const LogicVRegister& src2,
1083                                  int index) {
1084  SimVRegister temp;
1085  VectorFormat indexform =
1086      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1087  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1088}
1089
1090
1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1092                                   LogicVRegister dst,
1093                                   const LogicVRegister& src1,
1094                                   const LogicVRegister& src2,
1095                                   int index) {
1096  SimVRegister temp;
1097  VectorFormat indexform =
1098      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1099  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1100}
1101
1102
1103LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1104                                  LogicVRegister dst,
1105                                  const LogicVRegister& src1,
1106                                  const LogicVRegister& src2,
1107                                  int index) {
1108  SimVRegister temp;
1109  VectorFormat indexform = VectorFormatFillQ(vform);
1110  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1111}
1112
1113
1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1115                                   LogicVRegister dst,
1116                                   const LogicVRegister& src1,
1117                                   const LogicVRegister& src2,
1118                                   int index) {
1119  SimVRegister temp;
1120  VectorFormat indexform = VectorFormatFillQ(vform);
1121  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1122}
1123
1124
1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1126  uint16_t result = 0;
1127  uint16_t extended_op2 = op2;
1128  for (int i = 0; i < 8; ++i) {
1129    if ((op1 >> i) & 1) {
1130      result = result ^ (extended_op2 << i);
1131    }
1132  }
1133  return result;
1134}
1135
1136
1137LogicVRegister Simulator::pmul(VectorFormat vform,
1138                               LogicVRegister dst,
1139                               const LogicVRegister& src1,
1140                               const LogicVRegister& src2) {
1141  dst.ClearForWrite(vform);
1142  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1143    dst.SetUint(vform,
1144                i,
1145                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1146  }
1147  return dst;
1148}
1149
1150
1151LogicVRegister Simulator::pmull(VectorFormat vform,
1152                                LogicVRegister dst,
1153                                const LogicVRegister& src1,
1154                                const LogicVRegister& src2) {
1155  VectorFormat vform_src = VectorFormatHalfWidth(vform);
1156  dst.ClearForWrite(vform);
1157  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1158    dst.SetUint(vform,
1159                i,
1160                PolynomialMult(src1.Uint(vform_src, i),
1161                               src2.Uint(vform_src, i)));
1162  }
1163  return dst;
1164}
1165
1166
1167LogicVRegister Simulator::pmull2(VectorFormat vform,
1168                                 LogicVRegister dst,
1169                                 const LogicVRegister& src1,
1170                                 const LogicVRegister& src2) {
1171  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1172  dst.ClearForWrite(vform);
1173  int lane_count = LaneCountFromFormat(vform);
1174  for (int i = 0; i < lane_count; i++) {
1175    dst.SetUint(vform,
1176                i,
1177                PolynomialMult(src1.Uint(vform_src, lane_count + i),
1178                               src2.Uint(vform_src, lane_count + i)));
1179  }
1180  return dst;
1181}
1182
1183
1184LogicVRegister Simulator::sub(VectorFormat vform,
1185                              LogicVRegister dst,
1186                              const LogicVRegister& src1,
1187                              const LogicVRegister& src2) {
1188  dst.ClearForWrite(vform);
1189  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1190    // Test for unsigned saturation.
1191    if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1192      dst.SetUnsignedSat(i, false);
1193    }
1194
1195    // Test for signed saturation.
1196    int64_t sa = src1.IntLeftJustified(vform, i);
1197    int64_t sb = src2.IntLeftJustified(vform, i);
1198    int64_t sr = sa - sb;
1199    // If the signs of the operands are different, and the sign of the first
1200    // operand doesn't match the result, there was an overflow.
1201    if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1202      dst.SetSignedSat(i, sr < 0);
1203    }
1204
1205    dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1206  }
1207  return dst;
1208}
1209
1210
1211LogicVRegister Simulator::and_(VectorFormat vform,
1212                               LogicVRegister dst,
1213                               const LogicVRegister& src1,
1214                               const LogicVRegister& src2) {
1215  dst.ClearForWrite(vform);
1216  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1217    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1218  }
1219  return dst;
1220}
1221
1222
1223LogicVRegister Simulator::orr(VectorFormat vform,
1224                              LogicVRegister dst,
1225                              const LogicVRegister& src1,
1226                              const LogicVRegister& src2) {
1227  dst.ClearForWrite(vform);
1228  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1229    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1230  }
1231  return dst;
1232}
1233
1234
1235LogicVRegister Simulator::orn(VectorFormat vform,
1236                              LogicVRegister dst,
1237                              const LogicVRegister& src1,
1238                              const LogicVRegister& src2) {
1239  dst.ClearForWrite(vform);
1240  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1241    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1242  }
1243  return dst;
1244}
1245
1246
1247LogicVRegister Simulator::eor(VectorFormat vform,
1248                              LogicVRegister dst,
1249                              const LogicVRegister& src1,
1250                              const LogicVRegister& src2) {
1251  dst.ClearForWrite(vform);
1252  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1253    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1254  }
1255  return dst;
1256}
1257
1258
1259LogicVRegister Simulator::bic(VectorFormat vform,
1260                              LogicVRegister dst,
1261                              const LogicVRegister& src1,
1262                              const LogicVRegister& src2) {
1263  dst.ClearForWrite(vform);
1264  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1265    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1266  }
1267  return dst;
1268}
1269
1270
1271LogicVRegister Simulator::bic(VectorFormat vform,
1272                              LogicVRegister dst,
1273                              const LogicVRegister& src,
1274                              uint64_t imm) {
1275  uint64_t result[16];
1276  int laneCount = LaneCountFromFormat(vform);
1277  for (int i = 0; i < laneCount; ++i) {
1278    result[i] = src.Uint(vform, i) & ~imm;
1279  }
1280  dst.ClearForWrite(vform);
1281  for (int i = 0; i < laneCount; ++i) {
1282    dst.SetUint(vform, i, result[i]);
1283  }
1284  return dst;
1285}
1286
1287
1288LogicVRegister Simulator::bif(VectorFormat vform,
1289                              LogicVRegister dst,
1290                              const LogicVRegister& src1,
1291                              const LogicVRegister& src2) {
1292  dst.ClearForWrite(vform);
1293  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1294    uint64_t operand1 = dst.Uint(vform, i);
1295    uint64_t operand2 = ~src2.Uint(vform, i);
1296    uint64_t operand3 = src1.Uint(vform, i);
1297    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1298    dst.SetUint(vform, i, result);
1299  }
1300  return dst;
1301}
1302
1303
1304LogicVRegister Simulator::bit(VectorFormat vform,
1305                              LogicVRegister dst,
1306                              const LogicVRegister& src1,
1307                              const LogicVRegister& src2) {
1308  dst.ClearForWrite(vform);
1309  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1310    uint64_t operand1 = dst.Uint(vform, i);
1311    uint64_t operand2 = src2.Uint(vform, i);
1312    uint64_t operand3 = src1.Uint(vform, i);
1313    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1314    dst.SetUint(vform, i, result);
1315  }
1316  return dst;
1317}
1318
1319
1320LogicVRegister Simulator::bsl(VectorFormat vform,
1321                              LogicVRegister dst,
1322                              const LogicVRegister& src1,
1323                              const LogicVRegister& src2) {
1324  dst.ClearForWrite(vform);
1325  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1326    uint64_t operand1 = src2.Uint(vform, i);
1327    uint64_t operand2 = dst.Uint(vform, i);
1328    uint64_t operand3 = src1.Uint(vform, i);
1329    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1330    dst.SetUint(vform, i, result);
1331  }
1332  return dst;
1333}
1334
1335
1336LogicVRegister Simulator::sminmax(VectorFormat vform,
1337                                  LogicVRegister dst,
1338                                  const LogicVRegister& src1,
1339                                  const LogicVRegister& src2,
1340                                  bool max) {
1341  dst.ClearForWrite(vform);
1342  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1343    int64_t src1_val = src1.Int(vform, i);
1344    int64_t src2_val = src2.Int(vform, i);
1345    int64_t dst_val;
1346    if (max) {
1347      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1348    } else {
1349      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1350    }
1351    dst.SetInt(vform, i, dst_val);
1352  }
1353  return dst;
1354}
1355
1356
1357LogicVRegister Simulator::smax(VectorFormat vform,
1358                               LogicVRegister dst,
1359                               const LogicVRegister& src1,
1360                               const LogicVRegister& src2) {
1361  return sminmax(vform, dst, src1, src2, true);
1362}
1363
1364
1365LogicVRegister Simulator::smin(VectorFormat vform,
1366                               LogicVRegister dst,
1367                               const LogicVRegister& src1,
1368                               const LogicVRegister& src2) {
1369  return sminmax(vform, dst, src1, src2, false);
1370}
1371
1372
1373LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1374                                   LogicVRegister dst,
1375                                   const LogicVRegister& src1,
1376                                   const LogicVRegister& src2,
1377                                   bool max) {
1378  int lanes = LaneCountFromFormat(vform);
1379  int64_t result[kMaxLanesPerVector];
1380  const LogicVRegister* src = &src1;
1381  for (int j = 0; j < 2; j++) {
1382    for (int i = 0; i < lanes; i += 2) {
1383      int64_t first_val = src->Int(vform, i);
1384      int64_t second_val = src->Int(vform, i + 1);
1385      int64_t dst_val;
1386      if (max) {
1387        dst_val = (first_val > second_val) ? first_val : second_val;
1388      } else {
1389        dst_val = (first_val < second_val) ? first_val : second_val;
1390      }
1391      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1392      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1393    }
1394    src = &src2;
1395  }
1396  dst.SetIntArray(vform, result);
1397  return dst;
1398}
1399
1400
1401LogicVRegister Simulator::smaxp(VectorFormat vform,
1402                                LogicVRegister dst,
1403                                const LogicVRegister& src1,
1404                                const LogicVRegister& src2) {
1405  return sminmaxp(vform, dst, src1, src2, true);
1406}
1407
1408
1409LogicVRegister Simulator::sminp(VectorFormat vform,
1410                                LogicVRegister dst,
1411                                const LogicVRegister& src1,
1412                                const LogicVRegister& src2) {
1413  return sminmaxp(vform, dst, src1, src2, false);
1414}
1415
1416
1417LogicVRegister Simulator::addp(VectorFormat vform,
1418                               LogicVRegister dst,
1419                               const LogicVRegister& src) {
1420  VIXL_ASSERT(vform == kFormatD);
1421
1422  int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1423  dst.ClearForWrite(vform);
1424  dst.SetInt(vform, 0, dst_val);
1425  return dst;
1426}
1427
1428
1429LogicVRegister Simulator::addv(VectorFormat vform,
1430                               LogicVRegister dst,
1431                               const LogicVRegister& src) {
1432  VectorFormat vform_dst =
1433      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1434
1435
1436  int64_t dst_val = 0;
1437  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1438    dst_val += src.Int(vform, i);
1439  }
1440
1441  dst.ClearForWrite(vform_dst);
1442  dst.SetInt(vform_dst, 0, dst_val);
1443  return dst;
1444}
1445
1446
1447LogicVRegister Simulator::saddlv(VectorFormat vform,
1448                                 LogicVRegister dst,
1449                                 const LogicVRegister& src) {
1450  VectorFormat vform_dst =
1451      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1452
1453  int64_t dst_val = 0;
1454  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1455    dst_val += src.Int(vform, i);
1456  }
1457
1458  dst.ClearForWrite(vform_dst);
1459  dst.SetInt(vform_dst, 0, dst_val);
1460  return dst;
1461}
1462
1463
1464LogicVRegister Simulator::uaddlv(VectorFormat vform,
1465                                 LogicVRegister dst,
1466                                 const LogicVRegister& src) {
1467  VectorFormat vform_dst =
1468      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1469
1470  uint64_t dst_val = 0;
1471  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1472    dst_val += src.Uint(vform, i);
1473  }
1474
1475  dst.ClearForWrite(vform_dst);
1476  dst.SetUint(vform_dst, 0, dst_val);
1477  return dst;
1478}
1479
1480
1481LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1482                                   LogicVRegister dst,
1483                                   const LogicVRegister& src,
1484                                   bool max) {
1485  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1486  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1487    int64_t src_val = src.Int(vform, i);
1488    if (max) {
1489      dst_val = (src_val > dst_val) ? src_val : dst_val;
1490    } else {
1491      dst_val = (src_val < dst_val) ? src_val : dst_val;
1492    }
1493  }
1494  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1495  dst.SetInt(vform, 0, dst_val);
1496  return dst;
1497}
1498
1499
1500LogicVRegister Simulator::smaxv(VectorFormat vform,
1501                                LogicVRegister dst,
1502                                const LogicVRegister& src) {
1503  sminmaxv(vform, dst, src, true);
1504  return dst;
1505}
1506
1507
1508LogicVRegister Simulator::sminv(VectorFormat vform,
1509                                LogicVRegister dst,
1510                                const LogicVRegister& src) {
1511  sminmaxv(vform, dst, src, false);
1512  return dst;
1513}
1514
1515
1516LogicVRegister Simulator::uminmax(VectorFormat vform,
1517                                  LogicVRegister dst,
1518                                  const LogicVRegister& src1,
1519                                  const LogicVRegister& src2,
1520                                  bool max) {
1521  dst.ClearForWrite(vform);
1522  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1523    uint64_t src1_val = src1.Uint(vform, i);
1524    uint64_t src2_val = src2.Uint(vform, i);
1525    uint64_t dst_val;
1526    if (max) {
1527      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1528    } else {
1529      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1530    }
1531    dst.SetUint(vform, i, dst_val);
1532  }
1533  return dst;
1534}
1535
1536
1537LogicVRegister Simulator::umax(VectorFormat vform,
1538                               LogicVRegister dst,
1539                               const LogicVRegister& src1,
1540                               const LogicVRegister& src2) {
1541  return uminmax(vform, dst, src1, src2, true);
1542}
1543
1544
1545LogicVRegister Simulator::umin(VectorFormat vform,
1546                               LogicVRegister dst,
1547                               const LogicVRegister& src1,
1548                               const LogicVRegister& src2) {
1549  return uminmax(vform, dst, src1, src2, false);
1550}
1551
1552
1553LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1554                                   LogicVRegister dst,
1555                                   const LogicVRegister& src1,
1556                                   const LogicVRegister& src2,
1557                                   bool max) {
1558  int lanes = LaneCountFromFormat(vform);
1559  uint64_t result[kMaxLanesPerVector];
1560  const LogicVRegister* src = &src1;
1561  for (int j = 0; j < 2; j++) {
1562    for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1563      uint64_t first_val = src->Uint(vform, i);
1564      uint64_t second_val = src->Uint(vform, i + 1);
1565      uint64_t dst_val;
1566      if (max) {
1567        dst_val = (first_val > second_val) ? first_val : second_val;
1568      } else {
1569        dst_val = (first_val < second_val) ? first_val : second_val;
1570      }
1571      VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1572      result[(i >> 1) + (j * lanes / 2)] = dst_val;
1573    }
1574    src = &src2;
1575  }
1576  dst.SetUintArray(vform, result);
1577  return dst;
1578}
1579
1580
1581LogicVRegister Simulator::umaxp(VectorFormat vform,
1582                                LogicVRegister dst,
1583                                const LogicVRegister& src1,
1584                                const LogicVRegister& src2) {
1585  return uminmaxp(vform, dst, src1, src2, true);
1586}
1587
1588
1589LogicVRegister Simulator::uminp(VectorFormat vform,
1590                                LogicVRegister dst,
1591                                const LogicVRegister& src1,
1592                                const LogicVRegister& src2) {
1593  return uminmaxp(vform, dst, src1, src2, false);
1594}
1595
1596
1597LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1598                                   LogicVRegister dst,
1599                                   const LogicVRegister& src,
1600                                   bool max) {
1601  uint64_t dst_val = max ? 0 : UINT64_MAX;
1602  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1603    uint64_t src_val = src.Uint(vform, i);
1604    if (max) {
1605      dst_val = (src_val > dst_val) ? src_val : dst_val;
1606    } else {
1607      dst_val = (src_val < dst_val) ? src_val : dst_val;
1608    }
1609  }
1610  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1611  dst.SetUint(vform, 0, dst_val);
1612  return dst;
1613}
1614
1615
1616LogicVRegister Simulator::umaxv(VectorFormat vform,
1617                                LogicVRegister dst,
1618                                const LogicVRegister& src) {
1619  uminmaxv(vform, dst, src, true);
1620  return dst;
1621}
1622
1623
1624LogicVRegister Simulator::uminv(VectorFormat vform,
1625                                LogicVRegister dst,
1626                                const LogicVRegister& src) {
1627  uminmaxv(vform, dst, src, false);
1628  return dst;
1629}
1630
1631
1632LogicVRegister Simulator::shl(VectorFormat vform,
1633                              LogicVRegister dst,
1634                              const LogicVRegister& src,
1635                              int shift) {
1636  VIXL_ASSERT(shift >= 0);
1637  SimVRegister temp;
1638  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1639  return ushl(vform, dst, src, shiftreg);
1640}
1641
1642
1643LogicVRegister Simulator::sshll(VectorFormat vform,
1644                                LogicVRegister dst,
1645                                const LogicVRegister& src,
1646                                int shift) {
1647  VIXL_ASSERT(shift >= 0);
1648  SimVRegister temp1, temp2;
1649  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1650  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1651  return sshl(vform, dst, extendedreg, shiftreg);
1652}
1653
1654
1655LogicVRegister Simulator::sshll2(VectorFormat vform,
1656                                 LogicVRegister dst,
1657                                 const LogicVRegister& src,
1658                                 int shift) {
1659  VIXL_ASSERT(shift >= 0);
1660  SimVRegister temp1, temp2;
1661  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1662  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1663  return sshl(vform, dst, extendedreg, shiftreg);
1664}
1665
1666
1667LogicVRegister Simulator::shll(VectorFormat vform,
1668                               LogicVRegister dst,
1669                               const LogicVRegister& src) {
1670  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1671  return sshll(vform, dst, src, shift);
1672}
1673
1674
1675LogicVRegister Simulator::shll2(VectorFormat vform,
1676                                LogicVRegister dst,
1677                                const LogicVRegister& src) {
1678  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1679  return sshll2(vform, dst, src, shift);
1680}
1681
1682
1683LogicVRegister Simulator::ushll(VectorFormat vform,
1684                                LogicVRegister dst,
1685                                const LogicVRegister& src,
1686                                int shift) {
1687  VIXL_ASSERT(shift >= 0);
1688  SimVRegister temp1, temp2;
1689  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1690  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1691  return ushl(vform, dst, extendedreg, shiftreg);
1692}
1693
1694
1695LogicVRegister Simulator::ushll2(VectorFormat vform,
1696                                 LogicVRegister dst,
1697                                 const LogicVRegister& src,
1698                                 int shift) {
1699  VIXL_ASSERT(shift >= 0);
1700  SimVRegister temp1, temp2;
1701  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1702  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1703  return ushl(vform, dst, extendedreg, shiftreg);
1704}
1705
1706
1707LogicVRegister Simulator::sli(VectorFormat vform,
1708                              LogicVRegister dst,
1709                              const LogicVRegister& src,
1710                              int shift) {
1711  dst.ClearForWrite(vform);
1712  int laneCount = LaneCountFromFormat(vform);
1713  for (int i = 0; i < laneCount; i++) {
1714    uint64_t src_lane = src.Uint(vform, i);
1715    uint64_t dst_lane = dst.Uint(vform, i);
1716    uint64_t shifted = src_lane << shift;
1717    uint64_t mask = MaxUintFromFormat(vform) << shift;
1718    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1719  }
1720  return dst;
1721}
1722
1723
1724LogicVRegister Simulator::sqshl(VectorFormat vform,
1725                                LogicVRegister dst,
1726                                const LogicVRegister& src,
1727                                int shift) {
1728  VIXL_ASSERT(shift >= 0);
1729  SimVRegister temp;
1730  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1731  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1732}
1733
1734
1735LogicVRegister Simulator::uqshl(VectorFormat vform,
1736                                LogicVRegister dst,
1737                                const LogicVRegister& src,
1738                                int shift) {
1739  VIXL_ASSERT(shift >= 0);
1740  SimVRegister temp;
1741  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1742  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1743}
1744
1745
1746LogicVRegister Simulator::sqshlu(VectorFormat vform,
1747                                 LogicVRegister dst,
1748                                 const LogicVRegister& src,
1749                                 int shift) {
1750  VIXL_ASSERT(shift >= 0);
1751  SimVRegister temp;
1752  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1753  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1754}
1755
1756
1757LogicVRegister Simulator::sri(VectorFormat vform,
1758                              LogicVRegister dst,
1759                              const LogicVRegister& src,
1760                              int shift) {
1761  dst.ClearForWrite(vform);
1762  int laneCount = LaneCountFromFormat(vform);
1763  VIXL_ASSERT((shift > 0) &&
1764              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1765  for (int i = 0; i < laneCount; i++) {
1766    uint64_t src_lane = src.Uint(vform, i);
1767    uint64_t dst_lane = dst.Uint(vform, i);
1768    uint64_t shifted;
1769    uint64_t mask;
1770    if (shift == 64) {
1771      shifted = 0;
1772      mask = 0;
1773    } else {
1774      shifted = src_lane >> shift;
1775      mask = MaxUintFromFormat(vform) >> shift;
1776    }
1777    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1778  }
1779  return dst;
1780}
1781
1782
1783LogicVRegister Simulator::ushr(VectorFormat vform,
1784                               LogicVRegister dst,
1785                               const LogicVRegister& src,
1786                               int shift) {
1787  VIXL_ASSERT(shift >= 0);
1788  SimVRegister temp;
1789  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1790  return ushl(vform, dst, src, shiftreg);
1791}
1792
1793
1794LogicVRegister Simulator::sshr(VectorFormat vform,
1795                               LogicVRegister dst,
1796                               const LogicVRegister& src,
1797                               int shift) {
1798  VIXL_ASSERT(shift >= 0);
1799  SimVRegister temp;
1800  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1801  return sshl(vform, dst, src, shiftreg);
1802}
1803
1804
1805LogicVRegister Simulator::ssra(VectorFormat vform,
1806                               LogicVRegister dst,
1807                               const LogicVRegister& src,
1808                               int shift) {
1809  SimVRegister temp;
1810  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1811  return add(vform, dst, dst, shifted_reg);
1812}
1813
1814
1815LogicVRegister Simulator::usra(VectorFormat vform,
1816                               LogicVRegister dst,
1817                               const LogicVRegister& src,
1818                               int shift) {
1819  SimVRegister temp;
1820  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1821  return add(vform, dst, dst, shifted_reg);
1822}
1823
1824
1825LogicVRegister Simulator::srsra(VectorFormat vform,
1826                                LogicVRegister dst,
1827                                const LogicVRegister& src,
1828                                int shift) {
1829  SimVRegister temp;
1830  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1831  return add(vform, dst, dst, shifted_reg);
1832}
1833
1834
1835LogicVRegister Simulator::ursra(VectorFormat vform,
1836                                LogicVRegister dst,
1837                                const LogicVRegister& src,
1838                                int shift) {
1839  SimVRegister temp;
1840  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1841  return add(vform, dst, dst, shifted_reg);
1842}
1843
1844
1845LogicVRegister Simulator::cls(VectorFormat vform,
1846                              LogicVRegister dst,
1847                              const LogicVRegister& src) {
1848  uint64_t result[16];
1849  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1850  int laneCount = LaneCountFromFormat(vform);
1851  for (int i = 0; i < laneCount; i++) {
1852    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1853  }
1854
1855  dst.ClearForWrite(vform);
1856  for (int i = 0; i < laneCount; ++i) {
1857    dst.SetUint(vform, i, result[i]);
1858  }
1859  return dst;
1860}
1861
1862
1863LogicVRegister Simulator::clz(VectorFormat vform,
1864                              LogicVRegister dst,
1865                              const LogicVRegister& src) {
1866  uint64_t result[16];
1867  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1868  int laneCount = LaneCountFromFormat(vform);
1869  for (int i = 0; i < laneCount; i++) {
1870    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1871  }
1872
1873  dst.ClearForWrite(vform);
1874  for (int i = 0; i < laneCount; ++i) {
1875    dst.SetUint(vform, i, result[i]);
1876  }
1877  return dst;
1878}
1879
1880
1881LogicVRegister Simulator::cnt(VectorFormat vform,
1882                              LogicVRegister dst,
1883                              const LogicVRegister& src) {
1884  uint64_t result[16];
1885  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1886  int laneCount = LaneCountFromFormat(vform);
1887  for (int i = 0; i < laneCount; i++) {
1888    uint64_t value = src.Uint(vform, i);
1889    result[i] = 0;
1890    for (int j = 0; j < laneSizeInBits; j++) {
1891      result[i] += (value & 1);
1892      value >>= 1;
1893    }
1894  }
1895
1896  dst.ClearForWrite(vform);
1897  for (int i = 0; i < laneCount; ++i) {
1898    dst.SetUint(vform, i, result[i]);
1899  }
1900  return dst;
1901}
1902
1903
1904LogicVRegister Simulator::sshl(VectorFormat vform,
1905                               LogicVRegister dst,
1906                               const LogicVRegister& src1,
1907                               const LogicVRegister& src2) {
1908  dst.ClearForWrite(vform);
1909  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1910    int8_t shift_val = src2.Int(vform, i);
1911    int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1912
1913    // Set signed saturation state.
1914    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1915      dst.SetSignedSat(i, lj_src_val >= 0);
1916    }
1917
1918    // Set unsigned saturation state.
1919    if (lj_src_val < 0) {
1920      dst.SetUnsignedSat(i, false);
1921    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1922               (lj_src_val != 0)) {
1923      dst.SetUnsignedSat(i, true);
1924    }
1925
1926    int64_t src_val = src1.Int(vform, i);
1927    if (shift_val > 63) {
1928      dst.SetInt(vform, i, 0);
1929    } else if (shift_val < -63) {
1930      dst.SetRounding(i, src_val < 0);
1931      dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1932    } else {
1933      if (shift_val < 0) {
1934        // Set rounding state. Rounding only needed on right shifts.
1935        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1936          dst.SetRounding(i, true);
1937        }
1938        src_val >>= -shift_val;
1939      } else {
1940        src_val <<= shift_val;
1941      }
1942      dst.SetInt(vform, i, src_val);
1943    }
1944  }
1945  return dst;
1946}
1947
1948
1949LogicVRegister Simulator::ushl(VectorFormat vform,
1950                               LogicVRegister dst,
1951                               const LogicVRegister& src1,
1952                               const LogicVRegister& src2) {
1953  dst.ClearForWrite(vform);
1954  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1955    int8_t shift_val = src2.Int(vform, i);
1956    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1957
1958    // Set saturation state.
1959    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1960      dst.SetUnsignedSat(i, true);
1961    }
1962
1963    uint64_t src_val = src1.Uint(vform, i);
1964    if ((shift_val > 63) || (shift_val < -64)) {
1965      dst.SetUint(vform, i, 0);
1966    } else {
1967      if (shift_val < 0) {
1968        // Set rounding state. Rounding only needed on right shifts.
1969        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1970          dst.SetRounding(i, true);
1971        }
1972
1973        if (shift_val == -64) {
1974          src_val = 0;
1975        } else {
1976          src_val >>= -shift_val;
1977        }
1978      } else {
1979        src_val <<= shift_val;
1980      }
1981      dst.SetUint(vform, i, src_val);
1982    }
1983  }
1984  return dst;
1985}
1986
1987
1988LogicVRegister Simulator::neg(VectorFormat vform,
1989                              LogicVRegister dst,
1990                              const LogicVRegister& src) {
1991  dst.ClearForWrite(vform);
1992  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1993    // Test for signed saturation.
1994    int64_t sa = src.Int(vform, i);
1995    if (sa == MinIntFromFormat(vform)) {
1996      dst.SetSignedSat(i, true);
1997    }
1998    dst.SetInt(vform, i, -sa);
1999  }
2000  return dst;
2001}
2002
2003
2004LogicVRegister Simulator::suqadd(VectorFormat vform,
2005                                 LogicVRegister dst,
2006                                 const LogicVRegister& src) {
2007  dst.ClearForWrite(vform);
2008  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2009    int64_t sa = dst.IntLeftJustified(vform, i);
2010    uint64_t ub = src.UintLeftJustified(vform, i);
2011    int64_t sr = sa + ub;
2012
2013    if (sr < sa) {  // Test for signed positive saturation.
2014      dst.SetInt(vform, i, MaxIntFromFormat(vform));
2015    } else {
2016      dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
2017    }
2018  }
2019  return dst;
2020}
2021
2022
2023LogicVRegister Simulator::usqadd(VectorFormat vform,
2024                                 LogicVRegister dst,
2025                                 const LogicVRegister& src) {
2026  dst.ClearForWrite(vform);
2027  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2028    uint64_t ua = dst.UintLeftJustified(vform, i);
2029    int64_t sb = src.IntLeftJustified(vform, i);
2030    uint64_t ur = ua + sb;
2031
2032    if ((sb > 0) && (ur <= ua)) {
2033      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2034    } else if ((sb < 0) && (ur >= ua)) {
2035      dst.SetUint(vform, i, 0);  // Negative saturation.
2036    } else {
2037      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2038    }
2039  }
2040  return dst;
2041}
2042
2043
2044LogicVRegister Simulator::abs(VectorFormat vform,
2045                              LogicVRegister dst,
2046                              const LogicVRegister& src) {
2047  dst.ClearForWrite(vform);
2048  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2049    // Test for signed saturation.
2050    int64_t sa = src.Int(vform, i);
2051    if (sa == MinIntFromFormat(vform)) {
2052      dst.SetSignedSat(i, true);
2053    }
2054    if (sa < 0) {
2055      dst.SetInt(vform, i, -sa);
2056    } else {
2057      dst.SetInt(vform, i, sa);
2058    }
2059  }
2060  return dst;
2061}
2062
2063
2064LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2065                                        LogicVRegister dst,
2066                                        bool dstIsSigned,
2067                                        const LogicVRegister& src,
2068                                        bool srcIsSigned) {
2069  bool upperhalf = false;
2070  VectorFormat srcform = kFormatUndefined;
2071  int64_t ssrc[8];
2072  uint64_t usrc[8];
2073
2074  switch (dstform) {
2075    case kFormat8B:
2076      upperhalf = false;
2077      srcform = kFormat8H;
2078      break;
2079    case kFormat16B:
2080      upperhalf = true;
2081      srcform = kFormat8H;
2082      break;
2083    case kFormat4H:
2084      upperhalf = false;
2085      srcform = kFormat4S;
2086      break;
2087    case kFormat8H:
2088      upperhalf = true;
2089      srcform = kFormat4S;
2090      break;
2091    case kFormat2S:
2092      upperhalf = false;
2093      srcform = kFormat2D;
2094      break;
2095    case kFormat4S:
2096      upperhalf = true;
2097      srcform = kFormat2D;
2098      break;
2099    case kFormatB:
2100      upperhalf = false;
2101      srcform = kFormatH;
2102      break;
2103    case kFormatH:
2104      upperhalf = false;
2105      srcform = kFormatS;
2106      break;
2107    case kFormatS:
2108      upperhalf = false;
2109      srcform = kFormatD;
2110      break;
2111    default:
2112      VIXL_UNIMPLEMENTED();
2113  }
2114
2115  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2116    ssrc[i] = src.Int(srcform, i);
2117    usrc[i] = src.Uint(srcform, i);
2118  }
2119
2120  int offset;
2121  if (upperhalf) {
2122    offset = LaneCountFromFormat(dstform) / 2;
2123  } else {
2124    offset = 0;
2125    dst.ClearForWrite(dstform);
2126  }
2127
2128  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2129    // Test for signed saturation
2130    if (ssrc[i] > MaxIntFromFormat(dstform)) {
2131      dst.SetSignedSat(offset + i, true);
2132    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2133      dst.SetSignedSat(offset + i, false);
2134    }
2135
2136    // Test for unsigned saturation
2137    if (srcIsSigned) {
2138      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2139        dst.SetUnsignedSat(offset + i, true);
2140      } else if (ssrc[i] < 0) {
2141        dst.SetUnsignedSat(offset + i, false);
2142      }
2143    } else {
2144      if (usrc[i] > MaxUintFromFormat(dstform)) {
2145        dst.SetUnsignedSat(offset + i, true);
2146      }
2147    }
2148
2149    int64_t result;
2150    if (srcIsSigned) {
2151      result = ssrc[i] & MaxUintFromFormat(dstform);
2152    } else {
2153      result = usrc[i] & MaxUintFromFormat(dstform);
2154    }
2155
2156    if (dstIsSigned) {
2157      dst.SetInt(dstform, offset + i, result);
2158    } else {
2159      dst.SetUint(dstform, offset + i, result);
2160    }
2161  }
2162  return dst;
2163}
2164
2165
2166LogicVRegister Simulator::xtn(VectorFormat vform,
2167                              LogicVRegister dst,
2168                              const LogicVRegister& src) {
2169  return extractnarrow(vform, dst, true, src, true);
2170}
2171
2172
2173LogicVRegister Simulator::sqxtn(VectorFormat vform,
2174                                LogicVRegister dst,
2175                                const LogicVRegister& src) {
2176  return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2177}
2178
2179
2180LogicVRegister Simulator::sqxtun(VectorFormat vform,
2181                                 LogicVRegister dst,
2182                                 const LogicVRegister& src) {
2183  return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2184}
2185
2186
2187LogicVRegister Simulator::uqxtn(VectorFormat vform,
2188                                LogicVRegister dst,
2189                                const LogicVRegister& src) {
2190  return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2191}
2192
2193
2194LogicVRegister Simulator::absdiff(VectorFormat vform,
2195                                  LogicVRegister dst,
2196                                  const LogicVRegister& src1,
2197                                  const LogicVRegister& src2,
2198                                  bool issigned) {
2199  dst.ClearForWrite(vform);
2200  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2201    if (issigned) {
2202      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2203      sr = sr > 0 ? sr : -sr;
2204      dst.SetInt(vform, i, sr);
2205    } else {
2206      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2207      sr = sr > 0 ? sr : -sr;
2208      dst.SetUint(vform, i, sr);
2209    }
2210  }
2211  return dst;
2212}
2213
2214
2215LogicVRegister Simulator::saba(VectorFormat vform,
2216                               LogicVRegister dst,
2217                               const LogicVRegister& src1,
2218                               const LogicVRegister& src2) {
2219  SimVRegister temp;
2220  dst.ClearForWrite(vform);
2221  absdiff(vform, temp, src1, src2, true);
2222  add(vform, dst, dst, temp);
2223  return dst;
2224}
2225
2226
2227LogicVRegister Simulator::uaba(VectorFormat vform,
2228                               LogicVRegister dst,
2229                               const LogicVRegister& src1,
2230                               const LogicVRegister& src2) {
2231  SimVRegister temp;
2232  dst.ClearForWrite(vform);
2233  absdiff(vform, temp, src1, src2, false);
2234  add(vform, dst, dst, temp);
2235  return dst;
2236}
2237
2238
2239LogicVRegister Simulator::not_(VectorFormat vform,
2240                               LogicVRegister dst,
2241                               const LogicVRegister& src) {
2242  dst.ClearForWrite(vform);
2243  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2244    dst.SetUint(vform, i, ~src.Uint(vform, i));
2245  }
2246  return dst;
2247}
2248
2249
2250LogicVRegister Simulator::rbit(VectorFormat vform,
2251                               LogicVRegister dst,
2252                               const LogicVRegister& src) {
2253  uint64_t result[16];
2254  int laneCount = LaneCountFromFormat(vform);
2255  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2256  uint64_t reversed_value;
2257  uint64_t value;
2258  for (int i = 0; i < laneCount; i++) {
2259    value = src.Uint(vform, i);
2260    reversed_value = 0;
2261    for (int j = 0; j < laneSizeInBits; j++) {
2262      reversed_value = (reversed_value << 1) | (value & 1);
2263      value >>= 1;
2264    }
2265    result[i] = reversed_value;
2266  }
2267
2268  dst.ClearForWrite(vform);
2269  for (int i = 0; i < laneCount; ++i) {
2270    dst.SetUint(vform, i, result[i]);
2271  }
2272  return dst;
2273}
2274
2275
2276LogicVRegister Simulator::rev(VectorFormat vform,
2277                              LogicVRegister dst,
2278                              const LogicVRegister& src,
2279                              int revSize) {
2280  uint64_t result[16];
2281  int laneCount = LaneCountFromFormat(vform);
2282  int laneSize = LaneSizeInBytesFromFormat(vform);
2283  int lanesPerLoop = revSize / laneSize;
2284  for (int i = 0; i < laneCount; i += lanesPerLoop) {
2285    for (int j = 0; j < lanesPerLoop; j++) {
2286      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2287    }
2288  }
2289  dst.ClearForWrite(vform);
2290  for (int i = 0; i < laneCount; ++i) {
2291    dst.SetUint(vform, i, result[i]);
2292  }
2293  return dst;
2294}
2295
2296
2297LogicVRegister Simulator::rev16(VectorFormat vform,
2298                                LogicVRegister dst,
2299                                const LogicVRegister& src) {
2300  return rev(vform, dst, src, 2);
2301}
2302
2303
2304LogicVRegister Simulator::rev32(VectorFormat vform,
2305                                LogicVRegister dst,
2306                                const LogicVRegister& src) {
2307  return rev(vform, dst, src, 4);
2308}
2309
2310
2311LogicVRegister Simulator::rev64(VectorFormat vform,
2312                                LogicVRegister dst,
2313                                const LogicVRegister& src) {
2314  return rev(vform, dst, src, 8);
2315}
2316
2317
2318LogicVRegister Simulator::addlp(VectorFormat vform,
2319                                LogicVRegister dst,
2320                                const LogicVRegister& src,
2321                                bool is_signed,
2322                                bool do_accumulate) {
2323  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2324
2325  int64_t sr[16];
2326  uint64_t ur[16];
2327
2328  int laneCount = LaneCountFromFormat(vform);
2329  for (int i = 0; i < laneCount; ++i) {
2330    if (is_signed) {
2331      sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2332    } else {
2333      ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2334    }
2335  }
2336
2337  dst.ClearForWrite(vform);
2338  for (int i = 0; i < laneCount; ++i) {
2339    if (do_accumulate) {
2340      if (is_signed) {
2341        dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2342      } else {
2343        dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2344      }
2345    } else {
2346      if (is_signed) {
2347        dst.SetInt(vform, i, sr[i]);
2348      } else {
2349        dst.SetUint(vform, i, ur[i]);
2350      }
2351    }
2352  }
2353
2354  return dst;
2355}
2356
2357
2358LogicVRegister Simulator::saddlp(VectorFormat vform,
2359                                 LogicVRegister dst,
2360                                 const LogicVRegister& src) {
2361  return addlp(vform, dst, src, true, false);
2362}
2363
2364
2365LogicVRegister Simulator::uaddlp(VectorFormat vform,
2366                                 LogicVRegister dst,
2367                                 const LogicVRegister& src) {
2368  return addlp(vform, dst, src, false, false);
2369}
2370
2371
2372LogicVRegister Simulator::sadalp(VectorFormat vform,
2373                                 LogicVRegister dst,
2374                                 const LogicVRegister& src) {
2375  return addlp(vform, dst, src, true, true);
2376}
2377
2378
2379LogicVRegister Simulator::uadalp(VectorFormat vform,
2380                                 LogicVRegister dst,
2381                                 const LogicVRegister& src) {
2382  return addlp(vform, dst, src, false, true);
2383}
2384
2385
2386LogicVRegister Simulator::ext(VectorFormat vform,
2387                              LogicVRegister dst,
2388                              const LogicVRegister& src1,
2389                              const LogicVRegister& src2,
2390                              int index) {
2391  uint8_t result[16];
2392  int laneCount = LaneCountFromFormat(vform);
2393  for (int i = 0; i < laneCount - index; ++i) {
2394    result[i] = src1.Uint(vform, i + index);
2395  }
2396  for (int i = 0; i < index; ++i) {
2397    result[laneCount - index + i] = src2.Uint(vform, i);
2398  }
2399  dst.ClearForWrite(vform);
2400  for (int i = 0; i < laneCount; ++i) {
2401    dst.SetUint(vform, i, result[i]);
2402  }
2403  return dst;
2404}
2405
2406
2407LogicVRegister Simulator::dup_element(VectorFormat vform,
2408                                      LogicVRegister dst,
2409                                      const LogicVRegister& src,
2410                                      int src_index) {
2411  int laneCount = LaneCountFromFormat(vform);
2412  uint64_t value = src.Uint(vform, src_index);
2413  dst.ClearForWrite(vform);
2414  for (int i = 0; i < laneCount; ++i) {
2415    dst.SetUint(vform, i, value);
2416  }
2417  return dst;
2418}
2419
2420
2421LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2422                                        LogicVRegister dst,
2423                                        uint64_t imm) {
2424  int laneCount = LaneCountFromFormat(vform);
2425  uint64_t value = imm & MaxUintFromFormat(vform);
2426  dst.ClearForWrite(vform);
2427  for (int i = 0; i < laneCount; ++i) {
2428    dst.SetUint(vform, i, value);
2429  }
2430  return dst;
2431}
2432
2433
2434LogicVRegister Simulator::ins_element(VectorFormat vform,
2435                                      LogicVRegister dst,
2436                                      int dst_index,
2437                                      const LogicVRegister& src,
2438                                      int src_index) {
2439  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2440  return dst;
2441}
2442
2443
2444LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2445                                        LogicVRegister dst,
2446                                        int dst_index,
2447                                        uint64_t imm) {
2448  uint64_t value = imm & MaxUintFromFormat(vform);
2449  dst.SetUint(vform, dst_index, value);
2450  return dst;
2451}
2452
2453
2454LogicVRegister Simulator::movi(VectorFormat vform,
2455                               LogicVRegister dst,
2456                               uint64_t imm) {
2457  int laneCount = LaneCountFromFormat(vform);
2458  dst.ClearForWrite(vform);
2459  for (int i = 0; i < laneCount; ++i) {
2460    dst.SetUint(vform, i, imm);
2461  }
2462  return dst;
2463}
2464
2465
2466LogicVRegister Simulator::mvni(VectorFormat vform,
2467                               LogicVRegister dst,
2468                               uint64_t imm) {
2469  int laneCount = LaneCountFromFormat(vform);
2470  dst.ClearForWrite(vform);
2471  for (int i = 0; i < laneCount; ++i) {
2472    dst.SetUint(vform, i, ~imm);
2473  }
2474  return dst;
2475}
2476
2477
2478LogicVRegister Simulator::orr(VectorFormat vform,
2479                              LogicVRegister dst,
2480                              const LogicVRegister& src,
2481                              uint64_t imm) {
2482  uint64_t result[16];
2483  int laneCount = LaneCountFromFormat(vform);
2484  for (int i = 0; i < laneCount; ++i) {
2485    result[i] = src.Uint(vform, i) | imm;
2486  }
2487  dst.ClearForWrite(vform);
2488  for (int i = 0; i < laneCount; ++i) {
2489    dst.SetUint(vform, i, result[i]);
2490  }
2491  return dst;
2492}
2493
2494
2495LogicVRegister Simulator::uxtl(VectorFormat vform,
2496                               LogicVRegister dst,
2497                               const LogicVRegister& src) {
2498  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2499
2500  dst.ClearForWrite(vform);
2501  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2502    dst.SetUint(vform, i, src.Uint(vform_half, i));
2503  }
2504  return dst;
2505}
2506
2507
2508LogicVRegister Simulator::sxtl(VectorFormat vform,
2509                               LogicVRegister dst,
2510                               const LogicVRegister& src) {
2511  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2512
2513  dst.ClearForWrite(vform);
2514  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2515    dst.SetInt(vform, i, src.Int(vform_half, i));
2516  }
2517  return dst;
2518}
2519
2520
2521LogicVRegister Simulator::uxtl2(VectorFormat vform,
2522                                LogicVRegister dst,
2523                                const LogicVRegister& src) {
2524  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2525  int lane_count = LaneCountFromFormat(vform);
2526
2527  dst.ClearForWrite(vform);
2528  for (int i = 0; i < lane_count; i++) {
2529    dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2530  }
2531  return dst;
2532}
2533
2534
2535LogicVRegister Simulator::sxtl2(VectorFormat vform,
2536                                LogicVRegister dst,
2537                                const LogicVRegister& src) {
2538  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2539  int lane_count = LaneCountFromFormat(vform);
2540
2541  dst.ClearForWrite(vform);
2542  for (int i = 0; i < lane_count; i++) {
2543    dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2544  }
2545  return dst;
2546}
2547
2548
2549LogicVRegister Simulator::shrn(VectorFormat vform,
2550                               LogicVRegister dst,
2551                               const LogicVRegister& src,
2552                               int shift) {
2553  SimVRegister temp;
2554  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2555  VectorFormat vform_dst = vform;
2556  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2557  return extractnarrow(vform_dst, dst, false, shifted_src, false);
2558}
2559
2560
2561LogicVRegister Simulator::shrn2(VectorFormat vform,
2562                                LogicVRegister dst,
2563                                const LogicVRegister& src,
2564                                int shift) {
2565  SimVRegister temp;
2566  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2567  VectorFormat vformdst = vform;
2568  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2569  return extractnarrow(vformdst, dst, false, shifted_src, false);
2570}
2571
2572
2573LogicVRegister Simulator::rshrn(VectorFormat vform,
2574                                LogicVRegister dst,
2575                                const LogicVRegister& src,
2576                                int shift) {
2577  SimVRegister temp;
2578  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2579  VectorFormat vformdst = vform;
2580  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2581  return extractnarrow(vformdst, dst, false, shifted_src, false);
2582}
2583
2584
2585LogicVRegister Simulator::rshrn2(VectorFormat vform,
2586                                 LogicVRegister dst,
2587                                 const LogicVRegister& src,
2588                                 int shift) {
2589  SimVRegister temp;
2590  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2591  VectorFormat vformdst = vform;
2592  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2593  return extractnarrow(vformdst, dst, false, shifted_src, false);
2594}
2595
2596
2597LogicVRegister Simulator::Table(VectorFormat vform,
2598                                LogicVRegister dst,
2599                                const LogicVRegister& ind,
2600                                bool zero_out_of_bounds,
2601                                const LogicVRegister* tab1,
2602                                const LogicVRegister* tab2,
2603                                const LogicVRegister* tab3,
2604                                const LogicVRegister* tab4) {
2605  VIXL_ASSERT(tab1 != NULL);
2606  const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2607  uint64_t result[kMaxLanesPerVector];
2608  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2609    result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2610  }
2611  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2612    uint64_t j = ind.Uint(vform, i);
2613    int tab_idx = static_cast<int>(j >> 4);
2614    int j_idx = static_cast<int>(j & 15);
2615    if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2616      result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2617    }
2618  }
2619  dst.SetUintArray(vform, result);
2620  return dst;
2621}
2622
2623
2624LogicVRegister Simulator::tbl(VectorFormat vform,
2625                              LogicVRegister dst,
2626                              const LogicVRegister& tab,
2627                              const LogicVRegister& ind) {
2628  return Table(vform, dst, ind, true, &tab);
2629}
2630
2631
2632LogicVRegister Simulator::tbl(VectorFormat vform,
2633                              LogicVRegister dst,
2634                              const LogicVRegister& tab,
2635                              const LogicVRegister& tab2,
2636                              const LogicVRegister& ind) {
2637  return Table(vform, dst, ind, true, &tab, &tab2);
2638}
2639
2640
2641LogicVRegister Simulator::tbl(VectorFormat vform,
2642                              LogicVRegister dst,
2643                              const LogicVRegister& tab,
2644                              const LogicVRegister& tab2,
2645                              const LogicVRegister& tab3,
2646                              const LogicVRegister& ind) {
2647  return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2648}
2649
2650
2651LogicVRegister Simulator::tbl(VectorFormat vform,
2652                              LogicVRegister dst,
2653                              const LogicVRegister& tab,
2654                              const LogicVRegister& tab2,
2655                              const LogicVRegister& tab3,
2656                              const LogicVRegister& tab4,
2657                              const LogicVRegister& ind) {
2658  return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2659}
2660
2661
2662LogicVRegister Simulator::tbx(VectorFormat vform,
2663                              LogicVRegister dst,
2664                              const LogicVRegister& tab,
2665                              const LogicVRegister& ind) {
2666  return Table(vform, dst, ind, false, &tab);
2667}
2668
2669
2670LogicVRegister Simulator::tbx(VectorFormat vform,
2671                              LogicVRegister dst,
2672                              const LogicVRegister& tab,
2673                              const LogicVRegister& tab2,
2674                              const LogicVRegister& ind) {
2675  return Table(vform, dst, ind, false, &tab, &tab2);
2676}
2677
2678
2679LogicVRegister Simulator::tbx(VectorFormat vform,
2680                              LogicVRegister dst,
2681                              const LogicVRegister& tab,
2682                              const LogicVRegister& tab2,
2683                              const LogicVRegister& tab3,
2684                              const LogicVRegister& ind) {
2685  return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2686}
2687
2688
2689LogicVRegister Simulator::tbx(VectorFormat vform,
2690                              LogicVRegister dst,
2691                              const LogicVRegister& tab,
2692                              const LogicVRegister& tab2,
2693                              const LogicVRegister& tab3,
2694                              const LogicVRegister& tab4,
2695                              const LogicVRegister& ind) {
2696  return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2697}
2698
2699
2700LogicVRegister Simulator::uqshrn(VectorFormat vform,
2701                                 LogicVRegister dst,
2702                                 const LogicVRegister& src,
2703                                 int shift) {
2704  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2705}
2706
2707
2708LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2709                                  LogicVRegister dst,
2710                                  const LogicVRegister& src,
2711                                  int shift) {
2712  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2713}
2714
2715
2716LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2717                                  LogicVRegister dst,
2718                                  const LogicVRegister& src,
2719                                  int shift) {
2720  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2721}
2722
2723
2724LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2725                                   LogicVRegister dst,
2726                                   const LogicVRegister& src,
2727                                   int shift) {
2728  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2729}
2730
2731
2732LogicVRegister Simulator::sqshrn(VectorFormat vform,
2733                                 LogicVRegister dst,
2734                                 const LogicVRegister& src,
2735                                 int shift) {
2736  SimVRegister temp;
2737  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2738  VectorFormat vformdst = vform;
2739  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2740  return sqxtn(vformdst, dst, shifted_src);
2741}
2742
2743
2744LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2745                                  LogicVRegister dst,
2746                                  const LogicVRegister& src,
2747                                  int shift) {
2748  SimVRegister temp;
2749  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2750  VectorFormat vformdst = vform;
2751  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2752  return sqxtn(vformdst, dst, shifted_src);
2753}
2754
2755
2756LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2757                                  LogicVRegister dst,
2758                                  const LogicVRegister& src,
2759                                  int shift) {
2760  SimVRegister temp;
2761  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2762  VectorFormat vformdst = vform;
2763  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2764  return sqxtn(vformdst, dst, shifted_src);
2765}
2766
2767
2768LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2769                                   LogicVRegister dst,
2770                                   const LogicVRegister& src,
2771                                   int shift) {
2772  SimVRegister temp;
2773  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2774  VectorFormat vformdst = vform;
2775  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2776  return sqxtn(vformdst, dst, shifted_src);
2777}
2778
2779
2780LogicVRegister Simulator::sqshrun(VectorFormat vform,
2781                                  LogicVRegister dst,
2782                                  const LogicVRegister& src,
2783                                  int shift) {
2784  SimVRegister temp;
2785  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2786  VectorFormat vformdst = vform;
2787  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2788  return sqxtun(vformdst, dst, shifted_src);
2789}
2790
2791
2792LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2793                                   LogicVRegister dst,
2794                                   const LogicVRegister& src,
2795                                   int shift) {
2796  SimVRegister temp;
2797  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2798  VectorFormat vformdst = vform;
2799  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2800  return sqxtun(vformdst, dst, shifted_src);
2801}
2802
2803
2804LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2805                                   LogicVRegister dst,
2806                                   const LogicVRegister& src,
2807                                   int shift) {
2808  SimVRegister temp;
2809  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2810  VectorFormat vformdst = vform;
2811  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2812  return sqxtun(vformdst, dst, shifted_src);
2813}
2814
2815
2816LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2817                                    LogicVRegister dst,
2818                                    const LogicVRegister& src,
2819                                    int shift) {
2820  SimVRegister temp;
2821  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2822  VectorFormat vformdst = vform;
2823  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2824  return sqxtun(vformdst, dst, shifted_src);
2825}
2826
2827
2828LogicVRegister Simulator::uaddl(VectorFormat vform,
2829                                LogicVRegister dst,
2830                                const LogicVRegister& src1,
2831                                const LogicVRegister& src2) {
2832  SimVRegister temp1, temp2;
2833  uxtl(vform, temp1, src1);
2834  uxtl(vform, temp2, src2);
2835  add(vform, dst, temp1, temp2);
2836  return dst;
2837}
2838
2839
2840LogicVRegister Simulator::uaddl2(VectorFormat vform,
2841                                 LogicVRegister dst,
2842                                 const LogicVRegister& src1,
2843                                 const LogicVRegister& src2) {
2844  SimVRegister temp1, temp2;
2845  uxtl2(vform, temp1, src1);
2846  uxtl2(vform, temp2, src2);
2847  add(vform, dst, temp1, temp2);
2848  return dst;
2849}
2850
2851
2852LogicVRegister Simulator::uaddw(VectorFormat vform,
2853                                LogicVRegister dst,
2854                                const LogicVRegister& src1,
2855                                const LogicVRegister& src2) {
2856  SimVRegister temp;
2857  uxtl(vform, temp, src2);
2858  add(vform, dst, src1, temp);
2859  return dst;
2860}
2861
2862
2863LogicVRegister Simulator::uaddw2(VectorFormat vform,
2864                                 LogicVRegister dst,
2865                                 const LogicVRegister& src1,
2866                                 const LogicVRegister& src2) {
2867  SimVRegister temp;
2868  uxtl2(vform, temp, src2);
2869  add(vform, dst, src1, temp);
2870  return dst;
2871}
2872
2873
2874LogicVRegister Simulator::saddl(VectorFormat vform,
2875                                LogicVRegister dst,
2876                                const LogicVRegister& src1,
2877                                const LogicVRegister& src2) {
2878  SimVRegister temp1, temp2;
2879  sxtl(vform, temp1, src1);
2880  sxtl(vform, temp2, src2);
2881  add(vform, dst, temp1, temp2);
2882  return dst;
2883}
2884
2885
2886LogicVRegister Simulator::saddl2(VectorFormat vform,
2887                                 LogicVRegister dst,
2888                                 const LogicVRegister& src1,
2889                                 const LogicVRegister& src2) {
2890  SimVRegister temp1, temp2;
2891  sxtl2(vform, temp1, src1);
2892  sxtl2(vform, temp2, src2);
2893  add(vform, dst, temp1, temp2);
2894  return dst;
2895}
2896
2897
2898LogicVRegister Simulator::saddw(VectorFormat vform,
2899                                LogicVRegister dst,
2900                                const LogicVRegister& src1,
2901                                const LogicVRegister& src2) {
2902  SimVRegister temp;
2903  sxtl(vform, temp, src2);
2904  add(vform, dst, src1, temp);
2905  return dst;
2906}
2907
2908
2909LogicVRegister Simulator::saddw2(VectorFormat vform,
2910                                 LogicVRegister dst,
2911                                 const LogicVRegister& src1,
2912                                 const LogicVRegister& src2) {
2913  SimVRegister temp;
2914  sxtl2(vform, temp, src2);
2915  add(vform, dst, src1, temp);
2916  return dst;
2917}
2918
2919
2920LogicVRegister Simulator::usubl(VectorFormat vform,
2921                                LogicVRegister dst,
2922                                const LogicVRegister& src1,
2923                                const LogicVRegister& src2) {
2924  SimVRegister temp1, temp2;
2925  uxtl(vform, temp1, src1);
2926  uxtl(vform, temp2, src2);
2927  sub(vform, dst, temp1, temp2);
2928  return dst;
2929}
2930
2931
2932LogicVRegister Simulator::usubl2(VectorFormat vform,
2933                                 LogicVRegister dst,
2934                                 const LogicVRegister& src1,
2935                                 const LogicVRegister& src2) {
2936  SimVRegister temp1, temp2;
2937  uxtl2(vform, temp1, src1);
2938  uxtl2(vform, temp2, src2);
2939  sub(vform, dst, temp1, temp2);
2940  return dst;
2941}
2942
2943
2944LogicVRegister Simulator::usubw(VectorFormat vform,
2945                                LogicVRegister dst,
2946                                const LogicVRegister& src1,
2947                                const LogicVRegister& src2) {
2948  SimVRegister temp;
2949  uxtl(vform, temp, src2);
2950  sub(vform, dst, src1, temp);
2951  return dst;
2952}
2953
2954
2955LogicVRegister Simulator::usubw2(VectorFormat vform,
2956                                 LogicVRegister dst,
2957                                 const LogicVRegister& src1,
2958                                 const LogicVRegister& src2) {
2959  SimVRegister temp;
2960  uxtl2(vform, temp, src2);
2961  sub(vform, dst, src1, temp);
2962  return dst;
2963}
2964
2965
2966LogicVRegister Simulator::ssubl(VectorFormat vform,
2967                                LogicVRegister dst,
2968                                const LogicVRegister& src1,
2969                                const LogicVRegister& src2) {
2970  SimVRegister temp1, temp2;
2971  sxtl(vform, temp1, src1);
2972  sxtl(vform, temp2, src2);
2973  sub(vform, dst, temp1, temp2);
2974  return dst;
2975}
2976
2977
2978LogicVRegister Simulator::ssubl2(VectorFormat vform,
2979                                 LogicVRegister dst,
2980                                 const LogicVRegister& src1,
2981                                 const LogicVRegister& src2) {
2982  SimVRegister temp1, temp2;
2983  sxtl2(vform, temp1, src1);
2984  sxtl2(vform, temp2, src2);
2985  sub(vform, dst, temp1, temp2);
2986  return dst;
2987}
2988
2989
2990LogicVRegister Simulator::ssubw(VectorFormat vform,
2991                                LogicVRegister dst,
2992                                const LogicVRegister& src1,
2993                                const LogicVRegister& src2) {
2994  SimVRegister temp;
2995  sxtl(vform, temp, src2);
2996  sub(vform, dst, src1, temp);
2997  return dst;
2998}
2999
3000
3001LogicVRegister Simulator::ssubw2(VectorFormat vform,
3002                                 LogicVRegister dst,
3003                                 const LogicVRegister& src1,
3004                                 const LogicVRegister& src2) {
3005  SimVRegister temp;
3006  sxtl2(vform, temp, src2);
3007  sub(vform, dst, src1, temp);
3008  return dst;
3009}
3010
3011
3012LogicVRegister Simulator::uabal(VectorFormat vform,
3013                                LogicVRegister dst,
3014                                const LogicVRegister& src1,
3015                                const LogicVRegister& src2) {
3016  SimVRegister temp1, temp2;
3017  uxtl(vform, temp1, src1);
3018  uxtl(vform, temp2, src2);
3019  uaba(vform, dst, temp1, temp2);
3020  return dst;
3021}
3022
3023
3024LogicVRegister Simulator::uabal2(VectorFormat vform,
3025                                 LogicVRegister dst,
3026                                 const LogicVRegister& src1,
3027                                 const LogicVRegister& src2) {
3028  SimVRegister temp1, temp2;
3029  uxtl2(vform, temp1, src1);
3030  uxtl2(vform, temp2, src2);
3031  uaba(vform, dst, temp1, temp2);
3032  return dst;
3033}
3034
3035
3036LogicVRegister Simulator::sabal(VectorFormat vform,
3037                                LogicVRegister dst,
3038                                const LogicVRegister& src1,
3039                                const LogicVRegister& src2) {
3040  SimVRegister temp1, temp2;
3041  sxtl(vform, temp1, src1);
3042  sxtl(vform, temp2, src2);
3043  saba(vform, dst, temp1, temp2);
3044  return dst;
3045}
3046
3047
3048LogicVRegister Simulator::sabal2(VectorFormat vform,
3049                                 LogicVRegister dst,
3050                                 const LogicVRegister& src1,
3051                                 const LogicVRegister& src2) {
3052  SimVRegister temp1, temp2;
3053  sxtl2(vform, temp1, src1);
3054  sxtl2(vform, temp2, src2);
3055  saba(vform, dst, temp1, temp2);
3056  return dst;
3057}
3058
3059
3060LogicVRegister Simulator::uabdl(VectorFormat vform,
3061                                LogicVRegister dst,
3062                                const LogicVRegister& src1,
3063                                const LogicVRegister& src2) {
3064  SimVRegister temp1, temp2;
3065  uxtl(vform, temp1, src1);
3066  uxtl(vform, temp2, src2);
3067  absdiff(vform, dst, temp1, temp2, false);
3068  return dst;
3069}
3070
3071
3072LogicVRegister Simulator::uabdl2(VectorFormat vform,
3073                                 LogicVRegister dst,
3074                                 const LogicVRegister& src1,
3075                                 const LogicVRegister& src2) {
3076  SimVRegister temp1, temp2;
3077  uxtl2(vform, temp1, src1);
3078  uxtl2(vform, temp2, src2);
3079  absdiff(vform, dst, temp1, temp2, false);
3080  return dst;
3081}
3082
3083
3084LogicVRegister Simulator::sabdl(VectorFormat vform,
3085                                LogicVRegister dst,
3086                                const LogicVRegister& src1,
3087                                const LogicVRegister& src2) {
3088  SimVRegister temp1, temp2;
3089  sxtl(vform, temp1, src1);
3090  sxtl(vform, temp2, src2);
3091  absdiff(vform, dst, temp1, temp2, true);
3092  return dst;
3093}
3094
3095
3096LogicVRegister Simulator::sabdl2(VectorFormat vform,
3097                                 LogicVRegister dst,
3098                                 const LogicVRegister& src1,
3099                                 const LogicVRegister& src2) {
3100  SimVRegister temp1, temp2;
3101  sxtl2(vform, temp1, src1);
3102  sxtl2(vform, temp2, src2);
3103  absdiff(vform, dst, temp1, temp2, true);
3104  return dst;
3105}
3106
3107
3108LogicVRegister Simulator::umull(VectorFormat vform,
3109                                LogicVRegister dst,
3110                                const LogicVRegister& src1,
3111                                const LogicVRegister& src2) {
3112  SimVRegister temp1, temp2;
3113  uxtl(vform, temp1, src1);
3114  uxtl(vform, temp2, src2);
3115  mul(vform, dst, temp1, temp2);
3116  return dst;
3117}
3118
3119
3120LogicVRegister Simulator::umull2(VectorFormat vform,
3121                                 LogicVRegister dst,
3122                                 const LogicVRegister& src1,
3123                                 const LogicVRegister& src2) {
3124  SimVRegister temp1, temp2;
3125  uxtl2(vform, temp1, src1);
3126  uxtl2(vform, temp2, src2);
3127  mul(vform, dst, temp1, temp2);
3128  return dst;
3129}
3130
3131
3132LogicVRegister Simulator::smull(VectorFormat vform,
3133                                LogicVRegister dst,
3134                                const LogicVRegister& src1,
3135                                const LogicVRegister& src2) {
3136  SimVRegister temp1, temp2;
3137  sxtl(vform, temp1, src1);
3138  sxtl(vform, temp2, src2);
3139  mul(vform, dst, temp1, temp2);
3140  return dst;
3141}
3142
3143
3144LogicVRegister Simulator::smull2(VectorFormat vform,
3145                                 LogicVRegister dst,
3146                                 const LogicVRegister& src1,
3147                                 const LogicVRegister& src2) {
3148  SimVRegister temp1, temp2;
3149  sxtl2(vform, temp1, src1);
3150  sxtl2(vform, temp2, src2);
3151  mul(vform, dst, temp1, temp2);
3152  return dst;
3153}
3154
3155
3156LogicVRegister Simulator::umlsl(VectorFormat vform,
3157                                LogicVRegister dst,
3158                                const LogicVRegister& src1,
3159                                const LogicVRegister& src2) {
3160  SimVRegister temp1, temp2;
3161  uxtl(vform, temp1, src1);
3162  uxtl(vform, temp2, src2);
3163  mls(vform, dst, temp1, temp2);
3164  return dst;
3165}
3166
3167
3168LogicVRegister Simulator::umlsl2(VectorFormat vform,
3169                                 LogicVRegister dst,
3170                                 const LogicVRegister& src1,
3171                                 const LogicVRegister& src2) {
3172  SimVRegister temp1, temp2;
3173  uxtl2(vform, temp1, src1);
3174  uxtl2(vform, temp2, src2);
3175  mls(vform, dst, temp1, temp2);
3176  return dst;
3177}
3178
3179
3180LogicVRegister Simulator::smlsl(VectorFormat vform,
3181                                LogicVRegister dst,
3182                                const LogicVRegister& src1,
3183                                const LogicVRegister& src2) {
3184  SimVRegister temp1, temp2;
3185  sxtl(vform, temp1, src1);
3186  sxtl(vform, temp2, src2);
3187  mls(vform, dst, temp1, temp2);
3188  return dst;
3189}
3190
3191
3192LogicVRegister Simulator::smlsl2(VectorFormat vform,
3193                                 LogicVRegister dst,
3194                                 const LogicVRegister& src1,
3195                                 const LogicVRegister& src2) {
3196  SimVRegister temp1, temp2;
3197  sxtl2(vform, temp1, src1);
3198  sxtl2(vform, temp2, src2);
3199  mls(vform, dst, temp1, temp2);
3200  return dst;
3201}
3202
3203
3204LogicVRegister Simulator::umlal(VectorFormat vform,
3205                                LogicVRegister dst,
3206                                const LogicVRegister& src1,
3207                                const LogicVRegister& src2) {
3208  SimVRegister temp1, temp2;
3209  uxtl(vform, temp1, src1);
3210  uxtl(vform, temp2, src2);
3211  mla(vform, dst, temp1, temp2);
3212  return dst;
3213}
3214
3215
3216LogicVRegister Simulator::umlal2(VectorFormat vform,
3217                                 LogicVRegister dst,
3218                                 const LogicVRegister& src1,
3219                                 const LogicVRegister& src2) {
3220  SimVRegister temp1, temp2;
3221  uxtl2(vform, temp1, src1);
3222  uxtl2(vform, temp2, src2);
3223  mla(vform, dst, temp1, temp2);
3224  return dst;
3225}
3226
3227
3228LogicVRegister Simulator::smlal(VectorFormat vform,
3229                                LogicVRegister dst,
3230                                const LogicVRegister& src1,
3231                                const LogicVRegister& src2) {
3232  SimVRegister temp1, temp2;
3233  sxtl(vform, temp1, src1);
3234  sxtl(vform, temp2, src2);
3235  mla(vform, dst, temp1, temp2);
3236  return dst;
3237}
3238
3239
3240LogicVRegister Simulator::smlal2(VectorFormat vform,
3241                                 LogicVRegister dst,
3242                                 const LogicVRegister& src1,
3243                                 const LogicVRegister& src2) {
3244  SimVRegister temp1, temp2;
3245  sxtl2(vform, temp1, src1);
3246  sxtl2(vform, temp2, src2);
3247  mla(vform, dst, temp1, temp2);
3248  return dst;
3249}
3250
3251
3252LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3253                                  LogicVRegister dst,
3254                                  const LogicVRegister& src1,
3255                                  const LogicVRegister& src2) {
3256  SimVRegister temp;
3257  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3258  return add(vform, dst, dst, product).SignedSaturate(vform);
3259}
3260
3261
3262LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3263                                   LogicVRegister dst,
3264                                   const LogicVRegister& src1,
3265                                   const LogicVRegister& src2) {
3266  SimVRegister temp;
3267  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3268  return add(vform, dst, dst, product).SignedSaturate(vform);
3269}
3270
3271
3272LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3273                                  LogicVRegister dst,
3274                                  const LogicVRegister& src1,
3275                                  const LogicVRegister& src2) {
3276  SimVRegister temp;
3277  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3278  return sub(vform, dst, dst, product).SignedSaturate(vform);
3279}
3280
3281
3282LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3283                                   LogicVRegister dst,
3284                                   const LogicVRegister& src1,
3285                                   const LogicVRegister& src2) {
3286  SimVRegister temp;
3287  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3288  return sub(vform, dst, dst, product).SignedSaturate(vform);
3289}
3290
3291
3292LogicVRegister Simulator::sqdmull(VectorFormat vform,
3293                                  LogicVRegister dst,
3294                                  const LogicVRegister& src1,
3295                                  const LogicVRegister& src2) {
3296  SimVRegister temp;
3297  LogicVRegister product = smull(vform, temp, src1, src2);
3298  return add(vform, dst, product, product).SignedSaturate(vform);
3299}
3300
3301
3302LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3303                                   LogicVRegister dst,
3304                                   const LogicVRegister& src1,
3305                                   const LogicVRegister& src2) {
3306  SimVRegister temp;
3307  LogicVRegister product = smull2(vform, temp, src1, src2);
3308  return add(vform, dst, product, product).SignedSaturate(vform);
3309}
3310
3311
3312LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3313                                   LogicVRegister dst,
3314                                   const LogicVRegister& src1,
3315                                   const LogicVRegister& src2,
3316                                   bool round) {
3317  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3318  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3319  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3320
3321  int esize = LaneSizeInBitsFromFormat(vform);
3322  int round_const = round ? (1 << (esize - 2)) : 0;
3323  int64_t product;
3324
3325  dst.ClearForWrite(vform);
3326  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3327    product = src1.Int(vform, i) * src2.Int(vform, i);
3328    product += round_const;
3329    product = product >> (esize - 1);
3330
3331    if (product > MaxIntFromFormat(vform)) {
3332      product = MaxIntFromFormat(vform);
3333    } else if (product < MinIntFromFormat(vform)) {
3334      product = MinIntFromFormat(vform);
3335    }
3336    dst.SetInt(vform, i, product);
3337  }
3338  return dst;
3339}
3340
3341
3342LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3343                                  LogicVRegister dst,
3344                                  const LogicVRegister& src1,
3345                                  const LogicVRegister& src2) {
3346  return sqrdmulh(vform, dst, src1, src2, false);
3347}
3348
3349
3350LogicVRegister Simulator::addhn(VectorFormat vform,
3351                                LogicVRegister dst,
3352                                const LogicVRegister& src1,
3353                                const LogicVRegister& src2) {
3354  SimVRegister temp;
3355  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3356  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3357  return dst;
3358}
3359
3360
3361LogicVRegister Simulator::addhn2(VectorFormat vform,
3362                                 LogicVRegister dst,
3363                                 const LogicVRegister& src1,
3364                                 const LogicVRegister& src2) {
3365  SimVRegister temp;
3366  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3367  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3368  return dst;
3369}
3370
3371
3372LogicVRegister Simulator::raddhn(VectorFormat vform,
3373                                 LogicVRegister dst,
3374                                 const LogicVRegister& src1,
3375                                 const LogicVRegister& src2) {
3376  SimVRegister temp;
3377  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3378  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3379  return dst;
3380}
3381
3382
3383LogicVRegister Simulator::raddhn2(VectorFormat vform,
3384                                  LogicVRegister dst,
3385                                  const LogicVRegister& src1,
3386                                  const LogicVRegister& src2) {
3387  SimVRegister temp;
3388  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3389  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3390  return dst;
3391}
3392
3393
3394LogicVRegister Simulator::subhn(VectorFormat vform,
3395                                LogicVRegister dst,
3396                                const LogicVRegister& src1,
3397                                const LogicVRegister& src2) {
3398  SimVRegister temp;
3399  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3400  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3401  return dst;
3402}
3403
3404
3405LogicVRegister Simulator::subhn2(VectorFormat vform,
3406                                 LogicVRegister dst,
3407                                 const LogicVRegister& src1,
3408                                 const LogicVRegister& src2) {
3409  SimVRegister temp;
3410  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3411  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3412  return dst;
3413}
3414
3415
3416LogicVRegister Simulator::rsubhn(VectorFormat vform,
3417                                 LogicVRegister dst,
3418                                 const LogicVRegister& src1,
3419                                 const LogicVRegister& src2) {
3420  SimVRegister temp;
3421  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3422  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3423  return dst;
3424}
3425
3426
3427LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3428                                  LogicVRegister dst,
3429                                  const LogicVRegister& src1,
3430                                  const LogicVRegister& src2) {
3431  SimVRegister temp;
3432  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3433  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3434  return dst;
3435}
3436
3437
3438LogicVRegister Simulator::trn1(VectorFormat vform,
3439                               LogicVRegister dst,
3440                               const LogicVRegister& src1,
3441                               const LogicVRegister& src2) {
3442  uint64_t result[16];
3443  int laneCount = LaneCountFromFormat(vform);
3444  int pairs = laneCount / 2;
3445  for (int i = 0; i < pairs; ++i) {
3446    result[2 * i] = src1.Uint(vform, 2 * i);
3447    result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3448  }
3449
3450  dst.ClearForWrite(vform);
3451  for (int i = 0; i < laneCount; ++i) {
3452    dst.SetUint(vform, i, result[i]);
3453  }
3454  return dst;
3455}
3456
3457
3458LogicVRegister Simulator::trn2(VectorFormat vform,
3459                               LogicVRegister dst,
3460                               const LogicVRegister& src1,
3461                               const LogicVRegister& src2) {
3462  uint64_t result[16];
3463  int laneCount = LaneCountFromFormat(vform);
3464  int pairs = laneCount / 2;
3465  for (int i = 0; i < pairs; ++i) {
3466    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3467    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3468  }
3469
3470  dst.ClearForWrite(vform);
3471  for (int i = 0; i < laneCount; ++i) {
3472    dst.SetUint(vform, i, result[i]);
3473  }
3474  return dst;
3475}
3476
3477
3478LogicVRegister Simulator::zip1(VectorFormat vform,
3479                               LogicVRegister dst,
3480                               const LogicVRegister& src1,
3481                               const LogicVRegister& src2) {
3482  uint64_t result[16];
3483  int laneCount = LaneCountFromFormat(vform);
3484  int pairs = laneCount / 2;
3485  for (int i = 0; i < pairs; ++i) {
3486    result[2 * i] = src1.Uint(vform, i);
3487    result[(2 * i) + 1] = src2.Uint(vform, i);
3488  }
3489
3490  dst.ClearForWrite(vform);
3491  for (int i = 0; i < laneCount; ++i) {
3492    dst.SetUint(vform, i, result[i]);
3493  }
3494  return dst;
3495}
3496
3497
3498LogicVRegister Simulator::zip2(VectorFormat vform,
3499                               LogicVRegister dst,
3500                               const LogicVRegister& src1,
3501                               const LogicVRegister& src2) {
3502  uint64_t result[16];
3503  int laneCount = LaneCountFromFormat(vform);
3504  int pairs = laneCount / 2;
3505  for (int i = 0; i < pairs; ++i) {
3506    result[2 * i] = src1.Uint(vform, pairs + i);
3507    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3508  }
3509
3510  dst.ClearForWrite(vform);
3511  for (int i = 0; i < laneCount; ++i) {
3512    dst.SetUint(vform, i, result[i]);
3513  }
3514  return dst;
3515}
3516
3517
3518LogicVRegister Simulator::uzp1(VectorFormat vform,
3519                               LogicVRegister dst,
3520                               const LogicVRegister& src1,
3521                               const LogicVRegister& src2) {
3522  uint64_t result[32];
3523  int laneCount = LaneCountFromFormat(vform);
3524  for (int i = 0; i < laneCount; ++i) {
3525    result[i] = src1.Uint(vform, i);
3526    result[laneCount + i] = src2.Uint(vform, i);
3527  }
3528
3529  dst.ClearForWrite(vform);
3530  for (int i = 0; i < laneCount; ++i) {
3531    dst.SetUint(vform, i, result[2 * i]);
3532  }
3533  return dst;
3534}
3535
3536
3537LogicVRegister Simulator::uzp2(VectorFormat vform,
3538                               LogicVRegister dst,
3539                               const LogicVRegister& src1,
3540                               const LogicVRegister& src2) {
3541  uint64_t result[32];
3542  int laneCount = LaneCountFromFormat(vform);
3543  for (int i = 0; i < laneCount; ++i) {
3544    result[i] = src1.Uint(vform, i);
3545    result[laneCount + i] = src2.Uint(vform, i);
3546  }
3547
3548  dst.ClearForWrite(vform);
3549  for (int i = 0; i < laneCount; ++i) {
3550    dst.SetUint(vform, i, result[(2 * i) + 1]);
3551  }
3552  return dst;
3553}
3554
3555
3556template <typename T>
3557T Simulator::FPAdd(T op1, T op2) {
3558  T result = FPProcessNaNs(op1, op2);
3559  if (std::isnan(result)) return result;
3560
3561  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3562    // inf + -inf returns the default NaN.
3563    FPProcessException();
3564    return FPDefaultNaN<T>();
3565  } else {
3566    // Other cases should be handled by standard arithmetic.
3567    return op1 + op2;
3568  }
3569}
3570
3571
3572template <typename T>
3573T Simulator::FPSub(T op1, T op2) {
3574  // NaNs should be handled elsewhere.
3575  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3576
3577  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3578    // inf - inf returns the default NaN.
3579    FPProcessException();
3580    return FPDefaultNaN<T>();
3581  } else {
3582    // Other cases should be handled by standard arithmetic.
3583    return op1 - op2;
3584  }
3585}
3586
3587
3588template <typename T>
3589T Simulator::FPMul(T op1, T op2) {
3590  // NaNs should be handled elsewhere.
3591  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3592
3593  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3594    // inf * 0.0 returns the default NaN.
3595    FPProcessException();
3596    return FPDefaultNaN<T>();
3597  } else {
3598    // Other cases should be handled by standard arithmetic.
3599    return op1 * op2;
3600  }
3601}
3602
3603
3604template <typename T>
3605T Simulator::FPMulx(T op1, T op2) {
3606  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3607    // inf * 0.0 returns +/-2.0.
3608    T two = 2.0;
3609    return copysign(1.0, op1) * copysign(1.0, op2) * two;
3610  }
3611  return FPMul(op1, op2);
3612}
3613
3614
3615template <typename T>
3616T Simulator::FPMulAdd(T a, T op1, T op2) {
3617  T result = FPProcessNaNs3(a, op1, op2);
3618
3619  T sign_a = copysign(1.0, a);
3620  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3621  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3622  bool operation_generates_nan =
3623      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
3624      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
3625      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
3626
3627  if (std::isnan(result)) {
3628    // Generated NaNs override quiet NaNs propagated from a.
3629    if (operation_generates_nan && IsQuietNaN(a)) {
3630      FPProcessException();
3631      return FPDefaultNaN<T>();
3632    } else {
3633      return result;
3634    }
3635  }
3636
3637  // If the operation would produce a NaN, return the default NaN.
3638  if (operation_generates_nan) {
3639    FPProcessException();
3640    return FPDefaultNaN<T>();
3641  }
3642
3643  // Work around broken fma implementations for exact zero results: The sign of
3644  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3645  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3646    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3647  }
3648
3649  result = FusedMultiplyAdd(op1, op2, a);
3650  VIXL_ASSERT(!std::isnan(result));
3651
3652  // Work around broken fma implementations for rounded zero results: If a is
3653  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3654  if ((a == 0.0) && (result == 0.0)) {
3655    return copysign(0.0, sign_prod);
3656  }
3657
3658  return result;
3659}
3660
3661
3662template <typename T>
3663T Simulator::FPDiv(T op1, T op2) {
3664  // NaNs should be handled elsewhere.
3665  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3666
3667  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3668    // inf / inf and 0.0 / 0.0 return the default NaN.
3669    FPProcessException();
3670    return FPDefaultNaN<T>();
3671  } else {
3672    if (op2 == 0.0) FPProcessException();
3673
3674    // Other cases should be handled by standard arithmetic.
3675    return op1 / op2;
3676  }
3677}
3678
3679
3680template <typename T>
3681T Simulator::FPSqrt(T op) {
3682  if (std::isnan(op)) {
3683    return FPProcessNaN(op);
3684  } else if (op < 0.0) {
3685    FPProcessException();
3686    return FPDefaultNaN<T>();
3687  } else {
3688    return sqrt(op);
3689  }
3690}
3691
3692
3693template <typename T>
3694T Simulator::FPMax(T a, T b) {
3695  T result = FPProcessNaNs(a, b);
3696  if (std::isnan(result)) return result;
3697
3698  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3699    // a and b are zero, and the sign differs: return +0.0.
3700    return 0.0;
3701  } else {
3702    return (a > b) ? a : b;
3703  }
3704}
3705
3706
3707template <typename T>
3708T Simulator::FPMaxNM(T a, T b) {
3709  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3710    a = kFP64NegativeInfinity;
3711  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3712    b = kFP64NegativeInfinity;
3713  }
3714
3715  T result = FPProcessNaNs(a, b);
3716  return std::isnan(result) ? result : FPMax(a, b);
3717}
3718
3719
3720template <typename T>
3721T Simulator::FPMin(T a, T b) {
3722  T result = FPProcessNaNs(a, b);
3723  if (std::isnan(result)) return result;
3724
3725  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3726    // a and b are zero, and the sign differs: return -0.0.
3727    return -0.0;
3728  } else {
3729    return (a < b) ? a : b;
3730  }
3731}
3732
3733
3734template <typename T>
3735T Simulator::FPMinNM(T a, T b) {
3736  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3737    a = kFP64PositiveInfinity;
3738  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3739    b = kFP64PositiveInfinity;
3740  }
3741
3742  T result = FPProcessNaNs(a, b);
3743  return std::isnan(result) ? result : FPMin(a, b);
3744}
3745
3746
3747template <typename T>
3748T Simulator::FPRecipStepFused(T op1, T op2) {
3749  const T two = 2.0;
3750  if ((std::isinf(op1) && (op2 == 0.0)) ||
3751      ((op1 == 0.0) && (std::isinf(op2)))) {
3752    return two;
3753  } else if (std::isinf(op1) || std::isinf(op2)) {
3754    // Return +inf if signs match, otherwise -inf.
3755    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3756                                          : kFP64NegativeInfinity;
3757  } else {
3758    return FusedMultiplyAdd(op1, op2, two);
3759  }
3760}
3761
3762
3763template <typename T>
3764T Simulator::FPRSqrtStepFused(T op1, T op2) {
3765  const T one_point_five = 1.5;
3766  const T two = 2.0;
3767
3768  if ((std::isinf(op1) && (op2 == 0.0)) ||
3769      ((op1 == 0.0) && (std::isinf(op2)))) {
3770    return one_point_five;
3771  } else if (std::isinf(op1) || std::isinf(op2)) {
3772    // Return +inf if signs match, otherwise -inf.
3773    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3774                                          : kFP64NegativeInfinity;
3775  } else {
3776    // The multiply-add-halve operation must be fully fused, so avoid interim
3777    // rounding by checking which operand can be losslessly divided by two
3778    // before doing the multiply-add.
3779    if (std::isnormal(op1 / two)) {
3780      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3781    } else if (std::isnormal(op2 / two)) {
3782      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3783    } else {
3784      // Neither operand is normal after halving: the result is dominated by
3785      // the addition term, so just return that.
3786      return one_point_five;
3787    }
3788  }
3789}
3790
3791
3792double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3793  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3794      (value == kFP64NegativeInfinity)) {
3795    return value;
3796  } else if (std::isnan(value)) {
3797    return FPProcessNaN(value);
3798  }
3799
3800  double int_result = std::floor(value);
3801  double error = value - int_result;
3802  switch (round_mode) {
3803    case FPTieAway: {
3804      // Take care of correctly handling the range ]-0.5, -0.0], which must
3805      // yield -0.0.
3806      if ((-0.5 < value) && (value < 0.0)) {
3807        int_result = -0.0;
3808
3809      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3810        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3811        // result is positive, round up.
3812        int_result++;
3813      }
3814      break;
3815    }
3816    case FPTieEven: {
3817      // Take care of correctly handling the range [-0.5, -0.0], which must
3818      // yield -0.0.
3819      if ((-0.5 <= value) && (value < 0.0)) {
3820        int_result = -0.0;
3821
3822        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3823        // result is odd, round up.
3824      } else if ((error > 0.5) ||
3825                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3826        int_result++;
3827      }
3828      break;
3829    }
3830    case FPZero: {
3831      // If value>0 then we take floor(value)
3832      // otherwise, ceil(value).
3833      if (value < 0) {
3834        int_result = ceil(value);
3835      }
3836      break;
3837    }
3838    case FPNegativeInfinity: {
3839      // We always use floor(value).
3840      break;
3841    }
3842    case FPPositiveInfinity: {
3843      // Take care of correctly handling the range ]-1.0, -0.0], which must
3844      // yield -0.0.
3845      if ((-1.0 < value) && (value < 0.0)) {
3846        int_result = -0.0;
3847
3848        // If the error is non-zero, round up.
3849      } else if (error > 0.0) {
3850        int_result++;
3851      }
3852      break;
3853    }
3854    default:
3855      VIXL_UNIMPLEMENTED();
3856  }
3857  return int_result;
3858}
3859
3860
3861int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3862  value = FPRoundInt(value, rmode);
3863  if (value >= kWMaxInt) {
3864    return kWMaxInt;
3865  } else if (value < kWMinInt) {
3866    return kWMinInt;
3867  }
3868  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3869}
3870
3871
3872int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3873  value = FPRoundInt(value, rmode);
3874  if (value >= kXMaxInt) {
3875    return kXMaxInt;
3876  } else if (value < kXMinInt) {
3877    return kXMinInt;
3878  }
3879  return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3880}
3881
3882
3883uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3884  value = FPRoundInt(value, rmode);
3885  if (value >= kWMaxUInt) {
3886    return kWMaxUInt;
3887  } else if (value < 0.0) {
3888    return 0;
3889  }
3890  return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3891}
3892
3893
3894uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3895  value = FPRoundInt(value, rmode);
3896  if (value >= kXMaxUInt) {
3897    return kXMaxUInt;
3898  } else if (value < 0.0) {
3899    return 0;
3900  }
3901  return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3902}
3903
3904
3905#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
3906  template <typename T>                                          \
3907  LogicVRegister Simulator::FN(VectorFormat vform,               \
3908                               LogicVRegister dst,               \
3909                               const LogicVRegister& src1,       \
3910                               const LogicVRegister& src2) {     \
3911    dst.ClearForWrite(vform);                                    \
3912    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
3913      T op1 = src1.Float<T>(i);                                  \
3914      T op2 = src2.Float<T>(i);                                  \
3915      T result;                                                  \
3916      if (PROCNAN) {                                             \
3917        result = FPProcessNaNs(op1, op2);                        \
3918        if (!std::isnan(result)) {                               \
3919          result = OP(op1, op2);                                 \
3920        }                                                        \
3921      } else {                                                   \
3922        result = OP(op1, op2);                                   \
3923      }                                                          \
3924      dst.SetFloat(i, result);                                   \
3925    }                                                            \
3926    return dst;                                                  \
3927  }                                                              \
3928                                                                 \
3929  LogicVRegister Simulator::FN(VectorFormat vform,               \
3930                               LogicVRegister dst,               \
3931                               const LogicVRegister& src1,       \
3932                               const LogicVRegister& src2) {     \
3933    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
3934      FN<float>(vform, dst, src1, src2);                         \
3935    } else {                                                     \
3936      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
3937      FN<double>(vform, dst, src1, src2);                        \
3938    }                                                            \
3939    return dst;                                                  \
3940  }
3941NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3942#undef DEFINE_NEON_FP_VECTOR_OP
3943
3944
3945LogicVRegister Simulator::fnmul(VectorFormat vform,
3946                                LogicVRegister dst,
3947                                const LogicVRegister& src1,
3948                                const LogicVRegister& src2) {
3949  SimVRegister temp;
3950  LogicVRegister product = fmul(vform, temp, src1, src2);
3951  return fneg(vform, dst, product);
3952}
3953
3954
3955template <typename T>
3956LogicVRegister Simulator::frecps(VectorFormat vform,
3957                                 LogicVRegister dst,
3958                                 const LogicVRegister& src1,
3959                                 const LogicVRegister& src2) {
3960  dst.ClearForWrite(vform);
3961  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3962    T op1 = -src1.Float<T>(i);
3963    T op2 = src2.Float<T>(i);
3964    T result = FPProcessNaNs(op1, op2);
3965    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3966  }
3967  return dst;
3968}
3969
3970
3971LogicVRegister Simulator::frecps(VectorFormat vform,
3972                                 LogicVRegister dst,
3973                                 const LogicVRegister& src1,
3974                                 const LogicVRegister& src2) {
3975  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3976    frecps<float>(vform, dst, src1, src2);
3977  } else {
3978    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3979    frecps<double>(vform, dst, src1, src2);
3980  }
3981  return dst;
3982}
3983
3984
3985template <typename T>
3986LogicVRegister Simulator::frsqrts(VectorFormat vform,
3987                                  LogicVRegister dst,
3988                                  const LogicVRegister& src1,
3989                                  const LogicVRegister& src2) {
3990  dst.ClearForWrite(vform);
3991  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3992    T op1 = -src1.Float<T>(i);
3993    T op2 = src2.Float<T>(i);
3994    T result = FPProcessNaNs(op1, op2);
3995    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3996  }
3997  return dst;
3998}
3999
4000
4001LogicVRegister Simulator::frsqrts(VectorFormat vform,
4002                                  LogicVRegister dst,
4003                                  const LogicVRegister& src1,
4004                                  const LogicVRegister& src2) {
4005  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4006    frsqrts<float>(vform, dst, src1, src2);
4007  } else {
4008    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4009    frsqrts<double>(vform, dst, src1, src2);
4010  }
4011  return dst;
4012}
4013
4014
4015template <typename T>
4016LogicVRegister Simulator::fcmp(VectorFormat vform,
4017                               LogicVRegister dst,
4018                               const LogicVRegister& src1,
4019                               const LogicVRegister& src2,
4020                               Condition cond) {
4021  dst.ClearForWrite(vform);
4022  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4023    bool result = false;
4024    T op1 = src1.Float<T>(i);
4025    T op2 = src2.Float<T>(i);
4026    T nan_result = FPProcessNaNs(op1, op2);
4027    if (!std::isnan(nan_result)) {
4028      switch (cond) {
4029        case eq:
4030          result = (op1 == op2);
4031          break;
4032        case ge:
4033          result = (op1 >= op2);
4034          break;
4035        case gt:
4036          result = (op1 > op2);
4037          break;
4038        case le:
4039          result = (op1 <= op2);
4040          break;
4041        case lt:
4042          result = (op1 < op2);
4043          break;
4044        default:
4045          VIXL_UNREACHABLE();
4046          break;
4047      }
4048    }
4049    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4050  }
4051  return dst;
4052}
4053
4054
4055LogicVRegister Simulator::fcmp(VectorFormat vform,
4056                               LogicVRegister dst,
4057                               const LogicVRegister& src1,
4058                               const LogicVRegister& src2,
4059                               Condition cond) {
4060  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4061    fcmp<float>(vform, dst, src1, src2, cond);
4062  } else {
4063    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4064    fcmp<double>(vform, dst, src1, src2, cond);
4065  }
4066  return dst;
4067}
4068
4069
4070LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4071                                    LogicVRegister dst,
4072                                    const LogicVRegister& src,
4073                                    Condition cond) {
4074  SimVRegister temp;
4075  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4076    LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4077    fcmp<float>(vform, dst, src, zero_reg, cond);
4078  } else {
4079    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4080    LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4081    fcmp<double>(vform, dst, src, zero_reg, cond);
4082  }
4083  return dst;
4084}
4085
4086
4087LogicVRegister Simulator::fabscmp(VectorFormat vform,
4088                                  LogicVRegister dst,
4089                                  const LogicVRegister& src1,
4090                                  const LogicVRegister& src2,
4091                                  Condition cond) {
4092  SimVRegister temp1, temp2;
4093  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4094    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4095    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4096    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4097  } else {
4098    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4099    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4100    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4101    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4102  }
4103  return dst;
4104}
4105
4106
4107template <typename T>
4108LogicVRegister Simulator::fmla(VectorFormat vform,
4109                               LogicVRegister dst,
4110                               const LogicVRegister& src1,
4111                               const LogicVRegister& src2) {
4112  dst.ClearForWrite(vform);
4113  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4114    T op1 = src1.Float<T>(i);
4115    T op2 = src2.Float<T>(i);
4116    T acc = dst.Float<T>(i);
4117    T result = FPMulAdd(acc, op1, op2);
4118    dst.SetFloat(i, result);
4119  }
4120  return dst;
4121}
4122
4123
4124LogicVRegister Simulator::fmla(VectorFormat vform,
4125                               LogicVRegister dst,
4126                               const LogicVRegister& src1,
4127                               const LogicVRegister& src2) {
4128  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4129    fmla<float>(vform, dst, src1, src2);
4130  } else {
4131    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4132    fmla<double>(vform, dst, src1, src2);
4133  }
4134  return dst;
4135}
4136
4137
4138template <typename T>
4139LogicVRegister Simulator::fmls(VectorFormat vform,
4140                               LogicVRegister dst,
4141                               const LogicVRegister& src1,
4142                               const LogicVRegister& src2) {
4143  dst.ClearForWrite(vform);
4144  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4145    T op1 = -src1.Float<T>(i);
4146    T op2 = src2.Float<T>(i);
4147    T acc = dst.Float<T>(i);
4148    T result = FPMulAdd(acc, op1, op2);
4149    dst.SetFloat(i, result);
4150  }
4151  return dst;
4152}
4153
4154
4155LogicVRegister Simulator::fmls(VectorFormat vform,
4156                               LogicVRegister dst,
4157                               const LogicVRegister& src1,
4158                               const LogicVRegister& src2) {
4159  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4160    fmls<float>(vform, dst, src1, src2);
4161  } else {
4162    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4163    fmls<double>(vform, dst, src1, src2);
4164  }
4165  return dst;
4166}
4167
4168
4169template <typename T>
4170LogicVRegister Simulator::fneg(VectorFormat vform,
4171                               LogicVRegister dst,
4172                               const LogicVRegister& src) {
4173  dst.ClearForWrite(vform);
4174  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4175    T op = src.Float<T>(i);
4176    op = -op;
4177    dst.SetFloat(i, op);
4178  }
4179  return dst;
4180}
4181
4182
4183LogicVRegister Simulator::fneg(VectorFormat vform,
4184                               LogicVRegister dst,
4185                               const LogicVRegister& src) {
4186  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4187    fneg<float>(vform, dst, src);
4188  } else {
4189    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4190    fneg<double>(vform, dst, src);
4191  }
4192  return dst;
4193}
4194
4195
4196template <typename T>
4197LogicVRegister Simulator::fabs_(VectorFormat vform,
4198                                LogicVRegister dst,
4199                                const LogicVRegister& src) {
4200  dst.ClearForWrite(vform);
4201  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4202    T op = src.Float<T>(i);
4203    if (copysign(1.0, op) < 0.0) {
4204      op = -op;
4205    }
4206    dst.SetFloat(i, op);
4207  }
4208  return dst;
4209}
4210
4211
4212LogicVRegister Simulator::fabs_(VectorFormat vform,
4213                                LogicVRegister dst,
4214                                const LogicVRegister& src) {
4215  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4216    fabs_<float>(vform, dst, src);
4217  } else {
4218    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4219    fabs_<double>(vform, dst, src);
4220  }
4221  return dst;
4222}
4223
4224
4225LogicVRegister Simulator::fabd(VectorFormat vform,
4226                               LogicVRegister dst,
4227                               const LogicVRegister& src1,
4228                               const LogicVRegister& src2) {
4229  SimVRegister temp;
4230  fsub(vform, temp, src1, src2);
4231  fabs_(vform, dst, temp);
4232  return dst;
4233}
4234
4235
4236LogicVRegister Simulator::fsqrt(VectorFormat vform,
4237                                LogicVRegister dst,
4238                                const LogicVRegister& src) {
4239  dst.ClearForWrite(vform);
4240  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4241    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4242      float result = FPSqrt(src.Float<float>(i));
4243      dst.SetFloat(i, result);
4244    }
4245  } else {
4246    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4247    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4248      double result = FPSqrt(src.Float<double>(i));
4249      dst.SetFloat(i, result);
4250    }
4251  }
4252  return dst;
4253}
4254
4255
4256#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                           \
4257  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4258                                LogicVRegister dst,                   \
4259                                const LogicVRegister& src1,           \
4260                                const LogicVRegister& src2) {         \
4261    SimVRegister temp1, temp2;                                        \
4262    uzp1(vform, temp1, src1, src2);                                   \
4263    uzp2(vform, temp2, src1, src2);                                   \
4264    FN(vform, dst, temp1, temp2);                                     \
4265    return dst;                                                       \
4266  }                                                                   \
4267                                                                      \
4268  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4269                                LogicVRegister dst,                   \
4270                                const LogicVRegister& src) {          \
4271    if (vform == kFormatS) {                                          \
4272      float result = OP(src.Float<float>(0), src.Float<float>(1));    \
4273      dst.SetFloat(0, result);                                        \
4274    } else {                                                          \
4275      VIXL_ASSERT(vform == kFormatD);                                 \
4276      double result = OP(src.Float<double>(0), src.Float<double>(1)); \
4277      dst.SetFloat(0, result);                                        \
4278    }                                                                 \
4279    dst.ClearForWrite(vform);                                         \
4280    return dst;                                                       \
4281  }
4282NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
4283#undef DEFINE_NEON_FP_PAIR_OP
4284
4285
4286LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4287                                   LogicVRegister dst,
4288                                   const LogicVRegister& src,
4289                                   FPMinMaxOp Op) {
4290  VIXL_ASSERT(vform == kFormat4S);
4291  USE(vform);
4292  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4293  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4294  float result = (this->*Op)(result1, result2);
4295  dst.ClearForWrite(kFormatS);
4296  dst.SetFloat<float>(0, result);
4297  return dst;
4298}
4299
4300
4301LogicVRegister Simulator::fmaxv(VectorFormat vform,
4302                                LogicVRegister dst,
4303                                const LogicVRegister& src) {
4304  return fminmaxv(vform, dst, src, &Simulator::FPMax);
4305}
4306
4307
4308LogicVRegister Simulator::fminv(VectorFormat vform,
4309                                LogicVRegister dst,
4310                                const LogicVRegister& src) {
4311  return fminmaxv(vform, dst, src, &Simulator::FPMin);
4312}
4313
4314
4315LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4316                                  LogicVRegister dst,
4317                                  const LogicVRegister& src) {
4318  return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4319}
4320
4321
4322LogicVRegister Simulator::fminnmv(VectorFormat vform,
4323                                  LogicVRegister dst,
4324                                  const LogicVRegister& src) {
4325  return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4326}
4327
4328
4329LogicVRegister Simulator::fmul(VectorFormat vform,
4330                               LogicVRegister dst,
4331                               const LogicVRegister& src1,
4332                               const LogicVRegister& src2,
4333                               int index) {
4334  dst.ClearForWrite(vform);
4335  SimVRegister temp;
4336  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4337    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4338    fmul<float>(vform, dst, src1, index_reg);
4339
4340  } else {
4341    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4342    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4343    fmul<double>(vform, dst, src1, index_reg);
4344  }
4345  return dst;
4346}
4347
4348
4349LogicVRegister Simulator::fmla(VectorFormat vform,
4350                               LogicVRegister dst,
4351                               const LogicVRegister& src1,
4352                               const LogicVRegister& src2,
4353                               int index) {
4354  dst.ClearForWrite(vform);
4355  SimVRegister temp;
4356  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4357    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4358    fmla<float>(vform, dst, src1, index_reg);
4359
4360  } else {
4361    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4362    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4363    fmla<double>(vform, dst, src1, index_reg);
4364  }
4365  return dst;
4366}
4367
4368
4369LogicVRegister Simulator::fmls(VectorFormat vform,
4370                               LogicVRegister dst,
4371                               const LogicVRegister& src1,
4372                               const LogicVRegister& src2,
4373                               int index) {
4374  dst.ClearForWrite(vform);
4375  SimVRegister temp;
4376  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4377    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4378    fmls<float>(vform, dst, src1, index_reg);
4379
4380  } else {
4381    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4382    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4383    fmls<double>(vform, dst, src1, index_reg);
4384  }
4385  return dst;
4386}
4387
4388
4389LogicVRegister Simulator::fmulx(VectorFormat vform,
4390                                LogicVRegister dst,
4391                                const LogicVRegister& src1,
4392                                const LogicVRegister& src2,
4393                                int index) {
4394  dst.ClearForWrite(vform);
4395  SimVRegister temp;
4396  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4397    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4398    fmulx<float>(vform, dst, src1, index_reg);
4399
4400  } else {
4401    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4402    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4403    fmulx<double>(vform, dst, src1, index_reg);
4404  }
4405  return dst;
4406}
4407
4408
4409LogicVRegister Simulator::frint(VectorFormat vform,
4410                                LogicVRegister dst,
4411                                const LogicVRegister& src,
4412                                FPRounding rounding_mode,
4413                                bool inexact_exception) {
4414  dst.ClearForWrite(vform);
4415  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4416    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4417      float input = src.Float<float>(i);
4418      float rounded = FPRoundInt(input, rounding_mode);
4419      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4420        FPProcessException();
4421      }
4422      dst.SetFloat<float>(i, rounded);
4423    }
4424  } else {
4425    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4426    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4427      double input = src.Float<double>(i);
4428      double rounded = FPRoundInt(input, rounding_mode);
4429      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4430        FPProcessException();
4431      }
4432      dst.SetFloat<double>(i, rounded);
4433    }
4434  }
4435  return dst;
4436}
4437
4438
4439LogicVRegister Simulator::fcvts(VectorFormat vform,
4440                                LogicVRegister dst,
4441                                const LogicVRegister& src,
4442                                FPRounding rounding_mode,
4443                                int fbits) {
4444  dst.ClearForWrite(vform);
4445  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4446    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4447      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4448      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4449    }
4450  } else {
4451    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4452    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4453      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4454      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4455    }
4456  }
4457  return dst;
4458}
4459
4460
4461LogicVRegister Simulator::fcvtu(VectorFormat vform,
4462                                LogicVRegister dst,
4463                                const LogicVRegister& src,
4464                                FPRounding rounding_mode,
4465                                int fbits) {
4466  dst.ClearForWrite(vform);
4467  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4468    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4469      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4470      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4471    }
4472  } else {
4473    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4474    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4475      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4476      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4477    }
4478  }
4479  return dst;
4480}
4481
4482
4483LogicVRegister Simulator::fcvtl(VectorFormat vform,
4484                                LogicVRegister dst,
4485                                const LogicVRegister& src) {
4486  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4487    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4488      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4489    }
4490  } else {
4491    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4492    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4493      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4494    }
4495  }
4496  return dst;
4497}
4498
4499
4500LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4501                                 LogicVRegister dst,
4502                                 const LogicVRegister& src) {
4503  int lane_count = LaneCountFromFormat(vform);
4504  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4505    for (int i = 0; i < lane_count; i++) {
4506      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4507    }
4508  } else {
4509    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4510    for (int i = 0; i < lane_count; i++) {
4511      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4512    }
4513  }
4514  return dst;
4515}
4516
4517
4518LogicVRegister Simulator::fcvtn(VectorFormat vform,
4519                                LogicVRegister dst,
4520                                const LogicVRegister& src) {
4521  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4522    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4523      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4524    }
4525  } else {
4526    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4527    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4528      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4529    }
4530  }
4531  return dst;
4532}
4533
4534
4535LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4536                                 LogicVRegister dst,
4537                                 const LogicVRegister& src) {
4538  int lane_count = LaneCountFromFormat(vform) / 2;
4539  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4540    for (int i = lane_count - 1; i >= 0; i--) {
4541      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4542    }
4543  } else {
4544    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4545    for (int i = lane_count - 1; i >= 0; i--) {
4546      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4547    }
4548  }
4549  return dst;
4550}
4551
4552
4553LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4554                                 LogicVRegister dst,
4555                                 const LogicVRegister& src) {
4556  dst.ClearForWrite(vform);
4557  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4558  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4559    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4560  }
4561  return dst;
4562}
4563
4564
4565LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4566                                  LogicVRegister dst,
4567                                  const LogicVRegister& src) {
4568  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4569  int lane_count = LaneCountFromFormat(vform) / 2;
4570  for (int i = lane_count - 1; i >= 0; i--) {
4571    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4572  }
4573  return dst;
4574}
4575
4576
4577// Based on reference C function recip_sqrt_estimate from ARM ARM.
4578double Simulator::recip_sqrt_estimate(double a) {
4579  int q0, q1, s;
4580  double r;
4581  if (a < 0.5) {
4582    q0 = static_cast<int>(a * 512.0);
4583    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4584  } else {
4585    q1 = static_cast<int>(a * 256.0);
4586    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4587  }
4588  s = static_cast<int>(256.0 * r + 0.5);
4589  return static_cast<double>(s) / 256.0;
4590}
4591
4592
4593static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4594  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
4595}
4596
4597
4598template <typename T>
4599T Simulator::FPRecipSqrtEstimate(T op) {
4600  if (std::isnan(op)) {
4601    return FPProcessNaN(op);
4602  } else if (op == 0.0) {
4603    if (copysign(1.0, op) < 0.0) {
4604      return kFP64NegativeInfinity;
4605    } else {
4606      return kFP64PositiveInfinity;
4607    }
4608  } else if (copysign(1.0, op) < 0.0) {
4609    FPProcessException();
4610    return FPDefaultNaN<T>();
4611  } else if (std::isinf(op)) {
4612    return 0.0;
4613  } else {
4614    uint64_t fraction;
4615    int exp, result_exp;
4616
4617    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4618      exp = FloatExp(op);
4619      fraction = FloatMantissa(op);
4620      fraction <<= 29;
4621    } else {
4622      exp = DoubleExp(op);
4623      fraction = DoubleMantissa(op);
4624    }
4625
4626    if (exp == 0) {
4627      while (Bits(fraction, 51, 51) == 0) {
4628        fraction = Bits(fraction, 50, 0) << 1;
4629        exp -= 1;
4630      }
4631      fraction = Bits(fraction, 50, 0) << 1;
4632    }
4633
4634    double scaled;
4635    if (Bits(exp, 0, 0) == 0) {
4636      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4637    } else {
4638      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
4639    }
4640
4641    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4642      result_exp = (380 - exp) / 2;
4643    } else {
4644      result_exp = (3068 - exp) / 2;
4645    }
4646
4647    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
4648
4649    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4650      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4651      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4652      return FloatPack(0, exp_bits, est_bits);
4653    } else {
4654      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4655    }
4656  }
4657}
4658
4659
4660LogicVRegister Simulator::frsqrte(VectorFormat vform,
4661                                  LogicVRegister dst,
4662                                  const LogicVRegister& src) {
4663  dst.ClearForWrite(vform);
4664  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4665    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4666      float input = src.Float<float>(i);
4667      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4668    }
4669  } else {
4670    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4671    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4672      double input = src.Float<double>(i);
4673      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4674    }
4675  }
4676  return dst;
4677}
4678
4679template <typename T>
4680T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4681  uint32_t sign;
4682
4683  if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4684    sign = FloatSign(op);
4685  } else {
4686    sign = DoubleSign(op);
4687  }
4688
4689  if (std::isnan(op)) {
4690    return FPProcessNaN(op);
4691  } else if (std::isinf(op)) {
4692    return (sign == 1) ? -0.0 : 0.0;
4693  } else if (op == 0.0) {
4694    FPProcessException();  // FPExc_DivideByZero exception.
4695    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4696  } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
4697              (std::fabs(op) < std::pow(2.0, -128.0))) ||
4698             ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
4699              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4700    bool overflow_to_inf = false;
4701    switch (rounding) {
4702      case FPTieEven:
4703        overflow_to_inf = true;
4704        break;
4705      case FPPositiveInfinity:
4706        overflow_to_inf = (sign == 0);
4707        break;
4708      case FPNegativeInfinity:
4709        overflow_to_inf = (sign == 1);
4710        break;
4711      case FPZero:
4712        overflow_to_inf = false;
4713        break;
4714      default:
4715        break;
4716    }
4717    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4718    if (overflow_to_inf) {
4719      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4720    } else {
4721      // Return FPMaxNormal(sign).
4722      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4723        return FloatPack(sign, 0xfe, 0x07fffff);
4724      } else {
4725        return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
4726      }
4727    }
4728  } else {
4729    uint64_t fraction;
4730    int exp, result_exp;
4731    uint32_t sign;
4732
4733    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4734      sign = FloatSign(op);
4735      exp = FloatExp(op);
4736      fraction = FloatMantissa(op);
4737      fraction <<= 29;
4738    } else {
4739      sign = DoubleSign(op);
4740      exp = DoubleExp(op);
4741      fraction = DoubleMantissa(op);
4742    }
4743
4744    if (exp == 0) {
4745      if (Bits(fraction, 51, 51) == 0) {
4746        exp -= 1;
4747        fraction = Bits(fraction, 49, 0) << 2;
4748      } else {
4749        fraction = Bits(fraction, 50, 0) << 1;
4750      }
4751    }
4752
4753    double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4754
4755    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4756      result_exp = (253 - exp);        // In range 253-254 = -1 to 253+1 = 254.
4757    } else {
4758      result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
4759    }
4760
4761    double estimate = recip_estimate(scaled);
4762
4763    fraction = DoubleMantissa(estimate);
4764    if (result_exp == 0) {
4765      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4766    } else if (result_exp == -1) {
4767      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4768      result_exp = 0;
4769    }
4770    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4771      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4772      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4773      return FloatPack(sign, exp_bits, frac_bits);
4774    } else {
4775      return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4776    }
4777  }
4778}
4779
4780
4781LogicVRegister Simulator::frecpe(VectorFormat vform,
4782                                 LogicVRegister dst,
4783                                 const LogicVRegister& src,
4784                                 FPRounding round) {
4785  dst.ClearForWrite(vform);
4786  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4787    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4788      float input = src.Float<float>(i);
4789      dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4790    }
4791  } else {
4792    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4793    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4794      double input = src.Float<double>(i);
4795      dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4796    }
4797  }
4798  return dst;
4799}
4800
4801
4802LogicVRegister Simulator::ursqrte(VectorFormat vform,
4803                                  LogicVRegister dst,
4804                                  const LogicVRegister& src) {
4805  dst.ClearForWrite(vform);
4806  uint64_t operand;
4807  uint32_t result;
4808  double dp_operand, dp_result;
4809  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4810    operand = src.Uint(vform, i);
4811    if (operand <= 0x3FFFFFFF) {
4812      result = 0xFFFFFFFF;
4813    } else {
4814      dp_operand = operand * std::pow(2.0, -32);
4815      dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4816      result = static_cast<uint32_t>(dp_result);
4817    }
4818    dst.SetUint(vform, i, result);
4819  }
4820  return dst;
4821}
4822
4823
4824// Based on reference C function recip_estimate from ARM ARM.
4825double Simulator::recip_estimate(double a) {
4826  int q, s;
4827  double r;
4828  q = static_cast<int>(a * 512.0);
4829  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4830  s = static_cast<int>(256.0 * r + 0.5);
4831  return static_cast<double>(s) / 256.0;
4832}
4833
4834
4835LogicVRegister Simulator::urecpe(VectorFormat vform,
4836                                 LogicVRegister dst,
4837                                 const LogicVRegister& src) {
4838  dst.ClearForWrite(vform);
4839  uint64_t operand;
4840  uint32_t result;
4841  double dp_operand, dp_result;
4842  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4843    operand = src.Uint(vform, i);
4844    if (operand <= 0x7FFFFFFF) {
4845      result = 0xFFFFFFFF;
4846    } else {
4847      dp_operand = operand * std::pow(2.0, -32);
4848      dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4849      result = static_cast<uint32_t>(dp_result);
4850    }
4851    dst.SetUint(vform, i, result);
4852  }
4853  return dst;
4854}
4855
4856template <typename T>
4857LogicVRegister Simulator::frecpx(VectorFormat vform,
4858                                 LogicVRegister dst,
4859                                 const LogicVRegister& src) {
4860  dst.ClearForWrite(vform);
4861  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4862    T op = src.Float<T>(i);
4863    T result;
4864    if (std::isnan(op)) {
4865      result = FPProcessNaN(op);
4866    } else {
4867      int exp;
4868      uint32_t sign;
4869      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4870        sign = FloatSign(op);
4871        exp = FloatExp(op);
4872        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4873        result = FloatPack(sign, exp, 0);
4874      } else {
4875        sign = DoubleSign(op);
4876        exp = DoubleExp(op);
4877        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4878        result = DoublePack(sign, exp, 0);
4879      }
4880    }
4881    dst.SetFloat(i, result);
4882  }
4883  return dst;
4884}
4885
4886
4887LogicVRegister Simulator::frecpx(VectorFormat vform,
4888                                 LogicVRegister dst,
4889                                 const LogicVRegister& src) {
4890  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4891    frecpx<float>(vform, dst, src);
4892  } else {
4893    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4894    frecpx<double>(vform, dst, src);
4895  }
4896  return dst;
4897}
4898
4899LogicVRegister Simulator::scvtf(VectorFormat vform,
4900                                LogicVRegister dst,
4901                                const LogicVRegister& src,
4902                                int fbits,
4903                                FPRounding round) {
4904  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4905    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4906      float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4907      dst.SetFloat<float>(i, result);
4908    } else {
4909      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4910      double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4911      dst.SetFloat<double>(i, result);
4912    }
4913  }
4914  return dst;
4915}
4916
4917
4918LogicVRegister Simulator::ucvtf(VectorFormat vform,
4919                                LogicVRegister dst,
4920                                const LogicVRegister& src,
4921                                int fbits,
4922                                FPRounding round) {
4923  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4924    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4925      float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4926      dst.SetFloat<float>(i, result);
4927    } else {
4928      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4929      double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4930      dst.SetFloat<double>(i, result);
4931    }
4932  }
4933  return dst;
4934}
4935
4936
4937}  // namespace aarch64
4938}  // namespace vixl
4939
4940#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
4941