logic-aarch64.cc revision 1e85b7f2e8ad2bfb233de29405aade635ed207ce
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29#include <cmath>
30
31#include "aarch64/simulator-aarch64.h"
32
33namespace vixl {
34namespace aarch64 {
35
36template <>
37double Simulator::FPDefaultNaN<double>() {
38  return kFP64DefaultNaN;
39}
40
41
42template <>
43float Simulator::FPDefaultNaN<float>() {
44  return kFP32DefaultNaN;
45}
46
47// See FPRound for a description of this function.
48static inline double FPRoundToDouble(int64_t sign,
49                                     int64_t exponent,
50                                     uint64_t mantissa,
51                                     FPRounding round_mode) {
52  int64_t bits =
53      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
54                                                                 exponent,
55                                                                 mantissa,
56                                                                 round_mode);
57  return RawbitsToDouble(bits);
58}
59
60
61// See FPRound for a description of this function.
62static inline float FPRoundToFloat(int64_t sign,
63                                   int64_t exponent,
64                                   uint64_t mantissa,
65                                   FPRounding round_mode) {
66  int32_t bits =
67      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
68                                                               exponent,
69                                                               mantissa,
70                                                               round_mode);
71  return RawbitsToFloat(bits);
72}
73
74
75// See FPRound for a description of this function.
76static inline float16 FPRoundToFloat16(int64_t sign,
77                                       int64_t exponent,
78                                       uint64_t mantissa,
79                                       FPRounding round_mode) {
80  return FPRound<float16,
81                 kFloat16ExponentBits,
82                 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
83}
84
85
86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
87  if (src >= 0) {
88    return UFixedToDouble(src, fbits, round);
89  } else {
90    // This works for all negative values, including INT64_MIN.
91    return -UFixedToDouble(-src, fbits, round);
92  }
93}
94
95
96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
97  // An input of 0 is a special case because the result is effectively
98  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
99  if (src == 0) {
100    return 0.0;
101  }
102
103  // Calculate the exponent. The highest significant bit will have the value
104  // 2^exponent.
105  const int highest_significant_bit = 63 - CountLeadingZeros(src);
106  const int64_t exponent = highest_significant_bit - fbits;
107
108  return FPRoundToDouble(0, exponent, src, round);
109}
110
111
112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
113  if (src >= 0) {
114    return UFixedToFloat(src, fbits, round);
115  } else {
116    // This works for all negative values, including INT64_MIN.
117    return -UFixedToFloat(-src, fbits, round);
118  }
119}
120
121
122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
123  // An input of 0 is a special case because the result is effectively
124  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
125  if (src == 0) {
126    return 0.0f;
127  }
128
129  // Calculate the exponent. The highest significant bit will have the value
130  // 2^exponent.
131  const int highest_significant_bit = 63 - CountLeadingZeros(src);
132  const int32_t exponent = highest_significant_bit - fbits;
133
134  return FPRoundToFloat(0, exponent, src, round);
135}
136
137
138double Simulator::FPToDouble(float value) {
139  switch (std::fpclassify(value)) {
140    case FP_NAN: {
141      if (IsSignallingNaN(value)) {
142        FPProcessException();
143      }
144      if (ReadDN()) return kFP64DefaultNaN;
145
146      // Convert NaNs as the processor would:
147      //  - The sign is propagated.
148      //  - The payload (mantissa) is transferred entirely, except that the top
149      //    bit is forced to '1', making the result a quiet NaN. The unused
150      //    (low-order) payload bits are set to 0.
151      uint32_t raw = FloatToRawbits(value);
152
153      uint64_t sign = raw >> 31;
154      uint64_t exponent = (1 << 11) - 1;
155      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
156      payload <<= (52 - 23);           // The unused low-order bits should be 0.
157      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
158
159      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
160    }
161
162    case FP_ZERO:
163    case FP_NORMAL:
164    case FP_SUBNORMAL:
165    case FP_INFINITE: {
166      // All other inputs are preserved in a standard cast, because every value
167      // representable using an IEEE-754 float is also representable using an
168      // IEEE-754 double.
169      return static_cast<double>(value);
170    }
171  }
172
173  VIXL_UNREACHABLE();
174  return static_cast<double>(value);
175}
176
177
178float Simulator::FPToFloat(float16 value) {
179  uint32_t sign = value >> 15;
180  uint32_t exponent =
181      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
182                                kFloat16MantissaBits,
183                                value);
184  uint32_t mantissa =
185      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
186
187  switch (Float16Classify(value)) {
188    case FP_ZERO:
189      return (sign == 0) ? 0.0f : -0.0f;
190
191    case FP_INFINITE:
192      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
193
194    case FP_SUBNORMAL: {
195      // Calculate shift required to put mantissa into the most-significant bits
196      // of the destination mantissa.
197      int shift = CountLeadingZeros(mantissa << (32 - 10));
198
199      // Shift mantissa and discard implicit '1'.
200      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
201      mantissa &= (1 << kFloatMantissaBits) - 1;
202
203      // Adjust the exponent for the shift applied, and rebias.
204      exponent = exponent - shift + (-15 + 127);
205      break;
206    }
207
208    case FP_NAN:
209      if (IsSignallingNaN(value)) {
210        FPProcessException();
211      }
212      if (ReadDN()) return kFP32DefaultNaN;
213
214      // Convert NaNs as the processor would:
215      //  - The sign is propagated.
216      //  - The payload (mantissa) is transferred entirely, except that the top
217      //    bit is forced to '1', making the result a quiet NaN. The unused
218      //    (low-order) payload bits are set to 0.
219      exponent = (1 << kFloatExponentBits) - 1;
220
221      // Increase bits in mantissa, making low-order bits 0.
222      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
223      mantissa |= 1 << 22;  // Force a quiet NaN.
224      break;
225
226    case FP_NORMAL:
227      // Increase bits in mantissa, making low-order bits 0.
228      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
229
230      // Change exponent bias.
231      exponent += (-15 + 127);
232      break;
233
234    default:
235      VIXL_UNREACHABLE();
236  }
237  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
238                        mantissa);
239}
240
241
242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
243  // Only the FPTieEven rounding mode is implemented.
244  VIXL_ASSERT(round_mode == FPTieEven);
245  USE(round_mode);
246
247  uint32_t raw = FloatToRawbits(value);
248  int32_t sign = raw >> 31;
249  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
250  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
251
252  switch (std::fpclassify(value)) {
253    case FP_NAN: {
254      if (IsSignallingNaN(value)) {
255        FPProcessException();
256      }
257      if (ReadDN()) return kFP16DefaultNaN;
258
259      // Convert NaNs as the processor would:
260      //  - The sign is propagated.
261      //  - The payload (mantissa) is transferred as much as possible, except
262      //    that the top bit is forced to '1', making the result a quiet NaN.
263      float16 result =
264          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
265      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
266      result |= (1 << 9);  // Force a quiet NaN;
267      return result;
268    }
269
270    case FP_ZERO:
271      return (sign == 0) ? 0 : 0x8000;
272
273    case FP_INFINITE:
274      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
275
276    case FP_NORMAL:
277    case FP_SUBNORMAL: {
278      // Convert float-to-half as the processor would, assuming that FPCR.FZ
279      // (flush-to-zero) is not set.
280
281      // Add the implicit '1' bit to the mantissa.
282      mantissa += (1 << 23);
283      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
284    }
285  }
286
287  VIXL_UNREACHABLE();
288  return 0;
289}
290
291
292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
293  // Only the FPTieEven rounding mode is implemented.
294  VIXL_ASSERT(round_mode == FPTieEven);
295  USE(round_mode);
296
297  uint64_t raw = DoubleToRawbits(value);
298  int32_t sign = raw >> 63;
299  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
300  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
301
302  switch (std::fpclassify(value)) {
303    case FP_NAN: {
304      if (IsSignallingNaN(value)) {
305        FPProcessException();
306      }
307      if (ReadDN()) return kFP16DefaultNaN;
308
309      // Convert NaNs as the processor would:
310      //  - The sign is propagated.
311      //  - The payload (mantissa) is transferred as much as possible, except
312      //    that the top bit is forced to '1', making the result a quiet NaN.
313      float16 result =
314          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
315      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
316      result |= (1 << 9);  // Force a quiet NaN;
317      return result;
318    }
319
320    case FP_ZERO:
321      return (sign == 0) ? 0 : 0x8000;
322
323    case FP_INFINITE:
324      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
325
326    case FP_NORMAL:
327    case FP_SUBNORMAL: {
328      // Convert double-to-half as the processor would, assuming that FPCR.FZ
329      // (flush-to-zero) is not set.
330
331      // Add the implicit '1' bit to the mantissa.
332      mantissa += (UINT64_C(1) << 52);
333      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
334    }
335  }
336
337  VIXL_UNREACHABLE();
338  return 0;
339}
340
341
342float Simulator::FPToFloat(double value, FPRounding round_mode) {
343  // Only the FPTieEven rounding mode is implemented.
344  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
345  USE(round_mode);
346
347  switch (std::fpclassify(value)) {
348    case FP_NAN: {
349      if (IsSignallingNaN(value)) {
350        FPProcessException();
351      }
352      if (ReadDN()) return kFP32DefaultNaN;
353
354      // Convert NaNs as the processor would:
355      //  - The sign is propagated.
356      //  - The payload (mantissa) is transferred as much as possible, except
357      //    that the top bit is forced to '1', making the result a quiet NaN.
358      uint64_t raw = DoubleToRawbits(value);
359
360      uint32_t sign = raw >> 63;
361      uint32_t exponent = (1 << 8) - 1;
362      uint32_t payload =
363          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
364      payload |= (1 << 22);  // Force a quiet NaN.
365
366      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
367    }
368
369    case FP_ZERO:
370    case FP_INFINITE: {
371      // In a C++ cast, any value representable in the target type will be
372      // unchanged. This is always the case for +/-0.0 and infinities.
373      return static_cast<float>(value);
374    }
375
376    case FP_NORMAL:
377    case FP_SUBNORMAL: {
378      // Convert double-to-float as the processor would, assuming that FPCR.FZ
379      // (flush-to-zero) is not set.
380      uint64_t raw = DoubleToRawbits(value);
381      // Extract the IEEE-754 double components.
382      uint32_t sign = raw >> 63;
383      // Extract the exponent and remove the IEEE-754 encoding bias.
384      int32_t exponent =
385          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
386      // Extract the mantissa and add the implicit '1' bit.
387      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
388      if (std::fpclassify(value) == FP_NORMAL) {
389        mantissa |= (UINT64_C(1) << 52);
390      }
391      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
392    }
393  }
394
395  VIXL_UNREACHABLE();
396  return value;
397}
398
399
400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
401  dst.ClearForWrite(vform);
402  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403    dst.ReadUintFromMem(vform, i, addr);
404    addr += LaneSizeInBytesFromFormat(vform);
405  }
406}
407
408
409void Simulator::ld1(VectorFormat vform,
410                    LogicVRegister dst,
411                    int index,
412                    uint64_t addr) {
413  dst.ReadUintFromMem(vform, index, addr);
414}
415
416
417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
418  dst.ClearForWrite(vform);
419  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
420    dst.ReadUintFromMem(vform, i, addr);
421  }
422}
423
424
425void Simulator::ld2(VectorFormat vform,
426                    LogicVRegister dst1,
427                    LogicVRegister dst2,
428                    uint64_t addr1) {
429  dst1.ClearForWrite(vform);
430  dst2.ClearForWrite(vform);
431  int esize = LaneSizeInBytesFromFormat(vform);
432  uint64_t addr2 = addr1 + esize;
433  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
434    dst1.ReadUintFromMem(vform, i, addr1);
435    dst2.ReadUintFromMem(vform, i, addr2);
436    addr1 += 2 * esize;
437    addr2 += 2 * esize;
438  }
439}
440
441
442void Simulator::ld2(VectorFormat vform,
443                    LogicVRegister dst1,
444                    LogicVRegister dst2,
445                    int index,
446                    uint64_t addr1) {
447  dst1.ClearForWrite(vform);
448  dst2.ClearForWrite(vform);
449  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
450  dst1.ReadUintFromMem(vform, index, addr1);
451  dst2.ReadUintFromMem(vform, index, addr2);
452}
453
454
455void Simulator::ld2r(VectorFormat vform,
456                     LogicVRegister dst1,
457                     LogicVRegister dst2,
458                     uint64_t addr) {
459  dst1.ClearForWrite(vform);
460  dst2.ClearForWrite(vform);
461  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
462  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
463    dst1.ReadUintFromMem(vform, i, addr);
464    dst2.ReadUintFromMem(vform, i, addr2);
465  }
466}
467
468
469void Simulator::ld3(VectorFormat vform,
470                    LogicVRegister dst1,
471                    LogicVRegister dst2,
472                    LogicVRegister dst3,
473                    uint64_t addr1) {
474  dst1.ClearForWrite(vform);
475  dst2.ClearForWrite(vform);
476  dst3.ClearForWrite(vform);
477  int esize = LaneSizeInBytesFromFormat(vform);
478  uint64_t addr2 = addr1 + esize;
479  uint64_t addr3 = addr2 + esize;
480  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
481    dst1.ReadUintFromMem(vform, i, addr1);
482    dst2.ReadUintFromMem(vform, i, addr2);
483    dst3.ReadUintFromMem(vform, i, addr3);
484    addr1 += 3 * esize;
485    addr2 += 3 * esize;
486    addr3 += 3 * esize;
487  }
488}
489
490
491void Simulator::ld3(VectorFormat vform,
492                    LogicVRegister dst1,
493                    LogicVRegister dst2,
494                    LogicVRegister dst3,
495                    int index,
496                    uint64_t addr1) {
497  dst1.ClearForWrite(vform);
498  dst2.ClearForWrite(vform);
499  dst3.ClearForWrite(vform);
500  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
501  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
502  dst1.ReadUintFromMem(vform, index, addr1);
503  dst2.ReadUintFromMem(vform, index, addr2);
504  dst3.ReadUintFromMem(vform, index, addr3);
505}
506
507
508void Simulator::ld3r(VectorFormat vform,
509                     LogicVRegister dst1,
510                     LogicVRegister dst2,
511                     LogicVRegister dst3,
512                     uint64_t addr) {
513  dst1.ClearForWrite(vform);
514  dst2.ClearForWrite(vform);
515  dst3.ClearForWrite(vform);
516  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
517  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
518  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519    dst1.ReadUintFromMem(vform, i, addr);
520    dst2.ReadUintFromMem(vform, i, addr2);
521    dst3.ReadUintFromMem(vform, i, addr3);
522  }
523}
524
525
526void Simulator::ld4(VectorFormat vform,
527                    LogicVRegister dst1,
528                    LogicVRegister dst2,
529                    LogicVRegister dst3,
530                    LogicVRegister dst4,
531                    uint64_t addr1) {
532  dst1.ClearForWrite(vform);
533  dst2.ClearForWrite(vform);
534  dst3.ClearForWrite(vform);
535  dst4.ClearForWrite(vform);
536  int esize = LaneSizeInBytesFromFormat(vform);
537  uint64_t addr2 = addr1 + esize;
538  uint64_t addr3 = addr2 + esize;
539  uint64_t addr4 = addr3 + esize;
540  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
541    dst1.ReadUintFromMem(vform, i, addr1);
542    dst2.ReadUintFromMem(vform, i, addr2);
543    dst3.ReadUintFromMem(vform, i, addr3);
544    dst4.ReadUintFromMem(vform, i, addr4);
545    addr1 += 4 * esize;
546    addr2 += 4 * esize;
547    addr3 += 4 * esize;
548    addr4 += 4 * esize;
549  }
550}
551
552
553void Simulator::ld4(VectorFormat vform,
554                    LogicVRegister dst1,
555                    LogicVRegister dst2,
556                    LogicVRegister dst3,
557                    LogicVRegister dst4,
558                    int index,
559                    uint64_t addr1) {
560  dst1.ClearForWrite(vform);
561  dst2.ClearForWrite(vform);
562  dst3.ClearForWrite(vform);
563  dst4.ClearForWrite(vform);
564  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
565  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
566  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
567  dst1.ReadUintFromMem(vform, index, addr1);
568  dst2.ReadUintFromMem(vform, index, addr2);
569  dst3.ReadUintFromMem(vform, index, addr3);
570  dst4.ReadUintFromMem(vform, index, addr4);
571}
572
573
574void Simulator::ld4r(VectorFormat vform,
575                     LogicVRegister dst1,
576                     LogicVRegister dst2,
577                     LogicVRegister dst3,
578                     LogicVRegister dst4,
579                     uint64_t addr) {
580  dst1.ClearForWrite(vform);
581  dst2.ClearForWrite(vform);
582  dst3.ClearForWrite(vform);
583  dst4.ClearForWrite(vform);
584  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
585  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
586  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
587  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
588    dst1.ReadUintFromMem(vform, i, addr);
589    dst2.ReadUintFromMem(vform, i, addr2);
590    dst3.ReadUintFromMem(vform, i, addr3);
591    dst4.ReadUintFromMem(vform, i, addr4);
592  }
593}
594
595
596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
597  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
598    src.WriteUintToMem(vform, i, addr);
599    addr += LaneSizeInBytesFromFormat(vform);
600  }
601}
602
603
604void Simulator::st1(VectorFormat vform,
605                    LogicVRegister src,
606                    int index,
607                    uint64_t addr) {
608  src.WriteUintToMem(vform, index, addr);
609}
610
611
612void Simulator::st2(VectorFormat vform,
613                    LogicVRegister dst,
614                    LogicVRegister dst2,
615                    uint64_t addr) {
616  int esize = LaneSizeInBytesFromFormat(vform);
617  uint64_t addr2 = addr + esize;
618  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
619    dst.WriteUintToMem(vform, i, addr);
620    dst2.WriteUintToMem(vform, i, addr2);
621    addr += 2 * esize;
622    addr2 += 2 * esize;
623  }
624}
625
626
627void Simulator::st2(VectorFormat vform,
628                    LogicVRegister dst,
629                    LogicVRegister dst2,
630                    int index,
631                    uint64_t addr) {
632  int esize = LaneSizeInBytesFromFormat(vform);
633  dst.WriteUintToMem(vform, index, addr);
634  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
635}
636
637
638void Simulator::st3(VectorFormat vform,
639                    LogicVRegister dst,
640                    LogicVRegister dst2,
641                    LogicVRegister dst3,
642                    uint64_t addr) {
643  int esize = LaneSizeInBytesFromFormat(vform);
644  uint64_t addr2 = addr + esize;
645  uint64_t addr3 = addr2 + esize;
646  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
647    dst.WriteUintToMem(vform, i, addr);
648    dst2.WriteUintToMem(vform, i, addr2);
649    dst3.WriteUintToMem(vform, i, addr3);
650    addr += 3 * esize;
651    addr2 += 3 * esize;
652    addr3 += 3 * esize;
653  }
654}
655
656
657void Simulator::st3(VectorFormat vform,
658                    LogicVRegister dst,
659                    LogicVRegister dst2,
660                    LogicVRegister dst3,
661                    int index,
662                    uint64_t addr) {
663  int esize = LaneSizeInBytesFromFormat(vform);
664  dst.WriteUintToMem(vform, index, addr);
665  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
666  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
667}
668
669
670void Simulator::st4(VectorFormat vform,
671                    LogicVRegister dst,
672                    LogicVRegister dst2,
673                    LogicVRegister dst3,
674                    LogicVRegister dst4,
675                    uint64_t addr) {
676  int esize = LaneSizeInBytesFromFormat(vform);
677  uint64_t addr2 = addr + esize;
678  uint64_t addr3 = addr2 + esize;
679  uint64_t addr4 = addr3 + esize;
680  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
681    dst.WriteUintToMem(vform, i, addr);
682    dst2.WriteUintToMem(vform, i, addr2);
683    dst3.WriteUintToMem(vform, i, addr3);
684    dst4.WriteUintToMem(vform, i, addr4);
685    addr += 4 * esize;
686    addr2 += 4 * esize;
687    addr3 += 4 * esize;
688    addr4 += 4 * esize;
689  }
690}
691
692
693void Simulator::st4(VectorFormat vform,
694                    LogicVRegister dst,
695                    LogicVRegister dst2,
696                    LogicVRegister dst3,
697                    LogicVRegister dst4,
698                    int index,
699                    uint64_t addr) {
700  int esize = LaneSizeInBytesFromFormat(vform);
701  dst.WriteUintToMem(vform, index, addr);
702  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
703  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
704  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
705}
706
707
708LogicVRegister Simulator::cmp(VectorFormat vform,
709                              LogicVRegister dst,
710                              const LogicVRegister& src1,
711                              const LogicVRegister& src2,
712                              Condition cond) {
713  dst.ClearForWrite(vform);
714  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
715    int64_t sa = src1.Int(vform, i);
716    int64_t sb = src2.Int(vform, i);
717    uint64_t ua = src1.Uint(vform, i);
718    uint64_t ub = src2.Uint(vform, i);
719    bool result = false;
720    switch (cond) {
721      case eq:
722        result = (ua == ub);
723        break;
724      case ge:
725        result = (sa >= sb);
726        break;
727      case gt:
728        result = (sa > sb);
729        break;
730      case hi:
731        result = (ua > ub);
732        break;
733      case hs:
734        result = (ua >= ub);
735        break;
736      case lt:
737        result = (sa < sb);
738        break;
739      case le:
740        result = (sa <= sb);
741        break;
742      default:
743        VIXL_UNREACHABLE();
744        break;
745    }
746    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
747  }
748  return dst;
749}
750
751
752LogicVRegister Simulator::cmp(VectorFormat vform,
753                              LogicVRegister dst,
754                              const LogicVRegister& src1,
755                              int imm,
756                              Condition cond) {
757  SimVRegister temp;
758  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
759  return cmp(vform, dst, src1, imm_reg, cond);
760}
761
762
763LogicVRegister Simulator::cmptst(VectorFormat vform,
764                                 LogicVRegister dst,
765                                 const LogicVRegister& src1,
766                                 const LogicVRegister& src2) {
767  dst.ClearForWrite(vform);
768  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
769    uint64_t ua = src1.Uint(vform, i);
770    uint64_t ub = src2.Uint(vform, i);
771    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
772  }
773  return dst;
774}
775
776
777LogicVRegister Simulator::add(VectorFormat vform,
778                              LogicVRegister dst,
779                              const LogicVRegister& src1,
780                              const LogicVRegister& src2) {
781  dst.ClearForWrite(vform);
782  // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
783  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
784    // Test for unsigned saturation.
785    uint64_t ua = src1.UintLeftJustified(vform, i);
786    uint64_t ub = src2.UintLeftJustified(vform, i);
787    uint64_t ur = ua + ub;
788    if (ur < ua) {
789      dst.SetUnsignedSat(i, true);
790    }
791
792    // Test for signed saturation.
793    int64_t sa = src1.IntLeftJustified(vform, i);
794    int64_t sb = src2.IntLeftJustified(vform, i);
795    int64_t sr = sa + sb;
796    // If the signs of the operands are the same, but different from the result,
797    // there was an overflow.
798    if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
799      dst.SetSignedSat(i, sa >= 0);
800    }
801
802    dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
803  }
804  return dst;
805}
806
807
808LogicVRegister Simulator::addp(VectorFormat vform,
809                               LogicVRegister dst,
810                               const LogicVRegister& src1,
811                               const LogicVRegister& src2) {
812  SimVRegister temp1, temp2;
813  uzp1(vform, temp1, src1, src2);
814  uzp2(vform, temp2, src1, src2);
815  add(vform, dst, temp1, temp2);
816  return dst;
817}
818
819
820LogicVRegister Simulator::mla(VectorFormat vform,
821                              LogicVRegister dst,
822                              const LogicVRegister& src1,
823                              const LogicVRegister& src2) {
824  SimVRegister temp;
825  mul(vform, temp, src1, src2);
826  add(vform, dst, dst, temp);
827  return dst;
828}
829
830
831LogicVRegister Simulator::mls(VectorFormat vform,
832                              LogicVRegister dst,
833                              const LogicVRegister& src1,
834                              const LogicVRegister& src2) {
835  SimVRegister temp;
836  mul(vform, temp, src1, src2);
837  sub(vform, dst, dst, temp);
838  return dst;
839}
840
841
842LogicVRegister Simulator::mul(VectorFormat vform,
843                              LogicVRegister dst,
844                              const LogicVRegister& src1,
845                              const LogicVRegister& src2) {
846  dst.ClearForWrite(vform);
847  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
848    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
849  }
850  return dst;
851}
852
853
854LogicVRegister Simulator::mul(VectorFormat vform,
855                              LogicVRegister dst,
856                              const LogicVRegister& src1,
857                              const LogicVRegister& src2,
858                              int index) {
859  SimVRegister temp;
860  VectorFormat indexform = VectorFormatFillQ(vform);
861  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
862}
863
864
865LogicVRegister Simulator::mla(VectorFormat vform,
866                              LogicVRegister dst,
867                              const LogicVRegister& src1,
868                              const LogicVRegister& src2,
869                              int index) {
870  SimVRegister temp;
871  VectorFormat indexform = VectorFormatFillQ(vform);
872  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
873}
874
875
876LogicVRegister Simulator::mls(VectorFormat vform,
877                              LogicVRegister dst,
878                              const LogicVRegister& src1,
879                              const LogicVRegister& src2,
880                              int index) {
881  SimVRegister temp;
882  VectorFormat indexform = VectorFormatFillQ(vform);
883  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
884}
885
886
887LogicVRegister Simulator::smull(VectorFormat vform,
888                                LogicVRegister dst,
889                                const LogicVRegister& src1,
890                                const LogicVRegister& src2,
891                                int index) {
892  SimVRegister temp;
893  VectorFormat indexform =
894      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
896}
897
898
899LogicVRegister Simulator::smull2(VectorFormat vform,
900                                 LogicVRegister dst,
901                                 const LogicVRegister& src1,
902                                 const LogicVRegister& src2,
903                                 int index) {
904  SimVRegister temp;
905  VectorFormat indexform =
906      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
907  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
908}
909
910
911LogicVRegister Simulator::umull(VectorFormat vform,
912                                LogicVRegister dst,
913                                const LogicVRegister& src1,
914                                const LogicVRegister& src2,
915                                int index) {
916  SimVRegister temp;
917  VectorFormat indexform =
918      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
919  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
920}
921
922
923LogicVRegister Simulator::umull2(VectorFormat vform,
924                                 LogicVRegister dst,
925                                 const LogicVRegister& src1,
926                                 const LogicVRegister& src2,
927                                 int index) {
928  SimVRegister temp;
929  VectorFormat indexform =
930      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
931  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
932}
933
934
935LogicVRegister Simulator::smlal(VectorFormat vform,
936                                LogicVRegister dst,
937                                const LogicVRegister& src1,
938                                const LogicVRegister& src2,
939                                int index) {
940  SimVRegister temp;
941  VectorFormat indexform =
942      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
943  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
944}
945
946
947LogicVRegister Simulator::smlal2(VectorFormat vform,
948                                 LogicVRegister dst,
949                                 const LogicVRegister& src1,
950                                 const LogicVRegister& src2,
951                                 int index) {
952  SimVRegister temp;
953  VectorFormat indexform =
954      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
955  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
956}
957
958
959LogicVRegister Simulator::umlal(VectorFormat vform,
960                                LogicVRegister dst,
961                                const LogicVRegister& src1,
962                                const LogicVRegister& src2,
963                                int index) {
964  SimVRegister temp;
965  VectorFormat indexform =
966      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
967  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
968}
969
970
971LogicVRegister Simulator::umlal2(VectorFormat vform,
972                                 LogicVRegister dst,
973                                 const LogicVRegister& src1,
974                                 const LogicVRegister& src2,
975                                 int index) {
976  SimVRegister temp;
977  VectorFormat indexform =
978      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
979  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
980}
981
982
983LogicVRegister Simulator::smlsl(VectorFormat vform,
984                                LogicVRegister dst,
985                                const LogicVRegister& src1,
986                                const LogicVRegister& src2,
987                                int index) {
988  SimVRegister temp;
989  VectorFormat indexform =
990      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
991  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
992}
993
994
995LogicVRegister Simulator::smlsl2(VectorFormat vform,
996                                 LogicVRegister dst,
997                                 const LogicVRegister& src1,
998                                 const LogicVRegister& src2,
999                                 int index) {
1000  SimVRegister temp;
1001  VectorFormat indexform =
1002      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1003  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1004}
1005
1006
1007LogicVRegister Simulator::umlsl(VectorFormat vform,
1008                                LogicVRegister dst,
1009                                const LogicVRegister& src1,
1010                                const LogicVRegister& src2,
1011                                int index) {
1012  SimVRegister temp;
1013  VectorFormat indexform =
1014      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1015  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1016}
1017
1018
1019LogicVRegister Simulator::umlsl2(VectorFormat vform,
1020                                 LogicVRegister dst,
1021                                 const LogicVRegister& src1,
1022                                 const LogicVRegister& src2,
1023                                 int index) {
1024  SimVRegister temp;
1025  VectorFormat indexform =
1026      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1027  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1028}
1029
1030
1031LogicVRegister Simulator::sqdmull(VectorFormat vform,
1032                                  LogicVRegister dst,
1033                                  const LogicVRegister& src1,
1034                                  const LogicVRegister& src2,
1035                                  int index) {
1036  SimVRegister temp;
1037  VectorFormat indexform =
1038      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1039  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1040}
1041
1042
1043LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1044                                   LogicVRegister dst,
1045                                   const LogicVRegister& src1,
1046                                   const LogicVRegister& src2,
1047                                   int index) {
1048  SimVRegister temp;
1049  VectorFormat indexform =
1050      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1051  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1052}
1053
1054
1055LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1056                                  LogicVRegister dst,
1057                                  const LogicVRegister& src1,
1058                                  const LogicVRegister& src2,
1059                                  int index) {
1060  SimVRegister temp;
1061  VectorFormat indexform =
1062      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1063  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1064}
1065
1066
1067LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1068                                   LogicVRegister dst,
1069                                   const LogicVRegister& src1,
1070                                   const LogicVRegister& src2,
1071                                   int index) {
1072  SimVRegister temp;
1073  VectorFormat indexform =
1074      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1075  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1076}
1077
1078
1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1080                                  LogicVRegister dst,
1081                                  const LogicVRegister& src1,
1082                                  const LogicVRegister& src2,
1083                                  int index) {
1084  SimVRegister temp;
1085  VectorFormat indexform =
1086      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1087  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1088}
1089
1090
1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1092                                   LogicVRegister dst,
1093                                   const LogicVRegister& src1,
1094                                   const LogicVRegister& src2,
1095                                   int index) {
1096  SimVRegister temp;
1097  VectorFormat indexform =
1098      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1099  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1100}
1101
1102
1103LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1104                                  LogicVRegister dst,
1105                                  const LogicVRegister& src1,
1106                                  const LogicVRegister& src2,
1107                                  int index) {
1108  SimVRegister temp;
1109  VectorFormat indexform = VectorFormatFillQ(vform);
1110  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1111}
1112
1113
1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1115                                   LogicVRegister dst,
1116                                   const LogicVRegister& src1,
1117                                   const LogicVRegister& src2,
1118                                   int index) {
1119  SimVRegister temp;
1120  VectorFormat indexform = VectorFormatFillQ(vform);
1121  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1122}
1123
1124
1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1126  uint16_t result = 0;
1127  uint16_t extended_op2 = op2;
1128  for (int i = 0; i < 8; ++i) {
1129    if ((op1 >> i) & 1) {
1130      result = result ^ (extended_op2 << i);
1131    }
1132  }
1133  return result;
1134}
1135
1136
1137LogicVRegister Simulator::pmul(VectorFormat vform,
1138                               LogicVRegister dst,
1139                               const LogicVRegister& src1,
1140                               const LogicVRegister& src2) {
1141  dst.ClearForWrite(vform);
1142  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1143    dst.SetUint(vform,
1144                i,
1145                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1146  }
1147  return dst;
1148}
1149
1150
1151LogicVRegister Simulator::pmull(VectorFormat vform,
1152                                LogicVRegister dst,
1153                                const LogicVRegister& src1,
1154                                const LogicVRegister& src2) {
1155  VectorFormat vform_src = VectorFormatHalfWidth(vform);
1156  dst.ClearForWrite(vform);
1157  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1158    dst.SetUint(vform,
1159                i,
1160                PolynomialMult(src1.Uint(vform_src, i),
1161                               src2.Uint(vform_src, i)));
1162  }
1163  return dst;
1164}
1165
1166
1167LogicVRegister Simulator::pmull2(VectorFormat vform,
1168                                 LogicVRegister dst,
1169                                 const LogicVRegister& src1,
1170                                 const LogicVRegister& src2) {
1171  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1172  dst.ClearForWrite(vform);
1173  int lane_count = LaneCountFromFormat(vform);
1174  for (int i = 0; i < lane_count; i++) {
1175    dst.SetUint(vform,
1176                i,
1177                PolynomialMult(src1.Uint(vform_src, lane_count + i),
1178                               src2.Uint(vform_src, lane_count + i)));
1179  }
1180  return dst;
1181}
1182
1183
1184LogicVRegister Simulator::sub(VectorFormat vform,
1185                              LogicVRegister dst,
1186                              const LogicVRegister& src1,
1187                              const LogicVRegister& src2) {
1188  dst.ClearForWrite(vform);
1189  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1190    // Test for unsigned saturation.
1191    if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1192      dst.SetUnsignedSat(i, false);
1193    }
1194
1195    // Test for signed saturation.
1196    int64_t sa = src1.IntLeftJustified(vform, i);
1197    int64_t sb = src2.IntLeftJustified(vform, i);
1198    int64_t sr = sa - sb;
1199    // If the signs of the operands are different, and the sign of the first
1200    // operand doesn't match the result, there was an overflow.
1201    if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1202      dst.SetSignedSat(i, sr < 0);
1203    }
1204
1205    dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1206  }
1207  return dst;
1208}
1209
1210
1211LogicVRegister Simulator::and_(VectorFormat vform,
1212                               LogicVRegister dst,
1213                               const LogicVRegister& src1,
1214                               const LogicVRegister& src2) {
1215  dst.ClearForWrite(vform);
1216  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1217    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1218  }
1219  return dst;
1220}
1221
1222
1223LogicVRegister Simulator::orr(VectorFormat vform,
1224                              LogicVRegister dst,
1225                              const LogicVRegister& src1,
1226                              const LogicVRegister& src2) {
1227  dst.ClearForWrite(vform);
1228  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1229    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1230  }
1231  return dst;
1232}
1233
1234
1235LogicVRegister Simulator::orn(VectorFormat vform,
1236                              LogicVRegister dst,
1237                              const LogicVRegister& src1,
1238                              const LogicVRegister& src2) {
1239  dst.ClearForWrite(vform);
1240  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1241    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1242  }
1243  return dst;
1244}
1245
1246
1247LogicVRegister Simulator::eor(VectorFormat vform,
1248                              LogicVRegister dst,
1249                              const LogicVRegister& src1,
1250                              const LogicVRegister& src2) {
1251  dst.ClearForWrite(vform);
1252  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1253    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1254  }
1255  return dst;
1256}
1257
1258
1259LogicVRegister Simulator::bic(VectorFormat vform,
1260                              LogicVRegister dst,
1261                              const LogicVRegister& src1,
1262                              const LogicVRegister& src2) {
1263  dst.ClearForWrite(vform);
1264  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1265    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1266  }
1267  return dst;
1268}
1269
1270
1271LogicVRegister Simulator::bic(VectorFormat vform,
1272                              LogicVRegister dst,
1273                              const LogicVRegister& src,
1274                              uint64_t imm) {
1275  uint64_t result[16];
1276  int laneCount = LaneCountFromFormat(vform);
1277  for (int i = 0; i < laneCount; ++i) {
1278    result[i] = src.Uint(vform, i) & ~imm;
1279  }
1280  dst.ClearForWrite(vform);
1281  for (int i = 0; i < laneCount; ++i) {
1282    dst.SetUint(vform, i, result[i]);
1283  }
1284  return dst;
1285}
1286
1287
1288LogicVRegister Simulator::bif(VectorFormat vform,
1289                              LogicVRegister dst,
1290                              const LogicVRegister& src1,
1291                              const LogicVRegister& src2) {
1292  dst.ClearForWrite(vform);
1293  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1294    uint64_t operand1 = dst.Uint(vform, i);
1295    uint64_t operand2 = ~src2.Uint(vform, i);
1296    uint64_t operand3 = src1.Uint(vform, i);
1297    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1298    dst.SetUint(vform, i, result);
1299  }
1300  return dst;
1301}
1302
1303
1304LogicVRegister Simulator::bit(VectorFormat vform,
1305                              LogicVRegister dst,
1306                              const LogicVRegister& src1,
1307                              const LogicVRegister& src2) {
1308  dst.ClearForWrite(vform);
1309  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1310    uint64_t operand1 = dst.Uint(vform, i);
1311    uint64_t operand2 = src2.Uint(vform, i);
1312    uint64_t operand3 = src1.Uint(vform, i);
1313    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1314    dst.SetUint(vform, i, result);
1315  }
1316  return dst;
1317}
1318
1319
1320LogicVRegister Simulator::bsl(VectorFormat vform,
1321                              LogicVRegister dst,
1322                              const LogicVRegister& src1,
1323                              const LogicVRegister& src2) {
1324  dst.ClearForWrite(vform);
1325  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1326    uint64_t operand1 = src2.Uint(vform, i);
1327    uint64_t operand2 = dst.Uint(vform, i);
1328    uint64_t operand3 = src1.Uint(vform, i);
1329    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1330    dst.SetUint(vform, i, result);
1331  }
1332  return dst;
1333}
1334
1335
1336LogicVRegister Simulator::sminmax(VectorFormat vform,
1337                                  LogicVRegister dst,
1338                                  const LogicVRegister& src1,
1339                                  const LogicVRegister& src2,
1340                                  bool max) {
1341  dst.ClearForWrite(vform);
1342  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1343    int64_t src1_val = src1.Int(vform, i);
1344    int64_t src2_val = src2.Int(vform, i);
1345    int64_t dst_val;
1346    if (max == true) {
1347      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1348    } else {
1349      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1350    }
1351    dst.SetInt(vform, i, dst_val);
1352  }
1353  return dst;
1354}
1355
1356
1357LogicVRegister Simulator::smax(VectorFormat vform,
1358                               LogicVRegister dst,
1359                               const LogicVRegister& src1,
1360                               const LogicVRegister& src2) {
1361  return sminmax(vform, dst, src1, src2, true);
1362}
1363
1364
1365LogicVRegister Simulator::smin(VectorFormat vform,
1366                               LogicVRegister dst,
1367                               const LogicVRegister& src1,
1368                               const LogicVRegister& src2) {
1369  return sminmax(vform, dst, src1, src2, false);
1370}
1371
1372
1373LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1374                                   LogicVRegister dst,
1375                                   int dst_index,
1376                                   const LogicVRegister& src,
1377                                   bool max) {
1378  for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1379    int64_t src1_val = src.Int(vform, i);
1380    int64_t src2_val = src.Int(vform, i + 1);
1381    int64_t dst_val;
1382    if (max == true) {
1383      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1384    } else {
1385      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1386    }
1387    dst.SetInt(vform, dst_index + (i >> 1), dst_val);
1388  }
1389  return dst;
1390}
1391
1392
1393LogicVRegister Simulator::smaxp(VectorFormat vform,
1394                                LogicVRegister dst,
1395                                const LogicVRegister& src1,
1396                                const LogicVRegister& src2) {
1397  dst.ClearForWrite(vform);
1398  sminmaxp(vform, dst, 0, src1, true);
1399  sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1400  return dst;
1401}
1402
1403
1404LogicVRegister Simulator::sminp(VectorFormat vform,
1405                                LogicVRegister dst,
1406                                const LogicVRegister& src1,
1407                                const LogicVRegister& src2) {
1408  dst.ClearForWrite(vform);
1409  sminmaxp(vform, dst, 0, src1, false);
1410  sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1411  return dst;
1412}
1413
1414
1415LogicVRegister Simulator::addp(VectorFormat vform,
1416                               LogicVRegister dst,
1417                               const LogicVRegister& src) {
1418  VIXL_ASSERT(vform == kFormatD);
1419
1420  int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1421  dst.ClearForWrite(vform);
1422  dst.SetInt(vform, 0, dst_val);
1423  return dst;
1424}
1425
1426
1427LogicVRegister Simulator::addv(VectorFormat vform,
1428                               LogicVRegister dst,
1429                               const LogicVRegister& src) {
1430  VectorFormat vform_dst =
1431      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1432
1433
1434  int64_t dst_val = 0;
1435  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1436    dst_val += src.Int(vform, i);
1437  }
1438
1439  dst.ClearForWrite(vform_dst);
1440  dst.SetInt(vform_dst, 0, dst_val);
1441  return dst;
1442}
1443
1444
1445LogicVRegister Simulator::saddlv(VectorFormat vform,
1446                                 LogicVRegister dst,
1447                                 const LogicVRegister& src) {
1448  VectorFormat vform_dst =
1449      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1450
1451  int64_t dst_val = 0;
1452  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1453    dst_val += src.Int(vform, i);
1454  }
1455
1456  dst.ClearForWrite(vform_dst);
1457  dst.SetInt(vform_dst, 0, dst_val);
1458  return dst;
1459}
1460
1461
1462LogicVRegister Simulator::uaddlv(VectorFormat vform,
1463                                 LogicVRegister dst,
1464                                 const LogicVRegister& src) {
1465  VectorFormat vform_dst =
1466      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1467
1468  uint64_t dst_val = 0;
1469  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1470    dst_val += src.Uint(vform, i);
1471  }
1472
1473  dst.ClearForWrite(vform_dst);
1474  dst.SetUint(vform_dst, 0, dst_val);
1475  return dst;
1476}
1477
1478
1479LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1480                                   LogicVRegister dst,
1481                                   const LogicVRegister& src,
1482                                   bool max) {
1483  dst.ClearForWrite(vform);
1484  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1485  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1486    dst.SetInt(vform, i, 0);
1487    int64_t src_val = src.Int(vform, i);
1488    if (max == true) {
1489      dst_val = (src_val > dst_val) ? src_val : dst_val;
1490    } else {
1491      dst_val = (src_val < dst_val) ? src_val : dst_val;
1492    }
1493  }
1494  dst.SetInt(vform, 0, dst_val);
1495  return dst;
1496}
1497
1498
1499LogicVRegister Simulator::smaxv(VectorFormat vform,
1500                                LogicVRegister dst,
1501                                const LogicVRegister& src) {
1502  sminmaxv(vform, dst, src, true);
1503  return dst;
1504}
1505
1506
1507LogicVRegister Simulator::sminv(VectorFormat vform,
1508                                LogicVRegister dst,
1509                                const LogicVRegister& src) {
1510  sminmaxv(vform, dst, src, false);
1511  return dst;
1512}
1513
1514
1515LogicVRegister Simulator::uminmax(VectorFormat vform,
1516                                  LogicVRegister dst,
1517                                  const LogicVRegister& src1,
1518                                  const LogicVRegister& src2,
1519                                  bool max) {
1520  dst.ClearForWrite(vform);
1521  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1522    uint64_t src1_val = src1.Uint(vform, i);
1523    uint64_t src2_val = src2.Uint(vform, i);
1524    uint64_t dst_val;
1525    if (max == true) {
1526      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1527    } else {
1528      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1529    }
1530    dst.SetUint(vform, i, dst_val);
1531  }
1532  return dst;
1533}
1534
1535
1536LogicVRegister Simulator::umax(VectorFormat vform,
1537                               LogicVRegister dst,
1538                               const LogicVRegister& src1,
1539                               const LogicVRegister& src2) {
1540  return uminmax(vform, dst, src1, src2, true);
1541}
1542
1543
1544LogicVRegister Simulator::umin(VectorFormat vform,
1545                               LogicVRegister dst,
1546                               const LogicVRegister& src1,
1547                               const LogicVRegister& src2) {
1548  return uminmax(vform, dst, src1, src2, false);
1549}
1550
1551
1552LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1553                                   LogicVRegister dst,
1554                                   int dst_index,
1555                                   const LogicVRegister& src,
1556                                   bool max) {
1557  for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1558    uint64_t src1_val = src.Uint(vform, i);
1559    uint64_t src2_val = src.Uint(vform, i + 1);
1560    uint64_t dst_val;
1561    if (max == true) {
1562      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1563    } else {
1564      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1565    }
1566    dst.SetUint(vform, dst_index + (i >> 1), dst_val);
1567  }
1568  return dst;
1569}
1570
1571
1572LogicVRegister Simulator::umaxp(VectorFormat vform,
1573                                LogicVRegister dst,
1574                                const LogicVRegister& src1,
1575                                const LogicVRegister& src2) {
1576  dst.ClearForWrite(vform);
1577  uminmaxp(vform, dst, 0, src1, true);
1578  uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1579  return dst;
1580}
1581
1582
1583LogicVRegister Simulator::uminp(VectorFormat vform,
1584                                LogicVRegister dst,
1585                                const LogicVRegister& src1,
1586                                const LogicVRegister& src2) {
1587  dst.ClearForWrite(vform);
1588  uminmaxp(vform, dst, 0, src1, false);
1589  uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1590  return dst;
1591}
1592
1593
1594LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1595                                   LogicVRegister dst,
1596                                   const LogicVRegister& src,
1597                                   bool max) {
1598  dst.ClearForWrite(vform);
1599  uint64_t dst_val = max ? 0 : UINT64_MAX;
1600  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1601    dst.SetUint(vform, i, 0);
1602    uint64_t src_val = src.Uint(vform, i);
1603    if (max == true) {
1604      dst_val = (src_val > dst_val) ? src_val : dst_val;
1605    } else {
1606      dst_val = (src_val < dst_val) ? src_val : dst_val;
1607    }
1608  }
1609  dst.SetUint(vform, 0, dst_val);
1610  return dst;
1611}
1612
1613
1614LogicVRegister Simulator::umaxv(VectorFormat vform,
1615                                LogicVRegister dst,
1616                                const LogicVRegister& src) {
1617  uminmaxv(vform, dst, src, true);
1618  return dst;
1619}
1620
1621
1622LogicVRegister Simulator::uminv(VectorFormat vform,
1623                                LogicVRegister dst,
1624                                const LogicVRegister& src) {
1625  uminmaxv(vform, dst, src, false);
1626  return dst;
1627}
1628
1629
1630LogicVRegister Simulator::shl(VectorFormat vform,
1631                              LogicVRegister dst,
1632                              const LogicVRegister& src,
1633                              int shift) {
1634  VIXL_ASSERT(shift >= 0);
1635  SimVRegister temp;
1636  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1637  return ushl(vform, dst, src, shiftreg);
1638}
1639
1640
1641LogicVRegister Simulator::sshll(VectorFormat vform,
1642                                LogicVRegister dst,
1643                                const LogicVRegister& src,
1644                                int shift) {
1645  VIXL_ASSERT(shift >= 0);
1646  SimVRegister temp1, temp2;
1647  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1648  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1649  return sshl(vform, dst, extendedreg, shiftreg);
1650}
1651
1652
1653LogicVRegister Simulator::sshll2(VectorFormat vform,
1654                                 LogicVRegister dst,
1655                                 const LogicVRegister& src,
1656                                 int shift) {
1657  VIXL_ASSERT(shift >= 0);
1658  SimVRegister temp1, temp2;
1659  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1660  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1661  return sshl(vform, dst, extendedreg, shiftreg);
1662}
1663
1664
1665LogicVRegister Simulator::shll(VectorFormat vform,
1666                               LogicVRegister dst,
1667                               const LogicVRegister& src) {
1668  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1669  return sshll(vform, dst, src, shift);
1670}
1671
1672
1673LogicVRegister Simulator::shll2(VectorFormat vform,
1674                                LogicVRegister dst,
1675                                const LogicVRegister& src) {
1676  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1677  return sshll2(vform, dst, src, shift);
1678}
1679
1680
1681LogicVRegister Simulator::ushll(VectorFormat vform,
1682                                LogicVRegister dst,
1683                                const LogicVRegister& src,
1684                                int shift) {
1685  VIXL_ASSERT(shift >= 0);
1686  SimVRegister temp1, temp2;
1687  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1688  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1689  return ushl(vform, dst, extendedreg, shiftreg);
1690}
1691
1692
1693LogicVRegister Simulator::ushll2(VectorFormat vform,
1694                                 LogicVRegister dst,
1695                                 const LogicVRegister& src,
1696                                 int shift) {
1697  VIXL_ASSERT(shift >= 0);
1698  SimVRegister temp1, temp2;
1699  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1700  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1701  return ushl(vform, dst, extendedreg, shiftreg);
1702}
1703
1704
1705LogicVRegister Simulator::sli(VectorFormat vform,
1706                              LogicVRegister dst,
1707                              const LogicVRegister& src,
1708                              int shift) {
1709  dst.ClearForWrite(vform);
1710  int laneCount = LaneCountFromFormat(vform);
1711  for (int i = 0; i < laneCount; i++) {
1712    uint64_t src_lane = src.Uint(vform, i);
1713    uint64_t dst_lane = dst.Uint(vform, i);
1714    uint64_t shifted = src_lane << shift;
1715    uint64_t mask = MaxUintFromFormat(vform) << shift;
1716    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1717  }
1718  return dst;
1719}
1720
1721
1722LogicVRegister Simulator::sqshl(VectorFormat vform,
1723                                LogicVRegister dst,
1724                                const LogicVRegister& src,
1725                                int shift) {
1726  VIXL_ASSERT(shift >= 0);
1727  SimVRegister temp;
1728  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1729  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1730}
1731
1732
1733LogicVRegister Simulator::uqshl(VectorFormat vform,
1734                                LogicVRegister dst,
1735                                const LogicVRegister& src,
1736                                int shift) {
1737  VIXL_ASSERT(shift >= 0);
1738  SimVRegister temp;
1739  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1740  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1741}
1742
1743
1744LogicVRegister Simulator::sqshlu(VectorFormat vform,
1745                                 LogicVRegister dst,
1746                                 const LogicVRegister& src,
1747                                 int shift) {
1748  VIXL_ASSERT(shift >= 0);
1749  SimVRegister temp;
1750  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1751  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1752}
1753
1754
1755LogicVRegister Simulator::sri(VectorFormat vform,
1756                              LogicVRegister dst,
1757                              const LogicVRegister& src,
1758                              int shift) {
1759  dst.ClearForWrite(vform);
1760  int laneCount = LaneCountFromFormat(vform);
1761  VIXL_ASSERT((shift > 0) &&
1762              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1763  for (int i = 0; i < laneCount; i++) {
1764    uint64_t src_lane = src.Uint(vform, i);
1765    uint64_t dst_lane = dst.Uint(vform, i);
1766    uint64_t shifted;
1767    uint64_t mask;
1768    if (shift == 64) {
1769      shifted = 0;
1770      mask = 0;
1771    } else {
1772      shifted = src_lane >> shift;
1773      mask = MaxUintFromFormat(vform) >> shift;
1774    }
1775    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1776  }
1777  return dst;
1778}
1779
1780
1781LogicVRegister Simulator::ushr(VectorFormat vform,
1782                               LogicVRegister dst,
1783                               const LogicVRegister& src,
1784                               int shift) {
1785  VIXL_ASSERT(shift >= 0);
1786  SimVRegister temp;
1787  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1788  return ushl(vform, dst, src, shiftreg);
1789}
1790
1791
1792LogicVRegister Simulator::sshr(VectorFormat vform,
1793                               LogicVRegister dst,
1794                               const LogicVRegister& src,
1795                               int shift) {
1796  VIXL_ASSERT(shift >= 0);
1797  SimVRegister temp;
1798  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1799  return sshl(vform, dst, src, shiftreg);
1800}
1801
1802
1803LogicVRegister Simulator::ssra(VectorFormat vform,
1804                               LogicVRegister dst,
1805                               const LogicVRegister& src,
1806                               int shift) {
1807  SimVRegister temp;
1808  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1809  return add(vform, dst, dst, shifted_reg);
1810}
1811
1812
1813LogicVRegister Simulator::usra(VectorFormat vform,
1814                               LogicVRegister dst,
1815                               const LogicVRegister& src,
1816                               int shift) {
1817  SimVRegister temp;
1818  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1819  return add(vform, dst, dst, shifted_reg);
1820}
1821
1822
1823LogicVRegister Simulator::srsra(VectorFormat vform,
1824                                LogicVRegister dst,
1825                                const LogicVRegister& src,
1826                                int shift) {
1827  SimVRegister temp;
1828  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1829  return add(vform, dst, dst, shifted_reg);
1830}
1831
1832
1833LogicVRegister Simulator::ursra(VectorFormat vform,
1834                                LogicVRegister dst,
1835                                const LogicVRegister& src,
1836                                int shift) {
1837  SimVRegister temp;
1838  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1839  return add(vform, dst, dst, shifted_reg);
1840}
1841
1842
1843LogicVRegister Simulator::cls(VectorFormat vform,
1844                              LogicVRegister dst,
1845                              const LogicVRegister& src) {
1846  uint64_t result[16];
1847  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1848  int laneCount = LaneCountFromFormat(vform);
1849  for (int i = 0; i < laneCount; i++) {
1850    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1851  }
1852
1853  dst.ClearForWrite(vform);
1854  for (int i = 0; i < laneCount; ++i) {
1855    dst.SetUint(vform, i, result[i]);
1856  }
1857  return dst;
1858}
1859
1860
1861LogicVRegister Simulator::clz(VectorFormat vform,
1862                              LogicVRegister dst,
1863                              const LogicVRegister& src) {
1864  uint64_t result[16];
1865  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1866  int laneCount = LaneCountFromFormat(vform);
1867  for (int i = 0; i < laneCount; i++) {
1868    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1869  }
1870
1871  dst.ClearForWrite(vform);
1872  for (int i = 0; i < laneCount; ++i) {
1873    dst.SetUint(vform, i, result[i]);
1874  }
1875  return dst;
1876}
1877
1878
1879LogicVRegister Simulator::cnt(VectorFormat vform,
1880                              LogicVRegister dst,
1881                              const LogicVRegister& src) {
1882  uint64_t result[16];
1883  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1884  int laneCount = LaneCountFromFormat(vform);
1885  for (int i = 0; i < laneCount; i++) {
1886    uint64_t value = src.Uint(vform, i);
1887    result[i] = 0;
1888    for (int j = 0; j < laneSizeInBits; j++) {
1889      result[i] += (value & 1);
1890      value >>= 1;
1891    }
1892  }
1893
1894  dst.ClearForWrite(vform);
1895  for (int i = 0; i < laneCount; ++i) {
1896    dst.SetUint(vform, i, result[i]);
1897  }
1898  return dst;
1899}
1900
1901
1902LogicVRegister Simulator::sshl(VectorFormat vform,
1903                               LogicVRegister dst,
1904                               const LogicVRegister& src1,
1905                               const LogicVRegister& src2) {
1906  dst.ClearForWrite(vform);
1907  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1908    int8_t shift_val = src2.Int(vform, i);
1909    int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1910
1911    // Set signed saturation state.
1912    if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1913      dst.SetSignedSat(i, lj_src_val >= 0);
1914    }
1915
1916    // Set unsigned saturation state.
1917    if (lj_src_val < 0) {
1918      dst.SetUnsignedSat(i, false);
1919    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1920               (lj_src_val != 0)) {
1921      dst.SetUnsignedSat(i, true);
1922    }
1923
1924    int64_t src_val = src1.Int(vform, i);
1925    if (shift_val > 63) {
1926      dst.SetInt(vform, i, 0);
1927    } else if (shift_val < -63) {
1928      dst.SetRounding(i, src_val < 0);
1929      dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1930    } else {
1931      if (shift_val < 0) {
1932        // Set rounding state. Rounding only needed on right shifts.
1933        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1934          dst.SetRounding(i, true);
1935        }
1936        src_val >>= -shift_val;
1937      } else {
1938        src_val <<= shift_val;
1939      }
1940      dst.SetInt(vform, i, src_val);
1941    }
1942  }
1943  return dst;
1944}
1945
1946
1947LogicVRegister Simulator::ushl(VectorFormat vform,
1948                               LogicVRegister dst,
1949                               const LogicVRegister& src1,
1950                               const LogicVRegister& src2) {
1951  dst.ClearForWrite(vform);
1952  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1953    int8_t shift_val = src2.Int(vform, i);
1954    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955
1956    // Set saturation state.
1957    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958      dst.SetUnsignedSat(i, true);
1959    }
1960
1961    uint64_t src_val = src1.Uint(vform, i);
1962    if ((shift_val > 63) || (shift_val < -64)) {
1963      dst.SetUint(vform, i, 0);
1964    } else {
1965      if (shift_val < 0) {
1966        // Set rounding state. Rounding only needed on right shifts.
1967        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968          dst.SetRounding(i, true);
1969        }
1970
1971        if (shift_val == -64) {
1972          src_val = 0;
1973        } else {
1974          src_val >>= -shift_val;
1975        }
1976      } else {
1977        src_val <<= shift_val;
1978      }
1979      dst.SetUint(vform, i, src_val);
1980    }
1981  }
1982  return dst;
1983}
1984
1985
1986LogicVRegister Simulator::neg(VectorFormat vform,
1987                              LogicVRegister dst,
1988                              const LogicVRegister& src) {
1989  dst.ClearForWrite(vform);
1990  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1991    // Test for signed saturation.
1992    int64_t sa = src.Int(vform, i);
1993    if (sa == MinIntFromFormat(vform)) {
1994      dst.SetSignedSat(i, true);
1995    }
1996    dst.SetInt(vform, i, -sa);
1997  }
1998  return dst;
1999}
2000
2001
2002LogicVRegister Simulator::suqadd(VectorFormat vform,
2003                                 LogicVRegister dst,
2004                                 const LogicVRegister& src) {
2005  dst.ClearForWrite(vform);
2006  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2007    int64_t sa = dst.IntLeftJustified(vform, i);
2008    uint64_t ub = src.UintLeftJustified(vform, i);
2009    int64_t sr = sa + ub;
2010
2011    if (sr < sa) {  // Test for signed positive saturation.
2012      dst.SetInt(vform, i, MaxIntFromFormat(vform));
2013    } else {
2014      dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
2015    }
2016  }
2017  return dst;
2018}
2019
2020
2021LogicVRegister Simulator::usqadd(VectorFormat vform,
2022                                 LogicVRegister dst,
2023                                 const LogicVRegister& src) {
2024  dst.ClearForWrite(vform);
2025  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2026    uint64_t ua = dst.UintLeftJustified(vform, i);
2027    int64_t sb = src.IntLeftJustified(vform, i);
2028    uint64_t ur = ua + sb;
2029
2030    if ((sb > 0) && (ur <= ua)) {
2031      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2032    } else if ((sb < 0) && (ur >= ua)) {
2033      dst.SetUint(vform, i, 0);  // Negative saturation.
2034    } else {
2035      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2036    }
2037  }
2038  return dst;
2039}
2040
2041
2042LogicVRegister Simulator::abs(VectorFormat vform,
2043                              LogicVRegister dst,
2044                              const LogicVRegister& src) {
2045  dst.ClearForWrite(vform);
2046  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2047    // Test for signed saturation.
2048    int64_t sa = src.Int(vform, i);
2049    if (sa == MinIntFromFormat(vform)) {
2050      dst.SetSignedSat(i, true);
2051    }
2052    if (sa < 0) {
2053      dst.SetInt(vform, i, -sa);
2054    } else {
2055      dst.SetInt(vform, i, sa);
2056    }
2057  }
2058  return dst;
2059}
2060
2061
2062LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2063                                        LogicVRegister dst,
2064                                        bool dstIsSigned,
2065                                        const LogicVRegister& src,
2066                                        bool srcIsSigned) {
2067  bool upperhalf = false;
2068  VectorFormat srcform = kFormatUndefined;
2069  int64_t ssrc[8];
2070  uint64_t usrc[8];
2071
2072  switch (dstform) {
2073    case kFormat8B:
2074      upperhalf = false;
2075      srcform = kFormat8H;
2076      break;
2077    case kFormat16B:
2078      upperhalf = true;
2079      srcform = kFormat8H;
2080      break;
2081    case kFormat4H:
2082      upperhalf = false;
2083      srcform = kFormat4S;
2084      break;
2085    case kFormat8H:
2086      upperhalf = true;
2087      srcform = kFormat4S;
2088      break;
2089    case kFormat2S:
2090      upperhalf = false;
2091      srcform = kFormat2D;
2092      break;
2093    case kFormat4S:
2094      upperhalf = true;
2095      srcform = kFormat2D;
2096      break;
2097    case kFormatB:
2098      upperhalf = false;
2099      srcform = kFormatH;
2100      break;
2101    case kFormatH:
2102      upperhalf = false;
2103      srcform = kFormatS;
2104      break;
2105    case kFormatS:
2106      upperhalf = false;
2107      srcform = kFormatD;
2108      break;
2109    default:
2110      VIXL_UNIMPLEMENTED();
2111  }
2112
2113  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2114    ssrc[i] = src.Int(srcform, i);
2115    usrc[i] = src.Uint(srcform, i);
2116  }
2117
2118  int offset;
2119  if (upperhalf) {
2120    offset = LaneCountFromFormat(dstform) / 2;
2121  } else {
2122    offset = 0;
2123    dst.ClearForWrite(dstform);
2124  }
2125
2126  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2127    // Test for signed saturation
2128    if (ssrc[i] > MaxIntFromFormat(dstform)) {
2129      dst.SetSignedSat(offset + i, true);
2130    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2131      dst.SetSignedSat(offset + i, false);
2132    }
2133
2134    // Test for unsigned saturation
2135    if (srcIsSigned) {
2136      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2137        dst.SetUnsignedSat(offset + i, true);
2138      } else if (ssrc[i] < 0) {
2139        dst.SetUnsignedSat(offset + i, false);
2140      }
2141    } else {
2142      if (usrc[i] > MaxUintFromFormat(dstform)) {
2143        dst.SetUnsignedSat(offset + i, true);
2144      }
2145    }
2146
2147    int64_t result;
2148    if (srcIsSigned) {
2149      result = ssrc[i] & MaxUintFromFormat(dstform);
2150    } else {
2151      result = usrc[i] & MaxUintFromFormat(dstform);
2152    }
2153
2154    if (dstIsSigned) {
2155      dst.SetInt(dstform, offset + i, result);
2156    } else {
2157      dst.SetUint(dstform, offset + i, result);
2158    }
2159  }
2160  return dst;
2161}
2162
2163
2164LogicVRegister Simulator::xtn(VectorFormat vform,
2165                              LogicVRegister dst,
2166                              const LogicVRegister& src) {
2167  return extractnarrow(vform, dst, true, src, true);
2168}
2169
2170
2171LogicVRegister Simulator::sqxtn(VectorFormat vform,
2172                                LogicVRegister dst,
2173                                const LogicVRegister& src) {
2174  return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2175}
2176
2177
2178LogicVRegister Simulator::sqxtun(VectorFormat vform,
2179                                 LogicVRegister dst,
2180                                 const LogicVRegister& src) {
2181  return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2182}
2183
2184
2185LogicVRegister Simulator::uqxtn(VectorFormat vform,
2186                                LogicVRegister dst,
2187                                const LogicVRegister& src) {
2188  return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2189}
2190
2191
2192LogicVRegister Simulator::absdiff(VectorFormat vform,
2193                                  LogicVRegister dst,
2194                                  const LogicVRegister& src1,
2195                                  const LogicVRegister& src2,
2196                                  bool issigned) {
2197  dst.ClearForWrite(vform);
2198  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2199    if (issigned) {
2200      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2201      sr = sr > 0 ? sr : -sr;
2202      dst.SetInt(vform, i, sr);
2203    } else {
2204      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2205      sr = sr > 0 ? sr : -sr;
2206      dst.SetUint(vform, i, sr);
2207    }
2208  }
2209  return dst;
2210}
2211
2212
2213LogicVRegister Simulator::saba(VectorFormat vform,
2214                               LogicVRegister dst,
2215                               const LogicVRegister& src1,
2216                               const LogicVRegister& src2) {
2217  SimVRegister temp;
2218  dst.ClearForWrite(vform);
2219  absdiff(vform, temp, src1, src2, true);
2220  add(vform, dst, dst, temp);
2221  return dst;
2222}
2223
2224
2225LogicVRegister Simulator::uaba(VectorFormat vform,
2226                               LogicVRegister dst,
2227                               const LogicVRegister& src1,
2228                               const LogicVRegister& src2) {
2229  SimVRegister temp;
2230  dst.ClearForWrite(vform);
2231  absdiff(vform, temp, src1, src2, false);
2232  add(vform, dst, dst, temp);
2233  return dst;
2234}
2235
2236
2237LogicVRegister Simulator::not_(VectorFormat vform,
2238                               LogicVRegister dst,
2239                               const LogicVRegister& src) {
2240  dst.ClearForWrite(vform);
2241  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2242    dst.SetUint(vform, i, ~src.Uint(vform, i));
2243  }
2244  return dst;
2245}
2246
2247
2248LogicVRegister Simulator::rbit(VectorFormat vform,
2249                               LogicVRegister dst,
2250                               const LogicVRegister& src) {
2251  uint64_t result[16];
2252  int laneCount = LaneCountFromFormat(vform);
2253  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2254  uint64_t reversed_value;
2255  uint64_t value;
2256  for (int i = 0; i < laneCount; i++) {
2257    value = src.Uint(vform, i);
2258    reversed_value = 0;
2259    for (int j = 0; j < laneSizeInBits; j++) {
2260      reversed_value = (reversed_value << 1) | (value & 1);
2261      value >>= 1;
2262    }
2263    result[i] = reversed_value;
2264  }
2265
2266  dst.ClearForWrite(vform);
2267  for (int i = 0; i < laneCount; ++i) {
2268    dst.SetUint(vform, i, result[i]);
2269  }
2270  return dst;
2271}
2272
2273
2274LogicVRegister Simulator::rev(VectorFormat vform,
2275                              LogicVRegister dst,
2276                              const LogicVRegister& src,
2277                              int revSize) {
2278  uint64_t result[16];
2279  int laneCount = LaneCountFromFormat(vform);
2280  int laneSize = LaneSizeInBytesFromFormat(vform);
2281  int lanesPerLoop = revSize / laneSize;
2282  for (int i = 0; i < laneCount; i += lanesPerLoop) {
2283    for (int j = 0; j < lanesPerLoop; j++) {
2284      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2285    }
2286  }
2287  dst.ClearForWrite(vform);
2288  for (int i = 0; i < laneCount; ++i) {
2289    dst.SetUint(vform, i, result[i]);
2290  }
2291  return dst;
2292}
2293
2294
2295LogicVRegister Simulator::rev16(VectorFormat vform,
2296                                LogicVRegister dst,
2297                                const LogicVRegister& src) {
2298  return rev(vform, dst, src, 2);
2299}
2300
2301
2302LogicVRegister Simulator::rev32(VectorFormat vform,
2303                                LogicVRegister dst,
2304                                const LogicVRegister& src) {
2305  return rev(vform, dst, src, 4);
2306}
2307
2308
2309LogicVRegister Simulator::rev64(VectorFormat vform,
2310                                LogicVRegister dst,
2311                                const LogicVRegister& src) {
2312  return rev(vform, dst, src, 8);
2313}
2314
2315
2316LogicVRegister Simulator::addlp(VectorFormat vform,
2317                                LogicVRegister dst,
2318                                const LogicVRegister& src,
2319                                bool is_signed,
2320                                bool do_accumulate) {
2321  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2322
2323  int64_t sr[16];
2324  uint64_t ur[16];
2325
2326  int laneCount = LaneCountFromFormat(vform);
2327  for (int i = 0; i < laneCount; ++i) {
2328    if (is_signed) {
2329      sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2330    } else {
2331      ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2332    }
2333  }
2334
2335  dst.ClearForWrite(vform);
2336  for (int i = 0; i < laneCount; ++i) {
2337    if (do_accumulate) {
2338      if (is_signed) {
2339        dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2340      } else {
2341        dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2342      }
2343    } else {
2344      if (is_signed) {
2345        dst.SetInt(vform, i, sr[i]);
2346      } else {
2347        dst.SetUint(vform, i, ur[i]);
2348      }
2349    }
2350  }
2351
2352  return dst;
2353}
2354
2355
2356LogicVRegister Simulator::saddlp(VectorFormat vform,
2357                                 LogicVRegister dst,
2358                                 const LogicVRegister& src) {
2359  return addlp(vform, dst, src, true, false);
2360}
2361
2362
2363LogicVRegister Simulator::uaddlp(VectorFormat vform,
2364                                 LogicVRegister dst,
2365                                 const LogicVRegister& src) {
2366  return addlp(vform, dst, src, false, false);
2367}
2368
2369
2370LogicVRegister Simulator::sadalp(VectorFormat vform,
2371                                 LogicVRegister dst,
2372                                 const LogicVRegister& src) {
2373  return addlp(vform, dst, src, true, true);
2374}
2375
2376
2377LogicVRegister Simulator::uadalp(VectorFormat vform,
2378                                 LogicVRegister dst,
2379                                 const LogicVRegister& src) {
2380  return addlp(vform, dst, src, false, true);
2381}
2382
2383
2384LogicVRegister Simulator::ext(VectorFormat vform,
2385                              LogicVRegister dst,
2386                              const LogicVRegister& src1,
2387                              const LogicVRegister& src2,
2388                              int index) {
2389  uint8_t result[16];
2390  int laneCount = LaneCountFromFormat(vform);
2391  for (int i = 0; i < laneCount - index; ++i) {
2392    result[i] = src1.Uint(vform, i + index);
2393  }
2394  for (int i = 0; i < index; ++i) {
2395    result[laneCount - index + i] = src2.Uint(vform, i);
2396  }
2397  dst.ClearForWrite(vform);
2398  for (int i = 0; i < laneCount; ++i) {
2399    dst.SetUint(vform, i, result[i]);
2400  }
2401  return dst;
2402}
2403
2404
2405LogicVRegister Simulator::dup_element(VectorFormat vform,
2406                                      LogicVRegister dst,
2407                                      const LogicVRegister& src,
2408                                      int src_index) {
2409  int laneCount = LaneCountFromFormat(vform);
2410  uint64_t value = src.Uint(vform, src_index);
2411  dst.ClearForWrite(vform);
2412  for (int i = 0; i < laneCount; ++i) {
2413    dst.SetUint(vform, i, value);
2414  }
2415  return dst;
2416}
2417
2418
2419LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2420                                        LogicVRegister dst,
2421                                        uint64_t imm) {
2422  int laneCount = LaneCountFromFormat(vform);
2423  uint64_t value = imm & MaxUintFromFormat(vform);
2424  dst.ClearForWrite(vform);
2425  for (int i = 0; i < laneCount; ++i) {
2426    dst.SetUint(vform, i, value);
2427  }
2428  return dst;
2429}
2430
2431
2432LogicVRegister Simulator::ins_element(VectorFormat vform,
2433                                      LogicVRegister dst,
2434                                      int dst_index,
2435                                      const LogicVRegister& src,
2436                                      int src_index) {
2437  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2438  return dst;
2439}
2440
2441
2442LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2443                                        LogicVRegister dst,
2444                                        int dst_index,
2445                                        uint64_t imm) {
2446  uint64_t value = imm & MaxUintFromFormat(vform);
2447  dst.SetUint(vform, dst_index, value);
2448  return dst;
2449}
2450
2451
2452LogicVRegister Simulator::movi(VectorFormat vform,
2453                               LogicVRegister dst,
2454                               uint64_t imm) {
2455  int laneCount = LaneCountFromFormat(vform);
2456  dst.ClearForWrite(vform);
2457  for (int i = 0; i < laneCount; ++i) {
2458    dst.SetUint(vform, i, imm);
2459  }
2460  return dst;
2461}
2462
2463
2464LogicVRegister Simulator::mvni(VectorFormat vform,
2465                               LogicVRegister dst,
2466                               uint64_t imm) {
2467  int laneCount = LaneCountFromFormat(vform);
2468  dst.ClearForWrite(vform);
2469  for (int i = 0; i < laneCount; ++i) {
2470    dst.SetUint(vform, i, ~imm);
2471  }
2472  return dst;
2473}
2474
2475
2476LogicVRegister Simulator::orr(VectorFormat vform,
2477                              LogicVRegister dst,
2478                              const LogicVRegister& src,
2479                              uint64_t imm) {
2480  uint64_t result[16];
2481  int laneCount = LaneCountFromFormat(vform);
2482  for (int i = 0; i < laneCount; ++i) {
2483    result[i] = src.Uint(vform, i) | imm;
2484  }
2485  dst.ClearForWrite(vform);
2486  for (int i = 0; i < laneCount; ++i) {
2487    dst.SetUint(vform, i, result[i]);
2488  }
2489  return dst;
2490}
2491
2492
2493LogicVRegister Simulator::uxtl(VectorFormat vform,
2494                               LogicVRegister dst,
2495                               const LogicVRegister& src) {
2496  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2497
2498  dst.ClearForWrite(vform);
2499  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2500    dst.SetUint(vform, i, src.Uint(vform_half, i));
2501  }
2502  return dst;
2503}
2504
2505
2506LogicVRegister Simulator::sxtl(VectorFormat vform,
2507                               LogicVRegister dst,
2508                               const LogicVRegister& src) {
2509  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2510
2511  dst.ClearForWrite(vform);
2512  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2513    dst.SetInt(vform, i, src.Int(vform_half, i));
2514  }
2515  return dst;
2516}
2517
2518
2519LogicVRegister Simulator::uxtl2(VectorFormat vform,
2520                                LogicVRegister dst,
2521                                const LogicVRegister& src) {
2522  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2523  int lane_count = LaneCountFromFormat(vform);
2524
2525  dst.ClearForWrite(vform);
2526  for (int i = 0; i < lane_count; i++) {
2527    dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2528  }
2529  return dst;
2530}
2531
2532
2533LogicVRegister Simulator::sxtl2(VectorFormat vform,
2534                                LogicVRegister dst,
2535                                const LogicVRegister& src) {
2536  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2537  int lane_count = LaneCountFromFormat(vform);
2538
2539  dst.ClearForWrite(vform);
2540  for (int i = 0; i < lane_count; i++) {
2541    dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2542  }
2543  return dst;
2544}
2545
2546
2547LogicVRegister Simulator::shrn(VectorFormat vform,
2548                               LogicVRegister dst,
2549                               const LogicVRegister& src,
2550                               int shift) {
2551  SimVRegister temp;
2552  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2553  VectorFormat vform_dst = vform;
2554  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2555  return extractnarrow(vform_dst, dst, false, shifted_src, false);
2556}
2557
2558
2559LogicVRegister Simulator::shrn2(VectorFormat vform,
2560                                LogicVRegister dst,
2561                                const LogicVRegister& src,
2562                                int shift) {
2563  SimVRegister temp;
2564  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2565  VectorFormat vformdst = vform;
2566  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2567  return extractnarrow(vformdst, dst, false, shifted_src, false);
2568}
2569
2570
2571LogicVRegister Simulator::rshrn(VectorFormat vform,
2572                                LogicVRegister dst,
2573                                const LogicVRegister& src,
2574                                int shift) {
2575  SimVRegister temp;
2576  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2577  VectorFormat vformdst = vform;
2578  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2579  return extractnarrow(vformdst, dst, false, shifted_src, false);
2580}
2581
2582
2583LogicVRegister Simulator::rshrn2(VectorFormat vform,
2584                                 LogicVRegister dst,
2585                                 const LogicVRegister& src,
2586                                 int shift) {
2587  SimVRegister temp;
2588  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2589  VectorFormat vformdst = vform;
2590  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2591  return extractnarrow(vformdst, dst, false, shifted_src, false);
2592}
2593
2594
2595LogicVRegister Simulator::tbl(VectorFormat vform,
2596                              LogicVRegister dst,
2597                              const LogicVRegister& tab,
2598                              const LogicVRegister& ind) {
2599  movi(vform, dst, 0);
2600  return tbx(vform, dst, tab, ind);
2601}
2602
2603
2604LogicVRegister Simulator::tbl(VectorFormat vform,
2605                              LogicVRegister dst,
2606                              const LogicVRegister& tab,
2607                              const LogicVRegister& tab2,
2608                              const LogicVRegister& ind) {
2609  movi(vform, dst, 0);
2610  return tbx(vform, dst, tab, tab2, ind);
2611}
2612
2613
2614LogicVRegister Simulator::tbl(VectorFormat vform,
2615                              LogicVRegister dst,
2616                              const LogicVRegister& tab,
2617                              const LogicVRegister& tab2,
2618                              const LogicVRegister& tab3,
2619                              const LogicVRegister& ind) {
2620  movi(vform, dst, 0);
2621  return tbx(vform, dst, tab, tab2, tab3, ind);
2622}
2623
2624
2625LogicVRegister Simulator::tbl(VectorFormat vform,
2626                              LogicVRegister dst,
2627                              const LogicVRegister& tab,
2628                              const LogicVRegister& tab2,
2629                              const LogicVRegister& tab3,
2630                              const LogicVRegister& tab4,
2631                              const LogicVRegister& ind) {
2632  movi(vform, dst, 0);
2633  return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
2634}
2635
2636
2637LogicVRegister Simulator::tbx(VectorFormat vform,
2638                              LogicVRegister dst,
2639                              const LogicVRegister& tab,
2640                              const LogicVRegister& ind) {
2641  dst.ClearForWrite(vform);
2642  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2643    uint64_t j = ind.Uint(vform, i);
2644    switch (j >> 4) {
2645      case 0:
2646        dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15));
2647        break;
2648    }
2649  }
2650  return dst;
2651}
2652
2653
2654LogicVRegister Simulator::tbx(VectorFormat vform,
2655                              LogicVRegister dst,
2656                              const LogicVRegister& tab,
2657                              const LogicVRegister& tab2,
2658                              const LogicVRegister& ind) {
2659  dst.ClearForWrite(vform);
2660  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2661    uint64_t j = ind.Uint(vform, i);
2662    switch (j >> 4) {
2663      case 0:
2664        dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15));
2665        break;
2666      case 1:
2667        dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15));
2668        break;
2669    }
2670  }
2671  return dst;
2672}
2673
2674
2675LogicVRegister Simulator::tbx(VectorFormat vform,
2676                              LogicVRegister dst,
2677                              const LogicVRegister& tab,
2678                              const LogicVRegister& tab2,
2679                              const LogicVRegister& tab3,
2680                              const LogicVRegister& ind) {
2681  dst.ClearForWrite(vform);
2682  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2683    uint64_t j = ind.Uint(vform, i);
2684    switch (j >> 4) {
2685      case 0:
2686        dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15));
2687        break;
2688      case 1:
2689        dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15));
2690        break;
2691      case 2:
2692        dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15));
2693        break;
2694    }
2695  }
2696  return dst;
2697}
2698
2699
2700LogicVRegister Simulator::tbx(VectorFormat vform,
2701                              LogicVRegister dst,
2702                              const LogicVRegister& tab,
2703                              const LogicVRegister& tab2,
2704                              const LogicVRegister& tab3,
2705                              const LogicVRegister& tab4,
2706                              const LogicVRegister& ind) {
2707  dst.ClearForWrite(vform);
2708  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2709    uint64_t j = ind.Uint(vform, i);
2710    switch (j >> 4) {
2711      case 0:
2712        dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15));
2713        break;
2714      case 1:
2715        dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15));
2716        break;
2717      case 2:
2718        dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15));
2719        break;
2720      case 3:
2721        dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15));
2722        break;
2723    }
2724  }
2725  return dst;
2726}
2727
2728
2729LogicVRegister Simulator::uqshrn(VectorFormat vform,
2730                                 LogicVRegister dst,
2731                                 const LogicVRegister& src,
2732                                 int shift) {
2733  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2734}
2735
2736
2737LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2738                                  LogicVRegister dst,
2739                                  const LogicVRegister& src,
2740                                  int shift) {
2741  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2742}
2743
2744
2745LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2746                                  LogicVRegister dst,
2747                                  const LogicVRegister& src,
2748                                  int shift) {
2749  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2750}
2751
2752
2753LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2754                                   LogicVRegister dst,
2755                                   const LogicVRegister& src,
2756                                   int shift) {
2757  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2758}
2759
2760
2761LogicVRegister Simulator::sqshrn(VectorFormat vform,
2762                                 LogicVRegister dst,
2763                                 const LogicVRegister& src,
2764                                 int shift) {
2765  SimVRegister temp;
2766  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2767  VectorFormat vformdst = vform;
2768  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2769  return sqxtn(vformdst, dst, shifted_src);
2770}
2771
2772
2773LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2774                                  LogicVRegister dst,
2775                                  const LogicVRegister& src,
2776                                  int shift) {
2777  SimVRegister temp;
2778  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2779  VectorFormat vformdst = vform;
2780  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2781  return sqxtn(vformdst, dst, shifted_src);
2782}
2783
2784
2785LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2786                                  LogicVRegister dst,
2787                                  const LogicVRegister& src,
2788                                  int shift) {
2789  SimVRegister temp;
2790  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2791  VectorFormat vformdst = vform;
2792  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2793  return sqxtn(vformdst, dst, shifted_src);
2794}
2795
2796
2797LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2798                                   LogicVRegister dst,
2799                                   const LogicVRegister& src,
2800                                   int shift) {
2801  SimVRegister temp;
2802  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2803  VectorFormat vformdst = vform;
2804  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2805  return sqxtn(vformdst, dst, shifted_src);
2806}
2807
2808
2809LogicVRegister Simulator::sqshrun(VectorFormat vform,
2810                                  LogicVRegister dst,
2811                                  const LogicVRegister& src,
2812                                  int shift) {
2813  SimVRegister temp;
2814  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2815  VectorFormat vformdst = vform;
2816  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2817  return sqxtun(vformdst, dst, shifted_src);
2818}
2819
2820
2821LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2822                                   LogicVRegister dst,
2823                                   const LogicVRegister& src,
2824                                   int shift) {
2825  SimVRegister temp;
2826  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2827  VectorFormat vformdst = vform;
2828  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2829  return sqxtun(vformdst, dst, shifted_src);
2830}
2831
2832
2833LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2834                                   LogicVRegister dst,
2835                                   const LogicVRegister& src,
2836                                   int shift) {
2837  SimVRegister temp;
2838  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2839  VectorFormat vformdst = vform;
2840  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2841  return sqxtun(vformdst, dst, shifted_src);
2842}
2843
2844
2845LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2846                                    LogicVRegister dst,
2847                                    const LogicVRegister& src,
2848                                    int shift) {
2849  SimVRegister temp;
2850  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2851  VectorFormat vformdst = vform;
2852  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2853  return sqxtun(vformdst, dst, shifted_src);
2854}
2855
2856
2857LogicVRegister Simulator::uaddl(VectorFormat vform,
2858                                LogicVRegister dst,
2859                                const LogicVRegister& src1,
2860                                const LogicVRegister& src2) {
2861  SimVRegister temp1, temp2;
2862  uxtl(vform, temp1, src1);
2863  uxtl(vform, temp2, src2);
2864  add(vform, dst, temp1, temp2);
2865  return dst;
2866}
2867
2868
2869LogicVRegister Simulator::uaddl2(VectorFormat vform,
2870                                 LogicVRegister dst,
2871                                 const LogicVRegister& src1,
2872                                 const LogicVRegister& src2) {
2873  SimVRegister temp1, temp2;
2874  uxtl2(vform, temp1, src1);
2875  uxtl2(vform, temp2, src2);
2876  add(vform, dst, temp1, temp2);
2877  return dst;
2878}
2879
2880
2881LogicVRegister Simulator::uaddw(VectorFormat vform,
2882                                LogicVRegister dst,
2883                                const LogicVRegister& src1,
2884                                const LogicVRegister& src2) {
2885  SimVRegister temp;
2886  uxtl(vform, temp, src2);
2887  add(vform, dst, src1, temp);
2888  return dst;
2889}
2890
2891
2892LogicVRegister Simulator::uaddw2(VectorFormat vform,
2893                                 LogicVRegister dst,
2894                                 const LogicVRegister& src1,
2895                                 const LogicVRegister& src2) {
2896  SimVRegister temp;
2897  uxtl2(vform, temp, src2);
2898  add(vform, dst, src1, temp);
2899  return dst;
2900}
2901
2902
2903LogicVRegister Simulator::saddl(VectorFormat vform,
2904                                LogicVRegister dst,
2905                                const LogicVRegister& src1,
2906                                const LogicVRegister& src2) {
2907  SimVRegister temp1, temp2;
2908  sxtl(vform, temp1, src1);
2909  sxtl(vform, temp2, src2);
2910  add(vform, dst, temp1, temp2);
2911  return dst;
2912}
2913
2914
2915LogicVRegister Simulator::saddl2(VectorFormat vform,
2916                                 LogicVRegister dst,
2917                                 const LogicVRegister& src1,
2918                                 const LogicVRegister& src2) {
2919  SimVRegister temp1, temp2;
2920  sxtl2(vform, temp1, src1);
2921  sxtl2(vform, temp2, src2);
2922  add(vform, dst, temp1, temp2);
2923  return dst;
2924}
2925
2926
2927LogicVRegister Simulator::saddw(VectorFormat vform,
2928                                LogicVRegister dst,
2929                                const LogicVRegister& src1,
2930                                const LogicVRegister& src2) {
2931  SimVRegister temp;
2932  sxtl(vform, temp, src2);
2933  add(vform, dst, src1, temp);
2934  return dst;
2935}
2936
2937
2938LogicVRegister Simulator::saddw2(VectorFormat vform,
2939                                 LogicVRegister dst,
2940                                 const LogicVRegister& src1,
2941                                 const LogicVRegister& src2) {
2942  SimVRegister temp;
2943  sxtl2(vform, temp, src2);
2944  add(vform, dst, src1, temp);
2945  return dst;
2946}
2947
2948
2949LogicVRegister Simulator::usubl(VectorFormat vform,
2950                                LogicVRegister dst,
2951                                const LogicVRegister& src1,
2952                                const LogicVRegister& src2) {
2953  SimVRegister temp1, temp2;
2954  uxtl(vform, temp1, src1);
2955  uxtl(vform, temp2, src2);
2956  sub(vform, dst, temp1, temp2);
2957  return dst;
2958}
2959
2960
2961LogicVRegister Simulator::usubl2(VectorFormat vform,
2962                                 LogicVRegister dst,
2963                                 const LogicVRegister& src1,
2964                                 const LogicVRegister& src2) {
2965  SimVRegister temp1, temp2;
2966  uxtl2(vform, temp1, src1);
2967  uxtl2(vform, temp2, src2);
2968  sub(vform, dst, temp1, temp2);
2969  return dst;
2970}
2971
2972
2973LogicVRegister Simulator::usubw(VectorFormat vform,
2974                                LogicVRegister dst,
2975                                const LogicVRegister& src1,
2976                                const LogicVRegister& src2) {
2977  SimVRegister temp;
2978  uxtl(vform, temp, src2);
2979  sub(vform, dst, src1, temp);
2980  return dst;
2981}
2982
2983
2984LogicVRegister Simulator::usubw2(VectorFormat vform,
2985                                 LogicVRegister dst,
2986                                 const LogicVRegister& src1,
2987                                 const LogicVRegister& src2) {
2988  SimVRegister temp;
2989  uxtl2(vform, temp, src2);
2990  sub(vform, dst, src1, temp);
2991  return dst;
2992}
2993
2994
2995LogicVRegister Simulator::ssubl(VectorFormat vform,
2996                                LogicVRegister dst,
2997                                const LogicVRegister& src1,
2998                                const LogicVRegister& src2) {
2999  SimVRegister temp1, temp2;
3000  sxtl(vform, temp1, src1);
3001  sxtl(vform, temp2, src2);
3002  sub(vform, dst, temp1, temp2);
3003  return dst;
3004}
3005
3006
3007LogicVRegister Simulator::ssubl2(VectorFormat vform,
3008                                 LogicVRegister dst,
3009                                 const LogicVRegister& src1,
3010                                 const LogicVRegister& src2) {
3011  SimVRegister temp1, temp2;
3012  sxtl2(vform, temp1, src1);
3013  sxtl2(vform, temp2, src2);
3014  sub(vform, dst, temp1, temp2);
3015  return dst;
3016}
3017
3018
3019LogicVRegister Simulator::ssubw(VectorFormat vform,
3020                                LogicVRegister dst,
3021                                const LogicVRegister& src1,
3022                                const LogicVRegister& src2) {
3023  SimVRegister temp;
3024  sxtl(vform, temp, src2);
3025  sub(vform, dst, src1, temp);
3026  return dst;
3027}
3028
3029
3030LogicVRegister Simulator::ssubw2(VectorFormat vform,
3031                                 LogicVRegister dst,
3032                                 const LogicVRegister& src1,
3033                                 const LogicVRegister& src2) {
3034  SimVRegister temp;
3035  sxtl2(vform, temp, src2);
3036  sub(vform, dst, src1, temp);
3037  return dst;
3038}
3039
3040
3041LogicVRegister Simulator::uabal(VectorFormat vform,
3042                                LogicVRegister dst,
3043                                const LogicVRegister& src1,
3044                                const LogicVRegister& src2) {
3045  SimVRegister temp1, temp2;
3046  uxtl(vform, temp1, src1);
3047  uxtl(vform, temp2, src2);
3048  uaba(vform, dst, temp1, temp2);
3049  return dst;
3050}
3051
3052
3053LogicVRegister Simulator::uabal2(VectorFormat vform,
3054                                 LogicVRegister dst,
3055                                 const LogicVRegister& src1,
3056                                 const LogicVRegister& src2) {
3057  SimVRegister temp1, temp2;
3058  uxtl2(vform, temp1, src1);
3059  uxtl2(vform, temp2, src2);
3060  uaba(vform, dst, temp1, temp2);
3061  return dst;
3062}
3063
3064
3065LogicVRegister Simulator::sabal(VectorFormat vform,
3066                                LogicVRegister dst,
3067                                const LogicVRegister& src1,
3068                                const LogicVRegister& src2) {
3069  SimVRegister temp1, temp2;
3070  sxtl(vform, temp1, src1);
3071  sxtl(vform, temp2, src2);
3072  saba(vform, dst, temp1, temp2);
3073  return dst;
3074}
3075
3076
3077LogicVRegister Simulator::sabal2(VectorFormat vform,
3078                                 LogicVRegister dst,
3079                                 const LogicVRegister& src1,
3080                                 const LogicVRegister& src2) {
3081  SimVRegister temp1, temp2;
3082  sxtl2(vform, temp1, src1);
3083  sxtl2(vform, temp2, src2);
3084  saba(vform, dst, temp1, temp2);
3085  return dst;
3086}
3087
3088
3089LogicVRegister Simulator::uabdl(VectorFormat vform,
3090                                LogicVRegister dst,
3091                                const LogicVRegister& src1,
3092                                const LogicVRegister& src2) {
3093  SimVRegister temp1, temp2;
3094  uxtl(vform, temp1, src1);
3095  uxtl(vform, temp2, src2);
3096  absdiff(vform, dst, temp1, temp2, false);
3097  return dst;
3098}
3099
3100
3101LogicVRegister Simulator::uabdl2(VectorFormat vform,
3102                                 LogicVRegister dst,
3103                                 const LogicVRegister& src1,
3104                                 const LogicVRegister& src2) {
3105  SimVRegister temp1, temp2;
3106  uxtl2(vform, temp1, src1);
3107  uxtl2(vform, temp2, src2);
3108  absdiff(vform, dst, temp1, temp2, false);
3109  return dst;
3110}
3111
3112
3113LogicVRegister Simulator::sabdl(VectorFormat vform,
3114                                LogicVRegister dst,
3115                                const LogicVRegister& src1,
3116                                const LogicVRegister& src2) {
3117  SimVRegister temp1, temp2;
3118  sxtl(vform, temp1, src1);
3119  sxtl(vform, temp2, src2);
3120  absdiff(vform, dst, temp1, temp2, true);
3121  return dst;
3122}
3123
3124
3125LogicVRegister Simulator::sabdl2(VectorFormat vform,
3126                                 LogicVRegister dst,
3127                                 const LogicVRegister& src1,
3128                                 const LogicVRegister& src2) {
3129  SimVRegister temp1, temp2;
3130  sxtl2(vform, temp1, src1);
3131  sxtl2(vform, temp2, src2);
3132  absdiff(vform, dst, temp1, temp2, true);
3133  return dst;
3134}
3135
3136
3137LogicVRegister Simulator::umull(VectorFormat vform,
3138                                LogicVRegister dst,
3139                                const LogicVRegister& src1,
3140                                const LogicVRegister& src2) {
3141  SimVRegister temp1, temp2;
3142  uxtl(vform, temp1, src1);
3143  uxtl(vform, temp2, src2);
3144  mul(vform, dst, temp1, temp2);
3145  return dst;
3146}
3147
3148
3149LogicVRegister Simulator::umull2(VectorFormat vform,
3150                                 LogicVRegister dst,
3151                                 const LogicVRegister& src1,
3152                                 const LogicVRegister& src2) {
3153  SimVRegister temp1, temp2;
3154  uxtl2(vform, temp1, src1);
3155  uxtl2(vform, temp2, src2);
3156  mul(vform, dst, temp1, temp2);
3157  return dst;
3158}
3159
3160
3161LogicVRegister Simulator::smull(VectorFormat vform,
3162                                LogicVRegister dst,
3163                                const LogicVRegister& src1,
3164                                const LogicVRegister& src2) {
3165  SimVRegister temp1, temp2;
3166  sxtl(vform, temp1, src1);
3167  sxtl(vform, temp2, src2);
3168  mul(vform, dst, temp1, temp2);
3169  return dst;
3170}
3171
3172
3173LogicVRegister Simulator::smull2(VectorFormat vform,
3174                                 LogicVRegister dst,
3175                                 const LogicVRegister& src1,
3176                                 const LogicVRegister& src2) {
3177  SimVRegister temp1, temp2;
3178  sxtl2(vform, temp1, src1);
3179  sxtl2(vform, temp2, src2);
3180  mul(vform, dst, temp1, temp2);
3181  return dst;
3182}
3183
3184
3185LogicVRegister Simulator::umlsl(VectorFormat vform,
3186                                LogicVRegister dst,
3187                                const LogicVRegister& src1,
3188                                const LogicVRegister& src2) {
3189  SimVRegister temp1, temp2;
3190  uxtl(vform, temp1, src1);
3191  uxtl(vform, temp2, src2);
3192  mls(vform, dst, temp1, temp2);
3193  return dst;
3194}
3195
3196
3197LogicVRegister Simulator::umlsl2(VectorFormat vform,
3198                                 LogicVRegister dst,
3199                                 const LogicVRegister& src1,
3200                                 const LogicVRegister& src2) {
3201  SimVRegister temp1, temp2;
3202  uxtl2(vform, temp1, src1);
3203  uxtl2(vform, temp2, src2);
3204  mls(vform, dst, temp1, temp2);
3205  return dst;
3206}
3207
3208
3209LogicVRegister Simulator::smlsl(VectorFormat vform,
3210                                LogicVRegister dst,
3211                                const LogicVRegister& src1,
3212                                const LogicVRegister& src2) {
3213  SimVRegister temp1, temp2;
3214  sxtl(vform, temp1, src1);
3215  sxtl(vform, temp2, src2);
3216  mls(vform, dst, temp1, temp2);
3217  return dst;
3218}
3219
3220
3221LogicVRegister Simulator::smlsl2(VectorFormat vform,
3222                                 LogicVRegister dst,
3223                                 const LogicVRegister& src1,
3224                                 const LogicVRegister& src2) {
3225  SimVRegister temp1, temp2;
3226  sxtl2(vform, temp1, src1);
3227  sxtl2(vform, temp2, src2);
3228  mls(vform, dst, temp1, temp2);
3229  return dst;
3230}
3231
3232
3233LogicVRegister Simulator::umlal(VectorFormat vform,
3234                                LogicVRegister dst,
3235                                const LogicVRegister& src1,
3236                                const LogicVRegister& src2) {
3237  SimVRegister temp1, temp2;
3238  uxtl(vform, temp1, src1);
3239  uxtl(vform, temp2, src2);
3240  mla(vform, dst, temp1, temp2);
3241  return dst;
3242}
3243
3244
3245LogicVRegister Simulator::umlal2(VectorFormat vform,
3246                                 LogicVRegister dst,
3247                                 const LogicVRegister& src1,
3248                                 const LogicVRegister& src2) {
3249  SimVRegister temp1, temp2;
3250  uxtl2(vform, temp1, src1);
3251  uxtl2(vform, temp2, src2);
3252  mla(vform, dst, temp1, temp2);
3253  return dst;
3254}
3255
3256
3257LogicVRegister Simulator::smlal(VectorFormat vform,
3258                                LogicVRegister dst,
3259                                const LogicVRegister& src1,
3260                                const LogicVRegister& src2) {
3261  SimVRegister temp1, temp2;
3262  sxtl(vform, temp1, src1);
3263  sxtl(vform, temp2, src2);
3264  mla(vform, dst, temp1, temp2);
3265  return dst;
3266}
3267
3268
3269LogicVRegister Simulator::smlal2(VectorFormat vform,
3270                                 LogicVRegister dst,
3271                                 const LogicVRegister& src1,
3272                                 const LogicVRegister& src2) {
3273  SimVRegister temp1, temp2;
3274  sxtl2(vform, temp1, src1);
3275  sxtl2(vform, temp2, src2);
3276  mla(vform, dst, temp1, temp2);
3277  return dst;
3278}
3279
3280
3281LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3282                                  LogicVRegister dst,
3283                                  const LogicVRegister& src1,
3284                                  const LogicVRegister& src2) {
3285  SimVRegister temp;
3286  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3287  return add(vform, dst, dst, product).SignedSaturate(vform);
3288}
3289
3290
3291LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3292                                   LogicVRegister dst,
3293                                   const LogicVRegister& src1,
3294                                   const LogicVRegister& src2) {
3295  SimVRegister temp;
3296  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3297  return add(vform, dst, dst, product).SignedSaturate(vform);
3298}
3299
3300
3301LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3302                                  LogicVRegister dst,
3303                                  const LogicVRegister& src1,
3304                                  const LogicVRegister& src2) {
3305  SimVRegister temp;
3306  LogicVRegister product = sqdmull(vform, temp, src1, src2);
3307  return sub(vform, dst, dst, product).SignedSaturate(vform);
3308}
3309
3310
3311LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3312                                   LogicVRegister dst,
3313                                   const LogicVRegister& src1,
3314                                   const LogicVRegister& src2) {
3315  SimVRegister temp;
3316  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3317  return sub(vform, dst, dst, product).SignedSaturate(vform);
3318}
3319
3320
3321LogicVRegister Simulator::sqdmull(VectorFormat vform,
3322                                  LogicVRegister dst,
3323                                  const LogicVRegister& src1,
3324                                  const LogicVRegister& src2) {
3325  SimVRegister temp;
3326  LogicVRegister product = smull(vform, temp, src1, src2);
3327  return add(vform, dst, product, product).SignedSaturate(vform);
3328}
3329
3330
3331LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3332                                   LogicVRegister dst,
3333                                   const LogicVRegister& src1,
3334                                   const LogicVRegister& src2) {
3335  SimVRegister temp;
3336  LogicVRegister product = smull2(vform, temp, src1, src2);
3337  return add(vform, dst, product, product).SignedSaturate(vform);
3338}
3339
3340
3341LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3342                                   LogicVRegister dst,
3343                                   const LogicVRegister& src1,
3344                                   const LogicVRegister& src2,
3345                                   bool round) {
3346  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3347  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3348  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3349
3350  int esize = LaneSizeInBitsFromFormat(vform);
3351  int round_const = round ? (1 << (esize - 2)) : 0;
3352  int64_t product;
3353
3354  dst.ClearForWrite(vform);
3355  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3356    product = src1.Int(vform, i) * src2.Int(vform, i);
3357    product += round_const;
3358    product = product >> (esize - 1);
3359
3360    if (product > MaxIntFromFormat(vform)) {
3361      product = MaxIntFromFormat(vform);
3362    } else if (product < MinIntFromFormat(vform)) {
3363      product = MinIntFromFormat(vform);
3364    }
3365    dst.SetInt(vform, i, product);
3366  }
3367  return dst;
3368}
3369
3370
3371LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3372                                  LogicVRegister dst,
3373                                  const LogicVRegister& src1,
3374                                  const LogicVRegister& src2) {
3375  return sqrdmulh(vform, dst, src1, src2, false);
3376}
3377
3378
3379LogicVRegister Simulator::addhn(VectorFormat vform,
3380                                LogicVRegister dst,
3381                                const LogicVRegister& src1,
3382                                const LogicVRegister& src2) {
3383  SimVRegister temp;
3384  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3385  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3386  return dst;
3387}
3388
3389
3390LogicVRegister Simulator::addhn2(VectorFormat vform,
3391                                 LogicVRegister dst,
3392                                 const LogicVRegister& src1,
3393                                 const LogicVRegister& src2) {
3394  SimVRegister temp;
3395  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3396  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3397  return dst;
3398}
3399
3400
3401LogicVRegister Simulator::raddhn(VectorFormat vform,
3402                                 LogicVRegister dst,
3403                                 const LogicVRegister& src1,
3404                                 const LogicVRegister& src2) {
3405  SimVRegister temp;
3406  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3407  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3408  return dst;
3409}
3410
3411
3412LogicVRegister Simulator::raddhn2(VectorFormat vform,
3413                                  LogicVRegister dst,
3414                                  const LogicVRegister& src1,
3415                                  const LogicVRegister& src2) {
3416  SimVRegister temp;
3417  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3418  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3419  return dst;
3420}
3421
3422
3423LogicVRegister Simulator::subhn(VectorFormat vform,
3424                                LogicVRegister dst,
3425                                const LogicVRegister& src1,
3426                                const LogicVRegister& src2) {
3427  SimVRegister temp;
3428  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3429  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3430  return dst;
3431}
3432
3433
3434LogicVRegister Simulator::subhn2(VectorFormat vform,
3435                                 LogicVRegister dst,
3436                                 const LogicVRegister& src1,
3437                                 const LogicVRegister& src2) {
3438  SimVRegister temp;
3439  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3440  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3441  return dst;
3442}
3443
3444
3445LogicVRegister Simulator::rsubhn(VectorFormat vform,
3446                                 LogicVRegister dst,
3447                                 const LogicVRegister& src1,
3448                                 const LogicVRegister& src2) {
3449  SimVRegister temp;
3450  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3451  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3452  return dst;
3453}
3454
3455
3456LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3457                                  LogicVRegister dst,
3458                                  const LogicVRegister& src1,
3459                                  const LogicVRegister& src2) {
3460  SimVRegister temp;
3461  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3462  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3463  return dst;
3464}
3465
3466
3467LogicVRegister Simulator::trn1(VectorFormat vform,
3468                               LogicVRegister dst,
3469                               const LogicVRegister& src1,
3470                               const LogicVRegister& src2) {
3471  uint64_t result[16];
3472  int laneCount = LaneCountFromFormat(vform);
3473  int pairs = laneCount / 2;
3474  for (int i = 0; i < pairs; ++i) {
3475    result[2 * i] = src1.Uint(vform, 2 * i);
3476    result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3477  }
3478
3479  dst.ClearForWrite(vform);
3480  for (int i = 0; i < laneCount; ++i) {
3481    dst.SetUint(vform, i, result[i]);
3482  }
3483  return dst;
3484}
3485
3486
3487LogicVRegister Simulator::trn2(VectorFormat vform,
3488                               LogicVRegister dst,
3489                               const LogicVRegister& src1,
3490                               const LogicVRegister& src2) {
3491  uint64_t result[16];
3492  int laneCount = LaneCountFromFormat(vform);
3493  int pairs = laneCount / 2;
3494  for (int i = 0; i < pairs; ++i) {
3495    result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3496    result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3497  }
3498
3499  dst.ClearForWrite(vform);
3500  for (int i = 0; i < laneCount; ++i) {
3501    dst.SetUint(vform, i, result[i]);
3502  }
3503  return dst;
3504}
3505
3506
3507LogicVRegister Simulator::zip1(VectorFormat vform,
3508                               LogicVRegister dst,
3509                               const LogicVRegister& src1,
3510                               const LogicVRegister& src2) {
3511  uint64_t result[16];
3512  int laneCount = LaneCountFromFormat(vform);
3513  int pairs = laneCount / 2;
3514  for (int i = 0; i < pairs; ++i) {
3515    result[2 * i] = src1.Uint(vform, i);
3516    result[(2 * i) + 1] = src2.Uint(vform, i);
3517  }
3518
3519  dst.ClearForWrite(vform);
3520  for (int i = 0; i < laneCount; ++i) {
3521    dst.SetUint(vform, i, result[i]);
3522  }
3523  return dst;
3524}
3525
3526
3527LogicVRegister Simulator::zip2(VectorFormat vform,
3528                               LogicVRegister dst,
3529                               const LogicVRegister& src1,
3530                               const LogicVRegister& src2) {
3531  uint64_t result[16];
3532  int laneCount = LaneCountFromFormat(vform);
3533  int pairs = laneCount / 2;
3534  for (int i = 0; i < pairs; ++i) {
3535    result[2 * i] = src1.Uint(vform, pairs + i);
3536    result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3537  }
3538
3539  dst.ClearForWrite(vform);
3540  for (int i = 0; i < laneCount; ++i) {
3541    dst.SetUint(vform, i, result[i]);
3542  }
3543  return dst;
3544}
3545
3546
3547LogicVRegister Simulator::uzp1(VectorFormat vform,
3548                               LogicVRegister dst,
3549                               const LogicVRegister& src1,
3550                               const LogicVRegister& src2) {
3551  uint64_t result[32];
3552  int laneCount = LaneCountFromFormat(vform);
3553  for (int i = 0; i < laneCount; ++i) {
3554    result[i] = src1.Uint(vform, i);
3555    result[laneCount + i] = src2.Uint(vform, i);
3556  }
3557
3558  dst.ClearForWrite(vform);
3559  for (int i = 0; i < laneCount; ++i) {
3560    dst.SetUint(vform, i, result[2 * i]);
3561  }
3562  return dst;
3563}
3564
3565
3566LogicVRegister Simulator::uzp2(VectorFormat vform,
3567                               LogicVRegister dst,
3568                               const LogicVRegister& src1,
3569                               const LogicVRegister& src2) {
3570  uint64_t result[32];
3571  int laneCount = LaneCountFromFormat(vform);
3572  for (int i = 0; i < laneCount; ++i) {
3573    result[i] = src1.Uint(vform, i);
3574    result[laneCount + i] = src2.Uint(vform, i);
3575  }
3576
3577  dst.ClearForWrite(vform);
3578  for (int i = 0; i < laneCount; ++i) {
3579    dst.SetUint(vform, i, result[(2 * i) + 1]);
3580  }
3581  return dst;
3582}
3583
3584
3585template <typename T>
3586T Simulator::FPAdd(T op1, T op2) {
3587  T result = FPProcessNaNs(op1, op2);
3588  if (std::isnan(result)) return result;
3589
3590  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3591    // inf + -inf returns the default NaN.
3592    FPProcessException();
3593    return FPDefaultNaN<T>();
3594  } else {
3595    // Other cases should be handled by standard arithmetic.
3596    return op1 + op2;
3597  }
3598}
3599
3600
3601template <typename T>
3602T Simulator::FPSub(T op1, T op2) {
3603  // NaNs should be handled elsewhere.
3604  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3605
3606  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3607    // inf - inf returns the default NaN.
3608    FPProcessException();
3609    return FPDefaultNaN<T>();
3610  } else {
3611    // Other cases should be handled by standard arithmetic.
3612    return op1 - op2;
3613  }
3614}
3615
3616
3617template <typename T>
3618T Simulator::FPMul(T op1, T op2) {
3619  // NaNs should be handled elsewhere.
3620  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3621
3622  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3623    // inf * 0.0 returns the default NaN.
3624    FPProcessException();
3625    return FPDefaultNaN<T>();
3626  } else {
3627    // Other cases should be handled by standard arithmetic.
3628    return op1 * op2;
3629  }
3630}
3631
3632
3633template <typename T>
3634T Simulator::FPMulx(T op1, T op2) {
3635  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3636    // inf * 0.0 returns +/-2.0.
3637    T two = 2.0;
3638    return copysign(1.0, op1) * copysign(1.0, op2) * two;
3639  }
3640  return FPMul(op1, op2);
3641}
3642
3643
3644template <typename T>
3645T Simulator::FPMulAdd(T a, T op1, T op2) {
3646  T result = FPProcessNaNs3(a, op1, op2);
3647
3648  T sign_a = copysign(1.0, a);
3649  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3650  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3651  bool operation_generates_nan =
3652      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
3653      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
3654      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
3655
3656  if (std::isnan(result)) {
3657    // Generated NaNs override quiet NaNs propagated from a.
3658    if (operation_generates_nan && IsQuietNaN(a)) {
3659      FPProcessException();
3660      return FPDefaultNaN<T>();
3661    } else {
3662      return result;
3663    }
3664  }
3665
3666  // If the operation would produce a NaN, return the default NaN.
3667  if (operation_generates_nan) {
3668    FPProcessException();
3669    return FPDefaultNaN<T>();
3670  }
3671
3672  // Work around broken fma implementations for exact zero results: The sign of
3673  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3674  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3675    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3676  }
3677
3678  result = FusedMultiplyAdd(op1, op2, a);
3679  VIXL_ASSERT(!std::isnan(result));
3680
3681  // Work around broken fma implementations for rounded zero results: If a is
3682  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3683  if ((a == 0.0) && (result == 0.0)) {
3684    return copysign(0.0, sign_prod);
3685  }
3686
3687  return result;
3688}
3689
3690
3691template <typename T>
3692T Simulator::FPDiv(T op1, T op2) {
3693  // NaNs should be handled elsewhere.
3694  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3695
3696  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3697    // inf / inf and 0.0 / 0.0 return the default NaN.
3698    FPProcessException();
3699    return FPDefaultNaN<T>();
3700  } else {
3701    if (op2 == 0.0) FPProcessException();
3702
3703    // Other cases should be handled by standard arithmetic.
3704    return op1 / op2;
3705  }
3706}
3707
3708
3709template <typename T>
3710T Simulator::FPSqrt(T op) {
3711  if (std::isnan(op)) {
3712    return FPProcessNaN(op);
3713  } else if (op < 0.0) {
3714    FPProcessException();
3715    return FPDefaultNaN<T>();
3716  } else {
3717    return sqrt(op);
3718  }
3719}
3720
3721
3722template <typename T>
3723T Simulator::FPMax(T a, T b) {
3724  T result = FPProcessNaNs(a, b);
3725  if (std::isnan(result)) return result;
3726
3727  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3728    // a and b are zero, and the sign differs: return +0.0.
3729    return 0.0;
3730  } else {
3731    return (a > b) ? a : b;
3732  }
3733}
3734
3735
3736template <typename T>
3737T Simulator::FPMaxNM(T a, T b) {
3738  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3739    a = kFP64NegativeInfinity;
3740  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3741    b = kFP64NegativeInfinity;
3742  }
3743
3744  T result = FPProcessNaNs(a, b);
3745  return std::isnan(result) ? result : FPMax(a, b);
3746}
3747
3748
3749template <typename T>
3750T Simulator::FPMin(T a, T b) {
3751  T result = FPProcessNaNs(a, b);
3752  if (std::isnan(result)) return result;
3753
3754  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3755    // a and b are zero, and the sign differs: return -0.0.
3756    return -0.0;
3757  } else {
3758    return (a < b) ? a : b;
3759  }
3760}
3761
3762
3763template <typename T>
3764T Simulator::FPMinNM(T a, T b) {
3765  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3766    a = kFP64PositiveInfinity;
3767  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3768    b = kFP64PositiveInfinity;
3769  }
3770
3771  T result = FPProcessNaNs(a, b);
3772  return std::isnan(result) ? result : FPMin(a, b);
3773}
3774
3775
3776template <typename T>
3777T Simulator::FPRecipStepFused(T op1, T op2) {
3778  const T two = 2.0;
3779  if ((std::isinf(op1) && (op2 == 0.0)) ||
3780      ((op1 == 0.0) && (std::isinf(op2)))) {
3781    return two;
3782  } else if (std::isinf(op1) || std::isinf(op2)) {
3783    // Return +inf if signs match, otherwise -inf.
3784    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3785                                          : kFP64NegativeInfinity;
3786  } else {
3787    return FusedMultiplyAdd(op1, op2, two);
3788  }
3789}
3790
3791
3792template <typename T>
3793T Simulator::FPRSqrtStepFused(T op1, T op2) {
3794  const T one_point_five = 1.5;
3795  const T two = 2.0;
3796
3797  if ((std::isinf(op1) && (op2 == 0.0)) ||
3798      ((op1 == 0.0) && (std::isinf(op2)))) {
3799    return one_point_five;
3800  } else if (std::isinf(op1) || std::isinf(op2)) {
3801    // Return +inf if signs match, otherwise -inf.
3802    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3803                                          : kFP64NegativeInfinity;
3804  } else {
3805    // The multiply-add-halve operation must be fully fused, so avoid interim
3806    // rounding by checking which operand can be losslessly divided by two
3807    // before doing the multiply-add.
3808    if (std::isnormal(op1 / two)) {
3809      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3810    } else if (std::isnormal(op2 / two)) {
3811      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3812    } else {
3813      // Neither operand is normal after halving: the result is dominated by
3814      // the addition term, so just return that.
3815      return one_point_five;
3816    }
3817  }
3818}
3819
3820
3821double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3822  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3823      (value == kFP64NegativeInfinity)) {
3824    return value;
3825  } else if (std::isnan(value)) {
3826    return FPProcessNaN(value);
3827  }
3828
3829  double int_result = std::floor(value);
3830  double error = value - int_result;
3831  switch (round_mode) {
3832    case FPTieAway: {
3833      // Take care of correctly handling the range ]-0.5, -0.0], which must
3834      // yield -0.0.
3835      if ((-0.5 < value) && (value < 0.0)) {
3836        int_result = -0.0;
3837
3838      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3839        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3840        // result is positive, round up.
3841        int_result++;
3842      }
3843      break;
3844    }
3845    case FPTieEven: {
3846      // Take care of correctly handling the range [-0.5, -0.0], which must
3847      // yield -0.0.
3848      if ((-0.5 <= value) && (value < 0.0)) {
3849        int_result = -0.0;
3850
3851        // If the error is greater than 0.5, or is equal to 0.5 and the integer
3852        // result is odd, round up.
3853      } else if ((error > 0.5) ||
3854                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3855        int_result++;
3856      }
3857      break;
3858    }
3859    case FPZero: {
3860      // If value>0 then we take floor(value)
3861      // otherwise, ceil(value).
3862      if (value < 0) {
3863        int_result = ceil(value);
3864      }
3865      break;
3866    }
3867    case FPNegativeInfinity: {
3868      // We always use floor(value).
3869      break;
3870    }
3871    case FPPositiveInfinity: {
3872      // Take care of correctly handling the range ]-1.0, -0.0], which must
3873      // yield -0.0.
3874      if ((-1.0 < value) && (value < 0.0)) {
3875        int_result = -0.0;
3876
3877        // If the error is non-zero, round up.
3878      } else if (error > 0.0) {
3879        int_result++;
3880      }
3881      break;
3882    }
3883    default:
3884      VIXL_UNIMPLEMENTED();
3885  }
3886  return int_result;
3887}
3888
3889
3890int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3891  value = FPRoundInt(value, rmode);
3892  if (value >= kWMaxInt) {
3893    return kWMaxInt;
3894  } else if (value < kWMinInt) {
3895    return kWMinInt;
3896  }
3897  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3898}
3899
3900
3901int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3902  value = FPRoundInt(value, rmode);
3903  if (value >= kXMaxInt) {
3904    return kXMaxInt;
3905  } else if (value < kXMinInt) {
3906    return kXMinInt;
3907  }
3908  return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3909}
3910
3911
3912uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3913  value = FPRoundInt(value, rmode);
3914  if (value >= kWMaxUInt) {
3915    return kWMaxUInt;
3916  } else if (value < 0.0) {
3917    return 0;
3918  }
3919  return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3920}
3921
3922
3923uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3924  value = FPRoundInt(value, rmode);
3925  if (value >= kXMaxUInt) {
3926    return kXMaxUInt;
3927  } else if (value < 0.0) {
3928    return 0;
3929  }
3930  return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3931}
3932
3933
3934#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
3935  template <typename T>                                          \
3936  LogicVRegister Simulator::FN(VectorFormat vform,               \
3937                               LogicVRegister dst,               \
3938                               const LogicVRegister& src1,       \
3939                               const LogicVRegister& src2) {     \
3940    dst.ClearForWrite(vform);                                    \
3941    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
3942      T op1 = src1.Float<T>(i);                                  \
3943      T op2 = src2.Float<T>(i);                                  \
3944      T result;                                                  \
3945      if (PROCNAN) {                                             \
3946        result = FPProcessNaNs(op1, op2);                        \
3947        if (!std::isnan(result)) {                               \
3948          result = OP(op1, op2);                                 \
3949        }                                                        \
3950      } else {                                                   \
3951        result = OP(op1, op2);                                   \
3952      }                                                          \
3953      dst.SetFloat(i, result);                                   \
3954    }                                                            \
3955    return dst;                                                  \
3956  }                                                              \
3957                                                                 \
3958  LogicVRegister Simulator::FN(VectorFormat vform,               \
3959                               LogicVRegister dst,               \
3960                               const LogicVRegister& src1,       \
3961                               const LogicVRegister& src2) {     \
3962    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
3963      FN<float>(vform, dst, src1, src2);                         \
3964    } else {                                                     \
3965      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
3966      FN<double>(vform, dst, src1, src2);                        \
3967    }                                                            \
3968    return dst;                                                  \
3969  }
3970NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3971#undef DEFINE_NEON_FP_VECTOR_OP
3972
3973
3974LogicVRegister Simulator::fnmul(VectorFormat vform,
3975                                LogicVRegister dst,
3976                                const LogicVRegister& src1,
3977                                const LogicVRegister& src2) {
3978  SimVRegister temp;
3979  LogicVRegister product = fmul(vform, temp, src1, src2);
3980  return fneg(vform, dst, product);
3981}
3982
3983
3984template <typename T>
3985LogicVRegister Simulator::frecps(VectorFormat vform,
3986                                 LogicVRegister dst,
3987                                 const LogicVRegister& src1,
3988                                 const LogicVRegister& src2) {
3989  dst.ClearForWrite(vform);
3990  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3991    T op1 = -src1.Float<T>(i);
3992    T op2 = src2.Float<T>(i);
3993    T result = FPProcessNaNs(op1, op2);
3994    dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3995  }
3996  return dst;
3997}
3998
3999
4000LogicVRegister Simulator::frecps(VectorFormat vform,
4001                                 LogicVRegister dst,
4002                                 const LogicVRegister& src1,
4003                                 const LogicVRegister& src2) {
4004  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4005    frecps<float>(vform, dst, src1, src2);
4006  } else {
4007    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4008    frecps<double>(vform, dst, src1, src2);
4009  }
4010  return dst;
4011}
4012
4013
4014template <typename T>
4015LogicVRegister Simulator::frsqrts(VectorFormat vform,
4016                                  LogicVRegister dst,
4017                                  const LogicVRegister& src1,
4018                                  const LogicVRegister& src2) {
4019  dst.ClearForWrite(vform);
4020  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4021    T op1 = -src1.Float<T>(i);
4022    T op2 = src2.Float<T>(i);
4023    T result = FPProcessNaNs(op1, op2);
4024    dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
4025  }
4026  return dst;
4027}
4028
4029
4030LogicVRegister Simulator::frsqrts(VectorFormat vform,
4031                                  LogicVRegister dst,
4032                                  const LogicVRegister& src1,
4033                                  const LogicVRegister& src2) {
4034  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4035    frsqrts<float>(vform, dst, src1, src2);
4036  } else {
4037    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4038    frsqrts<double>(vform, dst, src1, src2);
4039  }
4040  return dst;
4041}
4042
4043
4044template <typename T>
4045LogicVRegister Simulator::fcmp(VectorFormat vform,
4046                               LogicVRegister dst,
4047                               const LogicVRegister& src1,
4048                               const LogicVRegister& src2,
4049                               Condition cond) {
4050  dst.ClearForWrite(vform);
4051  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4052    bool result = false;
4053    T op1 = src1.Float<T>(i);
4054    T op2 = src2.Float<T>(i);
4055    T nan_result = FPProcessNaNs(op1, op2);
4056    if (!std::isnan(nan_result)) {
4057      switch (cond) {
4058        case eq:
4059          result = (op1 == op2);
4060          break;
4061        case ge:
4062          result = (op1 >= op2);
4063          break;
4064        case gt:
4065          result = (op1 > op2);
4066          break;
4067        case le:
4068          result = (op1 <= op2);
4069          break;
4070        case lt:
4071          result = (op1 < op2);
4072          break;
4073        default:
4074          VIXL_UNREACHABLE();
4075          break;
4076      }
4077    }
4078    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4079  }
4080  return dst;
4081}
4082
4083
4084LogicVRegister Simulator::fcmp(VectorFormat vform,
4085                               LogicVRegister dst,
4086                               const LogicVRegister& src1,
4087                               const LogicVRegister& src2,
4088                               Condition cond) {
4089  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4090    fcmp<float>(vform, dst, src1, src2, cond);
4091  } else {
4092    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4093    fcmp<double>(vform, dst, src1, src2, cond);
4094  }
4095  return dst;
4096}
4097
4098
4099LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4100                                    LogicVRegister dst,
4101                                    const LogicVRegister& src,
4102                                    Condition cond) {
4103  SimVRegister temp;
4104  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4105    LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4106    fcmp<float>(vform, dst, src, zero_reg, cond);
4107  } else {
4108    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4109    LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4110    fcmp<double>(vform, dst, src, zero_reg, cond);
4111  }
4112  return dst;
4113}
4114
4115
4116LogicVRegister Simulator::fabscmp(VectorFormat vform,
4117                                  LogicVRegister dst,
4118                                  const LogicVRegister& src1,
4119                                  const LogicVRegister& src2,
4120                                  Condition cond) {
4121  SimVRegister temp1, temp2;
4122  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4123    LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4124    LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4125    fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4126  } else {
4127    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4128    LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4129    LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4130    fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4131  }
4132  return dst;
4133}
4134
4135
4136template <typename T>
4137LogicVRegister Simulator::fmla(VectorFormat vform,
4138                               LogicVRegister dst,
4139                               const LogicVRegister& src1,
4140                               const LogicVRegister& src2) {
4141  dst.ClearForWrite(vform);
4142  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4143    T op1 = src1.Float<T>(i);
4144    T op2 = src2.Float<T>(i);
4145    T acc = dst.Float<T>(i);
4146    T result = FPMulAdd(acc, op1, op2);
4147    dst.SetFloat(i, result);
4148  }
4149  return dst;
4150}
4151
4152
4153LogicVRegister Simulator::fmla(VectorFormat vform,
4154                               LogicVRegister dst,
4155                               const LogicVRegister& src1,
4156                               const LogicVRegister& src2) {
4157  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4158    fmla<float>(vform, dst, src1, src2);
4159  } else {
4160    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4161    fmla<double>(vform, dst, src1, src2);
4162  }
4163  return dst;
4164}
4165
4166
4167template <typename T>
4168LogicVRegister Simulator::fmls(VectorFormat vform,
4169                               LogicVRegister dst,
4170                               const LogicVRegister& src1,
4171                               const LogicVRegister& src2) {
4172  dst.ClearForWrite(vform);
4173  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4174    T op1 = -src1.Float<T>(i);
4175    T op2 = src2.Float<T>(i);
4176    T acc = dst.Float<T>(i);
4177    T result = FPMulAdd(acc, op1, op2);
4178    dst.SetFloat(i, result);
4179  }
4180  return dst;
4181}
4182
4183
4184LogicVRegister Simulator::fmls(VectorFormat vform,
4185                               LogicVRegister dst,
4186                               const LogicVRegister& src1,
4187                               const LogicVRegister& src2) {
4188  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4189    fmls<float>(vform, dst, src1, src2);
4190  } else {
4191    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4192    fmls<double>(vform, dst, src1, src2);
4193  }
4194  return dst;
4195}
4196
4197
4198template <typename T>
4199LogicVRegister Simulator::fneg(VectorFormat vform,
4200                               LogicVRegister dst,
4201                               const LogicVRegister& src) {
4202  dst.ClearForWrite(vform);
4203  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4204    T op = src.Float<T>(i);
4205    op = -op;
4206    dst.SetFloat(i, op);
4207  }
4208  return dst;
4209}
4210
4211
4212LogicVRegister Simulator::fneg(VectorFormat vform,
4213                               LogicVRegister dst,
4214                               const LogicVRegister& src) {
4215  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4216    fneg<float>(vform, dst, src);
4217  } else {
4218    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4219    fneg<double>(vform, dst, src);
4220  }
4221  return dst;
4222}
4223
4224
4225template <typename T>
4226LogicVRegister Simulator::fabs_(VectorFormat vform,
4227                                LogicVRegister dst,
4228                                const LogicVRegister& src) {
4229  dst.ClearForWrite(vform);
4230  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4231    T op = src.Float<T>(i);
4232    if (copysign(1.0, op) < 0.0) {
4233      op = -op;
4234    }
4235    dst.SetFloat(i, op);
4236  }
4237  return dst;
4238}
4239
4240
4241LogicVRegister Simulator::fabs_(VectorFormat vform,
4242                                LogicVRegister dst,
4243                                const LogicVRegister& src) {
4244  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4245    fabs_<float>(vform, dst, src);
4246  } else {
4247    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4248    fabs_<double>(vform, dst, src);
4249  }
4250  return dst;
4251}
4252
4253
4254LogicVRegister Simulator::fabd(VectorFormat vform,
4255                               LogicVRegister dst,
4256                               const LogicVRegister& src1,
4257                               const LogicVRegister& src2) {
4258  SimVRegister temp;
4259  fsub(vform, temp, src1, src2);
4260  fabs_(vform, dst, temp);
4261  return dst;
4262}
4263
4264
4265LogicVRegister Simulator::fsqrt(VectorFormat vform,
4266                                LogicVRegister dst,
4267                                const LogicVRegister& src) {
4268  dst.ClearForWrite(vform);
4269  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4270    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4271      float result = FPSqrt(src.Float<float>(i));
4272      dst.SetFloat(i, result);
4273    }
4274  } else {
4275    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4276    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4277      double result = FPSqrt(src.Float<double>(i));
4278      dst.SetFloat(i, result);
4279    }
4280  }
4281  return dst;
4282}
4283
4284
4285#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                           \
4286  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4287                                LogicVRegister dst,                   \
4288                                const LogicVRegister& src1,           \
4289                                const LogicVRegister& src2) {         \
4290    SimVRegister temp1, temp2;                                        \
4291    uzp1(vform, temp1, src1, src2);                                   \
4292    uzp2(vform, temp2, src1, src2);                                   \
4293    FN(vform, dst, temp1, temp2);                                     \
4294    return dst;                                                       \
4295  }                                                                   \
4296                                                                      \
4297  LogicVRegister Simulator::FNP(VectorFormat vform,                   \
4298                                LogicVRegister dst,                   \
4299                                const LogicVRegister& src) {          \
4300    if (vform == kFormatS) {                                          \
4301      float result = OP(src.Float<float>(0), src.Float<float>(1));    \
4302      dst.SetFloat(0, result);                                        \
4303    } else {                                                          \
4304      VIXL_ASSERT(vform == kFormatD);                                 \
4305      double result = OP(src.Float<double>(0), src.Float<double>(1)); \
4306      dst.SetFloat(0, result);                                        \
4307    }                                                                 \
4308    dst.ClearForWrite(vform);                                         \
4309    return dst;                                                       \
4310  }
4311NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
4312#undef DEFINE_NEON_FP_PAIR_OP
4313
4314
4315LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4316                                   LogicVRegister dst,
4317                                   const LogicVRegister& src,
4318                                   FPMinMaxOp Op) {
4319  VIXL_ASSERT(vform == kFormat4S);
4320  USE(vform);
4321  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4322  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4323  float result = (this->*Op)(result1, result2);
4324  dst.ClearForWrite(kFormatS);
4325  dst.SetFloat<float>(0, result);
4326  return dst;
4327}
4328
4329
4330LogicVRegister Simulator::fmaxv(VectorFormat vform,
4331                                LogicVRegister dst,
4332                                const LogicVRegister& src) {
4333  return fminmaxv(vform, dst, src, &Simulator::FPMax);
4334}
4335
4336
4337LogicVRegister Simulator::fminv(VectorFormat vform,
4338                                LogicVRegister dst,
4339                                const LogicVRegister& src) {
4340  return fminmaxv(vform, dst, src, &Simulator::FPMin);
4341}
4342
4343
4344LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4345                                  LogicVRegister dst,
4346                                  const LogicVRegister& src) {
4347  return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4348}
4349
4350
4351LogicVRegister Simulator::fminnmv(VectorFormat vform,
4352                                  LogicVRegister dst,
4353                                  const LogicVRegister& src) {
4354  return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4355}
4356
4357
4358LogicVRegister Simulator::fmul(VectorFormat vform,
4359                               LogicVRegister dst,
4360                               const LogicVRegister& src1,
4361                               const LogicVRegister& src2,
4362                               int index) {
4363  dst.ClearForWrite(vform);
4364  SimVRegister temp;
4365  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4366    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4367    fmul<float>(vform, dst, src1, index_reg);
4368
4369  } else {
4370    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4371    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4372    fmul<double>(vform, dst, src1, index_reg);
4373  }
4374  return dst;
4375}
4376
4377
4378LogicVRegister Simulator::fmla(VectorFormat vform,
4379                               LogicVRegister dst,
4380                               const LogicVRegister& src1,
4381                               const LogicVRegister& src2,
4382                               int index) {
4383  dst.ClearForWrite(vform);
4384  SimVRegister temp;
4385  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4386    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4387    fmla<float>(vform, dst, src1, index_reg);
4388
4389  } else {
4390    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4391    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4392    fmla<double>(vform, dst, src1, index_reg);
4393  }
4394  return dst;
4395}
4396
4397
4398LogicVRegister Simulator::fmls(VectorFormat vform,
4399                               LogicVRegister dst,
4400                               const LogicVRegister& src1,
4401                               const LogicVRegister& src2,
4402                               int index) {
4403  dst.ClearForWrite(vform);
4404  SimVRegister temp;
4405  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4406    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4407    fmls<float>(vform, dst, src1, index_reg);
4408
4409  } else {
4410    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4411    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4412    fmls<double>(vform, dst, src1, index_reg);
4413  }
4414  return dst;
4415}
4416
4417
4418LogicVRegister Simulator::fmulx(VectorFormat vform,
4419                                LogicVRegister dst,
4420                                const LogicVRegister& src1,
4421                                const LogicVRegister& src2,
4422                                int index) {
4423  dst.ClearForWrite(vform);
4424  SimVRegister temp;
4425  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4426    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4427    fmulx<float>(vform, dst, src1, index_reg);
4428
4429  } else {
4430    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4431    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4432    fmulx<double>(vform, dst, src1, index_reg);
4433  }
4434  return dst;
4435}
4436
4437
4438LogicVRegister Simulator::frint(VectorFormat vform,
4439                                LogicVRegister dst,
4440                                const LogicVRegister& src,
4441                                FPRounding rounding_mode,
4442                                bool inexact_exception) {
4443  dst.ClearForWrite(vform);
4444  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4445    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4446      float input = src.Float<float>(i);
4447      float rounded = FPRoundInt(input, rounding_mode);
4448      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4449        FPProcessException();
4450      }
4451      dst.SetFloat<float>(i, rounded);
4452    }
4453  } else {
4454    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4455    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4456      double input = src.Float<double>(i);
4457      double rounded = FPRoundInt(input, rounding_mode);
4458      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4459        FPProcessException();
4460      }
4461      dst.SetFloat<double>(i, rounded);
4462    }
4463  }
4464  return dst;
4465}
4466
4467
4468LogicVRegister Simulator::fcvts(VectorFormat vform,
4469                                LogicVRegister dst,
4470                                const LogicVRegister& src,
4471                                FPRounding rounding_mode,
4472                                int fbits) {
4473  dst.ClearForWrite(vform);
4474  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4475    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4476      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4477      dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4478    }
4479  } else {
4480    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4481    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4482      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4483      dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4484    }
4485  }
4486  return dst;
4487}
4488
4489
4490LogicVRegister Simulator::fcvtu(VectorFormat vform,
4491                                LogicVRegister dst,
4492                                const LogicVRegister& src,
4493                                FPRounding rounding_mode,
4494                                int fbits) {
4495  dst.ClearForWrite(vform);
4496  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4497    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4498      float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4499      dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4500    }
4501  } else {
4502    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4503    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4504      double op = src.Float<double>(i) * std::pow(2.0, fbits);
4505      dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4506    }
4507  }
4508  return dst;
4509}
4510
4511
4512LogicVRegister Simulator::fcvtl(VectorFormat vform,
4513                                LogicVRegister dst,
4514                                const LogicVRegister& src) {
4515  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4516    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4517      dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4518    }
4519  } else {
4520    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4521    for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4522      dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4523    }
4524  }
4525  return dst;
4526}
4527
4528
4529LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4530                                 LogicVRegister dst,
4531                                 const LogicVRegister& src) {
4532  int lane_count = LaneCountFromFormat(vform);
4533  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4534    for (int i = 0; i < lane_count; i++) {
4535      dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4536    }
4537  } else {
4538    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4539    for (int i = 0; i < lane_count; i++) {
4540      dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4541    }
4542  }
4543  return dst;
4544}
4545
4546
4547LogicVRegister Simulator::fcvtn(VectorFormat vform,
4548                                LogicVRegister dst,
4549                                const LogicVRegister& src) {
4550  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4551    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4552      dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4553    }
4554  } else {
4555    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4556    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4557      dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4558    }
4559  }
4560  return dst;
4561}
4562
4563
4564LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4565                                 LogicVRegister dst,
4566                                 const LogicVRegister& src) {
4567  int lane_count = LaneCountFromFormat(vform) / 2;
4568  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4569    for (int i = lane_count - 1; i >= 0; i--) {
4570      dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4571    }
4572  } else {
4573    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4574    for (int i = lane_count - 1; i >= 0; i--) {
4575      dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4576    }
4577  }
4578  return dst;
4579}
4580
4581
4582LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4583                                 LogicVRegister dst,
4584                                 const LogicVRegister& src) {
4585  dst.ClearForWrite(vform);
4586  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4587  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4588    dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4589  }
4590  return dst;
4591}
4592
4593
4594LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4595                                  LogicVRegister dst,
4596                                  const LogicVRegister& src) {
4597  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4598  int lane_count = LaneCountFromFormat(vform) / 2;
4599  for (int i = lane_count - 1; i >= 0; i--) {
4600    dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4601  }
4602  return dst;
4603}
4604
4605
4606// Based on reference C function recip_sqrt_estimate from ARM ARM.
4607double Simulator::recip_sqrt_estimate(double a) {
4608  int q0, q1, s;
4609  double r;
4610  if (a < 0.5) {
4611    q0 = static_cast<int>(a * 512.0);
4612    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4613  } else {
4614    q1 = static_cast<int>(a * 256.0);
4615    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4616  }
4617  s = static_cast<int>(256.0 * r + 0.5);
4618  return static_cast<double>(s) / 256.0;
4619}
4620
4621
4622static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4623  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
4624}
4625
4626
4627template <typename T>
4628T Simulator::FPRecipSqrtEstimate(T op) {
4629  if (std::isnan(op)) {
4630    return FPProcessNaN(op);
4631  } else if (op == 0.0) {
4632    if (copysign(1.0, op) < 0.0) {
4633      return kFP64NegativeInfinity;
4634    } else {
4635      return kFP64PositiveInfinity;
4636    }
4637  } else if (copysign(1.0, op) < 0.0) {
4638    FPProcessException();
4639    return FPDefaultNaN<T>();
4640  } else if (std::isinf(op)) {
4641    return 0.0;
4642  } else {
4643    uint64_t fraction;
4644    int exp, result_exp;
4645
4646    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4647      exp = FloatExp(op);
4648      fraction = FloatMantissa(op);
4649      fraction <<= 29;
4650    } else {
4651      exp = DoubleExp(op);
4652      fraction = DoubleMantissa(op);
4653    }
4654
4655    if (exp == 0) {
4656      while (Bits(fraction, 51, 51) == 0) {
4657        fraction = Bits(fraction, 50, 0) << 1;
4658        exp -= 1;
4659      }
4660      fraction = Bits(fraction, 50, 0) << 1;
4661    }
4662
4663    double scaled;
4664    if (Bits(exp, 0, 0) == 0) {
4665      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4666    } else {
4667      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
4668    }
4669
4670    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4671      result_exp = (380 - exp) / 2;
4672    } else {
4673      result_exp = (3068 - exp) / 2;
4674    }
4675
4676    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
4677
4678    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4679      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4680      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4681      return FloatPack(0, exp_bits, est_bits);
4682    } else {
4683      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4684    }
4685  }
4686}
4687
4688
4689LogicVRegister Simulator::frsqrte(VectorFormat vform,
4690                                  LogicVRegister dst,
4691                                  const LogicVRegister& src) {
4692  dst.ClearForWrite(vform);
4693  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4694    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4695      float input = src.Float<float>(i);
4696      dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4697    }
4698  } else {
4699    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4700    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4701      double input = src.Float<double>(i);
4702      dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4703    }
4704  }
4705  return dst;
4706}
4707
4708template <typename T>
4709T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4710  uint32_t sign;
4711
4712  if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4713    sign = FloatSign(op);
4714  } else {
4715    sign = DoubleSign(op);
4716  }
4717
4718  if (std::isnan(op)) {
4719    return FPProcessNaN(op);
4720  } else if (std::isinf(op)) {
4721    return (sign == 1) ? -0.0 : 0.0;
4722  } else if (op == 0.0) {
4723    FPProcessException();  // FPExc_DivideByZero exception.
4724    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4725  } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
4726              (std::fabs(op) < std::pow(2.0, -128.0))) ||
4727             ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
4728              (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4729    bool overflow_to_inf = false;
4730    switch (rounding) {
4731      case FPTieEven:
4732        overflow_to_inf = true;
4733        break;
4734      case FPPositiveInfinity:
4735        overflow_to_inf = (sign == 0);
4736        break;
4737      case FPNegativeInfinity:
4738        overflow_to_inf = (sign == 1);
4739        break;
4740      case FPZero:
4741        overflow_to_inf = false;
4742        break;
4743      default:
4744        break;
4745    }
4746    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4747    if (overflow_to_inf) {
4748      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4749    } else {
4750      // Return FPMaxNormal(sign).
4751      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4752        return FloatPack(sign, 0xfe, 0x07fffff);
4753      } else {
4754        return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
4755      }
4756    }
4757  } else {
4758    uint64_t fraction;
4759    int exp, result_exp;
4760    uint32_t sign;
4761
4762    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4763      sign = FloatSign(op);
4764      exp = FloatExp(op);
4765      fraction = FloatMantissa(op);
4766      fraction <<= 29;
4767    } else {
4768      sign = DoubleSign(op);
4769      exp = DoubleExp(op);
4770      fraction = DoubleMantissa(op);
4771    }
4772
4773    if (exp == 0) {
4774      if (Bits(fraction, 51, 51) == 0) {
4775        exp -= 1;
4776        fraction = Bits(fraction, 49, 0) << 2;
4777      } else {
4778        fraction = Bits(fraction, 50, 0) << 1;
4779      }
4780    }
4781
4782    double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4783
4784    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4785      result_exp = (253 - exp);        // In range 253-254 = -1 to 253+1 = 254.
4786    } else {
4787      result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
4788    }
4789
4790    double estimate = recip_estimate(scaled);
4791
4792    fraction = DoubleMantissa(estimate);
4793    if (result_exp == 0) {
4794      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4795    } else if (result_exp == -1) {
4796      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4797      result_exp = 0;
4798    }
4799    if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4800      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4801      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4802      return FloatPack(sign, exp_bits, frac_bits);
4803    } else {
4804      return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4805    }
4806  }
4807}
4808
4809
4810LogicVRegister Simulator::frecpe(VectorFormat vform,
4811                                 LogicVRegister dst,
4812                                 const LogicVRegister& src,
4813                                 FPRounding round) {
4814  dst.ClearForWrite(vform);
4815  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4816    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4817      float input = src.Float<float>(i);
4818      dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4819    }
4820  } else {
4821    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4822    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4823      double input = src.Float<double>(i);
4824      dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4825    }
4826  }
4827  return dst;
4828}
4829
4830
4831LogicVRegister Simulator::ursqrte(VectorFormat vform,
4832                                  LogicVRegister dst,
4833                                  const LogicVRegister& src) {
4834  dst.ClearForWrite(vform);
4835  uint64_t operand;
4836  uint32_t result;
4837  double dp_operand, dp_result;
4838  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4839    operand = src.Uint(vform, i);
4840    if (operand <= 0x3FFFFFFF) {
4841      result = 0xFFFFFFFF;
4842    } else {
4843      dp_operand = operand * std::pow(2.0, -32);
4844      dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4845      result = static_cast<uint32_t>(dp_result);
4846    }
4847    dst.SetUint(vform, i, result);
4848  }
4849  return dst;
4850}
4851
4852
4853// Based on reference C function recip_estimate from ARM ARM.
4854double Simulator::recip_estimate(double a) {
4855  int q, s;
4856  double r;
4857  q = static_cast<int>(a * 512.0);
4858  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4859  s = static_cast<int>(256.0 * r + 0.5);
4860  return static_cast<double>(s) / 256.0;
4861}
4862
4863
4864LogicVRegister Simulator::urecpe(VectorFormat vform,
4865                                 LogicVRegister dst,
4866                                 const LogicVRegister& src) {
4867  dst.ClearForWrite(vform);
4868  uint64_t operand;
4869  uint32_t result;
4870  double dp_operand, dp_result;
4871  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4872    operand = src.Uint(vform, i);
4873    if (operand <= 0x7FFFFFFF) {
4874      result = 0xFFFFFFFF;
4875    } else {
4876      dp_operand = operand * std::pow(2.0, -32);
4877      dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4878      result = static_cast<uint32_t>(dp_result);
4879    }
4880    dst.SetUint(vform, i, result);
4881  }
4882  return dst;
4883}
4884
4885template <typename T>
4886LogicVRegister Simulator::frecpx(VectorFormat vform,
4887                                 LogicVRegister dst,
4888                                 const LogicVRegister& src) {
4889  dst.ClearForWrite(vform);
4890  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4891    T op = src.Float<T>(i);
4892    T result;
4893    if (std::isnan(op)) {
4894      result = FPProcessNaN(op);
4895    } else {
4896      int exp;
4897      uint32_t sign;
4898      if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
4899        sign = FloatSign(op);
4900        exp = FloatExp(op);
4901        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4902        result = FloatPack(sign, exp, 0);
4903      } else {
4904        sign = DoubleSign(op);
4905        exp = DoubleExp(op);
4906        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4907        result = DoublePack(sign, exp, 0);
4908      }
4909    }
4910    dst.SetFloat(i, result);
4911  }
4912  return dst;
4913}
4914
4915
4916LogicVRegister Simulator::frecpx(VectorFormat vform,
4917                                 LogicVRegister dst,
4918                                 const LogicVRegister& src) {
4919  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4920    frecpx<float>(vform, dst, src);
4921  } else {
4922    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4923    frecpx<double>(vform, dst, src);
4924  }
4925  return dst;
4926}
4927
4928LogicVRegister Simulator::scvtf(VectorFormat vform,
4929                                LogicVRegister dst,
4930                                const LogicVRegister& src,
4931                                int fbits,
4932                                FPRounding round) {
4933  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4934    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4935      float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4936      dst.SetFloat<float>(i, result);
4937    } else {
4938      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4939      double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4940      dst.SetFloat<double>(i, result);
4941    }
4942  }
4943  return dst;
4944}
4945
4946
4947LogicVRegister Simulator::ucvtf(VectorFormat vform,
4948                                LogicVRegister dst,
4949                                const LogicVRegister& src,
4950                                int fbits,
4951                                FPRounding round) {
4952  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4953    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4954      float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4955      dst.SetFloat<float>(i, result);
4956    } else {
4957      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4958      double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4959      dst.SetFloat<double>(i, result);
4960    }
4961  }
4962  return dst;
4963}
4964
4965
4966}  // namespace aarch64
4967}  // namespace vixl
4968
4969#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
4970