logic-aarch64.cc revision b49bdb7996e603555eba4c8b56c7325e3e737ab6
1// Copyright 2015, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 28 29#include <cmath> 30 31#include "simulator-aarch64.h" 32 33namespace vixl { 34namespace aarch64 { 35 36template <> 37double Simulator::FPDefaultNaN<double>() { 38 return kFP64DefaultNaN; 39} 40 41 42template <> 43float Simulator::FPDefaultNaN<float>() { 44 return kFP32DefaultNaN; 45} 46 47// See FPRound for a description of this function. 48static inline double FPRoundToDouble(int64_t sign, 49 int64_t exponent, 50 uint64_t mantissa, 51 FPRounding round_mode) { 52 int64_t bits = 53 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, 54 exponent, 55 mantissa, 56 round_mode); 57 return RawbitsToDouble(bits); 58} 59 60 61// See FPRound for a description of this function. 62static inline float FPRoundToFloat(int64_t sign, 63 int64_t exponent, 64 uint64_t mantissa, 65 FPRounding round_mode) { 66 int32_t bits = 67 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, 68 exponent, 69 mantissa, 70 round_mode); 71 return RawbitsToFloat(bits); 72} 73 74 75// See FPRound for a description of this function. 76static inline float16 FPRoundToFloat16(int64_t sign, 77 int64_t exponent, 78 uint64_t mantissa, 79 FPRounding round_mode) { 80 return FPRound<float16, 81 kFloat16ExponentBits, 82 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode); 83} 84 85 86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 87 if (src >= 0) { 88 return UFixedToDouble(src, fbits, round); 89 } else { 90 // This works for all negative values, including INT64_MIN. 91 return -UFixedToDouble(-src, fbits, round); 92 } 93} 94 95 96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 97 // An input of 0 is a special case because the result is effectively 98 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 99 if (src == 0) { 100 return 0.0; 101 } 102 103 // Calculate the exponent. The highest significant bit will have the value 104 // 2^exponent. 105 const int highest_significant_bit = 63 - CountLeadingZeros(src); 106 const int64_t exponent = highest_significant_bit - fbits; 107 108 return FPRoundToDouble(0, exponent, src, round); 109} 110 111 112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 113 if (src >= 0) { 114 return UFixedToFloat(src, fbits, round); 115 } else { 116 // This works for all negative values, including INT64_MIN. 117 return -UFixedToFloat(-src, fbits, round); 118 } 119} 120 121 122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 123 // An input of 0 is a special case because the result is effectively 124 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 125 if (src == 0) { 126 return 0.0f; 127 } 128 129 // Calculate the exponent. The highest significant bit will have the value 130 // 2^exponent. 131 const int highest_significant_bit = 63 - CountLeadingZeros(src); 132 const int32_t exponent = highest_significant_bit - fbits; 133 134 return FPRoundToFloat(0, exponent, src, round); 135} 136 137 138double Simulator::FPToDouble(float value) { 139 switch (std::fpclassify(value)) { 140 case FP_NAN: { 141 if (IsSignallingNaN(value)) { 142 FPProcessException(); 143 } 144 if (ReadDN()) return kFP64DefaultNaN; 145 146 // Convert NaNs as the processor would: 147 // - The sign is propagated. 148 // - The payload (mantissa) is transferred entirely, except that the top 149 // bit is forced to '1', making the result a quiet NaN. The unused 150 // (low-order) payload bits are set to 0. 151 uint32_t raw = FloatToRawbits(value); 152 153 uint64_t sign = raw >> 31; 154 uint64_t exponent = (1 << 11) - 1; 155 uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw); 156 payload <<= (52 - 23); // The unused low-order bits should be 0. 157 payload |= (UINT64_C(1) << 51); // Force a quiet NaN. 158 159 return RawbitsToDouble((sign << 63) | (exponent << 52) | payload); 160 } 161 162 case FP_ZERO: 163 case FP_NORMAL: 164 case FP_SUBNORMAL: 165 case FP_INFINITE: { 166 // All other inputs are preserved in a standard cast, because every value 167 // representable using an IEEE-754 float is also representable using an 168 // IEEE-754 double. 169 return static_cast<double>(value); 170 } 171 } 172 173 VIXL_UNREACHABLE(); 174 return static_cast<double>(value); 175} 176 177 178float Simulator::FPToFloat(float16 value) { 179 uint32_t sign = value >> 15; 180 uint32_t exponent = 181 ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1, 182 kFloat16MantissaBits, 183 value); 184 uint32_t mantissa = 185 ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value); 186 187 switch (Float16Classify(value)) { 188 case FP_ZERO: 189 return (sign == 0) ? 0.0f : -0.0f; 190 191 case FP_INFINITE: 192 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 193 194 case FP_SUBNORMAL: { 195 // Calculate shift required to put mantissa into the most-significant bits 196 // of the destination mantissa. 197 int shift = CountLeadingZeros(mantissa << (32 - 10)); 198 199 // Shift mantissa and discard implicit '1'. 200 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 201 mantissa &= (1 << kFloatMantissaBits) - 1; 202 203 // Adjust the exponent for the shift applied, and rebias. 204 exponent = exponent - shift + (-15 + 127); 205 break; 206 } 207 208 case FP_NAN: 209 if (IsSignallingNaN(value)) { 210 FPProcessException(); 211 } 212 if (ReadDN()) return kFP32DefaultNaN; 213 214 // Convert NaNs as the processor would: 215 // - The sign is propagated. 216 // - The payload (mantissa) is transferred entirely, except that the top 217 // bit is forced to '1', making the result a quiet NaN. The unused 218 // (low-order) payload bits are set to 0. 219 exponent = (1 << kFloatExponentBits) - 1; 220 221 // Increase bits in mantissa, making low-order bits 0. 222 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 223 mantissa |= 1 << 22; // Force a quiet NaN. 224 break; 225 226 case FP_NORMAL: 227 // Increase bits in mantissa, making low-order bits 0. 228 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 229 230 // Change exponent bias. 231 exponent += (-15 + 127); 232 break; 233 234 default: 235 VIXL_UNREACHABLE(); 236 } 237 return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) | 238 mantissa); 239} 240 241 242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 243 // Only the FPTieEven rounding mode is implemented. 244 VIXL_ASSERT(round_mode == FPTieEven); 245 USE(round_mode); 246 247 uint32_t raw = FloatToRawbits(value); 248 int32_t sign = raw >> 31; 249 int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127; 250 uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw); 251 252 switch (std::fpclassify(value)) { 253 case FP_NAN: { 254 if (IsSignallingNaN(value)) { 255 FPProcessException(); 256 } 257 if (ReadDN()) return kFP16DefaultNaN; 258 259 // Convert NaNs as the processor would: 260 // - The sign is propagated. 261 // - The payload (mantissa) is transferred as much as possible, except 262 // that the top bit is forced to '1', making the result a quiet NaN. 263 float16 result = 264 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 265 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 266 result |= (1 << 9); // Force a quiet NaN; 267 return result; 268 } 269 270 case FP_ZERO: 271 return (sign == 0) ? 0 : 0x8000; 272 273 case FP_INFINITE: 274 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 275 276 case FP_NORMAL: 277 case FP_SUBNORMAL: { 278 // Convert float-to-half as the processor would, assuming that FPCR.FZ 279 // (flush-to-zero) is not set. 280 281 // Add the implicit '1' bit to the mantissa. 282 mantissa += (1 << 23); 283 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 284 } 285 } 286 287 VIXL_UNREACHABLE(); 288 return 0; 289} 290 291 292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 293 // Only the FPTieEven rounding mode is implemented. 294 VIXL_ASSERT(round_mode == FPTieEven); 295 USE(round_mode); 296 297 uint64_t raw = DoubleToRawbits(value); 298 int32_t sign = raw >> 63; 299 int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023; 300 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 301 302 switch (std::fpclassify(value)) { 303 case FP_NAN: { 304 if (IsSignallingNaN(value)) { 305 FPProcessException(); 306 } 307 if (ReadDN()) return kFP16DefaultNaN; 308 309 // Convert NaNs as the processor would: 310 // - The sign is propagated. 311 // - The payload (mantissa) is transferred as much as possible, except 312 // that the top bit is forced to '1', making the result a quiet NaN. 313 float16 result = 314 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 315 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 316 result |= (1 << 9); // Force a quiet NaN; 317 return result; 318 } 319 320 case FP_ZERO: 321 return (sign == 0) ? 0 : 0x8000; 322 323 case FP_INFINITE: 324 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 325 326 case FP_NORMAL: 327 case FP_SUBNORMAL: { 328 // Convert double-to-half as the processor would, assuming that FPCR.FZ 329 // (flush-to-zero) is not set. 330 331 // Add the implicit '1' bit to the mantissa. 332 mantissa += (UINT64_C(1) << 52); 333 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 334 } 335 } 336 337 VIXL_UNREACHABLE(); 338 return 0; 339} 340 341 342float Simulator::FPToFloat(double value, FPRounding round_mode) { 343 // Only the FPTieEven rounding mode is implemented. 344 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 345 USE(round_mode); 346 347 switch (std::fpclassify(value)) { 348 case FP_NAN: { 349 if (IsSignallingNaN(value)) { 350 FPProcessException(); 351 } 352 if (ReadDN()) return kFP32DefaultNaN; 353 354 // Convert NaNs as the processor would: 355 // - The sign is propagated. 356 // - The payload (mantissa) is transferred as much as possible, except 357 // that the top bit is forced to '1', making the result a quiet NaN. 358 uint64_t raw = DoubleToRawbits(value); 359 360 uint32_t sign = raw >> 63; 361 uint32_t exponent = (1 << 8) - 1; 362 uint32_t payload = 363 static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw)); 364 payload |= (1 << 22); // Force a quiet NaN. 365 366 return RawbitsToFloat((sign << 31) | (exponent << 23) | payload); 367 } 368 369 case FP_ZERO: 370 case FP_INFINITE: { 371 // In a C++ cast, any value representable in the target type will be 372 // unchanged. This is always the case for +/-0.0 and infinities. 373 return static_cast<float>(value); 374 } 375 376 case FP_NORMAL: 377 case FP_SUBNORMAL: { 378 // Convert double-to-float as the processor would, assuming that FPCR.FZ 379 // (flush-to-zero) is not set. 380 uint64_t raw = DoubleToRawbits(value); 381 // Extract the IEEE-754 double components. 382 uint32_t sign = raw >> 63; 383 // Extract the exponent and remove the IEEE-754 encoding bias. 384 int32_t exponent = 385 static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; 386 // Extract the mantissa and add the implicit '1' bit. 387 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 388 if (std::fpclassify(value) == FP_NORMAL) { 389 mantissa |= (UINT64_C(1) << 52); 390 } 391 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 392 } 393 } 394 395 VIXL_UNREACHABLE(); 396 return value; 397} 398 399 400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 401 dst.ClearForWrite(vform); 402 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 403 dst.ReadUintFromMem(vform, i, addr); 404 addr += LaneSizeInBytesFromFormat(vform); 405 } 406} 407 408 409void Simulator::ld1(VectorFormat vform, 410 LogicVRegister dst, 411 int index, 412 uint64_t addr) { 413 dst.ReadUintFromMem(vform, index, addr); 414} 415 416 417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 418 dst.ClearForWrite(vform); 419 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 420 dst.ReadUintFromMem(vform, i, addr); 421 } 422} 423 424 425void Simulator::ld2(VectorFormat vform, 426 LogicVRegister dst1, 427 LogicVRegister dst2, 428 uint64_t addr1) { 429 dst1.ClearForWrite(vform); 430 dst2.ClearForWrite(vform); 431 int esize = LaneSizeInBytesFromFormat(vform); 432 uint64_t addr2 = addr1 + esize; 433 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 434 dst1.ReadUintFromMem(vform, i, addr1); 435 dst2.ReadUintFromMem(vform, i, addr2); 436 addr1 += 2 * esize; 437 addr2 += 2 * esize; 438 } 439} 440 441 442void Simulator::ld2(VectorFormat vform, 443 LogicVRegister dst1, 444 LogicVRegister dst2, 445 int index, 446 uint64_t addr1) { 447 dst1.ClearForWrite(vform); 448 dst2.ClearForWrite(vform); 449 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 450 dst1.ReadUintFromMem(vform, index, addr1); 451 dst2.ReadUintFromMem(vform, index, addr2); 452} 453 454 455void Simulator::ld2r(VectorFormat vform, 456 LogicVRegister dst1, 457 LogicVRegister dst2, 458 uint64_t addr) { 459 dst1.ClearForWrite(vform); 460 dst2.ClearForWrite(vform); 461 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 462 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 463 dst1.ReadUintFromMem(vform, i, addr); 464 dst2.ReadUintFromMem(vform, i, addr2); 465 } 466} 467 468 469void Simulator::ld3(VectorFormat vform, 470 LogicVRegister dst1, 471 LogicVRegister dst2, 472 LogicVRegister dst3, 473 uint64_t addr1) { 474 dst1.ClearForWrite(vform); 475 dst2.ClearForWrite(vform); 476 dst3.ClearForWrite(vform); 477 int esize = LaneSizeInBytesFromFormat(vform); 478 uint64_t addr2 = addr1 + esize; 479 uint64_t addr3 = addr2 + esize; 480 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 481 dst1.ReadUintFromMem(vform, i, addr1); 482 dst2.ReadUintFromMem(vform, i, addr2); 483 dst3.ReadUintFromMem(vform, i, addr3); 484 addr1 += 3 * esize; 485 addr2 += 3 * esize; 486 addr3 += 3 * esize; 487 } 488} 489 490 491void Simulator::ld3(VectorFormat vform, 492 LogicVRegister dst1, 493 LogicVRegister dst2, 494 LogicVRegister dst3, 495 int index, 496 uint64_t addr1) { 497 dst1.ClearForWrite(vform); 498 dst2.ClearForWrite(vform); 499 dst3.ClearForWrite(vform); 500 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 501 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 502 dst1.ReadUintFromMem(vform, index, addr1); 503 dst2.ReadUintFromMem(vform, index, addr2); 504 dst3.ReadUintFromMem(vform, index, addr3); 505} 506 507 508void Simulator::ld3r(VectorFormat vform, 509 LogicVRegister dst1, 510 LogicVRegister dst2, 511 LogicVRegister dst3, 512 uint64_t addr) { 513 dst1.ClearForWrite(vform); 514 dst2.ClearForWrite(vform); 515 dst3.ClearForWrite(vform); 516 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 517 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 518 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 519 dst1.ReadUintFromMem(vform, i, addr); 520 dst2.ReadUintFromMem(vform, i, addr2); 521 dst3.ReadUintFromMem(vform, i, addr3); 522 } 523} 524 525 526void Simulator::ld4(VectorFormat vform, 527 LogicVRegister dst1, 528 LogicVRegister dst2, 529 LogicVRegister dst3, 530 LogicVRegister dst4, 531 uint64_t addr1) { 532 dst1.ClearForWrite(vform); 533 dst2.ClearForWrite(vform); 534 dst3.ClearForWrite(vform); 535 dst4.ClearForWrite(vform); 536 int esize = LaneSizeInBytesFromFormat(vform); 537 uint64_t addr2 = addr1 + esize; 538 uint64_t addr3 = addr2 + esize; 539 uint64_t addr4 = addr3 + esize; 540 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 541 dst1.ReadUintFromMem(vform, i, addr1); 542 dst2.ReadUintFromMem(vform, i, addr2); 543 dst3.ReadUintFromMem(vform, i, addr3); 544 dst4.ReadUintFromMem(vform, i, addr4); 545 addr1 += 4 * esize; 546 addr2 += 4 * esize; 547 addr3 += 4 * esize; 548 addr4 += 4 * esize; 549 } 550} 551 552 553void Simulator::ld4(VectorFormat vform, 554 LogicVRegister dst1, 555 LogicVRegister dst2, 556 LogicVRegister dst3, 557 LogicVRegister dst4, 558 int index, 559 uint64_t addr1) { 560 dst1.ClearForWrite(vform); 561 dst2.ClearForWrite(vform); 562 dst3.ClearForWrite(vform); 563 dst4.ClearForWrite(vform); 564 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 565 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 566 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 567 dst1.ReadUintFromMem(vform, index, addr1); 568 dst2.ReadUintFromMem(vform, index, addr2); 569 dst3.ReadUintFromMem(vform, index, addr3); 570 dst4.ReadUintFromMem(vform, index, addr4); 571} 572 573 574void Simulator::ld4r(VectorFormat vform, 575 LogicVRegister dst1, 576 LogicVRegister dst2, 577 LogicVRegister dst3, 578 LogicVRegister dst4, 579 uint64_t addr) { 580 dst1.ClearForWrite(vform); 581 dst2.ClearForWrite(vform); 582 dst3.ClearForWrite(vform); 583 dst4.ClearForWrite(vform); 584 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 585 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 586 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 587 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 588 dst1.ReadUintFromMem(vform, i, addr); 589 dst2.ReadUintFromMem(vform, i, addr2); 590 dst3.ReadUintFromMem(vform, i, addr3); 591 dst4.ReadUintFromMem(vform, i, addr4); 592 } 593} 594 595 596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { 597 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 598 src.WriteUintToMem(vform, i, addr); 599 addr += LaneSizeInBytesFromFormat(vform); 600 } 601} 602 603 604void Simulator::st1(VectorFormat vform, 605 LogicVRegister src, 606 int index, 607 uint64_t addr) { 608 src.WriteUintToMem(vform, index, addr); 609} 610 611 612void Simulator::st2(VectorFormat vform, 613 LogicVRegister dst, 614 LogicVRegister dst2, 615 uint64_t addr) { 616 int esize = LaneSizeInBytesFromFormat(vform); 617 uint64_t addr2 = addr + esize; 618 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 619 dst.WriteUintToMem(vform, i, addr); 620 dst2.WriteUintToMem(vform, i, addr2); 621 addr += 2 * esize; 622 addr2 += 2 * esize; 623 } 624} 625 626 627void Simulator::st2(VectorFormat vform, 628 LogicVRegister dst, 629 LogicVRegister dst2, 630 int index, 631 uint64_t addr) { 632 int esize = LaneSizeInBytesFromFormat(vform); 633 dst.WriteUintToMem(vform, index, addr); 634 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 635} 636 637 638void Simulator::st3(VectorFormat vform, 639 LogicVRegister dst, 640 LogicVRegister dst2, 641 LogicVRegister dst3, 642 uint64_t addr) { 643 int esize = LaneSizeInBytesFromFormat(vform); 644 uint64_t addr2 = addr + esize; 645 uint64_t addr3 = addr2 + esize; 646 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 647 dst.WriteUintToMem(vform, i, addr); 648 dst2.WriteUintToMem(vform, i, addr2); 649 dst3.WriteUintToMem(vform, i, addr3); 650 addr += 3 * esize; 651 addr2 += 3 * esize; 652 addr3 += 3 * esize; 653 } 654} 655 656 657void Simulator::st3(VectorFormat vform, 658 LogicVRegister dst, 659 LogicVRegister dst2, 660 LogicVRegister dst3, 661 int index, 662 uint64_t addr) { 663 int esize = LaneSizeInBytesFromFormat(vform); 664 dst.WriteUintToMem(vform, index, addr); 665 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 666 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 667} 668 669 670void Simulator::st4(VectorFormat vform, 671 LogicVRegister dst, 672 LogicVRegister dst2, 673 LogicVRegister dst3, 674 LogicVRegister dst4, 675 uint64_t addr) { 676 int esize = LaneSizeInBytesFromFormat(vform); 677 uint64_t addr2 = addr + esize; 678 uint64_t addr3 = addr2 + esize; 679 uint64_t addr4 = addr3 + esize; 680 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 681 dst.WriteUintToMem(vform, i, addr); 682 dst2.WriteUintToMem(vform, i, addr2); 683 dst3.WriteUintToMem(vform, i, addr3); 684 dst4.WriteUintToMem(vform, i, addr4); 685 addr += 4 * esize; 686 addr2 += 4 * esize; 687 addr3 += 4 * esize; 688 addr4 += 4 * esize; 689 } 690} 691 692 693void Simulator::st4(VectorFormat vform, 694 LogicVRegister dst, 695 LogicVRegister dst2, 696 LogicVRegister dst3, 697 LogicVRegister dst4, 698 int index, 699 uint64_t addr) { 700 int esize = LaneSizeInBytesFromFormat(vform); 701 dst.WriteUintToMem(vform, index, addr); 702 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 703 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 704 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 705} 706 707 708LogicVRegister Simulator::cmp(VectorFormat vform, 709 LogicVRegister dst, 710 const LogicVRegister& src1, 711 const LogicVRegister& src2, 712 Condition cond) { 713 dst.ClearForWrite(vform); 714 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 715 int64_t sa = src1.Int(vform, i); 716 int64_t sb = src2.Int(vform, i); 717 uint64_t ua = src1.Uint(vform, i); 718 uint64_t ub = src2.Uint(vform, i); 719 bool result = false; 720 switch (cond) { 721 case eq: 722 result = (ua == ub); 723 break; 724 case ge: 725 result = (sa >= sb); 726 break; 727 case gt: 728 result = (sa > sb); 729 break; 730 case hi: 731 result = (ua > ub); 732 break; 733 case hs: 734 result = (ua >= ub); 735 break; 736 case lt: 737 result = (sa < sb); 738 break; 739 case le: 740 result = (sa <= sb); 741 break; 742 default: 743 VIXL_UNREACHABLE(); 744 break; 745 } 746 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 747 } 748 return dst; 749} 750 751 752LogicVRegister Simulator::cmp(VectorFormat vform, 753 LogicVRegister dst, 754 const LogicVRegister& src1, 755 int imm, 756 Condition cond) { 757 SimVRegister temp; 758 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 759 return cmp(vform, dst, src1, imm_reg, cond); 760} 761 762 763LogicVRegister Simulator::cmptst(VectorFormat vform, 764 LogicVRegister dst, 765 const LogicVRegister& src1, 766 const LogicVRegister& src2) { 767 dst.ClearForWrite(vform); 768 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 769 uint64_t ua = src1.Uint(vform, i); 770 uint64_t ub = src2.Uint(vform, i); 771 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 772 } 773 return dst; 774} 775 776 777LogicVRegister Simulator::add(VectorFormat vform, 778 LogicVRegister dst, 779 const LogicVRegister& src1, 780 const LogicVRegister& src2) { 781 dst.ClearForWrite(vform); 782 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 783 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 784 // Test for unsigned saturation. 785 uint64_t ua = src1.UintLeftJustified(vform, i); 786 uint64_t ub = src2.UintLeftJustified(vform, i); 787 uint64_t ur = ua + ub; 788 if (ur < ua) { 789 dst.SetUnsignedSat(i, true); 790 } 791 792 // Test for signed saturation. 793 int64_t sa = src1.IntLeftJustified(vform, i); 794 int64_t sb = src2.IntLeftJustified(vform, i); 795 int64_t sr = sa + sb; 796 // If the signs of the operands are the same, but different from the result, 797 // there was an overflow. 798 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 799 dst.SetSignedSat(i, sa >= 0); 800 } 801 802 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 803 } 804 return dst; 805} 806 807 808LogicVRegister Simulator::addp(VectorFormat vform, 809 LogicVRegister dst, 810 const LogicVRegister& src1, 811 const LogicVRegister& src2) { 812 SimVRegister temp1, temp2; 813 uzp1(vform, temp1, src1, src2); 814 uzp2(vform, temp2, src1, src2); 815 add(vform, dst, temp1, temp2); 816 return dst; 817} 818 819 820LogicVRegister Simulator::mla(VectorFormat vform, 821 LogicVRegister dst, 822 const LogicVRegister& src1, 823 const LogicVRegister& src2) { 824 SimVRegister temp; 825 mul(vform, temp, src1, src2); 826 add(vform, dst, dst, temp); 827 return dst; 828} 829 830 831LogicVRegister Simulator::mls(VectorFormat vform, 832 LogicVRegister dst, 833 const LogicVRegister& src1, 834 const LogicVRegister& src2) { 835 SimVRegister temp; 836 mul(vform, temp, src1, src2); 837 sub(vform, dst, dst, temp); 838 return dst; 839} 840 841 842LogicVRegister Simulator::mul(VectorFormat vform, 843 LogicVRegister dst, 844 const LogicVRegister& src1, 845 const LogicVRegister& src2) { 846 dst.ClearForWrite(vform); 847 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 848 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 849 } 850 return dst; 851} 852 853 854LogicVRegister Simulator::mul(VectorFormat vform, 855 LogicVRegister dst, 856 const LogicVRegister& src1, 857 const LogicVRegister& src2, 858 int index) { 859 SimVRegister temp; 860 VectorFormat indexform = VectorFormatFillQ(vform); 861 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 862} 863 864 865LogicVRegister Simulator::mla(VectorFormat vform, 866 LogicVRegister dst, 867 const LogicVRegister& src1, 868 const LogicVRegister& src2, 869 int index) { 870 SimVRegister temp; 871 VectorFormat indexform = VectorFormatFillQ(vform); 872 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); 873} 874 875 876LogicVRegister Simulator::mls(VectorFormat vform, 877 LogicVRegister dst, 878 const LogicVRegister& src1, 879 const LogicVRegister& src2, 880 int index) { 881 SimVRegister temp; 882 VectorFormat indexform = VectorFormatFillQ(vform); 883 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 884} 885 886 887LogicVRegister Simulator::smull(VectorFormat vform, 888 LogicVRegister dst, 889 const LogicVRegister& src1, 890 const LogicVRegister& src2, 891 int index) { 892 SimVRegister temp; 893 VectorFormat indexform = 894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 895 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 896} 897 898 899LogicVRegister Simulator::smull2(VectorFormat vform, 900 LogicVRegister dst, 901 const LogicVRegister& src1, 902 const LogicVRegister& src2, 903 int index) { 904 SimVRegister temp; 905 VectorFormat indexform = 906 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 907 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 908} 909 910 911LogicVRegister Simulator::umull(VectorFormat vform, 912 LogicVRegister dst, 913 const LogicVRegister& src1, 914 const LogicVRegister& src2, 915 int index) { 916 SimVRegister temp; 917 VectorFormat indexform = 918 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 919 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 920} 921 922 923LogicVRegister Simulator::umull2(VectorFormat vform, 924 LogicVRegister dst, 925 const LogicVRegister& src1, 926 const LogicVRegister& src2, 927 int index) { 928 SimVRegister temp; 929 VectorFormat indexform = 930 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 931 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 932} 933 934 935LogicVRegister Simulator::smlal(VectorFormat vform, 936 LogicVRegister dst, 937 const LogicVRegister& src1, 938 const LogicVRegister& src2, 939 int index) { 940 SimVRegister temp; 941 VectorFormat indexform = 942 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 943 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 944} 945 946 947LogicVRegister Simulator::smlal2(VectorFormat vform, 948 LogicVRegister dst, 949 const LogicVRegister& src1, 950 const LogicVRegister& src2, 951 int index) { 952 SimVRegister temp; 953 VectorFormat indexform = 954 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 955 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 956} 957 958 959LogicVRegister Simulator::umlal(VectorFormat vform, 960 LogicVRegister dst, 961 const LogicVRegister& src1, 962 const LogicVRegister& src2, 963 int index) { 964 SimVRegister temp; 965 VectorFormat indexform = 966 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 967 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 968} 969 970 971LogicVRegister Simulator::umlal2(VectorFormat vform, 972 LogicVRegister dst, 973 const LogicVRegister& src1, 974 const LogicVRegister& src2, 975 int index) { 976 SimVRegister temp; 977 VectorFormat indexform = 978 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 979 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 980} 981 982 983LogicVRegister Simulator::smlsl(VectorFormat vform, 984 LogicVRegister dst, 985 const LogicVRegister& src1, 986 const LogicVRegister& src2, 987 int index) { 988 SimVRegister temp; 989 VectorFormat indexform = 990 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 991 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 992} 993 994 995LogicVRegister Simulator::smlsl2(VectorFormat vform, 996 LogicVRegister dst, 997 const LogicVRegister& src1, 998 const LogicVRegister& src2, 999 int index) { 1000 SimVRegister temp; 1001 VectorFormat indexform = 1002 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1003 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1004} 1005 1006 1007LogicVRegister Simulator::umlsl(VectorFormat vform, 1008 LogicVRegister dst, 1009 const LogicVRegister& src1, 1010 const LogicVRegister& src2, 1011 int index) { 1012 SimVRegister temp; 1013 VectorFormat indexform = 1014 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1015 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1016} 1017 1018 1019LogicVRegister Simulator::umlsl2(VectorFormat vform, 1020 LogicVRegister dst, 1021 const LogicVRegister& src1, 1022 const LogicVRegister& src2, 1023 int index) { 1024 SimVRegister temp; 1025 VectorFormat indexform = 1026 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1027 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1028} 1029 1030 1031LogicVRegister Simulator::sqdmull(VectorFormat vform, 1032 LogicVRegister dst, 1033 const LogicVRegister& src1, 1034 const LogicVRegister& src2, 1035 int index) { 1036 SimVRegister temp; 1037 VectorFormat indexform = 1038 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1039 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1040} 1041 1042 1043LogicVRegister Simulator::sqdmull2(VectorFormat vform, 1044 LogicVRegister dst, 1045 const LogicVRegister& src1, 1046 const LogicVRegister& src2, 1047 int index) { 1048 SimVRegister temp; 1049 VectorFormat indexform = 1050 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1051 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1052} 1053 1054 1055LogicVRegister Simulator::sqdmlal(VectorFormat vform, 1056 LogicVRegister dst, 1057 const LogicVRegister& src1, 1058 const LogicVRegister& src2, 1059 int index) { 1060 SimVRegister temp; 1061 VectorFormat indexform = 1062 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1063 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1064} 1065 1066 1067LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 1068 LogicVRegister dst, 1069 const LogicVRegister& src1, 1070 const LogicVRegister& src2, 1071 int index) { 1072 SimVRegister temp; 1073 VectorFormat indexform = 1074 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1075 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1076} 1077 1078 1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1080 LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2, 1083 int index) { 1084 SimVRegister temp; 1085 VectorFormat indexform = 1086 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1087 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1088} 1089 1090 1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1092 LogicVRegister dst, 1093 const LogicVRegister& src1, 1094 const LogicVRegister& src2, 1095 int index) { 1096 SimVRegister temp; 1097 VectorFormat indexform = 1098 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1099 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1100} 1101 1102 1103LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1104 LogicVRegister dst, 1105 const LogicVRegister& src1, 1106 const LogicVRegister& src2, 1107 int index) { 1108 SimVRegister temp; 1109 VectorFormat indexform = VectorFormatFillQ(vform); 1110 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1111} 1112 1113 1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1115 LogicVRegister dst, 1116 const LogicVRegister& src1, 1117 const LogicVRegister& src2, 1118 int index) { 1119 SimVRegister temp; 1120 VectorFormat indexform = VectorFormatFillQ(vform); 1121 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1122} 1123 1124 1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { 1126 uint16_t result = 0; 1127 uint16_t extended_op2 = op2; 1128 for (int i = 0; i < 8; ++i) { 1129 if ((op1 >> i) & 1) { 1130 result = result ^ (extended_op2 << i); 1131 } 1132 } 1133 return result; 1134} 1135 1136 1137LogicVRegister Simulator::pmul(VectorFormat vform, 1138 LogicVRegister dst, 1139 const LogicVRegister& src1, 1140 const LogicVRegister& src2) { 1141 dst.ClearForWrite(vform); 1142 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1143 dst.SetUint(vform, 1144 i, 1145 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1146 } 1147 return dst; 1148} 1149 1150 1151LogicVRegister Simulator::pmull(VectorFormat vform, 1152 LogicVRegister dst, 1153 const LogicVRegister& src1, 1154 const LogicVRegister& src2) { 1155 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1156 dst.ClearForWrite(vform); 1157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1158 dst.SetUint(vform, 1159 i, 1160 PolynomialMult(src1.Uint(vform_src, i), 1161 src2.Uint(vform_src, i))); 1162 } 1163 return dst; 1164} 1165 1166 1167LogicVRegister Simulator::pmull2(VectorFormat vform, 1168 LogicVRegister dst, 1169 const LogicVRegister& src1, 1170 const LogicVRegister& src2) { 1171 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1172 dst.ClearForWrite(vform); 1173 int lane_count = LaneCountFromFormat(vform); 1174 for (int i = 0; i < lane_count; i++) { 1175 dst.SetUint(vform, 1176 i, 1177 PolynomialMult(src1.Uint(vform_src, lane_count + i), 1178 src2.Uint(vform_src, lane_count + i))); 1179 } 1180 return dst; 1181} 1182 1183 1184LogicVRegister Simulator::sub(VectorFormat vform, 1185 LogicVRegister dst, 1186 const LogicVRegister& src1, 1187 const LogicVRegister& src2) { 1188 dst.ClearForWrite(vform); 1189 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1190 // Test for unsigned saturation. 1191 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 1192 dst.SetUnsignedSat(i, false); 1193 } 1194 1195 // Test for signed saturation. 1196 int64_t sa = src1.IntLeftJustified(vform, i); 1197 int64_t sb = src2.IntLeftJustified(vform, i); 1198 int64_t sr = sa - sb; 1199 // If the signs of the operands are different, and the sign of the first 1200 // operand doesn't match the result, there was an overflow. 1201 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 1202 dst.SetSignedSat(i, sr < 0); 1203 } 1204 1205 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 1206 } 1207 return dst; 1208} 1209 1210 1211LogicVRegister Simulator::and_(VectorFormat vform, 1212 LogicVRegister dst, 1213 const LogicVRegister& src1, 1214 const LogicVRegister& src2) { 1215 dst.ClearForWrite(vform); 1216 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1217 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1218 } 1219 return dst; 1220} 1221 1222 1223LogicVRegister Simulator::orr(VectorFormat vform, 1224 LogicVRegister dst, 1225 const LogicVRegister& src1, 1226 const LogicVRegister& src2) { 1227 dst.ClearForWrite(vform); 1228 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1229 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1230 } 1231 return dst; 1232} 1233 1234 1235LogicVRegister Simulator::orn(VectorFormat vform, 1236 LogicVRegister dst, 1237 const LogicVRegister& src1, 1238 const LogicVRegister& src2) { 1239 dst.ClearForWrite(vform); 1240 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1241 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1242 } 1243 return dst; 1244} 1245 1246 1247LogicVRegister Simulator::eor(VectorFormat vform, 1248 LogicVRegister dst, 1249 const LogicVRegister& src1, 1250 const LogicVRegister& src2) { 1251 dst.ClearForWrite(vform); 1252 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1253 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1254 } 1255 return dst; 1256} 1257 1258 1259LogicVRegister Simulator::bic(VectorFormat vform, 1260 LogicVRegister dst, 1261 const LogicVRegister& src1, 1262 const LogicVRegister& src2) { 1263 dst.ClearForWrite(vform); 1264 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1265 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1266 } 1267 return dst; 1268} 1269 1270 1271LogicVRegister Simulator::bic(VectorFormat vform, 1272 LogicVRegister dst, 1273 const LogicVRegister& src, 1274 uint64_t imm) { 1275 uint64_t result[16]; 1276 int laneCount = LaneCountFromFormat(vform); 1277 for (int i = 0; i < laneCount; ++i) { 1278 result[i] = src.Uint(vform, i) & ~imm; 1279 } 1280 dst.ClearForWrite(vform); 1281 for (int i = 0; i < laneCount; ++i) { 1282 dst.SetUint(vform, i, result[i]); 1283 } 1284 return dst; 1285} 1286 1287 1288LogicVRegister Simulator::bif(VectorFormat vform, 1289 LogicVRegister dst, 1290 const LogicVRegister& src1, 1291 const LogicVRegister& src2) { 1292 dst.ClearForWrite(vform); 1293 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1294 uint64_t operand1 = dst.Uint(vform, i); 1295 uint64_t operand2 = ~src2.Uint(vform, i); 1296 uint64_t operand3 = src1.Uint(vform, i); 1297 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1298 dst.SetUint(vform, i, result); 1299 } 1300 return dst; 1301} 1302 1303 1304LogicVRegister Simulator::bit(VectorFormat vform, 1305 LogicVRegister dst, 1306 const LogicVRegister& src1, 1307 const LogicVRegister& src2) { 1308 dst.ClearForWrite(vform); 1309 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1310 uint64_t operand1 = dst.Uint(vform, i); 1311 uint64_t operand2 = src2.Uint(vform, i); 1312 uint64_t operand3 = src1.Uint(vform, i); 1313 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1314 dst.SetUint(vform, i, result); 1315 } 1316 return dst; 1317} 1318 1319 1320LogicVRegister Simulator::bsl(VectorFormat vform, 1321 LogicVRegister dst, 1322 const LogicVRegister& src1, 1323 const LogicVRegister& src2) { 1324 dst.ClearForWrite(vform); 1325 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1326 uint64_t operand1 = src2.Uint(vform, i); 1327 uint64_t operand2 = dst.Uint(vform, i); 1328 uint64_t operand3 = src1.Uint(vform, i); 1329 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1330 dst.SetUint(vform, i, result); 1331 } 1332 return dst; 1333} 1334 1335 1336LogicVRegister Simulator::sminmax(VectorFormat vform, 1337 LogicVRegister dst, 1338 const LogicVRegister& src1, 1339 const LogicVRegister& src2, 1340 bool max) { 1341 dst.ClearForWrite(vform); 1342 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1343 int64_t src1_val = src1.Int(vform, i); 1344 int64_t src2_val = src2.Int(vform, i); 1345 int64_t dst_val; 1346 if (max == true) { 1347 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1348 } else { 1349 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1350 } 1351 dst.SetInt(vform, i, dst_val); 1352 } 1353 return dst; 1354} 1355 1356 1357LogicVRegister Simulator::smax(VectorFormat vform, 1358 LogicVRegister dst, 1359 const LogicVRegister& src1, 1360 const LogicVRegister& src2) { 1361 return sminmax(vform, dst, src1, src2, true); 1362} 1363 1364 1365LogicVRegister Simulator::smin(VectorFormat vform, 1366 LogicVRegister dst, 1367 const LogicVRegister& src1, 1368 const LogicVRegister& src2) { 1369 return sminmax(vform, dst, src1, src2, false); 1370} 1371 1372 1373LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1374 LogicVRegister dst, 1375 int dst_index, 1376 const LogicVRegister& src, 1377 bool max) { 1378 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1379 int64_t src1_val = src.Int(vform, i); 1380 int64_t src2_val = src.Int(vform, i + 1); 1381 int64_t dst_val; 1382 if (max == true) { 1383 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1384 } else { 1385 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1386 } 1387 dst.SetInt(vform, dst_index + (i >> 1), dst_val); 1388 } 1389 return dst; 1390} 1391 1392 1393LogicVRegister Simulator::smaxp(VectorFormat vform, 1394 LogicVRegister dst, 1395 const LogicVRegister& src1, 1396 const LogicVRegister& src2) { 1397 dst.ClearForWrite(vform); 1398 sminmaxp(vform, dst, 0, src1, true); 1399 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1400 return dst; 1401} 1402 1403 1404LogicVRegister Simulator::sminp(VectorFormat vform, 1405 LogicVRegister dst, 1406 const LogicVRegister& src1, 1407 const LogicVRegister& src2) { 1408 dst.ClearForWrite(vform); 1409 sminmaxp(vform, dst, 0, src1, false); 1410 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1411 return dst; 1412} 1413 1414 1415LogicVRegister Simulator::addp(VectorFormat vform, 1416 LogicVRegister dst, 1417 const LogicVRegister& src) { 1418 VIXL_ASSERT(vform == kFormatD); 1419 1420 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1421 dst.ClearForWrite(vform); 1422 dst.SetInt(vform, 0, dst_val); 1423 return dst; 1424} 1425 1426 1427LogicVRegister Simulator::addv(VectorFormat vform, 1428 LogicVRegister dst, 1429 const LogicVRegister& src) { 1430 VectorFormat vform_dst = 1431 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1432 1433 1434 int64_t dst_val = 0; 1435 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1436 dst_val += src.Int(vform, i); 1437 } 1438 1439 dst.ClearForWrite(vform_dst); 1440 dst.SetInt(vform_dst, 0, dst_val); 1441 return dst; 1442} 1443 1444 1445LogicVRegister Simulator::saddlv(VectorFormat vform, 1446 LogicVRegister dst, 1447 const LogicVRegister& src) { 1448 VectorFormat vform_dst = 1449 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1450 1451 int64_t dst_val = 0; 1452 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1453 dst_val += src.Int(vform, i); 1454 } 1455 1456 dst.ClearForWrite(vform_dst); 1457 dst.SetInt(vform_dst, 0, dst_val); 1458 return dst; 1459} 1460 1461 1462LogicVRegister Simulator::uaddlv(VectorFormat vform, 1463 LogicVRegister dst, 1464 const LogicVRegister& src) { 1465 VectorFormat vform_dst = 1466 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1467 1468 uint64_t dst_val = 0; 1469 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1470 dst_val += src.Uint(vform, i); 1471 } 1472 1473 dst.ClearForWrite(vform_dst); 1474 dst.SetUint(vform_dst, 0, dst_val); 1475 return dst; 1476} 1477 1478 1479LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1480 LogicVRegister dst, 1481 const LogicVRegister& src, 1482 bool max) { 1483 dst.ClearForWrite(vform); 1484 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1485 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1486 dst.SetInt(vform, i, 0); 1487 int64_t src_val = src.Int(vform, i); 1488 if (max == true) { 1489 dst_val = (src_val > dst_val) ? src_val : dst_val; 1490 } else { 1491 dst_val = (src_val < dst_val) ? src_val : dst_val; 1492 } 1493 } 1494 dst.SetInt(vform, 0, dst_val); 1495 return dst; 1496} 1497 1498 1499LogicVRegister Simulator::smaxv(VectorFormat vform, 1500 LogicVRegister dst, 1501 const LogicVRegister& src) { 1502 sminmaxv(vform, dst, src, true); 1503 return dst; 1504} 1505 1506 1507LogicVRegister Simulator::sminv(VectorFormat vform, 1508 LogicVRegister dst, 1509 const LogicVRegister& src) { 1510 sminmaxv(vform, dst, src, false); 1511 return dst; 1512} 1513 1514 1515LogicVRegister Simulator::uminmax(VectorFormat vform, 1516 LogicVRegister dst, 1517 const LogicVRegister& src1, 1518 const LogicVRegister& src2, 1519 bool max) { 1520 dst.ClearForWrite(vform); 1521 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1522 uint64_t src1_val = src1.Uint(vform, i); 1523 uint64_t src2_val = src2.Uint(vform, i); 1524 uint64_t dst_val; 1525 if (max == true) { 1526 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1527 } else { 1528 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1529 } 1530 dst.SetUint(vform, i, dst_val); 1531 } 1532 return dst; 1533} 1534 1535 1536LogicVRegister Simulator::umax(VectorFormat vform, 1537 LogicVRegister dst, 1538 const LogicVRegister& src1, 1539 const LogicVRegister& src2) { 1540 return uminmax(vform, dst, src1, src2, true); 1541} 1542 1543 1544LogicVRegister Simulator::umin(VectorFormat vform, 1545 LogicVRegister dst, 1546 const LogicVRegister& src1, 1547 const LogicVRegister& src2) { 1548 return uminmax(vform, dst, src1, src2, false); 1549} 1550 1551 1552LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1553 LogicVRegister dst, 1554 int dst_index, 1555 const LogicVRegister& src, 1556 bool max) { 1557 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1558 uint64_t src1_val = src.Uint(vform, i); 1559 uint64_t src2_val = src.Uint(vform, i + 1); 1560 uint64_t dst_val; 1561 if (max == true) { 1562 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1563 } else { 1564 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1565 } 1566 dst.SetUint(vform, dst_index + (i >> 1), dst_val); 1567 } 1568 return dst; 1569} 1570 1571 1572LogicVRegister Simulator::umaxp(VectorFormat vform, 1573 LogicVRegister dst, 1574 const LogicVRegister& src1, 1575 const LogicVRegister& src2) { 1576 dst.ClearForWrite(vform); 1577 uminmaxp(vform, dst, 0, src1, true); 1578 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1579 return dst; 1580} 1581 1582 1583LogicVRegister Simulator::uminp(VectorFormat vform, 1584 LogicVRegister dst, 1585 const LogicVRegister& src1, 1586 const LogicVRegister& src2) { 1587 dst.ClearForWrite(vform); 1588 uminmaxp(vform, dst, 0, src1, false); 1589 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1590 return dst; 1591} 1592 1593 1594LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1595 LogicVRegister dst, 1596 const LogicVRegister& src, 1597 bool max) { 1598 dst.ClearForWrite(vform); 1599 uint64_t dst_val = max ? 0 : UINT64_MAX; 1600 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1601 dst.SetUint(vform, i, 0); 1602 uint64_t src_val = src.Uint(vform, i); 1603 if (max == true) { 1604 dst_val = (src_val > dst_val) ? src_val : dst_val; 1605 } else { 1606 dst_val = (src_val < dst_val) ? src_val : dst_val; 1607 } 1608 } 1609 dst.SetUint(vform, 0, dst_val); 1610 return dst; 1611} 1612 1613 1614LogicVRegister Simulator::umaxv(VectorFormat vform, 1615 LogicVRegister dst, 1616 const LogicVRegister& src) { 1617 uminmaxv(vform, dst, src, true); 1618 return dst; 1619} 1620 1621 1622LogicVRegister Simulator::uminv(VectorFormat vform, 1623 LogicVRegister dst, 1624 const LogicVRegister& src) { 1625 uminmaxv(vform, dst, src, false); 1626 return dst; 1627} 1628 1629 1630LogicVRegister Simulator::shl(VectorFormat vform, 1631 LogicVRegister dst, 1632 const LogicVRegister& src, 1633 int shift) { 1634 VIXL_ASSERT(shift >= 0); 1635 SimVRegister temp; 1636 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1637 return ushl(vform, dst, src, shiftreg); 1638} 1639 1640 1641LogicVRegister Simulator::sshll(VectorFormat vform, 1642 LogicVRegister dst, 1643 const LogicVRegister& src, 1644 int shift) { 1645 VIXL_ASSERT(shift >= 0); 1646 SimVRegister temp1, temp2; 1647 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1648 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1649 return sshl(vform, dst, extendedreg, shiftreg); 1650} 1651 1652 1653LogicVRegister Simulator::sshll2(VectorFormat vform, 1654 LogicVRegister dst, 1655 const LogicVRegister& src, 1656 int shift) { 1657 VIXL_ASSERT(shift >= 0); 1658 SimVRegister temp1, temp2; 1659 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1660 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1661 return sshl(vform, dst, extendedreg, shiftreg); 1662} 1663 1664 1665LogicVRegister Simulator::shll(VectorFormat vform, 1666 LogicVRegister dst, 1667 const LogicVRegister& src) { 1668 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1669 return sshll(vform, dst, src, shift); 1670} 1671 1672 1673LogicVRegister Simulator::shll2(VectorFormat vform, 1674 LogicVRegister dst, 1675 const LogicVRegister& src) { 1676 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1677 return sshll2(vform, dst, src, shift); 1678} 1679 1680 1681LogicVRegister Simulator::ushll(VectorFormat vform, 1682 LogicVRegister dst, 1683 const LogicVRegister& src, 1684 int shift) { 1685 VIXL_ASSERT(shift >= 0); 1686 SimVRegister temp1, temp2; 1687 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1688 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1689 return ushl(vform, dst, extendedreg, shiftreg); 1690} 1691 1692 1693LogicVRegister Simulator::ushll2(VectorFormat vform, 1694 LogicVRegister dst, 1695 const LogicVRegister& src, 1696 int shift) { 1697 VIXL_ASSERT(shift >= 0); 1698 SimVRegister temp1, temp2; 1699 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1700 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1701 return ushl(vform, dst, extendedreg, shiftreg); 1702} 1703 1704 1705LogicVRegister Simulator::sli(VectorFormat vform, 1706 LogicVRegister dst, 1707 const LogicVRegister& src, 1708 int shift) { 1709 dst.ClearForWrite(vform); 1710 int laneCount = LaneCountFromFormat(vform); 1711 for (int i = 0; i < laneCount; i++) { 1712 uint64_t src_lane = src.Uint(vform, i); 1713 uint64_t dst_lane = dst.Uint(vform, i); 1714 uint64_t shifted = src_lane << shift; 1715 uint64_t mask = MaxUintFromFormat(vform) << shift; 1716 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1717 } 1718 return dst; 1719} 1720 1721 1722LogicVRegister Simulator::sqshl(VectorFormat vform, 1723 LogicVRegister dst, 1724 const LogicVRegister& src, 1725 int shift) { 1726 VIXL_ASSERT(shift >= 0); 1727 SimVRegister temp; 1728 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1729 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1730} 1731 1732 1733LogicVRegister Simulator::uqshl(VectorFormat vform, 1734 LogicVRegister dst, 1735 const LogicVRegister& src, 1736 int shift) { 1737 VIXL_ASSERT(shift >= 0); 1738 SimVRegister temp; 1739 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1740 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1741} 1742 1743 1744LogicVRegister Simulator::sqshlu(VectorFormat vform, 1745 LogicVRegister dst, 1746 const LogicVRegister& src, 1747 int shift) { 1748 VIXL_ASSERT(shift >= 0); 1749 SimVRegister temp; 1750 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1751 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1752} 1753 1754 1755LogicVRegister Simulator::sri(VectorFormat vform, 1756 LogicVRegister dst, 1757 const LogicVRegister& src, 1758 int shift) { 1759 dst.ClearForWrite(vform); 1760 int laneCount = LaneCountFromFormat(vform); 1761 VIXL_ASSERT((shift > 0) && 1762 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1763 for (int i = 0; i < laneCount; i++) { 1764 uint64_t src_lane = src.Uint(vform, i); 1765 uint64_t dst_lane = dst.Uint(vform, i); 1766 uint64_t shifted; 1767 uint64_t mask; 1768 if (shift == 64) { 1769 shifted = 0; 1770 mask = 0; 1771 } else { 1772 shifted = src_lane >> shift; 1773 mask = MaxUintFromFormat(vform) >> shift; 1774 } 1775 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1776 } 1777 return dst; 1778} 1779 1780 1781LogicVRegister Simulator::ushr(VectorFormat vform, 1782 LogicVRegister dst, 1783 const LogicVRegister& src, 1784 int shift) { 1785 VIXL_ASSERT(shift >= 0); 1786 SimVRegister temp; 1787 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1788 return ushl(vform, dst, src, shiftreg); 1789} 1790 1791 1792LogicVRegister Simulator::sshr(VectorFormat vform, 1793 LogicVRegister dst, 1794 const LogicVRegister& src, 1795 int shift) { 1796 VIXL_ASSERT(shift >= 0); 1797 SimVRegister temp; 1798 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1799 return sshl(vform, dst, src, shiftreg); 1800} 1801 1802 1803LogicVRegister Simulator::ssra(VectorFormat vform, 1804 LogicVRegister dst, 1805 const LogicVRegister& src, 1806 int shift) { 1807 SimVRegister temp; 1808 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1809 return add(vform, dst, dst, shifted_reg); 1810} 1811 1812 1813LogicVRegister Simulator::usra(VectorFormat vform, 1814 LogicVRegister dst, 1815 const LogicVRegister& src, 1816 int shift) { 1817 SimVRegister temp; 1818 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1819 return add(vform, dst, dst, shifted_reg); 1820} 1821 1822 1823LogicVRegister Simulator::srsra(VectorFormat vform, 1824 LogicVRegister dst, 1825 const LogicVRegister& src, 1826 int shift) { 1827 SimVRegister temp; 1828 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1829 return add(vform, dst, dst, shifted_reg); 1830} 1831 1832 1833LogicVRegister Simulator::ursra(VectorFormat vform, 1834 LogicVRegister dst, 1835 const LogicVRegister& src, 1836 int shift) { 1837 SimVRegister temp; 1838 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1839 return add(vform, dst, dst, shifted_reg); 1840} 1841 1842 1843LogicVRegister Simulator::cls(VectorFormat vform, 1844 LogicVRegister dst, 1845 const LogicVRegister& src) { 1846 uint64_t result[16]; 1847 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1848 int laneCount = LaneCountFromFormat(vform); 1849 for (int i = 0; i < laneCount; i++) { 1850 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1851 } 1852 1853 dst.ClearForWrite(vform); 1854 for (int i = 0; i < laneCount; ++i) { 1855 dst.SetUint(vform, i, result[i]); 1856 } 1857 return dst; 1858} 1859 1860 1861LogicVRegister Simulator::clz(VectorFormat vform, 1862 LogicVRegister dst, 1863 const LogicVRegister& src) { 1864 uint64_t result[16]; 1865 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1866 int laneCount = LaneCountFromFormat(vform); 1867 for (int i = 0; i < laneCount; i++) { 1868 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1869 } 1870 1871 dst.ClearForWrite(vform); 1872 for (int i = 0; i < laneCount; ++i) { 1873 dst.SetUint(vform, i, result[i]); 1874 } 1875 return dst; 1876} 1877 1878 1879LogicVRegister Simulator::cnt(VectorFormat vform, 1880 LogicVRegister dst, 1881 const LogicVRegister& src) { 1882 uint64_t result[16]; 1883 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1884 int laneCount = LaneCountFromFormat(vform); 1885 for (int i = 0; i < laneCount; i++) { 1886 uint64_t value = src.Uint(vform, i); 1887 result[i] = 0; 1888 for (int j = 0; j < laneSizeInBits; j++) { 1889 result[i] += (value & 1); 1890 value >>= 1; 1891 } 1892 } 1893 1894 dst.ClearForWrite(vform); 1895 for (int i = 0; i < laneCount; ++i) { 1896 dst.SetUint(vform, i, result[i]); 1897 } 1898 return dst; 1899} 1900 1901 1902LogicVRegister Simulator::sshl(VectorFormat vform, 1903 LogicVRegister dst, 1904 const LogicVRegister& src1, 1905 const LogicVRegister& src2) { 1906 dst.ClearForWrite(vform); 1907 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1908 int8_t shift_val = src2.Int(vform, i); 1909 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1910 1911 // Set signed saturation state. 1912 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1913 dst.SetSignedSat(i, lj_src_val >= 0); 1914 } 1915 1916 // Set unsigned saturation state. 1917 if (lj_src_val < 0) { 1918 dst.SetUnsignedSat(i, false); 1919 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1920 (lj_src_val != 0)) { 1921 dst.SetUnsignedSat(i, true); 1922 } 1923 1924 int64_t src_val = src1.Int(vform, i); 1925 if (shift_val > 63) { 1926 dst.SetInt(vform, i, 0); 1927 } else if (shift_val < -63) { 1928 dst.SetRounding(i, src_val < 0); 1929 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); 1930 } else { 1931 if (shift_val < 0) { 1932 // Set rounding state. Rounding only needed on right shifts. 1933 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1934 dst.SetRounding(i, true); 1935 } 1936 src_val >>= -shift_val; 1937 } else { 1938 src_val <<= shift_val; 1939 } 1940 dst.SetInt(vform, i, src_val); 1941 } 1942 } 1943 return dst; 1944} 1945 1946 1947LogicVRegister Simulator::ushl(VectorFormat vform, 1948 LogicVRegister dst, 1949 const LogicVRegister& src1, 1950 const LogicVRegister& src2) { 1951 dst.ClearForWrite(vform); 1952 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1953 int8_t shift_val = src2.Int(vform, i); 1954 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1955 1956 // Set saturation state. 1957 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1958 dst.SetUnsignedSat(i, true); 1959 } 1960 1961 uint64_t src_val = src1.Uint(vform, i); 1962 if ((shift_val > 63) || (shift_val < -64)) { 1963 dst.SetUint(vform, i, 0); 1964 } else { 1965 if (shift_val < 0) { 1966 // Set rounding state. Rounding only needed on right shifts. 1967 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1968 dst.SetRounding(i, true); 1969 } 1970 1971 if (shift_val == -64) { 1972 src_val = 0; 1973 } else { 1974 src_val >>= -shift_val; 1975 } 1976 } else { 1977 src_val <<= shift_val; 1978 } 1979 dst.SetUint(vform, i, src_val); 1980 } 1981 } 1982 return dst; 1983} 1984 1985 1986LogicVRegister Simulator::neg(VectorFormat vform, 1987 LogicVRegister dst, 1988 const LogicVRegister& src) { 1989 dst.ClearForWrite(vform); 1990 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1991 // Test for signed saturation. 1992 int64_t sa = src.Int(vform, i); 1993 if (sa == MinIntFromFormat(vform)) { 1994 dst.SetSignedSat(i, true); 1995 } 1996 dst.SetInt(vform, i, -sa); 1997 } 1998 return dst; 1999} 2000 2001 2002LogicVRegister Simulator::suqadd(VectorFormat vform, 2003 LogicVRegister dst, 2004 const LogicVRegister& src) { 2005 dst.ClearForWrite(vform); 2006 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2007 int64_t sa = dst.IntLeftJustified(vform, i); 2008 uint64_t ub = src.UintLeftJustified(vform, i); 2009 int64_t sr = sa + ub; 2010 2011 if (sr < sa) { // Test for signed positive saturation. 2012 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 2013 } else { 2014 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); 2015 } 2016 } 2017 return dst; 2018} 2019 2020 2021LogicVRegister Simulator::usqadd(VectorFormat vform, 2022 LogicVRegister dst, 2023 const LogicVRegister& src) { 2024 dst.ClearForWrite(vform); 2025 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2026 uint64_t ua = dst.UintLeftJustified(vform, i); 2027 int64_t sb = src.IntLeftJustified(vform, i); 2028 uint64_t ur = ua + sb; 2029 2030 if ((sb > 0) && (ur <= ua)) { 2031 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2032 } else if ((sb < 0) && (ur >= ua)) { 2033 dst.SetUint(vform, i, 0); // Negative saturation. 2034 } else { 2035 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 2036 } 2037 } 2038 return dst; 2039} 2040 2041 2042LogicVRegister Simulator::abs(VectorFormat vform, 2043 LogicVRegister dst, 2044 const LogicVRegister& src) { 2045 dst.ClearForWrite(vform); 2046 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2047 // Test for signed saturation. 2048 int64_t sa = src.Int(vform, i); 2049 if (sa == MinIntFromFormat(vform)) { 2050 dst.SetSignedSat(i, true); 2051 } 2052 if (sa < 0) { 2053 dst.SetInt(vform, i, -sa); 2054 } else { 2055 dst.SetInt(vform, i, sa); 2056 } 2057 } 2058 return dst; 2059} 2060 2061 2062LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2063 LogicVRegister dst, 2064 bool dstIsSigned, 2065 const LogicVRegister& src, 2066 bool srcIsSigned) { 2067 bool upperhalf = false; 2068 VectorFormat srcform = kFormatUndefined; 2069 int64_t ssrc[8]; 2070 uint64_t usrc[8]; 2071 2072 switch (dstform) { 2073 case kFormat8B: 2074 upperhalf = false; 2075 srcform = kFormat8H; 2076 break; 2077 case kFormat16B: 2078 upperhalf = true; 2079 srcform = kFormat8H; 2080 break; 2081 case kFormat4H: 2082 upperhalf = false; 2083 srcform = kFormat4S; 2084 break; 2085 case kFormat8H: 2086 upperhalf = true; 2087 srcform = kFormat4S; 2088 break; 2089 case kFormat2S: 2090 upperhalf = false; 2091 srcform = kFormat2D; 2092 break; 2093 case kFormat4S: 2094 upperhalf = true; 2095 srcform = kFormat2D; 2096 break; 2097 case kFormatB: 2098 upperhalf = false; 2099 srcform = kFormatH; 2100 break; 2101 case kFormatH: 2102 upperhalf = false; 2103 srcform = kFormatS; 2104 break; 2105 case kFormatS: 2106 upperhalf = false; 2107 srcform = kFormatD; 2108 break; 2109 default: 2110 VIXL_UNIMPLEMENTED(); 2111 } 2112 2113 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2114 ssrc[i] = src.Int(srcform, i); 2115 usrc[i] = src.Uint(srcform, i); 2116 } 2117 2118 int offset; 2119 if (upperhalf) { 2120 offset = LaneCountFromFormat(dstform) / 2; 2121 } else { 2122 offset = 0; 2123 dst.ClearForWrite(dstform); 2124 } 2125 2126 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2127 // Test for signed saturation 2128 if (ssrc[i] > MaxIntFromFormat(dstform)) { 2129 dst.SetSignedSat(offset + i, true); 2130 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 2131 dst.SetSignedSat(offset + i, false); 2132 } 2133 2134 // Test for unsigned saturation 2135 if (srcIsSigned) { 2136 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2137 dst.SetUnsignedSat(offset + i, true); 2138 } else if (ssrc[i] < 0) { 2139 dst.SetUnsignedSat(offset + i, false); 2140 } 2141 } else { 2142 if (usrc[i] > MaxUintFromFormat(dstform)) { 2143 dst.SetUnsignedSat(offset + i, true); 2144 } 2145 } 2146 2147 int64_t result; 2148 if (srcIsSigned) { 2149 result = ssrc[i] & MaxUintFromFormat(dstform); 2150 } else { 2151 result = usrc[i] & MaxUintFromFormat(dstform); 2152 } 2153 2154 if (dstIsSigned) { 2155 dst.SetInt(dstform, offset + i, result); 2156 } else { 2157 dst.SetUint(dstform, offset + i, result); 2158 } 2159 } 2160 return dst; 2161} 2162 2163 2164LogicVRegister Simulator::xtn(VectorFormat vform, 2165 LogicVRegister dst, 2166 const LogicVRegister& src) { 2167 return extractnarrow(vform, dst, true, src, true); 2168} 2169 2170 2171LogicVRegister Simulator::sqxtn(VectorFormat vform, 2172 LogicVRegister dst, 2173 const LogicVRegister& src) { 2174 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2175} 2176 2177 2178LogicVRegister Simulator::sqxtun(VectorFormat vform, 2179 LogicVRegister dst, 2180 const LogicVRegister& src) { 2181 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2182} 2183 2184 2185LogicVRegister Simulator::uqxtn(VectorFormat vform, 2186 LogicVRegister dst, 2187 const LogicVRegister& src) { 2188 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2189} 2190 2191 2192LogicVRegister Simulator::absdiff(VectorFormat vform, 2193 LogicVRegister dst, 2194 const LogicVRegister& src1, 2195 const LogicVRegister& src2, 2196 bool issigned) { 2197 dst.ClearForWrite(vform); 2198 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2199 if (issigned) { 2200 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2201 sr = sr > 0 ? sr : -sr; 2202 dst.SetInt(vform, i, sr); 2203 } else { 2204 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2205 sr = sr > 0 ? sr : -sr; 2206 dst.SetUint(vform, i, sr); 2207 } 2208 } 2209 return dst; 2210} 2211 2212 2213LogicVRegister Simulator::saba(VectorFormat vform, 2214 LogicVRegister dst, 2215 const LogicVRegister& src1, 2216 const LogicVRegister& src2) { 2217 SimVRegister temp; 2218 dst.ClearForWrite(vform); 2219 absdiff(vform, temp, src1, src2, true); 2220 add(vform, dst, dst, temp); 2221 return dst; 2222} 2223 2224 2225LogicVRegister Simulator::uaba(VectorFormat vform, 2226 LogicVRegister dst, 2227 const LogicVRegister& src1, 2228 const LogicVRegister& src2) { 2229 SimVRegister temp; 2230 dst.ClearForWrite(vform); 2231 absdiff(vform, temp, src1, src2, false); 2232 add(vform, dst, dst, temp); 2233 return dst; 2234} 2235 2236 2237LogicVRegister Simulator::not_(VectorFormat vform, 2238 LogicVRegister dst, 2239 const LogicVRegister& src) { 2240 dst.ClearForWrite(vform); 2241 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2242 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2243 } 2244 return dst; 2245} 2246 2247 2248LogicVRegister Simulator::rbit(VectorFormat vform, 2249 LogicVRegister dst, 2250 const LogicVRegister& src) { 2251 uint64_t result[16]; 2252 int laneCount = LaneCountFromFormat(vform); 2253 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2254 uint64_t reversed_value; 2255 uint64_t value; 2256 for (int i = 0; i < laneCount; i++) { 2257 value = src.Uint(vform, i); 2258 reversed_value = 0; 2259 for (int j = 0; j < laneSizeInBits; j++) { 2260 reversed_value = (reversed_value << 1) | (value & 1); 2261 value >>= 1; 2262 } 2263 result[i] = reversed_value; 2264 } 2265 2266 dst.ClearForWrite(vform); 2267 for (int i = 0; i < laneCount; ++i) { 2268 dst.SetUint(vform, i, result[i]); 2269 } 2270 return dst; 2271} 2272 2273 2274LogicVRegister Simulator::rev(VectorFormat vform, 2275 LogicVRegister dst, 2276 const LogicVRegister& src, 2277 int revSize) { 2278 uint64_t result[16]; 2279 int laneCount = LaneCountFromFormat(vform); 2280 int laneSize = LaneSizeInBytesFromFormat(vform); 2281 int lanesPerLoop = revSize / laneSize; 2282 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2283 for (int j = 0; j < lanesPerLoop; j++) { 2284 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2285 } 2286 } 2287 dst.ClearForWrite(vform); 2288 for (int i = 0; i < laneCount; ++i) { 2289 dst.SetUint(vform, i, result[i]); 2290 } 2291 return dst; 2292} 2293 2294 2295LogicVRegister Simulator::rev16(VectorFormat vform, 2296 LogicVRegister dst, 2297 const LogicVRegister& src) { 2298 return rev(vform, dst, src, 2); 2299} 2300 2301 2302LogicVRegister Simulator::rev32(VectorFormat vform, 2303 LogicVRegister dst, 2304 const LogicVRegister& src) { 2305 return rev(vform, dst, src, 4); 2306} 2307 2308 2309LogicVRegister Simulator::rev64(VectorFormat vform, 2310 LogicVRegister dst, 2311 const LogicVRegister& src) { 2312 return rev(vform, dst, src, 8); 2313} 2314 2315 2316LogicVRegister Simulator::addlp(VectorFormat vform, 2317 LogicVRegister dst, 2318 const LogicVRegister& src, 2319 bool is_signed, 2320 bool do_accumulate) { 2321 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2322 2323 int64_t sr[16]; 2324 uint64_t ur[16]; 2325 2326 int laneCount = LaneCountFromFormat(vform); 2327 for (int i = 0; i < laneCount; ++i) { 2328 if (is_signed) { 2329 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); 2330 } else { 2331 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2332 } 2333 } 2334 2335 dst.ClearForWrite(vform); 2336 for (int i = 0; i < laneCount; ++i) { 2337 if (do_accumulate) { 2338 if (is_signed) { 2339 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); 2340 } else { 2341 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); 2342 } 2343 } else { 2344 if (is_signed) { 2345 dst.SetInt(vform, i, sr[i]); 2346 } else { 2347 dst.SetUint(vform, i, ur[i]); 2348 } 2349 } 2350 } 2351 2352 return dst; 2353} 2354 2355 2356LogicVRegister Simulator::saddlp(VectorFormat vform, 2357 LogicVRegister dst, 2358 const LogicVRegister& src) { 2359 return addlp(vform, dst, src, true, false); 2360} 2361 2362 2363LogicVRegister Simulator::uaddlp(VectorFormat vform, 2364 LogicVRegister dst, 2365 const LogicVRegister& src) { 2366 return addlp(vform, dst, src, false, false); 2367} 2368 2369 2370LogicVRegister Simulator::sadalp(VectorFormat vform, 2371 LogicVRegister dst, 2372 const LogicVRegister& src) { 2373 return addlp(vform, dst, src, true, true); 2374} 2375 2376 2377LogicVRegister Simulator::uadalp(VectorFormat vform, 2378 LogicVRegister dst, 2379 const LogicVRegister& src) { 2380 return addlp(vform, dst, src, false, true); 2381} 2382 2383 2384LogicVRegister Simulator::ext(VectorFormat vform, 2385 LogicVRegister dst, 2386 const LogicVRegister& src1, 2387 const LogicVRegister& src2, 2388 int index) { 2389 uint8_t result[16]; 2390 int laneCount = LaneCountFromFormat(vform); 2391 for (int i = 0; i < laneCount - index; ++i) { 2392 result[i] = src1.Uint(vform, i + index); 2393 } 2394 for (int i = 0; i < index; ++i) { 2395 result[laneCount - index + i] = src2.Uint(vform, i); 2396 } 2397 dst.ClearForWrite(vform); 2398 for (int i = 0; i < laneCount; ++i) { 2399 dst.SetUint(vform, i, result[i]); 2400 } 2401 return dst; 2402} 2403 2404 2405LogicVRegister Simulator::dup_element(VectorFormat vform, 2406 LogicVRegister dst, 2407 const LogicVRegister& src, 2408 int src_index) { 2409 int laneCount = LaneCountFromFormat(vform); 2410 uint64_t value = src.Uint(vform, src_index); 2411 dst.ClearForWrite(vform); 2412 for (int i = 0; i < laneCount; ++i) { 2413 dst.SetUint(vform, i, value); 2414 } 2415 return dst; 2416} 2417 2418 2419LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2420 LogicVRegister dst, 2421 uint64_t imm) { 2422 int laneCount = LaneCountFromFormat(vform); 2423 uint64_t value = imm & MaxUintFromFormat(vform); 2424 dst.ClearForWrite(vform); 2425 for (int i = 0; i < laneCount; ++i) { 2426 dst.SetUint(vform, i, value); 2427 } 2428 return dst; 2429} 2430 2431 2432LogicVRegister Simulator::ins_element(VectorFormat vform, 2433 LogicVRegister dst, 2434 int dst_index, 2435 const LogicVRegister& src, 2436 int src_index) { 2437 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2438 return dst; 2439} 2440 2441 2442LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2443 LogicVRegister dst, 2444 int dst_index, 2445 uint64_t imm) { 2446 uint64_t value = imm & MaxUintFromFormat(vform); 2447 dst.SetUint(vform, dst_index, value); 2448 return dst; 2449} 2450 2451 2452LogicVRegister Simulator::movi(VectorFormat vform, 2453 LogicVRegister dst, 2454 uint64_t imm) { 2455 int laneCount = LaneCountFromFormat(vform); 2456 dst.ClearForWrite(vform); 2457 for (int i = 0; i < laneCount; ++i) { 2458 dst.SetUint(vform, i, imm); 2459 } 2460 return dst; 2461} 2462 2463 2464LogicVRegister Simulator::mvni(VectorFormat vform, 2465 LogicVRegister dst, 2466 uint64_t imm) { 2467 int laneCount = LaneCountFromFormat(vform); 2468 dst.ClearForWrite(vform); 2469 for (int i = 0; i < laneCount; ++i) { 2470 dst.SetUint(vform, i, ~imm); 2471 } 2472 return dst; 2473} 2474 2475 2476LogicVRegister Simulator::orr(VectorFormat vform, 2477 LogicVRegister dst, 2478 const LogicVRegister& src, 2479 uint64_t imm) { 2480 uint64_t result[16]; 2481 int laneCount = LaneCountFromFormat(vform); 2482 for (int i = 0; i < laneCount; ++i) { 2483 result[i] = src.Uint(vform, i) | imm; 2484 } 2485 dst.ClearForWrite(vform); 2486 for (int i = 0; i < laneCount; ++i) { 2487 dst.SetUint(vform, i, result[i]); 2488 } 2489 return dst; 2490} 2491 2492 2493LogicVRegister Simulator::uxtl(VectorFormat vform, 2494 LogicVRegister dst, 2495 const LogicVRegister& src) { 2496 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2497 2498 dst.ClearForWrite(vform); 2499 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2500 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2501 } 2502 return dst; 2503} 2504 2505 2506LogicVRegister Simulator::sxtl(VectorFormat vform, 2507 LogicVRegister dst, 2508 const LogicVRegister& src) { 2509 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2510 2511 dst.ClearForWrite(vform); 2512 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2513 dst.SetInt(vform, i, src.Int(vform_half, i)); 2514 } 2515 return dst; 2516} 2517 2518 2519LogicVRegister Simulator::uxtl2(VectorFormat vform, 2520 LogicVRegister dst, 2521 const LogicVRegister& src) { 2522 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2523 int lane_count = LaneCountFromFormat(vform); 2524 2525 dst.ClearForWrite(vform); 2526 for (int i = 0; i < lane_count; i++) { 2527 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2528 } 2529 return dst; 2530} 2531 2532 2533LogicVRegister Simulator::sxtl2(VectorFormat vform, 2534 LogicVRegister dst, 2535 const LogicVRegister& src) { 2536 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2537 int lane_count = LaneCountFromFormat(vform); 2538 2539 dst.ClearForWrite(vform); 2540 for (int i = 0; i < lane_count; i++) { 2541 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2542 } 2543 return dst; 2544} 2545 2546 2547LogicVRegister Simulator::shrn(VectorFormat vform, 2548 LogicVRegister dst, 2549 const LogicVRegister& src, 2550 int shift) { 2551 SimVRegister temp; 2552 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2553 VectorFormat vform_dst = vform; 2554 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2555 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2556} 2557 2558 2559LogicVRegister Simulator::shrn2(VectorFormat vform, 2560 LogicVRegister dst, 2561 const LogicVRegister& src, 2562 int shift) { 2563 SimVRegister temp; 2564 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2565 VectorFormat vformdst = vform; 2566 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2567 return extractnarrow(vformdst, dst, false, shifted_src, false); 2568} 2569 2570 2571LogicVRegister Simulator::rshrn(VectorFormat vform, 2572 LogicVRegister dst, 2573 const LogicVRegister& src, 2574 int shift) { 2575 SimVRegister temp; 2576 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2577 VectorFormat vformdst = vform; 2578 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2579 return extractnarrow(vformdst, dst, false, shifted_src, false); 2580} 2581 2582 2583LogicVRegister Simulator::rshrn2(VectorFormat vform, 2584 LogicVRegister dst, 2585 const LogicVRegister& src, 2586 int shift) { 2587 SimVRegister temp; 2588 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2589 VectorFormat vformdst = vform; 2590 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2591 return extractnarrow(vformdst, dst, false, shifted_src, false); 2592} 2593 2594 2595LogicVRegister Simulator::tbl(VectorFormat vform, 2596 LogicVRegister dst, 2597 const LogicVRegister& tab, 2598 const LogicVRegister& ind) { 2599 movi(vform, dst, 0); 2600 return tbx(vform, dst, tab, ind); 2601} 2602 2603 2604LogicVRegister Simulator::tbl(VectorFormat vform, 2605 LogicVRegister dst, 2606 const LogicVRegister& tab, 2607 const LogicVRegister& tab2, 2608 const LogicVRegister& ind) { 2609 movi(vform, dst, 0); 2610 return tbx(vform, dst, tab, tab2, ind); 2611} 2612 2613 2614LogicVRegister Simulator::tbl(VectorFormat vform, 2615 LogicVRegister dst, 2616 const LogicVRegister& tab, 2617 const LogicVRegister& tab2, 2618 const LogicVRegister& tab3, 2619 const LogicVRegister& ind) { 2620 movi(vform, dst, 0); 2621 return tbx(vform, dst, tab, tab2, tab3, ind); 2622} 2623 2624 2625LogicVRegister Simulator::tbl(VectorFormat vform, 2626 LogicVRegister dst, 2627 const LogicVRegister& tab, 2628 const LogicVRegister& tab2, 2629 const LogicVRegister& tab3, 2630 const LogicVRegister& tab4, 2631 const LogicVRegister& ind) { 2632 movi(vform, dst, 0); 2633 return tbx(vform, dst, tab, tab2, tab3, tab4, ind); 2634} 2635 2636 2637LogicVRegister Simulator::tbx(VectorFormat vform, 2638 LogicVRegister dst, 2639 const LogicVRegister& tab, 2640 const LogicVRegister& ind) { 2641 dst.ClearForWrite(vform); 2642 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2643 uint64_t j = ind.Uint(vform, i); 2644 switch (j >> 4) { 2645 case 0: 2646 dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); 2647 break; 2648 } 2649 } 2650 return dst; 2651} 2652 2653 2654LogicVRegister Simulator::tbx(VectorFormat vform, 2655 LogicVRegister dst, 2656 const LogicVRegister& tab, 2657 const LogicVRegister& tab2, 2658 const LogicVRegister& ind) { 2659 dst.ClearForWrite(vform); 2660 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2661 uint64_t j = ind.Uint(vform, i); 2662 switch (j >> 4) { 2663 case 0: 2664 dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); 2665 break; 2666 case 1: 2667 dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); 2668 break; 2669 } 2670 } 2671 return dst; 2672} 2673 2674 2675LogicVRegister Simulator::tbx(VectorFormat vform, 2676 LogicVRegister dst, 2677 const LogicVRegister& tab, 2678 const LogicVRegister& tab2, 2679 const LogicVRegister& tab3, 2680 const LogicVRegister& ind) { 2681 dst.ClearForWrite(vform); 2682 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2683 uint64_t j = ind.Uint(vform, i); 2684 switch (j >> 4) { 2685 case 0: 2686 dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); 2687 break; 2688 case 1: 2689 dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); 2690 break; 2691 case 2: 2692 dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); 2693 break; 2694 } 2695 } 2696 return dst; 2697} 2698 2699 2700LogicVRegister Simulator::tbx(VectorFormat vform, 2701 LogicVRegister dst, 2702 const LogicVRegister& tab, 2703 const LogicVRegister& tab2, 2704 const LogicVRegister& tab3, 2705 const LogicVRegister& tab4, 2706 const LogicVRegister& ind) { 2707 dst.ClearForWrite(vform); 2708 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2709 uint64_t j = ind.Uint(vform, i); 2710 switch (j >> 4) { 2711 case 0: 2712 dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); 2713 break; 2714 case 1: 2715 dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); 2716 break; 2717 case 2: 2718 dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); 2719 break; 2720 case 3: 2721 dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); 2722 break; 2723 } 2724 } 2725 return dst; 2726} 2727 2728 2729LogicVRegister Simulator::uqshrn(VectorFormat vform, 2730 LogicVRegister dst, 2731 const LogicVRegister& src, 2732 int shift) { 2733 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2734} 2735 2736 2737LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2738 LogicVRegister dst, 2739 const LogicVRegister& src, 2740 int shift) { 2741 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2742} 2743 2744 2745LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2746 LogicVRegister dst, 2747 const LogicVRegister& src, 2748 int shift) { 2749 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2750} 2751 2752 2753LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2754 LogicVRegister dst, 2755 const LogicVRegister& src, 2756 int shift) { 2757 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2758} 2759 2760 2761LogicVRegister Simulator::sqshrn(VectorFormat vform, 2762 LogicVRegister dst, 2763 const LogicVRegister& src, 2764 int shift) { 2765 SimVRegister temp; 2766 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2767 VectorFormat vformdst = vform; 2768 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2769 return sqxtn(vformdst, dst, shifted_src); 2770} 2771 2772 2773LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2774 LogicVRegister dst, 2775 const LogicVRegister& src, 2776 int shift) { 2777 SimVRegister temp; 2778 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2779 VectorFormat vformdst = vform; 2780 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2781 return sqxtn(vformdst, dst, shifted_src); 2782} 2783 2784 2785LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2786 LogicVRegister dst, 2787 const LogicVRegister& src, 2788 int shift) { 2789 SimVRegister temp; 2790 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2791 VectorFormat vformdst = vform; 2792 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2793 return sqxtn(vformdst, dst, shifted_src); 2794} 2795 2796 2797LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2798 LogicVRegister dst, 2799 const LogicVRegister& src, 2800 int shift) { 2801 SimVRegister temp; 2802 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2803 VectorFormat vformdst = vform; 2804 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2805 return sqxtn(vformdst, dst, shifted_src); 2806} 2807 2808 2809LogicVRegister Simulator::sqshrun(VectorFormat vform, 2810 LogicVRegister dst, 2811 const LogicVRegister& src, 2812 int shift) { 2813 SimVRegister temp; 2814 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2815 VectorFormat vformdst = vform; 2816 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2817 return sqxtun(vformdst, dst, shifted_src); 2818} 2819 2820 2821LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2822 LogicVRegister dst, 2823 const LogicVRegister& src, 2824 int shift) { 2825 SimVRegister temp; 2826 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2827 VectorFormat vformdst = vform; 2828 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2829 return sqxtun(vformdst, dst, shifted_src); 2830} 2831 2832 2833LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2834 LogicVRegister dst, 2835 const LogicVRegister& src, 2836 int shift) { 2837 SimVRegister temp; 2838 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2839 VectorFormat vformdst = vform; 2840 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2841 return sqxtun(vformdst, dst, shifted_src); 2842} 2843 2844 2845LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2846 LogicVRegister dst, 2847 const LogicVRegister& src, 2848 int shift) { 2849 SimVRegister temp; 2850 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2851 VectorFormat vformdst = vform; 2852 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2853 return sqxtun(vformdst, dst, shifted_src); 2854} 2855 2856 2857LogicVRegister Simulator::uaddl(VectorFormat vform, 2858 LogicVRegister dst, 2859 const LogicVRegister& src1, 2860 const LogicVRegister& src2) { 2861 SimVRegister temp1, temp2; 2862 uxtl(vform, temp1, src1); 2863 uxtl(vform, temp2, src2); 2864 add(vform, dst, temp1, temp2); 2865 return dst; 2866} 2867 2868 2869LogicVRegister Simulator::uaddl2(VectorFormat vform, 2870 LogicVRegister dst, 2871 const LogicVRegister& src1, 2872 const LogicVRegister& src2) { 2873 SimVRegister temp1, temp2; 2874 uxtl2(vform, temp1, src1); 2875 uxtl2(vform, temp2, src2); 2876 add(vform, dst, temp1, temp2); 2877 return dst; 2878} 2879 2880 2881LogicVRegister Simulator::uaddw(VectorFormat vform, 2882 LogicVRegister dst, 2883 const LogicVRegister& src1, 2884 const LogicVRegister& src2) { 2885 SimVRegister temp; 2886 uxtl(vform, temp, src2); 2887 add(vform, dst, src1, temp); 2888 return dst; 2889} 2890 2891 2892LogicVRegister Simulator::uaddw2(VectorFormat vform, 2893 LogicVRegister dst, 2894 const LogicVRegister& src1, 2895 const LogicVRegister& src2) { 2896 SimVRegister temp; 2897 uxtl2(vform, temp, src2); 2898 add(vform, dst, src1, temp); 2899 return dst; 2900} 2901 2902 2903LogicVRegister Simulator::saddl(VectorFormat vform, 2904 LogicVRegister dst, 2905 const LogicVRegister& src1, 2906 const LogicVRegister& src2) { 2907 SimVRegister temp1, temp2; 2908 sxtl(vform, temp1, src1); 2909 sxtl(vform, temp2, src2); 2910 add(vform, dst, temp1, temp2); 2911 return dst; 2912} 2913 2914 2915LogicVRegister Simulator::saddl2(VectorFormat vform, 2916 LogicVRegister dst, 2917 const LogicVRegister& src1, 2918 const LogicVRegister& src2) { 2919 SimVRegister temp1, temp2; 2920 sxtl2(vform, temp1, src1); 2921 sxtl2(vform, temp2, src2); 2922 add(vform, dst, temp1, temp2); 2923 return dst; 2924} 2925 2926 2927LogicVRegister Simulator::saddw(VectorFormat vform, 2928 LogicVRegister dst, 2929 const LogicVRegister& src1, 2930 const LogicVRegister& src2) { 2931 SimVRegister temp; 2932 sxtl(vform, temp, src2); 2933 add(vform, dst, src1, temp); 2934 return dst; 2935} 2936 2937 2938LogicVRegister Simulator::saddw2(VectorFormat vform, 2939 LogicVRegister dst, 2940 const LogicVRegister& src1, 2941 const LogicVRegister& src2) { 2942 SimVRegister temp; 2943 sxtl2(vform, temp, src2); 2944 add(vform, dst, src1, temp); 2945 return dst; 2946} 2947 2948 2949LogicVRegister Simulator::usubl(VectorFormat vform, 2950 LogicVRegister dst, 2951 const LogicVRegister& src1, 2952 const LogicVRegister& src2) { 2953 SimVRegister temp1, temp2; 2954 uxtl(vform, temp1, src1); 2955 uxtl(vform, temp2, src2); 2956 sub(vform, dst, temp1, temp2); 2957 return dst; 2958} 2959 2960 2961LogicVRegister Simulator::usubl2(VectorFormat vform, 2962 LogicVRegister dst, 2963 const LogicVRegister& src1, 2964 const LogicVRegister& src2) { 2965 SimVRegister temp1, temp2; 2966 uxtl2(vform, temp1, src1); 2967 uxtl2(vform, temp2, src2); 2968 sub(vform, dst, temp1, temp2); 2969 return dst; 2970} 2971 2972 2973LogicVRegister Simulator::usubw(VectorFormat vform, 2974 LogicVRegister dst, 2975 const LogicVRegister& src1, 2976 const LogicVRegister& src2) { 2977 SimVRegister temp; 2978 uxtl(vform, temp, src2); 2979 sub(vform, dst, src1, temp); 2980 return dst; 2981} 2982 2983 2984LogicVRegister Simulator::usubw2(VectorFormat vform, 2985 LogicVRegister dst, 2986 const LogicVRegister& src1, 2987 const LogicVRegister& src2) { 2988 SimVRegister temp; 2989 uxtl2(vform, temp, src2); 2990 sub(vform, dst, src1, temp); 2991 return dst; 2992} 2993 2994 2995LogicVRegister Simulator::ssubl(VectorFormat vform, 2996 LogicVRegister dst, 2997 const LogicVRegister& src1, 2998 const LogicVRegister& src2) { 2999 SimVRegister temp1, temp2; 3000 sxtl(vform, temp1, src1); 3001 sxtl(vform, temp2, src2); 3002 sub(vform, dst, temp1, temp2); 3003 return dst; 3004} 3005 3006 3007LogicVRegister Simulator::ssubl2(VectorFormat vform, 3008 LogicVRegister dst, 3009 const LogicVRegister& src1, 3010 const LogicVRegister& src2) { 3011 SimVRegister temp1, temp2; 3012 sxtl2(vform, temp1, src1); 3013 sxtl2(vform, temp2, src2); 3014 sub(vform, dst, temp1, temp2); 3015 return dst; 3016} 3017 3018 3019LogicVRegister Simulator::ssubw(VectorFormat vform, 3020 LogicVRegister dst, 3021 const LogicVRegister& src1, 3022 const LogicVRegister& src2) { 3023 SimVRegister temp; 3024 sxtl(vform, temp, src2); 3025 sub(vform, dst, src1, temp); 3026 return dst; 3027} 3028 3029 3030LogicVRegister Simulator::ssubw2(VectorFormat vform, 3031 LogicVRegister dst, 3032 const LogicVRegister& src1, 3033 const LogicVRegister& src2) { 3034 SimVRegister temp; 3035 sxtl2(vform, temp, src2); 3036 sub(vform, dst, src1, temp); 3037 return dst; 3038} 3039 3040 3041LogicVRegister Simulator::uabal(VectorFormat vform, 3042 LogicVRegister dst, 3043 const LogicVRegister& src1, 3044 const LogicVRegister& src2) { 3045 SimVRegister temp1, temp2; 3046 uxtl(vform, temp1, src1); 3047 uxtl(vform, temp2, src2); 3048 uaba(vform, dst, temp1, temp2); 3049 return dst; 3050} 3051 3052 3053LogicVRegister Simulator::uabal2(VectorFormat vform, 3054 LogicVRegister dst, 3055 const LogicVRegister& src1, 3056 const LogicVRegister& src2) { 3057 SimVRegister temp1, temp2; 3058 uxtl2(vform, temp1, src1); 3059 uxtl2(vform, temp2, src2); 3060 uaba(vform, dst, temp1, temp2); 3061 return dst; 3062} 3063 3064 3065LogicVRegister Simulator::sabal(VectorFormat vform, 3066 LogicVRegister dst, 3067 const LogicVRegister& src1, 3068 const LogicVRegister& src2) { 3069 SimVRegister temp1, temp2; 3070 sxtl(vform, temp1, src1); 3071 sxtl(vform, temp2, src2); 3072 saba(vform, dst, temp1, temp2); 3073 return dst; 3074} 3075 3076 3077LogicVRegister Simulator::sabal2(VectorFormat vform, 3078 LogicVRegister dst, 3079 const LogicVRegister& src1, 3080 const LogicVRegister& src2) { 3081 SimVRegister temp1, temp2; 3082 sxtl2(vform, temp1, src1); 3083 sxtl2(vform, temp2, src2); 3084 saba(vform, dst, temp1, temp2); 3085 return dst; 3086} 3087 3088 3089LogicVRegister Simulator::uabdl(VectorFormat vform, 3090 LogicVRegister dst, 3091 const LogicVRegister& src1, 3092 const LogicVRegister& src2) { 3093 SimVRegister temp1, temp2; 3094 uxtl(vform, temp1, src1); 3095 uxtl(vform, temp2, src2); 3096 absdiff(vform, dst, temp1, temp2, false); 3097 return dst; 3098} 3099 3100 3101LogicVRegister Simulator::uabdl2(VectorFormat vform, 3102 LogicVRegister dst, 3103 const LogicVRegister& src1, 3104 const LogicVRegister& src2) { 3105 SimVRegister temp1, temp2; 3106 uxtl2(vform, temp1, src1); 3107 uxtl2(vform, temp2, src2); 3108 absdiff(vform, dst, temp1, temp2, false); 3109 return dst; 3110} 3111 3112 3113LogicVRegister Simulator::sabdl(VectorFormat vform, 3114 LogicVRegister dst, 3115 const LogicVRegister& src1, 3116 const LogicVRegister& src2) { 3117 SimVRegister temp1, temp2; 3118 sxtl(vform, temp1, src1); 3119 sxtl(vform, temp2, src2); 3120 absdiff(vform, dst, temp1, temp2, true); 3121 return dst; 3122} 3123 3124 3125LogicVRegister Simulator::sabdl2(VectorFormat vform, 3126 LogicVRegister dst, 3127 const LogicVRegister& src1, 3128 const LogicVRegister& src2) { 3129 SimVRegister temp1, temp2; 3130 sxtl2(vform, temp1, src1); 3131 sxtl2(vform, temp2, src2); 3132 absdiff(vform, dst, temp1, temp2, true); 3133 return dst; 3134} 3135 3136 3137LogicVRegister Simulator::umull(VectorFormat vform, 3138 LogicVRegister dst, 3139 const LogicVRegister& src1, 3140 const LogicVRegister& src2) { 3141 SimVRegister temp1, temp2; 3142 uxtl(vform, temp1, src1); 3143 uxtl(vform, temp2, src2); 3144 mul(vform, dst, temp1, temp2); 3145 return dst; 3146} 3147 3148 3149LogicVRegister Simulator::umull2(VectorFormat vform, 3150 LogicVRegister dst, 3151 const LogicVRegister& src1, 3152 const LogicVRegister& src2) { 3153 SimVRegister temp1, temp2; 3154 uxtl2(vform, temp1, src1); 3155 uxtl2(vform, temp2, src2); 3156 mul(vform, dst, temp1, temp2); 3157 return dst; 3158} 3159 3160 3161LogicVRegister Simulator::smull(VectorFormat vform, 3162 LogicVRegister dst, 3163 const LogicVRegister& src1, 3164 const LogicVRegister& src2) { 3165 SimVRegister temp1, temp2; 3166 sxtl(vform, temp1, src1); 3167 sxtl(vform, temp2, src2); 3168 mul(vform, dst, temp1, temp2); 3169 return dst; 3170} 3171 3172 3173LogicVRegister Simulator::smull2(VectorFormat vform, 3174 LogicVRegister dst, 3175 const LogicVRegister& src1, 3176 const LogicVRegister& src2) { 3177 SimVRegister temp1, temp2; 3178 sxtl2(vform, temp1, src1); 3179 sxtl2(vform, temp2, src2); 3180 mul(vform, dst, temp1, temp2); 3181 return dst; 3182} 3183 3184 3185LogicVRegister Simulator::umlsl(VectorFormat vform, 3186 LogicVRegister dst, 3187 const LogicVRegister& src1, 3188 const LogicVRegister& src2) { 3189 SimVRegister temp1, temp2; 3190 uxtl(vform, temp1, src1); 3191 uxtl(vform, temp2, src2); 3192 mls(vform, dst, temp1, temp2); 3193 return dst; 3194} 3195 3196 3197LogicVRegister Simulator::umlsl2(VectorFormat vform, 3198 LogicVRegister dst, 3199 const LogicVRegister& src1, 3200 const LogicVRegister& src2) { 3201 SimVRegister temp1, temp2; 3202 uxtl2(vform, temp1, src1); 3203 uxtl2(vform, temp2, src2); 3204 mls(vform, dst, temp1, temp2); 3205 return dst; 3206} 3207 3208 3209LogicVRegister Simulator::smlsl(VectorFormat vform, 3210 LogicVRegister dst, 3211 const LogicVRegister& src1, 3212 const LogicVRegister& src2) { 3213 SimVRegister temp1, temp2; 3214 sxtl(vform, temp1, src1); 3215 sxtl(vform, temp2, src2); 3216 mls(vform, dst, temp1, temp2); 3217 return dst; 3218} 3219 3220 3221LogicVRegister Simulator::smlsl2(VectorFormat vform, 3222 LogicVRegister dst, 3223 const LogicVRegister& src1, 3224 const LogicVRegister& src2) { 3225 SimVRegister temp1, temp2; 3226 sxtl2(vform, temp1, src1); 3227 sxtl2(vform, temp2, src2); 3228 mls(vform, dst, temp1, temp2); 3229 return dst; 3230} 3231 3232 3233LogicVRegister Simulator::umlal(VectorFormat vform, 3234 LogicVRegister dst, 3235 const LogicVRegister& src1, 3236 const LogicVRegister& src2) { 3237 SimVRegister temp1, temp2; 3238 uxtl(vform, temp1, src1); 3239 uxtl(vform, temp2, src2); 3240 mla(vform, dst, temp1, temp2); 3241 return dst; 3242} 3243 3244 3245LogicVRegister Simulator::umlal2(VectorFormat vform, 3246 LogicVRegister dst, 3247 const LogicVRegister& src1, 3248 const LogicVRegister& src2) { 3249 SimVRegister temp1, temp2; 3250 uxtl2(vform, temp1, src1); 3251 uxtl2(vform, temp2, src2); 3252 mla(vform, dst, temp1, temp2); 3253 return dst; 3254} 3255 3256 3257LogicVRegister Simulator::smlal(VectorFormat vform, 3258 LogicVRegister dst, 3259 const LogicVRegister& src1, 3260 const LogicVRegister& src2) { 3261 SimVRegister temp1, temp2; 3262 sxtl(vform, temp1, src1); 3263 sxtl(vform, temp2, src2); 3264 mla(vform, dst, temp1, temp2); 3265 return dst; 3266} 3267 3268 3269LogicVRegister Simulator::smlal2(VectorFormat vform, 3270 LogicVRegister dst, 3271 const LogicVRegister& src1, 3272 const LogicVRegister& src2) { 3273 SimVRegister temp1, temp2; 3274 sxtl2(vform, temp1, src1); 3275 sxtl2(vform, temp2, src2); 3276 mla(vform, dst, temp1, temp2); 3277 return dst; 3278} 3279 3280 3281LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3282 LogicVRegister dst, 3283 const LogicVRegister& src1, 3284 const LogicVRegister& src2) { 3285 SimVRegister temp; 3286 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3287 return add(vform, dst, dst, product).SignedSaturate(vform); 3288} 3289 3290 3291LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3292 LogicVRegister dst, 3293 const LogicVRegister& src1, 3294 const LogicVRegister& src2) { 3295 SimVRegister temp; 3296 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3297 return add(vform, dst, dst, product).SignedSaturate(vform); 3298} 3299 3300 3301LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3302 LogicVRegister dst, 3303 const LogicVRegister& src1, 3304 const LogicVRegister& src2) { 3305 SimVRegister temp; 3306 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3307 return sub(vform, dst, dst, product).SignedSaturate(vform); 3308} 3309 3310 3311LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3312 LogicVRegister dst, 3313 const LogicVRegister& src1, 3314 const LogicVRegister& src2) { 3315 SimVRegister temp; 3316 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3317 return sub(vform, dst, dst, product).SignedSaturate(vform); 3318} 3319 3320 3321LogicVRegister Simulator::sqdmull(VectorFormat vform, 3322 LogicVRegister dst, 3323 const LogicVRegister& src1, 3324 const LogicVRegister& src2) { 3325 SimVRegister temp; 3326 LogicVRegister product = smull(vform, temp, src1, src2); 3327 return add(vform, dst, product, product).SignedSaturate(vform); 3328} 3329 3330 3331LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3332 LogicVRegister dst, 3333 const LogicVRegister& src1, 3334 const LogicVRegister& src2) { 3335 SimVRegister temp; 3336 LogicVRegister product = smull2(vform, temp, src1, src2); 3337 return add(vform, dst, product, product).SignedSaturate(vform); 3338} 3339 3340 3341LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3342 LogicVRegister dst, 3343 const LogicVRegister& src1, 3344 const LogicVRegister& src2, 3345 bool round) { 3346 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 3347 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3348 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3349 3350 int esize = LaneSizeInBitsFromFormat(vform); 3351 int round_const = round ? (1 << (esize - 2)) : 0; 3352 int64_t product; 3353 3354 dst.ClearForWrite(vform); 3355 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3356 product = src1.Int(vform, i) * src2.Int(vform, i); 3357 product += round_const; 3358 product = product >> (esize - 1); 3359 3360 if (product > MaxIntFromFormat(vform)) { 3361 product = MaxIntFromFormat(vform); 3362 } else if (product < MinIntFromFormat(vform)) { 3363 product = MinIntFromFormat(vform); 3364 } 3365 dst.SetInt(vform, i, product); 3366 } 3367 return dst; 3368} 3369 3370 3371LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3372 LogicVRegister dst, 3373 const LogicVRegister& src1, 3374 const LogicVRegister& src2) { 3375 return sqrdmulh(vform, dst, src1, src2, false); 3376} 3377 3378 3379LogicVRegister Simulator::addhn(VectorFormat vform, 3380 LogicVRegister dst, 3381 const LogicVRegister& src1, 3382 const LogicVRegister& src2) { 3383 SimVRegister temp; 3384 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3385 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3386 return dst; 3387} 3388 3389 3390LogicVRegister Simulator::addhn2(VectorFormat vform, 3391 LogicVRegister dst, 3392 const LogicVRegister& src1, 3393 const LogicVRegister& src2) { 3394 SimVRegister temp; 3395 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3396 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3397 return dst; 3398} 3399 3400 3401LogicVRegister Simulator::raddhn(VectorFormat vform, 3402 LogicVRegister dst, 3403 const LogicVRegister& src1, 3404 const LogicVRegister& src2) { 3405 SimVRegister temp; 3406 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3407 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3408 return dst; 3409} 3410 3411 3412LogicVRegister Simulator::raddhn2(VectorFormat vform, 3413 LogicVRegister dst, 3414 const LogicVRegister& src1, 3415 const LogicVRegister& src2) { 3416 SimVRegister temp; 3417 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3418 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3419 return dst; 3420} 3421 3422 3423LogicVRegister Simulator::subhn(VectorFormat vform, 3424 LogicVRegister dst, 3425 const LogicVRegister& src1, 3426 const LogicVRegister& src2) { 3427 SimVRegister temp; 3428 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3429 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3430 return dst; 3431} 3432 3433 3434LogicVRegister Simulator::subhn2(VectorFormat vform, 3435 LogicVRegister dst, 3436 const LogicVRegister& src1, 3437 const LogicVRegister& src2) { 3438 SimVRegister temp; 3439 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3440 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3441 return dst; 3442} 3443 3444 3445LogicVRegister Simulator::rsubhn(VectorFormat vform, 3446 LogicVRegister dst, 3447 const LogicVRegister& src1, 3448 const LogicVRegister& src2) { 3449 SimVRegister temp; 3450 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3451 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3452 return dst; 3453} 3454 3455 3456LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3457 LogicVRegister dst, 3458 const LogicVRegister& src1, 3459 const LogicVRegister& src2) { 3460 SimVRegister temp; 3461 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3462 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3463 return dst; 3464} 3465 3466 3467LogicVRegister Simulator::trn1(VectorFormat vform, 3468 LogicVRegister dst, 3469 const LogicVRegister& src1, 3470 const LogicVRegister& src2) { 3471 uint64_t result[16]; 3472 int laneCount = LaneCountFromFormat(vform); 3473 int pairs = laneCount / 2; 3474 for (int i = 0; i < pairs; ++i) { 3475 result[2 * i] = src1.Uint(vform, 2 * i); 3476 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3477 } 3478 3479 dst.ClearForWrite(vform); 3480 for (int i = 0; i < laneCount; ++i) { 3481 dst.SetUint(vform, i, result[i]); 3482 } 3483 return dst; 3484} 3485 3486 3487LogicVRegister Simulator::trn2(VectorFormat vform, 3488 LogicVRegister dst, 3489 const LogicVRegister& src1, 3490 const LogicVRegister& src2) { 3491 uint64_t result[16]; 3492 int laneCount = LaneCountFromFormat(vform); 3493 int pairs = laneCount / 2; 3494 for (int i = 0; i < pairs; ++i) { 3495 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3496 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3497 } 3498 3499 dst.ClearForWrite(vform); 3500 for (int i = 0; i < laneCount; ++i) { 3501 dst.SetUint(vform, i, result[i]); 3502 } 3503 return dst; 3504} 3505 3506 3507LogicVRegister Simulator::zip1(VectorFormat vform, 3508 LogicVRegister dst, 3509 const LogicVRegister& src1, 3510 const LogicVRegister& src2) { 3511 uint64_t result[16]; 3512 int laneCount = LaneCountFromFormat(vform); 3513 int pairs = laneCount / 2; 3514 for (int i = 0; i < pairs; ++i) { 3515 result[2 * i] = src1.Uint(vform, i); 3516 result[(2 * i) + 1] = src2.Uint(vform, i); 3517 } 3518 3519 dst.ClearForWrite(vform); 3520 for (int i = 0; i < laneCount; ++i) { 3521 dst.SetUint(vform, i, result[i]); 3522 } 3523 return dst; 3524} 3525 3526 3527LogicVRegister Simulator::zip2(VectorFormat vform, 3528 LogicVRegister dst, 3529 const LogicVRegister& src1, 3530 const LogicVRegister& src2) { 3531 uint64_t result[16]; 3532 int laneCount = LaneCountFromFormat(vform); 3533 int pairs = laneCount / 2; 3534 for (int i = 0; i < pairs; ++i) { 3535 result[2 * i] = src1.Uint(vform, pairs + i); 3536 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3537 } 3538 3539 dst.ClearForWrite(vform); 3540 for (int i = 0; i < laneCount; ++i) { 3541 dst.SetUint(vform, i, result[i]); 3542 } 3543 return dst; 3544} 3545 3546 3547LogicVRegister Simulator::uzp1(VectorFormat vform, 3548 LogicVRegister dst, 3549 const LogicVRegister& src1, 3550 const LogicVRegister& src2) { 3551 uint64_t result[32]; 3552 int laneCount = LaneCountFromFormat(vform); 3553 for (int i = 0; i < laneCount; ++i) { 3554 result[i] = src1.Uint(vform, i); 3555 result[laneCount + i] = src2.Uint(vform, i); 3556 } 3557 3558 dst.ClearForWrite(vform); 3559 for (int i = 0; i < laneCount; ++i) { 3560 dst.SetUint(vform, i, result[2 * i]); 3561 } 3562 return dst; 3563} 3564 3565 3566LogicVRegister Simulator::uzp2(VectorFormat vform, 3567 LogicVRegister dst, 3568 const LogicVRegister& src1, 3569 const LogicVRegister& src2) { 3570 uint64_t result[32]; 3571 int laneCount = LaneCountFromFormat(vform); 3572 for (int i = 0; i < laneCount; ++i) { 3573 result[i] = src1.Uint(vform, i); 3574 result[laneCount + i] = src2.Uint(vform, i); 3575 } 3576 3577 dst.ClearForWrite(vform); 3578 for (int i = 0; i < laneCount; ++i) { 3579 dst.SetUint(vform, i, result[(2 * i) + 1]); 3580 } 3581 return dst; 3582} 3583 3584 3585template <typename T> 3586T Simulator::FPAdd(T op1, T op2) { 3587 T result = FPProcessNaNs(op1, op2); 3588 if (std::isnan(result)) return result; 3589 3590 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3591 // inf + -inf returns the default NaN. 3592 FPProcessException(); 3593 return FPDefaultNaN<T>(); 3594 } else { 3595 // Other cases should be handled by standard arithmetic. 3596 return op1 + op2; 3597 } 3598} 3599 3600 3601template <typename T> 3602T Simulator::FPSub(T op1, T op2) { 3603 // NaNs should be handled elsewhere. 3604 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3605 3606 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3607 // inf - inf returns the default NaN. 3608 FPProcessException(); 3609 return FPDefaultNaN<T>(); 3610 } else { 3611 // Other cases should be handled by standard arithmetic. 3612 return op1 - op2; 3613 } 3614} 3615 3616 3617template <typename T> 3618T Simulator::FPMul(T op1, T op2) { 3619 // NaNs should be handled elsewhere. 3620 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3621 3622 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3623 // inf * 0.0 returns the default NaN. 3624 FPProcessException(); 3625 return FPDefaultNaN<T>(); 3626 } else { 3627 // Other cases should be handled by standard arithmetic. 3628 return op1 * op2; 3629 } 3630} 3631 3632 3633template <typename T> 3634T Simulator::FPMulx(T op1, T op2) { 3635 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3636 // inf * 0.0 returns +/-2.0. 3637 T two = 2.0; 3638 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3639 } 3640 return FPMul(op1, op2); 3641} 3642 3643 3644template <typename T> 3645T Simulator::FPMulAdd(T a, T op1, T op2) { 3646 T result = FPProcessNaNs3(a, op1, op2); 3647 3648 T sign_a = copysign(1.0, a); 3649 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3650 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3651 bool operation_generates_nan = 3652 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3653 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3654 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3655 3656 if (std::isnan(result)) { 3657 // Generated NaNs override quiet NaNs propagated from a. 3658 if (operation_generates_nan && IsQuietNaN(a)) { 3659 FPProcessException(); 3660 return FPDefaultNaN<T>(); 3661 } else { 3662 return result; 3663 } 3664 } 3665 3666 // If the operation would produce a NaN, return the default NaN. 3667 if (operation_generates_nan) { 3668 FPProcessException(); 3669 return FPDefaultNaN<T>(); 3670 } 3671 3672 // Work around broken fma implementations for exact zero results: The sign of 3673 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 3674 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3675 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; 3676 } 3677 3678 result = FusedMultiplyAdd(op1, op2, a); 3679 VIXL_ASSERT(!std::isnan(result)); 3680 3681 // Work around broken fma implementations for rounded zero results: If a is 3682 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3683 if ((a == 0.0) && (result == 0.0)) { 3684 return copysign(0.0, sign_prod); 3685 } 3686 3687 return result; 3688} 3689 3690 3691template <typename T> 3692T Simulator::FPDiv(T op1, T op2) { 3693 // NaNs should be handled elsewhere. 3694 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3695 3696 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3697 // inf / inf and 0.0 / 0.0 return the default NaN. 3698 FPProcessException(); 3699 return FPDefaultNaN<T>(); 3700 } else { 3701 if (op2 == 0.0) FPProcessException(); 3702 3703 // Other cases should be handled by standard arithmetic. 3704 return op1 / op2; 3705 } 3706} 3707 3708 3709template <typename T> 3710T Simulator::FPSqrt(T op) { 3711 if (std::isnan(op)) { 3712 return FPProcessNaN(op); 3713 } else if (op < 0.0) { 3714 FPProcessException(); 3715 return FPDefaultNaN<T>(); 3716 } else { 3717 return sqrt(op); 3718 } 3719} 3720 3721 3722template <typename T> 3723T Simulator::FPMax(T a, T b) { 3724 T result = FPProcessNaNs(a, b); 3725 if (std::isnan(result)) return result; 3726 3727 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3728 // a and b are zero, and the sign differs: return +0.0. 3729 return 0.0; 3730 } else { 3731 return (a > b) ? a : b; 3732 } 3733} 3734 3735 3736template <typename T> 3737T Simulator::FPMaxNM(T a, T b) { 3738 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3739 a = kFP64NegativeInfinity; 3740 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3741 b = kFP64NegativeInfinity; 3742 } 3743 3744 T result = FPProcessNaNs(a, b); 3745 return std::isnan(result) ? result : FPMax(a, b); 3746} 3747 3748 3749template <typename T> 3750T Simulator::FPMin(T a, T b) { 3751 T result = FPProcessNaNs(a, b); 3752 if (std::isnan(result)) return result; 3753 3754 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3755 // a and b are zero, and the sign differs: return -0.0. 3756 return -0.0; 3757 } else { 3758 return (a < b) ? a : b; 3759 } 3760} 3761 3762 3763template <typename T> 3764T Simulator::FPMinNM(T a, T b) { 3765 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3766 a = kFP64PositiveInfinity; 3767 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3768 b = kFP64PositiveInfinity; 3769 } 3770 3771 T result = FPProcessNaNs(a, b); 3772 return std::isnan(result) ? result : FPMin(a, b); 3773} 3774 3775 3776template <typename T> 3777T Simulator::FPRecipStepFused(T op1, T op2) { 3778 const T two = 2.0; 3779 if ((std::isinf(op1) && (op2 == 0.0)) || 3780 ((op1 == 0.0) && (std::isinf(op2)))) { 3781 return two; 3782 } else if (std::isinf(op1) || std::isinf(op2)) { 3783 // Return +inf if signs match, otherwise -inf. 3784 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3785 : kFP64NegativeInfinity; 3786 } else { 3787 return FusedMultiplyAdd(op1, op2, two); 3788 } 3789} 3790 3791 3792template <typename T> 3793T Simulator::FPRSqrtStepFused(T op1, T op2) { 3794 const T one_point_five = 1.5; 3795 const T two = 2.0; 3796 3797 if ((std::isinf(op1) && (op2 == 0.0)) || 3798 ((op1 == 0.0) && (std::isinf(op2)))) { 3799 return one_point_five; 3800 } else if (std::isinf(op1) || std::isinf(op2)) { 3801 // Return +inf if signs match, otherwise -inf. 3802 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3803 : kFP64NegativeInfinity; 3804 } else { 3805 // The multiply-add-halve operation must be fully fused, so avoid interim 3806 // rounding by checking which operand can be losslessly divided by two 3807 // before doing the multiply-add. 3808 if (std::isnormal(op1 / two)) { 3809 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3810 } else if (std::isnormal(op2 / two)) { 3811 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3812 } else { 3813 // Neither operand is normal after halving: the result is dominated by 3814 // the addition term, so just return that. 3815 return one_point_five; 3816 } 3817 } 3818} 3819 3820 3821double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3822 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3823 (value == kFP64NegativeInfinity)) { 3824 return value; 3825 } else if (std::isnan(value)) { 3826 return FPProcessNaN(value); 3827 } 3828 3829 double int_result = std::floor(value); 3830 double error = value - int_result; 3831 switch (round_mode) { 3832 case FPTieAway: { 3833 // Take care of correctly handling the range ]-0.5, -0.0], which must 3834 // yield -0.0. 3835 if ((-0.5 < value) && (value < 0.0)) { 3836 int_result = -0.0; 3837 3838 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3839 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3840 // result is positive, round up. 3841 int_result++; 3842 } 3843 break; 3844 } 3845 case FPTieEven: { 3846 // Take care of correctly handling the range [-0.5, -0.0], which must 3847 // yield -0.0. 3848 if ((-0.5 <= value) && (value < 0.0)) { 3849 int_result = -0.0; 3850 3851 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3852 // result is odd, round up. 3853 } else if ((error > 0.5) || 3854 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3855 int_result++; 3856 } 3857 break; 3858 } 3859 case FPZero: { 3860 // If value>0 then we take floor(value) 3861 // otherwise, ceil(value). 3862 if (value < 0) { 3863 int_result = ceil(value); 3864 } 3865 break; 3866 } 3867 case FPNegativeInfinity: { 3868 // We always use floor(value). 3869 break; 3870 } 3871 case FPPositiveInfinity: { 3872 // Take care of correctly handling the range ]-1.0, -0.0], which must 3873 // yield -0.0. 3874 if ((-1.0 < value) && (value < 0.0)) { 3875 int_result = -0.0; 3876 3877 // If the error is non-zero, round up. 3878 } else if (error > 0.0) { 3879 int_result++; 3880 } 3881 break; 3882 } 3883 default: 3884 VIXL_UNIMPLEMENTED(); 3885 } 3886 return int_result; 3887} 3888 3889 3890int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3891 value = FPRoundInt(value, rmode); 3892 if (value >= kWMaxInt) { 3893 return kWMaxInt; 3894 } else if (value < kWMinInt) { 3895 return kWMinInt; 3896 } 3897 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3898} 3899 3900 3901int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3902 value = FPRoundInt(value, rmode); 3903 if (value >= kXMaxInt) { 3904 return kXMaxInt; 3905 } else if (value < kXMinInt) { 3906 return kXMinInt; 3907 } 3908 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3909} 3910 3911 3912uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3913 value = FPRoundInt(value, rmode); 3914 if (value >= kWMaxUInt) { 3915 return kWMaxUInt; 3916 } else if (value < 0.0) { 3917 return 0; 3918 } 3919 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3920} 3921 3922 3923uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3924 value = FPRoundInt(value, rmode); 3925 if (value >= kXMaxUInt) { 3926 return kXMaxUInt; 3927 } else if (value < 0.0) { 3928 return 0; 3929 } 3930 return std::isnan(value) ? 0 : static_cast<uint64_t>(value); 3931} 3932 3933 3934#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3935 template <typename T> \ 3936 LogicVRegister Simulator::FN(VectorFormat vform, \ 3937 LogicVRegister dst, \ 3938 const LogicVRegister& src1, \ 3939 const LogicVRegister& src2) { \ 3940 dst.ClearForWrite(vform); \ 3941 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3942 T op1 = src1.Float<T>(i); \ 3943 T op2 = src2.Float<T>(i); \ 3944 T result; \ 3945 if (PROCNAN) { \ 3946 result = FPProcessNaNs(op1, op2); \ 3947 if (!std::isnan(result)) { \ 3948 result = OP(op1, op2); \ 3949 } \ 3950 } else { \ 3951 result = OP(op1, op2); \ 3952 } \ 3953 dst.SetFloat(i, result); \ 3954 } \ 3955 return dst; \ 3956 } \ 3957 \ 3958 LogicVRegister Simulator::FN(VectorFormat vform, \ 3959 LogicVRegister dst, \ 3960 const LogicVRegister& src1, \ 3961 const LogicVRegister& src2) { \ 3962 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3963 FN<float>(vform, dst, src1, src2); \ 3964 } else { \ 3965 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3966 FN<double>(vform, dst, src1, src2); \ 3967 } \ 3968 return dst; \ 3969 } 3970NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3971#undef DEFINE_NEON_FP_VECTOR_OP 3972 3973 3974LogicVRegister Simulator::fnmul(VectorFormat vform, 3975 LogicVRegister dst, 3976 const LogicVRegister& src1, 3977 const LogicVRegister& src2) { 3978 SimVRegister temp; 3979 LogicVRegister product = fmul(vform, temp, src1, src2); 3980 return fneg(vform, dst, product); 3981} 3982 3983 3984template <typename T> 3985LogicVRegister Simulator::frecps(VectorFormat vform, 3986 LogicVRegister dst, 3987 const LogicVRegister& src1, 3988 const LogicVRegister& src2) { 3989 dst.ClearForWrite(vform); 3990 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3991 T op1 = -src1.Float<T>(i); 3992 T op2 = src2.Float<T>(i); 3993 T result = FPProcessNaNs(op1, op2); 3994 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); 3995 } 3996 return dst; 3997} 3998 3999 4000LogicVRegister Simulator::frecps(VectorFormat vform, 4001 LogicVRegister dst, 4002 const LogicVRegister& src1, 4003 const LogicVRegister& src2) { 4004 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4005 frecps<float>(vform, dst, src1, src2); 4006 } else { 4007 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4008 frecps<double>(vform, dst, src1, src2); 4009 } 4010 return dst; 4011} 4012 4013 4014template <typename T> 4015LogicVRegister Simulator::frsqrts(VectorFormat vform, 4016 LogicVRegister dst, 4017 const LogicVRegister& src1, 4018 const LogicVRegister& src2) { 4019 dst.ClearForWrite(vform); 4020 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4021 T op1 = -src1.Float<T>(i); 4022 T op2 = src2.Float<T>(i); 4023 T result = FPProcessNaNs(op1, op2); 4024 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 4025 } 4026 return dst; 4027} 4028 4029 4030LogicVRegister Simulator::frsqrts(VectorFormat vform, 4031 LogicVRegister dst, 4032 const LogicVRegister& src1, 4033 const LogicVRegister& src2) { 4034 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4035 frsqrts<float>(vform, dst, src1, src2); 4036 } else { 4037 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4038 frsqrts<double>(vform, dst, src1, src2); 4039 } 4040 return dst; 4041} 4042 4043 4044template <typename T> 4045LogicVRegister Simulator::fcmp(VectorFormat vform, 4046 LogicVRegister dst, 4047 const LogicVRegister& src1, 4048 const LogicVRegister& src2, 4049 Condition cond) { 4050 dst.ClearForWrite(vform); 4051 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4052 bool result = false; 4053 T op1 = src1.Float<T>(i); 4054 T op2 = src2.Float<T>(i); 4055 T nan_result = FPProcessNaNs(op1, op2); 4056 if (!std::isnan(nan_result)) { 4057 switch (cond) { 4058 case eq: 4059 result = (op1 == op2); 4060 break; 4061 case ge: 4062 result = (op1 >= op2); 4063 break; 4064 case gt: 4065 result = (op1 > op2); 4066 break; 4067 case le: 4068 result = (op1 <= op2); 4069 break; 4070 case lt: 4071 result = (op1 < op2); 4072 break; 4073 default: 4074 VIXL_UNREACHABLE(); 4075 break; 4076 } 4077 } 4078 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 4079 } 4080 return dst; 4081} 4082 4083 4084LogicVRegister Simulator::fcmp(VectorFormat vform, 4085 LogicVRegister dst, 4086 const LogicVRegister& src1, 4087 const LogicVRegister& src2, 4088 Condition cond) { 4089 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4090 fcmp<float>(vform, dst, src1, src2, cond); 4091 } else { 4092 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4093 fcmp<double>(vform, dst, src1, src2, cond); 4094 } 4095 return dst; 4096} 4097 4098 4099LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4100 LogicVRegister dst, 4101 const LogicVRegister& src, 4102 Condition cond) { 4103 SimVRegister temp; 4104 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4105 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 4106 fcmp<float>(vform, dst, src, zero_reg, cond); 4107 } else { 4108 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4109 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 4110 fcmp<double>(vform, dst, src, zero_reg, cond); 4111 } 4112 return dst; 4113} 4114 4115 4116LogicVRegister Simulator::fabscmp(VectorFormat vform, 4117 LogicVRegister dst, 4118 const LogicVRegister& src1, 4119 const LogicVRegister& src2, 4120 Condition cond) { 4121 SimVRegister temp1, temp2; 4122 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4123 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4124 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4125 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4126 } else { 4127 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4128 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4129 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 4130 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4131 } 4132 return dst; 4133} 4134 4135 4136template <typename T> 4137LogicVRegister Simulator::fmla(VectorFormat vform, 4138 LogicVRegister dst, 4139 const LogicVRegister& src1, 4140 const LogicVRegister& src2) { 4141 dst.ClearForWrite(vform); 4142 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4143 T op1 = src1.Float<T>(i); 4144 T op2 = src2.Float<T>(i); 4145 T acc = dst.Float<T>(i); 4146 T result = FPMulAdd(acc, op1, op2); 4147 dst.SetFloat(i, result); 4148 } 4149 return dst; 4150} 4151 4152 4153LogicVRegister Simulator::fmla(VectorFormat vform, 4154 LogicVRegister dst, 4155 const LogicVRegister& src1, 4156 const LogicVRegister& src2) { 4157 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4158 fmla<float>(vform, dst, src1, src2); 4159 } else { 4160 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4161 fmla<double>(vform, dst, src1, src2); 4162 } 4163 return dst; 4164} 4165 4166 4167template <typename T> 4168LogicVRegister Simulator::fmls(VectorFormat vform, 4169 LogicVRegister dst, 4170 const LogicVRegister& src1, 4171 const LogicVRegister& src2) { 4172 dst.ClearForWrite(vform); 4173 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4174 T op1 = -src1.Float<T>(i); 4175 T op2 = src2.Float<T>(i); 4176 T acc = dst.Float<T>(i); 4177 T result = FPMulAdd(acc, op1, op2); 4178 dst.SetFloat(i, result); 4179 } 4180 return dst; 4181} 4182 4183 4184LogicVRegister Simulator::fmls(VectorFormat vform, 4185 LogicVRegister dst, 4186 const LogicVRegister& src1, 4187 const LogicVRegister& src2) { 4188 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4189 fmls<float>(vform, dst, src1, src2); 4190 } else { 4191 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4192 fmls<double>(vform, dst, src1, src2); 4193 } 4194 return dst; 4195} 4196 4197 4198template <typename T> 4199LogicVRegister Simulator::fneg(VectorFormat vform, 4200 LogicVRegister dst, 4201 const LogicVRegister& src) { 4202 dst.ClearForWrite(vform); 4203 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4204 T op = src.Float<T>(i); 4205 op = -op; 4206 dst.SetFloat(i, op); 4207 } 4208 return dst; 4209} 4210 4211 4212LogicVRegister Simulator::fneg(VectorFormat vform, 4213 LogicVRegister dst, 4214 const LogicVRegister& src) { 4215 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4216 fneg<float>(vform, dst, src); 4217 } else { 4218 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4219 fneg<double>(vform, dst, src); 4220 } 4221 return dst; 4222} 4223 4224 4225template <typename T> 4226LogicVRegister Simulator::fabs_(VectorFormat vform, 4227 LogicVRegister dst, 4228 const LogicVRegister& src) { 4229 dst.ClearForWrite(vform); 4230 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4231 T op = src.Float<T>(i); 4232 if (copysign(1.0, op) < 0.0) { 4233 op = -op; 4234 } 4235 dst.SetFloat(i, op); 4236 } 4237 return dst; 4238} 4239 4240 4241LogicVRegister Simulator::fabs_(VectorFormat vform, 4242 LogicVRegister dst, 4243 const LogicVRegister& src) { 4244 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4245 fabs_<float>(vform, dst, src); 4246 } else { 4247 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4248 fabs_<double>(vform, dst, src); 4249 } 4250 return dst; 4251} 4252 4253 4254LogicVRegister Simulator::fabd(VectorFormat vform, 4255 LogicVRegister dst, 4256 const LogicVRegister& src1, 4257 const LogicVRegister& src2) { 4258 SimVRegister temp; 4259 fsub(vform, temp, src1, src2); 4260 fabs_(vform, dst, temp); 4261 return dst; 4262} 4263 4264 4265LogicVRegister Simulator::fsqrt(VectorFormat vform, 4266 LogicVRegister dst, 4267 const LogicVRegister& src) { 4268 dst.ClearForWrite(vform); 4269 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4270 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4271 float result = FPSqrt(src.Float<float>(i)); 4272 dst.SetFloat(i, result); 4273 } 4274 } else { 4275 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4276 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4277 double result = FPSqrt(src.Float<double>(i)); 4278 dst.SetFloat(i, result); 4279 } 4280 } 4281 return dst; 4282} 4283 4284 4285#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4286 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4287 LogicVRegister dst, \ 4288 const LogicVRegister& src1, \ 4289 const LogicVRegister& src2) { \ 4290 SimVRegister temp1, temp2; \ 4291 uzp1(vform, temp1, src1, src2); \ 4292 uzp2(vform, temp2, src1, src2); \ 4293 FN(vform, dst, temp1, temp2); \ 4294 return dst; \ 4295 } \ 4296 \ 4297 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4298 LogicVRegister dst, \ 4299 const LogicVRegister& src) { \ 4300 if (vform == kFormatS) { \ 4301 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4302 dst.SetFloat(0, result); \ 4303 } else { \ 4304 VIXL_ASSERT(vform == kFormatD); \ 4305 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4306 dst.SetFloat(0, result); \ 4307 } \ 4308 dst.ClearForWrite(vform); \ 4309 return dst; \ 4310 } 4311NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4312#undef DEFINE_NEON_FP_PAIR_OP 4313 4314 4315LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4316 LogicVRegister dst, 4317 const LogicVRegister& src, 4318 FPMinMaxOp Op) { 4319 VIXL_ASSERT(vform == kFormat4S); 4320 USE(vform); 4321 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4322 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4323 float result = (this->*Op)(result1, result2); 4324 dst.ClearForWrite(kFormatS); 4325 dst.SetFloat<float>(0, result); 4326 return dst; 4327} 4328 4329 4330LogicVRegister Simulator::fmaxv(VectorFormat vform, 4331 LogicVRegister dst, 4332 const LogicVRegister& src) { 4333 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4334} 4335 4336 4337LogicVRegister Simulator::fminv(VectorFormat vform, 4338 LogicVRegister dst, 4339 const LogicVRegister& src) { 4340 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4341} 4342 4343 4344LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4345 LogicVRegister dst, 4346 const LogicVRegister& src) { 4347 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4348} 4349 4350 4351LogicVRegister Simulator::fminnmv(VectorFormat vform, 4352 LogicVRegister dst, 4353 const LogicVRegister& src) { 4354 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4355} 4356 4357 4358LogicVRegister Simulator::fmul(VectorFormat vform, 4359 LogicVRegister dst, 4360 const LogicVRegister& src1, 4361 const LogicVRegister& src2, 4362 int index) { 4363 dst.ClearForWrite(vform); 4364 SimVRegister temp; 4365 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4366 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4367 fmul<float>(vform, dst, src1, index_reg); 4368 4369 } else { 4370 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4371 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4372 fmul<double>(vform, dst, src1, index_reg); 4373 } 4374 return dst; 4375} 4376 4377 4378LogicVRegister Simulator::fmla(VectorFormat vform, 4379 LogicVRegister dst, 4380 const LogicVRegister& src1, 4381 const LogicVRegister& src2, 4382 int index) { 4383 dst.ClearForWrite(vform); 4384 SimVRegister temp; 4385 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4386 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4387 fmla<float>(vform, dst, src1, index_reg); 4388 4389 } else { 4390 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4391 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4392 fmla<double>(vform, dst, src1, index_reg); 4393 } 4394 return dst; 4395} 4396 4397 4398LogicVRegister Simulator::fmls(VectorFormat vform, 4399 LogicVRegister dst, 4400 const LogicVRegister& src1, 4401 const LogicVRegister& src2, 4402 int index) { 4403 dst.ClearForWrite(vform); 4404 SimVRegister temp; 4405 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4406 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4407 fmls<float>(vform, dst, src1, index_reg); 4408 4409 } else { 4410 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4411 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4412 fmls<double>(vform, dst, src1, index_reg); 4413 } 4414 return dst; 4415} 4416 4417 4418LogicVRegister Simulator::fmulx(VectorFormat vform, 4419 LogicVRegister dst, 4420 const LogicVRegister& src1, 4421 const LogicVRegister& src2, 4422 int index) { 4423 dst.ClearForWrite(vform); 4424 SimVRegister temp; 4425 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4426 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4427 fmulx<float>(vform, dst, src1, index_reg); 4428 4429 } else { 4430 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4431 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4432 fmulx<double>(vform, dst, src1, index_reg); 4433 } 4434 return dst; 4435} 4436 4437 4438LogicVRegister Simulator::frint(VectorFormat vform, 4439 LogicVRegister dst, 4440 const LogicVRegister& src, 4441 FPRounding rounding_mode, 4442 bool inexact_exception) { 4443 dst.ClearForWrite(vform); 4444 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4445 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4446 float input = src.Float<float>(i); 4447 float rounded = FPRoundInt(input, rounding_mode); 4448 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4449 FPProcessException(); 4450 } 4451 dst.SetFloat<float>(i, rounded); 4452 } 4453 } else { 4454 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4455 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4456 double input = src.Float<double>(i); 4457 double rounded = FPRoundInt(input, rounding_mode); 4458 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4459 FPProcessException(); 4460 } 4461 dst.SetFloat<double>(i, rounded); 4462 } 4463 } 4464 return dst; 4465} 4466 4467 4468LogicVRegister Simulator::fcvts(VectorFormat vform, 4469 LogicVRegister dst, 4470 const LogicVRegister& src, 4471 FPRounding rounding_mode, 4472 int fbits) { 4473 dst.ClearForWrite(vform); 4474 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4475 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4476 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4477 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4478 } 4479 } else { 4480 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4481 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4482 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4483 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4484 } 4485 } 4486 return dst; 4487} 4488 4489 4490LogicVRegister Simulator::fcvtu(VectorFormat vform, 4491 LogicVRegister dst, 4492 const LogicVRegister& src, 4493 FPRounding rounding_mode, 4494 int fbits) { 4495 dst.ClearForWrite(vform); 4496 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4497 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4498 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4499 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4500 } 4501 } else { 4502 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4503 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4504 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4505 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4506 } 4507 } 4508 return dst; 4509} 4510 4511 4512LogicVRegister Simulator::fcvtl(VectorFormat vform, 4513 LogicVRegister dst, 4514 const LogicVRegister& src) { 4515 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4516 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4517 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 4518 } 4519 } else { 4520 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4521 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4522 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4523 } 4524 } 4525 return dst; 4526} 4527 4528 4529LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4530 LogicVRegister dst, 4531 const LogicVRegister& src) { 4532 int lane_count = LaneCountFromFormat(vform); 4533 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4534 for (int i = 0; i < lane_count; i++) { 4535 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4536 } 4537 } else { 4538 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4539 for (int i = 0; i < lane_count; i++) { 4540 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4541 } 4542 } 4543 return dst; 4544} 4545 4546 4547LogicVRegister Simulator::fcvtn(VectorFormat vform, 4548 LogicVRegister dst, 4549 const LogicVRegister& src) { 4550 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4551 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4552 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4553 } 4554 } else { 4555 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4556 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4557 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4558 } 4559 } 4560 return dst; 4561} 4562 4563 4564LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4565 LogicVRegister dst, 4566 const LogicVRegister& src) { 4567 int lane_count = LaneCountFromFormat(vform) / 2; 4568 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4569 for (int i = lane_count - 1; i >= 0; i--) { 4570 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4571 } 4572 } else { 4573 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4574 for (int i = lane_count - 1; i >= 0; i--) { 4575 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4576 } 4577 } 4578 return dst; 4579} 4580 4581 4582LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4583 LogicVRegister dst, 4584 const LogicVRegister& src) { 4585 dst.ClearForWrite(vform); 4586 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4587 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4588 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4589 } 4590 return dst; 4591} 4592 4593 4594LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4595 LogicVRegister dst, 4596 const LogicVRegister& src) { 4597 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4598 int lane_count = LaneCountFromFormat(vform) / 2; 4599 for (int i = lane_count - 1; i >= 0; i--) { 4600 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4601 } 4602 return dst; 4603} 4604 4605 4606// Based on reference C function recip_sqrt_estimate from ARM ARM. 4607double Simulator::recip_sqrt_estimate(double a) { 4608 int q0, q1, s; 4609 double r; 4610 if (a < 0.5) { 4611 q0 = static_cast<int>(a * 512.0); 4612 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4613 } else { 4614 q1 = static_cast<int>(a * 256.0); 4615 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4616 } 4617 s = static_cast<int>(256.0 * r + 0.5); 4618 return static_cast<double>(s) / 256.0; 4619} 4620 4621 4622static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4623 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4624} 4625 4626 4627template <typename T> 4628T Simulator::FPRecipSqrtEstimate(T op) { 4629 if (std::isnan(op)) { 4630 return FPProcessNaN(op); 4631 } else if (op == 0.0) { 4632 if (copysign(1.0, op) < 0.0) { 4633 return kFP64NegativeInfinity; 4634 } else { 4635 return kFP64PositiveInfinity; 4636 } 4637 } else if (copysign(1.0, op) < 0.0) { 4638 FPProcessException(); 4639 return FPDefaultNaN<T>(); 4640 } else if (std::isinf(op)) { 4641 return 0.0; 4642 } else { 4643 uint64_t fraction; 4644 int exp, result_exp; 4645 4646 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4647 exp = FloatExp(op); 4648 fraction = FloatMantissa(op); 4649 fraction <<= 29; 4650 } else { 4651 exp = DoubleExp(op); 4652 fraction = DoubleMantissa(op); 4653 } 4654 4655 if (exp == 0) { 4656 while (Bits(fraction, 51, 51) == 0) { 4657 fraction = Bits(fraction, 50, 0) << 1; 4658 exp -= 1; 4659 } 4660 fraction = Bits(fraction, 50, 0) << 1; 4661 } 4662 4663 double scaled; 4664 if (Bits(exp, 0, 0) == 0) { 4665 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4666 } else { 4667 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4668 } 4669 4670 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4671 result_exp = (380 - exp) / 2; 4672 } else { 4673 result_exp = (3068 - exp) / 2; 4674 } 4675 4676 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 4677 4678 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4679 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4680 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4681 return FloatPack(0, exp_bits, est_bits); 4682 } else { 4683 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4684 } 4685 } 4686} 4687 4688 4689LogicVRegister Simulator::frsqrte(VectorFormat vform, 4690 LogicVRegister dst, 4691 const LogicVRegister& src) { 4692 dst.ClearForWrite(vform); 4693 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4694 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4695 float input = src.Float<float>(i); 4696 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4697 } 4698 } else { 4699 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4700 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4701 double input = src.Float<double>(i); 4702 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4703 } 4704 } 4705 return dst; 4706} 4707 4708template <typename T> 4709T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4710 uint32_t sign; 4711 4712 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4713 sign = FloatSign(op); 4714 } else { 4715 sign = DoubleSign(op); 4716 } 4717 4718 if (std::isnan(op)) { 4719 return FPProcessNaN(op); 4720 } else if (std::isinf(op)) { 4721 return (sign == 1) ? -0.0 : 0.0; 4722 } else if (op == 0.0) { 4723 FPProcessException(); // FPExc_DivideByZero exception. 4724 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4725 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4726 (std::fabs(op) < std::pow(2.0, -128.0))) || 4727 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4728 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4729 bool overflow_to_inf = false; 4730 switch (rounding) { 4731 case FPTieEven: 4732 overflow_to_inf = true; 4733 break; 4734 case FPPositiveInfinity: 4735 overflow_to_inf = (sign == 0); 4736 break; 4737 case FPNegativeInfinity: 4738 overflow_to_inf = (sign == 1); 4739 break; 4740 case FPZero: 4741 overflow_to_inf = false; 4742 break; 4743 default: 4744 break; 4745 } 4746 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4747 if (overflow_to_inf) { 4748 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4749 } else { 4750 // Return FPMaxNormal(sign). 4751 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4752 return FloatPack(sign, 0xfe, 0x07fffff); 4753 } else { 4754 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 4755 } 4756 } 4757 } else { 4758 uint64_t fraction; 4759 int exp, result_exp; 4760 uint32_t sign; 4761 4762 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4763 sign = FloatSign(op); 4764 exp = FloatExp(op); 4765 fraction = FloatMantissa(op); 4766 fraction <<= 29; 4767 } else { 4768 sign = DoubleSign(op); 4769 exp = DoubleExp(op); 4770 fraction = DoubleMantissa(op); 4771 } 4772 4773 if (exp == 0) { 4774 if (Bits(fraction, 51, 51) == 0) { 4775 exp -= 1; 4776 fraction = Bits(fraction, 49, 0) << 2; 4777 } else { 4778 fraction = Bits(fraction, 50, 0) << 1; 4779 } 4780 } 4781 4782 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4783 4784 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4785 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4786 } else { 4787 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 4788 } 4789 4790 double estimate = recip_estimate(scaled); 4791 4792 fraction = DoubleMantissa(estimate); 4793 if (result_exp == 0) { 4794 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4795 } else if (result_exp == -1) { 4796 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4797 result_exp = 0; 4798 } 4799 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4800 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4801 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4802 return FloatPack(sign, exp_bits, frac_bits); 4803 } else { 4804 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4805 } 4806 } 4807} 4808 4809 4810LogicVRegister Simulator::frecpe(VectorFormat vform, 4811 LogicVRegister dst, 4812 const LogicVRegister& src, 4813 FPRounding round) { 4814 dst.ClearForWrite(vform); 4815 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4816 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4817 float input = src.Float<float>(i); 4818 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4819 } 4820 } else { 4821 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4822 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4823 double input = src.Float<double>(i); 4824 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4825 } 4826 } 4827 return dst; 4828} 4829 4830 4831LogicVRegister Simulator::ursqrte(VectorFormat vform, 4832 LogicVRegister dst, 4833 const LogicVRegister& src) { 4834 dst.ClearForWrite(vform); 4835 uint64_t operand; 4836 uint32_t result; 4837 double dp_operand, dp_result; 4838 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4839 operand = src.Uint(vform, i); 4840 if (operand <= 0x3FFFFFFF) { 4841 result = 0xFFFFFFFF; 4842 } else { 4843 dp_operand = operand * std::pow(2.0, -32); 4844 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4845 result = static_cast<uint32_t>(dp_result); 4846 } 4847 dst.SetUint(vform, i, result); 4848 } 4849 return dst; 4850} 4851 4852 4853// Based on reference C function recip_estimate from ARM ARM. 4854double Simulator::recip_estimate(double a) { 4855 int q, s; 4856 double r; 4857 q = static_cast<int>(a * 512.0); 4858 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4859 s = static_cast<int>(256.0 * r + 0.5); 4860 return static_cast<double>(s) / 256.0; 4861} 4862 4863 4864LogicVRegister Simulator::urecpe(VectorFormat vform, 4865 LogicVRegister dst, 4866 const LogicVRegister& src) { 4867 dst.ClearForWrite(vform); 4868 uint64_t operand; 4869 uint32_t result; 4870 double dp_operand, dp_result; 4871 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4872 operand = src.Uint(vform, i); 4873 if (operand <= 0x7FFFFFFF) { 4874 result = 0xFFFFFFFF; 4875 } else { 4876 dp_operand = operand * std::pow(2.0, -32); 4877 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4878 result = static_cast<uint32_t>(dp_result); 4879 } 4880 dst.SetUint(vform, i, result); 4881 } 4882 return dst; 4883} 4884 4885template <typename T> 4886LogicVRegister Simulator::frecpx(VectorFormat vform, 4887 LogicVRegister dst, 4888 const LogicVRegister& src) { 4889 dst.ClearForWrite(vform); 4890 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4891 T op = src.Float<T>(i); 4892 T result; 4893 if (std::isnan(op)) { 4894 result = FPProcessNaN(op); 4895 } else { 4896 int exp; 4897 uint32_t sign; 4898 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4899 sign = FloatSign(op); 4900 exp = FloatExp(op); 4901 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4902 result = FloatPack(sign, exp, 0); 4903 } else { 4904 sign = DoubleSign(op); 4905 exp = DoubleExp(op); 4906 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4907 result = DoublePack(sign, exp, 0); 4908 } 4909 } 4910 dst.SetFloat(i, result); 4911 } 4912 return dst; 4913} 4914 4915 4916LogicVRegister Simulator::frecpx(VectorFormat vform, 4917 LogicVRegister dst, 4918 const LogicVRegister& src) { 4919 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4920 frecpx<float>(vform, dst, src); 4921 } else { 4922 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4923 frecpx<double>(vform, dst, src); 4924 } 4925 return dst; 4926} 4927 4928LogicVRegister Simulator::scvtf(VectorFormat vform, 4929 LogicVRegister dst, 4930 const LogicVRegister& src, 4931 int fbits, 4932 FPRounding round) { 4933 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4934 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4935 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4936 dst.SetFloat<float>(i, result); 4937 } else { 4938 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4939 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4940 dst.SetFloat<double>(i, result); 4941 } 4942 } 4943 return dst; 4944} 4945 4946 4947LogicVRegister Simulator::ucvtf(VectorFormat vform, 4948 LogicVRegister dst, 4949 const LogicVRegister& src, 4950 int fbits, 4951 FPRounding round) { 4952 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4953 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4954 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4955 dst.SetFloat<float>(i, result); 4956 } else { 4957 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4958 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4959 dst.SetFloat<double>(i, result); 4960 } 4961 } 4962 return dst; 4963} 4964 4965 4966} // namespace aarch64 4967} // namespace vixl 4968 4969#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 4970