logic-aarch64.cc revision b953ea8255b36e27834f17941429cd17af12f6f2
1// Copyright 2015, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 28 29#include <cmath> 30 31#include "simulator-aarch64.h" 32 33namespace vixl { 34namespace aarch64 { 35 36template <> 37double Simulator::FPDefaultNaN<double>() { 38 return kFP64DefaultNaN; 39} 40 41 42template <> 43float Simulator::FPDefaultNaN<float>() { 44 return kFP32DefaultNaN; 45} 46 47// See FPRound for a description of this function. 
48static inline double FPRoundToDouble(int64_t sign, 49 int64_t exponent, 50 uint64_t mantissa, 51 FPRounding round_mode) { 52 int64_t bits = 53 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, 54 exponent, 55 mantissa, 56 round_mode); 57 return RawbitsToDouble(bits); 58} 59 60 61// See FPRound for a description of this function. 62static inline float FPRoundToFloat(int64_t sign, 63 int64_t exponent, 64 uint64_t mantissa, 65 FPRounding round_mode) { 66 int32_t bits = 67 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, 68 exponent, 69 mantissa, 70 round_mode); 71 return RawbitsToFloat(bits); 72} 73 74 75// See FPRound for a description of this function. 76static inline float16 FPRoundToFloat16(int64_t sign, 77 int64_t exponent, 78 uint64_t mantissa, 79 FPRounding round_mode) { 80 return FPRound<float16, 81 kFloat16ExponentBits, 82 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode); 83} 84 85 86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 87 if (src >= 0) { 88 return UFixedToDouble(src, fbits, round); 89 } else { 90 // This works for all negative values, including INT64_MIN. 91 return -UFixedToDouble(-src, fbits, round); 92 } 93} 94 95 96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 97 // An input of 0 is a special case because the result is effectively 98 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 99 if (src == 0) { 100 return 0.0; 101 } 102 103 // Calculate the exponent. The highest significant bit will have the value 104 // 2^exponent. 
105 const int highest_significant_bit = 63 - CountLeadingZeros(src); 106 const int64_t exponent = highest_significant_bit - fbits; 107 108 return FPRoundToDouble(0, exponent, src, round); 109} 110 111 112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 113 if (src >= 0) { 114 return UFixedToFloat(src, fbits, round); 115 } else { 116 // This works for all negative values, including INT64_MIN. 117 return -UFixedToFloat(-src, fbits, round); 118 } 119} 120 121 122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 123 // An input of 0 is a special case because the result is effectively 124 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 125 if (src == 0) { 126 return 0.0f; 127 } 128 129 // Calculate the exponent. The highest significant bit will have the value 130 // 2^exponent. 131 const int highest_significant_bit = 63 - CountLeadingZeros(src); 132 const int32_t exponent = highest_significant_bit - fbits; 133 134 return FPRoundToFloat(0, exponent, src, round); 135} 136 137 138double Simulator::FPToDouble(float value) { 139 switch (std::fpclassify(value)) { 140 case FP_NAN: { 141 if (IsSignallingNaN(value)) { 142 FPProcessException(); 143 } 144 if (ReadDN()) return kFP64DefaultNaN; 145 146 // Convert NaNs as the processor would: 147 // - The sign is propagated. 148 // - The payload (mantissa) is transferred entirely, except that the top 149 // bit is forced to '1', making the result a quiet NaN. The unused 150 // (low-order) payload bits are set to 0. 151 uint32_t raw = FloatToRawbits(value); 152 153 uint64_t sign = raw >> 31; 154 uint64_t exponent = (1 << 11) - 1; 155 uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw); 156 payload <<= (52 - 23); // The unused low-order bits should be 0. 157 payload |= (UINT64_C(1) << 51); // Force a quiet NaN. 
158 159 return RawbitsToDouble((sign << 63) | (exponent << 52) | payload); 160 } 161 162 case FP_ZERO: 163 case FP_NORMAL: 164 case FP_SUBNORMAL: 165 case FP_INFINITE: { 166 // All other inputs are preserved in a standard cast, because every value 167 // representable using an IEEE-754 float is also representable using an 168 // IEEE-754 double. 169 return static_cast<double>(value); 170 } 171 } 172 173 VIXL_UNREACHABLE(); 174 return static_cast<double>(value); 175} 176 177 178float Simulator::FPToFloat(float16 value) { 179 uint32_t sign = value >> 15; 180 uint32_t exponent = 181 ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1, 182 kFloat16MantissaBits, 183 value); 184 uint32_t mantissa = 185 ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value); 186 187 switch (Float16Classify(value)) { 188 case FP_ZERO: 189 return (sign == 0) ? 0.0f : -0.0f; 190 191 case FP_INFINITE: 192 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 193 194 case FP_SUBNORMAL: { 195 // Calculate shift required to put mantissa into the most-significant bits 196 // of the destination mantissa. 197 int shift = CountLeadingZeros(mantissa << (32 - 10)); 198 199 // Shift mantissa and discard implicit '1'. 200 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 201 mantissa &= (1 << kFloatMantissaBits) - 1; 202 203 // Adjust the exponent for the shift applied, and rebias. 204 exponent = exponent - shift + (-15 + 127); 205 break; 206 } 207 208 case FP_NAN: 209 if (IsSignallingNaN(value)) { 210 FPProcessException(); 211 } 212 if (ReadDN()) return kFP32DefaultNaN; 213 214 // Convert NaNs as the processor would: 215 // - The sign is propagated. 216 // - The payload (mantissa) is transferred entirely, except that the top 217 // bit is forced to '1', making the result a quiet NaN. The unused 218 // (low-order) payload bits are set to 0. 
219 exponent = (1 << kFloatExponentBits) - 1; 220 221 // Increase bits in mantissa, making low-order bits 0. 222 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 223 mantissa |= 1 << 22; // Force a quiet NaN. 224 break; 225 226 case FP_NORMAL: 227 // Increase bits in mantissa, making low-order bits 0. 228 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 229 230 // Change exponent bias. 231 exponent += (-15 + 127); 232 break; 233 234 default: 235 VIXL_UNREACHABLE(); 236 } 237 return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) | 238 mantissa); 239} 240 241 242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 243 // Only the FPTieEven rounding mode is implemented. 244 VIXL_ASSERT(round_mode == FPTieEven); 245 USE(round_mode); 246 247 uint32_t raw = FloatToRawbits(value); 248 int32_t sign = raw >> 31; 249 int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127; 250 uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw); 251 252 switch (std::fpclassify(value)) { 253 case FP_NAN: { 254 if (IsSignallingNaN(value)) { 255 FPProcessException(); 256 } 257 if (ReadDN()) return kFP16DefaultNaN; 258 259 // Convert NaNs as the processor would: 260 // - The sign is propagated. 261 // - The payload (mantissa) is transferred as much as possible, except 262 // that the top bit is forced to '1', making the result a quiet NaN. 263 float16 result = 264 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 265 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 266 result |= (1 << 9); // Force a quiet NaN; 267 return result; 268 } 269 270 case FP_ZERO: 271 return (sign == 0) ? 0 : 0x8000; 272 273 case FP_INFINITE: 274 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 275 276 case FP_NORMAL: 277 case FP_SUBNORMAL: { 278 // Convert float-to-half as the processor would, assuming that FPCR.FZ 279 // (flush-to-zero) is not set. 280 281 // Add the implicit '1' bit to the mantissa. 
282 mantissa += (1 << 23); 283 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 284 } 285 } 286 287 VIXL_UNREACHABLE(); 288 return 0; 289} 290 291 292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 293 // Only the FPTieEven rounding mode is implemented. 294 VIXL_ASSERT(round_mode == FPTieEven); 295 USE(round_mode); 296 297 uint64_t raw = DoubleToRawbits(value); 298 int32_t sign = raw >> 63; 299 int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023; 300 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 301 302 switch (std::fpclassify(value)) { 303 case FP_NAN: { 304 if (IsSignallingNaN(value)) { 305 FPProcessException(); 306 } 307 if (ReadDN()) return kFP16DefaultNaN; 308 309 // Convert NaNs as the processor would: 310 // - The sign is propagated. 311 // - The payload (mantissa) is transferred as much as possible, except 312 // that the top bit is forced to '1', making the result a quiet NaN. 313 float16 result = 314 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 315 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 316 result |= (1 << 9); // Force a quiet NaN; 317 return result; 318 } 319 320 case FP_ZERO: 321 return (sign == 0) ? 0 : 0x8000; 322 323 case FP_INFINITE: 324 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 325 326 case FP_NORMAL: 327 case FP_SUBNORMAL: { 328 // Convert double-to-half as the processor would, assuming that FPCR.FZ 329 // (flush-to-zero) is not set. 330 331 // Add the implicit '1' bit to the mantissa. 332 mantissa += (UINT64_C(1) << 52); 333 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 334 } 335 } 336 337 VIXL_UNREACHABLE(); 338 return 0; 339} 340 341 342float Simulator::FPToFloat(double value, FPRounding round_mode) { 343 // Only the FPTieEven rounding mode is implemented. 
344 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 345 USE(round_mode); 346 347 switch (std::fpclassify(value)) { 348 case FP_NAN: { 349 if (IsSignallingNaN(value)) { 350 FPProcessException(); 351 } 352 if (ReadDN()) return kFP32DefaultNaN; 353 354 // Convert NaNs as the processor would: 355 // - The sign is propagated. 356 // - The payload (mantissa) is transferred as much as possible, except 357 // that the top bit is forced to '1', making the result a quiet NaN. 358 uint64_t raw = DoubleToRawbits(value); 359 360 uint32_t sign = raw >> 63; 361 uint32_t exponent = (1 << 8) - 1; 362 uint32_t payload = 363 static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw)); 364 payload |= (1 << 22); // Force a quiet NaN. 365 366 return RawbitsToFloat((sign << 31) | (exponent << 23) | payload); 367 } 368 369 case FP_ZERO: 370 case FP_INFINITE: { 371 // In a C++ cast, any value representable in the target type will be 372 // unchanged. This is always the case for +/-0.0 and infinities. 373 return static_cast<float>(value); 374 } 375 376 case FP_NORMAL: 377 case FP_SUBNORMAL: { 378 // Convert double-to-float as the processor would, assuming that FPCR.FZ 379 // (flush-to-zero) is not set. 380 uint64_t raw = DoubleToRawbits(value); 381 // Extract the IEEE-754 double components. 382 uint32_t sign = raw >> 63; 383 // Extract the exponent and remove the IEEE-754 encoding bias. 384 int32_t exponent = 385 static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; 386 // Extract the mantissa and add the implicit '1' bit. 
387 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 388 if (std::fpclassify(value) == FP_NORMAL) { 389 mantissa |= (UINT64_C(1) << 52); 390 } 391 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 392 } 393 } 394 395 VIXL_UNREACHABLE(); 396 return value; 397} 398 399 400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 401 dst.ClearForWrite(vform); 402 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 403 dst.ReadUintFromMem(vform, i, addr); 404 addr += LaneSizeInBytesFromFormat(vform); 405 } 406} 407 408 409void Simulator::ld1(VectorFormat vform, 410 LogicVRegister dst, 411 int index, 412 uint64_t addr) { 413 dst.ReadUintFromMem(vform, index, addr); 414} 415 416 417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 418 dst.ClearForWrite(vform); 419 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 420 dst.ReadUintFromMem(vform, i, addr); 421 } 422} 423 424 425void Simulator::ld2(VectorFormat vform, 426 LogicVRegister dst1, 427 LogicVRegister dst2, 428 uint64_t addr1) { 429 dst1.ClearForWrite(vform); 430 dst2.ClearForWrite(vform); 431 int esize = LaneSizeInBytesFromFormat(vform); 432 uint64_t addr2 = addr1 + esize; 433 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 434 dst1.ReadUintFromMem(vform, i, addr1); 435 dst2.ReadUintFromMem(vform, i, addr2); 436 addr1 += 2 * esize; 437 addr2 += 2 * esize; 438 } 439} 440 441 442void Simulator::ld2(VectorFormat vform, 443 LogicVRegister dst1, 444 LogicVRegister dst2, 445 int index, 446 uint64_t addr1) { 447 dst1.ClearForWrite(vform); 448 dst2.ClearForWrite(vform); 449 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 450 dst1.ReadUintFromMem(vform, index, addr1); 451 dst2.ReadUintFromMem(vform, index, addr2); 452} 453 454 455void Simulator::ld2r(VectorFormat vform, 456 LogicVRegister dst1, 457 LogicVRegister dst2, 458 uint64_t addr) { 459 dst1.ClearForWrite(vform); 460 dst2.ClearForWrite(vform); 461 uint64_t addr2 = addr + 
LaneSizeInBytesFromFormat(vform); 462 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 463 dst1.ReadUintFromMem(vform, i, addr); 464 dst2.ReadUintFromMem(vform, i, addr2); 465 } 466} 467 468 469void Simulator::ld3(VectorFormat vform, 470 LogicVRegister dst1, 471 LogicVRegister dst2, 472 LogicVRegister dst3, 473 uint64_t addr1) { 474 dst1.ClearForWrite(vform); 475 dst2.ClearForWrite(vform); 476 dst3.ClearForWrite(vform); 477 int esize = LaneSizeInBytesFromFormat(vform); 478 uint64_t addr2 = addr1 + esize; 479 uint64_t addr3 = addr2 + esize; 480 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 481 dst1.ReadUintFromMem(vform, i, addr1); 482 dst2.ReadUintFromMem(vform, i, addr2); 483 dst3.ReadUintFromMem(vform, i, addr3); 484 addr1 += 3 * esize; 485 addr2 += 3 * esize; 486 addr3 += 3 * esize; 487 } 488} 489 490 491void Simulator::ld3(VectorFormat vform, 492 LogicVRegister dst1, 493 LogicVRegister dst2, 494 LogicVRegister dst3, 495 int index, 496 uint64_t addr1) { 497 dst1.ClearForWrite(vform); 498 dst2.ClearForWrite(vform); 499 dst3.ClearForWrite(vform); 500 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 501 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 502 dst1.ReadUintFromMem(vform, index, addr1); 503 dst2.ReadUintFromMem(vform, index, addr2); 504 dst3.ReadUintFromMem(vform, index, addr3); 505} 506 507 508void Simulator::ld3r(VectorFormat vform, 509 LogicVRegister dst1, 510 LogicVRegister dst2, 511 LogicVRegister dst3, 512 uint64_t addr) { 513 dst1.ClearForWrite(vform); 514 dst2.ClearForWrite(vform); 515 dst3.ClearForWrite(vform); 516 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 517 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 518 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 519 dst1.ReadUintFromMem(vform, i, addr); 520 dst2.ReadUintFromMem(vform, i, addr2); 521 dst3.ReadUintFromMem(vform, i, addr3); 522 } 523} 524 525 526void Simulator::ld4(VectorFormat vform, 527 LogicVRegister dst1, 528 
LogicVRegister dst2, 529 LogicVRegister dst3, 530 LogicVRegister dst4, 531 uint64_t addr1) { 532 dst1.ClearForWrite(vform); 533 dst2.ClearForWrite(vform); 534 dst3.ClearForWrite(vform); 535 dst4.ClearForWrite(vform); 536 int esize = LaneSizeInBytesFromFormat(vform); 537 uint64_t addr2 = addr1 + esize; 538 uint64_t addr3 = addr2 + esize; 539 uint64_t addr4 = addr3 + esize; 540 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 541 dst1.ReadUintFromMem(vform, i, addr1); 542 dst2.ReadUintFromMem(vform, i, addr2); 543 dst3.ReadUintFromMem(vform, i, addr3); 544 dst4.ReadUintFromMem(vform, i, addr4); 545 addr1 += 4 * esize; 546 addr2 += 4 * esize; 547 addr3 += 4 * esize; 548 addr4 += 4 * esize; 549 } 550} 551 552 553void Simulator::ld4(VectorFormat vform, 554 LogicVRegister dst1, 555 LogicVRegister dst2, 556 LogicVRegister dst3, 557 LogicVRegister dst4, 558 int index, 559 uint64_t addr1) { 560 dst1.ClearForWrite(vform); 561 dst2.ClearForWrite(vform); 562 dst3.ClearForWrite(vform); 563 dst4.ClearForWrite(vform); 564 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 565 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 566 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 567 dst1.ReadUintFromMem(vform, index, addr1); 568 dst2.ReadUintFromMem(vform, index, addr2); 569 dst3.ReadUintFromMem(vform, index, addr3); 570 dst4.ReadUintFromMem(vform, index, addr4); 571} 572 573 574void Simulator::ld4r(VectorFormat vform, 575 LogicVRegister dst1, 576 LogicVRegister dst2, 577 LogicVRegister dst3, 578 LogicVRegister dst4, 579 uint64_t addr) { 580 dst1.ClearForWrite(vform); 581 dst2.ClearForWrite(vform); 582 dst3.ClearForWrite(vform); 583 dst4.ClearForWrite(vform); 584 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 585 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 586 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 587 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 588 dst1.ReadUintFromMem(vform, i, addr); 589 
dst2.ReadUintFromMem(vform, i, addr2); 590 dst3.ReadUintFromMem(vform, i, addr3); 591 dst4.ReadUintFromMem(vform, i, addr4); 592 } 593} 594 595 596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { 597 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 598 src.WriteUintToMem(vform, i, addr); 599 addr += LaneSizeInBytesFromFormat(vform); 600 } 601} 602 603 604void Simulator::st1(VectorFormat vform, 605 LogicVRegister src, 606 int index, 607 uint64_t addr) { 608 src.WriteUintToMem(vform, index, addr); 609} 610 611 612void Simulator::st2(VectorFormat vform, 613 LogicVRegister dst, 614 LogicVRegister dst2, 615 uint64_t addr) { 616 int esize = LaneSizeInBytesFromFormat(vform); 617 uint64_t addr2 = addr + esize; 618 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 619 dst.WriteUintToMem(vform, i, addr); 620 dst2.WriteUintToMem(vform, i, addr2); 621 addr += 2 * esize; 622 addr2 += 2 * esize; 623 } 624} 625 626 627void Simulator::st2(VectorFormat vform, 628 LogicVRegister dst, 629 LogicVRegister dst2, 630 int index, 631 uint64_t addr) { 632 int esize = LaneSizeInBytesFromFormat(vform); 633 dst.WriteUintToMem(vform, index, addr); 634 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 635} 636 637 638void Simulator::st3(VectorFormat vform, 639 LogicVRegister dst, 640 LogicVRegister dst2, 641 LogicVRegister dst3, 642 uint64_t addr) { 643 int esize = LaneSizeInBytesFromFormat(vform); 644 uint64_t addr2 = addr + esize; 645 uint64_t addr3 = addr2 + esize; 646 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 647 dst.WriteUintToMem(vform, i, addr); 648 dst2.WriteUintToMem(vform, i, addr2); 649 dst3.WriteUintToMem(vform, i, addr3); 650 addr += 3 * esize; 651 addr2 += 3 * esize; 652 addr3 += 3 * esize; 653 } 654} 655 656 657void Simulator::st3(VectorFormat vform, 658 LogicVRegister dst, 659 LogicVRegister dst2, 660 LogicVRegister dst3, 661 int index, 662 uint64_t addr) { 663 int esize = LaneSizeInBytesFromFormat(vform); 664 
dst.WriteUintToMem(vform, index, addr); 665 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 666 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 667} 668 669 670void Simulator::st4(VectorFormat vform, 671 LogicVRegister dst, 672 LogicVRegister dst2, 673 LogicVRegister dst3, 674 LogicVRegister dst4, 675 uint64_t addr) { 676 int esize = LaneSizeInBytesFromFormat(vform); 677 uint64_t addr2 = addr + esize; 678 uint64_t addr3 = addr2 + esize; 679 uint64_t addr4 = addr3 + esize; 680 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 681 dst.WriteUintToMem(vform, i, addr); 682 dst2.WriteUintToMem(vform, i, addr2); 683 dst3.WriteUintToMem(vform, i, addr3); 684 dst4.WriteUintToMem(vform, i, addr4); 685 addr += 4 * esize; 686 addr2 += 4 * esize; 687 addr3 += 4 * esize; 688 addr4 += 4 * esize; 689 } 690} 691 692 693void Simulator::st4(VectorFormat vform, 694 LogicVRegister dst, 695 LogicVRegister dst2, 696 LogicVRegister dst3, 697 LogicVRegister dst4, 698 int index, 699 uint64_t addr) { 700 int esize = LaneSizeInBytesFromFormat(vform); 701 dst.WriteUintToMem(vform, index, addr); 702 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 703 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 704 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 705} 706 707 708LogicVRegister Simulator::cmp(VectorFormat vform, 709 LogicVRegister dst, 710 const LogicVRegister& src1, 711 const LogicVRegister& src2, 712 Condition cond) { 713 dst.ClearForWrite(vform); 714 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 715 int64_t sa = src1.Int(vform, i); 716 int64_t sb = src2.Int(vform, i); 717 uint64_t ua = src1.Uint(vform, i); 718 uint64_t ub = src2.Uint(vform, i); 719 bool result = false; 720 switch (cond) { 721 case eq: 722 result = (ua == ub); 723 break; 724 case ge: 725 result = (sa >= sb); 726 break; 727 case gt: 728 result = (sa > sb); 729 break; 730 case hi: 731 result = (ua > ub); 732 break; 733 case hs: 734 result = (ua >= ub); 735 break; 736 case lt: 737 result = 
(sa < sb); 738 break; 739 case le: 740 result = (sa <= sb); 741 break; 742 default: 743 VIXL_UNREACHABLE(); 744 break; 745 } 746 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 747 } 748 return dst; 749} 750 751 752LogicVRegister Simulator::cmp(VectorFormat vform, 753 LogicVRegister dst, 754 const LogicVRegister& src1, 755 int imm, 756 Condition cond) { 757 SimVRegister temp; 758 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 759 return cmp(vform, dst, src1, imm_reg, cond); 760} 761 762 763LogicVRegister Simulator::cmptst(VectorFormat vform, 764 LogicVRegister dst, 765 const LogicVRegister& src1, 766 const LogicVRegister& src2) { 767 dst.ClearForWrite(vform); 768 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 769 uint64_t ua = src1.Uint(vform, i); 770 uint64_t ub = src2.Uint(vform, i); 771 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 772 } 773 return dst; 774} 775 776 777LogicVRegister Simulator::add(VectorFormat vform, 778 LogicVRegister dst, 779 const LogicVRegister& src1, 780 const LogicVRegister& src2) { 781 dst.ClearForWrite(vform); 782 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 783 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 784 // Test for unsigned saturation. 785 uint64_t ua = src1.UintLeftJustified(vform, i); 786 uint64_t ub = src2.UintLeftJustified(vform, i); 787 uint64_t ur = ua + ub; 788 if (ur < ua) { 789 dst.SetUnsignedSat(i, true); 790 } 791 792 // Test for signed saturation. 793 int64_t sa = src1.IntLeftJustified(vform, i); 794 int64_t sb = src2.IntLeftJustified(vform, i); 795 int64_t sr = sa + sb; 796 // If the signs of the operands are the same, but different from the result, 797 // there was an overflow. 
798 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 799 dst.SetSignedSat(i, sa >= 0); 800 } 801 802 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 803 } 804 return dst; 805} 806 807 808LogicVRegister Simulator::addp(VectorFormat vform, 809 LogicVRegister dst, 810 const LogicVRegister& src1, 811 const LogicVRegister& src2) { 812 SimVRegister temp1, temp2; 813 uzp1(vform, temp1, src1, src2); 814 uzp2(vform, temp2, src1, src2); 815 add(vform, dst, temp1, temp2); 816 return dst; 817} 818 819 820LogicVRegister Simulator::mla(VectorFormat vform, 821 LogicVRegister dst, 822 const LogicVRegister& src1, 823 const LogicVRegister& src2) { 824 SimVRegister temp; 825 mul(vform, temp, src1, src2); 826 add(vform, dst, dst, temp); 827 return dst; 828} 829 830 831LogicVRegister Simulator::mls(VectorFormat vform, 832 LogicVRegister dst, 833 const LogicVRegister& src1, 834 const LogicVRegister& src2) { 835 SimVRegister temp; 836 mul(vform, temp, src1, src2); 837 sub(vform, dst, dst, temp); 838 return dst; 839} 840 841 842LogicVRegister Simulator::mul(VectorFormat vform, 843 LogicVRegister dst, 844 const LogicVRegister& src1, 845 const LogicVRegister& src2) { 846 dst.ClearForWrite(vform); 847 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 848 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 849 } 850 return dst; 851} 852 853 854LogicVRegister Simulator::mul(VectorFormat vform, 855 LogicVRegister dst, 856 const LogicVRegister& src1, 857 const LogicVRegister& src2, 858 int index) { 859 SimVRegister temp; 860 VectorFormat indexform = VectorFormatFillQ(vform); 861 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 862} 863 864 865LogicVRegister Simulator::mla(VectorFormat vform, 866 LogicVRegister dst, 867 const LogicVRegister& src1, 868 const LogicVRegister& src2, 869 int index) { 870 SimVRegister temp; 871 VectorFormat indexform = VectorFormatFillQ(vform); 872 return mla(vform, dst, src1, dup_element(indexform, 
temp, src2, index)); 873} 874 875 876LogicVRegister Simulator::mls(VectorFormat vform, 877 LogicVRegister dst, 878 const LogicVRegister& src1, 879 const LogicVRegister& src2, 880 int index) { 881 SimVRegister temp; 882 VectorFormat indexform = VectorFormatFillQ(vform); 883 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 884} 885 886 887LogicVRegister Simulator::smull(VectorFormat vform, 888 LogicVRegister dst, 889 const LogicVRegister& src1, 890 const LogicVRegister& src2, 891 int index) { 892 SimVRegister temp; 893 VectorFormat indexform = 894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 895 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 896} 897 898 899LogicVRegister Simulator::smull2(VectorFormat vform, 900 LogicVRegister dst, 901 const LogicVRegister& src1, 902 const LogicVRegister& src2, 903 int index) { 904 SimVRegister temp; 905 VectorFormat indexform = 906 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 907 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 908} 909 910 911LogicVRegister Simulator::umull(VectorFormat vform, 912 LogicVRegister dst, 913 const LogicVRegister& src1, 914 const LogicVRegister& src2, 915 int index) { 916 SimVRegister temp; 917 VectorFormat indexform = 918 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 919 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 920} 921 922 923LogicVRegister Simulator::umull2(VectorFormat vform, 924 LogicVRegister dst, 925 const LogicVRegister& src1, 926 const LogicVRegister& src2, 927 int index) { 928 SimVRegister temp; 929 VectorFormat indexform = 930 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 931 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 932} 933 934 935LogicVRegister Simulator::smlal(VectorFormat vform, 936 LogicVRegister dst, 937 const LogicVRegister& src1, 938 const LogicVRegister& src2, 939 int index) { 940 
SimVRegister temp; 941 VectorFormat indexform = 942 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 943 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 944} 945 946 947LogicVRegister Simulator::smlal2(VectorFormat vform, 948 LogicVRegister dst, 949 const LogicVRegister& src1, 950 const LogicVRegister& src2, 951 int index) { 952 SimVRegister temp; 953 VectorFormat indexform = 954 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 955 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 956} 957 958 959LogicVRegister Simulator::umlal(VectorFormat vform, 960 LogicVRegister dst, 961 const LogicVRegister& src1, 962 const LogicVRegister& src2, 963 int index) { 964 SimVRegister temp; 965 VectorFormat indexform = 966 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 967 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 968} 969 970 971LogicVRegister Simulator::umlal2(VectorFormat vform, 972 LogicVRegister dst, 973 const LogicVRegister& src1, 974 const LogicVRegister& src2, 975 int index) { 976 SimVRegister temp; 977 VectorFormat indexform = 978 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 979 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 980} 981 982 983LogicVRegister Simulator::smlsl(VectorFormat vform, 984 LogicVRegister dst, 985 const LogicVRegister& src1, 986 const LogicVRegister& src2, 987 int index) { 988 SimVRegister temp; 989 VectorFormat indexform = 990 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 991 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 992} 993 994 995LogicVRegister Simulator::smlsl2(VectorFormat vform, 996 LogicVRegister dst, 997 const LogicVRegister& src1, 998 const LogicVRegister& src2, 999 int index) { 1000 SimVRegister temp; 1001 VectorFormat indexform = 1002 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1003 return smlsl2(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 1004} 1005 1006 1007LogicVRegister Simulator::umlsl(VectorFormat vform, 1008 LogicVRegister dst, 1009 const LogicVRegister& src1, 1010 const LogicVRegister& src2, 1011 int index) { 1012 SimVRegister temp; 1013 VectorFormat indexform = 1014 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1015 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1016} 1017 1018 1019LogicVRegister Simulator::umlsl2(VectorFormat vform, 1020 LogicVRegister dst, 1021 const LogicVRegister& src1, 1022 const LogicVRegister& src2, 1023 int index) { 1024 SimVRegister temp; 1025 VectorFormat indexform = 1026 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1027 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1028} 1029 1030 1031LogicVRegister Simulator::sqdmull(VectorFormat vform, 1032 LogicVRegister dst, 1033 const LogicVRegister& src1, 1034 const LogicVRegister& src2, 1035 int index) { 1036 SimVRegister temp; 1037 VectorFormat indexform = 1038 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1039 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1040} 1041 1042 1043LogicVRegister Simulator::sqdmull2(VectorFormat vform, 1044 LogicVRegister dst, 1045 const LogicVRegister& src1, 1046 const LogicVRegister& src2, 1047 int index) { 1048 SimVRegister temp; 1049 VectorFormat indexform = 1050 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1051 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1052} 1053 1054 1055LogicVRegister Simulator::sqdmlal(VectorFormat vform, 1056 LogicVRegister dst, 1057 const LogicVRegister& src1, 1058 const LogicVRegister& src2, 1059 int index) { 1060 SimVRegister temp; 1061 VectorFormat indexform = 1062 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1063 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1064} 1065 1066 1067LogicVRegister 
Simulator::sqdmlal2(VectorFormat vform, 1068 LogicVRegister dst, 1069 const LogicVRegister& src1, 1070 const LogicVRegister& src2, 1071 int index) { 1072 SimVRegister temp; 1073 VectorFormat indexform = 1074 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1075 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1076} 1077 1078 1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1080 LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2, 1083 int index) { 1084 SimVRegister temp; 1085 VectorFormat indexform = 1086 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1087 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1088} 1089 1090 1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1092 LogicVRegister dst, 1093 const LogicVRegister& src1, 1094 const LogicVRegister& src2, 1095 int index) { 1096 SimVRegister temp; 1097 VectorFormat indexform = 1098 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1099 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1100} 1101 1102 1103LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1104 LogicVRegister dst, 1105 const LogicVRegister& src1, 1106 const LogicVRegister& src2, 1107 int index) { 1108 SimVRegister temp; 1109 VectorFormat indexform = VectorFormatFillQ(vform); 1110 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1111} 1112 1113 1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1115 LogicVRegister dst, 1116 const LogicVRegister& src1, 1117 const LogicVRegister& src2, 1118 int index) { 1119 SimVRegister temp; 1120 VectorFormat indexform = VectorFormatFillQ(vform); 1121 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1122} 1123 1124 1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { 1126 uint16_t result = 0; 1127 uint16_t extended_op2 = op2; 1128 for (int i = 0; i < 8; ++i) { 
1129 if ((op1 >> i) & 1) { 1130 result = result ^ (extended_op2 << i); 1131 } 1132 } 1133 return result; 1134} 1135 1136 1137LogicVRegister Simulator::pmul(VectorFormat vform, 1138 LogicVRegister dst, 1139 const LogicVRegister& src1, 1140 const LogicVRegister& src2) { 1141 dst.ClearForWrite(vform); 1142 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1143 dst.SetUint(vform, 1144 i, 1145 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1146 } 1147 return dst; 1148} 1149 1150 1151LogicVRegister Simulator::pmull(VectorFormat vform, 1152 LogicVRegister dst, 1153 const LogicVRegister& src1, 1154 const LogicVRegister& src2) { 1155 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1156 dst.ClearForWrite(vform); 1157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1158 dst.SetUint(vform, 1159 i, 1160 PolynomialMult(src1.Uint(vform_src, i), 1161 src2.Uint(vform_src, i))); 1162 } 1163 return dst; 1164} 1165 1166 1167LogicVRegister Simulator::pmull2(VectorFormat vform, 1168 LogicVRegister dst, 1169 const LogicVRegister& src1, 1170 const LogicVRegister& src2) { 1171 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1172 dst.ClearForWrite(vform); 1173 int lane_count = LaneCountFromFormat(vform); 1174 for (int i = 0; i < lane_count; i++) { 1175 dst.SetUint(vform, 1176 i, 1177 PolynomialMult(src1.Uint(vform_src, lane_count + i), 1178 src2.Uint(vform_src, lane_count + i))); 1179 } 1180 return dst; 1181} 1182 1183 1184LogicVRegister Simulator::sub(VectorFormat vform, 1185 LogicVRegister dst, 1186 const LogicVRegister& src1, 1187 const LogicVRegister& src2) { 1188 dst.ClearForWrite(vform); 1189 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1190 // Test for unsigned saturation. 1191 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 1192 dst.SetUnsignedSat(i, false); 1193 } 1194 1195 // Test for signed saturation. 
1196 int64_t sa = src1.IntLeftJustified(vform, i); 1197 int64_t sb = src2.IntLeftJustified(vform, i); 1198 int64_t sr = sa - sb; 1199 // If the signs of the operands are different, and the sign of the first 1200 // operand doesn't match the result, there was an overflow. 1201 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 1202 dst.SetSignedSat(i, sr < 0); 1203 } 1204 1205 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 1206 } 1207 return dst; 1208} 1209 1210 1211LogicVRegister Simulator::and_(VectorFormat vform, 1212 LogicVRegister dst, 1213 const LogicVRegister& src1, 1214 const LogicVRegister& src2) { 1215 dst.ClearForWrite(vform); 1216 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1217 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1218 } 1219 return dst; 1220} 1221 1222 1223LogicVRegister Simulator::orr(VectorFormat vform, 1224 LogicVRegister dst, 1225 const LogicVRegister& src1, 1226 const LogicVRegister& src2) { 1227 dst.ClearForWrite(vform); 1228 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1229 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1230 } 1231 return dst; 1232} 1233 1234 1235LogicVRegister Simulator::orn(VectorFormat vform, 1236 LogicVRegister dst, 1237 const LogicVRegister& src1, 1238 const LogicVRegister& src2) { 1239 dst.ClearForWrite(vform); 1240 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1241 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1242 } 1243 return dst; 1244} 1245 1246 1247LogicVRegister Simulator::eor(VectorFormat vform, 1248 LogicVRegister dst, 1249 const LogicVRegister& src1, 1250 const LogicVRegister& src2) { 1251 dst.ClearForWrite(vform); 1252 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1253 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1254 } 1255 return dst; 1256} 1257 1258 1259LogicVRegister Simulator::bic(VectorFormat vform, 1260 LogicVRegister dst, 1261 const LogicVRegister& 
src1, 1262 const LogicVRegister& src2) { 1263 dst.ClearForWrite(vform); 1264 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1265 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1266 } 1267 return dst; 1268} 1269 1270 1271LogicVRegister Simulator::bic(VectorFormat vform, 1272 LogicVRegister dst, 1273 const LogicVRegister& src, 1274 uint64_t imm) { 1275 uint64_t result[16]; 1276 int laneCount = LaneCountFromFormat(vform); 1277 for (int i = 0; i < laneCount; ++i) { 1278 result[i] = src.Uint(vform, i) & ~imm; 1279 } 1280 dst.ClearForWrite(vform); 1281 for (int i = 0; i < laneCount; ++i) { 1282 dst.SetUint(vform, i, result[i]); 1283 } 1284 return dst; 1285} 1286 1287 1288LogicVRegister Simulator::bif(VectorFormat vform, 1289 LogicVRegister dst, 1290 const LogicVRegister& src1, 1291 const LogicVRegister& src2) { 1292 dst.ClearForWrite(vform); 1293 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1294 uint64_t operand1 = dst.Uint(vform, i); 1295 uint64_t operand2 = ~src2.Uint(vform, i); 1296 uint64_t operand3 = src1.Uint(vform, i); 1297 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1298 dst.SetUint(vform, i, result); 1299 } 1300 return dst; 1301} 1302 1303 1304LogicVRegister Simulator::bit(VectorFormat vform, 1305 LogicVRegister dst, 1306 const LogicVRegister& src1, 1307 const LogicVRegister& src2) { 1308 dst.ClearForWrite(vform); 1309 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1310 uint64_t operand1 = dst.Uint(vform, i); 1311 uint64_t operand2 = src2.Uint(vform, i); 1312 uint64_t operand3 = src1.Uint(vform, i); 1313 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1314 dst.SetUint(vform, i, result); 1315 } 1316 return dst; 1317} 1318 1319 1320LogicVRegister Simulator::bsl(VectorFormat vform, 1321 LogicVRegister dst, 1322 const LogicVRegister& src1, 1323 const LogicVRegister& src2) { 1324 dst.ClearForWrite(vform); 1325 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1326 uint64_t 
operand1 = src2.Uint(vform, i); 1327 uint64_t operand2 = dst.Uint(vform, i); 1328 uint64_t operand3 = src1.Uint(vform, i); 1329 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1330 dst.SetUint(vform, i, result); 1331 } 1332 return dst; 1333} 1334 1335 1336LogicVRegister Simulator::sminmax(VectorFormat vform, 1337 LogicVRegister dst, 1338 const LogicVRegister& src1, 1339 const LogicVRegister& src2, 1340 bool max) { 1341 dst.ClearForWrite(vform); 1342 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1343 int64_t src1_val = src1.Int(vform, i); 1344 int64_t src2_val = src2.Int(vform, i); 1345 int64_t dst_val; 1346 if (max) { 1347 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1348 } else { 1349 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1350 } 1351 dst.SetInt(vform, i, dst_val); 1352 } 1353 return dst; 1354} 1355 1356 1357LogicVRegister Simulator::smax(VectorFormat vform, 1358 LogicVRegister dst, 1359 const LogicVRegister& src1, 1360 const LogicVRegister& src2) { 1361 return sminmax(vform, dst, src1, src2, true); 1362} 1363 1364 1365LogicVRegister Simulator::smin(VectorFormat vform, 1366 LogicVRegister dst, 1367 const LogicVRegister& src1, 1368 const LogicVRegister& src2) { 1369 return sminmax(vform, dst, src1, src2, false); 1370} 1371 1372 1373LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1374 LogicVRegister dst, 1375 const LogicVRegister& src1, 1376 const LogicVRegister& src2, 1377 bool max) { 1378 int lanes = LaneCountFromFormat(vform); 1379 int64_t result[kMaxLanesPerVector]; 1380 const LogicVRegister* src = &src1; 1381 for (int j = 0; j < 2; j++) { 1382 for (int i = 0; i < lanes; i += 2) { 1383 int64_t first_val = src->Int(vform, i); 1384 int64_t second_val = src->Int(vform, i + 1); 1385 int64_t dst_val; 1386 if (max) { 1387 dst_val = (first_val > second_val) ? first_val : second_val; 1388 } else { 1389 dst_val = (first_val < second_val) ? 
first_val : second_val; 1390 } 1391 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1392 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1393 } 1394 src = &src2; 1395 } 1396 dst.SetIntArray(vform, result); 1397 return dst; 1398} 1399 1400 1401LogicVRegister Simulator::smaxp(VectorFormat vform, 1402 LogicVRegister dst, 1403 const LogicVRegister& src1, 1404 const LogicVRegister& src2) { 1405 return sminmaxp(vform, dst, src1, src2, true); 1406} 1407 1408 1409LogicVRegister Simulator::sminp(VectorFormat vform, 1410 LogicVRegister dst, 1411 const LogicVRegister& src1, 1412 const LogicVRegister& src2) { 1413 return sminmaxp(vform, dst, src1, src2, false); 1414} 1415 1416 1417LogicVRegister Simulator::addp(VectorFormat vform, 1418 LogicVRegister dst, 1419 const LogicVRegister& src) { 1420 VIXL_ASSERT(vform == kFormatD); 1421 1422 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1423 dst.ClearForWrite(vform); 1424 dst.SetInt(vform, 0, dst_val); 1425 return dst; 1426} 1427 1428 1429LogicVRegister Simulator::addv(VectorFormat vform, 1430 LogicVRegister dst, 1431 const LogicVRegister& src) { 1432 VectorFormat vform_dst = 1433 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1434 1435 1436 int64_t dst_val = 0; 1437 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1438 dst_val += src.Int(vform, i); 1439 } 1440 1441 dst.ClearForWrite(vform_dst); 1442 dst.SetInt(vform_dst, 0, dst_val); 1443 return dst; 1444} 1445 1446 1447LogicVRegister Simulator::saddlv(VectorFormat vform, 1448 LogicVRegister dst, 1449 const LogicVRegister& src) { 1450 VectorFormat vform_dst = 1451 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1452 1453 int64_t dst_val = 0; 1454 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1455 dst_val += src.Int(vform, i); 1456 } 1457 1458 dst.ClearForWrite(vform_dst); 1459 dst.SetInt(vform_dst, 0, dst_val); 1460 return dst; 1461} 1462 1463 1464LogicVRegister Simulator::uaddlv(VectorFormat vform, 1465 
LogicVRegister dst,
                                 const LogicVRegister& src) {
  // Unsigned add long across vector: the sum of all lanes is written using
  // the scalar format whose lane size is twice that of vform.
  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  uint64_t total = 0;
  for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
    total += src.Uint(vform, lane);
  }

  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, total);
  return dst;
}


// Signed maximum (max == true) or minimum (max == false) across all lanes of
// src. The reduction starts from INT64_MIN / INT64_MAX respectively and the
// result is written to lane 0 of dst.
LogicVRegister Simulator::sminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   bool max) {
  int64_t best = max ? INT64_MIN : INT64_MAX;
  for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
    const int64_t candidate = src.Int(vform, lane);
    const bool better = max ? (candidate > best) : (candidate < best);
    if (better) {
      best = candidate;
    }
  }
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
  dst.SetInt(vform, 0, best);
  return dst;
}


// Signed maximum across vector.
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool want_max = true;
  sminmaxv(vform, dst, src, want_max);
  return dst;
}


// Signed minimum across vector.
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool want_max = false;
  sminmaxv(vform, dst, src, want_max);
  return dst;
}


// Lane-wise unsigned maximum (max == true) or minimum (max == false) of src1
// and src2.
LogicVRegister Simulator::uminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t src1_val = src1.Uint(vform, i);
    uint64_t src2_val = src2.Uint(vform, i);
    uint64_t dst_val;
    if (max) {
      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
    } else {
      dst_val = (src1_val < src2_val) ?
src1_val : src2_val; 1530 } 1531 dst.SetUint(vform, i, dst_val); 1532 } 1533 return dst; 1534} 1535 1536 1537LogicVRegister Simulator::umax(VectorFormat vform, 1538 LogicVRegister dst, 1539 const LogicVRegister& src1, 1540 const LogicVRegister& src2) { 1541 return uminmax(vform, dst, src1, src2, true); 1542} 1543 1544 1545LogicVRegister Simulator::umin(VectorFormat vform, 1546 LogicVRegister dst, 1547 const LogicVRegister& src1, 1548 const LogicVRegister& src2) { 1549 return uminmax(vform, dst, src1, src2, false); 1550} 1551 1552 1553LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1554 LogicVRegister dst, 1555 const LogicVRegister& src1, 1556 const LogicVRegister& src2, 1557 bool max) { 1558 int lanes = LaneCountFromFormat(vform); 1559 uint64_t result[kMaxLanesPerVector]; 1560 const LogicVRegister* src = &src1; 1561 for (int j = 0; j < 2; j++) { 1562 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1563 uint64_t first_val = src->Uint(vform, i); 1564 uint64_t second_val = src->Uint(vform, i + 1); 1565 uint64_t dst_val; 1566 if (max) { 1567 dst_val = (first_val > second_val) ? first_val : second_val; 1568 } else { 1569 dst_val = (first_val < second_val) ? 
first_val : second_val; 1570 } 1571 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1572 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1573 } 1574 src = &src2; 1575 } 1576 dst.SetUintArray(vform, result); 1577 return dst; 1578} 1579 1580 1581LogicVRegister Simulator::umaxp(VectorFormat vform, 1582 LogicVRegister dst, 1583 const LogicVRegister& src1, 1584 const LogicVRegister& src2) { 1585 return uminmaxp(vform, dst, src1, src2, true); 1586} 1587 1588 1589LogicVRegister Simulator::uminp(VectorFormat vform, 1590 LogicVRegister dst, 1591 const LogicVRegister& src1, 1592 const LogicVRegister& src2) { 1593 return uminmaxp(vform, dst, src1, src2, false); 1594} 1595 1596 1597LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1598 LogicVRegister dst, 1599 const LogicVRegister& src, 1600 bool max) { 1601 uint64_t dst_val = max ? 0 : UINT64_MAX; 1602 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1603 uint64_t src_val = src.Uint(vform, i); 1604 dst.SetUint(vform, i, i); 1605 if (max) { 1606 dst_val = (src_val > dst_val) ? src_val : dst_val; 1607 } else { 1608 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1609 } 1610 } 1611 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1612 dst.SetUint(vform, 0, dst_val); 1613 return dst; 1614} 1615 1616 1617LogicVRegister Simulator::umaxv(VectorFormat vform, 1618 LogicVRegister dst, 1619 const LogicVRegister& src) { 1620 uminmaxv(vform, dst, src, true); 1621 return dst; 1622} 1623 1624 1625LogicVRegister Simulator::uminv(VectorFormat vform, 1626 LogicVRegister dst, 1627 const LogicVRegister& src) { 1628 uminmaxv(vform, dst, src, false); 1629 return dst; 1630} 1631 1632 1633LogicVRegister Simulator::shl(VectorFormat vform, 1634 LogicVRegister dst, 1635 const LogicVRegister& src, 1636 int shift) { 1637 VIXL_ASSERT(shift >= 0); 1638 SimVRegister temp; 1639 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1640 return ushl(vform, dst, src, shiftreg); 1641} 1642 1643 1644LogicVRegister Simulator::sshll(VectorFormat vform, 1645 LogicVRegister dst, 1646 const LogicVRegister& src, 1647 int shift) { 1648 VIXL_ASSERT(shift >= 0); 1649 SimVRegister temp1, temp2; 1650 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1651 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1652 return sshl(vform, dst, extendedreg, shiftreg); 1653} 1654 1655 1656LogicVRegister Simulator::sshll2(VectorFormat vform, 1657 LogicVRegister dst, 1658 const LogicVRegister& src, 1659 int shift) { 1660 VIXL_ASSERT(shift >= 0); 1661 SimVRegister temp1, temp2; 1662 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1663 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1664 return sshl(vform, dst, extendedreg, shiftreg); 1665} 1666 1667 1668LogicVRegister Simulator::shll(VectorFormat vform, 1669 LogicVRegister dst, 1670 const LogicVRegister& src) { 1671 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1672 return sshll(vform, dst, src, shift); 1673} 1674 1675 1676LogicVRegister Simulator::shll2(VectorFormat vform, 1677 LogicVRegister dst, 1678 const LogicVRegister& src) { 1679 int shift = 
LaneSizeInBitsFromFormat(vform) / 2; 1680 return sshll2(vform, dst, src, shift); 1681} 1682 1683 1684LogicVRegister Simulator::ushll(VectorFormat vform, 1685 LogicVRegister dst, 1686 const LogicVRegister& src, 1687 int shift) { 1688 VIXL_ASSERT(shift >= 0); 1689 SimVRegister temp1, temp2; 1690 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1691 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1692 return ushl(vform, dst, extendedreg, shiftreg); 1693} 1694 1695 1696LogicVRegister Simulator::ushll2(VectorFormat vform, 1697 LogicVRegister dst, 1698 const LogicVRegister& src, 1699 int shift) { 1700 VIXL_ASSERT(shift >= 0); 1701 SimVRegister temp1, temp2; 1702 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1703 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1704 return ushl(vform, dst, extendedreg, shiftreg); 1705} 1706 1707 1708LogicVRegister Simulator::sli(VectorFormat vform, 1709 LogicVRegister dst, 1710 const LogicVRegister& src, 1711 int shift) { 1712 dst.ClearForWrite(vform); 1713 int laneCount = LaneCountFromFormat(vform); 1714 for (int i = 0; i < laneCount; i++) { 1715 uint64_t src_lane = src.Uint(vform, i); 1716 uint64_t dst_lane = dst.Uint(vform, i); 1717 uint64_t shifted = src_lane << shift; 1718 uint64_t mask = MaxUintFromFormat(vform) << shift; 1719 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1720 } 1721 return dst; 1722} 1723 1724 1725LogicVRegister Simulator::sqshl(VectorFormat vform, 1726 LogicVRegister dst, 1727 const LogicVRegister& src, 1728 int shift) { 1729 VIXL_ASSERT(shift >= 0); 1730 SimVRegister temp; 1731 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1732 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1733} 1734 1735 1736LogicVRegister Simulator::uqshl(VectorFormat vform, 1737 LogicVRegister dst, 1738 const LogicVRegister& src, 1739 int shift) { 1740 VIXL_ASSERT(shift >= 0); 1741 SimVRegister temp; 1742 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 
1743 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1744} 1745 1746 1747LogicVRegister Simulator::sqshlu(VectorFormat vform, 1748 LogicVRegister dst, 1749 const LogicVRegister& src, 1750 int shift) { 1751 VIXL_ASSERT(shift >= 0); 1752 SimVRegister temp; 1753 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1754 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1755} 1756 1757 1758LogicVRegister Simulator::sri(VectorFormat vform, 1759 LogicVRegister dst, 1760 const LogicVRegister& src, 1761 int shift) { 1762 dst.ClearForWrite(vform); 1763 int laneCount = LaneCountFromFormat(vform); 1764 VIXL_ASSERT((shift > 0) && 1765 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1766 for (int i = 0; i < laneCount; i++) { 1767 uint64_t src_lane = src.Uint(vform, i); 1768 uint64_t dst_lane = dst.Uint(vform, i); 1769 uint64_t shifted; 1770 uint64_t mask; 1771 if (shift == 64) { 1772 shifted = 0; 1773 mask = 0; 1774 } else { 1775 shifted = src_lane >> shift; 1776 mask = MaxUintFromFormat(vform) >> shift; 1777 } 1778 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1779 } 1780 return dst; 1781} 1782 1783 1784LogicVRegister Simulator::ushr(VectorFormat vform, 1785 LogicVRegister dst, 1786 const LogicVRegister& src, 1787 int shift) { 1788 VIXL_ASSERT(shift >= 0); 1789 SimVRegister temp; 1790 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1791 return ushl(vform, dst, src, shiftreg); 1792} 1793 1794 1795LogicVRegister Simulator::sshr(VectorFormat vform, 1796 LogicVRegister dst, 1797 const LogicVRegister& src, 1798 int shift) { 1799 VIXL_ASSERT(shift >= 0); 1800 SimVRegister temp; 1801 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1802 return sshl(vform, dst, src, shiftreg); 1803} 1804 1805 1806LogicVRegister Simulator::ssra(VectorFormat vform, 1807 LogicVRegister dst, 1808 const LogicVRegister& src, 1809 int shift) { 1810 SimVRegister temp; 1811 LogicVRegister shifted_reg = sshr(vform, temp, src, 
shift); 1812 return add(vform, dst, dst, shifted_reg); 1813} 1814 1815 1816LogicVRegister Simulator::usra(VectorFormat vform, 1817 LogicVRegister dst, 1818 const LogicVRegister& src, 1819 int shift) { 1820 SimVRegister temp; 1821 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1822 return add(vform, dst, dst, shifted_reg); 1823} 1824 1825 1826LogicVRegister Simulator::srsra(VectorFormat vform, 1827 LogicVRegister dst, 1828 const LogicVRegister& src, 1829 int shift) { 1830 SimVRegister temp; 1831 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1832 return add(vform, dst, dst, shifted_reg); 1833} 1834 1835 1836LogicVRegister Simulator::ursra(VectorFormat vform, 1837 LogicVRegister dst, 1838 const LogicVRegister& src, 1839 int shift) { 1840 SimVRegister temp; 1841 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1842 return add(vform, dst, dst, shifted_reg); 1843} 1844 1845 1846LogicVRegister Simulator::cls(VectorFormat vform, 1847 LogicVRegister dst, 1848 const LogicVRegister& src) { 1849 uint64_t result[16]; 1850 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1851 int laneCount = LaneCountFromFormat(vform); 1852 for (int i = 0; i < laneCount; i++) { 1853 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1854 } 1855 1856 dst.ClearForWrite(vform); 1857 for (int i = 0; i < laneCount; ++i) { 1858 dst.SetUint(vform, i, result[i]); 1859 } 1860 return dst; 1861} 1862 1863 1864LogicVRegister Simulator::clz(VectorFormat vform, 1865 LogicVRegister dst, 1866 const LogicVRegister& src) { 1867 uint64_t result[16]; 1868 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1869 int laneCount = LaneCountFromFormat(vform); 1870 for (int i = 0; i < laneCount; i++) { 1871 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1872 } 1873 1874 dst.ClearForWrite(vform); 1875 for (int i = 0; i < laneCount; ++i) { 1876 dst.SetUint(vform, i, result[i]); 1877 } 1878 return dst; 1879} 1880 
1881 1882LogicVRegister Simulator::cnt(VectorFormat vform, 1883 LogicVRegister dst, 1884 const LogicVRegister& src) { 1885 uint64_t result[16]; 1886 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1887 int laneCount = LaneCountFromFormat(vform); 1888 for (int i = 0; i < laneCount; i++) { 1889 uint64_t value = src.Uint(vform, i); 1890 result[i] = 0; 1891 for (int j = 0; j < laneSizeInBits; j++) { 1892 result[i] += (value & 1); 1893 value >>= 1; 1894 } 1895 } 1896 1897 dst.ClearForWrite(vform); 1898 for (int i = 0; i < laneCount; ++i) { 1899 dst.SetUint(vform, i, result[i]); 1900 } 1901 return dst; 1902} 1903 1904 1905LogicVRegister Simulator::sshl(VectorFormat vform, 1906 LogicVRegister dst, 1907 const LogicVRegister& src1, 1908 const LogicVRegister& src2) { 1909 dst.ClearForWrite(vform); 1910 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1911 int8_t shift_val = src2.Int(vform, i); 1912 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1913 1914 // Set signed saturation state. 1915 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1916 dst.SetSignedSat(i, lj_src_val >= 0); 1917 } 1918 1919 // Set unsigned saturation state. 1920 if (lj_src_val < 0) { 1921 dst.SetUnsignedSat(i, false); 1922 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1923 (lj_src_val != 0)) { 1924 dst.SetUnsignedSat(i, true); 1925 } 1926 1927 int64_t src_val = src1.Int(vform, i); 1928 if (shift_val > 63) { 1929 dst.SetInt(vform, i, 0); 1930 } else if (shift_val < -63) { 1931 dst.SetRounding(i, src_val < 0); 1932 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); 1933 } else { 1934 if (shift_val < 0) { 1935 // Set rounding state. Rounding only needed on right shifts. 
1936 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1937 dst.SetRounding(i, true); 1938 } 1939 src_val >>= -shift_val; 1940 } else { 1941 src_val <<= shift_val; 1942 } 1943 dst.SetInt(vform, i, src_val); 1944 } 1945 } 1946 return dst; 1947} 1948 1949 1950LogicVRegister Simulator::ushl(VectorFormat vform, 1951 LogicVRegister dst, 1952 const LogicVRegister& src1, 1953 const LogicVRegister& src2) { 1954 dst.ClearForWrite(vform); 1955 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1956 int8_t shift_val = src2.Int(vform, i); 1957 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1958 1959 // Set saturation state. 1960 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1961 dst.SetUnsignedSat(i, true); 1962 } 1963 1964 uint64_t src_val = src1.Uint(vform, i); 1965 if ((shift_val > 63) || (shift_val < -64)) { 1966 dst.SetUint(vform, i, 0); 1967 } else { 1968 if (shift_val < 0) { 1969 // Set rounding state. Rounding only needed on right shifts. 1970 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1971 dst.SetRounding(i, true); 1972 } 1973 1974 if (shift_val == -64) { 1975 src_val = 0; 1976 } else { 1977 src_val >>= -shift_val; 1978 } 1979 } else { 1980 src_val <<= shift_val; 1981 } 1982 dst.SetUint(vform, i, src_val); 1983 } 1984 } 1985 return dst; 1986} 1987 1988 1989LogicVRegister Simulator::neg(VectorFormat vform, 1990 LogicVRegister dst, 1991 const LogicVRegister& src) { 1992 dst.ClearForWrite(vform); 1993 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1994 // Test for signed saturation. 
1995 int64_t sa = src.Int(vform, i); 1996 if (sa == MinIntFromFormat(vform)) { 1997 dst.SetSignedSat(i, true); 1998 } 1999 dst.SetInt(vform, i, -sa); 2000 } 2001 return dst; 2002} 2003 2004 2005LogicVRegister Simulator::suqadd(VectorFormat vform, 2006 LogicVRegister dst, 2007 const LogicVRegister& src) { 2008 dst.ClearForWrite(vform); 2009 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2010 int64_t sa = dst.IntLeftJustified(vform, i); 2011 uint64_t ub = src.UintLeftJustified(vform, i); 2012 int64_t sr = sa + ub; 2013 2014 if (sr < sa) { // Test for signed positive saturation. 2015 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 2016 } else { 2017 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); 2018 } 2019 } 2020 return dst; 2021} 2022 2023 2024LogicVRegister Simulator::usqadd(VectorFormat vform, 2025 LogicVRegister dst, 2026 const LogicVRegister& src) { 2027 dst.ClearForWrite(vform); 2028 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2029 uint64_t ua = dst.UintLeftJustified(vform, i); 2030 int64_t sb = src.IntLeftJustified(vform, i); 2031 uint64_t ur = ua + sb; 2032 2033 if ((sb > 0) && (ur <= ua)) { 2034 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2035 } else if ((sb < 0) && (ur >= ua)) { 2036 dst.SetUint(vform, i, 0); // Negative saturation. 2037 } else { 2038 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 2039 } 2040 } 2041 return dst; 2042} 2043 2044 2045LogicVRegister Simulator::abs(VectorFormat vform, 2046 LogicVRegister dst, 2047 const LogicVRegister& src) { 2048 dst.ClearForWrite(vform); 2049 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2050 // Test for signed saturation. 
2051 int64_t sa = src.Int(vform, i); 2052 if (sa == MinIntFromFormat(vform)) { 2053 dst.SetSignedSat(i, true); 2054 } 2055 if (sa < 0) { 2056 dst.SetInt(vform, i, -sa); 2057 } else { 2058 dst.SetInt(vform, i, sa); 2059 } 2060 } 2061 return dst; 2062} 2063 2064 2065LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2066 LogicVRegister dst, 2067 bool dstIsSigned, 2068 const LogicVRegister& src, 2069 bool srcIsSigned) { 2070 bool upperhalf = false; 2071 VectorFormat srcform = kFormatUndefined; 2072 int64_t ssrc[8]; 2073 uint64_t usrc[8]; 2074 2075 switch (dstform) { 2076 case kFormat8B: 2077 upperhalf = false; 2078 srcform = kFormat8H; 2079 break; 2080 case kFormat16B: 2081 upperhalf = true; 2082 srcform = kFormat8H; 2083 break; 2084 case kFormat4H: 2085 upperhalf = false; 2086 srcform = kFormat4S; 2087 break; 2088 case kFormat8H: 2089 upperhalf = true; 2090 srcform = kFormat4S; 2091 break; 2092 case kFormat2S: 2093 upperhalf = false; 2094 srcform = kFormat2D; 2095 break; 2096 case kFormat4S: 2097 upperhalf = true; 2098 srcform = kFormat2D; 2099 break; 2100 case kFormatB: 2101 upperhalf = false; 2102 srcform = kFormatH; 2103 break; 2104 case kFormatH: 2105 upperhalf = false; 2106 srcform = kFormatS; 2107 break; 2108 case kFormatS: 2109 upperhalf = false; 2110 srcform = kFormatD; 2111 break; 2112 default: 2113 VIXL_UNIMPLEMENTED(); 2114 } 2115 2116 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2117 ssrc[i] = src.Int(srcform, i); 2118 usrc[i] = src.Uint(srcform, i); 2119 } 2120 2121 int offset; 2122 if (upperhalf) { 2123 offset = LaneCountFromFormat(dstform) / 2; 2124 } else { 2125 offset = 0; 2126 dst.ClearForWrite(dstform); 2127 } 2128 2129 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2130 // Test for signed saturation 2131 if (ssrc[i] > MaxIntFromFormat(dstform)) { 2132 dst.SetSignedSat(offset + i, true); 2133 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 2134 dst.SetSignedSat(offset + i, false); 2135 } 2136 2137 // Test for 
unsigned saturation 2138 if (srcIsSigned) { 2139 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2140 dst.SetUnsignedSat(offset + i, true); 2141 } else if (ssrc[i] < 0) { 2142 dst.SetUnsignedSat(offset + i, false); 2143 } 2144 } else { 2145 if (usrc[i] > MaxUintFromFormat(dstform)) { 2146 dst.SetUnsignedSat(offset + i, true); 2147 } 2148 } 2149 2150 int64_t result; 2151 if (srcIsSigned) { 2152 result = ssrc[i] & MaxUintFromFormat(dstform); 2153 } else { 2154 result = usrc[i] & MaxUintFromFormat(dstform); 2155 } 2156 2157 if (dstIsSigned) { 2158 dst.SetInt(dstform, offset + i, result); 2159 } else { 2160 dst.SetUint(dstform, offset + i, result); 2161 } 2162 } 2163 return dst; 2164} 2165 2166 2167LogicVRegister Simulator::xtn(VectorFormat vform, 2168 LogicVRegister dst, 2169 const LogicVRegister& src) { 2170 return extractnarrow(vform, dst, true, src, true); 2171} 2172 2173 2174LogicVRegister Simulator::sqxtn(VectorFormat vform, 2175 LogicVRegister dst, 2176 const LogicVRegister& src) { 2177 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2178} 2179 2180 2181LogicVRegister Simulator::sqxtun(VectorFormat vform, 2182 LogicVRegister dst, 2183 const LogicVRegister& src) { 2184 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2185} 2186 2187 2188LogicVRegister Simulator::uqxtn(VectorFormat vform, 2189 LogicVRegister dst, 2190 const LogicVRegister& src) { 2191 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2192} 2193 2194 2195LogicVRegister Simulator::absdiff(VectorFormat vform, 2196 LogicVRegister dst, 2197 const LogicVRegister& src1, 2198 const LogicVRegister& src2, 2199 bool issigned) { 2200 dst.ClearForWrite(vform); 2201 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2202 if (issigned) { 2203 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2204 sr = sr > 0 ? 
sr : -sr; 2205 dst.SetInt(vform, i, sr); 2206 } else { 2207 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2208 sr = sr > 0 ? sr : -sr; 2209 dst.SetUint(vform, i, sr); 2210 } 2211 } 2212 return dst; 2213} 2214 2215 2216LogicVRegister Simulator::saba(VectorFormat vform, 2217 LogicVRegister dst, 2218 const LogicVRegister& src1, 2219 const LogicVRegister& src2) { 2220 SimVRegister temp; 2221 dst.ClearForWrite(vform); 2222 absdiff(vform, temp, src1, src2, true); 2223 add(vform, dst, dst, temp); 2224 return dst; 2225} 2226 2227 2228LogicVRegister Simulator::uaba(VectorFormat vform, 2229 LogicVRegister dst, 2230 const LogicVRegister& src1, 2231 const LogicVRegister& src2) { 2232 SimVRegister temp; 2233 dst.ClearForWrite(vform); 2234 absdiff(vform, temp, src1, src2, false); 2235 add(vform, dst, dst, temp); 2236 return dst; 2237} 2238 2239 2240LogicVRegister Simulator::not_(VectorFormat vform, 2241 LogicVRegister dst, 2242 const LogicVRegister& src) { 2243 dst.ClearForWrite(vform); 2244 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2245 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2246 } 2247 return dst; 2248} 2249 2250 2251LogicVRegister Simulator::rbit(VectorFormat vform, 2252 LogicVRegister dst, 2253 const LogicVRegister& src) { 2254 uint64_t result[16]; 2255 int laneCount = LaneCountFromFormat(vform); 2256 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2257 uint64_t reversed_value; 2258 uint64_t value; 2259 for (int i = 0; i < laneCount; i++) { 2260 value = src.Uint(vform, i); 2261 reversed_value = 0; 2262 for (int j = 0; j < laneSizeInBits; j++) { 2263 reversed_value = (reversed_value << 1) | (value & 1); 2264 value >>= 1; 2265 } 2266 result[i] = reversed_value; 2267 } 2268 2269 dst.ClearForWrite(vform); 2270 for (int i = 0; i < laneCount; ++i) { 2271 dst.SetUint(vform, i, result[i]); 2272 } 2273 return dst; 2274} 2275 2276 2277LogicVRegister Simulator::rev(VectorFormat vform, 2278 LogicVRegister dst, 2279 const LogicVRegister& src, 2280 int 
revSize) { 2281 uint64_t result[16]; 2282 int laneCount = LaneCountFromFormat(vform); 2283 int laneSize = LaneSizeInBytesFromFormat(vform); 2284 int lanesPerLoop = revSize / laneSize; 2285 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2286 for (int j = 0; j < lanesPerLoop; j++) { 2287 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2288 } 2289 } 2290 dst.ClearForWrite(vform); 2291 for (int i = 0; i < laneCount; ++i) { 2292 dst.SetUint(vform, i, result[i]); 2293 } 2294 return dst; 2295} 2296 2297 2298LogicVRegister Simulator::rev16(VectorFormat vform, 2299 LogicVRegister dst, 2300 const LogicVRegister& src) { 2301 return rev(vform, dst, src, 2); 2302} 2303 2304 2305LogicVRegister Simulator::rev32(VectorFormat vform, 2306 LogicVRegister dst, 2307 const LogicVRegister& src) { 2308 return rev(vform, dst, src, 4); 2309} 2310 2311 2312LogicVRegister Simulator::rev64(VectorFormat vform, 2313 LogicVRegister dst, 2314 const LogicVRegister& src) { 2315 return rev(vform, dst, src, 8); 2316} 2317 2318 2319LogicVRegister Simulator::addlp(VectorFormat vform, 2320 LogicVRegister dst, 2321 const LogicVRegister& src, 2322 bool is_signed, 2323 bool do_accumulate) { 2324 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2325 2326 int64_t sr[16]; 2327 uint64_t ur[16]; 2328 2329 int laneCount = LaneCountFromFormat(vform); 2330 for (int i = 0; i < laneCount; ++i) { 2331 if (is_signed) { 2332 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); 2333 } else { 2334 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2335 } 2336 } 2337 2338 dst.ClearForWrite(vform); 2339 for (int i = 0; i < laneCount; ++i) { 2340 if (do_accumulate) { 2341 if (is_signed) { 2342 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); 2343 } else { 2344 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); 2345 } 2346 } else { 2347 if (is_signed) { 2348 dst.SetInt(vform, i, sr[i]); 2349 } else { 2350 dst.SetUint(vform, i, ur[i]); 2351 } 2352 } 2353 } 2354 2355 
return dst; 2356} 2357 2358 2359LogicVRegister Simulator::saddlp(VectorFormat vform, 2360 LogicVRegister dst, 2361 const LogicVRegister& src) { 2362 return addlp(vform, dst, src, true, false); 2363} 2364 2365 2366LogicVRegister Simulator::uaddlp(VectorFormat vform, 2367 LogicVRegister dst, 2368 const LogicVRegister& src) { 2369 return addlp(vform, dst, src, false, false); 2370} 2371 2372 2373LogicVRegister Simulator::sadalp(VectorFormat vform, 2374 LogicVRegister dst, 2375 const LogicVRegister& src) { 2376 return addlp(vform, dst, src, true, true); 2377} 2378 2379 2380LogicVRegister Simulator::uadalp(VectorFormat vform, 2381 LogicVRegister dst, 2382 const LogicVRegister& src) { 2383 return addlp(vform, dst, src, false, true); 2384} 2385 2386 2387LogicVRegister Simulator::ext(VectorFormat vform, 2388 LogicVRegister dst, 2389 const LogicVRegister& src1, 2390 const LogicVRegister& src2, 2391 int index) { 2392 uint8_t result[16]; 2393 int laneCount = LaneCountFromFormat(vform); 2394 for (int i = 0; i < laneCount - index; ++i) { 2395 result[i] = src1.Uint(vform, i + index); 2396 } 2397 for (int i = 0; i < index; ++i) { 2398 result[laneCount - index + i] = src2.Uint(vform, i); 2399 } 2400 dst.ClearForWrite(vform); 2401 for (int i = 0; i < laneCount; ++i) { 2402 dst.SetUint(vform, i, result[i]); 2403 } 2404 return dst; 2405} 2406 2407 2408LogicVRegister Simulator::dup_element(VectorFormat vform, 2409 LogicVRegister dst, 2410 const LogicVRegister& src, 2411 int src_index) { 2412 int laneCount = LaneCountFromFormat(vform); 2413 uint64_t value = src.Uint(vform, src_index); 2414 dst.ClearForWrite(vform); 2415 for (int i = 0; i < laneCount; ++i) { 2416 dst.SetUint(vform, i, value); 2417 } 2418 return dst; 2419} 2420 2421 2422LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2423 LogicVRegister dst, 2424 uint64_t imm) { 2425 int laneCount = LaneCountFromFormat(vform); 2426 uint64_t value = imm & MaxUintFromFormat(vform); 2427 dst.ClearForWrite(vform); 2428 for (int i 
= 0; i < laneCount; ++i) { 2429 dst.SetUint(vform, i, value); 2430 } 2431 return dst; 2432} 2433 2434 2435LogicVRegister Simulator::ins_element(VectorFormat vform, 2436 LogicVRegister dst, 2437 int dst_index, 2438 const LogicVRegister& src, 2439 int src_index) { 2440 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2441 return dst; 2442} 2443 2444 2445LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2446 LogicVRegister dst, 2447 int dst_index, 2448 uint64_t imm) { 2449 uint64_t value = imm & MaxUintFromFormat(vform); 2450 dst.SetUint(vform, dst_index, value); 2451 return dst; 2452} 2453 2454 2455LogicVRegister Simulator::movi(VectorFormat vform, 2456 LogicVRegister dst, 2457 uint64_t imm) { 2458 int laneCount = LaneCountFromFormat(vform); 2459 dst.ClearForWrite(vform); 2460 for (int i = 0; i < laneCount; ++i) { 2461 dst.SetUint(vform, i, imm); 2462 } 2463 return dst; 2464} 2465 2466 2467LogicVRegister Simulator::mvni(VectorFormat vform, 2468 LogicVRegister dst, 2469 uint64_t imm) { 2470 int laneCount = LaneCountFromFormat(vform); 2471 dst.ClearForWrite(vform); 2472 for (int i = 0; i < laneCount; ++i) { 2473 dst.SetUint(vform, i, ~imm); 2474 } 2475 return dst; 2476} 2477 2478 2479LogicVRegister Simulator::orr(VectorFormat vform, 2480 LogicVRegister dst, 2481 const LogicVRegister& src, 2482 uint64_t imm) { 2483 uint64_t result[16]; 2484 int laneCount = LaneCountFromFormat(vform); 2485 for (int i = 0; i < laneCount; ++i) { 2486 result[i] = src.Uint(vform, i) | imm; 2487 } 2488 dst.ClearForWrite(vform); 2489 for (int i = 0; i < laneCount; ++i) { 2490 dst.SetUint(vform, i, result[i]); 2491 } 2492 return dst; 2493} 2494 2495 2496LogicVRegister Simulator::uxtl(VectorFormat vform, 2497 LogicVRegister dst, 2498 const LogicVRegister& src) { 2499 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2500 2501 dst.ClearForWrite(vform); 2502 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2503 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2504 } 
2505 return dst; 2506} 2507 2508 2509LogicVRegister Simulator::sxtl(VectorFormat vform, 2510 LogicVRegister dst, 2511 const LogicVRegister& src) { 2512 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2513 2514 dst.ClearForWrite(vform); 2515 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2516 dst.SetInt(vform, i, src.Int(vform_half, i)); 2517 } 2518 return dst; 2519} 2520 2521 2522LogicVRegister Simulator::uxtl2(VectorFormat vform, 2523 LogicVRegister dst, 2524 const LogicVRegister& src) { 2525 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2526 int lane_count = LaneCountFromFormat(vform); 2527 2528 dst.ClearForWrite(vform); 2529 for (int i = 0; i < lane_count; i++) { 2530 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2531 } 2532 return dst; 2533} 2534 2535 2536LogicVRegister Simulator::sxtl2(VectorFormat vform, 2537 LogicVRegister dst, 2538 const LogicVRegister& src) { 2539 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2540 int lane_count = LaneCountFromFormat(vform); 2541 2542 dst.ClearForWrite(vform); 2543 for (int i = 0; i < lane_count; i++) { 2544 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2545 } 2546 return dst; 2547} 2548 2549 2550LogicVRegister Simulator::shrn(VectorFormat vform, 2551 LogicVRegister dst, 2552 const LogicVRegister& src, 2553 int shift) { 2554 SimVRegister temp; 2555 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2556 VectorFormat vform_dst = vform; 2557 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2558 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2559} 2560 2561 2562LogicVRegister Simulator::shrn2(VectorFormat vform, 2563 LogicVRegister dst, 2564 const LogicVRegister& src, 2565 int shift) { 2566 SimVRegister temp; 2567 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2568 VectorFormat vformdst = vform; 2569 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2570 return 
extractnarrow(vformdst, dst, false, shifted_src, false); 2571} 2572 2573 2574LogicVRegister Simulator::rshrn(VectorFormat vform, 2575 LogicVRegister dst, 2576 const LogicVRegister& src, 2577 int shift) { 2578 SimVRegister temp; 2579 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2580 VectorFormat vformdst = vform; 2581 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2582 return extractnarrow(vformdst, dst, false, shifted_src, false); 2583} 2584 2585 2586LogicVRegister Simulator::rshrn2(VectorFormat vform, 2587 LogicVRegister dst, 2588 const LogicVRegister& src, 2589 int shift) { 2590 SimVRegister temp; 2591 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2592 VectorFormat vformdst = vform; 2593 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2594 return extractnarrow(vformdst, dst, false, shifted_src, false); 2595} 2596 2597 2598LogicVRegister Simulator::Table(VectorFormat vform, 2599 LogicVRegister dst, 2600 const LogicVRegister& ind, 2601 bool zero_out_of_bounds, 2602 const LogicVRegister* tab1, 2603 const LogicVRegister* tab2, 2604 const LogicVRegister* tab3, 2605 const LogicVRegister* tab4) { 2606 VIXL_ASSERT(tab1 != NULL); 2607 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2608 uint64_t result[kMaxLanesPerVector]; 2609 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2610 result[i] = zero_out_of_bounds ? 
0 : dst.Uint(kFormat16B, i); 2611 } 2612 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2613 uint64_t j = ind.Uint(vform, i); 2614 int tab_idx = static_cast<int>(j >> 4); 2615 int j_idx = static_cast<int>(j & 15); 2616 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { 2617 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2618 } 2619 } 2620 dst.SetUintArray(vform, result); 2621 return dst; 2622} 2623 2624 2625LogicVRegister Simulator::tbl(VectorFormat vform, 2626 LogicVRegister dst, 2627 const LogicVRegister& tab, 2628 const LogicVRegister& ind) { 2629 return Table(vform, dst, ind, true, &tab); 2630} 2631 2632 2633LogicVRegister Simulator::tbl(VectorFormat vform, 2634 LogicVRegister dst, 2635 const LogicVRegister& tab, 2636 const LogicVRegister& tab2, 2637 const LogicVRegister& ind) { 2638 return Table(vform, dst, ind, true, &tab, &tab2); 2639} 2640 2641 2642LogicVRegister Simulator::tbl(VectorFormat vform, 2643 LogicVRegister dst, 2644 const LogicVRegister& tab, 2645 const LogicVRegister& tab2, 2646 const LogicVRegister& tab3, 2647 const LogicVRegister& ind) { 2648 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2649} 2650 2651 2652LogicVRegister Simulator::tbl(VectorFormat vform, 2653 LogicVRegister dst, 2654 const LogicVRegister& tab, 2655 const LogicVRegister& tab2, 2656 const LogicVRegister& tab3, 2657 const LogicVRegister& tab4, 2658 const LogicVRegister& ind) { 2659 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2660} 2661 2662 2663LogicVRegister Simulator::tbx(VectorFormat vform, 2664 LogicVRegister dst, 2665 const LogicVRegister& tab, 2666 const LogicVRegister& ind) { 2667 return Table(vform, dst, ind, false, &tab); 2668} 2669 2670 2671LogicVRegister Simulator::tbx(VectorFormat vform, 2672 LogicVRegister dst, 2673 const LogicVRegister& tab, 2674 const LogicVRegister& tab2, 2675 const LogicVRegister& ind) { 2676 return Table(vform, dst, ind, false, &tab, &tab2); 2677} 2678 2679 2680LogicVRegister Simulator::tbx(VectorFormat 
vform, 2681 LogicVRegister dst, 2682 const LogicVRegister& tab, 2683 const LogicVRegister& tab2, 2684 const LogicVRegister& tab3, 2685 const LogicVRegister& ind) { 2686 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2687} 2688 2689 2690LogicVRegister Simulator::tbx(VectorFormat vform, 2691 LogicVRegister dst, 2692 const LogicVRegister& tab, 2693 const LogicVRegister& tab2, 2694 const LogicVRegister& tab3, 2695 const LogicVRegister& tab4, 2696 const LogicVRegister& ind) { 2697 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2698} 2699 2700 2701LogicVRegister Simulator::uqshrn(VectorFormat vform, 2702 LogicVRegister dst, 2703 const LogicVRegister& src, 2704 int shift) { 2705 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2706} 2707 2708 2709LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2710 LogicVRegister dst, 2711 const LogicVRegister& src, 2712 int shift) { 2713 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2714} 2715 2716 2717LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2718 LogicVRegister dst, 2719 const LogicVRegister& src, 2720 int shift) { 2721 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2722} 2723 2724 2725LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2726 LogicVRegister dst, 2727 const LogicVRegister& src, 2728 int shift) { 2729 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2730} 2731 2732 2733LogicVRegister Simulator::sqshrn(VectorFormat vform, 2734 LogicVRegister dst, 2735 const LogicVRegister& src, 2736 int shift) { 2737 SimVRegister temp; 2738 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2739 VectorFormat vformdst = vform; 2740 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2741 return sqxtn(vformdst, dst, shifted_src); 2742} 2743 2744 2745LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2746 LogicVRegister dst, 2747 const LogicVRegister& src, 2748 int shift) { 2749 SimVRegister temp; 2750 VectorFormat 
vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2751 VectorFormat vformdst = vform; 2752 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2753 return sqxtn(vformdst, dst, shifted_src); 2754} 2755 2756 2757LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2758 LogicVRegister dst, 2759 const LogicVRegister& src, 2760 int shift) { 2761 SimVRegister temp; 2762 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2763 VectorFormat vformdst = vform; 2764 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2765 return sqxtn(vformdst, dst, shifted_src); 2766} 2767 2768 2769LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2770 LogicVRegister dst, 2771 const LogicVRegister& src, 2772 int shift) { 2773 SimVRegister temp; 2774 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2775 VectorFormat vformdst = vform; 2776 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2777 return sqxtn(vformdst, dst, shifted_src); 2778} 2779 2780 2781LogicVRegister Simulator::sqshrun(VectorFormat vform, 2782 LogicVRegister dst, 2783 const LogicVRegister& src, 2784 int shift) { 2785 SimVRegister temp; 2786 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2787 VectorFormat vformdst = vform; 2788 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2789 return sqxtun(vformdst, dst, shifted_src); 2790} 2791 2792 2793LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2794 LogicVRegister dst, 2795 const LogicVRegister& src, 2796 int shift) { 2797 SimVRegister temp; 2798 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2799 VectorFormat vformdst = vform; 2800 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2801 return sqxtun(vformdst, dst, shifted_src); 2802} 2803 2804 2805LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2806 LogicVRegister dst, 2807 const LogicVRegister& src, 2808 int shift) { 2809 
SimVRegister temp; 2810 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2811 VectorFormat vformdst = vform; 2812 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2813 return sqxtun(vformdst, dst, shifted_src); 2814} 2815 2816 2817LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2818 LogicVRegister dst, 2819 const LogicVRegister& src, 2820 int shift) { 2821 SimVRegister temp; 2822 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2823 VectorFormat vformdst = vform; 2824 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2825 return sqxtun(vformdst, dst, shifted_src); 2826} 2827 2828 2829LogicVRegister Simulator::uaddl(VectorFormat vform, 2830 LogicVRegister dst, 2831 const LogicVRegister& src1, 2832 const LogicVRegister& src2) { 2833 SimVRegister temp1, temp2; 2834 uxtl(vform, temp1, src1); 2835 uxtl(vform, temp2, src2); 2836 add(vform, dst, temp1, temp2); 2837 return dst; 2838} 2839 2840 2841LogicVRegister Simulator::uaddl2(VectorFormat vform, 2842 LogicVRegister dst, 2843 const LogicVRegister& src1, 2844 const LogicVRegister& src2) { 2845 SimVRegister temp1, temp2; 2846 uxtl2(vform, temp1, src1); 2847 uxtl2(vform, temp2, src2); 2848 add(vform, dst, temp1, temp2); 2849 return dst; 2850} 2851 2852 2853LogicVRegister Simulator::uaddw(VectorFormat vform, 2854 LogicVRegister dst, 2855 const LogicVRegister& src1, 2856 const LogicVRegister& src2) { 2857 SimVRegister temp; 2858 uxtl(vform, temp, src2); 2859 add(vform, dst, src1, temp); 2860 return dst; 2861} 2862 2863 2864LogicVRegister Simulator::uaddw2(VectorFormat vform, 2865 LogicVRegister dst, 2866 const LogicVRegister& src1, 2867 const LogicVRegister& src2) { 2868 SimVRegister temp; 2869 uxtl2(vform, temp, src2); 2870 add(vform, dst, src1, temp); 2871 return dst; 2872} 2873 2874 2875LogicVRegister Simulator::saddl(VectorFormat vform, 2876 LogicVRegister dst, 2877 const LogicVRegister& src1, 2878 const 
LogicVRegister& src2) { 2879 SimVRegister temp1, temp2; 2880 sxtl(vform, temp1, src1); 2881 sxtl(vform, temp2, src2); 2882 add(vform, dst, temp1, temp2); 2883 return dst; 2884} 2885 2886 2887LogicVRegister Simulator::saddl2(VectorFormat vform, 2888 LogicVRegister dst, 2889 const LogicVRegister& src1, 2890 const LogicVRegister& src2) { 2891 SimVRegister temp1, temp2; 2892 sxtl2(vform, temp1, src1); 2893 sxtl2(vform, temp2, src2); 2894 add(vform, dst, temp1, temp2); 2895 return dst; 2896} 2897 2898 2899LogicVRegister Simulator::saddw(VectorFormat vform, 2900 LogicVRegister dst, 2901 const LogicVRegister& src1, 2902 const LogicVRegister& src2) { 2903 SimVRegister temp; 2904 sxtl(vform, temp, src2); 2905 add(vform, dst, src1, temp); 2906 return dst; 2907} 2908 2909 2910LogicVRegister Simulator::saddw2(VectorFormat vform, 2911 LogicVRegister dst, 2912 const LogicVRegister& src1, 2913 const LogicVRegister& src2) { 2914 SimVRegister temp; 2915 sxtl2(vform, temp, src2); 2916 add(vform, dst, src1, temp); 2917 return dst; 2918} 2919 2920 2921LogicVRegister Simulator::usubl(VectorFormat vform, 2922 LogicVRegister dst, 2923 const LogicVRegister& src1, 2924 const LogicVRegister& src2) { 2925 SimVRegister temp1, temp2; 2926 uxtl(vform, temp1, src1); 2927 uxtl(vform, temp2, src2); 2928 sub(vform, dst, temp1, temp2); 2929 return dst; 2930} 2931 2932 2933LogicVRegister Simulator::usubl2(VectorFormat vform, 2934 LogicVRegister dst, 2935 const LogicVRegister& src1, 2936 const LogicVRegister& src2) { 2937 SimVRegister temp1, temp2; 2938 uxtl2(vform, temp1, src1); 2939 uxtl2(vform, temp2, src2); 2940 sub(vform, dst, temp1, temp2); 2941 return dst; 2942} 2943 2944 2945LogicVRegister Simulator::usubw(VectorFormat vform, 2946 LogicVRegister dst, 2947 const LogicVRegister& src1, 2948 const LogicVRegister& src2) { 2949 SimVRegister temp; 2950 uxtl(vform, temp, src2); 2951 sub(vform, dst, src1, temp); 2952 return dst; 2953} 2954 2955 2956LogicVRegister Simulator::usubw2(VectorFormat vform, 
2957 LogicVRegister dst, 2958 const LogicVRegister& src1, 2959 const LogicVRegister& src2) { 2960 SimVRegister temp; 2961 uxtl2(vform, temp, src2); 2962 sub(vform, dst, src1, temp); 2963 return dst; 2964} 2965 2966 2967LogicVRegister Simulator::ssubl(VectorFormat vform, 2968 LogicVRegister dst, 2969 const LogicVRegister& src1, 2970 const LogicVRegister& src2) { 2971 SimVRegister temp1, temp2; 2972 sxtl(vform, temp1, src1); 2973 sxtl(vform, temp2, src2); 2974 sub(vform, dst, temp1, temp2); 2975 return dst; 2976} 2977 2978 2979LogicVRegister Simulator::ssubl2(VectorFormat vform, 2980 LogicVRegister dst, 2981 const LogicVRegister& src1, 2982 const LogicVRegister& src2) { 2983 SimVRegister temp1, temp2; 2984 sxtl2(vform, temp1, src1); 2985 sxtl2(vform, temp2, src2); 2986 sub(vform, dst, temp1, temp2); 2987 return dst; 2988} 2989 2990 2991LogicVRegister Simulator::ssubw(VectorFormat vform, 2992 LogicVRegister dst, 2993 const LogicVRegister& src1, 2994 const LogicVRegister& src2) { 2995 SimVRegister temp; 2996 sxtl(vform, temp, src2); 2997 sub(vform, dst, src1, temp); 2998 return dst; 2999} 3000 3001 3002LogicVRegister Simulator::ssubw2(VectorFormat vform, 3003 LogicVRegister dst, 3004 const LogicVRegister& src1, 3005 const LogicVRegister& src2) { 3006 SimVRegister temp; 3007 sxtl2(vform, temp, src2); 3008 sub(vform, dst, src1, temp); 3009 return dst; 3010} 3011 3012 3013LogicVRegister Simulator::uabal(VectorFormat vform, 3014 LogicVRegister dst, 3015 const LogicVRegister& src1, 3016 const LogicVRegister& src2) { 3017 SimVRegister temp1, temp2; 3018 uxtl(vform, temp1, src1); 3019 uxtl(vform, temp2, src2); 3020 uaba(vform, dst, temp1, temp2); 3021 return dst; 3022} 3023 3024 3025LogicVRegister Simulator::uabal2(VectorFormat vform, 3026 LogicVRegister dst, 3027 const LogicVRegister& src1, 3028 const LogicVRegister& src2) { 3029 SimVRegister temp1, temp2; 3030 uxtl2(vform, temp1, src1); 3031 uxtl2(vform, temp2, src2); 3032 uaba(vform, dst, temp1, temp2); 3033 return dst; 
3034} 3035 3036 3037LogicVRegister Simulator::sabal(VectorFormat vform, 3038 LogicVRegister dst, 3039 const LogicVRegister& src1, 3040 const LogicVRegister& src2) { 3041 SimVRegister temp1, temp2; 3042 sxtl(vform, temp1, src1); 3043 sxtl(vform, temp2, src2); 3044 saba(vform, dst, temp1, temp2); 3045 return dst; 3046} 3047 3048 3049LogicVRegister Simulator::sabal2(VectorFormat vform, 3050 LogicVRegister dst, 3051 const LogicVRegister& src1, 3052 const LogicVRegister& src2) { 3053 SimVRegister temp1, temp2; 3054 sxtl2(vform, temp1, src1); 3055 sxtl2(vform, temp2, src2); 3056 saba(vform, dst, temp1, temp2); 3057 return dst; 3058} 3059 3060 3061LogicVRegister Simulator::uabdl(VectorFormat vform, 3062 LogicVRegister dst, 3063 const LogicVRegister& src1, 3064 const LogicVRegister& src2) { 3065 SimVRegister temp1, temp2; 3066 uxtl(vform, temp1, src1); 3067 uxtl(vform, temp2, src2); 3068 absdiff(vform, dst, temp1, temp2, false); 3069 return dst; 3070} 3071 3072 3073LogicVRegister Simulator::uabdl2(VectorFormat vform, 3074 LogicVRegister dst, 3075 const LogicVRegister& src1, 3076 const LogicVRegister& src2) { 3077 SimVRegister temp1, temp2; 3078 uxtl2(vform, temp1, src1); 3079 uxtl2(vform, temp2, src2); 3080 absdiff(vform, dst, temp1, temp2, false); 3081 return dst; 3082} 3083 3084 3085LogicVRegister Simulator::sabdl(VectorFormat vform, 3086 LogicVRegister dst, 3087 const LogicVRegister& src1, 3088 const LogicVRegister& src2) { 3089 SimVRegister temp1, temp2; 3090 sxtl(vform, temp1, src1); 3091 sxtl(vform, temp2, src2); 3092 absdiff(vform, dst, temp1, temp2, true); 3093 return dst; 3094} 3095 3096 3097LogicVRegister Simulator::sabdl2(VectorFormat vform, 3098 LogicVRegister dst, 3099 const LogicVRegister& src1, 3100 const LogicVRegister& src2) { 3101 SimVRegister temp1, temp2; 3102 sxtl2(vform, temp1, src1); 3103 sxtl2(vform, temp2, src2); 3104 absdiff(vform, dst, temp1, temp2, true); 3105 return dst; 3106} 3107 3108 3109LogicVRegister Simulator::umull(VectorFormat vform, 
3110 LogicVRegister dst, 3111 const LogicVRegister& src1, 3112 const LogicVRegister& src2) { 3113 SimVRegister temp1, temp2; 3114 uxtl(vform, temp1, src1); 3115 uxtl(vform, temp2, src2); 3116 mul(vform, dst, temp1, temp2); 3117 return dst; 3118} 3119 3120 3121LogicVRegister Simulator::umull2(VectorFormat vform, 3122 LogicVRegister dst, 3123 const LogicVRegister& src1, 3124 const LogicVRegister& src2) { 3125 SimVRegister temp1, temp2; 3126 uxtl2(vform, temp1, src1); 3127 uxtl2(vform, temp2, src2); 3128 mul(vform, dst, temp1, temp2); 3129 return dst; 3130} 3131 3132 3133LogicVRegister Simulator::smull(VectorFormat vform, 3134 LogicVRegister dst, 3135 const LogicVRegister& src1, 3136 const LogicVRegister& src2) { 3137 SimVRegister temp1, temp2; 3138 sxtl(vform, temp1, src1); 3139 sxtl(vform, temp2, src2); 3140 mul(vform, dst, temp1, temp2); 3141 return dst; 3142} 3143 3144 3145LogicVRegister Simulator::smull2(VectorFormat vform, 3146 LogicVRegister dst, 3147 const LogicVRegister& src1, 3148 const LogicVRegister& src2) { 3149 SimVRegister temp1, temp2; 3150 sxtl2(vform, temp1, src1); 3151 sxtl2(vform, temp2, src2); 3152 mul(vform, dst, temp1, temp2); 3153 return dst; 3154} 3155 3156 3157LogicVRegister Simulator::umlsl(VectorFormat vform, 3158 LogicVRegister dst, 3159 const LogicVRegister& src1, 3160 const LogicVRegister& src2) { 3161 SimVRegister temp1, temp2; 3162 uxtl(vform, temp1, src1); 3163 uxtl(vform, temp2, src2); 3164 mls(vform, dst, temp1, temp2); 3165 return dst; 3166} 3167 3168 3169LogicVRegister Simulator::umlsl2(VectorFormat vform, 3170 LogicVRegister dst, 3171 const LogicVRegister& src1, 3172 const LogicVRegister& src2) { 3173 SimVRegister temp1, temp2; 3174 uxtl2(vform, temp1, src1); 3175 uxtl2(vform, temp2, src2); 3176 mls(vform, dst, temp1, temp2); 3177 return dst; 3178} 3179 3180 3181LogicVRegister Simulator::smlsl(VectorFormat vform, 3182 LogicVRegister dst, 3183 const LogicVRegister& src1, 3184 const LogicVRegister& src2) { 3185 SimVRegister temp1, 
temp2; 3186 sxtl(vform, temp1, src1); 3187 sxtl(vform, temp2, src2); 3188 mls(vform, dst, temp1, temp2); 3189 return dst; 3190} 3191 3192 3193LogicVRegister Simulator::smlsl2(VectorFormat vform, 3194 LogicVRegister dst, 3195 const LogicVRegister& src1, 3196 const LogicVRegister& src2) { 3197 SimVRegister temp1, temp2; 3198 sxtl2(vform, temp1, src1); 3199 sxtl2(vform, temp2, src2); 3200 mls(vform, dst, temp1, temp2); 3201 return dst; 3202} 3203 3204 3205LogicVRegister Simulator::umlal(VectorFormat vform, 3206 LogicVRegister dst, 3207 const LogicVRegister& src1, 3208 const LogicVRegister& src2) { 3209 SimVRegister temp1, temp2; 3210 uxtl(vform, temp1, src1); 3211 uxtl(vform, temp2, src2); 3212 mla(vform, dst, temp1, temp2); 3213 return dst; 3214} 3215 3216 3217LogicVRegister Simulator::umlal2(VectorFormat vform, 3218 LogicVRegister dst, 3219 const LogicVRegister& src1, 3220 const LogicVRegister& src2) { 3221 SimVRegister temp1, temp2; 3222 uxtl2(vform, temp1, src1); 3223 uxtl2(vform, temp2, src2); 3224 mla(vform, dst, temp1, temp2); 3225 return dst; 3226} 3227 3228 3229LogicVRegister Simulator::smlal(VectorFormat vform, 3230 LogicVRegister dst, 3231 const LogicVRegister& src1, 3232 const LogicVRegister& src2) { 3233 SimVRegister temp1, temp2; 3234 sxtl(vform, temp1, src1); 3235 sxtl(vform, temp2, src2); 3236 mla(vform, dst, temp1, temp2); 3237 return dst; 3238} 3239 3240 3241LogicVRegister Simulator::smlal2(VectorFormat vform, 3242 LogicVRegister dst, 3243 const LogicVRegister& src1, 3244 const LogicVRegister& src2) { 3245 SimVRegister temp1, temp2; 3246 sxtl2(vform, temp1, src1); 3247 sxtl2(vform, temp2, src2); 3248 mla(vform, dst, temp1, temp2); 3249 return dst; 3250} 3251 3252 3253LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3254 LogicVRegister dst, 3255 const LogicVRegister& src1, 3256 const LogicVRegister& src2) { 3257 SimVRegister temp; 3258 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3259 return add(vform, dst, dst, 
product).SignedSaturate(vform); 3260} 3261 3262 3263LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3264 LogicVRegister dst, 3265 const LogicVRegister& src1, 3266 const LogicVRegister& src2) { 3267 SimVRegister temp; 3268 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3269 return add(vform, dst, dst, product).SignedSaturate(vform); 3270} 3271 3272 3273LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3274 LogicVRegister dst, 3275 const LogicVRegister& src1, 3276 const LogicVRegister& src2) { 3277 SimVRegister temp; 3278 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3279 return sub(vform, dst, dst, product).SignedSaturate(vform); 3280} 3281 3282 3283LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3284 LogicVRegister dst, 3285 const LogicVRegister& src1, 3286 const LogicVRegister& src2) { 3287 SimVRegister temp; 3288 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3289 return sub(vform, dst, dst, product).SignedSaturate(vform); 3290} 3291 3292 3293LogicVRegister Simulator::sqdmull(VectorFormat vform, 3294 LogicVRegister dst, 3295 const LogicVRegister& src1, 3296 const LogicVRegister& src2) { 3297 SimVRegister temp; 3298 LogicVRegister product = smull(vform, temp, src1, src2); 3299 return add(vform, dst, product, product).SignedSaturate(vform); 3300} 3301 3302 3303LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3304 LogicVRegister dst, 3305 const LogicVRegister& src1, 3306 const LogicVRegister& src2) { 3307 SimVRegister temp; 3308 LogicVRegister product = smull2(vform, temp, src1, src2); 3309 return add(vform, dst, product, product).SignedSaturate(vform); 3310} 3311 3312 3313LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3314 LogicVRegister dst, 3315 const LogicVRegister& src1, 3316 const LogicVRegister& src2, 3317 bool round) { 3318 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 
3319 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3320 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3321 3322 int esize = LaneSizeInBitsFromFormat(vform); 3323 int round_const = round ? (1 << (esize - 2)) : 0; 3324 int64_t product; 3325 3326 dst.ClearForWrite(vform); 3327 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3328 product = src1.Int(vform, i) * src2.Int(vform, i); 3329 product += round_const; 3330 product = product >> (esize - 1); 3331 3332 if (product > MaxIntFromFormat(vform)) { 3333 product = MaxIntFromFormat(vform); 3334 } else if (product < MinIntFromFormat(vform)) { 3335 product = MinIntFromFormat(vform); 3336 } 3337 dst.SetInt(vform, i, product); 3338 } 3339 return dst; 3340} 3341 3342 3343LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3344 LogicVRegister dst, 3345 const LogicVRegister& src1, 3346 const LogicVRegister& src2) { 3347 return sqrdmulh(vform, dst, src1, src2, false); 3348} 3349 3350 3351LogicVRegister Simulator::addhn(VectorFormat vform, 3352 LogicVRegister dst, 3353 const LogicVRegister& src1, 3354 const LogicVRegister& src2) { 3355 SimVRegister temp; 3356 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3357 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3358 return dst; 3359} 3360 3361 3362LogicVRegister Simulator::addhn2(VectorFormat vform, 3363 LogicVRegister dst, 3364 const LogicVRegister& src1, 3365 const LogicVRegister& src2) { 3366 SimVRegister temp; 3367 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3368 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3369 return dst; 3370} 3371 3372 3373LogicVRegister Simulator::raddhn(VectorFormat vform, 3374 LogicVRegister dst, 3375 const LogicVRegister& src1, 3376 const LogicVRegister& src2) { 3377 SimVRegister temp; 3378 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3379 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3380 return dst; 3381} 3382 3383 
3384LogicVRegister Simulator::raddhn2(VectorFormat vform, 3385 LogicVRegister dst, 3386 const LogicVRegister& src1, 3387 const LogicVRegister& src2) { 3388 SimVRegister temp; 3389 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3390 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3391 return dst; 3392} 3393 3394 3395LogicVRegister Simulator::subhn(VectorFormat vform, 3396 LogicVRegister dst, 3397 const LogicVRegister& src1, 3398 const LogicVRegister& src2) { 3399 SimVRegister temp; 3400 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3401 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3402 return dst; 3403} 3404 3405 3406LogicVRegister Simulator::subhn2(VectorFormat vform, 3407 LogicVRegister dst, 3408 const LogicVRegister& src1, 3409 const LogicVRegister& src2) { 3410 SimVRegister temp; 3411 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3412 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3413 return dst; 3414} 3415 3416 3417LogicVRegister Simulator::rsubhn(VectorFormat vform, 3418 LogicVRegister dst, 3419 const LogicVRegister& src1, 3420 const LogicVRegister& src2) { 3421 SimVRegister temp; 3422 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3423 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3424 return dst; 3425} 3426 3427 3428LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3429 LogicVRegister dst, 3430 const LogicVRegister& src1, 3431 const LogicVRegister& src2) { 3432 SimVRegister temp; 3433 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3434 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3435 return dst; 3436} 3437 3438 3439LogicVRegister Simulator::trn1(VectorFormat vform, 3440 LogicVRegister dst, 3441 const LogicVRegister& src1, 3442 const LogicVRegister& src2) { 3443 uint64_t result[16]; 3444 int laneCount = LaneCountFromFormat(vform); 3445 int pairs = laneCount / 2; 3446 for (int i = 0; i 
< pairs; ++i) { 3447 result[2 * i] = src1.Uint(vform, 2 * i); 3448 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3449 } 3450 3451 dst.ClearForWrite(vform); 3452 for (int i = 0; i < laneCount; ++i) { 3453 dst.SetUint(vform, i, result[i]); 3454 } 3455 return dst; 3456} 3457 3458 3459LogicVRegister Simulator::trn2(VectorFormat vform, 3460 LogicVRegister dst, 3461 const LogicVRegister& src1, 3462 const LogicVRegister& src2) { 3463 uint64_t result[16]; 3464 int laneCount = LaneCountFromFormat(vform); 3465 int pairs = laneCount / 2; 3466 for (int i = 0; i < pairs; ++i) { 3467 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3468 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3469 } 3470 3471 dst.ClearForWrite(vform); 3472 for (int i = 0; i < laneCount; ++i) { 3473 dst.SetUint(vform, i, result[i]); 3474 } 3475 return dst; 3476} 3477 3478 3479LogicVRegister Simulator::zip1(VectorFormat vform, 3480 LogicVRegister dst, 3481 const LogicVRegister& src1, 3482 const LogicVRegister& src2) { 3483 uint64_t result[16]; 3484 int laneCount = LaneCountFromFormat(vform); 3485 int pairs = laneCount / 2; 3486 for (int i = 0; i < pairs; ++i) { 3487 result[2 * i] = src1.Uint(vform, i); 3488 result[(2 * i) + 1] = src2.Uint(vform, i); 3489 } 3490 3491 dst.ClearForWrite(vform); 3492 for (int i = 0; i < laneCount; ++i) { 3493 dst.SetUint(vform, i, result[i]); 3494 } 3495 return dst; 3496} 3497 3498 3499LogicVRegister Simulator::zip2(VectorFormat vform, 3500 LogicVRegister dst, 3501 const LogicVRegister& src1, 3502 const LogicVRegister& src2) { 3503 uint64_t result[16]; 3504 int laneCount = LaneCountFromFormat(vform); 3505 int pairs = laneCount / 2; 3506 for (int i = 0; i < pairs; ++i) { 3507 result[2 * i] = src1.Uint(vform, pairs + i); 3508 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3509 } 3510 3511 dst.ClearForWrite(vform); 3512 for (int i = 0; i < laneCount; ++i) { 3513 dst.SetUint(vform, i, result[i]); 3514 } 3515 return dst; 3516} 3517 3518 3519LogicVRegister 
Simulator::uzp1(VectorFormat vform, 3520 LogicVRegister dst, 3521 const LogicVRegister& src1, 3522 const LogicVRegister& src2) { 3523 uint64_t result[32]; 3524 int laneCount = LaneCountFromFormat(vform); 3525 for (int i = 0; i < laneCount; ++i) { 3526 result[i] = src1.Uint(vform, i); 3527 result[laneCount + i] = src2.Uint(vform, i); 3528 } 3529 3530 dst.ClearForWrite(vform); 3531 for (int i = 0; i < laneCount; ++i) { 3532 dst.SetUint(vform, i, result[2 * i]); 3533 } 3534 return dst; 3535} 3536 3537 3538LogicVRegister Simulator::uzp2(VectorFormat vform, 3539 LogicVRegister dst, 3540 const LogicVRegister& src1, 3541 const LogicVRegister& src2) { 3542 uint64_t result[32]; 3543 int laneCount = LaneCountFromFormat(vform); 3544 for (int i = 0; i < laneCount; ++i) { 3545 result[i] = src1.Uint(vform, i); 3546 result[laneCount + i] = src2.Uint(vform, i); 3547 } 3548 3549 dst.ClearForWrite(vform); 3550 for (int i = 0; i < laneCount; ++i) { 3551 dst.SetUint(vform, i, result[(2 * i) + 1]); 3552 } 3553 return dst; 3554} 3555 3556 3557template <typename T> 3558T Simulator::FPAdd(T op1, T op2) { 3559 T result = FPProcessNaNs(op1, op2); 3560 if (std::isnan(result)) return result; 3561 3562 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3563 // inf + -inf returns the default NaN. 3564 FPProcessException(); 3565 return FPDefaultNaN<T>(); 3566 } else { 3567 // Other cases should be handled by standard arithmetic. 3568 return op1 + op2; 3569 } 3570} 3571 3572 3573template <typename T> 3574T Simulator::FPSub(T op1, T op2) { 3575 // NaNs should be handled elsewhere. 3576 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3577 3578 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3579 // inf - inf returns the default NaN. 3580 FPProcessException(); 3581 return FPDefaultNaN<T>(); 3582 } else { 3583 // Other cases should be handled by standard arithmetic. 
3584 return op1 - op2; 3585 } 3586} 3587 3588 3589template <typename T> 3590T Simulator::FPMul(T op1, T op2) { 3591 // NaNs should be handled elsewhere. 3592 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3593 3594 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3595 // inf * 0.0 returns the default NaN. 3596 FPProcessException(); 3597 return FPDefaultNaN<T>(); 3598 } else { 3599 // Other cases should be handled by standard arithmetic. 3600 return op1 * op2; 3601 } 3602} 3603 3604 3605template <typename T> 3606T Simulator::FPMulx(T op1, T op2) { 3607 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3608 // inf * 0.0 returns +/-2.0. 3609 T two = 2.0; 3610 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3611 } 3612 return FPMul(op1, op2); 3613} 3614 3615 3616template <typename T> 3617T Simulator::FPMulAdd(T a, T op1, T op2) { 3618 T result = FPProcessNaNs3(a, op1, op2); 3619 3620 T sign_a = copysign(1.0, a); 3621 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3622 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3623 bool operation_generates_nan = 3624 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3625 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3626 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3627 3628 if (std::isnan(result)) { 3629 // Generated NaNs override quiet NaNs propagated from a. 3630 if (operation_generates_nan && IsQuietNaN(a)) { 3631 FPProcessException(); 3632 return FPDefaultNaN<T>(); 3633 } else { 3634 return result; 3635 } 3636 } 3637 3638 // If the operation would produce a NaN, return the default NaN. 3639 if (operation_generates_nan) { 3640 FPProcessException(); 3641 return FPDefaultNaN<T>(); 3642 } 3643 3644 // Work around broken fma implementations for exact zero results: The sign of 3645 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 
3646 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3647 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; 3648 } 3649 3650 result = FusedMultiplyAdd(op1, op2, a); 3651 VIXL_ASSERT(!std::isnan(result)); 3652 3653 // Work around broken fma implementations for rounded zero results: If a is 3654 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3655 if ((a == 0.0) && (result == 0.0)) { 3656 return copysign(0.0, sign_prod); 3657 } 3658 3659 return result; 3660} 3661 3662 3663template <typename T> 3664T Simulator::FPDiv(T op1, T op2) { 3665 // NaNs should be handled elsewhere. 3666 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3667 3668 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3669 // inf / inf and 0.0 / 0.0 return the default NaN. 3670 FPProcessException(); 3671 return FPDefaultNaN<T>(); 3672 } else { 3673 if (op2 == 0.0) FPProcessException(); 3674 3675 // Other cases should be handled by standard arithmetic. 3676 return op1 / op2; 3677 } 3678} 3679 3680 3681template <typename T> 3682T Simulator::FPSqrt(T op) { 3683 if (std::isnan(op)) { 3684 return FPProcessNaN(op); 3685 } else if (op < 0.0) { 3686 FPProcessException(); 3687 return FPDefaultNaN<T>(); 3688 } else { 3689 return sqrt(op); 3690 } 3691} 3692 3693 3694template <typename T> 3695T Simulator::FPMax(T a, T b) { 3696 T result = FPProcessNaNs(a, b); 3697 if (std::isnan(result)) return result; 3698 3699 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3700 // a and b are zero, and the sign differs: return +0.0. 3701 return 0.0; 3702 } else { 3703 return (a > b) ? a : b; 3704 } 3705} 3706 3707 3708template <typename T> 3709T Simulator::FPMaxNM(T a, T b) { 3710 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3711 a = kFP64NegativeInfinity; 3712 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3713 b = kFP64NegativeInfinity; 3714 } 3715 3716 T result = FPProcessNaNs(a, b); 3717 return std::isnan(result) ? 
result : FPMax(a, b); 3718} 3719 3720 3721template <typename T> 3722T Simulator::FPMin(T a, T b) { 3723 T result = FPProcessNaNs(a, b); 3724 if (std::isnan(result)) return result; 3725 3726 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3727 // a and b are zero, and the sign differs: return -0.0. 3728 return -0.0; 3729 } else { 3730 return (a < b) ? a : b; 3731 } 3732} 3733 3734 3735template <typename T> 3736T Simulator::FPMinNM(T a, T b) { 3737 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3738 a = kFP64PositiveInfinity; 3739 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3740 b = kFP64PositiveInfinity; 3741 } 3742 3743 T result = FPProcessNaNs(a, b); 3744 return std::isnan(result) ? result : FPMin(a, b); 3745} 3746 3747 3748template <typename T> 3749T Simulator::FPRecipStepFused(T op1, T op2) { 3750 const T two = 2.0; 3751 if ((std::isinf(op1) && (op2 == 0.0)) || 3752 ((op1 == 0.0) && (std::isinf(op2)))) { 3753 return two; 3754 } else if (std::isinf(op1) || std::isinf(op2)) { 3755 // Return +inf if signs match, otherwise -inf. 3756 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3757 : kFP64NegativeInfinity; 3758 } else { 3759 return FusedMultiplyAdd(op1, op2, two); 3760 } 3761} 3762 3763 3764template <typename T> 3765T Simulator::FPRSqrtStepFused(T op1, T op2) { 3766 const T one_point_five = 1.5; 3767 const T two = 2.0; 3768 3769 if ((std::isinf(op1) && (op2 == 0.0)) || 3770 ((op1 == 0.0) && (std::isinf(op2)))) { 3771 return one_point_five; 3772 } else if (std::isinf(op1) || std::isinf(op2)) { 3773 // Return +inf if signs match, otherwise -inf. 3774 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3775 : kFP64NegativeInfinity; 3776 } else { 3777 // The multiply-add-halve operation must be fully fused, so avoid interim 3778 // rounding by checking which operand can be losslessly divided by two 3779 // before doing the multiply-add. 
3780 if (std::isnormal(op1 / two)) { 3781 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3782 } else if (std::isnormal(op2 / two)) { 3783 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3784 } else { 3785 // Neither operand is normal after halving: the result is dominated by 3786 // the addition term, so just return that. 3787 return one_point_five; 3788 } 3789 } 3790} 3791 3792 3793double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3794 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3795 (value == kFP64NegativeInfinity)) { 3796 return value; 3797 } else if (std::isnan(value)) { 3798 return FPProcessNaN(value); 3799 } 3800 3801 double int_result = std::floor(value); 3802 double error = value - int_result; 3803 switch (round_mode) { 3804 case FPTieAway: { 3805 // Take care of correctly handling the range ]-0.5, -0.0], which must 3806 // yield -0.0. 3807 if ((-0.5 < value) && (value < 0.0)) { 3808 int_result = -0.0; 3809 3810 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3811 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3812 // result is positive, round up. 3813 int_result++; 3814 } 3815 break; 3816 } 3817 case FPTieEven: { 3818 // Take care of correctly handling the range [-0.5, -0.0], which must 3819 // yield -0.0. 3820 if ((-0.5 <= value) && (value < 0.0)) { 3821 int_result = -0.0; 3822 3823 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3824 // result is odd, round up. 3825 } else if ((error > 0.5) || 3826 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3827 int_result++; 3828 } 3829 break; 3830 } 3831 case FPZero: { 3832 // If value>0 then we take floor(value) 3833 // otherwise, ceil(value). 3834 if (value < 0) { 3835 int_result = ceil(value); 3836 } 3837 break; 3838 } 3839 case FPNegativeInfinity: { 3840 // We always use floor(value). 
3841 break; 3842 } 3843 case FPPositiveInfinity: { 3844 // Take care of correctly handling the range ]-1.0, -0.0], which must 3845 // yield -0.0. 3846 if ((-1.0 < value) && (value < 0.0)) { 3847 int_result = -0.0; 3848 3849 // If the error is non-zero, round up. 3850 } else if (error > 0.0) { 3851 int_result++; 3852 } 3853 break; 3854 } 3855 default: 3856 VIXL_UNIMPLEMENTED(); 3857 } 3858 return int_result; 3859} 3860 3861 3862int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3863 value = FPRoundInt(value, rmode); 3864 if (value >= kWMaxInt) { 3865 return kWMaxInt; 3866 } else if (value < kWMinInt) { 3867 return kWMinInt; 3868 } 3869 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3870} 3871 3872 3873int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3874 value = FPRoundInt(value, rmode); 3875 if (value >= kXMaxInt) { 3876 return kXMaxInt; 3877 } else if (value < kXMinInt) { 3878 return kXMinInt; 3879 } 3880 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3881} 3882 3883 3884uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3885 value = FPRoundInt(value, rmode); 3886 if (value >= kWMaxUInt) { 3887 return kWMaxUInt; 3888 } else if (value < 0.0) { 3889 return 0; 3890 } 3891 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3892} 3893 3894 3895uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3896 value = FPRoundInt(value, rmode); 3897 if (value >= kXMaxUInt) { 3898 return kXMaxUInt; 3899 } else if (value < 0.0) { 3900 return 0; 3901 } 3902 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3903} 3904 3905 3906#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3907 template <typename T> \ 3908 LogicVRegister Simulator::FN(VectorFormat vform, \ 3909 LogicVRegister dst, \ 3910 const LogicVRegister& src1, \ 3911 const LogicVRegister& src2) { \ 3912 dst.ClearForWrite(vform); \ 3913 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3914 T op1 = src1.Float<T>(i); \ 3915 T op2 = src2.Float<T>(i); \ 3916 T result; \ 3917 if (PROCNAN) { \ 3918 result = FPProcessNaNs(op1, op2); \ 3919 if (!std::isnan(result)) { \ 3920 result = OP(op1, op2); \ 3921 } \ 3922 } else { \ 3923 result = OP(op1, op2); \ 3924 } \ 3925 dst.SetFloat(i, result); \ 3926 } \ 3927 return dst; \ 3928 } \ 3929 \ 3930 LogicVRegister Simulator::FN(VectorFormat vform, \ 3931 LogicVRegister dst, \ 3932 const LogicVRegister& src1, \ 3933 const LogicVRegister& src2) { \ 3934 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3935 FN<float>(vform, dst, src1, src2); \ 3936 } else { \ 3937 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3938 FN<double>(vform, dst, src1, src2); \ 3939 } \ 3940 return dst; \ 3941 } 3942NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3943#undef DEFINE_NEON_FP_VECTOR_OP 3944 3945 3946LogicVRegister Simulator::fnmul(VectorFormat vform, 3947 LogicVRegister dst, 3948 const LogicVRegister& src1, 3949 const LogicVRegister& src2) { 3950 SimVRegister temp; 3951 LogicVRegister product = fmul(vform, temp, src1, src2); 3952 return fneg(vform, dst, product); 3953} 3954 3955 3956template <typename T> 3957LogicVRegister Simulator::frecps(VectorFormat vform, 3958 LogicVRegister dst, 3959 const LogicVRegister& src1, 3960 const LogicVRegister& src2) { 3961 dst.ClearForWrite(vform); 3962 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3963 T op1 = -src1.Float<T>(i); 3964 T op2 = src2.Float<T>(i); 3965 T result = FPProcessNaNs(op1, op2); 3966 dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); 3967 } 3968 return dst; 3969} 3970 3971 3972LogicVRegister Simulator::frecps(VectorFormat vform, 3973 LogicVRegister dst, 3974 const LogicVRegister& src1, 3975 const LogicVRegister& src2) { 3976 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3977 frecps<float>(vform, dst, src1, src2); 3978 } else { 3979 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3980 frecps<double>(vform, dst, src1, src2); 3981 } 3982 return dst; 3983} 3984 3985 3986template <typename T> 3987LogicVRegister Simulator::frsqrts(VectorFormat vform, 3988 LogicVRegister dst, 3989 const LogicVRegister& src1, 3990 const LogicVRegister& src2) { 3991 dst.ClearForWrite(vform); 3992 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3993 T op1 = -src1.Float<T>(i); 3994 T op2 = src2.Float<T>(i); 3995 T result = FPProcessNaNs(op1, op2); 3996 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 3997 } 3998 return dst; 3999} 4000 4001 4002LogicVRegister Simulator::frsqrts(VectorFormat vform, 4003 LogicVRegister dst, 4004 const LogicVRegister& src1, 4005 const LogicVRegister& src2) { 4006 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4007 frsqrts<float>(vform, dst, src1, src2); 4008 } else { 4009 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4010 frsqrts<double>(vform, dst, src1, src2); 4011 } 4012 return dst; 4013} 4014 4015 4016template <typename T> 4017LogicVRegister Simulator::fcmp(VectorFormat vform, 4018 LogicVRegister dst, 4019 const LogicVRegister& src1, 4020 const LogicVRegister& src2, 4021 Condition cond) { 4022 dst.ClearForWrite(vform); 4023 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4024 bool result = false; 4025 T op1 = src1.Float<T>(i); 4026 T op2 = src2.Float<T>(i); 4027 T nan_result = FPProcessNaNs(op1, op2); 4028 if (!std::isnan(nan_result)) { 4029 switch (cond) { 4030 case eq: 4031 result = (op1 == op2); 4032 break; 4033 case ge: 4034 result = (op1 >= op2); 4035 break; 4036 case gt: 
4037 result = (op1 > op2); 4038 break; 4039 case le: 4040 result = (op1 <= op2); 4041 break; 4042 case lt: 4043 result = (op1 < op2); 4044 break; 4045 default: 4046 VIXL_UNREACHABLE(); 4047 break; 4048 } 4049 } 4050 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 4051 } 4052 return dst; 4053} 4054 4055 4056LogicVRegister Simulator::fcmp(VectorFormat vform, 4057 LogicVRegister dst, 4058 const LogicVRegister& src1, 4059 const LogicVRegister& src2, 4060 Condition cond) { 4061 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4062 fcmp<float>(vform, dst, src1, src2, cond); 4063 } else { 4064 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4065 fcmp<double>(vform, dst, src1, src2, cond); 4066 } 4067 return dst; 4068} 4069 4070 4071LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4072 LogicVRegister dst, 4073 const LogicVRegister& src, 4074 Condition cond) { 4075 SimVRegister temp; 4076 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4077 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 4078 fcmp<float>(vform, dst, src, zero_reg, cond); 4079 } else { 4080 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4081 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 4082 fcmp<double>(vform, dst, src, zero_reg, cond); 4083 } 4084 return dst; 4085} 4086 4087 4088LogicVRegister Simulator::fabscmp(VectorFormat vform, 4089 LogicVRegister dst, 4090 const LogicVRegister& src1, 4091 const LogicVRegister& src2, 4092 Condition cond) { 4093 SimVRegister temp1, temp2; 4094 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4095 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4096 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4097 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4098 } else { 4099 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4100 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4101 LogicVRegister abs_src2 = 
fabs_<double>(vform, temp2, src2); 4102 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4103 } 4104 return dst; 4105} 4106 4107 4108template <typename T> 4109LogicVRegister Simulator::fmla(VectorFormat vform, 4110 LogicVRegister dst, 4111 const LogicVRegister& src1, 4112 const LogicVRegister& src2) { 4113 dst.ClearForWrite(vform); 4114 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4115 T op1 = src1.Float<T>(i); 4116 T op2 = src2.Float<T>(i); 4117 T acc = dst.Float<T>(i); 4118 T result = FPMulAdd(acc, op1, op2); 4119 dst.SetFloat(i, result); 4120 } 4121 return dst; 4122} 4123 4124 4125LogicVRegister Simulator::fmla(VectorFormat vform, 4126 LogicVRegister dst, 4127 const LogicVRegister& src1, 4128 const LogicVRegister& src2) { 4129 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4130 fmla<float>(vform, dst, src1, src2); 4131 } else { 4132 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4133 fmla<double>(vform, dst, src1, src2); 4134 } 4135 return dst; 4136} 4137 4138 4139template <typename T> 4140LogicVRegister Simulator::fmls(VectorFormat vform, 4141 LogicVRegister dst, 4142 const LogicVRegister& src1, 4143 const LogicVRegister& src2) { 4144 dst.ClearForWrite(vform); 4145 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4146 T op1 = -src1.Float<T>(i); 4147 T op2 = src2.Float<T>(i); 4148 T acc = dst.Float<T>(i); 4149 T result = FPMulAdd(acc, op1, op2); 4150 dst.SetFloat(i, result); 4151 } 4152 return dst; 4153} 4154 4155 4156LogicVRegister Simulator::fmls(VectorFormat vform, 4157 LogicVRegister dst, 4158 const LogicVRegister& src1, 4159 const LogicVRegister& src2) { 4160 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4161 fmls<float>(vform, dst, src1, src2); 4162 } else { 4163 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4164 fmls<double>(vform, dst, src1, src2); 4165 } 4166 return dst; 4167} 4168 4169 4170template <typename T> 4171LogicVRegister Simulator::fneg(VectorFormat vform, 4172 LogicVRegister dst, 4173 
const LogicVRegister& src) {
  // Lane-wise negation of src into dst.
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    T negated = src.Float<T>(lane);
    negated = -negated;
    dst.SetFloat(lane, negated);
  }
  return dst;
}


// Dispatches to fneg<float> or fneg<double> based on the lane size.
LogicVRegister Simulator::fneg(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    fneg<float>(vform, dst, src);
    return dst;
  }
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  fneg<double>(vform, dst, src);
  return dst;
}


// Lane-wise absolute value. The sign is tested with copysign() rather than a
// comparison against zero, so a lane holding -0.0 is also negated.
template <typename T>
LogicVRegister Simulator::fabs_(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    T magnitude = src.Float<T>(lane);
    const bool sign_is_negative = copysign(1.0, magnitude) < 0.0;
    if (sign_is_negative) {
      magnitude = -magnitude;
    }
    dst.SetFloat(lane, magnitude);
  }
  return dst;
}


// Dispatches to fabs_<float> or fabs_<double> based on the lane size.
LogicVRegister Simulator::fabs_(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    fabs_<float>(vform, dst, src);
    return dst;
  }
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  fabs_<double>(vform, dst, src);
  return dst;
}


// Absolute difference: fsub() into a scratch register, then fabs_() into dst.
LogicVRegister Simulator::fabd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister difference;
  fsub(vform, difference, src1, src2);
  fabs_(vform, dst, difference);
  return dst;
}


// Lane-wise square root via FPSqrt(), for single- or double-precision lanes.
LogicVRegister Simulator::fsqrt(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float result = FPSqrt(src.Float<float>(i));
      dst.SetFloat(i, result);
    }
  } else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4248 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4249 double result = FPSqrt(src.Float<double>(i)); 4250 dst.SetFloat(i, result); 4251 } 4252 } 4253 return dst; 4254} 4255 4256 4257#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4258 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4259 LogicVRegister dst, \ 4260 const LogicVRegister& src1, \ 4261 const LogicVRegister& src2) { \ 4262 SimVRegister temp1, temp2; \ 4263 uzp1(vform, temp1, src1, src2); \ 4264 uzp2(vform, temp2, src1, src2); \ 4265 FN(vform, dst, temp1, temp2); \ 4266 return dst; \ 4267 } \ 4268 \ 4269 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4270 LogicVRegister dst, \ 4271 const LogicVRegister& src) { \ 4272 if (vform == kFormatS) { \ 4273 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4274 dst.SetFloat(0, result); \ 4275 } else { \ 4276 VIXL_ASSERT(vform == kFormatD); \ 4277 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4278 dst.SetFloat(0, result); \ 4279 } \ 4280 dst.ClearForWrite(vform); \ 4281 return dst; \ 4282 } 4283NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4284#undef DEFINE_NEON_FP_PAIR_OP 4285 4286 4287LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4288 LogicVRegister dst, 4289 const LogicVRegister& src, 4290 FPMinMaxOp Op) { 4291 VIXL_ASSERT(vform == kFormat4S); 4292 USE(vform); 4293 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4294 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4295 float result = (this->*Op)(result1, result2); 4296 dst.ClearForWrite(kFormatS); 4297 dst.SetFloat<float>(0, result); 4298 return dst; 4299} 4300 4301 4302LogicVRegister Simulator::fmaxv(VectorFormat vform, 4303 LogicVRegister dst, 4304 const LogicVRegister& src) { 4305 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4306} 4307 4308 4309LogicVRegister Simulator::fminv(VectorFormat vform, 4310 LogicVRegister dst, 4311 const 
LogicVRegister& src) {
  return fminmaxv(vform, dst, src, &Simulator::FPMin);
}


// Across-lanes "number" maximum: fminmaxv() with FPMaxNM.
LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
}


// Across-lanes "number" minimum: fminmaxv() with FPMinNM.
LogicVRegister Simulator::fminnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
}


// Multiply by element: lane 'index' of src2 is broadcast into a scratch
// register with dup_element(), then the lane-wise fmul<T>() is applied.
//
// The element is captured BEFORE dst.ClearForWrite(vform) is called. dst may
// refer to the same register as src2, and 'index' may select a lane above the
// width of vform (the broadcast always uses a full 4S/2D view of src2).
// NOTE(review): this assumes ClearForWrite() zeroes the part of dst above
// vform's width - confirm against its definition. If so, clearing first would
// destroy the selected element in that aliasing case.
LogicVRegister Simulator::fmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmul<float>(vform, dst, src1, index_reg);

  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmul<double>(vform, dst, src1, index_reg);
  }
  return dst;
}


// Multiply-accumulate by element: lane 'index' of src2 is broadcast into a
// scratch register, then the lane-wise fmla<T>() is applied.
//
// The element is captured before dst.ClearForWrite(vform) so that it survives
// when dst and src2 refer to the same register (NOTE(review): assumes
// ClearForWrite() zeroes the part of dst above vform's width - confirm).
LogicVRegister Simulator::fmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmla<float>(vform, dst, src1, index_reg);

  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmla<double>(vform, dst, src1, index_reg);
  }
  return dst;
}


// Multiply-subtract by element: lane 'index' of src2 is broadcast into a
// scratch register, then the lane-wise fmls<T>() is applied.
//
// The element is captured before dst.ClearForWrite(vform) so that it survives
// when dst and src2 refer to the same register (NOTE(review): assumes
// ClearForWrite() zeroes the part of dst above vform's width - confirm).
LogicVRegister Simulator::fmls(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmls<float>(vform, dst, src1, index_reg);

  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmls<double>(vform, dst, src1, index_reg);
  }
  return dst;
}


// Extended multiply by element: lane 'index' of src2 is broadcast into a
// scratch register, then the lane-wise fmulx<T>() is applied.
//
// The element is captured before dst.ClearForWrite(vform) so that it survives
// when dst and src2 refer to the same register (NOTE(review): assumes
// ClearForWrite() zeroes the part of dst above vform's width - confirm).
LogicVRegister Simulator::fmulx(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmulx<float>(vform, dst, src1, index_reg);

  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmulx<double>(vform, dst, src1, index_reg);
  }
  return dst;
}


// Lane-wise round-to-integral using FPRoundInt(). When inexact_exception is
// set, FPProcessException() is raised for every non-NaN lane whose rounded
// value differs from its input.
LogicVRegister Simulator::frint(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode,
                                bool inexact_exception) {
  dst.ClearForWrite(vform);
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      float input = src.Float<float>(i);
      float rounded = FPRoundInt(input, rounding_mode);
      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
        FPProcessException();
      }
      dst.SetFloat<float>(i, rounded);
    }
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      double input = src.Float<double>(i);
      double rounded = FPRoundInt(input, rounding_mode);
      if (inexact_exception && !std::isnan(input) && (input != rounded)) {
        FPProcessException();
      }
dst.SetFloat<double>(i, rounded); 4434 } 4435 } 4436 return dst; 4437} 4438 4439 4440LogicVRegister Simulator::fcvts(VectorFormat vform, 4441 LogicVRegister dst, 4442 const LogicVRegister& src, 4443 FPRounding rounding_mode, 4444 int fbits) { 4445 dst.ClearForWrite(vform); 4446 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4447 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4448 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4449 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4450 } 4451 } else { 4452 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4453 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4454 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4455 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4456 } 4457 } 4458 return dst; 4459} 4460 4461 4462LogicVRegister Simulator::fcvtu(VectorFormat vform, 4463 LogicVRegister dst, 4464 const LogicVRegister& src, 4465 FPRounding rounding_mode, 4466 int fbits) { 4467 dst.ClearForWrite(vform); 4468 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4469 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4470 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4471 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4472 } 4473 } else { 4474 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4475 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4476 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4477 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4478 } 4479 } 4480 return dst; 4481} 4482 4483 4484LogicVRegister Simulator::fcvtl(VectorFormat vform, 4485 LogicVRegister dst, 4486 const LogicVRegister& src) { 4487 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4488 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4489 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 4490 } 4491 } else { 4492 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4493 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; 
i--) { 4494 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4495 } 4496 } 4497 return dst; 4498} 4499 4500 4501LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4502 LogicVRegister dst, 4503 const LogicVRegister& src) { 4504 int lane_count = LaneCountFromFormat(vform); 4505 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4506 for (int i = 0; i < lane_count; i++) { 4507 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4508 } 4509 } else { 4510 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4511 for (int i = 0; i < lane_count; i++) { 4512 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4513 } 4514 } 4515 return dst; 4516} 4517 4518 4519LogicVRegister Simulator::fcvtn(VectorFormat vform, 4520 LogicVRegister dst, 4521 const LogicVRegister& src) { 4522 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4523 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4524 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4525 } 4526 } else { 4527 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4528 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4529 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4530 } 4531 } 4532 return dst; 4533} 4534 4535 4536LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4537 LogicVRegister dst, 4538 const LogicVRegister& src) { 4539 int lane_count = LaneCountFromFormat(vform) / 2; 4540 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4541 for (int i = lane_count - 1; i >= 0; i--) { 4542 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4543 } 4544 } else { 4545 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4546 for (int i = lane_count - 1; i >= 0; i--) { 4547 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4548 } 4549 } 4550 return dst; 4551} 4552 4553 4554LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4555 LogicVRegister dst, 4556 const LogicVRegister& src) { 4557 
dst.ClearForWrite(vform); 4558 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4559 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4560 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4561 } 4562 return dst; 4563} 4564 4565 4566LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4567 LogicVRegister dst, 4568 const LogicVRegister& src) { 4569 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4570 int lane_count = LaneCountFromFormat(vform) / 2; 4571 for (int i = lane_count - 1; i >= 0; i--) { 4572 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4573 } 4574 return dst; 4575} 4576 4577 4578// Based on reference C function recip_sqrt_estimate from ARM ARM. 4579double Simulator::recip_sqrt_estimate(double a) { 4580 int q0, q1, s; 4581 double r; 4582 if (a < 0.5) { 4583 q0 = static_cast<int>(a * 512.0); 4584 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4585 } else { 4586 q1 = static_cast<int>(a * 256.0); 4587 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4588 } 4589 s = static_cast<int>(256.0 * r + 0.5); 4590 return static_cast<double>(s) / 256.0; 4591} 4592 4593 4594static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4595 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4596} 4597 4598 4599template <typename T> 4600T Simulator::FPRecipSqrtEstimate(T op) { 4601 if (std::isnan(op)) { 4602 return FPProcessNaN(op); 4603 } else if (op == 0.0) { 4604 if (copysign(1.0, op) < 0.0) { 4605 return kFP64NegativeInfinity; 4606 } else { 4607 return kFP64PositiveInfinity; 4608 } 4609 } else if (copysign(1.0, op) < 0.0) { 4610 FPProcessException(); 4611 return FPDefaultNaN<T>(); 4612 } else if (std::isinf(op)) { 4613 return 0.0; 4614 } else { 4615 uint64_t fraction; 4616 int exp, result_exp; 4617 4618 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4619 exp = FloatExp(op); 4620 fraction = FloatMantissa(op); 4621 fraction <<= 29; 4622 } else { 4623 exp 
= DoubleExp(op); 4624 fraction = DoubleMantissa(op); 4625 } 4626 4627 if (exp == 0) { 4628 while (Bits(fraction, 51, 51) == 0) { 4629 fraction = Bits(fraction, 50, 0) << 1; 4630 exp -= 1; 4631 } 4632 fraction = Bits(fraction, 50, 0) << 1; 4633 } 4634 4635 double scaled; 4636 if (Bits(exp, 0, 0) == 0) { 4637 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4638 } else { 4639 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4640 } 4641 4642 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4643 result_exp = (380 - exp) / 2; 4644 } else { 4645 result_exp = (3068 - exp) / 2; 4646 } 4647 4648 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 4649 4650 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4651 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4652 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4653 return FloatPack(0, exp_bits, est_bits); 4654 } else { 4655 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4656 } 4657 } 4658} 4659 4660 4661LogicVRegister Simulator::frsqrte(VectorFormat vform, 4662 LogicVRegister dst, 4663 const LogicVRegister& src) { 4664 dst.ClearForWrite(vform); 4665 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4666 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4667 float input = src.Float<float>(i); 4668 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4669 } 4670 } else { 4671 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4672 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4673 double input = src.Float<double>(i); 4674 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4675 } 4676 } 4677 return dst; 4678} 4679 4680template <typename T> 4681T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4682 uint32_t sign; 4683 4684 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4685 sign = FloatSign(op); 4686 } else { 4687 sign = DoubleSign(op); 4688 } 4689 4690 if 
(std::isnan(op)) { 4691 return FPProcessNaN(op); 4692 } else if (std::isinf(op)) { 4693 return (sign == 1) ? -0.0 : 0.0; 4694 } else if (op == 0.0) { 4695 FPProcessException(); // FPExc_DivideByZero exception. 4696 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4697 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4698 (std::fabs(op) < std::pow(2.0, -128.0))) || 4699 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4700 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4701 bool overflow_to_inf = false; 4702 switch (rounding) { 4703 case FPTieEven: 4704 overflow_to_inf = true; 4705 break; 4706 case FPPositiveInfinity: 4707 overflow_to_inf = (sign == 0); 4708 break; 4709 case FPNegativeInfinity: 4710 overflow_to_inf = (sign == 1); 4711 break; 4712 case FPZero: 4713 overflow_to_inf = false; 4714 break; 4715 default: 4716 break; 4717 } 4718 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4719 if (overflow_to_inf) { 4720 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4721 } else { 4722 // Return FPMaxNormal(sign). 
4723 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4724 return FloatPack(sign, 0xfe, 0x07fffff); 4725 } else { 4726 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 4727 } 4728 } 4729 } else { 4730 uint64_t fraction; 4731 int exp, result_exp; 4732 uint32_t sign; 4733 4734 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4735 sign = FloatSign(op); 4736 exp = FloatExp(op); 4737 fraction = FloatMantissa(op); 4738 fraction <<= 29; 4739 } else { 4740 sign = DoubleSign(op); 4741 exp = DoubleExp(op); 4742 fraction = DoubleMantissa(op); 4743 } 4744 4745 if (exp == 0) { 4746 if (Bits(fraction, 51, 51) == 0) { 4747 exp -= 1; 4748 fraction = Bits(fraction, 49, 0) << 2; 4749 } else { 4750 fraction = Bits(fraction, 50, 0) << 1; 4751 } 4752 } 4753 4754 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4755 4756 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4757 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4758 } else { 4759 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
4760 } 4761 4762 double estimate = recip_estimate(scaled); 4763 4764 fraction = DoubleMantissa(estimate); 4765 if (result_exp == 0) { 4766 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4767 } else if (result_exp == -1) { 4768 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4769 result_exp = 0; 4770 } 4771 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4772 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4773 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4774 return FloatPack(sign, exp_bits, frac_bits); 4775 } else { 4776 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4777 } 4778 } 4779} 4780 4781 4782LogicVRegister Simulator::frecpe(VectorFormat vform, 4783 LogicVRegister dst, 4784 const LogicVRegister& src, 4785 FPRounding round) { 4786 dst.ClearForWrite(vform); 4787 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4788 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4789 float input = src.Float<float>(i); 4790 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4791 } 4792 } else { 4793 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4794 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4795 double input = src.Float<double>(i); 4796 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4797 } 4798 } 4799 return dst; 4800} 4801 4802 4803LogicVRegister Simulator::ursqrte(VectorFormat vform, 4804 LogicVRegister dst, 4805 const LogicVRegister& src) { 4806 dst.ClearForWrite(vform); 4807 uint64_t operand; 4808 uint32_t result; 4809 double dp_operand, dp_result; 4810 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4811 operand = src.Uint(vform, i); 4812 if (operand <= 0x3FFFFFFF) { 4813 result = 0xFFFFFFFF; 4814 } else { 4815 dp_operand = operand * std::pow(2.0, -32); 4816 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4817 result = static_cast<uint32_t>(dp_result); 4818 } 4819 dst.SetUint(vform, i, result); 4820 } 
4821 return dst; 4822} 4823 4824 4825// Based on reference C function recip_estimate from ARM ARM. 4826double Simulator::recip_estimate(double a) { 4827 int q, s; 4828 double r; 4829 q = static_cast<int>(a * 512.0); 4830 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4831 s = static_cast<int>(256.0 * r + 0.5); 4832 return static_cast<double>(s) / 256.0; 4833} 4834 4835 4836LogicVRegister Simulator::urecpe(VectorFormat vform, 4837 LogicVRegister dst, 4838 const LogicVRegister& src) { 4839 dst.ClearForWrite(vform); 4840 uint64_t operand; 4841 uint32_t result; 4842 double dp_operand, dp_result; 4843 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4844 operand = src.Uint(vform, i); 4845 if (operand <= 0x7FFFFFFF) { 4846 result = 0xFFFFFFFF; 4847 } else { 4848 dp_operand = operand * std::pow(2.0, -32); 4849 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4850 result = static_cast<uint32_t>(dp_result); 4851 } 4852 dst.SetUint(vform, i, result); 4853 } 4854 return dst; 4855} 4856 4857template <typename T> 4858LogicVRegister Simulator::frecpx(VectorFormat vform, 4859 LogicVRegister dst, 4860 const LogicVRegister& src) { 4861 dst.ClearForWrite(vform); 4862 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4863 T op = src.Float<T>(i); 4864 T result; 4865 if (std::isnan(op)) { 4866 result = FPProcessNaN(op); 4867 } else { 4868 int exp; 4869 uint32_t sign; 4870 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4871 sign = FloatSign(op); 4872 exp = FloatExp(op); 4873 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4874 result = FloatPack(sign, exp, 0); 4875 } else { 4876 sign = DoubleSign(op); 4877 exp = DoubleExp(op); 4878 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4879 result = DoublePack(sign, exp, 0); 4880 } 4881 } 4882 dst.SetFloat(i, result); 4883 } 4884 return dst; 4885} 4886 4887 4888LogicVRegister Simulator::frecpx(VectorFormat vform, 4889 LogicVRegister dst, 4890 const LogicVRegister& src) { 4891 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4892 frecpx<float>(vform, dst, src); 4893 } else { 4894 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4895 frecpx<double>(vform, dst, src); 4896 } 4897 return dst; 4898} 4899 4900LogicVRegister Simulator::scvtf(VectorFormat vform, 4901 LogicVRegister dst, 4902 const LogicVRegister& src, 4903 int fbits, 4904 FPRounding round) { 4905 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4906 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4907 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4908 dst.SetFloat<float>(i, result); 4909 } else { 4910 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4911 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4912 dst.SetFloat<double>(i, result); 4913 } 4914 } 4915 return dst; 4916} 4917 4918 4919LogicVRegister Simulator::ucvtf(VectorFormat vform, 4920 LogicVRegister dst, 4921 const LogicVRegister& src, 4922 int fbits, 4923 FPRounding round) { 4924 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4925 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4926 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4927 dst.SetFloat<float>(i, result); 4928 } else { 4929 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4930 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4931 dst.SetFloat<double>(i, result); 4932 } 4933 } 4934 return dst; 4935} 4936 4937 4938} // namespace aarch64 4939} // namespace vixl 4940 4941#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 4942