// logic-aarch64.cc revision 1e1277e629b68c96a1d7b953c2c6f90c7a44cdb7
1// Copyright 2015, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 28 29#include <cmath> 30 31#include "simulator-aarch64.h" 32 33namespace vixl { 34namespace aarch64 { 35 36template <> 37double Simulator::FPDefaultNaN<double>() { 38 return kFP64DefaultNaN; 39} 40 41 42template <> 43float Simulator::FPDefaultNaN<float>() { 44 return kFP32DefaultNaN; 45} 46 47// See FPRound for a description of this function. 
48static inline double FPRoundToDouble(int64_t sign, 49 int64_t exponent, 50 uint64_t mantissa, 51 FPRounding round_mode) { 52 int64_t bits = 53 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, 54 exponent, 55 mantissa, 56 round_mode); 57 return RawbitsToDouble(bits); 58} 59 60 61// See FPRound for a description of this function. 62static inline float FPRoundToFloat(int64_t sign, 63 int64_t exponent, 64 uint64_t mantissa, 65 FPRounding round_mode) { 66 int32_t bits = 67 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, 68 exponent, 69 mantissa, 70 round_mode); 71 return RawbitsToFloat(bits); 72} 73 74 75// See FPRound for a description of this function. 76static inline float16 FPRoundToFloat16(int64_t sign, 77 int64_t exponent, 78 uint64_t mantissa, 79 FPRounding round_mode) { 80 return FPRound<float16, 81 kFloat16ExponentBits, 82 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode); 83} 84 85 86double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 87 if (src >= 0) { 88 return UFixedToDouble(src, fbits, round); 89 } else { 90 // This works for all negative values, including INT64_MIN. 91 return -UFixedToDouble(-src, fbits, round); 92 } 93} 94 95 96double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 97 // An input of 0 is a special case because the result is effectively 98 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 99 if (src == 0) { 100 return 0.0; 101 } 102 103 // Calculate the exponent. The highest significant bit will have the value 104 // 2^exponent. 
105 const int highest_significant_bit = 63 - CountLeadingZeros(src); 106 const int64_t exponent = highest_significant_bit - fbits; 107 108 return FPRoundToDouble(0, exponent, src, round); 109} 110 111 112float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 113 if (src >= 0) { 114 return UFixedToFloat(src, fbits, round); 115 } else { 116 // This works for all negative values, including INT64_MIN. 117 return -UFixedToFloat(-src, fbits, round); 118 } 119} 120 121 122float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 123 // An input of 0 is a special case because the result is effectively 124 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 125 if (src == 0) { 126 return 0.0f; 127 } 128 129 // Calculate the exponent. The highest significant bit will have the value 130 // 2^exponent. 131 const int highest_significant_bit = 63 - CountLeadingZeros(src); 132 const int32_t exponent = highest_significant_bit - fbits; 133 134 return FPRoundToFloat(0, exponent, src, round); 135} 136 137 138double Simulator::FPToDouble(float value) { 139 switch (std::fpclassify(value)) { 140 case FP_NAN: { 141 if (IsSignallingNaN(value)) { 142 FPProcessException(); 143 } 144 if (ReadDN()) return kFP64DefaultNaN; 145 146 // Convert NaNs as the processor would: 147 // - The sign is propagated. 148 // - The payload (mantissa) is transferred entirely, except that the top 149 // bit is forced to '1', making the result a quiet NaN. The unused 150 // (low-order) payload bits are set to 0. 151 uint32_t raw = FloatToRawbits(value); 152 153 uint64_t sign = raw >> 31; 154 uint64_t exponent = (1 << 11) - 1; 155 uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw); 156 payload <<= (52 - 23); // The unused low-order bits should be 0. 157 payload |= (UINT64_C(1) << 51); // Force a quiet NaN. 
158 159 return RawbitsToDouble((sign << 63) | (exponent << 52) | payload); 160 } 161 162 case FP_ZERO: 163 case FP_NORMAL: 164 case FP_SUBNORMAL: 165 case FP_INFINITE: { 166 // All other inputs are preserved in a standard cast, because every value 167 // representable using an IEEE-754 float is also representable using an 168 // IEEE-754 double. 169 return static_cast<double>(value); 170 } 171 } 172 173 VIXL_UNREACHABLE(); 174 return static_cast<double>(value); 175} 176 177 178float Simulator::FPToFloat(float16 value) { 179 uint32_t sign = value >> 15; 180 uint32_t exponent = 181 ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1, 182 kFloat16MantissaBits, 183 value); 184 uint32_t mantissa = 185 ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value); 186 187 switch (Float16Classify(value)) { 188 case FP_ZERO: 189 return (sign == 0) ? 0.0f : -0.0f; 190 191 case FP_INFINITE: 192 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 193 194 case FP_SUBNORMAL: { 195 // Calculate shift required to put mantissa into the most-significant bits 196 // of the destination mantissa. 197 int shift = CountLeadingZeros(mantissa << (32 - 10)); 198 199 // Shift mantissa and discard implicit '1'. 200 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 201 mantissa &= (1 << kFloatMantissaBits) - 1; 202 203 // Adjust the exponent for the shift applied, and rebias. 204 exponent = exponent - shift + (-15 + 127); 205 break; 206 } 207 208 case FP_NAN: 209 if (IsSignallingNaN(value)) { 210 FPProcessException(); 211 } 212 if (ReadDN()) return kFP32DefaultNaN; 213 214 // Convert NaNs as the processor would: 215 // - The sign is propagated. 216 // - The payload (mantissa) is transferred entirely, except that the top 217 // bit is forced to '1', making the result a quiet NaN. The unused 218 // (low-order) payload bits are set to 0. 
219 exponent = (1 << kFloatExponentBits) - 1; 220 221 // Increase bits in mantissa, making low-order bits 0. 222 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 223 mantissa |= 1 << 22; // Force a quiet NaN. 224 break; 225 226 case FP_NORMAL: 227 // Increase bits in mantissa, making low-order bits 0. 228 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 229 230 // Change exponent bias. 231 exponent += (-15 + 127); 232 break; 233 234 default: 235 VIXL_UNREACHABLE(); 236 } 237 return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) | 238 mantissa); 239} 240 241 242float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 243 // Only the FPTieEven rounding mode is implemented. 244 VIXL_ASSERT(round_mode == FPTieEven); 245 USE(round_mode); 246 247 uint32_t raw = FloatToRawbits(value); 248 int32_t sign = raw >> 31; 249 int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127; 250 uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw); 251 252 switch (std::fpclassify(value)) { 253 case FP_NAN: { 254 if (IsSignallingNaN(value)) { 255 FPProcessException(); 256 } 257 if (ReadDN()) return kFP16DefaultNaN; 258 259 // Convert NaNs as the processor would: 260 // - The sign is propagated. 261 // - The payload (mantissa) is transferred as much as possible, except 262 // that the top bit is forced to '1', making the result a quiet NaN. 263 float16 result = 264 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 265 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 266 result |= (1 << 9); // Force a quiet NaN; 267 return result; 268 } 269 270 case FP_ZERO: 271 return (sign == 0) ? 0 : 0x8000; 272 273 case FP_INFINITE: 274 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 275 276 case FP_NORMAL: 277 case FP_SUBNORMAL: { 278 // Convert float-to-half as the processor would, assuming that FPCR.FZ 279 // (flush-to-zero) is not set. 280 281 // Add the implicit '1' bit to the mantissa. 
282 mantissa += (1 << 23); 283 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 284 } 285 } 286 287 VIXL_UNREACHABLE(); 288 return 0; 289} 290 291 292float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 293 // Only the FPTieEven rounding mode is implemented. 294 VIXL_ASSERT(round_mode == FPTieEven); 295 USE(round_mode); 296 297 uint64_t raw = DoubleToRawbits(value); 298 int32_t sign = raw >> 63; 299 int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023; 300 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 301 302 switch (std::fpclassify(value)) { 303 case FP_NAN: { 304 if (IsSignallingNaN(value)) { 305 FPProcessException(); 306 } 307 if (ReadDN()) return kFP16DefaultNaN; 308 309 // Convert NaNs as the processor would: 310 // - The sign is propagated. 311 // - The payload (mantissa) is transferred as much as possible, except 312 // that the top bit is forced to '1', making the result a quiet NaN. 313 float16 result = 314 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 315 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 316 result |= (1 << 9); // Force a quiet NaN; 317 return result; 318 } 319 320 case FP_ZERO: 321 return (sign == 0) ? 0 : 0x8000; 322 323 case FP_INFINITE: 324 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 325 326 case FP_NORMAL: 327 case FP_SUBNORMAL: { 328 // Convert double-to-half as the processor would, assuming that FPCR.FZ 329 // (flush-to-zero) is not set. 330 331 // Add the implicit '1' bit to the mantissa. 332 mantissa += (UINT64_C(1) << 52); 333 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 334 } 335 } 336 337 VIXL_UNREACHABLE(); 338 return 0; 339} 340 341 342float Simulator::FPToFloat(double value, FPRounding round_mode) { 343 // Only the FPTieEven rounding mode is implemented. 
344 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 345 USE(round_mode); 346 347 switch (std::fpclassify(value)) { 348 case FP_NAN: { 349 if (IsSignallingNaN(value)) { 350 FPProcessException(); 351 } 352 if (ReadDN()) return kFP32DefaultNaN; 353 354 // Convert NaNs as the processor would: 355 // - The sign is propagated. 356 // - The payload (mantissa) is transferred as much as possible, except 357 // that the top bit is forced to '1', making the result a quiet NaN. 358 uint64_t raw = DoubleToRawbits(value); 359 360 uint32_t sign = raw >> 63; 361 uint32_t exponent = (1 << 8) - 1; 362 uint32_t payload = 363 static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw)); 364 payload |= (1 << 22); // Force a quiet NaN. 365 366 return RawbitsToFloat((sign << 31) | (exponent << 23) | payload); 367 } 368 369 case FP_ZERO: 370 case FP_INFINITE: { 371 // In a C++ cast, any value representable in the target type will be 372 // unchanged. This is always the case for +/-0.0 and infinities. 373 return static_cast<float>(value); 374 } 375 376 case FP_NORMAL: 377 case FP_SUBNORMAL: { 378 // Convert double-to-float as the processor would, assuming that FPCR.FZ 379 // (flush-to-zero) is not set. 380 uint64_t raw = DoubleToRawbits(value); 381 // Extract the IEEE-754 double components. 382 uint32_t sign = raw >> 63; 383 // Extract the exponent and remove the IEEE-754 encoding bias. 384 int32_t exponent = 385 static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; 386 // Extract the mantissa and add the implicit '1' bit. 
387 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); 388 if (std::fpclassify(value) == FP_NORMAL) { 389 mantissa |= (UINT64_C(1) << 52); 390 } 391 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 392 } 393 } 394 395 VIXL_UNREACHABLE(); 396 return value; 397} 398 399 400void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 401 dst.ClearForWrite(vform); 402 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 403 dst.ReadUintFromMem(vform, i, addr); 404 addr += LaneSizeInBytesFromFormat(vform); 405 } 406} 407 408 409void Simulator::ld1(VectorFormat vform, 410 LogicVRegister dst, 411 int index, 412 uint64_t addr) { 413 dst.ReadUintFromMem(vform, index, addr); 414} 415 416 417void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 418 dst.ClearForWrite(vform); 419 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 420 dst.ReadUintFromMem(vform, i, addr); 421 } 422} 423 424 425void Simulator::ld2(VectorFormat vform, 426 LogicVRegister dst1, 427 LogicVRegister dst2, 428 uint64_t addr1) { 429 dst1.ClearForWrite(vform); 430 dst2.ClearForWrite(vform); 431 int esize = LaneSizeInBytesFromFormat(vform); 432 uint64_t addr2 = addr1 + esize; 433 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 434 dst1.ReadUintFromMem(vform, i, addr1); 435 dst2.ReadUintFromMem(vform, i, addr2); 436 addr1 += 2 * esize; 437 addr2 += 2 * esize; 438 } 439} 440 441 442void Simulator::ld2(VectorFormat vform, 443 LogicVRegister dst1, 444 LogicVRegister dst2, 445 int index, 446 uint64_t addr1) { 447 dst1.ClearForWrite(vform); 448 dst2.ClearForWrite(vform); 449 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 450 dst1.ReadUintFromMem(vform, index, addr1); 451 dst2.ReadUintFromMem(vform, index, addr2); 452} 453 454 455void Simulator::ld2r(VectorFormat vform, 456 LogicVRegister dst1, 457 LogicVRegister dst2, 458 uint64_t addr) { 459 dst1.ClearForWrite(vform); 460 dst2.ClearForWrite(vform); 461 uint64_t addr2 = addr + 
LaneSizeInBytesFromFormat(vform); 462 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 463 dst1.ReadUintFromMem(vform, i, addr); 464 dst2.ReadUintFromMem(vform, i, addr2); 465 } 466} 467 468 469void Simulator::ld3(VectorFormat vform, 470 LogicVRegister dst1, 471 LogicVRegister dst2, 472 LogicVRegister dst3, 473 uint64_t addr1) { 474 dst1.ClearForWrite(vform); 475 dst2.ClearForWrite(vform); 476 dst3.ClearForWrite(vform); 477 int esize = LaneSizeInBytesFromFormat(vform); 478 uint64_t addr2 = addr1 + esize; 479 uint64_t addr3 = addr2 + esize; 480 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 481 dst1.ReadUintFromMem(vform, i, addr1); 482 dst2.ReadUintFromMem(vform, i, addr2); 483 dst3.ReadUintFromMem(vform, i, addr3); 484 addr1 += 3 * esize; 485 addr2 += 3 * esize; 486 addr3 += 3 * esize; 487 } 488} 489 490 491void Simulator::ld3(VectorFormat vform, 492 LogicVRegister dst1, 493 LogicVRegister dst2, 494 LogicVRegister dst3, 495 int index, 496 uint64_t addr1) { 497 dst1.ClearForWrite(vform); 498 dst2.ClearForWrite(vform); 499 dst3.ClearForWrite(vform); 500 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 501 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 502 dst1.ReadUintFromMem(vform, index, addr1); 503 dst2.ReadUintFromMem(vform, index, addr2); 504 dst3.ReadUintFromMem(vform, index, addr3); 505} 506 507 508void Simulator::ld3r(VectorFormat vform, 509 LogicVRegister dst1, 510 LogicVRegister dst2, 511 LogicVRegister dst3, 512 uint64_t addr) { 513 dst1.ClearForWrite(vform); 514 dst2.ClearForWrite(vform); 515 dst3.ClearForWrite(vform); 516 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 517 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 518 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 519 dst1.ReadUintFromMem(vform, i, addr); 520 dst2.ReadUintFromMem(vform, i, addr2); 521 dst3.ReadUintFromMem(vform, i, addr3); 522 } 523} 524 525 526void Simulator::ld4(VectorFormat vform, 527 LogicVRegister dst1, 528 
LogicVRegister dst2, 529 LogicVRegister dst3, 530 LogicVRegister dst4, 531 uint64_t addr1) { 532 dst1.ClearForWrite(vform); 533 dst2.ClearForWrite(vform); 534 dst3.ClearForWrite(vform); 535 dst4.ClearForWrite(vform); 536 int esize = LaneSizeInBytesFromFormat(vform); 537 uint64_t addr2 = addr1 + esize; 538 uint64_t addr3 = addr2 + esize; 539 uint64_t addr4 = addr3 + esize; 540 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 541 dst1.ReadUintFromMem(vform, i, addr1); 542 dst2.ReadUintFromMem(vform, i, addr2); 543 dst3.ReadUintFromMem(vform, i, addr3); 544 dst4.ReadUintFromMem(vform, i, addr4); 545 addr1 += 4 * esize; 546 addr2 += 4 * esize; 547 addr3 += 4 * esize; 548 addr4 += 4 * esize; 549 } 550} 551 552 553void Simulator::ld4(VectorFormat vform, 554 LogicVRegister dst1, 555 LogicVRegister dst2, 556 LogicVRegister dst3, 557 LogicVRegister dst4, 558 int index, 559 uint64_t addr1) { 560 dst1.ClearForWrite(vform); 561 dst2.ClearForWrite(vform); 562 dst3.ClearForWrite(vform); 563 dst4.ClearForWrite(vform); 564 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 565 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 566 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 567 dst1.ReadUintFromMem(vform, index, addr1); 568 dst2.ReadUintFromMem(vform, index, addr2); 569 dst3.ReadUintFromMem(vform, index, addr3); 570 dst4.ReadUintFromMem(vform, index, addr4); 571} 572 573 574void Simulator::ld4r(VectorFormat vform, 575 LogicVRegister dst1, 576 LogicVRegister dst2, 577 LogicVRegister dst3, 578 LogicVRegister dst4, 579 uint64_t addr) { 580 dst1.ClearForWrite(vform); 581 dst2.ClearForWrite(vform); 582 dst3.ClearForWrite(vform); 583 dst4.ClearForWrite(vform); 584 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 585 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 586 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 587 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 588 dst1.ReadUintFromMem(vform, i, addr); 589 
dst2.ReadUintFromMem(vform, i, addr2); 590 dst3.ReadUintFromMem(vform, i, addr3); 591 dst4.ReadUintFromMem(vform, i, addr4); 592 } 593} 594 595 596void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { 597 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 598 src.WriteUintToMem(vform, i, addr); 599 addr += LaneSizeInBytesFromFormat(vform); 600 } 601} 602 603 604void Simulator::st1(VectorFormat vform, 605 LogicVRegister src, 606 int index, 607 uint64_t addr) { 608 src.WriteUintToMem(vform, index, addr); 609} 610 611 612void Simulator::st2(VectorFormat vform, 613 LogicVRegister dst, 614 LogicVRegister dst2, 615 uint64_t addr) { 616 int esize = LaneSizeInBytesFromFormat(vform); 617 uint64_t addr2 = addr + esize; 618 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 619 dst.WriteUintToMem(vform, i, addr); 620 dst2.WriteUintToMem(vform, i, addr2); 621 addr += 2 * esize; 622 addr2 += 2 * esize; 623 } 624} 625 626 627void Simulator::st2(VectorFormat vform, 628 LogicVRegister dst, 629 LogicVRegister dst2, 630 int index, 631 uint64_t addr) { 632 int esize = LaneSizeInBytesFromFormat(vform); 633 dst.WriteUintToMem(vform, index, addr); 634 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 635} 636 637 638void Simulator::st3(VectorFormat vform, 639 LogicVRegister dst, 640 LogicVRegister dst2, 641 LogicVRegister dst3, 642 uint64_t addr) { 643 int esize = LaneSizeInBytesFromFormat(vform); 644 uint64_t addr2 = addr + esize; 645 uint64_t addr3 = addr2 + esize; 646 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 647 dst.WriteUintToMem(vform, i, addr); 648 dst2.WriteUintToMem(vform, i, addr2); 649 dst3.WriteUintToMem(vform, i, addr3); 650 addr += 3 * esize; 651 addr2 += 3 * esize; 652 addr3 += 3 * esize; 653 } 654} 655 656 657void Simulator::st3(VectorFormat vform, 658 LogicVRegister dst, 659 LogicVRegister dst2, 660 LogicVRegister dst3, 661 int index, 662 uint64_t addr) { 663 int esize = LaneSizeInBytesFromFormat(vform); 664 
dst.WriteUintToMem(vform, index, addr); 665 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 666 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 667} 668 669 670void Simulator::st4(VectorFormat vform, 671 LogicVRegister dst, 672 LogicVRegister dst2, 673 LogicVRegister dst3, 674 LogicVRegister dst4, 675 uint64_t addr) { 676 int esize = LaneSizeInBytesFromFormat(vform); 677 uint64_t addr2 = addr + esize; 678 uint64_t addr3 = addr2 + esize; 679 uint64_t addr4 = addr3 + esize; 680 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 681 dst.WriteUintToMem(vform, i, addr); 682 dst2.WriteUintToMem(vform, i, addr2); 683 dst3.WriteUintToMem(vform, i, addr3); 684 dst4.WriteUintToMem(vform, i, addr4); 685 addr += 4 * esize; 686 addr2 += 4 * esize; 687 addr3 += 4 * esize; 688 addr4 += 4 * esize; 689 } 690} 691 692 693void Simulator::st4(VectorFormat vform, 694 LogicVRegister dst, 695 LogicVRegister dst2, 696 LogicVRegister dst3, 697 LogicVRegister dst4, 698 int index, 699 uint64_t addr) { 700 int esize = LaneSizeInBytesFromFormat(vform); 701 dst.WriteUintToMem(vform, index, addr); 702 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 703 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 704 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 705} 706 707 708LogicVRegister Simulator::cmp(VectorFormat vform, 709 LogicVRegister dst, 710 const LogicVRegister& src1, 711 const LogicVRegister& src2, 712 Condition cond) { 713 dst.ClearForWrite(vform); 714 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 715 int64_t sa = src1.Int(vform, i); 716 int64_t sb = src2.Int(vform, i); 717 uint64_t ua = src1.Uint(vform, i); 718 uint64_t ub = src2.Uint(vform, i); 719 bool result = false; 720 switch (cond) { 721 case eq: 722 result = (ua == ub); 723 break; 724 case ge: 725 result = (sa >= sb); 726 break; 727 case gt: 728 result = (sa > sb); 729 break; 730 case hi: 731 result = (ua > ub); 732 break; 733 case hs: 734 result = (ua >= ub); 735 break; 736 case lt: 737 result = 
(sa < sb); 738 break; 739 case le: 740 result = (sa <= sb); 741 break; 742 default: 743 VIXL_UNREACHABLE(); 744 break; 745 } 746 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 747 } 748 return dst; 749} 750 751 752LogicVRegister Simulator::cmp(VectorFormat vform, 753 LogicVRegister dst, 754 const LogicVRegister& src1, 755 int imm, 756 Condition cond) { 757 SimVRegister temp; 758 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 759 return cmp(vform, dst, src1, imm_reg, cond); 760} 761 762 763LogicVRegister Simulator::cmptst(VectorFormat vform, 764 LogicVRegister dst, 765 const LogicVRegister& src1, 766 const LogicVRegister& src2) { 767 dst.ClearForWrite(vform); 768 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 769 uint64_t ua = src1.Uint(vform, i); 770 uint64_t ub = src2.Uint(vform, i); 771 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 772 } 773 return dst; 774} 775 776 777LogicVRegister Simulator::add(VectorFormat vform, 778 LogicVRegister dst, 779 const LogicVRegister& src1, 780 const LogicVRegister& src2) { 781 dst.ClearForWrite(vform); 782 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 783 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 784 // Test for unsigned saturation. 785 uint64_t ua = src1.UintLeftJustified(vform, i); 786 uint64_t ub = src2.UintLeftJustified(vform, i); 787 uint64_t ur = ua + ub; 788 if (ur < ua) { 789 dst.SetUnsignedSat(i, true); 790 } 791 792 // Test for signed saturation. 793 int64_t sa = src1.IntLeftJustified(vform, i); 794 int64_t sb = src2.IntLeftJustified(vform, i); 795 int64_t sr = sa + sb; 796 // If the signs of the operands are the same, but different from the result, 797 // there was an overflow. 
798 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 799 dst.SetSignedSat(i, sa >= 0); 800 } 801 802 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 803 } 804 return dst; 805} 806 807 808LogicVRegister Simulator::addp(VectorFormat vform, 809 LogicVRegister dst, 810 const LogicVRegister& src1, 811 const LogicVRegister& src2) { 812 SimVRegister temp1, temp2; 813 uzp1(vform, temp1, src1, src2); 814 uzp2(vform, temp2, src1, src2); 815 add(vform, dst, temp1, temp2); 816 return dst; 817} 818 819 820LogicVRegister Simulator::mla(VectorFormat vform, 821 LogicVRegister dst, 822 const LogicVRegister& src1, 823 const LogicVRegister& src2) { 824 SimVRegister temp; 825 mul(vform, temp, src1, src2); 826 add(vform, dst, dst, temp); 827 return dst; 828} 829 830 831LogicVRegister Simulator::mls(VectorFormat vform, 832 LogicVRegister dst, 833 const LogicVRegister& src1, 834 const LogicVRegister& src2) { 835 SimVRegister temp; 836 mul(vform, temp, src1, src2); 837 sub(vform, dst, dst, temp); 838 return dst; 839} 840 841 842LogicVRegister Simulator::mul(VectorFormat vform, 843 LogicVRegister dst, 844 const LogicVRegister& src1, 845 const LogicVRegister& src2) { 846 dst.ClearForWrite(vform); 847 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 848 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 849 } 850 return dst; 851} 852 853 854LogicVRegister Simulator::mul(VectorFormat vform, 855 LogicVRegister dst, 856 const LogicVRegister& src1, 857 const LogicVRegister& src2, 858 int index) { 859 SimVRegister temp; 860 VectorFormat indexform = VectorFormatFillQ(vform); 861 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 862} 863 864 865LogicVRegister Simulator::mla(VectorFormat vform, 866 LogicVRegister dst, 867 const LogicVRegister& src1, 868 const LogicVRegister& src2, 869 int index) { 870 SimVRegister temp; 871 VectorFormat indexform = VectorFormatFillQ(vform); 872 return mla(vform, dst, src1, dup_element(indexform, 
temp, src2, index)); 873} 874 875 876LogicVRegister Simulator::mls(VectorFormat vform, 877 LogicVRegister dst, 878 const LogicVRegister& src1, 879 const LogicVRegister& src2, 880 int index) { 881 SimVRegister temp; 882 VectorFormat indexform = VectorFormatFillQ(vform); 883 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 884} 885 886 887LogicVRegister Simulator::smull(VectorFormat vform, 888 LogicVRegister dst, 889 const LogicVRegister& src1, 890 const LogicVRegister& src2, 891 int index) { 892 SimVRegister temp; 893 VectorFormat indexform = 894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 895 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 896} 897 898 899LogicVRegister Simulator::smull2(VectorFormat vform, 900 LogicVRegister dst, 901 const LogicVRegister& src1, 902 const LogicVRegister& src2, 903 int index) { 904 SimVRegister temp; 905 VectorFormat indexform = 906 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 907 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 908} 909 910 911LogicVRegister Simulator::umull(VectorFormat vform, 912 LogicVRegister dst, 913 const LogicVRegister& src1, 914 const LogicVRegister& src2, 915 int index) { 916 SimVRegister temp; 917 VectorFormat indexform = 918 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 919 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 920} 921 922 923LogicVRegister Simulator::umull2(VectorFormat vform, 924 LogicVRegister dst, 925 const LogicVRegister& src1, 926 const LogicVRegister& src2, 927 int index) { 928 SimVRegister temp; 929 VectorFormat indexform = 930 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 931 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 932} 933 934 935LogicVRegister Simulator::smlal(VectorFormat vform, 936 LogicVRegister dst, 937 const LogicVRegister& src1, 938 const LogicVRegister& src2, 939 int index) { 940 
SimVRegister temp; 941 VectorFormat indexform = 942 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 943 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 944} 945 946 947LogicVRegister Simulator::smlal2(VectorFormat vform, 948 LogicVRegister dst, 949 const LogicVRegister& src1, 950 const LogicVRegister& src2, 951 int index) { 952 SimVRegister temp; 953 VectorFormat indexform = 954 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 955 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 956} 957 958 959LogicVRegister Simulator::umlal(VectorFormat vform, 960 LogicVRegister dst, 961 const LogicVRegister& src1, 962 const LogicVRegister& src2, 963 int index) { 964 SimVRegister temp; 965 VectorFormat indexform = 966 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 967 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 968} 969 970 971LogicVRegister Simulator::umlal2(VectorFormat vform, 972 LogicVRegister dst, 973 const LogicVRegister& src1, 974 const LogicVRegister& src2, 975 int index) { 976 SimVRegister temp; 977 VectorFormat indexform = 978 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 979 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 980} 981 982 983LogicVRegister Simulator::smlsl(VectorFormat vform, 984 LogicVRegister dst, 985 const LogicVRegister& src1, 986 const LogicVRegister& src2, 987 int index) { 988 SimVRegister temp; 989 VectorFormat indexform = 990 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 991 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 992} 993 994 995LogicVRegister Simulator::smlsl2(VectorFormat vform, 996 LogicVRegister dst, 997 const LogicVRegister& src1, 998 const LogicVRegister& src2, 999 int index) { 1000 SimVRegister temp; 1001 VectorFormat indexform = 1002 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1003 return smlsl2(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 1004} 1005 1006 1007LogicVRegister Simulator::umlsl(VectorFormat vform, 1008 LogicVRegister dst, 1009 const LogicVRegister& src1, 1010 const LogicVRegister& src2, 1011 int index) { 1012 SimVRegister temp; 1013 VectorFormat indexform = 1014 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1015 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1016} 1017 1018 1019LogicVRegister Simulator::umlsl2(VectorFormat vform, 1020 LogicVRegister dst, 1021 const LogicVRegister& src1, 1022 const LogicVRegister& src2, 1023 int index) { 1024 SimVRegister temp; 1025 VectorFormat indexform = 1026 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1027 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1028} 1029 1030 1031LogicVRegister Simulator::sqdmull(VectorFormat vform, 1032 LogicVRegister dst, 1033 const LogicVRegister& src1, 1034 const LogicVRegister& src2, 1035 int index) { 1036 SimVRegister temp; 1037 VectorFormat indexform = 1038 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1039 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1040} 1041 1042 1043LogicVRegister Simulator::sqdmull2(VectorFormat vform, 1044 LogicVRegister dst, 1045 const LogicVRegister& src1, 1046 const LogicVRegister& src2, 1047 int index) { 1048 SimVRegister temp; 1049 VectorFormat indexform = 1050 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1051 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1052} 1053 1054 1055LogicVRegister Simulator::sqdmlal(VectorFormat vform, 1056 LogicVRegister dst, 1057 const LogicVRegister& src1, 1058 const LogicVRegister& src2, 1059 int index) { 1060 SimVRegister temp; 1061 VectorFormat indexform = 1062 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1063 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1064} 1065 1066 1067LogicVRegister 
Simulator::sqdmlal2(VectorFormat vform, 1068 LogicVRegister dst, 1069 const LogicVRegister& src1, 1070 const LogicVRegister& src2, 1071 int index) { 1072 SimVRegister temp; 1073 VectorFormat indexform = 1074 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1075 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1076} 1077 1078 1079LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1080 LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2, 1083 int index) { 1084 SimVRegister temp; 1085 VectorFormat indexform = 1086 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1087 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1088} 1089 1090 1091LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1092 LogicVRegister dst, 1093 const LogicVRegister& src1, 1094 const LogicVRegister& src2, 1095 int index) { 1096 SimVRegister temp; 1097 VectorFormat indexform = 1098 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1099 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1100} 1101 1102 1103LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1104 LogicVRegister dst, 1105 const LogicVRegister& src1, 1106 const LogicVRegister& src2, 1107 int index) { 1108 SimVRegister temp; 1109 VectorFormat indexform = VectorFormatFillQ(vform); 1110 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1111} 1112 1113 1114LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1115 LogicVRegister dst, 1116 const LogicVRegister& src1, 1117 const LogicVRegister& src2, 1118 int index) { 1119 SimVRegister temp; 1120 VectorFormat indexform = VectorFormatFillQ(vform); 1121 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1122} 1123 1124 1125uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { 1126 uint16_t result = 0; 1127 uint16_t extended_op2 = op2; 1128 for (int i = 0; i < 8; ++i) { 
1129 if ((op1 >> i) & 1) { 1130 result = result ^ (extended_op2 << i); 1131 } 1132 } 1133 return result; 1134} 1135 1136 1137LogicVRegister Simulator::pmul(VectorFormat vform, 1138 LogicVRegister dst, 1139 const LogicVRegister& src1, 1140 const LogicVRegister& src2) { 1141 dst.ClearForWrite(vform); 1142 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1143 dst.SetUint(vform, 1144 i, 1145 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1146 } 1147 return dst; 1148} 1149 1150 1151LogicVRegister Simulator::pmull(VectorFormat vform, 1152 LogicVRegister dst, 1153 const LogicVRegister& src1, 1154 const LogicVRegister& src2) { 1155 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1156 dst.ClearForWrite(vform); 1157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1158 dst.SetUint(vform, 1159 i, 1160 PolynomialMult(src1.Uint(vform_src, i), 1161 src2.Uint(vform_src, i))); 1162 } 1163 return dst; 1164} 1165 1166 1167LogicVRegister Simulator::pmull2(VectorFormat vform, 1168 LogicVRegister dst, 1169 const LogicVRegister& src1, 1170 const LogicVRegister& src2) { 1171 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1172 dst.ClearForWrite(vform); 1173 int lane_count = LaneCountFromFormat(vform); 1174 for (int i = 0; i < lane_count; i++) { 1175 dst.SetUint(vform, 1176 i, 1177 PolynomialMult(src1.Uint(vform_src, lane_count + i), 1178 src2.Uint(vform_src, lane_count + i))); 1179 } 1180 return dst; 1181} 1182 1183 1184LogicVRegister Simulator::sub(VectorFormat vform, 1185 LogicVRegister dst, 1186 const LogicVRegister& src1, 1187 const LogicVRegister& src2) { 1188 dst.ClearForWrite(vform); 1189 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1190 // Test for unsigned saturation. 1191 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 1192 dst.SetUnsignedSat(i, false); 1193 } 1194 1195 // Test for signed saturation. 
1196 int64_t sa = src1.IntLeftJustified(vform, i); 1197 int64_t sb = src2.IntLeftJustified(vform, i); 1198 int64_t sr = sa - sb; 1199 // If the signs of the operands are different, and the sign of the first 1200 // operand doesn't match the result, there was an overflow. 1201 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 1202 dst.SetSignedSat(i, sr < 0); 1203 } 1204 1205 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 1206 } 1207 return dst; 1208} 1209 1210 1211LogicVRegister Simulator::and_(VectorFormat vform, 1212 LogicVRegister dst, 1213 const LogicVRegister& src1, 1214 const LogicVRegister& src2) { 1215 dst.ClearForWrite(vform); 1216 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1217 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1218 } 1219 return dst; 1220} 1221 1222 1223LogicVRegister Simulator::orr(VectorFormat vform, 1224 LogicVRegister dst, 1225 const LogicVRegister& src1, 1226 const LogicVRegister& src2) { 1227 dst.ClearForWrite(vform); 1228 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1229 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1230 } 1231 return dst; 1232} 1233 1234 1235LogicVRegister Simulator::orn(VectorFormat vform, 1236 LogicVRegister dst, 1237 const LogicVRegister& src1, 1238 const LogicVRegister& src2) { 1239 dst.ClearForWrite(vform); 1240 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1241 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1242 } 1243 return dst; 1244} 1245 1246 1247LogicVRegister Simulator::eor(VectorFormat vform, 1248 LogicVRegister dst, 1249 const LogicVRegister& src1, 1250 const LogicVRegister& src2) { 1251 dst.ClearForWrite(vform); 1252 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1253 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1254 } 1255 return dst; 1256} 1257 1258 1259LogicVRegister Simulator::bic(VectorFormat vform, 1260 LogicVRegister dst, 1261 const LogicVRegister& 
src1, 1262 const LogicVRegister& src2) { 1263 dst.ClearForWrite(vform); 1264 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1265 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1266 } 1267 return dst; 1268} 1269 1270 1271LogicVRegister Simulator::bic(VectorFormat vform, 1272 LogicVRegister dst, 1273 const LogicVRegister& src, 1274 uint64_t imm) { 1275 uint64_t result[16]; 1276 int laneCount = LaneCountFromFormat(vform); 1277 for (int i = 0; i < laneCount; ++i) { 1278 result[i] = src.Uint(vform, i) & ~imm; 1279 } 1280 dst.ClearForWrite(vform); 1281 for (int i = 0; i < laneCount; ++i) { 1282 dst.SetUint(vform, i, result[i]); 1283 } 1284 return dst; 1285} 1286 1287 1288LogicVRegister Simulator::bif(VectorFormat vform, 1289 LogicVRegister dst, 1290 const LogicVRegister& src1, 1291 const LogicVRegister& src2) { 1292 dst.ClearForWrite(vform); 1293 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1294 uint64_t operand1 = dst.Uint(vform, i); 1295 uint64_t operand2 = ~src2.Uint(vform, i); 1296 uint64_t operand3 = src1.Uint(vform, i); 1297 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1298 dst.SetUint(vform, i, result); 1299 } 1300 return dst; 1301} 1302 1303 1304LogicVRegister Simulator::bit(VectorFormat vform, 1305 LogicVRegister dst, 1306 const LogicVRegister& src1, 1307 const LogicVRegister& src2) { 1308 dst.ClearForWrite(vform); 1309 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1310 uint64_t operand1 = dst.Uint(vform, i); 1311 uint64_t operand2 = src2.Uint(vform, i); 1312 uint64_t operand3 = src1.Uint(vform, i); 1313 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1314 dst.SetUint(vform, i, result); 1315 } 1316 return dst; 1317} 1318 1319 1320LogicVRegister Simulator::bsl(VectorFormat vform, 1321 LogicVRegister dst, 1322 const LogicVRegister& src1, 1323 const LogicVRegister& src2) { 1324 dst.ClearForWrite(vform); 1325 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1326 uint64_t 
operand1 = src2.Uint(vform, i); 1327 uint64_t operand2 = dst.Uint(vform, i); 1328 uint64_t operand3 = src1.Uint(vform, i); 1329 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1330 dst.SetUint(vform, i, result); 1331 } 1332 return dst; 1333} 1334 1335 1336LogicVRegister Simulator::sminmax(VectorFormat vform, 1337 LogicVRegister dst, 1338 const LogicVRegister& src1, 1339 const LogicVRegister& src2, 1340 bool max) { 1341 dst.ClearForWrite(vform); 1342 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1343 int64_t src1_val = src1.Int(vform, i); 1344 int64_t src2_val = src2.Int(vform, i); 1345 int64_t dst_val; 1346 if (max) { 1347 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1348 } else { 1349 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1350 } 1351 dst.SetInt(vform, i, dst_val); 1352 } 1353 return dst; 1354} 1355 1356 1357LogicVRegister Simulator::smax(VectorFormat vform, 1358 LogicVRegister dst, 1359 const LogicVRegister& src1, 1360 const LogicVRegister& src2) { 1361 return sminmax(vform, dst, src1, src2, true); 1362} 1363 1364 1365LogicVRegister Simulator::smin(VectorFormat vform, 1366 LogicVRegister dst, 1367 const LogicVRegister& src1, 1368 const LogicVRegister& src2) { 1369 return sminmax(vform, dst, src1, src2, false); 1370} 1371 1372 1373LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1374 LogicVRegister dst, 1375 const LogicVRegister& src1, 1376 const LogicVRegister& src2, 1377 bool max) { 1378 int lanes = LaneCountFromFormat(vform); 1379 int64_t result[kMaxLanesPerVector]; 1380 const LogicVRegister* src = &src1; 1381 for (int j = 0; j < 2; j++) { 1382 for (int i = 0; i < lanes; i += 2) { 1383 int64_t first_val = src->Int(vform, i); 1384 int64_t second_val = src->Int(vform, i + 1); 1385 int64_t dst_val; 1386 if (max) { 1387 dst_val = (first_val > second_val) ? first_val : second_val; 1388 } else { 1389 dst_val = (first_val < second_val) ? 
first_val : second_val; 1390 } 1391 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1392 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1393 } 1394 src = &src2; 1395 } 1396 dst.SetIntArray(vform, result); 1397 return dst; 1398} 1399 1400 1401LogicVRegister Simulator::smaxp(VectorFormat vform, 1402 LogicVRegister dst, 1403 const LogicVRegister& src1, 1404 const LogicVRegister& src2) { 1405 return sminmaxp(vform, dst, src1, src2, true); 1406} 1407 1408 1409LogicVRegister Simulator::sminp(VectorFormat vform, 1410 LogicVRegister dst, 1411 const LogicVRegister& src1, 1412 const LogicVRegister& src2) { 1413 return sminmaxp(vform, dst, src1, src2, false); 1414} 1415 1416 1417LogicVRegister Simulator::addp(VectorFormat vform, 1418 LogicVRegister dst, 1419 const LogicVRegister& src) { 1420 VIXL_ASSERT(vform == kFormatD); 1421 1422 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1423 dst.ClearForWrite(vform); 1424 dst.SetInt(vform, 0, dst_val); 1425 return dst; 1426} 1427 1428 1429LogicVRegister Simulator::addv(VectorFormat vform, 1430 LogicVRegister dst, 1431 const LogicVRegister& src) { 1432 VectorFormat vform_dst = 1433 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1434 1435 1436 int64_t dst_val = 0; 1437 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1438 dst_val += src.Int(vform, i); 1439 } 1440 1441 dst.ClearForWrite(vform_dst); 1442 dst.SetInt(vform_dst, 0, dst_val); 1443 return dst; 1444} 1445 1446 1447LogicVRegister Simulator::saddlv(VectorFormat vform, 1448 LogicVRegister dst, 1449 const LogicVRegister& src) { 1450 VectorFormat vform_dst = 1451 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1452 1453 int64_t dst_val = 0; 1454 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1455 dst_val += src.Int(vform, i); 1456 } 1457 1458 dst.ClearForWrite(vform_dst); 1459 dst.SetInt(vform_dst, 0, dst_val); 1460 return dst; 1461} 1462 1463 1464LogicVRegister Simulator::uaddlv(VectorFormat vform, 1465 
LogicVRegister dst, 1466 const LogicVRegister& src) { 1467 VectorFormat vform_dst = 1468 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1469 1470 uint64_t dst_val = 0; 1471 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1472 dst_val += src.Uint(vform, i); 1473 } 1474 1475 dst.ClearForWrite(vform_dst); 1476 dst.SetUint(vform_dst, 0, dst_val); 1477 return dst; 1478} 1479 1480 1481LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1482 LogicVRegister dst, 1483 const LogicVRegister& src, 1484 bool max) { 1485 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1486 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1487 int64_t src_val = src.Int(vform, i); 1488 if (max) { 1489 dst_val = (src_val > dst_val) ? src_val : dst_val; 1490 } else { 1491 dst_val = (src_val < dst_val) ? src_val : dst_val; 1492 } 1493 } 1494 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1495 dst.SetInt(vform, 0, dst_val); 1496 return dst; 1497} 1498 1499 1500LogicVRegister Simulator::smaxv(VectorFormat vform, 1501 LogicVRegister dst, 1502 const LogicVRegister& src) { 1503 sminmaxv(vform, dst, src, true); 1504 return dst; 1505} 1506 1507 1508LogicVRegister Simulator::sminv(VectorFormat vform, 1509 LogicVRegister dst, 1510 const LogicVRegister& src) { 1511 sminmaxv(vform, dst, src, false); 1512 return dst; 1513} 1514 1515 1516LogicVRegister Simulator::uminmax(VectorFormat vform, 1517 LogicVRegister dst, 1518 const LogicVRegister& src1, 1519 const LogicVRegister& src2, 1520 bool max) { 1521 dst.ClearForWrite(vform); 1522 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1523 uint64_t src1_val = src1.Uint(vform, i); 1524 uint64_t src2_val = src2.Uint(vform, i); 1525 uint64_t dst_val; 1526 if (max) { 1527 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1528 } else { 1529 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1530 } 1531 dst.SetUint(vform, i, dst_val); 1532 } 1533 return dst; 1534} 1535 1536 1537LogicVRegister Simulator::umax(VectorFormat vform, 1538 LogicVRegister dst, 1539 const LogicVRegister& src1, 1540 const LogicVRegister& src2) { 1541 return uminmax(vform, dst, src1, src2, true); 1542} 1543 1544 1545LogicVRegister Simulator::umin(VectorFormat vform, 1546 LogicVRegister dst, 1547 const LogicVRegister& src1, 1548 const LogicVRegister& src2) { 1549 return uminmax(vform, dst, src1, src2, false); 1550} 1551 1552 1553LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1554 LogicVRegister dst, 1555 const LogicVRegister& src1, 1556 const LogicVRegister& src2, 1557 bool max) { 1558 int lanes = LaneCountFromFormat(vform); 1559 uint64_t result[kMaxLanesPerVector]; 1560 const LogicVRegister* src = &src1; 1561 for (int j = 0; j < 2; j++) { 1562 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1563 uint64_t first_val = src->Uint(vform, i); 1564 uint64_t second_val = src->Uint(vform, i + 1); 1565 uint64_t dst_val; 1566 if (max) { 1567 dst_val = (first_val > second_val) ? first_val : second_val; 1568 } else { 1569 dst_val = (first_val < second_val) ? 
first_val : second_val; 1570 } 1571 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1572 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1573 } 1574 src = &src2; 1575 } 1576 dst.SetUintArray(vform, result); 1577 return dst; 1578} 1579 1580 1581LogicVRegister Simulator::umaxp(VectorFormat vform, 1582 LogicVRegister dst, 1583 const LogicVRegister& src1, 1584 const LogicVRegister& src2) { 1585 return uminmaxp(vform, dst, src1, src2, true); 1586} 1587 1588 1589LogicVRegister Simulator::uminp(VectorFormat vform, 1590 LogicVRegister dst, 1591 const LogicVRegister& src1, 1592 const LogicVRegister& src2) { 1593 return uminmaxp(vform, dst, src1, src2, false); 1594} 1595 1596 1597LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1598 LogicVRegister dst, 1599 const LogicVRegister& src, 1600 bool max) { 1601 uint64_t dst_val = max ? 0 : UINT64_MAX; 1602 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1603 uint64_t src_val = src.Uint(vform, i); 1604 if (max) { 1605 dst_val = (src_val > dst_val) ? src_val : dst_val; 1606 } else { 1607 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1608 } 1609 } 1610 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1611 dst.SetUint(vform, 0, dst_val); 1612 return dst; 1613} 1614 1615 1616LogicVRegister Simulator::umaxv(VectorFormat vform, 1617 LogicVRegister dst, 1618 const LogicVRegister& src) { 1619 uminmaxv(vform, dst, src, true); 1620 return dst; 1621} 1622 1623 1624LogicVRegister Simulator::uminv(VectorFormat vform, 1625 LogicVRegister dst, 1626 const LogicVRegister& src) { 1627 uminmaxv(vform, dst, src, false); 1628 return dst; 1629} 1630 1631 1632LogicVRegister Simulator::shl(VectorFormat vform, 1633 LogicVRegister dst, 1634 const LogicVRegister& src, 1635 int shift) { 1636 VIXL_ASSERT(shift >= 0); 1637 SimVRegister temp; 1638 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1639 return ushl(vform, dst, src, shiftreg); 1640} 1641 1642 1643LogicVRegister Simulator::sshll(VectorFormat vform, 1644 LogicVRegister dst, 1645 const LogicVRegister& src, 1646 int shift) { 1647 VIXL_ASSERT(shift >= 0); 1648 SimVRegister temp1, temp2; 1649 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1650 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1651 return sshl(vform, dst, extendedreg, shiftreg); 1652} 1653 1654 1655LogicVRegister Simulator::sshll2(VectorFormat vform, 1656 LogicVRegister dst, 1657 const LogicVRegister& src, 1658 int shift) { 1659 VIXL_ASSERT(shift >= 0); 1660 SimVRegister temp1, temp2; 1661 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1662 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1663 return sshl(vform, dst, extendedreg, shiftreg); 1664} 1665 1666 1667LogicVRegister Simulator::shll(VectorFormat vform, 1668 LogicVRegister dst, 1669 const LogicVRegister& src) { 1670 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1671 return sshll(vform, dst, src, shift); 1672} 1673 1674 1675LogicVRegister Simulator::shll2(VectorFormat vform, 1676 LogicVRegister dst, 1677 const LogicVRegister& src) { 1678 int shift = 
LaneSizeInBitsFromFormat(vform) / 2; 1679 return sshll2(vform, dst, src, shift); 1680} 1681 1682 1683LogicVRegister Simulator::ushll(VectorFormat vform, 1684 LogicVRegister dst, 1685 const LogicVRegister& src, 1686 int shift) { 1687 VIXL_ASSERT(shift >= 0); 1688 SimVRegister temp1, temp2; 1689 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1690 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1691 return ushl(vform, dst, extendedreg, shiftreg); 1692} 1693 1694 1695LogicVRegister Simulator::ushll2(VectorFormat vform, 1696 LogicVRegister dst, 1697 const LogicVRegister& src, 1698 int shift) { 1699 VIXL_ASSERT(shift >= 0); 1700 SimVRegister temp1, temp2; 1701 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1702 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1703 return ushl(vform, dst, extendedreg, shiftreg); 1704} 1705 1706 1707LogicVRegister Simulator::sli(VectorFormat vform, 1708 LogicVRegister dst, 1709 const LogicVRegister& src, 1710 int shift) { 1711 dst.ClearForWrite(vform); 1712 int laneCount = LaneCountFromFormat(vform); 1713 for (int i = 0; i < laneCount; i++) { 1714 uint64_t src_lane = src.Uint(vform, i); 1715 uint64_t dst_lane = dst.Uint(vform, i); 1716 uint64_t shifted = src_lane << shift; 1717 uint64_t mask = MaxUintFromFormat(vform) << shift; 1718 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1719 } 1720 return dst; 1721} 1722 1723 1724LogicVRegister Simulator::sqshl(VectorFormat vform, 1725 LogicVRegister dst, 1726 const LogicVRegister& src, 1727 int shift) { 1728 VIXL_ASSERT(shift >= 0); 1729 SimVRegister temp; 1730 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1731 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1732} 1733 1734 1735LogicVRegister Simulator::uqshl(VectorFormat vform, 1736 LogicVRegister dst, 1737 const LogicVRegister& src, 1738 int shift) { 1739 VIXL_ASSERT(shift >= 0); 1740 SimVRegister temp; 1741 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 
1742 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1743} 1744 1745 1746LogicVRegister Simulator::sqshlu(VectorFormat vform, 1747 LogicVRegister dst, 1748 const LogicVRegister& src, 1749 int shift) { 1750 VIXL_ASSERT(shift >= 0); 1751 SimVRegister temp; 1752 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1753 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1754} 1755 1756 1757LogicVRegister Simulator::sri(VectorFormat vform, 1758 LogicVRegister dst, 1759 const LogicVRegister& src, 1760 int shift) { 1761 dst.ClearForWrite(vform); 1762 int laneCount = LaneCountFromFormat(vform); 1763 VIXL_ASSERT((shift > 0) && 1764 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1765 for (int i = 0; i < laneCount; i++) { 1766 uint64_t src_lane = src.Uint(vform, i); 1767 uint64_t dst_lane = dst.Uint(vform, i); 1768 uint64_t shifted; 1769 uint64_t mask; 1770 if (shift == 64) { 1771 shifted = 0; 1772 mask = 0; 1773 } else { 1774 shifted = src_lane >> shift; 1775 mask = MaxUintFromFormat(vform) >> shift; 1776 } 1777 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1778 } 1779 return dst; 1780} 1781 1782 1783LogicVRegister Simulator::ushr(VectorFormat vform, 1784 LogicVRegister dst, 1785 const LogicVRegister& src, 1786 int shift) { 1787 VIXL_ASSERT(shift >= 0); 1788 SimVRegister temp; 1789 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1790 return ushl(vform, dst, src, shiftreg); 1791} 1792 1793 1794LogicVRegister Simulator::sshr(VectorFormat vform, 1795 LogicVRegister dst, 1796 const LogicVRegister& src, 1797 int shift) { 1798 VIXL_ASSERT(shift >= 0); 1799 SimVRegister temp; 1800 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1801 return sshl(vform, dst, src, shiftreg); 1802} 1803 1804 1805LogicVRegister Simulator::ssra(VectorFormat vform, 1806 LogicVRegister dst, 1807 const LogicVRegister& src, 1808 int shift) { 1809 SimVRegister temp; 1810 LogicVRegister shifted_reg = sshr(vform, temp, src, 
shift); 1811 return add(vform, dst, dst, shifted_reg); 1812} 1813 1814 1815LogicVRegister Simulator::usra(VectorFormat vform, 1816 LogicVRegister dst, 1817 const LogicVRegister& src, 1818 int shift) { 1819 SimVRegister temp; 1820 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1821 return add(vform, dst, dst, shifted_reg); 1822} 1823 1824 1825LogicVRegister Simulator::srsra(VectorFormat vform, 1826 LogicVRegister dst, 1827 const LogicVRegister& src, 1828 int shift) { 1829 SimVRegister temp; 1830 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1831 return add(vform, dst, dst, shifted_reg); 1832} 1833 1834 1835LogicVRegister Simulator::ursra(VectorFormat vform, 1836 LogicVRegister dst, 1837 const LogicVRegister& src, 1838 int shift) { 1839 SimVRegister temp; 1840 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1841 return add(vform, dst, dst, shifted_reg); 1842} 1843 1844 1845LogicVRegister Simulator::cls(VectorFormat vform, 1846 LogicVRegister dst, 1847 const LogicVRegister& src) { 1848 uint64_t result[16]; 1849 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1850 int laneCount = LaneCountFromFormat(vform); 1851 for (int i = 0; i < laneCount; i++) { 1852 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1853 } 1854 1855 dst.ClearForWrite(vform); 1856 for (int i = 0; i < laneCount; ++i) { 1857 dst.SetUint(vform, i, result[i]); 1858 } 1859 return dst; 1860} 1861 1862 1863LogicVRegister Simulator::clz(VectorFormat vform, 1864 LogicVRegister dst, 1865 const LogicVRegister& src) { 1866 uint64_t result[16]; 1867 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1868 int laneCount = LaneCountFromFormat(vform); 1869 for (int i = 0; i < laneCount; i++) { 1870 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1871 } 1872 1873 dst.ClearForWrite(vform); 1874 for (int i = 0; i < laneCount; ++i) { 1875 dst.SetUint(vform, i, result[i]); 1876 } 1877 return dst; 1878} 1879 
1880 1881LogicVRegister Simulator::cnt(VectorFormat vform, 1882 LogicVRegister dst, 1883 const LogicVRegister& src) { 1884 uint64_t result[16]; 1885 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1886 int laneCount = LaneCountFromFormat(vform); 1887 for (int i = 0; i < laneCount; i++) { 1888 uint64_t value = src.Uint(vform, i); 1889 result[i] = 0; 1890 for (int j = 0; j < laneSizeInBits; j++) { 1891 result[i] += (value & 1); 1892 value >>= 1; 1893 } 1894 } 1895 1896 dst.ClearForWrite(vform); 1897 for (int i = 0; i < laneCount; ++i) { 1898 dst.SetUint(vform, i, result[i]); 1899 } 1900 return dst; 1901} 1902 1903 1904LogicVRegister Simulator::sshl(VectorFormat vform, 1905 LogicVRegister dst, 1906 const LogicVRegister& src1, 1907 const LogicVRegister& src2) { 1908 dst.ClearForWrite(vform); 1909 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1910 int8_t shift_val = src2.Int(vform, i); 1911 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1912 1913 // Set signed saturation state. 1914 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1915 dst.SetSignedSat(i, lj_src_val >= 0); 1916 } 1917 1918 // Set unsigned saturation state. 1919 if (lj_src_val < 0) { 1920 dst.SetUnsignedSat(i, false); 1921 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1922 (lj_src_val != 0)) { 1923 dst.SetUnsignedSat(i, true); 1924 } 1925 1926 int64_t src_val = src1.Int(vform, i); 1927 if (shift_val > 63) { 1928 dst.SetInt(vform, i, 0); 1929 } else if (shift_val < -63) { 1930 dst.SetRounding(i, src_val < 0); 1931 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); 1932 } else { 1933 if (shift_val < 0) { 1934 // Set rounding state. Rounding only needed on right shifts. 
1935 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1936 dst.SetRounding(i, true); 1937 } 1938 src_val >>= -shift_val; 1939 } else { 1940 src_val <<= shift_val; 1941 } 1942 dst.SetInt(vform, i, src_val); 1943 } 1944 } 1945 return dst; 1946} 1947 1948 1949LogicVRegister Simulator::ushl(VectorFormat vform, 1950 LogicVRegister dst, 1951 const LogicVRegister& src1, 1952 const LogicVRegister& src2) { 1953 dst.ClearForWrite(vform); 1954 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1955 int8_t shift_val = src2.Int(vform, i); 1956 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1957 1958 // Set saturation state. 1959 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1960 dst.SetUnsignedSat(i, true); 1961 } 1962 1963 uint64_t src_val = src1.Uint(vform, i); 1964 if ((shift_val > 63) || (shift_val < -64)) { 1965 dst.SetUint(vform, i, 0); 1966 } else { 1967 if (shift_val < 0) { 1968 // Set rounding state. Rounding only needed on right shifts. 1969 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1970 dst.SetRounding(i, true); 1971 } 1972 1973 if (shift_val == -64) { 1974 src_val = 0; 1975 } else { 1976 src_val >>= -shift_val; 1977 } 1978 } else { 1979 src_val <<= shift_val; 1980 } 1981 dst.SetUint(vform, i, src_val); 1982 } 1983 } 1984 return dst; 1985} 1986 1987 1988LogicVRegister Simulator::neg(VectorFormat vform, 1989 LogicVRegister dst, 1990 const LogicVRegister& src) { 1991 dst.ClearForWrite(vform); 1992 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1993 // Test for signed saturation. 
1994 int64_t sa = src.Int(vform, i); 1995 if (sa == MinIntFromFormat(vform)) { 1996 dst.SetSignedSat(i, true); 1997 } 1998 dst.SetInt(vform, i, -sa); 1999 } 2000 return dst; 2001} 2002 2003 2004LogicVRegister Simulator::suqadd(VectorFormat vform, 2005 LogicVRegister dst, 2006 const LogicVRegister& src) { 2007 dst.ClearForWrite(vform); 2008 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2009 int64_t sa = dst.IntLeftJustified(vform, i); 2010 uint64_t ub = src.UintLeftJustified(vform, i); 2011 int64_t sr = sa + ub; 2012 2013 if (sr < sa) { // Test for signed positive saturation. 2014 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 2015 } else { 2016 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); 2017 } 2018 } 2019 return dst; 2020} 2021 2022 2023LogicVRegister Simulator::usqadd(VectorFormat vform, 2024 LogicVRegister dst, 2025 const LogicVRegister& src) { 2026 dst.ClearForWrite(vform); 2027 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2028 uint64_t ua = dst.UintLeftJustified(vform, i); 2029 int64_t sb = src.IntLeftJustified(vform, i); 2030 uint64_t ur = ua + sb; 2031 2032 if ((sb > 0) && (ur <= ua)) { 2033 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2034 } else if ((sb < 0) && (ur >= ua)) { 2035 dst.SetUint(vform, i, 0); // Negative saturation. 2036 } else { 2037 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 2038 } 2039 } 2040 return dst; 2041} 2042 2043 2044LogicVRegister Simulator::abs(VectorFormat vform, 2045 LogicVRegister dst, 2046 const LogicVRegister& src) { 2047 dst.ClearForWrite(vform); 2048 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2049 // Test for signed saturation. 
2050 int64_t sa = src.Int(vform, i); 2051 if (sa == MinIntFromFormat(vform)) { 2052 dst.SetSignedSat(i, true); 2053 } 2054 if (sa < 0) { 2055 dst.SetInt(vform, i, -sa); 2056 } else { 2057 dst.SetInt(vform, i, sa); 2058 } 2059 } 2060 return dst; 2061} 2062 2063 2064LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2065 LogicVRegister dst, 2066 bool dstIsSigned, 2067 const LogicVRegister& src, 2068 bool srcIsSigned) { 2069 bool upperhalf = false; 2070 VectorFormat srcform = kFormatUndefined; 2071 int64_t ssrc[8]; 2072 uint64_t usrc[8]; 2073 2074 switch (dstform) { 2075 case kFormat8B: 2076 upperhalf = false; 2077 srcform = kFormat8H; 2078 break; 2079 case kFormat16B: 2080 upperhalf = true; 2081 srcform = kFormat8H; 2082 break; 2083 case kFormat4H: 2084 upperhalf = false; 2085 srcform = kFormat4S; 2086 break; 2087 case kFormat8H: 2088 upperhalf = true; 2089 srcform = kFormat4S; 2090 break; 2091 case kFormat2S: 2092 upperhalf = false; 2093 srcform = kFormat2D; 2094 break; 2095 case kFormat4S: 2096 upperhalf = true; 2097 srcform = kFormat2D; 2098 break; 2099 case kFormatB: 2100 upperhalf = false; 2101 srcform = kFormatH; 2102 break; 2103 case kFormatH: 2104 upperhalf = false; 2105 srcform = kFormatS; 2106 break; 2107 case kFormatS: 2108 upperhalf = false; 2109 srcform = kFormatD; 2110 break; 2111 default: 2112 VIXL_UNIMPLEMENTED(); 2113 } 2114 2115 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2116 ssrc[i] = src.Int(srcform, i); 2117 usrc[i] = src.Uint(srcform, i); 2118 } 2119 2120 int offset; 2121 if (upperhalf) { 2122 offset = LaneCountFromFormat(dstform) / 2; 2123 } else { 2124 offset = 0; 2125 dst.ClearForWrite(dstform); 2126 } 2127 2128 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2129 // Test for signed saturation 2130 if (ssrc[i] > MaxIntFromFormat(dstform)) { 2131 dst.SetSignedSat(offset + i, true); 2132 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 2133 dst.SetSignedSat(offset + i, false); 2134 } 2135 2136 // Test for 
unsigned saturation 2137 if (srcIsSigned) { 2138 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2139 dst.SetUnsignedSat(offset + i, true); 2140 } else if (ssrc[i] < 0) { 2141 dst.SetUnsignedSat(offset + i, false); 2142 } 2143 } else { 2144 if (usrc[i] > MaxUintFromFormat(dstform)) { 2145 dst.SetUnsignedSat(offset + i, true); 2146 } 2147 } 2148 2149 int64_t result; 2150 if (srcIsSigned) { 2151 result = ssrc[i] & MaxUintFromFormat(dstform); 2152 } else { 2153 result = usrc[i] & MaxUintFromFormat(dstform); 2154 } 2155 2156 if (dstIsSigned) { 2157 dst.SetInt(dstform, offset + i, result); 2158 } else { 2159 dst.SetUint(dstform, offset + i, result); 2160 } 2161 } 2162 return dst; 2163} 2164 2165 2166LogicVRegister Simulator::xtn(VectorFormat vform, 2167 LogicVRegister dst, 2168 const LogicVRegister& src) { 2169 return extractnarrow(vform, dst, true, src, true); 2170} 2171 2172 2173LogicVRegister Simulator::sqxtn(VectorFormat vform, 2174 LogicVRegister dst, 2175 const LogicVRegister& src) { 2176 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2177} 2178 2179 2180LogicVRegister Simulator::sqxtun(VectorFormat vform, 2181 LogicVRegister dst, 2182 const LogicVRegister& src) { 2183 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2184} 2185 2186 2187LogicVRegister Simulator::uqxtn(VectorFormat vform, 2188 LogicVRegister dst, 2189 const LogicVRegister& src) { 2190 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2191} 2192 2193 2194LogicVRegister Simulator::absdiff(VectorFormat vform, 2195 LogicVRegister dst, 2196 const LogicVRegister& src1, 2197 const LogicVRegister& src2, 2198 bool issigned) { 2199 dst.ClearForWrite(vform); 2200 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2201 if (issigned) { 2202 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2203 sr = sr > 0 ? 
sr : -sr; 2204 dst.SetInt(vform, i, sr); 2205 } else { 2206 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2207 sr = sr > 0 ? sr : -sr; 2208 dst.SetUint(vform, i, sr); 2209 } 2210 } 2211 return dst; 2212} 2213 2214 2215LogicVRegister Simulator::saba(VectorFormat vform, 2216 LogicVRegister dst, 2217 const LogicVRegister& src1, 2218 const LogicVRegister& src2) { 2219 SimVRegister temp; 2220 dst.ClearForWrite(vform); 2221 absdiff(vform, temp, src1, src2, true); 2222 add(vform, dst, dst, temp); 2223 return dst; 2224} 2225 2226 2227LogicVRegister Simulator::uaba(VectorFormat vform, 2228 LogicVRegister dst, 2229 const LogicVRegister& src1, 2230 const LogicVRegister& src2) { 2231 SimVRegister temp; 2232 dst.ClearForWrite(vform); 2233 absdiff(vform, temp, src1, src2, false); 2234 add(vform, dst, dst, temp); 2235 return dst; 2236} 2237 2238 2239LogicVRegister Simulator::not_(VectorFormat vform, 2240 LogicVRegister dst, 2241 const LogicVRegister& src) { 2242 dst.ClearForWrite(vform); 2243 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2244 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2245 } 2246 return dst; 2247} 2248 2249 2250LogicVRegister Simulator::rbit(VectorFormat vform, 2251 LogicVRegister dst, 2252 const LogicVRegister& src) { 2253 uint64_t result[16]; 2254 int laneCount = LaneCountFromFormat(vform); 2255 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2256 uint64_t reversed_value; 2257 uint64_t value; 2258 for (int i = 0; i < laneCount; i++) { 2259 value = src.Uint(vform, i); 2260 reversed_value = 0; 2261 for (int j = 0; j < laneSizeInBits; j++) { 2262 reversed_value = (reversed_value << 1) | (value & 1); 2263 value >>= 1; 2264 } 2265 result[i] = reversed_value; 2266 } 2267 2268 dst.ClearForWrite(vform); 2269 for (int i = 0; i < laneCount; ++i) { 2270 dst.SetUint(vform, i, result[i]); 2271 } 2272 return dst; 2273} 2274 2275 2276LogicVRegister Simulator::rev(VectorFormat vform, 2277 LogicVRegister dst, 2278 const LogicVRegister& src, 2279 int 
revSize) { 2280 uint64_t result[16]; 2281 int laneCount = LaneCountFromFormat(vform); 2282 int laneSize = LaneSizeInBytesFromFormat(vform); 2283 int lanesPerLoop = revSize / laneSize; 2284 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2285 for (int j = 0; j < lanesPerLoop; j++) { 2286 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2287 } 2288 } 2289 dst.ClearForWrite(vform); 2290 for (int i = 0; i < laneCount; ++i) { 2291 dst.SetUint(vform, i, result[i]); 2292 } 2293 return dst; 2294} 2295 2296 2297LogicVRegister Simulator::rev16(VectorFormat vform, 2298 LogicVRegister dst, 2299 const LogicVRegister& src) { 2300 return rev(vform, dst, src, 2); 2301} 2302 2303 2304LogicVRegister Simulator::rev32(VectorFormat vform, 2305 LogicVRegister dst, 2306 const LogicVRegister& src) { 2307 return rev(vform, dst, src, 4); 2308} 2309 2310 2311LogicVRegister Simulator::rev64(VectorFormat vform, 2312 LogicVRegister dst, 2313 const LogicVRegister& src) { 2314 return rev(vform, dst, src, 8); 2315} 2316 2317 2318LogicVRegister Simulator::addlp(VectorFormat vform, 2319 LogicVRegister dst, 2320 const LogicVRegister& src, 2321 bool is_signed, 2322 bool do_accumulate) { 2323 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2324 2325 int64_t sr[16]; 2326 uint64_t ur[16]; 2327 2328 int laneCount = LaneCountFromFormat(vform); 2329 for (int i = 0; i < laneCount; ++i) { 2330 if (is_signed) { 2331 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); 2332 } else { 2333 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2334 } 2335 } 2336 2337 dst.ClearForWrite(vform); 2338 for (int i = 0; i < laneCount; ++i) { 2339 if (do_accumulate) { 2340 if (is_signed) { 2341 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); 2342 } else { 2343 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); 2344 } 2345 } else { 2346 if (is_signed) { 2347 dst.SetInt(vform, i, sr[i]); 2348 } else { 2349 dst.SetUint(vform, i, ur[i]); 2350 } 2351 } 2352 } 2353 2354 
return dst; 2355} 2356 2357 2358LogicVRegister Simulator::saddlp(VectorFormat vform, 2359 LogicVRegister dst, 2360 const LogicVRegister& src) { 2361 return addlp(vform, dst, src, true, false); 2362} 2363 2364 2365LogicVRegister Simulator::uaddlp(VectorFormat vform, 2366 LogicVRegister dst, 2367 const LogicVRegister& src) { 2368 return addlp(vform, dst, src, false, false); 2369} 2370 2371 2372LogicVRegister Simulator::sadalp(VectorFormat vform, 2373 LogicVRegister dst, 2374 const LogicVRegister& src) { 2375 return addlp(vform, dst, src, true, true); 2376} 2377 2378 2379LogicVRegister Simulator::uadalp(VectorFormat vform, 2380 LogicVRegister dst, 2381 const LogicVRegister& src) { 2382 return addlp(vform, dst, src, false, true); 2383} 2384 2385 2386LogicVRegister Simulator::ext(VectorFormat vform, 2387 LogicVRegister dst, 2388 const LogicVRegister& src1, 2389 const LogicVRegister& src2, 2390 int index) { 2391 uint8_t result[16]; 2392 int laneCount = LaneCountFromFormat(vform); 2393 for (int i = 0; i < laneCount - index; ++i) { 2394 result[i] = src1.Uint(vform, i + index); 2395 } 2396 for (int i = 0; i < index; ++i) { 2397 result[laneCount - index + i] = src2.Uint(vform, i); 2398 } 2399 dst.ClearForWrite(vform); 2400 for (int i = 0; i < laneCount; ++i) { 2401 dst.SetUint(vform, i, result[i]); 2402 } 2403 return dst; 2404} 2405 2406 2407LogicVRegister Simulator::dup_element(VectorFormat vform, 2408 LogicVRegister dst, 2409 const LogicVRegister& src, 2410 int src_index) { 2411 int laneCount = LaneCountFromFormat(vform); 2412 uint64_t value = src.Uint(vform, src_index); 2413 dst.ClearForWrite(vform); 2414 for (int i = 0; i < laneCount; ++i) { 2415 dst.SetUint(vform, i, value); 2416 } 2417 return dst; 2418} 2419 2420 2421LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2422 LogicVRegister dst, 2423 uint64_t imm) { 2424 int laneCount = LaneCountFromFormat(vform); 2425 uint64_t value = imm & MaxUintFromFormat(vform); 2426 dst.ClearForWrite(vform); 2427 for (int i 
= 0; i < laneCount; ++i) { 2428 dst.SetUint(vform, i, value); 2429 } 2430 return dst; 2431} 2432 2433 2434LogicVRegister Simulator::ins_element(VectorFormat vform, 2435 LogicVRegister dst, 2436 int dst_index, 2437 const LogicVRegister& src, 2438 int src_index) { 2439 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2440 return dst; 2441} 2442 2443 2444LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2445 LogicVRegister dst, 2446 int dst_index, 2447 uint64_t imm) { 2448 uint64_t value = imm & MaxUintFromFormat(vform); 2449 dst.SetUint(vform, dst_index, value); 2450 return dst; 2451} 2452 2453 2454LogicVRegister Simulator::movi(VectorFormat vform, 2455 LogicVRegister dst, 2456 uint64_t imm) { 2457 int laneCount = LaneCountFromFormat(vform); 2458 dst.ClearForWrite(vform); 2459 for (int i = 0; i < laneCount; ++i) { 2460 dst.SetUint(vform, i, imm); 2461 } 2462 return dst; 2463} 2464 2465 2466LogicVRegister Simulator::mvni(VectorFormat vform, 2467 LogicVRegister dst, 2468 uint64_t imm) { 2469 int laneCount = LaneCountFromFormat(vform); 2470 dst.ClearForWrite(vform); 2471 for (int i = 0; i < laneCount; ++i) { 2472 dst.SetUint(vform, i, ~imm); 2473 } 2474 return dst; 2475} 2476 2477 2478LogicVRegister Simulator::orr(VectorFormat vform, 2479 LogicVRegister dst, 2480 const LogicVRegister& src, 2481 uint64_t imm) { 2482 uint64_t result[16]; 2483 int laneCount = LaneCountFromFormat(vform); 2484 for (int i = 0; i < laneCount; ++i) { 2485 result[i] = src.Uint(vform, i) | imm; 2486 } 2487 dst.ClearForWrite(vform); 2488 for (int i = 0; i < laneCount; ++i) { 2489 dst.SetUint(vform, i, result[i]); 2490 } 2491 return dst; 2492} 2493 2494 2495LogicVRegister Simulator::uxtl(VectorFormat vform, 2496 LogicVRegister dst, 2497 const LogicVRegister& src) { 2498 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2499 2500 dst.ClearForWrite(vform); 2501 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2502 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2503 } 
2504 return dst; 2505} 2506 2507 2508LogicVRegister Simulator::sxtl(VectorFormat vform, 2509 LogicVRegister dst, 2510 const LogicVRegister& src) { 2511 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2512 2513 dst.ClearForWrite(vform); 2514 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2515 dst.SetInt(vform, i, src.Int(vform_half, i)); 2516 } 2517 return dst; 2518} 2519 2520 2521LogicVRegister Simulator::uxtl2(VectorFormat vform, 2522 LogicVRegister dst, 2523 const LogicVRegister& src) { 2524 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2525 int lane_count = LaneCountFromFormat(vform); 2526 2527 dst.ClearForWrite(vform); 2528 for (int i = 0; i < lane_count; i++) { 2529 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2530 } 2531 return dst; 2532} 2533 2534 2535LogicVRegister Simulator::sxtl2(VectorFormat vform, 2536 LogicVRegister dst, 2537 const LogicVRegister& src) { 2538 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2539 int lane_count = LaneCountFromFormat(vform); 2540 2541 dst.ClearForWrite(vform); 2542 for (int i = 0; i < lane_count; i++) { 2543 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2544 } 2545 return dst; 2546} 2547 2548 2549LogicVRegister Simulator::shrn(VectorFormat vform, 2550 LogicVRegister dst, 2551 const LogicVRegister& src, 2552 int shift) { 2553 SimVRegister temp; 2554 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2555 VectorFormat vform_dst = vform; 2556 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2557 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2558} 2559 2560 2561LogicVRegister Simulator::shrn2(VectorFormat vform, 2562 LogicVRegister dst, 2563 const LogicVRegister& src, 2564 int shift) { 2565 SimVRegister temp; 2566 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2567 VectorFormat vformdst = vform; 2568 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2569 return 
extractnarrow(vformdst, dst, false, shifted_src, false); 2570} 2571 2572 2573LogicVRegister Simulator::rshrn(VectorFormat vform, 2574 LogicVRegister dst, 2575 const LogicVRegister& src, 2576 int shift) { 2577 SimVRegister temp; 2578 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2579 VectorFormat vformdst = vform; 2580 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2581 return extractnarrow(vformdst, dst, false, shifted_src, false); 2582} 2583 2584 2585LogicVRegister Simulator::rshrn2(VectorFormat vform, 2586 LogicVRegister dst, 2587 const LogicVRegister& src, 2588 int shift) { 2589 SimVRegister temp; 2590 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2591 VectorFormat vformdst = vform; 2592 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2593 return extractnarrow(vformdst, dst, false, shifted_src, false); 2594} 2595 2596 2597LogicVRegister Simulator::Table(VectorFormat vform, 2598 LogicVRegister dst, 2599 const LogicVRegister& ind, 2600 bool zero_out_of_bounds, 2601 const LogicVRegister* tab1, 2602 const LogicVRegister* tab2, 2603 const LogicVRegister* tab3, 2604 const LogicVRegister* tab4) { 2605 VIXL_ASSERT(tab1 != NULL); 2606 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2607 uint64_t result[kMaxLanesPerVector]; 2608 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2609 result[i] = zero_out_of_bounds ? 
0 : dst.Uint(kFormat16B, i); 2610 } 2611 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2612 uint64_t j = ind.Uint(vform, i); 2613 int tab_idx = static_cast<int>(j >> 4); 2614 int j_idx = static_cast<int>(j & 15); 2615 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { 2616 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2617 } 2618 } 2619 dst.SetUintArray(vform, result); 2620 return dst; 2621} 2622 2623 2624LogicVRegister Simulator::tbl(VectorFormat vform, 2625 LogicVRegister dst, 2626 const LogicVRegister& tab, 2627 const LogicVRegister& ind) { 2628 return Table(vform, dst, ind, true, &tab); 2629} 2630 2631 2632LogicVRegister Simulator::tbl(VectorFormat vform, 2633 LogicVRegister dst, 2634 const LogicVRegister& tab, 2635 const LogicVRegister& tab2, 2636 const LogicVRegister& ind) { 2637 return Table(vform, dst, ind, true, &tab, &tab2); 2638} 2639 2640 2641LogicVRegister Simulator::tbl(VectorFormat vform, 2642 LogicVRegister dst, 2643 const LogicVRegister& tab, 2644 const LogicVRegister& tab2, 2645 const LogicVRegister& tab3, 2646 const LogicVRegister& ind) { 2647 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2648} 2649 2650 2651LogicVRegister Simulator::tbl(VectorFormat vform, 2652 LogicVRegister dst, 2653 const LogicVRegister& tab, 2654 const LogicVRegister& tab2, 2655 const LogicVRegister& tab3, 2656 const LogicVRegister& tab4, 2657 const LogicVRegister& ind) { 2658 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2659} 2660 2661 2662LogicVRegister Simulator::tbx(VectorFormat vform, 2663 LogicVRegister dst, 2664 const LogicVRegister& tab, 2665 const LogicVRegister& ind) { 2666 return Table(vform, dst, ind, false, &tab); 2667} 2668 2669 2670LogicVRegister Simulator::tbx(VectorFormat vform, 2671 LogicVRegister dst, 2672 const LogicVRegister& tab, 2673 const LogicVRegister& tab2, 2674 const LogicVRegister& ind) { 2675 return Table(vform, dst, ind, false, &tab, &tab2); 2676} 2677 2678 2679LogicVRegister Simulator::tbx(VectorFormat 
vform, 2680 LogicVRegister dst, 2681 const LogicVRegister& tab, 2682 const LogicVRegister& tab2, 2683 const LogicVRegister& tab3, 2684 const LogicVRegister& ind) { 2685 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2686} 2687 2688 2689LogicVRegister Simulator::tbx(VectorFormat vform, 2690 LogicVRegister dst, 2691 const LogicVRegister& tab, 2692 const LogicVRegister& tab2, 2693 const LogicVRegister& tab3, 2694 const LogicVRegister& tab4, 2695 const LogicVRegister& ind) { 2696 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2697} 2698 2699 2700LogicVRegister Simulator::uqshrn(VectorFormat vform, 2701 LogicVRegister dst, 2702 const LogicVRegister& src, 2703 int shift) { 2704 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2705} 2706 2707 2708LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2709 LogicVRegister dst, 2710 const LogicVRegister& src, 2711 int shift) { 2712 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2713} 2714 2715 2716LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2717 LogicVRegister dst, 2718 const LogicVRegister& src, 2719 int shift) { 2720 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2721} 2722 2723 2724LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2725 LogicVRegister dst, 2726 const LogicVRegister& src, 2727 int shift) { 2728 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2729} 2730 2731 2732LogicVRegister Simulator::sqshrn(VectorFormat vform, 2733 LogicVRegister dst, 2734 const LogicVRegister& src, 2735 int shift) { 2736 SimVRegister temp; 2737 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2738 VectorFormat vformdst = vform; 2739 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2740 return sqxtn(vformdst, dst, shifted_src); 2741} 2742 2743 2744LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2745 LogicVRegister dst, 2746 const LogicVRegister& src, 2747 int shift) { 2748 SimVRegister temp; 2749 VectorFormat 
vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2750 VectorFormat vformdst = vform; 2751 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2752 return sqxtn(vformdst, dst, shifted_src); 2753} 2754 2755 2756LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2757 LogicVRegister dst, 2758 const LogicVRegister& src, 2759 int shift) { 2760 SimVRegister temp; 2761 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2762 VectorFormat vformdst = vform; 2763 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2764 return sqxtn(vformdst, dst, shifted_src); 2765} 2766 2767 2768LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2769 LogicVRegister dst, 2770 const LogicVRegister& src, 2771 int shift) { 2772 SimVRegister temp; 2773 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2774 VectorFormat vformdst = vform; 2775 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2776 return sqxtn(vformdst, dst, shifted_src); 2777} 2778 2779 2780LogicVRegister Simulator::sqshrun(VectorFormat vform, 2781 LogicVRegister dst, 2782 const LogicVRegister& src, 2783 int shift) { 2784 SimVRegister temp; 2785 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2786 VectorFormat vformdst = vform; 2787 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2788 return sqxtun(vformdst, dst, shifted_src); 2789} 2790 2791 2792LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2793 LogicVRegister dst, 2794 const LogicVRegister& src, 2795 int shift) { 2796 SimVRegister temp; 2797 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2798 VectorFormat vformdst = vform; 2799 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2800 return sqxtun(vformdst, dst, shifted_src); 2801} 2802 2803 2804LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2805 LogicVRegister dst, 2806 const LogicVRegister& src, 2807 int shift) { 2808 
SimVRegister temp; 2809 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2810 VectorFormat vformdst = vform; 2811 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2812 return sqxtun(vformdst, dst, shifted_src); 2813} 2814 2815 2816LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2817 LogicVRegister dst, 2818 const LogicVRegister& src, 2819 int shift) { 2820 SimVRegister temp; 2821 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2822 VectorFormat vformdst = vform; 2823 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2824 return sqxtun(vformdst, dst, shifted_src); 2825} 2826 2827 2828LogicVRegister Simulator::uaddl(VectorFormat vform, 2829 LogicVRegister dst, 2830 const LogicVRegister& src1, 2831 const LogicVRegister& src2) { 2832 SimVRegister temp1, temp2; 2833 uxtl(vform, temp1, src1); 2834 uxtl(vform, temp2, src2); 2835 add(vform, dst, temp1, temp2); 2836 return dst; 2837} 2838 2839 2840LogicVRegister Simulator::uaddl2(VectorFormat vform, 2841 LogicVRegister dst, 2842 const LogicVRegister& src1, 2843 const LogicVRegister& src2) { 2844 SimVRegister temp1, temp2; 2845 uxtl2(vform, temp1, src1); 2846 uxtl2(vform, temp2, src2); 2847 add(vform, dst, temp1, temp2); 2848 return dst; 2849} 2850 2851 2852LogicVRegister Simulator::uaddw(VectorFormat vform, 2853 LogicVRegister dst, 2854 const LogicVRegister& src1, 2855 const LogicVRegister& src2) { 2856 SimVRegister temp; 2857 uxtl(vform, temp, src2); 2858 add(vform, dst, src1, temp); 2859 return dst; 2860} 2861 2862 2863LogicVRegister Simulator::uaddw2(VectorFormat vform, 2864 LogicVRegister dst, 2865 const LogicVRegister& src1, 2866 const LogicVRegister& src2) { 2867 SimVRegister temp; 2868 uxtl2(vform, temp, src2); 2869 add(vform, dst, src1, temp); 2870 return dst; 2871} 2872 2873 2874LogicVRegister Simulator::saddl(VectorFormat vform, 2875 LogicVRegister dst, 2876 const LogicVRegister& src1, 2877 const 
LogicVRegister& src2) { 2878 SimVRegister temp1, temp2; 2879 sxtl(vform, temp1, src1); 2880 sxtl(vform, temp2, src2); 2881 add(vform, dst, temp1, temp2); 2882 return dst; 2883} 2884 2885 2886LogicVRegister Simulator::saddl2(VectorFormat vform, 2887 LogicVRegister dst, 2888 const LogicVRegister& src1, 2889 const LogicVRegister& src2) { 2890 SimVRegister temp1, temp2; 2891 sxtl2(vform, temp1, src1); 2892 sxtl2(vform, temp2, src2); 2893 add(vform, dst, temp1, temp2); 2894 return dst; 2895} 2896 2897 2898LogicVRegister Simulator::saddw(VectorFormat vform, 2899 LogicVRegister dst, 2900 const LogicVRegister& src1, 2901 const LogicVRegister& src2) { 2902 SimVRegister temp; 2903 sxtl(vform, temp, src2); 2904 add(vform, dst, src1, temp); 2905 return dst; 2906} 2907 2908 2909LogicVRegister Simulator::saddw2(VectorFormat vform, 2910 LogicVRegister dst, 2911 const LogicVRegister& src1, 2912 const LogicVRegister& src2) { 2913 SimVRegister temp; 2914 sxtl2(vform, temp, src2); 2915 add(vform, dst, src1, temp); 2916 return dst; 2917} 2918 2919 2920LogicVRegister Simulator::usubl(VectorFormat vform, 2921 LogicVRegister dst, 2922 const LogicVRegister& src1, 2923 const LogicVRegister& src2) { 2924 SimVRegister temp1, temp2; 2925 uxtl(vform, temp1, src1); 2926 uxtl(vform, temp2, src2); 2927 sub(vform, dst, temp1, temp2); 2928 return dst; 2929} 2930 2931 2932LogicVRegister Simulator::usubl2(VectorFormat vform, 2933 LogicVRegister dst, 2934 const LogicVRegister& src1, 2935 const LogicVRegister& src2) { 2936 SimVRegister temp1, temp2; 2937 uxtl2(vform, temp1, src1); 2938 uxtl2(vform, temp2, src2); 2939 sub(vform, dst, temp1, temp2); 2940 return dst; 2941} 2942 2943 2944LogicVRegister Simulator::usubw(VectorFormat vform, 2945 LogicVRegister dst, 2946 const LogicVRegister& src1, 2947 const LogicVRegister& src2) { 2948 SimVRegister temp; 2949 uxtl(vform, temp, src2); 2950 sub(vform, dst, src1, temp); 2951 return dst; 2952} 2953 2954 2955LogicVRegister Simulator::usubw2(VectorFormat vform, 
2956 LogicVRegister dst, 2957 const LogicVRegister& src1, 2958 const LogicVRegister& src2) { 2959 SimVRegister temp; 2960 uxtl2(vform, temp, src2); 2961 sub(vform, dst, src1, temp); 2962 return dst; 2963} 2964 2965 2966LogicVRegister Simulator::ssubl(VectorFormat vform, 2967 LogicVRegister dst, 2968 const LogicVRegister& src1, 2969 const LogicVRegister& src2) { 2970 SimVRegister temp1, temp2; 2971 sxtl(vform, temp1, src1); 2972 sxtl(vform, temp2, src2); 2973 sub(vform, dst, temp1, temp2); 2974 return dst; 2975} 2976 2977 2978LogicVRegister Simulator::ssubl2(VectorFormat vform, 2979 LogicVRegister dst, 2980 const LogicVRegister& src1, 2981 const LogicVRegister& src2) { 2982 SimVRegister temp1, temp2; 2983 sxtl2(vform, temp1, src1); 2984 sxtl2(vform, temp2, src2); 2985 sub(vform, dst, temp1, temp2); 2986 return dst; 2987} 2988 2989 2990LogicVRegister Simulator::ssubw(VectorFormat vform, 2991 LogicVRegister dst, 2992 const LogicVRegister& src1, 2993 const LogicVRegister& src2) { 2994 SimVRegister temp; 2995 sxtl(vform, temp, src2); 2996 sub(vform, dst, src1, temp); 2997 return dst; 2998} 2999 3000 3001LogicVRegister Simulator::ssubw2(VectorFormat vform, 3002 LogicVRegister dst, 3003 const LogicVRegister& src1, 3004 const LogicVRegister& src2) { 3005 SimVRegister temp; 3006 sxtl2(vform, temp, src2); 3007 sub(vform, dst, src1, temp); 3008 return dst; 3009} 3010 3011 3012LogicVRegister Simulator::uabal(VectorFormat vform, 3013 LogicVRegister dst, 3014 const LogicVRegister& src1, 3015 const LogicVRegister& src2) { 3016 SimVRegister temp1, temp2; 3017 uxtl(vform, temp1, src1); 3018 uxtl(vform, temp2, src2); 3019 uaba(vform, dst, temp1, temp2); 3020 return dst; 3021} 3022 3023 3024LogicVRegister Simulator::uabal2(VectorFormat vform, 3025 LogicVRegister dst, 3026 const LogicVRegister& src1, 3027 const LogicVRegister& src2) { 3028 SimVRegister temp1, temp2; 3029 uxtl2(vform, temp1, src1); 3030 uxtl2(vform, temp2, src2); 3031 uaba(vform, dst, temp1, temp2); 3032 return dst; 
3033} 3034 3035 3036LogicVRegister Simulator::sabal(VectorFormat vform, 3037 LogicVRegister dst, 3038 const LogicVRegister& src1, 3039 const LogicVRegister& src2) { 3040 SimVRegister temp1, temp2; 3041 sxtl(vform, temp1, src1); 3042 sxtl(vform, temp2, src2); 3043 saba(vform, dst, temp1, temp2); 3044 return dst; 3045} 3046 3047 3048LogicVRegister Simulator::sabal2(VectorFormat vform, 3049 LogicVRegister dst, 3050 const LogicVRegister& src1, 3051 const LogicVRegister& src2) { 3052 SimVRegister temp1, temp2; 3053 sxtl2(vform, temp1, src1); 3054 sxtl2(vform, temp2, src2); 3055 saba(vform, dst, temp1, temp2); 3056 return dst; 3057} 3058 3059 3060LogicVRegister Simulator::uabdl(VectorFormat vform, 3061 LogicVRegister dst, 3062 const LogicVRegister& src1, 3063 const LogicVRegister& src2) { 3064 SimVRegister temp1, temp2; 3065 uxtl(vform, temp1, src1); 3066 uxtl(vform, temp2, src2); 3067 absdiff(vform, dst, temp1, temp2, false); 3068 return dst; 3069} 3070 3071 3072LogicVRegister Simulator::uabdl2(VectorFormat vform, 3073 LogicVRegister dst, 3074 const LogicVRegister& src1, 3075 const LogicVRegister& src2) { 3076 SimVRegister temp1, temp2; 3077 uxtl2(vform, temp1, src1); 3078 uxtl2(vform, temp2, src2); 3079 absdiff(vform, dst, temp1, temp2, false); 3080 return dst; 3081} 3082 3083 3084LogicVRegister Simulator::sabdl(VectorFormat vform, 3085 LogicVRegister dst, 3086 const LogicVRegister& src1, 3087 const LogicVRegister& src2) { 3088 SimVRegister temp1, temp2; 3089 sxtl(vform, temp1, src1); 3090 sxtl(vform, temp2, src2); 3091 absdiff(vform, dst, temp1, temp2, true); 3092 return dst; 3093} 3094 3095 3096LogicVRegister Simulator::sabdl2(VectorFormat vform, 3097 LogicVRegister dst, 3098 const LogicVRegister& src1, 3099 const LogicVRegister& src2) { 3100 SimVRegister temp1, temp2; 3101 sxtl2(vform, temp1, src1); 3102 sxtl2(vform, temp2, src2); 3103 absdiff(vform, dst, temp1, temp2, true); 3104 return dst; 3105} 3106 3107 3108LogicVRegister Simulator::umull(VectorFormat vform, 
3109 LogicVRegister dst, 3110 const LogicVRegister& src1, 3111 const LogicVRegister& src2) { 3112 SimVRegister temp1, temp2; 3113 uxtl(vform, temp1, src1); 3114 uxtl(vform, temp2, src2); 3115 mul(vform, dst, temp1, temp2); 3116 return dst; 3117} 3118 3119 3120LogicVRegister Simulator::umull2(VectorFormat vform, 3121 LogicVRegister dst, 3122 const LogicVRegister& src1, 3123 const LogicVRegister& src2) { 3124 SimVRegister temp1, temp2; 3125 uxtl2(vform, temp1, src1); 3126 uxtl2(vform, temp2, src2); 3127 mul(vform, dst, temp1, temp2); 3128 return dst; 3129} 3130 3131 3132LogicVRegister Simulator::smull(VectorFormat vform, 3133 LogicVRegister dst, 3134 const LogicVRegister& src1, 3135 const LogicVRegister& src2) { 3136 SimVRegister temp1, temp2; 3137 sxtl(vform, temp1, src1); 3138 sxtl(vform, temp2, src2); 3139 mul(vform, dst, temp1, temp2); 3140 return dst; 3141} 3142 3143 3144LogicVRegister Simulator::smull2(VectorFormat vform, 3145 LogicVRegister dst, 3146 const LogicVRegister& src1, 3147 const LogicVRegister& src2) { 3148 SimVRegister temp1, temp2; 3149 sxtl2(vform, temp1, src1); 3150 sxtl2(vform, temp2, src2); 3151 mul(vform, dst, temp1, temp2); 3152 return dst; 3153} 3154 3155 3156LogicVRegister Simulator::umlsl(VectorFormat vform, 3157 LogicVRegister dst, 3158 const LogicVRegister& src1, 3159 const LogicVRegister& src2) { 3160 SimVRegister temp1, temp2; 3161 uxtl(vform, temp1, src1); 3162 uxtl(vform, temp2, src2); 3163 mls(vform, dst, temp1, temp2); 3164 return dst; 3165} 3166 3167 3168LogicVRegister Simulator::umlsl2(VectorFormat vform, 3169 LogicVRegister dst, 3170 const LogicVRegister& src1, 3171 const LogicVRegister& src2) { 3172 SimVRegister temp1, temp2; 3173 uxtl2(vform, temp1, src1); 3174 uxtl2(vform, temp2, src2); 3175 mls(vform, dst, temp1, temp2); 3176 return dst; 3177} 3178 3179 3180LogicVRegister Simulator::smlsl(VectorFormat vform, 3181 LogicVRegister dst, 3182 const LogicVRegister& src1, 3183 const LogicVRegister& src2) { 3184 SimVRegister temp1, 
temp2; 3185 sxtl(vform, temp1, src1); 3186 sxtl(vform, temp2, src2); 3187 mls(vform, dst, temp1, temp2); 3188 return dst; 3189} 3190 3191 3192LogicVRegister Simulator::smlsl2(VectorFormat vform, 3193 LogicVRegister dst, 3194 const LogicVRegister& src1, 3195 const LogicVRegister& src2) { 3196 SimVRegister temp1, temp2; 3197 sxtl2(vform, temp1, src1); 3198 sxtl2(vform, temp2, src2); 3199 mls(vform, dst, temp1, temp2); 3200 return dst; 3201} 3202 3203 3204LogicVRegister Simulator::umlal(VectorFormat vform, 3205 LogicVRegister dst, 3206 const LogicVRegister& src1, 3207 const LogicVRegister& src2) { 3208 SimVRegister temp1, temp2; 3209 uxtl(vform, temp1, src1); 3210 uxtl(vform, temp2, src2); 3211 mla(vform, dst, temp1, temp2); 3212 return dst; 3213} 3214 3215 3216LogicVRegister Simulator::umlal2(VectorFormat vform, 3217 LogicVRegister dst, 3218 const LogicVRegister& src1, 3219 const LogicVRegister& src2) { 3220 SimVRegister temp1, temp2; 3221 uxtl2(vform, temp1, src1); 3222 uxtl2(vform, temp2, src2); 3223 mla(vform, dst, temp1, temp2); 3224 return dst; 3225} 3226 3227 3228LogicVRegister Simulator::smlal(VectorFormat vform, 3229 LogicVRegister dst, 3230 const LogicVRegister& src1, 3231 const LogicVRegister& src2) { 3232 SimVRegister temp1, temp2; 3233 sxtl(vform, temp1, src1); 3234 sxtl(vform, temp2, src2); 3235 mla(vform, dst, temp1, temp2); 3236 return dst; 3237} 3238 3239 3240LogicVRegister Simulator::smlal2(VectorFormat vform, 3241 LogicVRegister dst, 3242 const LogicVRegister& src1, 3243 const LogicVRegister& src2) { 3244 SimVRegister temp1, temp2; 3245 sxtl2(vform, temp1, src1); 3246 sxtl2(vform, temp2, src2); 3247 mla(vform, dst, temp1, temp2); 3248 return dst; 3249} 3250 3251 3252LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3253 LogicVRegister dst, 3254 const LogicVRegister& src1, 3255 const LogicVRegister& src2) { 3256 SimVRegister temp; 3257 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3258 return add(vform, dst, dst, 
product).SignedSaturate(vform); 3259} 3260 3261 3262LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3263 LogicVRegister dst, 3264 const LogicVRegister& src1, 3265 const LogicVRegister& src2) { 3266 SimVRegister temp; 3267 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3268 return add(vform, dst, dst, product).SignedSaturate(vform); 3269} 3270 3271 3272LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3273 LogicVRegister dst, 3274 const LogicVRegister& src1, 3275 const LogicVRegister& src2) { 3276 SimVRegister temp; 3277 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3278 return sub(vform, dst, dst, product).SignedSaturate(vform); 3279} 3280 3281 3282LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3283 LogicVRegister dst, 3284 const LogicVRegister& src1, 3285 const LogicVRegister& src2) { 3286 SimVRegister temp; 3287 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3288 return sub(vform, dst, dst, product).SignedSaturate(vform); 3289} 3290 3291 3292LogicVRegister Simulator::sqdmull(VectorFormat vform, 3293 LogicVRegister dst, 3294 const LogicVRegister& src1, 3295 const LogicVRegister& src2) { 3296 SimVRegister temp; 3297 LogicVRegister product = smull(vform, temp, src1, src2); 3298 return add(vform, dst, product, product).SignedSaturate(vform); 3299} 3300 3301 3302LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3303 LogicVRegister dst, 3304 const LogicVRegister& src1, 3305 const LogicVRegister& src2) { 3306 SimVRegister temp; 3307 LogicVRegister product = smull2(vform, temp, src1, src2); 3308 return add(vform, dst, product, product).SignedSaturate(vform); 3309} 3310 3311 3312LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3313 LogicVRegister dst, 3314 const LogicVRegister& src1, 3315 const LogicVRegister& src2, 3316 bool round) { 3317 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 
3318 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3319 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3320 3321 int esize = LaneSizeInBitsFromFormat(vform); 3322 int round_const = round ? (1 << (esize - 2)) : 0; 3323 int64_t product; 3324 3325 dst.ClearForWrite(vform); 3326 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3327 product = src1.Int(vform, i) * src2.Int(vform, i); 3328 product += round_const; 3329 product = product >> (esize - 1); 3330 3331 if (product > MaxIntFromFormat(vform)) { 3332 product = MaxIntFromFormat(vform); 3333 } else if (product < MinIntFromFormat(vform)) { 3334 product = MinIntFromFormat(vform); 3335 } 3336 dst.SetInt(vform, i, product); 3337 } 3338 return dst; 3339} 3340 3341 3342LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3343 LogicVRegister dst, 3344 const LogicVRegister& src1, 3345 const LogicVRegister& src2) { 3346 return sqrdmulh(vform, dst, src1, src2, false); 3347} 3348 3349 3350LogicVRegister Simulator::addhn(VectorFormat vform, 3351 LogicVRegister dst, 3352 const LogicVRegister& src1, 3353 const LogicVRegister& src2) { 3354 SimVRegister temp; 3355 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3356 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3357 return dst; 3358} 3359 3360 3361LogicVRegister Simulator::addhn2(VectorFormat vform, 3362 LogicVRegister dst, 3363 const LogicVRegister& src1, 3364 const LogicVRegister& src2) { 3365 SimVRegister temp; 3366 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3367 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3368 return dst; 3369} 3370 3371 3372LogicVRegister Simulator::raddhn(VectorFormat vform, 3373 LogicVRegister dst, 3374 const LogicVRegister& src1, 3375 const LogicVRegister& src2) { 3376 SimVRegister temp; 3377 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3378 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3379 return dst; 3380} 3381 3382 
3383LogicVRegister Simulator::raddhn2(VectorFormat vform, 3384 LogicVRegister dst, 3385 const LogicVRegister& src1, 3386 const LogicVRegister& src2) { 3387 SimVRegister temp; 3388 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3389 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3390 return dst; 3391} 3392 3393 3394LogicVRegister Simulator::subhn(VectorFormat vform, 3395 LogicVRegister dst, 3396 const LogicVRegister& src1, 3397 const LogicVRegister& src2) { 3398 SimVRegister temp; 3399 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3400 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3401 return dst; 3402} 3403 3404 3405LogicVRegister Simulator::subhn2(VectorFormat vform, 3406 LogicVRegister dst, 3407 const LogicVRegister& src1, 3408 const LogicVRegister& src2) { 3409 SimVRegister temp; 3410 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3411 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3412 return dst; 3413} 3414 3415 3416LogicVRegister Simulator::rsubhn(VectorFormat vform, 3417 LogicVRegister dst, 3418 const LogicVRegister& src1, 3419 const LogicVRegister& src2) { 3420 SimVRegister temp; 3421 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3422 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3423 return dst; 3424} 3425 3426 3427LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3428 LogicVRegister dst, 3429 const LogicVRegister& src1, 3430 const LogicVRegister& src2) { 3431 SimVRegister temp; 3432 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3433 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3434 return dst; 3435} 3436 3437 3438LogicVRegister Simulator::trn1(VectorFormat vform, 3439 LogicVRegister dst, 3440 const LogicVRegister& src1, 3441 const LogicVRegister& src2) { 3442 uint64_t result[16]; 3443 int laneCount = LaneCountFromFormat(vform); 3444 int pairs = laneCount / 2; 3445 for (int i = 0; i 
< pairs; ++i) { 3446 result[2 * i] = src1.Uint(vform, 2 * i); 3447 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3448 } 3449 3450 dst.ClearForWrite(vform); 3451 for (int i = 0; i < laneCount; ++i) { 3452 dst.SetUint(vform, i, result[i]); 3453 } 3454 return dst; 3455} 3456 3457 3458LogicVRegister Simulator::trn2(VectorFormat vform, 3459 LogicVRegister dst, 3460 const LogicVRegister& src1, 3461 const LogicVRegister& src2) { 3462 uint64_t result[16]; 3463 int laneCount = LaneCountFromFormat(vform); 3464 int pairs = laneCount / 2; 3465 for (int i = 0; i < pairs; ++i) { 3466 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3467 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3468 } 3469 3470 dst.ClearForWrite(vform); 3471 for (int i = 0; i < laneCount; ++i) { 3472 dst.SetUint(vform, i, result[i]); 3473 } 3474 return dst; 3475} 3476 3477 3478LogicVRegister Simulator::zip1(VectorFormat vform, 3479 LogicVRegister dst, 3480 const LogicVRegister& src1, 3481 const LogicVRegister& src2) { 3482 uint64_t result[16]; 3483 int laneCount = LaneCountFromFormat(vform); 3484 int pairs = laneCount / 2; 3485 for (int i = 0; i < pairs; ++i) { 3486 result[2 * i] = src1.Uint(vform, i); 3487 result[(2 * i) + 1] = src2.Uint(vform, i); 3488 } 3489 3490 dst.ClearForWrite(vform); 3491 for (int i = 0; i < laneCount; ++i) { 3492 dst.SetUint(vform, i, result[i]); 3493 } 3494 return dst; 3495} 3496 3497 3498LogicVRegister Simulator::zip2(VectorFormat vform, 3499 LogicVRegister dst, 3500 const LogicVRegister& src1, 3501 const LogicVRegister& src2) { 3502 uint64_t result[16]; 3503 int laneCount = LaneCountFromFormat(vform); 3504 int pairs = laneCount / 2; 3505 for (int i = 0; i < pairs; ++i) { 3506 result[2 * i] = src1.Uint(vform, pairs + i); 3507 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3508 } 3509 3510 dst.ClearForWrite(vform); 3511 for (int i = 0; i < laneCount; ++i) { 3512 dst.SetUint(vform, i, result[i]); 3513 } 3514 return dst; 3515} 3516 3517 3518LogicVRegister 
Simulator::uzp1(VectorFormat vform, 3519 LogicVRegister dst, 3520 const LogicVRegister& src1, 3521 const LogicVRegister& src2) { 3522 uint64_t result[32]; 3523 int laneCount = LaneCountFromFormat(vform); 3524 for (int i = 0; i < laneCount; ++i) { 3525 result[i] = src1.Uint(vform, i); 3526 result[laneCount + i] = src2.Uint(vform, i); 3527 } 3528 3529 dst.ClearForWrite(vform); 3530 for (int i = 0; i < laneCount; ++i) { 3531 dst.SetUint(vform, i, result[2 * i]); 3532 } 3533 return dst; 3534} 3535 3536 3537LogicVRegister Simulator::uzp2(VectorFormat vform, 3538 LogicVRegister dst, 3539 const LogicVRegister& src1, 3540 const LogicVRegister& src2) { 3541 uint64_t result[32]; 3542 int laneCount = LaneCountFromFormat(vform); 3543 for (int i = 0; i < laneCount; ++i) { 3544 result[i] = src1.Uint(vform, i); 3545 result[laneCount + i] = src2.Uint(vform, i); 3546 } 3547 3548 dst.ClearForWrite(vform); 3549 for (int i = 0; i < laneCount; ++i) { 3550 dst.SetUint(vform, i, result[(2 * i) + 1]); 3551 } 3552 return dst; 3553} 3554 3555 3556template <typename T> 3557T Simulator::FPAdd(T op1, T op2) { 3558 T result = FPProcessNaNs(op1, op2); 3559 if (std::isnan(result)) return result; 3560 3561 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3562 // inf + -inf returns the default NaN. 3563 FPProcessException(); 3564 return FPDefaultNaN<T>(); 3565 } else { 3566 // Other cases should be handled by standard arithmetic. 3567 return op1 + op2; 3568 } 3569} 3570 3571 3572template <typename T> 3573T Simulator::FPSub(T op1, T op2) { 3574 // NaNs should be handled elsewhere. 3575 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3576 3577 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3578 // inf - inf returns the default NaN. 3579 FPProcessException(); 3580 return FPDefaultNaN<T>(); 3581 } else { 3582 // Other cases should be handled by standard arithmetic. 
3583 return op1 - op2; 3584 } 3585} 3586 3587 3588template <typename T> 3589T Simulator::FPMul(T op1, T op2) { 3590 // NaNs should be handled elsewhere. 3591 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3592 3593 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3594 // inf * 0.0 returns the default NaN. 3595 FPProcessException(); 3596 return FPDefaultNaN<T>(); 3597 } else { 3598 // Other cases should be handled by standard arithmetic. 3599 return op1 * op2; 3600 } 3601} 3602 3603 3604template <typename T> 3605T Simulator::FPMulx(T op1, T op2) { 3606 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3607 // inf * 0.0 returns +/-2.0. 3608 T two = 2.0; 3609 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3610 } 3611 return FPMul(op1, op2); 3612} 3613 3614 3615template <typename T> 3616T Simulator::FPMulAdd(T a, T op1, T op2) { 3617 T result = FPProcessNaNs3(a, op1, op2); 3618 3619 T sign_a = copysign(1.0, a); 3620 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3621 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3622 bool operation_generates_nan = 3623 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3624 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3625 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3626 3627 if (std::isnan(result)) { 3628 // Generated NaNs override quiet NaNs propagated from a. 3629 if (operation_generates_nan && IsQuietNaN(a)) { 3630 FPProcessException(); 3631 return FPDefaultNaN<T>(); 3632 } else { 3633 return result; 3634 } 3635 } 3636 3637 // If the operation would produce a NaN, return the default NaN. 3638 if (operation_generates_nan) { 3639 FPProcessException(); 3640 return FPDefaultNaN<T>(); 3641 } 3642 3643 // Work around broken fma implementations for exact zero results: The sign of 3644 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 
3645 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3646 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; 3647 } 3648 3649 result = FusedMultiplyAdd(op1, op2, a); 3650 VIXL_ASSERT(!std::isnan(result)); 3651 3652 // Work around broken fma implementations for rounded zero results: If a is 3653 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3654 if ((a == 0.0) && (result == 0.0)) { 3655 return copysign(0.0, sign_prod); 3656 } 3657 3658 return result; 3659} 3660 3661 3662template <typename T> 3663T Simulator::FPDiv(T op1, T op2) { 3664 // NaNs should be handled elsewhere. 3665 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3666 3667 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3668 // inf / inf and 0.0 / 0.0 return the default NaN. 3669 FPProcessException(); 3670 return FPDefaultNaN<T>(); 3671 } else { 3672 if (op2 == 0.0) FPProcessException(); 3673 3674 // Other cases should be handled by standard arithmetic. 3675 return op1 / op2; 3676 } 3677} 3678 3679 3680template <typename T> 3681T Simulator::FPSqrt(T op) { 3682 if (std::isnan(op)) { 3683 return FPProcessNaN(op); 3684 } else if (op < 0.0) { 3685 FPProcessException(); 3686 return FPDefaultNaN<T>(); 3687 } else { 3688 return sqrt(op); 3689 } 3690} 3691 3692 3693template <typename T> 3694T Simulator::FPMax(T a, T b) { 3695 T result = FPProcessNaNs(a, b); 3696 if (std::isnan(result)) return result; 3697 3698 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3699 // a and b are zero, and the sign differs: return +0.0. 3700 return 0.0; 3701 } else { 3702 return (a > b) ? a : b; 3703 } 3704} 3705 3706 3707template <typename T> 3708T Simulator::FPMaxNM(T a, T b) { 3709 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3710 a = kFP64NegativeInfinity; 3711 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3712 b = kFP64NegativeInfinity; 3713 } 3714 3715 T result = FPProcessNaNs(a, b); 3716 return std::isnan(result) ? 
result : FPMax(a, b); 3717} 3718 3719 3720template <typename T> 3721T Simulator::FPMin(T a, T b) { 3722 T result = FPProcessNaNs(a, b); 3723 if (std::isnan(result)) return result; 3724 3725 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3726 // a and b are zero, and the sign differs: return -0.0. 3727 return -0.0; 3728 } else { 3729 return (a < b) ? a : b; 3730 } 3731} 3732 3733 3734template <typename T> 3735T Simulator::FPMinNM(T a, T b) { 3736 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3737 a = kFP64PositiveInfinity; 3738 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3739 b = kFP64PositiveInfinity; 3740 } 3741 3742 T result = FPProcessNaNs(a, b); 3743 return std::isnan(result) ? result : FPMin(a, b); 3744} 3745 3746 3747template <typename T> 3748T Simulator::FPRecipStepFused(T op1, T op2) { 3749 const T two = 2.0; 3750 if ((std::isinf(op1) && (op2 == 0.0)) || 3751 ((op1 == 0.0) && (std::isinf(op2)))) { 3752 return two; 3753 } else if (std::isinf(op1) || std::isinf(op2)) { 3754 // Return +inf if signs match, otherwise -inf. 3755 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3756 : kFP64NegativeInfinity; 3757 } else { 3758 return FusedMultiplyAdd(op1, op2, two); 3759 } 3760} 3761 3762 3763template <typename T> 3764T Simulator::FPRSqrtStepFused(T op1, T op2) { 3765 const T one_point_five = 1.5; 3766 const T two = 2.0; 3767 3768 if ((std::isinf(op1) && (op2 == 0.0)) || 3769 ((op1 == 0.0) && (std::isinf(op2)))) { 3770 return one_point_five; 3771 } else if (std::isinf(op1) || std::isinf(op2)) { 3772 // Return +inf if signs match, otherwise -inf. 3773 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3774 : kFP64NegativeInfinity; 3775 } else { 3776 // The multiply-add-halve operation must be fully fused, so avoid interim 3777 // rounding by checking which operand can be losslessly divided by two 3778 // before doing the multiply-add. 
3779 if (std::isnormal(op1 / two)) { 3780 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3781 } else if (std::isnormal(op2 / two)) { 3782 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3783 } else { 3784 // Neither operand is normal after halving: the result is dominated by 3785 // the addition term, so just return that. 3786 return one_point_five; 3787 } 3788 } 3789} 3790 3791 3792double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3793 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3794 (value == kFP64NegativeInfinity)) { 3795 return value; 3796 } else if (std::isnan(value)) { 3797 return FPProcessNaN(value); 3798 } 3799 3800 double int_result = std::floor(value); 3801 double error = value - int_result; 3802 switch (round_mode) { 3803 case FPTieAway: { 3804 // Take care of correctly handling the range ]-0.5, -0.0], which must 3805 // yield -0.0. 3806 if ((-0.5 < value) && (value < 0.0)) { 3807 int_result = -0.0; 3808 3809 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3810 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3811 // result is positive, round up. 3812 int_result++; 3813 } 3814 break; 3815 } 3816 case FPTieEven: { 3817 // Take care of correctly handling the range [-0.5, -0.0], which must 3818 // yield -0.0. 3819 if ((-0.5 <= value) && (value < 0.0)) { 3820 int_result = -0.0; 3821 3822 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3823 // result is odd, round up. 3824 } else if ((error > 0.5) || 3825 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3826 int_result++; 3827 } 3828 break; 3829 } 3830 case FPZero: { 3831 // If value>0 then we take floor(value) 3832 // otherwise, ceil(value). 3833 if (value < 0) { 3834 int_result = ceil(value); 3835 } 3836 break; 3837 } 3838 case FPNegativeInfinity: { 3839 // We always use floor(value). 
3840 break; 3841 } 3842 case FPPositiveInfinity: { 3843 // Take care of correctly handling the range ]-1.0, -0.0], which must 3844 // yield -0.0. 3845 if ((-1.0 < value) && (value < 0.0)) { 3846 int_result = -0.0; 3847 3848 // If the error is non-zero, round up. 3849 } else if (error > 0.0) { 3850 int_result++; 3851 } 3852 break; 3853 } 3854 default: 3855 VIXL_UNIMPLEMENTED(); 3856 } 3857 return int_result; 3858} 3859 3860 3861int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3862 value = FPRoundInt(value, rmode); 3863 if (value >= kWMaxInt) { 3864 return kWMaxInt; 3865 } else if (value < kWMinInt) { 3866 return kWMinInt; 3867 } 3868 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3869} 3870 3871 3872int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3873 value = FPRoundInt(value, rmode); 3874 if (value >= kXMaxInt) { 3875 return kXMaxInt; 3876 } else if (value < kXMinInt) { 3877 return kXMinInt; 3878 } 3879 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3880} 3881 3882 3883uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3884 value = FPRoundInt(value, rmode); 3885 if (value >= kWMaxUInt) { 3886 return kWMaxUInt; 3887 } else if (value < 0.0) { 3888 return 0; 3889 } 3890 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3891} 3892 3893 3894uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3895 value = FPRoundInt(value, rmode); 3896 if (value >= kXMaxUInt) { 3897 return kXMaxUInt; 3898 } else if (value < 0.0) { 3899 return 0; 3900 } 3901 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3902} 3903 3904 3905#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3906 template <typename T> \ 3907 LogicVRegister Simulator::FN(VectorFormat vform, \ 3908 LogicVRegister dst, \ 3909 const LogicVRegister& src1, \ 3910 const LogicVRegister& src2) { \ 3911 dst.ClearForWrite(vform); \ 3912 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3913 T op1 = src1.Float<T>(i); \ 3914 T op2 = src2.Float<T>(i); \ 3915 T result; \ 3916 if (PROCNAN) { \ 3917 result = FPProcessNaNs(op1, op2); \ 3918 if (!std::isnan(result)) { \ 3919 result = OP(op1, op2); \ 3920 } \ 3921 } else { \ 3922 result = OP(op1, op2); \ 3923 } \ 3924 dst.SetFloat(i, result); \ 3925 } \ 3926 return dst; \ 3927 } \ 3928 \ 3929 LogicVRegister Simulator::FN(VectorFormat vform, \ 3930 LogicVRegister dst, \ 3931 const LogicVRegister& src1, \ 3932 const LogicVRegister& src2) { \ 3933 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3934 FN<float>(vform, dst, src1, src2); \ 3935 } else { \ 3936 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3937 FN<double>(vform, dst, src1, src2); \ 3938 } \ 3939 return dst; \ 3940 } 3941NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3942#undef DEFINE_NEON_FP_VECTOR_OP 3943 3944 3945LogicVRegister Simulator::fnmul(VectorFormat vform, 3946 LogicVRegister dst, 3947 const LogicVRegister& src1, 3948 const LogicVRegister& src2) { 3949 SimVRegister temp; 3950 LogicVRegister product = fmul(vform, temp, src1, src2); 3951 return fneg(vform, dst, product); 3952} 3953 3954 3955template <typename T> 3956LogicVRegister Simulator::frecps(VectorFormat vform, 3957 LogicVRegister dst, 3958 const LogicVRegister& src1, 3959 const LogicVRegister& src2) { 3960 dst.ClearForWrite(vform); 3961 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3962 T op1 = -src1.Float<T>(i); 3963 T op2 = src2.Float<T>(i); 3964 T result = FPProcessNaNs(op1, op2); 3965 dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); 3966 } 3967 return dst; 3968} 3969 3970 3971LogicVRegister Simulator::frecps(VectorFormat vform, 3972 LogicVRegister dst, 3973 const LogicVRegister& src1, 3974 const LogicVRegister& src2) { 3975 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3976 frecps<float>(vform, dst, src1, src2); 3977 } else { 3978 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3979 frecps<double>(vform, dst, src1, src2); 3980 } 3981 return dst; 3982} 3983 3984 3985template <typename T> 3986LogicVRegister Simulator::frsqrts(VectorFormat vform, 3987 LogicVRegister dst, 3988 const LogicVRegister& src1, 3989 const LogicVRegister& src2) { 3990 dst.ClearForWrite(vform); 3991 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3992 T op1 = -src1.Float<T>(i); 3993 T op2 = src2.Float<T>(i); 3994 T result = FPProcessNaNs(op1, op2); 3995 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 3996 } 3997 return dst; 3998} 3999 4000 4001LogicVRegister Simulator::frsqrts(VectorFormat vform, 4002 LogicVRegister dst, 4003 const LogicVRegister& src1, 4004 const LogicVRegister& src2) { 4005 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4006 frsqrts<float>(vform, dst, src1, src2); 4007 } else { 4008 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4009 frsqrts<double>(vform, dst, src1, src2); 4010 } 4011 return dst; 4012} 4013 4014 4015template <typename T> 4016LogicVRegister Simulator::fcmp(VectorFormat vform, 4017 LogicVRegister dst, 4018 const LogicVRegister& src1, 4019 const LogicVRegister& src2, 4020 Condition cond) { 4021 dst.ClearForWrite(vform); 4022 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4023 bool result = false; 4024 T op1 = src1.Float<T>(i); 4025 T op2 = src2.Float<T>(i); 4026 T nan_result = FPProcessNaNs(op1, op2); 4027 if (!std::isnan(nan_result)) { 4028 switch (cond) { 4029 case eq: 4030 result = (op1 == op2); 4031 break; 4032 case ge: 4033 result = (op1 >= op2); 4034 break; 4035 case gt: 
4036 result = (op1 > op2); 4037 break; 4038 case le: 4039 result = (op1 <= op2); 4040 break; 4041 case lt: 4042 result = (op1 < op2); 4043 break; 4044 default: 4045 VIXL_UNREACHABLE(); 4046 break; 4047 } 4048 } 4049 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 4050 } 4051 return dst; 4052} 4053 4054 4055LogicVRegister Simulator::fcmp(VectorFormat vform, 4056 LogicVRegister dst, 4057 const LogicVRegister& src1, 4058 const LogicVRegister& src2, 4059 Condition cond) { 4060 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4061 fcmp<float>(vform, dst, src1, src2, cond); 4062 } else { 4063 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4064 fcmp<double>(vform, dst, src1, src2, cond); 4065 } 4066 return dst; 4067} 4068 4069 4070LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4071 LogicVRegister dst, 4072 const LogicVRegister& src, 4073 Condition cond) { 4074 SimVRegister temp; 4075 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4076 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 4077 fcmp<float>(vform, dst, src, zero_reg, cond); 4078 } else { 4079 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4080 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 4081 fcmp<double>(vform, dst, src, zero_reg, cond); 4082 } 4083 return dst; 4084} 4085 4086 4087LogicVRegister Simulator::fabscmp(VectorFormat vform, 4088 LogicVRegister dst, 4089 const LogicVRegister& src1, 4090 const LogicVRegister& src2, 4091 Condition cond) { 4092 SimVRegister temp1, temp2; 4093 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4094 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4095 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4096 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4097 } else { 4098 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4099 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4100 LogicVRegister abs_src2 = 
fabs_<double>(vform, temp2, src2); 4101 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4102 } 4103 return dst; 4104} 4105 4106 4107template <typename T> 4108LogicVRegister Simulator::fmla(VectorFormat vform, 4109 LogicVRegister dst, 4110 const LogicVRegister& src1, 4111 const LogicVRegister& src2) { 4112 dst.ClearForWrite(vform); 4113 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4114 T op1 = src1.Float<T>(i); 4115 T op2 = src2.Float<T>(i); 4116 T acc = dst.Float<T>(i); 4117 T result = FPMulAdd(acc, op1, op2); 4118 dst.SetFloat(i, result); 4119 } 4120 return dst; 4121} 4122 4123 4124LogicVRegister Simulator::fmla(VectorFormat vform, 4125 LogicVRegister dst, 4126 const LogicVRegister& src1, 4127 const LogicVRegister& src2) { 4128 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4129 fmla<float>(vform, dst, src1, src2); 4130 } else { 4131 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4132 fmla<double>(vform, dst, src1, src2); 4133 } 4134 return dst; 4135} 4136 4137 4138template <typename T> 4139LogicVRegister Simulator::fmls(VectorFormat vform, 4140 LogicVRegister dst, 4141 const LogicVRegister& src1, 4142 const LogicVRegister& src2) { 4143 dst.ClearForWrite(vform); 4144 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4145 T op1 = -src1.Float<T>(i); 4146 T op2 = src2.Float<T>(i); 4147 T acc = dst.Float<T>(i); 4148 T result = FPMulAdd(acc, op1, op2); 4149 dst.SetFloat(i, result); 4150 } 4151 return dst; 4152} 4153 4154 4155LogicVRegister Simulator::fmls(VectorFormat vform, 4156 LogicVRegister dst, 4157 const LogicVRegister& src1, 4158 const LogicVRegister& src2) { 4159 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4160 fmls<float>(vform, dst, src1, src2); 4161 } else { 4162 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4163 fmls<double>(vform, dst, src1, src2); 4164 } 4165 return dst; 4166} 4167 4168 4169template <typename T> 4170LogicVRegister Simulator::fneg(VectorFormat vform, 4171 LogicVRegister dst, 4172 
const LogicVRegister& src) { 4173 dst.ClearForWrite(vform); 4174 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4175 T op = src.Float<T>(i); 4176 op = -op; 4177 dst.SetFloat(i, op); 4178 } 4179 return dst; 4180} 4181 4182 4183LogicVRegister Simulator::fneg(VectorFormat vform, 4184 LogicVRegister dst, 4185 const LogicVRegister& src) { 4186 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4187 fneg<float>(vform, dst, src); 4188 } else { 4189 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4190 fneg<double>(vform, dst, src); 4191 } 4192 return dst; 4193} 4194 4195 4196template <typename T> 4197LogicVRegister Simulator::fabs_(VectorFormat vform, 4198 LogicVRegister dst, 4199 const LogicVRegister& src) { 4200 dst.ClearForWrite(vform); 4201 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4202 T op = src.Float<T>(i); 4203 if (copysign(1.0, op) < 0.0) { 4204 op = -op; 4205 } 4206 dst.SetFloat(i, op); 4207 } 4208 return dst; 4209} 4210 4211 4212LogicVRegister Simulator::fabs_(VectorFormat vform, 4213 LogicVRegister dst, 4214 const LogicVRegister& src) { 4215 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4216 fabs_<float>(vform, dst, src); 4217 } else { 4218 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4219 fabs_<double>(vform, dst, src); 4220 } 4221 return dst; 4222} 4223 4224 4225LogicVRegister Simulator::fabd(VectorFormat vform, 4226 LogicVRegister dst, 4227 const LogicVRegister& src1, 4228 const LogicVRegister& src2) { 4229 SimVRegister temp; 4230 fsub(vform, temp, src1, src2); 4231 fabs_(vform, dst, temp); 4232 return dst; 4233} 4234 4235 4236LogicVRegister Simulator::fsqrt(VectorFormat vform, 4237 LogicVRegister dst, 4238 const LogicVRegister& src) { 4239 dst.ClearForWrite(vform); 4240 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4241 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4242 float result = FPSqrt(src.Float<float>(i)); 4243 dst.SetFloat(i, result); 4244 } 4245 } else { 4246 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4247 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4248 double result = FPSqrt(src.Float<double>(i)); 4249 dst.SetFloat(i, result); 4250 } 4251 } 4252 return dst; 4253} 4254 4255 4256#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4257 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4258 LogicVRegister dst, \ 4259 const LogicVRegister& src1, \ 4260 const LogicVRegister& src2) { \ 4261 SimVRegister temp1, temp2; \ 4262 uzp1(vform, temp1, src1, src2); \ 4263 uzp2(vform, temp2, src1, src2); \ 4264 FN(vform, dst, temp1, temp2); \ 4265 return dst; \ 4266 } \ 4267 \ 4268 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4269 LogicVRegister dst, \ 4270 const LogicVRegister& src) { \ 4271 if (vform == kFormatS) { \ 4272 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4273 dst.SetFloat(0, result); \ 4274 } else { \ 4275 VIXL_ASSERT(vform == kFormatD); \ 4276 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4277 dst.SetFloat(0, result); \ 4278 } \ 4279 dst.ClearForWrite(vform); \ 4280 return dst; \ 4281 } 4282NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4283#undef DEFINE_NEON_FP_PAIR_OP 4284 4285 4286LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4287 LogicVRegister dst, 4288 const LogicVRegister& src, 4289 FPMinMaxOp Op) { 4290 VIXL_ASSERT(vform == kFormat4S); 4291 USE(vform); 4292 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4293 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4294 float result = (this->*Op)(result1, result2); 4295 dst.ClearForWrite(kFormatS); 4296 dst.SetFloat<float>(0, result); 4297 return dst; 4298} 4299 4300 4301LogicVRegister Simulator::fmaxv(VectorFormat vform, 4302 LogicVRegister dst, 4303 const LogicVRegister& src) { 4304 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4305} 4306 4307 4308LogicVRegister Simulator::fminv(VectorFormat vform, 4309 LogicVRegister dst, 4310 const 
LogicVRegister& src) { 4311 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4312} 4313 4314 4315LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4316 LogicVRegister dst, 4317 const LogicVRegister& src) { 4318 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4319} 4320 4321 4322LogicVRegister Simulator::fminnmv(VectorFormat vform, 4323 LogicVRegister dst, 4324 const LogicVRegister& src) { 4325 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4326} 4327 4328 4329LogicVRegister Simulator::fmul(VectorFormat vform, 4330 LogicVRegister dst, 4331 const LogicVRegister& src1, 4332 const LogicVRegister& src2, 4333 int index) { 4334 dst.ClearForWrite(vform); 4335 SimVRegister temp; 4336 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4337 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4338 fmul<float>(vform, dst, src1, index_reg); 4339 4340 } else { 4341 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4342 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4343 fmul<double>(vform, dst, src1, index_reg); 4344 } 4345 return dst; 4346} 4347 4348 4349LogicVRegister Simulator::fmla(VectorFormat vform, 4350 LogicVRegister dst, 4351 const LogicVRegister& src1, 4352 const LogicVRegister& src2, 4353 int index) { 4354 dst.ClearForWrite(vform); 4355 SimVRegister temp; 4356 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4357 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4358 fmla<float>(vform, dst, src1, index_reg); 4359 4360 } else { 4361 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4362 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4363 fmla<double>(vform, dst, src1, index_reg); 4364 } 4365 return dst; 4366} 4367 4368 4369LogicVRegister Simulator::fmls(VectorFormat vform, 4370 LogicVRegister dst, 4371 const LogicVRegister& src1, 4372 const LogicVRegister& src2, 4373 int index) { 4374 dst.ClearForWrite(vform); 4375 SimVRegister temp; 4376 if 
(LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4377 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4378 fmls<float>(vform, dst, src1, index_reg); 4379 4380 } else { 4381 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4382 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4383 fmls<double>(vform, dst, src1, index_reg); 4384 } 4385 return dst; 4386} 4387 4388 4389LogicVRegister Simulator::fmulx(VectorFormat vform, 4390 LogicVRegister dst, 4391 const LogicVRegister& src1, 4392 const LogicVRegister& src2, 4393 int index) { 4394 dst.ClearForWrite(vform); 4395 SimVRegister temp; 4396 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4397 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4398 fmulx<float>(vform, dst, src1, index_reg); 4399 4400 } else { 4401 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4402 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4403 fmulx<double>(vform, dst, src1, index_reg); 4404 } 4405 return dst; 4406} 4407 4408 4409LogicVRegister Simulator::frint(VectorFormat vform, 4410 LogicVRegister dst, 4411 const LogicVRegister& src, 4412 FPRounding rounding_mode, 4413 bool inexact_exception) { 4414 dst.ClearForWrite(vform); 4415 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4416 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4417 float input = src.Float<float>(i); 4418 float rounded = FPRoundInt(input, rounding_mode); 4419 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4420 FPProcessException(); 4421 } 4422 dst.SetFloat<float>(i, rounded); 4423 } 4424 } else { 4425 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4426 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4427 double input = src.Float<double>(i); 4428 double rounded = FPRoundInt(input, rounding_mode); 4429 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4430 FPProcessException(); 4431 } 4432 
dst.SetFloat<double>(i, rounded); 4433 } 4434 } 4435 return dst; 4436} 4437 4438 4439LogicVRegister Simulator::fcvts(VectorFormat vform, 4440 LogicVRegister dst, 4441 const LogicVRegister& src, 4442 FPRounding rounding_mode, 4443 int fbits) { 4444 dst.ClearForWrite(vform); 4445 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4446 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4447 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4448 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4449 } 4450 } else { 4451 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4452 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4453 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4454 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4455 } 4456 } 4457 return dst; 4458} 4459 4460 4461LogicVRegister Simulator::fcvtu(VectorFormat vform, 4462 LogicVRegister dst, 4463 const LogicVRegister& src, 4464 FPRounding rounding_mode, 4465 int fbits) { 4466 dst.ClearForWrite(vform); 4467 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4468 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4469 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4470 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4471 } 4472 } else { 4473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4474 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4475 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4476 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4477 } 4478 } 4479 return dst; 4480} 4481 4482 4483LogicVRegister Simulator::fcvtl(VectorFormat vform, 4484 LogicVRegister dst, 4485 const LogicVRegister& src) { 4486 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4487 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4488 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 4489 } 4490 } else { 4491 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4492 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; 
i--) { 4493 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4494 } 4495 } 4496 return dst; 4497} 4498 4499 4500LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4501 LogicVRegister dst, 4502 const LogicVRegister& src) { 4503 int lane_count = LaneCountFromFormat(vform); 4504 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4505 for (int i = 0; i < lane_count; i++) { 4506 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4507 } 4508 } else { 4509 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4510 for (int i = 0; i < lane_count; i++) { 4511 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4512 } 4513 } 4514 return dst; 4515} 4516 4517 4518LogicVRegister Simulator::fcvtn(VectorFormat vform, 4519 LogicVRegister dst, 4520 const LogicVRegister& src) { 4521 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4522 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4523 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4524 } 4525 } else { 4526 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4527 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4528 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4529 } 4530 } 4531 return dst; 4532} 4533 4534 4535LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4536 LogicVRegister dst, 4537 const LogicVRegister& src) { 4538 int lane_count = LaneCountFromFormat(vform) / 2; 4539 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4540 for (int i = lane_count - 1; i >= 0; i--) { 4541 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4542 } 4543 } else { 4544 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4545 for (int i = lane_count - 1; i >= 0; i--) { 4546 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4547 } 4548 } 4549 return dst; 4550} 4551 4552 4553LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4554 LogicVRegister dst, 4555 const LogicVRegister& src) { 4556 
dst.ClearForWrite(vform); 4557 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4558 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4559 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4560 } 4561 return dst; 4562} 4563 4564 4565LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4566 LogicVRegister dst, 4567 const LogicVRegister& src) { 4568 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4569 int lane_count = LaneCountFromFormat(vform) / 2; 4570 for (int i = lane_count - 1; i >= 0; i--) { 4571 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4572 } 4573 return dst; 4574} 4575 4576 4577// Based on reference C function recip_sqrt_estimate from ARM ARM. 4578double Simulator::recip_sqrt_estimate(double a) { 4579 int q0, q1, s; 4580 double r; 4581 if (a < 0.5) { 4582 q0 = static_cast<int>(a * 512.0); 4583 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4584 } else { 4585 q1 = static_cast<int>(a * 256.0); 4586 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4587 } 4588 s = static_cast<int>(256.0 * r + 0.5); 4589 return static_cast<double>(s) / 256.0; 4590} 4591 4592 4593static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4594 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4595} 4596 4597 4598template <typename T> 4599T Simulator::FPRecipSqrtEstimate(T op) { 4600 if (std::isnan(op)) { 4601 return FPProcessNaN(op); 4602 } else if (op == 0.0) { 4603 if (copysign(1.0, op) < 0.0) { 4604 return kFP64NegativeInfinity; 4605 } else { 4606 return kFP64PositiveInfinity; 4607 } 4608 } else if (copysign(1.0, op) < 0.0) { 4609 FPProcessException(); 4610 return FPDefaultNaN<T>(); 4611 } else if (std::isinf(op)) { 4612 return 0.0; 4613 } else { 4614 uint64_t fraction; 4615 int exp, result_exp; 4616 4617 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4618 exp = FloatExp(op); 4619 fraction = FloatMantissa(op); 4620 fraction <<= 29; 4621 } else { 4622 exp 
= DoubleExp(op); 4623 fraction = DoubleMantissa(op); 4624 } 4625 4626 if (exp == 0) { 4627 while (Bits(fraction, 51, 51) == 0) { 4628 fraction = Bits(fraction, 50, 0) << 1; 4629 exp -= 1; 4630 } 4631 fraction = Bits(fraction, 50, 0) << 1; 4632 } 4633 4634 double scaled; 4635 if (Bits(exp, 0, 0) == 0) { 4636 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4637 } else { 4638 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4639 } 4640 4641 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4642 result_exp = (380 - exp) / 2; 4643 } else { 4644 result_exp = (3068 - exp) / 2; 4645 } 4646 4647 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 4648 4649 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4650 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4651 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4652 return FloatPack(0, exp_bits, est_bits); 4653 } else { 4654 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4655 } 4656 } 4657} 4658 4659 4660LogicVRegister Simulator::frsqrte(VectorFormat vform, 4661 LogicVRegister dst, 4662 const LogicVRegister& src) { 4663 dst.ClearForWrite(vform); 4664 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4665 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4666 float input = src.Float<float>(i); 4667 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4668 } 4669 } else { 4670 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4671 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4672 double input = src.Float<double>(i); 4673 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4674 } 4675 } 4676 return dst; 4677} 4678 4679template <typename T> 4680T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4681 uint32_t sign; 4682 4683 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4684 sign = FloatSign(op); 4685 } else { 4686 sign = DoubleSign(op); 4687 } 4688 4689 if 
(std::isnan(op)) { 4690 return FPProcessNaN(op); 4691 } else if (std::isinf(op)) { 4692 return (sign == 1) ? -0.0 : 0.0; 4693 } else if (op == 0.0) { 4694 FPProcessException(); // FPExc_DivideByZero exception. 4695 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4696 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4697 (std::fabs(op) < std::pow(2.0, -128.0))) || 4698 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4699 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4700 bool overflow_to_inf = false; 4701 switch (rounding) { 4702 case FPTieEven: 4703 overflow_to_inf = true; 4704 break; 4705 case FPPositiveInfinity: 4706 overflow_to_inf = (sign == 0); 4707 break; 4708 case FPNegativeInfinity: 4709 overflow_to_inf = (sign == 1); 4710 break; 4711 case FPZero: 4712 overflow_to_inf = false; 4713 break; 4714 default: 4715 break; 4716 } 4717 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4718 if (overflow_to_inf) { 4719 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4720 } else { 4721 // Return FPMaxNormal(sign). 
4722 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4723 return FloatPack(sign, 0xfe, 0x07fffff); 4724 } else { 4725 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 4726 } 4727 } 4728 } else { 4729 uint64_t fraction; 4730 int exp, result_exp; 4731 uint32_t sign; 4732 4733 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4734 sign = FloatSign(op); 4735 exp = FloatExp(op); 4736 fraction = FloatMantissa(op); 4737 fraction <<= 29; 4738 } else { 4739 sign = DoubleSign(op); 4740 exp = DoubleExp(op); 4741 fraction = DoubleMantissa(op); 4742 } 4743 4744 if (exp == 0) { 4745 if (Bits(fraction, 51, 51) == 0) { 4746 exp -= 1; 4747 fraction = Bits(fraction, 49, 0) << 2; 4748 } else { 4749 fraction = Bits(fraction, 50, 0) << 1; 4750 } 4751 } 4752 4753 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4754 4755 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4756 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4757 } else { 4758 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
4759 } 4760 4761 double estimate = recip_estimate(scaled); 4762 4763 fraction = DoubleMantissa(estimate); 4764 if (result_exp == 0) { 4765 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4766 } else if (result_exp == -1) { 4767 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4768 result_exp = 0; 4769 } 4770 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4771 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4772 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4773 return FloatPack(sign, exp_bits, frac_bits); 4774 } else { 4775 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4776 } 4777 } 4778} 4779 4780 4781LogicVRegister Simulator::frecpe(VectorFormat vform, 4782 LogicVRegister dst, 4783 const LogicVRegister& src, 4784 FPRounding round) { 4785 dst.ClearForWrite(vform); 4786 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4787 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4788 float input = src.Float<float>(i); 4789 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4790 } 4791 } else { 4792 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4793 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4794 double input = src.Float<double>(i); 4795 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4796 } 4797 } 4798 return dst; 4799} 4800 4801 4802LogicVRegister Simulator::ursqrte(VectorFormat vform, 4803 LogicVRegister dst, 4804 const LogicVRegister& src) { 4805 dst.ClearForWrite(vform); 4806 uint64_t operand; 4807 uint32_t result; 4808 double dp_operand, dp_result; 4809 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4810 operand = src.Uint(vform, i); 4811 if (operand <= 0x3FFFFFFF) { 4812 result = 0xFFFFFFFF; 4813 } else { 4814 dp_operand = operand * std::pow(2.0, -32); 4815 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4816 result = static_cast<uint32_t>(dp_result); 4817 } 4818 dst.SetUint(vform, i, result); 4819 } 
4820 return dst; 4821} 4822 4823 4824// Based on reference C function recip_estimate from ARM ARM. 4825double Simulator::recip_estimate(double a) { 4826 int q, s; 4827 double r; 4828 q = static_cast<int>(a * 512.0); 4829 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4830 s = static_cast<int>(256.0 * r + 0.5); 4831 return static_cast<double>(s) / 256.0; 4832} 4833 4834 4835LogicVRegister Simulator::urecpe(VectorFormat vform, 4836 LogicVRegister dst, 4837 const LogicVRegister& src) { 4838 dst.ClearForWrite(vform); 4839 uint64_t operand; 4840 uint32_t result; 4841 double dp_operand, dp_result; 4842 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4843 operand = src.Uint(vform, i); 4844 if (operand <= 0x7FFFFFFF) { 4845 result = 0xFFFFFFFF; 4846 } else { 4847 dp_operand = operand * std::pow(2.0, -32); 4848 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4849 result = static_cast<uint32_t>(dp_result); 4850 } 4851 dst.SetUint(vform, i, result); 4852 } 4853 return dst; 4854} 4855 4856template <typename T> 4857LogicVRegister Simulator::frecpx(VectorFormat vform, 4858 LogicVRegister dst, 4859 const LogicVRegister& src) { 4860 dst.ClearForWrite(vform); 4861 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4862 T op = src.Float<T>(i); 4863 T result; 4864 if (std::isnan(op)) { 4865 result = FPProcessNaN(op); 4866 } else { 4867 int exp; 4868 uint32_t sign; 4869 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4870 sign = FloatSign(op); 4871 exp = FloatExp(op); 4872 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4873 result = FloatPack(sign, exp, 0); 4874 } else { 4875 sign = DoubleSign(op); 4876 exp = DoubleExp(op); 4877 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4878 result = DoublePack(sign, exp, 0); 4879 } 4880 } 4881 dst.SetFloat(i, result); 4882 } 4883 return dst; 4884} 4885 4886 4887LogicVRegister Simulator::frecpx(VectorFormat vform, 4888 LogicVRegister dst, 4889 const LogicVRegister& src) { 4890 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4891 frecpx<float>(vform, dst, src); 4892 } else { 4893 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4894 frecpx<double>(vform, dst, src); 4895 } 4896 return dst; 4897} 4898 4899LogicVRegister Simulator::scvtf(VectorFormat vform, 4900 LogicVRegister dst, 4901 const LogicVRegister& src, 4902 int fbits, 4903 FPRounding round) { 4904 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4905 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4906 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4907 dst.SetFloat<float>(i, result); 4908 } else { 4909 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4910 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4911 dst.SetFloat<double>(i, result); 4912 } 4913 } 4914 return dst; 4915} 4916 4917 4918LogicVRegister Simulator::ucvtf(VectorFormat vform, 4919 LogicVRegister dst, 4920 const LogicVRegister& src, 4921 int fbits, 4922 FPRounding round) { 4923 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4924 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4925 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4926 dst.SetFloat<float>(i, result); 4927 } else { 4928 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4929 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4930 dst.SetFloat<double>(i, result); 4931 } 4932 } 4933 return dst; 4934} 4935 4936 4937} // namespace aarch64 4938} // namespace vixl 4939 4940#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 4941