1/* 2 * ARM NEON vector operations. 3 * 4 * Copyright (c) 2007, 2008 CodeSourcery. 5 * Written by Paul Brook 6 * 7 * This code is licenced under the GNU GPL v2. 8 */ 9#include <stdlib.h> 10#include <stdio.h> 11 12#include "cpu.h" 13#include "exec-all.h" 14#include "helpers.h" 15 16#define SIGNBIT (uint32_t)0x80000000 17#define SIGNBIT64 ((uint64_t)1 << 63) 18 19#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q 20 21static float_status neon_float_status; 22#define NFS &neon_float_status 23 24/* Helper routines to perform bitwise copies between float and int. */ 25static inline float32 vfp_itos(uint32_t i) 26{ 27 union { 28 uint32_t i; 29 float32 s; 30 } v; 31 32 v.i = i; 33 return v.s; 34} 35 36static inline uint32_t vfp_stoi(float32 s) 37{ 38 union { 39 uint32_t i; 40 float32 s; 41 } v; 42 43 v.s = s; 44 return v.i; 45} 46 47#define NEON_TYPE1(name, type) \ 48typedef struct \ 49{ \ 50 type v1; \ 51} neon_##name; 52#ifdef HOST_WORDS_BIGENDIAN 53#define NEON_TYPE2(name, type) \ 54typedef struct \ 55{ \ 56 type v2; \ 57 type v1; \ 58} neon_##name; 59#define NEON_TYPE4(name, type) \ 60typedef struct \ 61{ \ 62 type v4; \ 63 type v3; \ 64 type v2; \ 65 type v1; \ 66} neon_##name; 67#else 68#define NEON_TYPE2(name, type) \ 69typedef struct \ 70{ \ 71 type v1; \ 72 type v2; \ 73} neon_##name; 74#define NEON_TYPE4(name, type) \ 75typedef struct \ 76{ \ 77 type v1; \ 78 type v2; \ 79 type v3; \ 80 type v4; \ 81} neon_##name; 82#endif 83 84NEON_TYPE4(s8, int8_t) 85NEON_TYPE4(u8, uint8_t) 86NEON_TYPE2(s16, int16_t) 87NEON_TYPE2(u16, uint16_t) 88NEON_TYPE1(s32, int32_t) 89NEON_TYPE1(u32, uint32_t) 90#undef NEON_TYPE4 91#undef NEON_TYPE2 92#undef NEON_TYPE1 93 94/* Copy from a uint32_t to a vector structure type. */ 95#define NEON_UNPACK(vtype, dest, val) do { \ 96 union { \ 97 vtype v; \ 98 uint32_t i; \ 99 } conv_u; \ 100 conv_u.i = (val); \ 101 dest = conv_u.v; \ 102 } while(0) 103 104/* Copy from a vector structure type to a uint32_t. */ 105#define NEON_PACK(vtype, dest, val) do { \ 106 union { \ 107 vtype v; \ 108 uint32_t i; \ 109 } conv_u; \ 110 conv_u.v = (val); \ 111 dest = conv_u.i; \ 112 } while(0) 113 114#define NEON_DO1 \ 115 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); 116#define NEON_DO2 \ 117 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ 118 NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); 119#define NEON_DO4 \ 120 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ 121 NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ 122 NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ 123 NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4); 124 125#define NEON_VOP_BODY(vtype, n) \ 126{ \ 127 uint32_t res; \ 128 vtype vsrc1; \ 129 vtype vsrc2; \ 130 vtype vdest; \ 131 NEON_UNPACK(vtype, vsrc1, arg1); \ 132 NEON_UNPACK(vtype, vsrc2, arg2); \ 133 NEON_DO##n; \ 134 NEON_PACK(vtype, res, vdest); \ 135 return res; \ 136} 137 138#define NEON_VOP(name, vtype, n) \ 139uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ 140NEON_VOP_BODY(vtype, n) 141 142#define NEON_VOP_ENV(name, vtype, n) \ 143uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \ 144NEON_VOP_BODY(vtype, n) 145 146/* Pairwise operations. */ 147/* For 32-bit elements each segment only contains a single element, so 148 the elementwise and pairwise operations are the same. */ 149#define NEON_PDO2 \ 150 NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ 151 NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2); 152#define NEON_PDO4 \ 153 NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ 154 NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ 155 NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ 156 NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \ 157 158#define NEON_POP(name, vtype, n) \ 159uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ 160{ \ 161 uint32_t res; \ 162 vtype vsrc1; \ 163 vtype vsrc2; \ 164 vtype vdest; \ 165 NEON_UNPACK(vtype, vsrc1, arg1); \ 166 NEON_UNPACK(vtype, vsrc2, arg2); \ 167 NEON_PDO##n; \ 168 NEON_PACK(vtype, res, vdest); \ 169 return res; \ 170} 171 172/* Unary operators. */ 173#define NEON_VOP1(name, vtype, n) \ 174uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ 175{ \ 176 vtype vsrc1; \ 177 vtype vdest; \ 178 NEON_UNPACK(vtype, vsrc1, arg); \ 179 NEON_DO##n; \ 180 NEON_PACK(vtype, arg, vdest); \ 181 return arg; \ 182} 183 184 185#define NEON_USAT(dest, src1, src2, type) do { \ 186 uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ 187 if (tmp != (type)tmp) { \ 188 SET_QC(); \ 189 dest = ~0; \ 190 } else { \ 191 dest = tmp; \ 192 }} while(0) 193#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) 194NEON_VOP_ENV(qadd_u8, neon_u8, 4) 195#undef NEON_FN 196#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) 197NEON_VOP_ENV(qadd_u16, neon_u16, 2) 198#undef NEON_FN 199#undef NEON_USAT 200 201#define NEON_SSAT(dest, src1, src2, type) do { \ 202 int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ 203 if (tmp != (type)tmp) { \ 204 SET_QC(); \ 205 if (src2 > 0) { \ 206 tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ 207 } else { \ 208 tmp = 1 << (sizeof(type) * 8 - 1); \ 209 } \ 210 } \ 211 dest = tmp; \ 212 } while(0) 213#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) 214NEON_VOP_ENV(qadd_s8, neon_s8, 4) 215#undef NEON_FN 216#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) 217NEON_VOP_ENV(qadd_s16, neon_s16, 2) 218#undef NEON_FN 219#undef NEON_SSAT 220 221#define NEON_USAT(dest, src1, src2, type) do { \ 222 uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ 223 if (tmp != (type)tmp) { \ 224 SET_QC(); \ 225 dest = 0; \ 226 } else { \ 227 dest = tmp; \ 228 }} while(0) 229#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) 230NEON_VOP_ENV(qsub_u8, neon_u8, 4) 231#undef NEON_FN 232#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) 233NEON_VOP_ENV(qsub_u16, neon_u16, 2) 234#undef NEON_FN 235#undef NEON_USAT 236 237#define NEON_SSAT(dest, src1, src2, type) do { \ 238 int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ 239 if (tmp != (type)tmp) { \ 240 SET_QC(); \ 241 if (src2 < 0) { \ 242 tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ 243 } else { \ 244 tmp = 1 << (sizeof(type) * 8 - 1); \ 245 } \ 246 } \ 247 dest = tmp; \ 248 } while(0) 249#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) 250NEON_VOP_ENV(qsub_s8, neon_s8, 4) 251#undef NEON_FN 252#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) 253NEON_VOP_ENV(qsub_s16, neon_s16, 2) 254#undef NEON_FN 255#undef NEON_SSAT 256 257#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 258NEON_VOP(hadd_s8, neon_s8, 4) 259NEON_VOP(hadd_u8, neon_u8, 4) 260NEON_VOP(hadd_s16, neon_s16, 2) 261NEON_VOP(hadd_u16, neon_u16, 2) 262#undef NEON_FN 263 264int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) 265{ 266 int32_t dest; 267 268 dest = (src1 >> 1) + (src2 >> 1); 269 if (src1 & src2 & 1) 270 dest++; 271 return dest; 272} 273 274uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) 275{ 276 uint32_t dest; 277 278 dest = (src1 >> 1) + (src2 >> 1); 279 if (src1 & src2 & 1) 280 dest++; 281 return dest; 282} 283 284#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 285NEON_VOP(rhadd_s8, neon_s8, 4) 286NEON_VOP(rhadd_u8, neon_u8, 4) 287NEON_VOP(rhadd_s16, neon_s16, 2) 288NEON_VOP(rhadd_u16, neon_u16, 2) 289#undef NEON_FN 290 291int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) 292{ 293 int32_t dest; 294 295 dest = (src1 >> 1) + (src2 >> 1); 296 if ((src1 | src2) & 1) 297 dest++; 298 return dest; 299} 300 301uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) 302{ 303 uint32_t dest; 304 305 dest = (src1 >> 1) + (src2 >> 1); 306 if ((src1 | src2) & 1) 307 dest++; 308 return dest; 309} 310 311#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 312NEON_VOP(hsub_s8, neon_s8, 4) 313NEON_VOP(hsub_u8, neon_u8, 4) 314NEON_VOP(hsub_s16, neon_s16, 2) 315NEON_VOP(hsub_u16, neon_u16, 2) 316#undef NEON_FN 317 318int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) 319{ 320 int32_t dest; 321 322 dest = (src1 >> 1) - (src2 >> 1); 323 if ((~src1) & src2 & 1) 324 dest--; 325 return dest; 326} 327 328uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) 329{ 330 uint32_t dest; 331 332 dest = (src1 >> 1) - (src2 >> 1); 333 if ((~src1) & src2 & 1) 334 dest--; 335 return dest; 336} 337 338#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 339NEON_VOP(cgt_s8, neon_s8, 4) 340NEON_VOP(cgt_u8, neon_u8, 4) 341NEON_VOP(cgt_s16, neon_s16, 2) 342NEON_VOP(cgt_u16, neon_u16, 2) 343NEON_VOP(cgt_s32, neon_s32, 1) 344NEON_VOP(cgt_u32, neon_u32, 1) 345#undef NEON_FN 346 347#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 348NEON_VOP(cge_s8, neon_s8, 4) 349NEON_VOP(cge_u8, neon_u8, 4) 350NEON_VOP(cge_s16, neon_s16, 2) 351NEON_VOP(cge_u16, neon_u16, 2) 352NEON_VOP(cge_s32, neon_s32, 1) 353NEON_VOP(cge_u32, neon_u32, 1) 354#undef NEON_FN 355 356#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 357NEON_VOP(min_s8, neon_s8, 4) 358NEON_VOP(min_u8, neon_u8, 4) 359NEON_VOP(min_s16, neon_s16, 2) 360NEON_VOP(min_u16, neon_u16, 2) 361NEON_VOP(min_s32, neon_s32, 1) 362NEON_VOP(min_u32, neon_u32, 1) 363NEON_POP(pmin_s8, neon_s8, 4) 364NEON_POP(pmin_u8, neon_u8, 4) 365NEON_POP(pmin_s16, neon_s16, 2) 366NEON_POP(pmin_u16, neon_u16, 2) 367#undef NEON_FN 368 369#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 370NEON_VOP(max_s8, neon_s8, 4) 371NEON_VOP(max_u8, neon_u8, 4) 372NEON_VOP(max_s16, neon_s16, 2) 373NEON_VOP(max_u16, neon_u16, 2) 374NEON_VOP(max_s32, neon_s32, 1) 375NEON_VOP(max_u32, neon_u32, 1) 376NEON_POP(pmax_s8, neon_s8, 4) 377NEON_POP(pmax_u8, neon_u8, 4) 378NEON_POP(pmax_s16, neon_s16, 2) 379NEON_POP(pmax_u16, neon_u16, 2) 380#undef NEON_FN 381 382#define NEON_FN(dest, src1, src2) \ 383 dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) 384NEON_VOP(abd_s8, neon_s8, 4) 385NEON_VOP(abd_u8, neon_u8, 4) 386NEON_VOP(abd_s16, neon_s16, 2) 387NEON_VOP(abd_u16, neon_u16, 2) 388NEON_VOP(abd_s32, neon_s32, 1) 389NEON_VOP(abd_u32, neon_u32, 1) 390#undef NEON_FN 391 392#define NEON_FN(dest, src1, src2) do { \ 393 int8_t tmp; \ 394 tmp = (int8_t)src2; \ 395 if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ 396 dest = 0; \ 397 } else if (tmp < 0) { \ 398 dest = src1 >> -tmp; \ 399 } else { \ 400 dest = src1 << tmp; \ 401 }} while (0) 402NEON_VOP(shl_u8, neon_u8, 4) 403NEON_VOP(shl_u16, neon_u16, 2) 404NEON_VOP(shl_u32, neon_u32, 1) 405#undef NEON_FN 406 407uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) 408{ 409 int8_t shift = (int8_t)shiftop; 410 if (shift >= 64 || shift <= -64) { 411 val = 0; 412 } else if (shift < 0) { 413 val >>= -shift; 414 } else { 415 val <<= shift; 416 } 417 return val; 418} 419 420#define NEON_FN(dest, src1, src2) do { \ 421 int8_t tmp; \ 422 tmp = (int8_t)src2; \ 423 if (tmp >= sizeof(src1) * 8) { \ 424 dest = 0; \ 425 } else if (tmp <= -sizeof(src1) * 8) { \ 426 dest = src1 >> (sizeof(src1) * 8 - 1); \ 427 } else if (tmp < 0) { \ 428 dest = src1 >> -tmp; \ 429 } else { \ 430 dest = src1 << tmp; \ 431 }} while (0) 432NEON_VOP(shl_s8, neon_s8, 4) 433NEON_VOP(shl_s16, neon_s16, 2) 434NEON_VOP(shl_s32, neon_s32, 1) 435#undef NEON_FN 436 437uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) 438{ 439 int8_t shift = (int8_t)shiftop; 440 int64_t val = valop; 441 if (shift >= 64) { 442 val = 0; 443 } else if (shift <= -64) { 444 val >>= 63; 445 } else if (shift < 0) { 446 val >>= -shift; 447 } else { 448 val <<= shift; 449 } 450 return val; 451} 452 453#define NEON_FN(dest, src1, src2) do { \ 454 int8_t tmp; \ 455 tmp = (int8_t)src2; \ 456 if (tmp >= sizeof(src1) * 8) { \ 457 dest = 0; \ 458 } else if (tmp < -sizeof(src1) * 8) { \ 459 dest = src1 >> (sizeof(src1) * 8 - 1); \ 460 } else if (tmp == -sizeof(src1) * 8) { \ 461 dest = src1 >> (tmp - 1); \ 462 dest++; \ 463 dest >>= 1; \ 464 } else if (tmp < 0) { \ 465 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 466 } else { \ 467 dest = src1 << tmp; \ 468 }} while (0) 469NEON_VOP(rshl_s8, neon_s8, 4) 470NEON_VOP(rshl_s16, neon_s16, 2) 471NEON_VOP(rshl_s32, neon_s32, 1) 472#undef NEON_FN 473 474uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) 475{ 476 int8_t shift = (int8_t)shiftop; 477 int64_t val = valop; 478 if (shift >= 64) { 479 val = 0; 480 } else if (shift < -64) { 481 val >>= 63; 482 } else if (shift == -63) { 483 val >>= 63; 484 val++; 485 val >>= 1; 486 } else if (shift < 0) { 487 val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; 488 } else { 489 val <<= shift; 490 } 491 return val; 492} 493 494#define NEON_FN(dest, src1, src2) do { \ 495 int8_t tmp; \ 496 tmp = (int8_t)src2; \ 497 if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ 498 dest = 0; \ 499 } else if (tmp == -sizeof(src1) * 8) { \ 500 dest = src1 >> (tmp - 1); \ 501 } else if (tmp < 0) { \ 502 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 503 } else { \ 504 dest = src1 << tmp; \ 505 }} while (0) 506NEON_VOP(rshl_u8, neon_u8, 4) 507NEON_VOP(rshl_u16, neon_u16, 2) 508NEON_VOP(rshl_u32, neon_u32, 1) 509#undef NEON_FN 510 511uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) 512{ 513 int8_t shift = (uint8_t)shiftop; 514 if (shift >= 64 || shift < 64) { 515 val = 0; 516 } else if (shift == -64) { 517 /* Rounding a 1-bit result just preserves that bit. */ 518 val >>= 63; 519 } if (shift < 0) { 520 val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; 521 val >>= -shift; 522 } else { 523 val <<= shift; 524 } 525 return val; 526} 527 528#define NEON_FN(dest, src1, src2) do { \ 529 int8_t tmp; \ 530 tmp = (int8_t)src2; \ 531 if (tmp >= sizeof(src1) * 8) { \ 532 if (src1) { \ 533 SET_QC(); \ 534 dest = ~0; \ 535 } else { \ 536 dest = 0; \ 537 } \ 538 } else if (tmp <= -sizeof(src1) * 8) { \ 539 dest = 0; \ 540 } else if (tmp < 0) { \ 541 dest = src1 >> -tmp; \ 542 } else { \ 543 dest = src1 << tmp; \ 544 if ((dest >> tmp) != src1) { \ 545 SET_QC(); \ 546 dest = ~0; \ 547 } \ 548 }} while (0) 549NEON_VOP_ENV(qshl_u8, neon_u8, 4) 550NEON_VOP_ENV(qshl_u16, neon_u16, 2) 551NEON_VOP_ENV(qshl_u32, neon_u32, 1) 552#undef NEON_FN 553 554uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) 555{ 556 int8_t shift = (int8_t)shiftop; 557 if (shift >= 64) { 558 if (val) { 559 val = ~(uint64_t)0; 560 SET_QC(); 561 } else { 562 val = 0; 563 } 564 } else if (shift <= -64) { 565 val = 0; 566 } else if (shift < 0) { 567 val >>= -shift; 568 } else { 569 uint64_t tmp = val; 570 val <<= shift; 571 if ((val >> shift) != tmp) { 572 SET_QC(); 573 val = ~(uint64_t)0; 574 } 575 } 576 return val; 577} 578 579#define NEON_FN(dest, src1, src2) do { \ 580 int8_t tmp; \ 581 tmp = (int8_t)src2; \ 582 if (tmp >= sizeof(src1) * 8) { \ 583 if (src1) \ 584 SET_QC(); \ 585 dest = src1 >> 31; \ 586 } else if (tmp <= -sizeof(src1) * 8) { \ 587 dest = src1 >> 31; \ 588 } else if (tmp < 0) { \ 589 dest = src1 >> -tmp; \ 590 } else { \ 591 dest = src1 << tmp; \ 592 if ((dest >> tmp) != src1) { \ 593 SET_QC(); \ 594 dest = src2 >> 31; \ 595 } \ 596 }} while (0) 597NEON_VOP_ENV(qshl_s8, neon_s8, 4) 598NEON_VOP_ENV(qshl_s16, neon_s16, 2) 599NEON_VOP_ENV(qshl_s32, neon_s32, 1) 600#undef NEON_FN 601 602uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) 603{ 604 int8_t shift = (uint8_t)shiftop; 605 int64_t val = valop; 606 if (shift >= 64) { 607 if (val) { 608 SET_QC(); 609 val = (val >> 63) & ~SIGNBIT64; 610 } 611 } else if (shift <= 64) { 612 val >>= 63; 613 } else if (shift < 0) { 614 val >>= -shift; 615 } else { 616 int64_t tmp = val; 617 val <<= shift; 618 if ((val >> shift) != tmp) { 619 SET_QC(); 620 val = (tmp >> 63) ^ ~SIGNBIT64; 621 } 622 } 623 return val; 624} 625 626 627/* FIXME: This is wrong. */ 628#define NEON_FN(dest, src1, src2) do { \ 629 int8_t tmp; \ 630 tmp = (int8_t)src2; \ 631 if (tmp < 0) { \ 632 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 633 } else { \ 634 dest = src1 << tmp; \ 635 if ((dest >> tmp) != src1) { \ 636 SET_QC(); \ 637 dest = ~0; \ 638 } \ 639 }} while (0) 640NEON_VOP_ENV(qrshl_u8, neon_u8, 4) 641NEON_VOP_ENV(qrshl_u16, neon_u16, 2) 642NEON_VOP_ENV(qrshl_u32, neon_u32, 1) 643#undef NEON_FN 644 645uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) 646{ 647 int8_t shift = (int8_t)shiftop; 648 if (shift < 0) { 649 val = (val + (1 << (-1 - shift))) >> -shift; 650 } else { \ 651 uint64_t tmp = val; 652 val <<= shift; 653 if ((val >> shift) != tmp) { 654 SET_QC(); 655 val = ~0; 656 } 657 } 658 return val; 659} 660 661#define NEON_FN(dest, src1, src2) do { \ 662 int8_t tmp; \ 663 tmp = (int8_t)src2; \ 664 if (tmp < 0) { \ 665 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 666 } else { \ 667 dest = src1 << tmp; \ 668 if ((dest >> tmp) != src1) { \ 669 SET_QC(); \ 670 dest = src1 >> 31; \ 671 } \ 672 }} while (0) 673NEON_VOP_ENV(qrshl_s8, neon_s8, 4) 674NEON_VOP_ENV(qrshl_s16, neon_s16, 2) 675NEON_VOP_ENV(qrshl_s32, neon_s32, 1) 676#undef NEON_FN 677 678uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) 679{ 680 int8_t shift = (uint8_t)shiftop; 681 int64_t val = valop; 682 683 if (shift < 0) { 684 val = (val + (1 << (-1 - shift))) >> -shift; 685 } else { 686 int64_t tmp = val;; 687 val <<= shift; 688 if ((val >> shift) != tmp) { 689 SET_QC(); 690 val = tmp >> 31; 691 } 692 } 693 return val; 694} 695 696uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) 697{ 698 uint32_t mask; 699 mask = (a ^ b) & 0x80808080u; 700 a &= ~0x80808080u; 701 b &= ~0x80808080u; 702 return (a + b) ^ mask; 703} 704 705uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) 706{ 707 uint32_t mask; 708 mask = (a ^ b) & 0x80008000u; 709 a &= ~0x80008000u; 710 b &= ~0x80008000u; 711 return (a + b) ^ mask; 712} 713 714#define NEON_FN(dest, src1, src2) dest = src1 + src2 715NEON_POP(padd_u8, neon_u8, 4) 716NEON_POP(padd_u16, neon_u16, 2) 717#undef NEON_FN 718 719#define NEON_FN(dest, src1, src2) dest = src1 - src2 720NEON_VOP(sub_u8, neon_u8, 4) 721NEON_VOP(sub_u16, neon_u16, 2) 722#undef NEON_FN 723 724#define NEON_FN(dest, src1, src2) dest = src1 * src2 725NEON_VOP(mul_u8, neon_u8, 4) 726NEON_VOP(mul_u16, neon_u16, 2) 727#undef NEON_FN 728 729/* Polynomial multiplication is like integer multiplication except the 730 partial products are XORed, not added. */ 731uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) 732{ 733 uint32_t mask; 734 uint32_t result; 735 result = 0; 736 while (op1) { 737 mask = 0; 738 if (op1 & 1) 739 mask |= 0xff; 740 if (op1 & (1 << 8)) 741 mask |= (0xff << 8); 742 if (op1 & (1 << 16)) 743 mask |= (0xff << 16); 744 if (op1 & (1 << 24)) 745 mask |= (0xff << 24); 746 result ^= op2 & mask; 747 op1 = (op1 >> 1) & 0x7f7f7f7f; 748 op2 = (op2 << 1) & 0xfefefefe; 749 } 750 return result; 751} 752 753#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 754NEON_VOP(tst_u8, neon_u8, 4) 755NEON_VOP(tst_u16, neon_u16, 2) 756NEON_VOP(tst_u32, neon_u32, 1) 757#undef NEON_FN 758 759#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 760NEON_VOP(ceq_u8, neon_u8, 4) 761NEON_VOP(ceq_u16, neon_u16, 2) 762NEON_VOP(ceq_u32, neon_u32, 1) 763#undef NEON_FN 764 765#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src 766NEON_VOP1(abs_s8, neon_s8, 4) 767NEON_VOP1(abs_s16, neon_s16, 2) 768#undef NEON_FN 769 770/* Count Leading Sign/Zero Bits. */ 771static inline int do_clz8(uint8_t x) 772{ 773 int n; 774 for (n = 8; x; n--) 775 x >>= 1; 776 return n; 777} 778 779static inline int do_clz16(uint16_t x) 780{ 781 int n; 782 for (n = 16; x; n--) 783 x >>= 1; 784 return n; 785} 786 787#define NEON_FN(dest, src, dummy) dest = do_clz8(src) 788NEON_VOP1(clz_u8, neon_u8, 4) 789#undef NEON_FN 790 791#define NEON_FN(dest, src, dummy) dest = do_clz16(src) 792NEON_VOP1(clz_u16, neon_u16, 2) 793#undef NEON_FN 794 795#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 796NEON_VOP1(cls_s8, neon_s8, 4) 797#undef NEON_FN 798 799#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 800NEON_VOP1(cls_s16, neon_s16, 2) 801#undef NEON_FN 802 803uint32_t HELPER(neon_cls_s32)(uint32_t x) 804{ 805 int count; 806 if ((int32_t)x < 0) 807 x = ~x; 808 for (count = 32; x; count--) 809 x = x >> 1; 810 return count - 1; 811} 812 813/* Bit count. */ 814uint32_t HELPER(neon_cnt_u8)(uint32_t x) 815{ 816 x = (x & 0x55555555) + ((x >> 1) & 0x55555555); 817 x = (x & 0x33333333) + ((x >> 2) & 0x33333333); 818 x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); 819 return x; 820} 821 822#define NEON_QDMULH16(dest, src1, src2, round) do { \ 823 uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ 824 if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ 825 SET_QC(); \ 826 tmp = (tmp >> 31) ^ ~SIGNBIT; \ 827 } \ 828 tmp <<= 1; \ 829 if (round) { \ 830 int32_t old = tmp; \ 831 tmp += 1 << 15; \ 832 if ((int32_t)tmp < old) { \ 833 SET_QC(); \ 834 tmp = SIGNBIT - 1; \ 835 } \ 836 } \ 837 dest = tmp >> 16; \ 838 } while(0) 839#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) 840NEON_VOP_ENV(qdmulh_s16, neon_s16, 2) 841#undef NEON_FN 842#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) 843NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) 844#undef NEON_FN 845#undef NEON_QDMULH16 846 847#define NEON_QDMULH32(dest, src1, src2, round) do { \ 848 uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ 849 if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ 850 SET_QC(); \ 851 tmp = (tmp >> 63) ^ ~SIGNBIT64; \ 852 } else { \ 853 tmp <<= 1; \ 854 } \ 855 if (round) { \ 856 int64_t old = tmp; \ 857 tmp += (int64_t)1 << 31; \ 858 if ((int64_t)tmp < old) { \ 859 SET_QC(); \ 860 tmp = SIGNBIT64 - 1; \ 861 } \ 862 } \ 863 dest = tmp >> 32; \ 864 } while(0) 865#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) 866NEON_VOP_ENV(qdmulh_s32, neon_s32, 1) 867#undef NEON_FN 868#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) 869NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) 870#undef NEON_FN 871#undef NEON_QDMULH32 872 873uint32_t HELPER(neon_narrow_u8)(uint64_t x) 874{ 875 return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) 876 | ((x >> 24) & 0xff000000u); 877} 878 879uint32_t HELPER(neon_narrow_u16)(uint64_t x) 880{ 881 return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); 882} 883 884uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) 885{ 886 return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) 887 | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); 888} 889 890uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) 891{ 892 return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); 893} 894 895uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) 896{ 897 x &= 0xff80ff80ff80ff80ull; 898 x += 0x0080008000800080ull; 899 return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) 900 | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); 901} 902 903uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) 904{ 905 x &= 0xffff8000ffff8000ull; 906 x += 0x0000800000008000ull; 907 return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); 908} 909 910uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) 911{ 912 uint16_t s; 913 uint8_t d; 914 uint32_t res = 0; 915#define SAT8(n) \ 916 s = x >> n; \ 917 if (s > 0xff) { \ 918 d = 0xff; \ 919 SET_QC(); \ 920 } else { \ 921 d = s; \ 922 } \ 923 res |= (uint32_t)d << (n / 2); 924 925 SAT8(0); 926 SAT8(16); 927 SAT8(32); 928 SAT8(48); 929#undef SAT8 930 return res; 931} 932 933uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) 934{ 935 int16_t s; 936 uint8_t d; 937 uint32_t res = 0; 938#define SAT8(n) \ 939 s = x >> n; \ 940 if (s != (int8_t)s) { \ 941 d = (s >> 15) ^ 0x7f; \ 942 SET_QC(); \ 943 } else { \ 944 d = s; \ 945 } \ 946 res |= (uint32_t)d << (n / 2); 947 948 SAT8(0); 949 SAT8(16); 950 SAT8(32); 951 SAT8(48); 952#undef SAT8 953 return res; 954} 955 956uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) 957{ 958 uint32_t high; 959 uint32_t low; 960 low = x; 961 if (low > 0xffff) { 962 low = 0xffff; 963 SET_QC(); 964 } 965 high = x >> 32; 966 if (high > 0xffff) { 967 high = 0xffff; 968 SET_QC(); 969 } 970 return low | (high << 16); 971} 972 973uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) 974{ 975 int32_t low; 976 int32_t high; 977 low = x; 978 if (low != (int16_t)low) { 979 low = (low >> 31) ^ 0x7fff; 980 SET_QC(); 981 } 982 high = x >> 32; 983 if (high != (int16_t)high) { 984 high = (high >> 31) ^ 0x7fff; 985 SET_QC(); 986 } 987 return (uint16_t)low | (high << 16); 988} 989 990uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) 991{ 992 if (x > 0xffffffffu) { 993 SET_QC(); 994 return 0xffffffffu; 995 } 996 return x; 997} 998 999uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) 1000{ 1001 if ((int64_t)x != (int32_t)x) { 1002 SET_QC(); 1003 return (x >> 63) ^ 0x7fffffff; 1004 } 1005 return x; 1006} 1007 1008uint64_t HELPER(neon_widen_u8)(uint32_t x) 1009{ 1010 uint64_t tmp; 1011 uint64_t ret; 1012 ret = (uint8_t)x; 1013 tmp = (uint8_t)(x >> 8); 1014 ret |= tmp << 16; 1015 tmp = (uint8_t)(x >> 16); 1016 ret |= tmp << 32; 1017 tmp = (uint8_t)(x >> 24); 1018 ret |= tmp << 48; 1019 return ret; 1020} 1021 1022uint64_t HELPER(neon_widen_s8)(uint32_t x) 1023{ 1024 uint64_t tmp; 1025 uint64_t ret; 1026 ret = (uint16_t)(int8_t)x; 1027 tmp = (uint16_t)(int8_t)(x >> 8); 1028 ret |= tmp << 16; 1029 tmp = (uint16_t)(int8_t)(x >> 16); 1030 ret |= tmp << 32; 1031 tmp = (uint16_t)(int8_t)(x >> 24); 1032 ret |= tmp << 48; 1033 return ret; 1034} 1035 1036uint64_t HELPER(neon_widen_u16)(uint32_t x) 1037{ 1038 uint64_t high = (uint16_t)(x >> 16); 1039 return ((uint16_t)x) | (high << 32); 1040} 1041 1042uint64_t HELPER(neon_widen_s16)(uint32_t x) 1043{ 1044 uint64_t high = (int16_t)(x >> 16); 1045 return ((uint32_t)(int16_t)x) | (high << 32); 1046} 1047 1048uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) 1049{ 1050 uint64_t mask; 1051 mask = (a ^ b) & 0x8000800080008000ull; 1052 a &= ~0x8000800080008000ull; 1053 b &= ~0x8000800080008000ull; 1054 return (a + b) ^ mask; 1055} 1056 1057uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) 1058{ 1059 uint64_t mask; 1060 mask = (a ^ b) & 0x8000000080000000ull; 1061 a &= ~0x8000000080000000ull; 1062 b &= ~0x8000000080000000ull; 1063 return (a + b) ^ mask; 1064} 1065 1066uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) 1067{ 1068 uint64_t tmp; 1069 uint64_t tmp2; 1070 1071 tmp = a & 0x0000ffff0000ffffull; 1072 tmp += (a >> 16) & 0x0000ffff0000ffffull; 1073 tmp2 = b & 0xffff0000ffff0000ull; 1074 tmp2 += (b << 16) & 0xffff0000ffff0000ull; 1075 return ( tmp & 0xffff) 1076 | ((tmp >> 16) & 0xffff0000ull) 1077 | ((tmp2 << 16) & 0xffff00000000ull) 1078 | ( tmp2 & 0xffff000000000000ull); 1079} 1080 1081uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) 1082{ 1083 uint32_t low = a + (a >> 32); 1084 uint32_t high = b + (b >> 32); 1085 return low + ((uint64_t)high << 32); 1086} 1087 1088uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) 1089{ 1090 uint64_t mask; 1091 mask = (a ^ ~b) & 0x8000800080008000ull; 1092 a |= 0x8000800080008000ull; 1093 b &= ~0x8000800080008000ull; 1094 return (a - b) ^ mask; 1095} 1096 1097uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) 1098{ 1099 uint64_t mask; 1100 mask = (a ^ ~b) & 0x8000000080000000ull; 1101 a |= 0x8000000080000000ull; 1102 b &= ~0x8000000080000000ull; 1103 return (a - b) ^ mask; 1104} 1105 1106uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) 1107{ 1108 uint32_t x, y; 1109 uint32_t low, high; 1110 1111 x = a; 1112 y = b; 1113 low = x + y; 1114 if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { 1115 SET_QC(); 1116 low = ((int32_t)x >> 31) ^ ~SIGNBIT; 1117 } 1118 x = a >> 32; 1119 y = b >> 32; 1120 high = x + y; 1121 if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { 1122 SET_QC(); 1123 high = ((int32_t)x >> 31) ^ ~SIGNBIT; 1124 } 1125 return low | ((uint64_t)high << 32); 1126} 1127 1128uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) 1129{ 1130 uint64_t result; 1131 1132 result = a + b; 1133 if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { 1134 SET_QC(); 1135 result = ((int64_t)a >> 63) ^ ~SIGNBIT64; 1136 } 1137 return result; 1138} 1139 1140#define DO_ABD(dest, x, y, type) do { \ 1141 type tmp_x = x; \ 1142 type tmp_y = y; \ 1143 dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \ 1144 } while(0) 1145 1146uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) 1147{ 1148 uint64_t tmp; 1149 uint64_t result; 1150 DO_ABD(result, a, b, uint8_t); 1151 DO_ABD(tmp, a >> 8, b >> 8, uint8_t); 1152 result |= tmp << 16; 1153 DO_ABD(tmp, a >> 16, b >> 16, uint8_t); 1154 result |= tmp << 32; 1155 DO_ABD(tmp, a >> 24, b >> 24, uint8_t); 1156 result |= tmp << 48; 1157 return result; 1158} 1159 1160uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) 1161{ 1162 uint64_t tmp; 1163 uint64_t result; 1164 DO_ABD(result, a, b, int8_t); 1165 DO_ABD(tmp, a >> 8, b >> 8, int8_t); 1166 result |= tmp << 16; 1167 DO_ABD(tmp, a >> 16, b >> 16, int8_t); 1168 result |= tmp << 32; 1169 DO_ABD(tmp, a >> 24, b >> 24, int8_t); 1170 result |= tmp << 48; 1171 return result; 1172} 1173 1174uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) 1175{ 1176 uint64_t tmp; 1177 uint64_t result; 1178 DO_ABD(result, a, b, uint16_t); 1179 DO_ABD(tmp, a >> 16, b >> 16, uint16_t); 1180 return result | (tmp << 32); 1181} 1182 1183uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) 1184{ 1185 uint64_t tmp; 1186 uint64_t result; 1187 DO_ABD(result, a, b, int16_t); 1188 DO_ABD(tmp, a >> 16, b >> 16, int16_t); 1189 return result | (tmp << 32); 1190} 1191 1192uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) 1193{ 1194 uint64_t result; 1195 DO_ABD(result, a, b, uint32_t); 1196 return result; 1197} 1198 1199uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) 1200{ 1201 uint64_t result; 1202 DO_ABD(result, a, b, int32_t); 1203 return result; 1204} 1205#undef DO_ABD 1206 1207/* Widening multiply. Named type is the source type. */ 1208#define DO_MULL(dest, x, y, type1, type2) do { \ 1209 type1 tmp_x = x; \ 1210 type1 tmp_y = y; \ 1211 dest = (type2)((type2)tmp_x * (type2)tmp_y); \ 1212 } while(0) 1213 1214uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) 1215{ 1216 uint64_t tmp; 1217 uint64_t result; 1218 1219 DO_MULL(result, a, b, uint8_t, uint16_t); 1220 DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); 1221 result |= tmp << 16; 1222 DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); 1223 result |= tmp << 32; 1224 DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); 1225 result |= tmp << 48; 1226 return result; 1227} 1228 1229uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) 1230{ 1231 uint64_t tmp; 1232 uint64_t result; 1233 1234 DO_MULL(result, a, b, int8_t, uint16_t); 1235 DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); 1236 result |= tmp << 16; 1237 DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); 1238 result |= tmp << 32; 1239 DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); 1240 result |= tmp << 48; 1241 return result; 1242} 1243 1244uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) 1245{ 1246 uint64_t tmp; 1247 uint64_t result; 1248 1249 DO_MULL(result, a, b, uint16_t, uint32_t); 1250 DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); 1251 return result | (tmp << 32); 1252} 1253 1254uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) 1255{ 1256 uint64_t tmp; 1257 uint64_t result; 1258 1259 DO_MULL(result, a, b, int16_t, uint32_t); 1260 DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); 1261 return result | (tmp << 32); 1262} 1263 1264uint64_t HELPER(neon_negl_u16)(uint64_t x) 1265{ 1266 uint16_t tmp; 1267 uint64_t result; 1268 result = (uint16_t)-x; 1269 tmp = -(x >> 16); 1270 result |= (uint64_t)tmp << 16; 1271 tmp = -(x >> 32); 1272 result |= (uint64_t)tmp << 32; 1273 tmp = -(x >> 48); 1274 result |= (uint64_t)tmp << 48; 1275 return result; 1276} 1277 1278#include <stdio.h> 1279uint64_t HELPER(neon_negl_u32)(uint64_t x) 1280{ 1281 uint32_t low = -x; 1282 uint32_t high = -(x >> 32); 1283 return low | ((uint64_t)high << 32); 1284} 1285 1286/* FIXME: There should be a native op for this. */ 1287uint64_t HELPER(neon_negl_u64)(uint64_t x) 1288{ 1289 return -x; 1290} 1291 1292/* Saturnating sign manuipulation. */ 1293/* ??? Make these use NEON_VOP1 */ 1294#define DO_QABS8(x) do { \ 1295 if (x == (int8_t)0x80) { \ 1296 x = 0x7f; \ 1297 SET_QC(); \ 1298 } else if (x < 0) { \ 1299 x = -x; \ 1300 }} while (0) 1301uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) 1302{ 1303 neon_s8 vec; 1304 NEON_UNPACK(neon_s8, vec, x); 1305 DO_QABS8(vec.v1); 1306 DO_QABS8(vec.v2); 1307 DO_QABS8(vec.v3); 1308 DO_QABS8(vec.v4); 1309 NEON_PACK(neon_s8, x, vec); 1310 return x; 1311} 1312#undef DO_QABS8 1313 1314#define DO_QNEG8(x) do { \ 1315 if (x == (int8_t)0x80) { \ 1316 x = 0x7f; \ 1317 SET_QC(); \ 1318 } else { \ 1319 x = -x; \ 1320 }} while (0) 1321uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) 1322{ 1323 neon_s8 vec; 1324 NEON_UNPACK(neon_s8, vec, x); 1325 DO_QNEG8(vec.v1); 1326 DO_QNEG8(vec.v2); 1327 DO_QNEG8(vec.v3); 1328 DO_QNEG8(vec.v4); 1329 NEON_PACK(neon_s8, x, vec); 1330 return x; 1331} 1332#undef DO_QNEG8 1333 1334#define DO_QABS16(x) do { \ 1335 if (x == (int16_t)0x8000) { \ 1336 x = 0x7fff; \ 1337 SET_QC(); \ 1338 } else if (x < 0) { \ 1339 x = -x; \ 1340 }} while (0) 1341uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) 1342{ 1343 neon_s16 vec; 1344 NEON_UNPACK(neon_s16, vec, x); 1345 DO_QABS16(vec.v1); 1346 DO_QABS16(vec.v2); 1347 NEON_PACK(neon_s16, x, vec); 1348 return x; 1349} 1350#undef DO_QABS16 1351 1352#define DO_QNEG16(x) do { \ 1353 if (x == (int16_t)0x8000) { \ 1354 x = 0x7fff; \ 1355 SET_QC(); \ 1356 } else { \ 1357 x = -x; \ 1358 }} while (0) 1359uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) 1360{ 1361 neon_s16 vec; 1362 NEON_UNPACK(neon_s16, vec, x); 1363 DO_QNEG16(vec.v1); 1364 DO_QNEG16(vec.v2); 1365 NEON_PACK(neon_s16, x, vec); 1366 return x; 1367} 1368#undef DO_QNEG16 1369 1370uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) 1371{ 1372 if (x == SIGNBIT) { 1373 SET_QC(); 1374 x = ~SIGNBIT; 1375 } else if ((int32_t)x < 0) { 1376 x = -x; 1377 } 1378 return x; 1379} 1380 1381uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) 1382{ 1383 if (x == SIGNBIT) { 1384 SET_QC(); 1385 x = ~SIGNBIT; 1386 } else { 1387 x = -x; 1388 } 1389 return x; 1390} 1391 1392/* NEON Float helpers. */ 1393uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) 1394{ 1395 float32 f0 = vfp_itos(a); 1396 float32 f1 = vfp_itos(b); 1397 return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; 1398} 1399 1400uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) 1401{ 1402 float32 f0 = vfp_itos(a); 1403 float32 f1 = vfp_itos(b); 1404 return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; 1405} 1406 1407uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) 1408{ 1409 float32 f0 = vfp_itos(a); 1410 float32 f1 = vfp_itos(b); 1411 return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) 1412 ? float32_sub(f0, f1, NFS) 1413 : float32_sub(f1, f0, NFS)); 1414} 1415 1416uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) 1417{ 1418 return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS)); 1419} 1420 1421uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) 1422{ 1423 return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS)); 1424} 1425 1426uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) 1427{ 1428 return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS)); 1429} 1430 1431/* Floating point comparisons produce an integer result. */ 1432#define NEON_VOP_FCMP(name, cmp) \ 1433uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ 1434{ \ 1435 if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ 1436 return ~0; \ 1437 else \ 1438 return 0; \ 1439} 1440 1441NEON_VOP_FCMP(ceq_f32, ==) 1442NEON_VOP_FCMP(cge_f32, >=) 1443NEON_VOP_FCMP(cgt_f32, >) 1444 1445uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) 1446{ 1447 float32 f0 = float32_abs(vfp_itos(a)); 1448 float32 f1 = float32_abs(vfp_itos(b)); 1449 return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; 1450} 1451 1452uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) 1453{ 1454 float32 f0 = float32_abs(vfp_itos(a)); 1455 float32 f1 = float32_abs(vfp_itos(b)); 1456 return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; 1457} 1458