1// Copyright 2016, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 27#include <cfloat> 28#include <cmath> 29#include <cstdio> 30#include <cstdlib> 31#include <cstring> 32 33#include "test-runner.h" 34#include "test-utils-aarch64.h" 35 36#include "aarch64/cpu-aarch64.h" 37#include "aarch64/debugger-aarch64.h" 38#include "aarch64/disasm-aarch64.h" 39#include "aarch64/macro-assembler-aarch64.h" 40#include "aarch64/simulator-aarch64.h" 41 42namespace vixl { 43namespace aarch64 { 44// Trace tests can only work with the simulator. 45#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 46 47#define __ masm-> 48#define TEST(name) TEST_(TRACE_##name) 49 50static void GenerateTestSequenceBase(MacroAssembler* masm) { 51 ExactAssemblyScope guard(masm, 52 masm->GetBuffer()->GetRemainingBytes(), 53 ExactAssemblyScope::kMaximumSize); 54 55 __ adc(w3, w4, w5); 56 __ adc(x6, x7, x8); 57 __ adcs(w9, w10, w11); 58 __ adcs(x12, x13, x14); 59 __ add(w15, w16, w17); 60 __ add(x18, x19, x20); 61 __ adds(w21, w22, w23); 62 __ adds(x24, x25, x26); 63 __ and_(w27, w28, w29); 64 __ and_(x2, x3, x4); 65 __ ands(w5, w6, w7); 66 __ ands(x8, x9, x10); 67 __ asr(w11, w12, 0); 68 __ asr(x13, x14, 1); 69 __ asrv(w15, w16, w17); 70 __ asrv(x18, x19, x20); 71 __ bfm(w21, w22, 5, 6); 72 __ bfm(x23, x24, 7, 8); 73 __ bic(w25, w26, w27); 74 __ bic(x28, x29, x2); 75 __ bics(w3, w4, w5); 76 __ bics(x6, x7, x8); 77 __ ccmn(w9, w10, NoFlag, al); 78 __ ccmn(w9, w10, NoFlag, eq); 79 __ ccmn(w9, w10, NoFlag, ne); 80 __ ccmn(x11, x12, CFlag, al); 81 __ ccmn(x11, x12, CFlag, cc); 82 __ ccmn(x11, x12, CFlag, cs); 83 __ ccmp(w13, w14, VFlag, al); 84 __ ccmp(w13, w14, VFlag, hi); 85 __ ccmp(w13, w14, VFlag, ls); 86 __ ccmp(x15, x16, CVFlag, al); 87 __ ccmp(x15, x16, CVFlag, eq); 88 __ ccmp(x15, x16, CVFlag, ne); 89 __ cinc(w17, w18, cc); 90 __ cinc(w17, w18, cs); 91 __ cinc(x19, x20, hi); 92 __ cinc(x19, x20, ls); 93 __ cinv(w21, w22, eq); 94 __ cinv(w21, w22, ne); 95 __ cinv(x23, x24, cc); 96 __ cinv(x23, x24, cs); 97 __ clrex(); 98 __ cls(w25, w26); 99 __ cls(x27, x28); 100 __ clz(w29, w2); 
101 __ clz(x3, x4); 102 __ cmn(w5, w6); 103 __ cmn(x7, x8); 104 __ cmp(w9, w10); 105 __ cmp(x11, x12); 106 __ cneg(w13, w14, hi); 107 __ cneg(w13, w14, ls); 108 __ cneg(x15, x16, eq); 109 __ cneg(x15, x16, ne); 110 __ crc32b(w17, w18, w19); 111 __ crc32cb(w20, w21, w22); 112 __ crc32ch(w23, w24, w25); 113 __ crc32cw(w26, w27, w28); 114 __ crc32h(w4, w5, w6); 115 __ crc32w(w7, w8, w9); 116 __ csel(w13, w14, w15, cc); 117 __ csel(w13, w14, w15, cs); 118 __ csel(x16, x17, x18, hi); 119 __ csel(x16, x17, x18, ls); 120 __ cset(w19, eq); 121 __ cset(w19, ne); 122 __ cset(x20, cc); 123 __ cset(x20, cs); 124 __ csetm(w21, hi); 125 __ csetm(w21, ls); 126 __ csetm(x22, eq); 127 __ csetm(x22, ne); 128 __ csinc(w23, w24, w25, cc); 129 __ csinc(w23, w24, w25, cs); 130 __ csinc(x26, x27, x28, hi); 131 __ csinc(x26, x27, x28, ls); 132 __ csinv(w29, w2, w3, eq); 133 __ csinv(w29, w2, w3, ne); 134 __ csinv(x4, x5, x6, cc); 135 __ csinv(x4, x5, x6, cs); 136 __ csneg(w7, w8, w9, hi); 137 __ csneg(w7, w8, w9, ls); 138 __ csneg(x10, x11, x12, eq); 139 __ csneg(x10, x11, x12, ne); 140 __ dc(CVAC, x0); 141 __ dmb(InnerShareable, BarrierAll); 142 __ dsb(InnerShareable, BarrierAll); 143 __ eon(w13, w14, w15); 144 __ eon(x16, x17, x18); 145 __ eor(w19, w20, w21); 146 __ eor(x22, x23, x24); 147 __ extr(w25, w26, w27, 9); 148 __ extr(x28, x29, x2, 10); 149 __ hint(NOP); 150 __ ic(IVAU, x0); 151 __ isb(); 152 __ ldar(w3, MemOperand(x0)); 153 __ ldar(x4, MemOperand(x0)); 154 __ ldarb(w5, MemOperand(x0)); 155 __ ldarb(x6, MemOperand(x0)); 156 __ ldarh(w7, MemOperand(x0)); 157 __ ldarh(x8, MemOperand(x0)); 158 __ ldaxp(w9, w10, MemOperand(x0)); 159 __ ldaxp(x11, x12, MemOperand(x0)); 160 __ ldaxr(w13, MemOperand(x0)); 161 __ ldaxr(x14, MemOperand(x0)); 162 __ ldaxrb(w15, MemOperand(x0)); 163 __ ldaxrb(x16, MemOperand(x0)); 164 __ ldaxrh(w17, MemOperand(x0)); 165 __ ldaxrh(x18, MemOperand(x0)); 166 __ ldnp(w19, w20, MemOperand(x0)); 167 __ ldnp(x21, x22, MemOperand(x0)); 168 __ ldp(w23, w24, 
MemOperand(x0)); 169 __ ldp(w23, w24, MemOperand(x1, 8, PostIndex)); 170 __ ldp(w23, w24, MemOperand(x1, 8, PreIndex)); 171 __ ldp(x25, x26, MemOperand(x0)); 172 __ ldp(x25, x26, MemOperand(x1, 16, PostIndex)); 173 __ ldp(x25, x26, MemOperand(x1, 16, PreIndex)); 174 __ ldpsw(x27, x28, MemOperand(x0)); 175 __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex)); 176 __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex)); 177 __ ldr(w29, MemOperand(x0)); 178 __ ldr(w29, MemOperand(x1, 4, PostIndex)); 179 __ ldr(w29, MemOperand(x1, 4, PreIndex)); 180 __ ldr(x2, MemOperand(x0)); 181 __ ldr(x2, MemOperand(x1, 8, PostIndex)); 182 __ ldr(x2, MemOperand(x1, 8, PreIndex)); 183 __ ldrb(w3, MemOperand(x0)); 184 __ ldrb(w3, MemOperand(x1, 1, PostIndex)); 185 __ ldrb(w3, MemOperand(x1, 1, PreIndex)); 186 __ ldrb(x4, MemOperand(x0)); 187 __ ldrb(x4, MemOperand(x1, 1, PostIndex)); 188 __ ldrb(x4, MemOperand(x1, 1, PreIndex)); 189 __ ldrh(w5, MemOperand(x0)); 190 __ ldrh(w5, MemOperand(x1, 2, PostIndex)); 191 __ ldrh(w5, MemOperand(x1, 2, PreIndex)); 192 __ ldrh(x6, MemOperand(x0)); 193 __ ldrh(x6, MemOperand(x1, 2, PostIndex)); 194 __ ldrh(x6, MemOperand(x1, 2, PreIndex)); 195 __ ldrsb(w7, MemOperand(x0)); 196 __ ldrsb(w7, MemOperand(x1, 1, PostIndex)); 197 __ ldrsb(w7, MemOperand(x1, 1, PreIndex)); 198 __ ldrsb(x8, MemOperand(x0)); 199 __ ldrsb(x8, MemOperand(x1, 1, PostIndex)); 200 __ ldrsb(x8, MemOperand(x1, 1, PreIndex)); 201 __ ldrsh(w9, MemOperand(x0)); 202 __ ldrsh(w9, MemOperand(x1, 2, PostIndex)); 203 __ ldrsh(w9, MemOperand(x1, 2, PreIndex)); 204 __ ldrsh(x10, MemOperand(x0)); 205 __ ldrsh(x10, MemOperand(x1, 2, PostIndex)); 206 __ ldrsh(x10, MemOperand(x1, 2, PreIndex)); 207 __ ldrsw(x11, MemOperand(x0)); 208 __ ldrsw(x11, MemOperand(x1, 4, PostIndex)); 209 __ ldrsw(x11, MemOperand(x1, 4, PreIndex)); 210 __ ldur(w12, MemOperand(x0, 7)); 211 __ ldur(x13, MemOperand(x0, 15)); 212 __ ldurb(w14, MemOperand(x0, 1)); 213 __ ldurb(x15, MemOperand(x0, 1)); 214 __ ldurh(w16, MemOperand(x0, 
3)); 215 __ ldurh(x17, MemOperand(x0, 3)); 216 __ ldursb(w18, MemOperand(x0, 1)); 217 __ ldursb(x19, MemOperand(x0, 1)); 218 __ ldursh(w20, MemOperand(x0, 3)); 219 __ ldursh(x21, MemOperand(x0, 3)); 220 __ ldursw(x22, MemOperand(x0, 7)); 221 __ ldxp(w23, w24, MemOperand(x0)); 222 __ ldxp(x25, x26, MemOperand(x0)); 223 __ ldxr(w27, MemOperand(x0)); 224 __ ldxr(x28, MemOperand(x0)); 225 __ ldxrb(w29, MemOperand(x0)); 226 __ ldxrb(x2, MemOperand(x0)); 227 __ ldxrh(w3, MemOperand(x0)); 228 __ ldxrh(x4, MemOperand(x0)); 229 __ lsl(w5, w6, 2); 230 __ lsl(x7, x8, 3); 231 __ lslv(w9, w10, w11); 232 __ lslv(x12, x13, x14); 233 __ lsr(w15, w16, 4); 234 __ lsr(x17, x18, 5); 235 __ lsrv(w19, w20, w21); 236 __ lsrv(x22, x23, x24); 237 __ madd(w25, w26, w27, w28); 238 __ madd(x29, x2, x3, x4); 239 __ mneg(w5, w6, w7); 240 __ mneg(x8, x9, x10); 241 __ mov(w11, w12); 242 __ mov(x13, x14); 243 __ movk(w15, 130); 244 __ movk(x16, 131); 245 __ movn(w17, 132); 246 __ movn(x18, 133); 247 __ movz(w19, 134); 248 __ movz(x20, 135); 249 __ msub(w22, w23, w24, w25); 250 __ msub(x26, x27, x28, x29); 251 __ mul(w2, w3, w4); 252 __ mul(x5, x6, x7); 253 __ mvn(w8, w9); 254 __ mvn(x10, x11); 255 __ neg(w12, w13); 256 __ neg(x14, x15); 257 __ negs(w16, w17); 258 __ negs(x18, x19); 259 __ ngc(w20, w21); 260 __ ngc(x22, x23); 261 __ ngcs(w24, w25); 262 __ ngcs(x26, x27); 263 __ nop(); 264 __ orn(w28, w29, w2); 265 __ orn(x3, x4, x5); 266 __ orr(w6, w7, w8); 267 __ orr(x9, x10, x11); 268 __ prfm(PLDL1KEEP, MemOperand(x0, 4)); 269 __ prfum(PLDL1KEEP, MemOperand(x0, 1)); 270 __ rbit(w12, w13); 271 __ rbit(x14, x15); 272 __ rev(w16, w17); 273 __ rev(x18, x19); 274 __ rev16(w20, w21); 275 __ rev16(x22, x23); 276 __ rev32(x24, x25); 277 __ rorv(w26, w27, w28); 278 __ rorv(x29, x2, x3); 279 __ sbc(w4, w5, w6); 280 __ sbc(x7, x8, x9); 281 __ sbcs(w10, w11, w12); 282 __ sbcs(x13, x14, x15); 283 __ sbfiz(w16, w17, 2, 3); 284 __ sbfiz(x18, x19, 4, 5); 285 __ sbfx(w22, w23, 6, 7); 286 __ sbfx(x24, x25, 8, 9); 
287 __ sdiv(w26, w27, w28); 288 __ sdiv(x29, x2, x3); 289 __ smulh(x12, x13, x14); 290 __ stlr(w18, MemOperand(x0)); 291 __ stlr(x19, MemOperand(x0)); 292 __ stlrb(w20, MemOperand(x0)); 293 __ stlrb(x21, MemOperand(x0)); 294 __ stlrh(w22, MemOperand(x0)); 295 __ stlrh(x23, MemOperand(x0)); 296 __ stlxp(w24, w25, w26, MemOperand(x0)); 297 __ stlxp(x27, x28, x29, MemOperand(x0)); 298 __ stlxr(w2, w3, MemOperand(x0)); 299 __ stlxr(x4, x5, MemOperand(x0)); 300 __ stlxrb(w6, w7, MemOperand(x0)); 301 __ stlxrb(x8, x9, MemOperand(x0)); 302 __ stlxrh(w10, w11, MemOperand(x0)); 303 __ stlxrh(x12, x13, MemOperand(x0)); 304 __ stnp(w14, w15, MemOperand(x0)); 305 __ stnp(x16, x17, MemOperand(x0)); 306 __ stp(w18, w19, MemOperand(x0)); 307 __ stp(w18, w19, MemOperand(x1, 8, PostIndex)); 308 __ stp(w18, w19, MemOperand(x1, 8, PreIndex)); 309 __ stp(x20, x21, MemOperand(x0)); 310 __ stp(x20, x21, MemOperand(x1, 16, PostIndex)); 311 __ stp(x20, x21, MemOperand(x1, 16, PreIndex)); 312 __ str(w22, MemOperand(x0)); 313 __ str(w22, MemOperand(x1, 4, PostIndex)); 314 __ str(w22, MemOperand(x1, 4, PreIndex)); 315 __ str(x23, MemOperand(x0)); 316 __ str(x23, MemOperand(x1, 8, PostIndex)); 317 __ str(x23, MemOperand(x1, 8, PreIndex)); 318 __ strb(w24, MemOperand(x0)); 319 __ strb(w24, MemOperand(x1, 1, PostIndex)); 320 __ strb(w24, MemOperand(x1, 1, PreIndex)); 321 __ strb(x25, MemOperand(x0)); 322 __ strb(x25, MemOperand(x1, 1, PostIndex)); 323 __ strb(x25, MemOperand(x1, 1, PreIndex)); 324 __ strh(w26, MemOperand(x0)); 325 __ strh(w26, MemOperand(x1, 2, PostIndex)); 326 __ strh(w26, MemOperand(x1, 2, PreIndex)); 327 __ strh(x27, MemOperand(x0)); 328 __ strh(x27, MemOperand(x1, 2, PostIndex)); 329 __ strh(x27, MemOperand(x1, 2, PreIndex)); 330 __ stur(w28, MemOperand(x0, 7)); 331 __ stur(x29, MemOperand(x0, 15)); 332 __ sturb(w2, MemOperand(x0, 1)); 333 __ sturb(x3, MemOperand(x0, 1)); 334 __ sturh(w4, MemOperand(x0, 3)); 335 __ sturh(x5, MemOperand(x0, 3)); 336 __ stxp(w6, w7, w8, 
MemOperand(x0)); 337 __ stxp(x9, x10, x11, MemOperand(x0)); 338 __ stxr(w12, w13, MemOperand(x0)); 339 __ stxr(x14, x15, MemOperand(x0)); 340 __ stxrb(w16, w17, MemOperand(x0)); 341 __ stxrb(x18, x19, MemOperand(x0)); 342 __ stxrh(w20, w21, MemOperand(x0)); 343 __ stxrh(x22, x23, MemOperand(x0)); 344 __ sub(w24, w25, w26); 345 __ sub(x27, x28, x29); 346 __ subs(w2, w3, w4); 347 __ subs(x5, x6, x7); 348 __ sxtb(w8, w9); 349 __ sxtb(x10, x11); 350 __ sxth(w12, w13); 351 __ sxth(x14, x15); 352 __ sxtw(w16, w17); 353 __ sxtw(x18, x19); 354 __ tst(w20, w21); 355 __ tst(x22, x23); 356 __ ubfiz(w24, w25, 10, 11); 357 __ ubfiz(x26, x27, 12, 13); 358 __ ubfm(w28, w29, 14, 15); 359 __ ubfm(x2, x3, 1, 2); 360 __ ubfx(w4, w5, 3, 4); 361 __ ubfx(x6, x7, 5, 6); 362 __ udiv(w8, w9, w10); 363 __ udiv(x11, x12, x13); 364 __ umulh(x22, x23, x24); 365 __ uxtb(w28, w29); 366 __ uxtb(x2, x3); 367 __ uxth(w4, w5); 368 __ uxth(x6, x7); 369 __ uxtw(w8, w9); 370 __ uxtw(x10, x11); 371 372 // Branch tests. 373 { 374 Label end; 375 // Branch to the next instruction. 376 __ b(&end); 377 __ bind(&end); 378 } 379 { 380 Label loop, end; 381 __ subs(x3, x3, x3); 382 __ bind(&loop); 383 // Not-taken branch (the first time). 384 // Taken branch (the second time). 385 __ b(&end, ne); 386 __ cmp(x3, 1); 387 // Backwards branch. 388 __ b(&loop); 389 __ bind(&end); 390 } 391} 392 393 394static void GenerateTestSequenceFP(MacroAssembler* masm) { 395 ExactAssemblyScope guard(masm, 396 masm->GetBuffer()->GetRemainingBytes(), 397 ExactAssemblyScope::kMaximumSize); 398 399 // Scalar floating point instructions. 
400 __ fabd(d13, d2, d19); 401 __ fabd(s8, s10, s30); 402 __ fabs(d1, d1); 403 __ fabs(s25, s7); 404 __ facge(d1, d23, d16); 405 __ facge(s4, s17, s1); 406 __ facgt(d2, d21, d24); 407 __ facgt(s12, s26, s12); 408 __ fadd(d13, d11, d22); 409 __ fadd(s27, s19, s8); 410 __ fccmp(d6, d10, NoFlag, hs); 411 __ fccmp(s29, s20, NZVFlag, ne); 412 __ fccmpe(d10, d2, NZCFlag, al); 413 __ fccmpe(s3, s3, NZVFlag, pl); 414 __ fcmeq(d19, d8, d10); 415 __ fcmeq(d0, d18, 0.0); 416 __ fcmeq(s1, s4, s30); 417 __ fcmeq(s22, s29, 0.0); 418 __ fcmge(d27, d18, d1); 419 __ fcmge(d31, d28, 0.0); 420 __ fcmge(s31, s19, s9); 421 __ fcmge(s1, s25, 0.0); 422 __ fcmgt(d18, d1, d15); 423 __ fcmgt(d3, d31, 0.0); 424 __ fcmgt(s11, s25, s2); 425 __ fcmgt(s17, s16, 0.0); 426 __ fcmle(d24, d17, 0.0); 427 __ fcmle(s11, s8, 0.0); 428 __ fcmlt(d5, d31, 0.0); 429 __ fcmlt(s18, s23, 0.0); 430 __ fcmp(d10, d24); 431 __ fcmp(d13, 0.0); 432 __ fcmp(s18, s6); 433 __ fcmp(s16, 0.0); 434 __ fcmpe(d9, d17); 435 __ fcmpe(d29, 0.0); 436 __ fcmpe(s16, s17); 437 __ fcmpe(s22, 0.0); 438 __ fcsel(d10, d14, d19, gt); 439 __ fcsel(s22, s18, s2, ge); 440 __ fcvt(d4, h24); 441 __ fcvt(d11, s2); 442 __ fcvt(h8, d9); 443 __ fcvt(h12, s1); 444 __ fcvt(s12, d31); 445 __ fcvt(s27, h25); 446 __ fcvtas(d28, d16); 447 __ fcvtas(s3, s5); 448 __ fcvtas(w18, d31); 449 __ fcvtas(w29, s24); 450 __ fcvtas(x9, d1); 451 __ fcvtas(x30, s2); 452 __ fcvtau(d14, d0); 453 __ fcvtau(s31, s14); 454 __ fcvtau(w16, d2); 455 __ fcvtau(w18, s0); 456 __ fcvtau(x26, d7); 457 __ fcvtau(x25, s19); 458 __ fcvtms(d30, d25); 459 __ fcvtms(s12, s15); 460 __ fcvtms(w9, d7); 461 __ fcvtms(w19, s6); 462 __ fcvtms(x6, d6); 463 __ fcvtms(x22, s7); 464 __ fcvtmu(d27, d0); 465 __ fcvtmu(s8, s22); 466 __ fcvtmu(w29, d19); 467 __ fcvtmu(w26, s0); 468 __ fcvtmu(x13, d5); 469 __ fcvtmu(x5, s18); 470 __ fcvtns(d30, d15); 471 __ fcvtns(s10, s11); 472 __ fcvtns(w21, d15); 473 __ fcvtns(w18, s10); 474 __ fcvtns(x8, d17); 475 __ fcvtns(x17, s12); 476 __ fcvtnu(d0, d21); 
477 __ fcvtnu(s6, s25); 478 __ fcvtnu(w29, d11); 479 __ fcvtnu(w25, s31); 480 __ fcvtnu(x30, d11); 481 __ fcvtnu(x27, s18); 482 __ fcvtps(d11, d22); 483 __ fcvtps(s29, s20); 484 __ fcvtps(w15, d25); 485 __ fcvtps(w16, s7); 486 __ fcvtps(x13, d20); 487 __ fcvtps(x3, s23); 488 __ fcvtpu(d24, d1); 489 __ fcvtpu(s14, s24); 490 __ fcvtpu(w26, d29); 491 __ fcvtpu(wzr, s26); 492 __ fcvtpu(x27, d6); 493 __ fcvtpu(x29, s14); 494 __ fcvtxn(s12, d12); 495 __ fcvtzs(d15, d0); 496 __ fcvtzs(d13, d4, 42); 497 __ fcvtzs(s8, s11); 498 __ fcvtzs(s31, s6, 25); 499 __ fcvtzs(w6, d9); 500 __ fcvtzs(w25, d10, 20); 501 __ fcvtzs(w9, s1); 502 __ fcvtzs(w17, s29, 30); 503 __ fcvtzs(x19, d2); 504 __ fcvtzs(x22, d14, 1); 505 __ fcvtzs(x14, s20); 506 __ fcvtzs(x3, s30, 33); 507 __ fcvtzu(d28, d15); 508 __ fcvtzu(d0, d4, 3); 509 __ fcvtzu(s2, s5); 510 __ fcvtzu(s4, s0, 30); 511 __ fcvtzu(w11, d4); 512 __ fcvtzu(w7, d24, 32); 513 __ fcvtzu(w18, s24); 514 __ fcvtzu(w14, s27, 4); 515 __ fcvtzu(x22, d11); 516 __ fcvtzu(x8, d27, 52); 517 __ fcvtzu(x7, s20); 518 __ fcvtzu(x22, s7, 44); 519 __ fdiv(d6, d14, d15); 520 __ fdiv(s26, s5, s25); 521 __ fmadd(d18, d26, d12, d30); 522 __ fmadd(s13, s9, s28, s4); 523 __ fmax(d12, d5, d5); 524 __ fmax(s12, s28, s6); 525 __ fmaxnm(d28, d4, d2); 526 __ fmaxnm(s6, s10, s8); 527 __ fmin(d20, d20, d18); 528 __ fmin(s7, s13, s16); 529 __ fminnm(d19, d14, d30); 530 __ fminnm(s0, s1, s1); 531 __ fmov(d13, d6); 532 __ fmov(d2, x17); 533 __ fmov(d8, -2.5000); 534 __ fmov(s5, s3); 535 __ fmov(s25, w20); 536 __ fmov(s21, 2.8750f); 537 __ fmov(w18, s24); 538 __ fmov(x18, d2); 539 __ fmsub(d20, d30, d3, d19); 540 __ fmsub(s5, s19, s4, s12); 541 __ fmul(d30, d27, d23); 542 __ fmul(s25, s17, s15); 543 __ fmulx(d4, d17, d1); 544 __ fmulx(s14, s25, s4); 545 __ fneg(d15, d0); 546 __ fneg(s14, s15); 547 __ fnmadd(d0, d16, d22, d31); 548 __ fnmadd(s0, s18, s26, s18); 549 __ fnmsub(d19, d12, d15, d21); 550 __ fnmsub(s29, s0, s11, s26); 551 __ fnmul(d31, d19, d1); 552 __ fnmul(s18, 
s3, s17); 553 __ frecpe(d7, d21); 554 __ frecpe(s29, s17); 555 __ frecps(d11, d26, d17); 556 __ frecps(s18, s27, s1); 557 __ frecpx(d15, d18); 558 __ frecpx(s5, s10); 559 __ frinta(d16, d30); 560 __ frinta(s1, s22); 561 __ frinti(d19, d29); 562 __ frinti(s14, s21); 563 __ frintm(d20, d30); 564 __ frintm(s1, s16); 565 __ frintn(d30, d1); 566 __ frintn(s24, s10); 567 __ frintp(d4, d20); 568 __ frintp(s13, s3); 569 __ frintx(d13, d20); 570 __ frintx(s17, s7); 571 __ frintz(d0, d8); 572 __ frintz(s15, s29); 573 __ frsqrte(d21, d10); 574 __ frsqrte(s17, s25); 575 __ frsqrts(d4, d29, d17); 576 __ frsqrts(s14, s3, s24); 577 __ fsqrt(d14, d17); 578 __ fsqrt(s4, s14); 579 __ fsub(d13, d19, d7); 580 __ fsub(s3, s21, s27); 581 __ scvtf(d31, d16); 582 __ scvtf(d26, d31, 24); 583 __ scvtf(d6, w16); 584 __ scvtf(d5, w20, 6); 585 __ scvtf(d16, x8); 586 __ scvtf(d15, x8, 10); 587 __ scvtf(s7, s4); 588 __ scvtf(s8, s15, 14); 589 __ scvtf(s29, w10); 590 __ scvtf(s15, w21, 11); 591 __ scvtf(s27, x26); 592 __ scvtf(s26, x12, 38); 593 __ ucvtf(d0, d9); 594 __ ucvtf(d5, d22, 47); 595 __ ucvtf(d30, w27); 596 __ ucvtf(d3, w19, 1); 597 __ ucvtf(d28, x21); 598 __ ucvtf(d27, x30, 35); 599 __ ucvtf(s11, s5); 600 __ ucvtf(s0, s23, 14); 601 __ ucvtf(s20, w19); 602 __ ucvtf(s21, w22, 18); 603 __ ucvtf(s6, x13); 604 __ ucvtf(s7, x2, 21); 605} 606 607 608static void GenerateTestSequenceNEON(MacroAssembler* masm) { 609 ExactAssemblyScope guard(masm, 610 masm->GetBuffer()->GetRemainingBytes(), 611 ExactAssemblyScope::kMaximumSize); 612 613 // NEON integer instructions. 
614 __ abs(d19, d0); 615 __ abs(v16.V16B(), v11.V16B()); 616 __ abs(v0.V2D(), v31.V2D()); 617 __ abs(v27.V2S(), v25.V2S()); 618 __ abs(v21.V4H(), v27.V4H()); 619 __ abs(v16.V4S(), v1.V4S()); 620 __ abs(v31.V8B(), v5.V8B()); 621 __ abs(v29.V8H(), v13.V8H()); 622 __ add(d10, d5, d17); 623 __ add(v31.V16B(), v15.V16B(), v23.V16B()); 624 __ add(v10.V2D(), v31.V2D(), v14.V2D()); 625 __ add(v15.V2S(), v14.V2S(), v19.V2S()); 626 __ add(v27.V4H(), v23.V4H(), v17.V4H()); 627 __ add(v25.V4S(), v28.V4S(), v29.V4S()); 628 __ add(v13.V8B(), v7.V8B(), v18.V8B()); 629 __ add(v4.V8H(), v2.V8H(), v1.V8H()); 630 __ addhn(v10.V2S(), v14.V2D(), v15.V2D()); 631 __ addhn(v10.V4H(), v30.V4S(), v26.V4S()); 632 __ addhn(v31.V8B(), v12.V8H(), v22.V8H()); 633 __ addhn2(v16.V16B(), v21.V8H(), v20.V8H()); 634 __ addhn2(v0.V4S(), v2.V2D(), v17.V2D()); 635 __ addhn2(v31.V8H(), v7.V4S(), v17.V4S()); 636 __ addp(d14, v19.V2D()); 637 __ addp(v3.V16B(), v8.V16B(), v28.V16B()); 638 __ addp(v8.V2D(), v5.V2D(), v17.V2D()); 639 __ addp(v22.V2S(), v30.V2S(), v26.V2S()); 640 __ addp(v29.V4H(), v24.V4H(), v14.V4H()); 641 __ addp(v30.V4S(), v26.V4S(), v24.V4S()); 642 __ addp(v12.V8B(), v26.V8B(), v7.V8B()); 643 __ addp(v17.V8H(), v8.V8H(), v12.V8H()); 644 __ addv(b27, v23.V16B()); 645 __ addv(b12, v20.V8B()); 646 __ addv(h27, v30.V4H()); 647 __ addv(h19, v14.V8H()); 648 __ addv(s14, v27.V4S()); 649 __ and_(v10.V16B(), v8.V16B(), v27.V16B()); 650 __ and_(v5.V8B(), v1.V8B(), v16.V8B()); 651 __ bic(v26.V16B(), v3.V16B(), v24.V16B()); 652 __ bic(v7.V2S(), 0xe4, 16); 653 __ bic(v28.V4H(), 0x23, 8); 654 __ bic(v29.V4S(), 0xac); 655 __ bic(v12.V8B(), v31.V8B(), v21.V8B()); 656 __ bic(v18.V8H(), 0x98); 657 __ bif(v12.V16B(), v26.V16B(), v8.V16B()); 658 __ bif(v2.V8B(), v23.V8B(), v27.V8B()); 659 __ bit(v8.V16B(), v3.V16B(), v13.V16B()); 660 __ bit(v5.V8B(), v5.V8B(), v23.V8B()); 661 __ bsl(v9.V16B(), v31.V16B(), v23.V16B()); 662 __ bsl(v14.V8B(), v7.V8B(), v3.V8B()); 663 __ cls(v29.V16B(), v5.V16B()); 664 __ 
cls(v21.V2S(), v0.V2S()); 665 __ cls(v1.V4H(), v12.V4H()); 666 __ cls(v27.V4S(), v10.V4S()); 667 __ cls(v19.V8B(), v4.V8B()); 668 __ cls(v15.V8H(), v14.V8H()); 669 __ clz(v1.V16B(), v4.V16B()); 670 __ clz(v27.V2S(), v17.V2S()); 671 __ clz(v9.V4H(), v9.V4H()); 672 __ clz(v31.V4S(), v15.V4S()); 673 __ clz(v14.V8B(), v19.V8B()); 674 __ clz(v6.V8H(), v11.V8H()); 675 __ cmeq(d18, d5, d29); 676 __ cmeq(d14, d31, 0); 677 __ cmeq(v19.V16B(), v3.V16B(), v22.V16B()); 678 __ cmeq(v15.V16B(), v9.V16B(), 0); 679 __ cmeq(v12.V2D(), v16.V2D(), v10.V2D()); 680 __ cmeq(v8.V2D(), v22.V2D(), 0); 681 __ cmeq(v2.V2S(), v3.V2S(), v9.V2S()); 682 __ cmeq(v16.V2S(), v25.V2S(), 0); 683 __ cmeq(v6.V4H(), v23.V4H(), v20.V4H()); 684 __ cmeq(v16.V4H(), v13.V4H(), 0); 685 __ cmeq(v21.V4S(), v17.V4S(), v2.V4S()); 686 __ cmeq(v6.V4S(), v25.V4S(), 0); 687 __ cmeq(v16.V8B(), v13.V8B(), v2.V8B()); 688 __ cmeq(v21.V8B(), v16.V8B(), 0); 689 __ cmeq(v20.V8H(), v7.V8H(), v25.V8H()); 690 __ cmeq(v26.V8H(), v8.V8H(), 0); 691 __ cmge(d16, d13, d31); 692 __ cmge(d25, d24, 0); 693 __ cmge(v17.V16B(), v19.V16B(), v17.V16B()); 694 __ cmge(v22.V16B(), v30.V16B(), 0); 695 __ cmge(v28.V2D(), v20.V2D(), v26.V2D()); 696 __ cmge(v6.V2D(), v23.V2D(), 0); 697 __ cmge(v25.V2S(), v22.V2S(), v3.V2S()); 698 __ cmge(v21.V2S(), v11.V2S(), 0); 699 __ cmge(v16.V4H(), v3.V4H(), v12.V4H()); 700 __ cmge(v23.V4H(), v9.V4H(), 0); 701 __ cmge(v7.V4S(), v2.V4S(), v11.V4S()); 702 __ cmge(v0.V4S(), v22.V4S(), 0); 703 __ cmge(v10.V8B(), v30.V8B(), v9.V8B()); 704 __ cmge(v21.V8B(), v8.V8B(), 0); 705 __ cmge(v2.V8H(), v7.V8H(), v26.V8H()); 706 __ cmge(v19.V8H(), v10.V8H(), 0); 707 __ cmgt(d6, d13, d1); 708 __ cmgt(d30, d24, 0); 709 __ cmgt(v20.V16B(), v25.V16B(), v27.V16B()); 710 __ cmgt(v0.V16B(), v25.V16B(), 0); 711 __ cmgt(v22.V2D(), v25.V2D(), v1.V2D()); 712 __ cmgt(v16.V2D(), v16.V2D(), 0); 713 __ cmgt(v5.V2S(), v9.V2S(), v15.V2S()); 714 __ cmgt(v12.V2S(), v18.V2S(), 0); 715 __ cmgt(v28.V4H(), v18.V4H(), v11.V4H()); 716 __ 
cmgt(v22.V4H(), v3.V4H(), 0); 717 __ cmgt(v5.V4S(), v11.V4S(), v27.V4S()); 718 __ cmgt(v13.V4S(), v20.V4S(), 0); 719 __ cmgt(v27.V8B(), v31.V8B(), v7.V8B()); 720 __ cmgt(v5.V8B(), v0.V8B(), 0); 721 __ cmgt(v22.V8H(), v28.V8H(), v13.V8H()); 722 __ cmgt(v6.V8H(), v2.V8H(), 0); 723 __ cmhi(d21, d8, d22); 724 __ cmhi(v18.V16B(), v19.V16B(), v19.V16B()); 725 __ cmhi(v7.V2D(), v0.V2D(), v21.V2D()); 726 __ cmhi(v15.V2S(), v19.V2S(), v0.V2S()); 727 __ cmhi(v31.V4H(), v7.V4H(), v12.V4H()); 728 __ cmhi(v9.V4S(), v16.V4S(), v22.V4S()); 729 __ cmhi(v7.V8B(), v24.V8B(), v28.V8B()); 730 __ cmhi(v11.V8H(), v10.V8H(), v25.V8H()); 731 __ cmhs(d1, d12, d17); 732 __ cmhs(v21.V16B(), v25.V16B(), v30.V16B()); 733 __ cmhs(v8.V2D(), v2.V2D(), v26.V2D()); 734 __ cmhs(v1.V2S(), v22.V2S(), v29.V2S()); 735 __ cmhs(v26.V4H(), v30.V4H(), v30.V4H()); 736 __ cmhs(v19.V4S(), v20.V4S(), v16.V4S()); 737 __ cmhs(v1.V8B(), v3.V8B(), v26.V8B()); 738 __ cmhs(v20.V8H(), v28.V8H(), v8.V8H()); 739 __ cmle(d30, d24, 0); 740 __ cmle(v0.V16B(), v3.V16B(), 0); 741 __ cmle(v2.V2D(), v30.V2D(), 0); 742 __ cmle(v7.V2S(), v10.V2S(), 0); 743 __ cmle(v9.V4H(), v31.V4H(), 0); 744 __ cmle(v9.V4S(), v18.V4S(), 0); 745 __ cmle(v21.V8B(), v31.V8B(), 0); 746 __ cmle(v29.V8H(), v21.V8H(), 0); 747 __ cmlt(d25, d23, 0); 748 __ cmlt(v7.V16B(), v21.V16B(), 0); 749 __ cmlt(v7.V2D(), v30.V2D(), 0); 750 __ cmlt(v25.V2S(), v28.V2S(), 0); 751 __ cmlt(v0.V4H(), v11.V4H(), 0); 752 __ cmlt(v24.V4S(), v5.V4S(), 0); 753 __ cmlt(v26.V8B(), v11.V8B(), 0); 754 __ cmlt(v1.V8H(), v21.V8H(), 0); 755 __ cmtst(d28, d23, d30); 756 __ cmtst(v26.V16B(), v6.V16B(), v31.V16B()); 757 __ cmtst(v1.V2D(), v21.V2D(), v4.V2D()); 758 __ cmtst(v27.V2S(), v26.V2S(), v20.V2S()); 759 __ cmtst(v26.V4H(), v0.V4H(), v18.V4H()); 760 __ cmtst(v25.V4S(), v16.V4S(), v4.V4S()); 761 __ cmtst(v11.V8B(), v10.V8B(), v9.V8B()); 762 __ cmtst(v0.V8H(), v2.V8H(), v1.V8H()); 763 __ cnt(v25.V16B(), v15.V16B()); 764 __ cnt(v28.V8B(), v6.V8B()); 765 __ dup(v6.V16B(), v7.B(), 7); 
766 __ dup(v9.V16B(), w20); 767 __ dup(v12.V2D(), v13.D(), 1); 768 __ dup(v9.V2D(), xzr); 769 __ dup(v4.V2S(), v26.S(), 2); 770 __ dup(v3.V2S(), w12); 771 __ dup(v22.V4H(), v5.H(), 7); 772 __ dup(v16.V4H(), w25); 773 __ dup(v20.V4S(), v10.S(), 2); 774 __ dup(v10.V4S(), w7); 775 __ dup(v30.V8B(), v30.B(), 2); 776 __ dup(v31.V8B(), w15); 777 __ dup(v28.V8H(), v17.H(), 4); 778 __ dup(v2.V8H(), w3); 779 __ eor(v29.V16B(), v25.V16B(), v3.V16B()); 780 __ eor(v3.V8B(), v16.V8B(), v28.V8B()); 781 __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1); 782 __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1); 783 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); 784 __ ld1(v23.V16B(), 785 v24.V16B(), 786 v25.V16B(), 787 v26.V16B(), 788 MemOperand(x1, x2, PostIndex)); 789 __ ld1(v5.V16B(), 790 v6.V16B(), 791 v7.V16B(), 792 v8.V16B(), 793 MemOperand(x1, 64, PostIndex)); 794 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0)); 795 __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex)); 796 __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex)); 797 __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0)); 798 __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex)); 799 __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex)); 800 __ ld1(v29.V16B(), MemOperand(x0)); 801 __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex)); 802 __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex)); 803 __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0)); 804 __ ld1(v17.V1D(), 805 v18.V1D(), 806 v19.V1D(), 807 v20.V1D(), 808 MemOperand(x1, x2, PostIndex)); 809 __ ld1(v28.V1D(), 810 v29.V1D(), 811 v30.V1D(), 812 v31.V1D(), 813 MemOperand(x1, 32, PostIndex)); 814 __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0)); 815 __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex)); 816 __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex)); 817 __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0)); 818 __ ld1(v31.V1D(), v0.V1D(), 
MemOperand(x1, x2, PostIndex)); 819 __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex)); 820 __ ld1(v28.V1D(), MemOperand(x0)); 821 __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex)); 822 __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex)); 823 __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0)); 824 __ ld1(v8.V2D(), 825 v9.V2D(), 826 v10.V2D(), 827 v11.V2D(), 828 MemOperand(x1, x2, PostIndex)); 829 __ ld1(v14.V2D(), 830 v15.V2D(), 831 v16.V2D(), 832 v17.V2D(), 833 MemOperand(x1, 64, PostIndex)); 834 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0)); 835 __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 836 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex)); 837 __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0)); 838 __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); 839 __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex)); 840 __ ld1(v5.V2D(), MemOperand(x0)); 841 __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex)); 842 __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex)); 843 __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0)); 844 __ ld1(v24.V2S(), 845 v25.V2S(), 846 v26.V2S(), 847 v27.V2S(), 848 MemOperand(x1, x2, PostIndex)); 849 __ ld1(v27.V2S(), 850 v28.V2S(), 851 v29.V2S(), 852 v30.V2S(), 853 MemOperand(x1, 32, PostIndex)); 854 __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0)); 855 __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex)); 856 __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex)); 857 __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0)); 858 __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex)); 859 __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex)); 860 __ ld1(v26.V2S(), MemOperand(x0)); 861 __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex)); 862 __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex)); 863 __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); 864 __ ld1(v24.V4H(), 865 v25.V4H(), 866 v26.V4H(), 867 v27.V4H(), 
868 MemOperand(x1, x2, PostIndex)); 869 __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); 870 __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0)); 871 __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex)); 872 __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex)); 873 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0)); 874 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex)); 875 __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex)); 876 __ ld1(v26.V4H(), MemOperand(x0)); 877 __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex)); 878 __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex)); 879 __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0)); 880 __ ld1(v28.V4S(), 881 v29.V4S(), 882 v30.V4S(), 883 v31.V4S(), 884 MemOperand(x1, x2, PostIndex)); 885 __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex)); 886 __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); 887 __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex)); 888 __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex)); 889 __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0)); 890 __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex)); 891 __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex)); 892 __ ld1(v15.V4S(), MemOperand(x0)); 893 __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex)); 894 __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex)); 895 __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0)); 896 __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex)); 897 __ ld1(v9.V8B(), 898 v10.V8B(), 899 v11.V8B(), 900 v12.V8B(), 901 MemOperand(x1, 32, PostIndex)); 902 __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0)); 903 __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex)); 904 __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); 905 __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0)); 906 __ ld1(v11.V8B(), v12.V8B(), 
MemOperand(x1, x2, PostIndex)); 907 __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex)); 908 __ ld1(v31.V8B(), MemOperand(x0)); 909 __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex)); 910 __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex)); 911 __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); 912 __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 913 __ ld1(v10.V8H(), 914 v11.V8H(), 915 v12.V8H(), 916 v13.V8H(), 917 MemOperand(x1, 64, PostIndex)); 918 __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); 919 __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 920 __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex)); 921 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0)); 922 __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex)); 923 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); 924 __ ld1(v9.V8H(), MemOperand(x0)); 925 __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex)); 926 __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex)); 927 __ ld1(v19.B(), 1, MemOperand(x0)); 928 __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex)); 929 __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex)); 930 __ ld1(v10.D(), 1, MemOperand(x0)); 931 __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex)); 932 __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex)); 933 __ ld1(v19.H(), 5, MemOperand(x0)); 934 __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex)); 935 __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex)); 936 __ ld1(v21.S(), 2, MemOperand(x0)); 937 __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex)); 938 __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex)); 939 __ ld1r(v2.V16B(), MemOperand(x0)); 940 __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex)); 941 __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex)); 942 __ ld1r(v25.V1D(), MemOperand(x0)); 943 __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex)); 944 __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex)); 945 __ ld1r(v19.V2D(), MemOperand(x0)); 946 __ ld1r(v21.V2D(), MemOperand(x1, 
x2, PostIndex)); 947 __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex)); 948 __ ld1r(v24.V2S(), MemOperand(x0)); 949 __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex)); 950 __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex)); 951 __ ld1r(v19.V4H(), MemOperand(x0)); 952 __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex)); 953 __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex)); 954 __ ld1r(v15.V4S(), MemOperand(x0)); 955 __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex)); 956 __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex)); 957 __ ld1r(v26.V8B(), MemOperand(x0)); 958 __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex)); 959 __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex)); 960 __ ld1r(v13.V8H(), MemOperand(x0)); 961 __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex)); 962 __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex)); 963 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0)); 964 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); 965 __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex)); 966 __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0)); 967 __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex)); 968 __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex)); 969 __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0)); 970 __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex)); 971 __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex)); 972 __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0)); 973 __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex)); 974 __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex)); 975 __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0)); 976 __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex)); 977 __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex)); 978 __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0)); 979 __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex)); 980 __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex)); 981 __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0)); 982 __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, 
PostIndex)); 983 __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex)); 984 __ ld2(v5.B(), v6.B(), 12, MemOperand(x0)); 985 __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex)); 986 __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex)); 987 __ ld2(v11.D(), v12.D(), 1, MemOperand(x0)); 988 __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex)); 989 __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex)); 990 __ ld2(v18.H(), v19.H(), 7, MemOperand(x0)); 991 __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex)); 992 __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex)); 993 __ ld2(v29.S(), v30.S(), 3, MemOperand(x0)); 994 __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex)); 995 __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex)); 996 __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0)); 997 __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); 998 __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex)); 999 __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0)); 1000 __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex)); 1001 __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex)); 1002 __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0)); 1003 __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex)); 1004 __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex)); 1005 __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0)); 1006 __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex)); 1007 __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex)); 1008 __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0)); 1009 __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); 1010 __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex)); 1011 __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0)); 1012 __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex)); 1013 __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex)); 1014 __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0)); 1015 __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, 
PostIndex)); 1016 __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex)); 1017 __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0)); 1018 __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex)); 1019 __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex)); 1020 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0)); 1021 __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex)); 1022 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex)); 1023 __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0)); 1024 __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex)); 1025 __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex)); 1026 __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0)); 1027 __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex)); 1028 __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex)); 1029 __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0)); 1030 __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex)); 1031 __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex)); 1032 __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); 1033 __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex)); 1034 __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex)); 1035 __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0)); 1036 __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); 1037 __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); 1038 __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0)); 1039 __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex)); 1040 __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex)); 1041 __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0)); 1042 __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex)); 1043 __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex)); 1044 __ ld3(v16.D(), 
v17.D(), v18.D(), 0, MemOperand(x0)); 1045 __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex)); 1046 __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex)); 1047 __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0)); 1048 __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex)); 1049 __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex)); 1050 __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0)); 1051 __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex)); 1052 __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex)); 1053 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0)); 1054 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex)); 1055 __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex)); 1056 __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0)); 1057 __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex)); 1058 __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex)); 1059 __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0)); 1060 __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); 1061 __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex)); 1062 __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0)); 1063 __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex)); 1064 __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex)); 1065 __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0)); 1066 __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex)); 1067 __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex)); 1068 __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0)); 1069 __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex)); 1070 __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex)); 1071 __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0)); 1072 __ ld3r(v10.V8B(), v11.V8B(), 
v12.V8B(), MemOperand(x1, x2, PostIndex)); 1073 __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex)); 1074 __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); 1075 __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex)); 1076 __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex)); 1077 __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0)); 1078 __ ld4(v2.V16B(), 1079 v3.V16B(), 1080 v4.V16B(), 1081 v5.V16B(), 1082 MemOperand(x1, x2, PostIndex)); 1083 __ ld4(v5.V16B(), 1084 v6.V16B(), 1085 v7.V16B(), 1086 v8.V16B(), 1087 MemOperand(x1, 64, PostIndex)); 1088 __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0)); 1089 __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 1090 __ ld4(v29.V2D(), 1091 v30.V2D(), 1092 v31.V2D(), 1093 v0.V2D(), 1094 MemOperand(x1, 64, PostIndex)); 1095 __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0)); 1096 __ ld4(v24.V2S(), 1097 v25.V2S(), 1098 v26.V2S(), 1099 v27.V2S(), 1100 MemOperand(x1, x2, PostIndex)); 1101 __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex)); 1102 __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); 1103 __ ld4(v23.V4H(), 1104 v24.V4H(), 1105 v25.V4H(), 1106 v26.V4H(), 1107 MemOperand(x1, x2, PostIndex)); 1108 __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex)); 1109 __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0)); 1110 __ ld4(v28.V4S(), 1111 v29.V4S(), 1112 v30.V4S(), 1113 v31.V4S(), 1114 MemOperand(x1, x2, PostIndex)); 1115 __ ld4(v29.V4S(), 1116 v30.V4S(), 1117 v31.V4S(), 1118 v0.V4S(), 1119 MemOperand(x1, 64, PostIndex)); 1120 __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0)); 1121 __ ld4(v27.V8B(), 1122 v28.V8B(), 1123 v29.V8B(), 1124 v30.V8B(), 1125 MemOperand(x1, x2, PostIndex)); 1126 __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex)); 1127 __ ld4(v25.V8H(), v26.V8H(), 
v27.V8H(), v28.V8H(), MemOperand(x0)); 1128 __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); 1129 __ ld4(v20.V8H(), 1130 v21.V8H(), 1131 v22.V8H(), 1132 v23.V8H(), 1133 MemOperand(x1, 64, PostIndex)); 1134 __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0)); 1135 __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex)); 1136 __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex)); 1137 __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0)); 1138 __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); 1139 __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex)); 1140 __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0)); 1141 __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex)); 1142 __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex)); 1143 __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0)); 1144 __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex)); 1145 __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex)); 1146 __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0)); 1147 __ ld4r(v13.V16B(), 1148 v14.V16B(), 1149 v15.V16B(), 1150 v16.V16B(), 1151 MemOperand(x1, x2, PostIndex)); 1152 __ ld4r(v9.V16B(), 1153 v10.V16B(), 1154 v11.V16B(), 1155 v12.V16B(), 1156 MemOperand(x1, 4, PostIndex)); 1157 __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0)); 1158 __ ld4r(v4.V1D(), 1159 v5.V1D(), 1160 v6.V1D(), 1161 v7.V1D(), 1162 MemOperand(x1, x2, PostIndex)); 1163 __ ld4r(v26.V1D(), 1164 v27.V1D(), 1165 v28.V1D(), 1166 v29.V1D(), 1167 MemOperand(x1, 32, PostIndex)); 1168 __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0)); 1169 __ ld4r(v28.V2D(), 1170 v29.V2D(), 1171 v30.V2D(), 1172 v31.V2D(), 1173 MemOperand(x1, x2, PostIndex)); 1174 __ ld4r(v15.V2D(), 1175 v16.V2D(), 1176 v17.V2D(), 1177 v18.V2D(), 1178 
MemOperand(x1, 32, PostIndex)); 1179 __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0)); 1180 __ ld4r(v28.V2S(), 1181 v29.V2S(), 1182 v30.V2S(), 1183 v31.V2S(), 1184 MemOperand(x1, x2, PostIndex)); 1185 __ ld4r(v11.V2S(), 1186 v12.V2S(), 1187 v13.V2S(), 1188 v14.V2S(), 1189 MemOperand(x1, 16, PostIndex)); 1190 __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0)); 1191 __ ld4r(v22.V4H(), 1192 v23.V4H(), 1193 v24.V4H(), 1194 v25.V4H(), 1195 MemOperand(x1, x2, PostIndex)); 1196 __ ld4r(v20.V4H(), 1197 v21.V4H(), 1198 v22.V4H(), 1199 v23.V4H(), 1200 MemOperand(x1, 8, PostIndex)); 1201 __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0)); 1202 __ ld4r(v25.V4S(), 1203 v26.V4S(), 1204 v27.V4S(), 1205 v28.V4S(), 1206 MemOperand(x1, x2, PostIndex)); 1207 __ ld4r(v23.V4S(), 1208 v24.V4S(), 1209 v25.V4S(), 1210 v26.V4S(), 1211 MemOperand(x1, 16, PostIndex)); 1212 __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0)); 1213 __ ld4r(v27.V8B(), 1214 v28.V8B(), 1215 v29.V8B(), 1216 v30.V8B(), 1217 MemOperand(x1, x2, PostIndex)); 1218 __ ld4r(v29.V8B(), 1219 v30.V8B(), 1220 v31.V8B(), 1221 v0.V8B(), 1222 MemOperand(x1, 4, PostIndex)); 1223 __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0)); 1224 __ ld4r(v25.V8H(), 1225 v26.V8H(), 1226 v27.V8H(), 1227 v28.V8H(), 1228 MemOperand(x1, x2, PostIndex)); 1229 __ ld4r(v22.V8H(), 1230 v23.V8H(), 1231 v24.V8H(), 1232 v25.V8H(), 1233 MemOperand(x1, 8, PostIndex)); 1234 __ mla(v29.V16B(), v7.V16B(), v26.V16B()); 1235 __ mla(v6.V2S(), v4.V2S(), v14.V2S()); 1236 __ mla(v9.V2S(), v11.V2S(), v0.S(), 2); 1237 __ mla(v5.V4H(), v17.V4H(), v25.V4H()); 1238 __ mla(v24.V4H(), v7.V4H(), v11.H(), 3); 1239 __ mla(v12.V4S(), v3.V4S(), v4.V4S()); 1240 __ mla(v10.V4S(), v7.V4S(), v7.S(), 3); 1241 __ mla(v3.V8B(), v16.V8B(), v9.V8B()); 1242 __ mla(v19.V8H(), v22.V8H(), v18.V8H()); 1243 __ mla(v6.V8H(), v2.V8H(), v0.H(), 0); 1244 __ mls(v23.V16B(), v10.V16B(), v11.V16B()); 1245 __ 
mls(v14.V2S(), v31.V2S(), v22.V2S()); 1246 __ mls(v28.V2S(), v13.V2S(), v1.S(), 3); 1247 __ mls(v2.V4H(), v19.V4H(), v13.V4H()); 1248 __ mls(v18.V4H(), v15.V4H(), v12.H(), 6); 1249 __ mls(v6.V4S(), v11.V4S(), v16.V4S()); 1250 __ mls(v23.V4S(), v16.V4S(), v10.S(), 2); 1251 __ mls(v26.V8B(), v13.V8B(), v23.V8B()); 1252 __ mls(v10.V8H(), v10.V8H(), v12.V8H()); 1253 __ mls(v14.V8H(), v0.V8H(), v14.H(), 7); 1254 __ mov(b22, v1.B(), 3); 1255 __ mov(d7, v13.D(), 1); 1256 __ mov(h26, v21.H(), 2); 1257 __ mov(s26, v19.S(), 0); 1258 __ mov(v26.V16B(), v11.V16B()); 1259 __ mov(v20.V8B(), v0.V8B()); 1260 __ mov(v19.B(), 13, v6.B(), 4); 1261 __ mov(v4.B(), 13, w19); 1262 __ mov(v11.D(), 1, v8.D(), 0); 1263 __ mov(v3.D(), 0, x30); 1264 __ mov(v29.H(), 4, v11.H(), 7); 1265 __ mov(v2.H(), 6, w6); 1266 __ mov(v22.S(), 0, v5.S(), 2); 1267 __ mov(v24.S(), 3, w8); 1268 __ mov(w18, v1.S(), 3); 1269 __ mov(x28, v21.D(), 0); 1270 __ movi(d24, 0xffff0000ffffff); 1271 __ movi(v29.V16B(), 0x80); 1272 __ movi(v12.V2D(), 0xffff00ff00ffff00); 1273 __ movi(v12.V2S(), 0xec, LSL, 24); 1274 __ movi(v10.V2S(), 0x4c, MSL, 16); 1275 __ movi(v26.V4H(), 0xc0, LSL); 1276 __ movi(v24.V4S(), 0x98, LSL, 16); 1277 __ movi(v1.V4S(), 0xde, MSL, 16); 1278 __ movi(v21.V8B(), 0x4d); 1279 __ movi(v29.V8H(), 0x69, LSL); 1280 __ mul(v1.V16B(), v15.V16B(), v17.V16B()); 1281 __ mul(v21.V2S(), v19.V2S(), v29.V2S()); 1282 __ mul(v19.V2S(), v5.V2S(), v3.S(), 0); 1283 __ mul(v29.V4H(), v11.V4H(), v2.V4H()); 1284 __ mul(v2.V4H(), v7.V4H(), v0.H(), 0); 1285 __ mul(v25.V4S(), v26.V4S(), v16.V4S()); 1286 __ mul(v26.V4S(), v6.V4S(), v15.S(), 2); 1287 __ mul(v11.V8B(), v15.V8B(), v31.V8B()); 1288 __ mul(v20.V8H(), v31.V8H(), v15.V8H()); 1289 __ mul(v29.V8H(), v5.V8H(), v9.H(), 4); 1290 __ mvn(v13.V16B(), v21.V16B()); 1291 __ mvn(v28.V8B(), v19.V8B()); 1292 __ mvni(v25.V2S(), 0xb8, LSL, 8); 1293 __ mvni(v17.V2S(), 0x6c, MSL, 16); 1294 __ mvni(v29.V4H(), 0x48, LSL); 1295 __ mvni(v20.V4S(), 0x7a, LSL, 16); 1296 __ mvni(v0.V4S(), 
0x1e, MSL, 8); 1297 __ mvni(v31.V8H(), 0x3e, LSL); 1298 __ neg(d25, d11); 1299 __ neg(v4.V16B(), v9.V16B()); 1300 __ neg(v11.V2D(), v25.V2D()); 1301 __ neg(v7.V2S(), v18.V2S()); 1302 __ neg(v7.V4H(), v15.V4H()); 1303 __ neg(v17.V4S(), v18.V4S()); 1304 __ neg(v20.V8B(), v17.V8B()); 1305 __ neg(v0.V8H(), v11.V8H()); 1306 __ orn(v13.V16B(), v11.V16B(), v31.V16B()); 1307 __ orn(v22.V8B(), v16.V8B(), v22.V8B()); 1308 __ orr(v17.V16B(), v17.V16B(), v23.V16B()); 1309 __ orr(v8.V2S(), 0xe3); 1310 __ orr(v11.V4H(), 0x97, 8); 1311 __ orr(v7.V4S(), 0xab); 1312 __ orr(v8.V8B(), v4.V8B(), v3.V8B()); 1313 __ orr(v31.V8H(), 0xb0, 8); 1314 __ pmul(v11.V16B(), v18.V16B(), v23.V16B()); 1315 __ pmul(v8.V8B(), v24.V8B(), v5.V8B()); 1316 __ pmull(v24.V8H(), v18.V8B(), v22.V8B()); 1317 __ pmull2(v13.V8H(), v3.V16B(), v21.V16B()); 1318 __ raddhn(v22.V2S(), v10.V2D(), v21.V2D()); 1319 __ raddhn(v5.V4H(), v13.V4S(), v13.V4S()); 1320 __ raddhn(v10.V8B(), v17.V8H(), v26.V8H()); 1321 __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H()); 1322 __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D()); 1323 __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S()); 1324 __ rbit(v22.V16B(), v15.V16B()); 1325 __ rbit(v30.V8B(), v3.V8B()); 1326 __ rev16(v31.V16B(), v27.V16B()); 1327 __ rev16(v12.V8B(), v26.V8B()); 1328 __ rev32(v5.V16B(), v4.V16B()); 1329 __ rev32(v16.V4H(), v26.V4H()); 1330 __ rev32(v20.V8B(), v3.V8B()); 1331 __ rev32(v20.V8H(), v28.V8H()); 1332 __ rev64(v9.V16B(), v19.V16B()); 1333 __ rev64(v5.V2S(), v16.V2S()); 1334 __ rev64(v7.V4H(), v31.V4H()); 1335 __ rev64(v15.V4S(), v26.V4S()); 1336 __ rev64(v25.V8B(), v9.V8B()); 1337 __ rev64(v11.V8H(), v5.V8H()); 1338 __ rshrn(v18.V2S(), v13.V2D(), 1); 1339 __ rshrn(v25.V4H(), v30.V4S(), 2); 1340 __ rshrn(v13.V8B(), v9.V8H(), 8); 1341 __ rshrn2(v3.V16B(), v6.V8H(), 8); 1342 __ rshrn2(v0.V4S(), v29.V2D(), 25); 1343 __ rshrn2(v27.V8H(), v26.V4S(), 15); 1344 __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D()); 1345 __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S()); 1346 __ rsubhn(v6.V8B(), 
v30.V8H(), v24.V8H()); 1347 __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H()); 1348 __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D()); 1349 __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S()); 1350 __ saba(v28.V16B(), v9.V16B(), v25.V16B()); 1351 __ saba(v9.V2S(), v28.V2S(), v20.V2S()); 1352 __ saba(v17.V4H(), v22.V4H(), v22.V4H()); 1353 __ saba(v29.V4S(), v5.V4S(), v27.V4S()); 1354 __ saba(v20.V8B(), v21.V8B(), v18.V8B()); 1355 __ saba(v27.V8H(), v17.V8H(), v30.V8H()); 1356 __ sabal(v20.V2D(), v13.V2S(), v7.V2S()); 1357 __ sabal(v4.V4S(), v12.V4H(), v4.V4H()); 1358 __ sabal(v23.V8H(), v24.V8B(), v20.V8B()); 1359 __ sabal2(v26.V2D(), v21.V4S(), v18.V4S()); 1360 __ sabal2(v27.V4S(), v28.V8H(), v8.V8H()); 1361 __ sabal2(v12.V8H(), v16.V16B(), v21.V16B()); 1362 __ sabd(v0.V16B(), v15.V16B(), v13.V16B()); 1363 __ sabd(v15.V2S(), v7.V2S(), v30.V2S()); 1364 __ sabd(v17.V4H(), v17.V4H(), v12.V4H()); 1365 __ sabd(v7.V4S(), v4.V4S(), v22.V4S()); 1366 __ sabd(v23.V8B(), v3.V8B(), v26.V8B()); 1367 __ sabd(v20.V8H(), v28.V8H(), v5.V8H()); 1368 __ sabdl(v27.V2D(), v22.V2S(), v20.V2S()); 1369 __ sabdl(v31.V4S(), v20.V4H(), v23.V4H()); 1370 __ sabdl(v0.V8H(), v20.V8B(), v27.V8B()); 1371 __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S()); 1372 __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H()); 1373 __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B()); 1374 __ sadalp(v8.V1D(), v26.V2S()); 1375 __ sadalp(v12.V2D(), v26.V4S()); 1376 __ sadalp(v12.V2S(), v26.V4H()); 1377 __ sadalp(v4.V4H(), v1.V8B()); 1378 __ sadalp(v15.V4S(), v17.V8H()); 1379 __ sadalp(v21.V8H(), v25.V16B()); 1380 __ saddl(v5.V2D(), v10.V2S(), v14.V2S()); 1381 __ saddl(v18.V4S(), v3.V4H(), v15.V4H()); 1382 __ saddl(v15.V8H(), v2.V8B(), v23.V8B()); 1383 __ saddl2(v16.V2D(), v16.V4S(), v27.V4S()); 1384 __ saddl2(v6.V4S(), v24.V8H(), v0.V8H()); 1385 __ saddl2(v7.V8H(), v20.V16B(), v28.V16B()); 1386 __ saddlp(v10.V1D(), v25.V2S()); 1387 __ saddlp(v15.V2D(), v16.V4S()); 1388 __ saddlp(v18.V2S(), v10.V4H()); 1389 __ saddlp(v29.V4H(), v26.V8B()); 1390 __ 
saddlp(v10.V4S(), v1.V8H()); 1391 __ saddlp(v0.V8H(), v21.V16B()); 1392 __ saddlv(d12, v7.V4S()); 1393 __ saddlv(h14, v28.V16B()); 1394 __ saddlv(h30, v30.V8B()); 1395 __ saddlv(s27, v3.V4H()); 1396 __ saddlv(s16, v16.V8H()); 1397 __ saddw(v24.V2D(), v11.V2D(), v18.V2S()); 1398 __ saddw(v13.V4S(), v12.V4S(), v6.V4H()); 1399 __ saddw(v19.V8H(), v19.V8H(), v7.V8B()); 1400 __ saddw2(v27.V2D(), v9.V2D(), v26.V4S()); 1401 __ saddw2(v19.V4S(), v23.V4S(), v21.V8H()); 1402 __ saddw2(v15.V8H(), v25.V8H(), v30.V16B()); 1403 __ shadd(v7.V16B(), v4.V16B(), v9.V16B()); 1404 __ shadd(v29.V2S(), v25.V2S(), v24.V2S()); 1405 __ shadd(v31.V4H(), v10.V4H(), v13.V4H()); 1406 __ shadd(v21.V4S(), v16.V4S(), v8.V4S()); 1407 __ shadd(v14.V8B(), v29.V8B(), v22.V8B()); 1408 __ shadd(v19.V8H(), v24.V8H(), v20.V8H()); 1409 __ shl(d22, d25, 23); 1410 __ shl(v5.V16B(), v17.V16B(), 7); 1411 __ shl(v2.V2D(), v4.V2D(), 21); 1412 __ shl(v4.V2S(), v3.V2S(), 26); 1413 __ shl(v3.V4H(), v28.V4H(), 8); 1414 __ shl(v4.V4S(), v31.V4S(), 24); 1415 __ shl(v18.V8B(), v16.V8B(), 2); 1416 __ shl(v0.V8H(), v11.V8H(), 3); 1417 __ shll(v5.V2D(), v24.V2S(), 32); 1418 __ shll(v26.V4S(), v20.V4H(), 16); 1419 __ shll(v5.V8H(), v9.V8B(), 8); 1420 __ shll2(v21.V2D(), v28.V4S(), 32); 1421 __ shll2(v22.V4S(), v1.V8H(), 16); 1422 __ shll2(v30.V8H(), v25.V16B(), 8); 1423 __ shrn(v5.V2S(), v1.V2D(), 28); 1424 __ shrn(v29.V4H(), v18.V4S(), 7); 1425 __ shrn(v17.V8B(), v29.V8H(), 2); 1426 __ shrn2(v5.V16B(), v30.V8H(), 3); 1427 __ shrn2(v24.V4S(), v1.V2D(), 1); 1428 __ shrn2(v5.V8H(), v14.V4S(), 16); 1429 __ shsub(v30.V16B(), v22.V16B(), v23.V16B()); 1430 __ shsub(v22.V2S(), v27.V2S(), v25.V2S()); 1431 __ shsub(v13.V4H(), v22.V4H(), v1.V4H()); 1432 __ shsub(v10.V4S(), v8.V4S(), v23.V4S()); 1433 __ shsub(v6.V8B(), v9.V8B(), v31.V8B()); 1434 __ shsub(v8.V8H(), v31.V8H(), v8.V8H()); 1435 __ sli(d19, d29, 20); 1436 __ sli(v9.V16B(), v24.V16B(), 0); 1437 __ sli(v22.V2D(), v9.V2D(), 10); 1438 __ sli(v11.V2S(), v27.V2S(), 20); 1439 
__ sli(v16.V4H(), v15.V4H(), 5); 1440 __ sli(v8.V4S(), v8.V4S(), 25); 1441 __ sli(v10.V8B(), v30.V8B(), 0); 1442 __ sli(v7.V8H(), v28.V8H(), 6); 1443 __ smax(v18.V16B(), v8.V16B(), v1.V16B()); 1444 __ smax(v30.V2S(), v5.V2S(), v1.V2S()); 1445 __ smax(v17.V4H(), v25.V4H(), v19.V4H()); 1446 __ smax(v1.V4S(), v24.V4S(), v31.V4S()); 1447 __ smax(v17.V8B(), v24.V8B(), v24.V8B()); 1448 __ smax(v11.V8H(), v26.V8H(), v10.V8H()); 1449 __ smaxp(v12.V16B(), v14.V16B(), v7.V16B()); 1450 __ smaxp(v31.V2S(), v24.V2S(), v6.V2S()); 1451 __ smaxp(v10.V4H(), v29.V4H(), v10.V4H()); 1452 __ smaxp(v18.V4S(), v11.V4S(), v7.V4S()); 1453 __ smaxp(v21.V8B(), v0.V8B(), v18.V8B()); 1454 __ smaxp(v26.V8H(), v8.V8H(), v15.V8H()); 1455 __ smaxv(b4, v5.V16B()); 1456 __ smaxv(b23, v0.V8B()); 1457 __ smaxv(h6, v0.V4H()); 1458 __ smaxv(h24, v8.V8H()); 1459 __ smaxv(s3, v16.V4S()); 1460 __ smin(v24.V16B(), v8.V16B(), v18.V16B()); 1461 __ smin(v29.V2S(), v8.V2S(), v23.V2S()); 1462 __ smin(v6.V4H(), v11.V4H(), v21.V4H()); 1463 __ smin(v24.V4S(), v23.V4S(), v15.V4S()); 1464 __ smin(v8.V8B(), v16.V8B(), v4.V8B()); 1465 __ smin(v12.V8H(), v1.V8H(), v10.V8H()); 1466 __ sminp(v13.V16B(), v18.V16B(), v28.V16B()); 1467 __ sminp(v22.V2S(), v28.V2S(), v16.V2S()); 1468 __ sminp(v15.V4H(), v12.V4H(), v5.V4H()); 1469 __ sminp(v15.V4S(), v17.V4S(), v8.V4S()); 1470 __ sminp(v21.V8B(), v2.V8B(), v6.V8B()); 1471 __ sminp(v21.V8H(), v12.V8H(), v6.V8H()); 1472 __ sminv(b8, v6.V16B()); 1473 __ sminv(b6, v18.V8B()); 1474 __ sminv(h20, v1.V4H()); 1475 __ sminv(h7, v17.V8H()); 1476 __ sminv(s21, v4.V4S()); 1477 __ smlal(v24.V2D(), v14.V2S(), v21.V2S()); 1478 __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2); 1479 __ smlal(v7.V4S(), v20.V4H(), v21.V4H()); 1480 __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3); 1481 __ smlal(v29.V8H(), v14.V8B(), v1.V8B()); 1482 __ smlal2(v30.V2D(), v26.V4S(), v16.V4S()); 1483 __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0); 1484 __ smlal2(v17.V4S(), v6.V8H(), v3.V8H()); 1485 __ smlal2(v11.V4S(), v31.V8H(), 
v5.H(), 7); 1486 __ smlal2(v30.V8H(), v16.V16B(), v29.V16B()); 1487 __ smlsl(v1.V2D(), v20.V2S(), v17.V2S()); 1488 __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3); 1489 __ smlsl(v0.V4S(), v26.V4H(), v1.V4H()); 1490 __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5); 1491 __ smlsl(v4.V8H(), v0.V8B(), v26.V8B()); 1492 __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S()); 1493 __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1); 1494 __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H()); 1495 __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6); 1496 __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B()); 1497 __ smov(w21, v6.B(), 3); 1498 __ smov(w13, v26.H(), 7); 1499 __ smov(x24, v16.B(), 7); 1500 __ smov(x7, v4.H(), 3); 1501 __ smov(x29, v7.S(), 1); 1502 __ smull(v4.V2D(), v29.V2S(), v17.V2S()); 1503 __ smull(v30.V2D(), v21.V2S(), v6.S(), 2); 1504 __ smull(v23.V4S(), v5.V4H(), v23.V4H()); 1505 __ smull(v8.V4S(), v9.V4H(), v2.H(), 1); 1506 __ smull(v31.V8H(), v17.V8B(), v1.V8B()); 1507 __ smull2(v3.V2D(), v3.V4S(), v23.V4S()); 1508 __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1); 1509 __ smull2(v19.V4S(), v20.V8H(), v30.V8H()); 1510 __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4); 1511 __ smull2(v25.V8H(), v8.V16B(), v27.V16B()); 1512 __ sqabs(b3, b15); 1513 __ sqabs(d14, d9); 1514 __ sqabs(h31, h28); 1515 __ sqabs(s8, s0); 1516 __ sqabs(v14.V16B(), v7.V16B()); 1517 __ sqabs(v23.V2D(), v19.V2D()); 1518 __ sqabs(v10.V2S(), v24.V2S()); 1519 __ sqabs(v31.V4H(), v19.V4H()); 1520 __ sqabs(v23.V4S(), v0.V4S()); 1521 __ sqabs(v29.V8B(), v23.V8B()); 1522 __ sqabs(v17.V8H(), v21.V8H()); 1523 __ sqadd(b9, b23, b13); 1524 __ sqadd(d2, d25, d26); 1525 __ sqadd(h7, h29, h25); 1526 __ sqadd(s11, s7, s24); 1527 __ sqadd(v20.V16B(), v16.V16B(), v29.V16B()); 1528 __ sqadd(v23.V2D(), v30.V2D(), v28.V2D()); 1529 __ sqadd(v8.V2S(), v19.V2S(), v2.V2S()); 1530 __ sqadd(v20.V4H(), v12.V4H(), v31.V4H()); 1531 __ sqadd(v14.V4S(), v15.V4S(), v17.V4S()); 1532 __ sqadd(v2.V8B(), v29.V8B(), v13.V8B()); 1533 __ sqadd(v7.V8H(), v19.V8H(), v14.V8H()); 1534 __ sqdmlal(d15, s5, 
s30); 1535 __ sqdmlal(d24, s10, v2.S(), 3); 1536 __ sqdmlal(s9, h19, h8); 1537 __ sqdmlal(s14, h1, v12.H(), 3); 1538 __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S()); 1539 __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1); 1540 __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H()); 1541 __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1); 1542 __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S()); 1543 __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0); 1544 __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H()); 1545 __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4); 1546 __ sqdmlsl(d10, s29, s20); 1547 __ sqdmlsl(d10, s9, v10.S(), 1); 1548 __ sqdmlsl(s30, h9, h24); 1549 __ sqdmlsl(s13, h24, v6.H(), 1); 1550 __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S()); 1551 __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3); 1552 __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H()); 1553 __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4); 1554 __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S()); 1555 __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0); 1556 __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H()); 1557 __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0); 1558 __ sqdmulh(h17, h27, h12); 1559 __ sqdmulh(h16, h5, v11.H(), 0); 1560 __ sqdmulh(s1, s19, s16); 1561 __ sqdmulh(s1, s16, v2.S(), 0); 1562 __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S()); 1563 __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0); 1564 __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H()); 1565 __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5); 1566 __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S()); 1567 __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3); 1568 __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H()); 1569 __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3); 1570 __ sqdmull(d25, s2, s26); 1571 __ sqdmull(d30, s14, v5.S(), 1); 1572 __ sqdmull(s29, h18, h11); 1573 __ sqdmull(s11, h13, v7.H(), 6); 1574 __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S()); 1575 __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1); 1576 __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H()); 1577 __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1); 1578 __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S()); 1579 __ sqdmull2(v1.V2D(), 
v24.V4S(), v13.S(), 2); 1580 __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H()); 1581 __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3); 1582 __ sqneg(b2, b0); 1583 __ sqneg(d24, d2); 1584 __ sqneg(h29, h3); 1585 __ sqneg(s4, s9); 1586 __ sqneg(v14.V16B(), v29.V16B()); 1587 __ sqneg(v30.V2D(), v12.V2D()); 1588 __ sqneg(v28.V2S(), v26.V2S()); 1589 __ sqneg(v4.V4H(), v4.V4H()); 1590 __ sqneg(v9.V4S(), v8.V4S()); 1591 __ sqneg(v20.V8B(), v20.V8B()); 1592 __ sqneg(v27.V8H(), v10.V8H()); 1593 __ sqrdmulh(h7, h24, h0); 1594 __ sqrdmulh(h14, h3, v4.H(), 6); 1595 __ sqrdmulh(s27, s19, s24); 1596 __ sqrdmulh(s31, s21, v4.S(), 0); 1597 __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S()); 1598 __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0); 1599 __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H()); 1600 __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6); 1601 __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S()); 1602 __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1); 1603 __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H()); 1604 __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2); 1605 __ sqrshl(b8, b21, b13); 1606 __ sqrshl(d29, d7, d20); 1607 __ sqrshl(h28, h14, h10); 1608 __ sqrshl(s26, s18, s2); 1609 __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B()); 1610 __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D()); 1611 __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S()); 1612 __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H()); 1613 __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S()); 1614 __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B()); 1615 __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H()); 1616 __ sqrshrn(b6, h21, 4); 1617 __ sqrshrn(h14, s17, 11); 1618 __ sqrshrn(s25, d27, 10); 1619 __ sqrshrn(v6.V2S(), v13.V2D(), 18); 1620 __ sqrshrn(v5.V4H(), v9.V4S(), 15); 1621 __ sqrshrn(v19.V8B(), v12.V8H(), 1); 1622 __ sqrshrn2(v19.V16B(), v21.V8H(), 7); 1623 __ sqrshrn2(v29.V4S(), v24.V2D(), 13); 1624 __ sqrshrn2(v12.V8H(), v2.V4S(), 10); 1625 __ sqrshrun(b16, h9, 5); 1626 __ sqrshrun(h3, s24, 15); 1627 __ sqrshrun(s16, d18, 8); 1628 __ sqrshrun(v28.V2S(), v23.V2D(), 8); 1629 __ sqrshrun(v31.V4H(), v25.V4S(), 10); 
1630 __ sqrshrun(v19.V8B(), v23.V8H(), 2); 1631 __ sqrshrun2(v24.V16B(), v0.V8H(), 8); 1632 __ sqrshrun2(v22.V4S(), v1.V2D(), 23); 1633 __ sqrshrun2(v28.V8H(), v21.V4S(), 13); 1634 __ sqshl(b6, b21, b8); 1635 __ sqshl(b11, b26, 2); 1636 __ sqshl(d29, d0, d4); 1637 __ sqshl(d21, d7, 35); 1638 __ sqshl(h20, h25, h17); 1639 __ sqshl(h20, h0, 8); 1640 __ sqshl(s29, s13, s4); 1641 __ sqshl(s10, s11, 20); 1642 __ sqshl(v8.V16B(), v18.V16B(), v28.V16B()); 1643 __ sqshl(v29.V16B(), v29.V16B(), 2); 1644 __ sqshl(v8.V2D(), v31.V2D(), v16.V2D()); 1645 __ sqshl(v7.V2D(), v14.V2D(), 37); 1646 __ sqshl(v0.V2S(), v26.V2S(), v7.V2S()); 1647 __ sqshl(v5.V2S(), v11.V2S(), 19); 1648 __ sqshl(v11.V4H(), v30.V4H(), v0.V4H()); 1649 __ sqshl(v1.V4H(), v18.V4H(), 7); 1650 __ sqshl(v22.V4S(), v3.V4S(), v30.V4S()); 1651 __ sqshl(v16.V4S(), v15.V4S(), 28); 1652 __ sqshl(v6.V8B(), v28.V8B(), v25.V8B()); 1653 __ sqshl(v0.V8B(), v15.V8B(), 0); 1654 __ sqshl(v6.V8H(), v16.V8H(), v30.V8H()); 1655 __ sqshl(v3.V8H(), v20.V8H(), 14); 1656 __ sqshlu(b13, b14, 6); 1657 __ sqshlu(d0, d16, 44); 1658 __ sqshlu(h5, h29, 15); 1659 __ sqshlu(s29, s8, 13); 1660 __ sqshlu(v27.V16B(), v20.V16B(), 2); 1661 __ sqshlu(v24.V2D(), v12.V2D(), 11); 1662 __ sqshlu(v12.V2S(), v19.V2S(), 22); 1663 __ sqshlu(v8.V4H(), v12.V4H(), 11); 1664 __ sqshlu(v18.V4S(), v3.V4S(), 8); 1665 __ sqshlu(v3.V8B(), v10.V8B(), 1); 1666 __ sqshlu(v30.V8H(), v24.V8H(), 4); 1667 __ sqshrn(b1, h28, 1); 1668 __ sqshrn(h31, s7, 10); 1669 __ sqshrn(s4, d10, 24); 1670 __ sqshrn(v10.V2S(), v1.V2D(), 29); 1671 __ sqshrn(v3.V4H(), v13.V4S(), 14); 1672 __ sqshrn(v27.V8B(), v6.V8H(), 7); 1673 __ sqshrn2(v14.V16B(), v23.V8H(), 1); 1674 __ sqshrn2(v25.V4S(), v22.V2D(), 27); 1675 __ sqshrn2(v31.V8H(), v12.V4S(), 10); 1676 __ sqshrun(b9, h0, 1); 1677 __ sqshrun(h11, s6, 7); 1678 __ sqshrun(s13, d12, 13); 1679 __ sqshrun(v10.V2S(), v30.V2D(), 1); 1680 __ sqshrun(v31.V4H(), v3.V4S(), 11); 1681 __ sqshrun(v28.V8B(), v30.V8H(), 8); 1682 __ sqshrun2(v16.V16B(), 
v27.V8H(), 3); 1683 __ sqshrun2(v27.V4S(), v14.V2D(), 18); 1684 __ sqshrun2(v23.V8H(), v14.V4S(), 1); 1685 __ sqsub(b19, b29, b11); 1686 __ sqsub(d21, d31, d6); 1687 __ sqsub(h18, h10, h19); 1688 __ sqsub(s6, s5, s0); 1689 __ sqsub(v21.V16B(), v22.V16B(), v0.V16B()); 1690 __ sqsub(v22.V2D(), v10.V2D(), v17.V2D()); 1691 __ sqsub(v8.V2S(), v21.V2S(), v2.V2S()); 1692 __ sqsub(v18.V4H(), v25.V4H(), v27.V4H()); 1693 __ sqsub(v13.V4S(), v3.V4S(), v6.V4S()); 1694 __ sqsub(v28.V8B(), v29.V8B(), v16.V8B()); 1695 __ sqsub(v17.V8H(), v6.V8H(), v10.V8H()); 1696 __ sqxtn(b27, h26); 1697 __ sqxtn(h17, s11); 1698 __ sqxtn(s22, d31); 1699 __ sqxtn(v26.V2S(), v5.V2D()); 1700 __ sqxtn(v13.V4H(), v7.V4S()); 1701 __ sqxtn(v19.V8B(), v19.V8H()); 1702 __ sqxtn2(v19.V16B(), v3.V8H()); 1703 __ sqxtn2(v23.V4S(), v1.V2D()); 1704 __ sqxtn2(v13.V8H(), v3.V4S()); 1705 __ sqxtun(b26, h9); 1706 __ sqxtun(h19, s12); 1707 __ sqxtun(s3, d6); 1708 __ sqxtun(v29.V2S(), v26.V2D()); 1709 __ sqxtun(v26.V4H(), v10.V4S()); 1710 __ sqxtun(v7.V8B(), v29.V8H()); 1711 __ sqxtun2(v21.V16B(), v14.V8H()); 1712 __ sqxtun2(v24.V4S(), v15.V2D()); 1713 __ sqxtun2(v30.V8H(), v1.V4S()); 1714 __ srhadd(v21.V16B(), v17.V16B(), v15.V16B()); 1715 __ srhadd(v28.V2S(), v21.V2S(), v29.V2S()); 1716 __ srhadd(v9.V4H(), v1.V4H(), v30.V4H()); 1717 __ srhadd(v24.V4S(), v0.V4S(), v2.V4S()); 1718 __ srhadd(v6.V8B(), v17.V8B(), v15.V8B()); 1719 __ srhadd(v5.V8H(), v7.V8H(), v21.V8H()); 1720 __ sri(d14, d14, 49); 1721 __ sri(v23.V16B(), v8.V16B(), 4); 1722 __ sri(v20.V2D(), v13.V2D(), 20); 1723 __ sri(v16.V2S(), v2.V2S(), 24); 1724 __ sri(v5.V4H(), v23.V4H(), 11); 1725 __ sri(v27.V4S(), v15.V4S(), 23); 1726 __ sri(v19.V8B(), v29.V8B(), 4); 1727 __ sri(v7.V8H(), v29.V8H(), 3); 1728 __ srshl(d2, d9, d26); 1729 __ srshl(v29.V16B(), v17.V16B(), v11.V16B()); 1730 __ srshl(v8.V2D(), v15.V2D(), v4.V2D()); 1731 __ srshl(v25.V2S(), v17.V2S(), v8.V2S()); 1732 __ srshl(v19.V4H(), v7.V4H(), v7.V4H()); 1733 __ srshl(v13.V4S(), v2.V4S(), 
v17.V4S()); 1734 __ srshl(v22.V8B(), v6.V8B(), v21.V8B()); 1735 __ srshl(v10.V8H(), v17.V8H(), v4.V8H()); 1736 __ srshr(d21, d18, 45); 1737 __ srshr(v3.V16B(), v11.V16B(), 7); 1738 __ srshr(v21.V2D(), v26.V2D(), 53); 1739 __ srshr(v11.V2S(), v5.V2S(), 28); 1740 __ srshr(v7.V4H(), v18.V4H(), 12); 1741 __ srshr(v7.V4S(), v3.V4S(), 30); 1742 __ srshr(v14.V8B(), v2.V8B(), 6); 1743 __ srshr(v21.V8H(), v20.V8H(), 3); 1744 __ srsra(d21, d30, 63); 1745 __ srsra(v27.V16B(), v30.V16B(), 6); 1746 __ srsra(v20.V2D(), v12.V2D(), 27); 1747 __ srsra(v0.V2S(), v17.V2S(), 5); 1748 __ srsra(v14.V4H(), v16.V4H(), 15); 1749 __ srsra(v18.V4S(), v3.V4S(), 20); 1750 __ srsra(v21.V8B(), v1.V8B(), 1); 1751 __ srsra(v31.V8H(), v25.V8H(), 2); 1752 __ sshl(d1, d13, d9); 1753 __ sshl(v17.V16B(), v31.V16B(), v15.V16B()); 1754 __ sshl(v13.V2D(), v16.V2D(), v0.V2D()); 1755 __ sshl(v0.V2S(), v7.V2S(), v22.V2S()); 1756 __ sshl(v23.V4H(), v19.V4H(), v4.V4H()); 1757 __ sshl(v5.V4S(), v5.V4S(), v11.V4S()); 1758 __ sshl(v23.V8B(), v27.V8B(), v7.V8B()); 1759 __ sshl(v29.V8H(), v10.V8H(), v5.V8H()); 1760 __ sshll(v0.V2D(), v2.V2S(), 23); 1761 __ sshll(v11.V4S(), v8.V4H(), 8); 1762 __ sshll(v4.V8H(), v29.V8B(), 1); 1763 __ sshll2(v10.V2D(), v4.V4S(), 14); 1764 __ sshll2(v26.V4S(), v31.V8H(), 6); 1765 __ sshll2(v3.V8H(), v26.V16B(), 4); 1766 __ sshr(d19, d21, 20); 1767 __ sshr(v15.V16B(), v23.V16B(), 5); 1768 __ sshr(v17.V2D(), v14.V2D(), 38); 1769 __ sshr(v3.V2S(), v29.V2S(), 23); 1770 __ sshr(v23.V4H(), v27.V4H(), 4); 1771 __ sshr(v28.V4S(), v3.V4S(), 4); 1772 __ sshr(v14.V8B(), v2.V8B(), 6); 1773 __ sshr(v3.V8H(), v8.V8H(), 6); 1774 __ ssra(d12, d28, 44); 1775 __ ssra(v29.V16B(), v31.V16B(), 4); 1776 __ ssra(v3.V2D(), v0.V2D(), 24); 1777 __ ssra(v14.V2S(), v28.V2S(), 6); 1778 __ ssra(v18.V4H(), v8.V4H(), 7); 1779 __ ssra(v31.V4S(), v14.V4S(), 24); 1780 __ ssra(v28.V8B(), v26.V8B(), 5); 1781 __ ssra(v9.V8H(), v9.V8H(), 14); 1782 __ ssubl(v13.V2D(), v14.V2S(), v3.V2S()); 1783 __ ssubl(v5.V4S(), v16.V4H(), 
v8.V4H()); 1784 __ ssubl(v0.V8H(), v28.V8B(), v6.V8B()); 1785 __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S()); 1786 __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H()); 1787 __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B()); 1788 __ ssubw(v25.V2D(), v23.V2D(), v26.V2S()); 1789 __ ssubw(v21.V4S(), v18.V4S(), v24.V4H()); 1790 __ ssubw(v30.V8H(), v22.V8H(), v3.V8B()); 1791 __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S()); 1792 __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H()); 1793 __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B()); 1794 __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); 1795 __ st1(v10.V16B(), 1796 v11.V16B(), 1797 v12.V16B(), 1798 v13.V16B(), 1799 MemOperand(x1, x2, PostIndex)); 1800 __ st1(v27.V16B(), 1801 v28.V16B(), 1802 v29.V16B(), 1803 v30.V16B(), 1804 MemOperand(x1, 64, PostIndex)); 1805 __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0)); 1806 __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); 1807 __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex)); 1808 __ st1(v7.V16B(), v8.V16B(), MemOperand(x0)); 1809 __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex)); 1810 __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex)); 1811 __ st1(v23.V16B(), MemOperand(x0)); 1812 __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex)); 1813 __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex)); 1814 __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0)); 1815 __ st1(v12.V1D(), 1816 v13.V1D(), 1817 v14.V1D(), 1818 v15.V1D(), 1819 MemOperand(x1, x2, PostIndex)); 1820 __ st1(v30.V1D(), 1821 v31.V1D(), 1822 v0.V1D(), 1823 v1.V1D(), 1824 MemOperand(x1, 32, PostIndex)); 1825 __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0)); 1826 __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex)); 1827 __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex)); 1828 __ st1(v18.V1D(), v19.V1D(), MemOperand(x0)); 1829 __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex)); 1830 __ st1(v2.V1D(), 
v3.V1D(), MemOperand(x1, 16, PostIndex)); 1831 __ st1(v4.V1D(), MemOperand(x0)); 1832 __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex)); 1833 __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex)); 1834 __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0)); 1835 __ st1(v22.V2D(), 1836 v23.V2D(), 1837 v24.V2D(), 1838 v25.V2D(), 1839 MemOperand(x1, x2, PostIndex)); 1840 __ st1(v28.V2D(), 1841 v29.V2D(), 1842 v30.V2D(), 1843 v31.V2D(), 1844 MemOperand(x1, 64, PostIndex)); 1845 __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 1846 __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex)); 1847 __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex)); 1848 __ st1(v21.V2D(), v22.V2D(), MemOperand(x0)); 1849 __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); 1850 __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex)); 1851 __ st1(v21.V2D(), MemOperand(x0)); 1852 __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex)); 1853 __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex)); 1854 __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0)); 1855 __ st1(v8.V2S(), 1856 v9.V2S(), 1857 v10.V2S(), 1858 v11.V2S(), 1859 MemOperand(x1, x2, PostIndex)); 1860 __ st1(v15.V2S(), 1861 v16.V2S(), 1862 v17.V2S(), 1863 v18.V2S(), 1864 MemOperand(x1, 32, PostIndex)); 1865 __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0)); 1866 __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex)); 1867 __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex)); 1868 __ st1(v28.V2S(), v29.V2S(), MemOperand(x0)); 1869 __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex)); 1870 __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex)); 1871 __ st1(v6.V2S(), MemOperand(x0)); 1872 __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex)); 1873 __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex)); 1874 __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0)); 1875 __ st1(v9.V4H(), 1876 v10.V4H(), 1877 v11.V4H(), 1878 v12.V4H(), 1879 
MemOperand(x1, x2, PostIndex)); 1880 __ st1(v25.V4H(), 1881 v26.V4H(), 1882 v27.V4H(), 1883 v28.V4H(), 1884 MemOperand(x1, 32, PostIndex)); 1885 __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0)); 1886 __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex)); 1887 __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex)); 1888 __ st1(v13.V4H(), v14.V4H(), MemOperand(x0)); 1889 __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex)); 1890 __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex)); 1891 __ st1(v16.V4H(), MemOperand(x0)); 1892 __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex)); 1893 __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex)); 1894 __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0)); 1895 __ st1(v25.V4S(), 1896 v26.V4S(), 1897 v27.V4S(), 1898 v28.V4S(), 1899 MemOperand(x1, x2, PostIndex)); 1900 __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex)); 1901 __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0)); 1902 __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); 1903 __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex)); 1904 __ st1(v17.V4S(), v18.V4S(), MemOperand(x0)); 1905 __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); 1906 __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex)); 1907 __ st1(v26.V4S(), MemOperand(x0)); 1908 __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex)); 1909 __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex)); 1910 __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 1911 __ st1(v10.V8B(), 1912 v11.V8B(), 1913 v12.V8B(), 1914 v13.V8B(), 1915 MemOperand(x1, x2, PostIndex)); 1916 __ st1(v15.V8B(), 1917 v16.V8B(), 1918 v17.V8B(), 1919 v18.V8B(), 1920 MemOperand(x1, 32, PostIndex)); 1921 __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0)); 1922 __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); 1923 __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex)); 1924 __ 
st1(v12.V8B(), v13.V8B(), MemOperand(x0)); 1925 __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); 1926 __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex)); 1927 __ st1(v16.V8B(), MemOperand(x0)); 1928 __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex)); 1929 __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex)); 1930 __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0)); 1931 __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex)); 1932 __ st1(v26.V8H(), 1933 v27.V8H(), 1934 v28.V8H(), 1935 v29.V8H(), 1936 MemOperand(x1, 64, PostIndex)); 1937 __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0)); 1938 __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); 1939 __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); 1940 __ st1(v26.V8H(), v27.V8H(), MemOperand(x0)); 1941 __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex)); 1942 __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex)); 1943 __ st1(v29.V8H(), MemOperand(x0)); 1944 __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex)); 1945 __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex)); 1946 __ st1(v19.B(), 15, MemOperand(x0)); 1947 __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex)); 1948 __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex)); 1949 __ st1(v13.D(), 0, MemOperand(x0)); 1950 __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex)); 1951 __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex)); 1952 __ st1(v22.H(), 0, MemOperand(x0)); 1953 __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex)); 1954 __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex)); 1955 __ st1(v0.S(), 0, MemOperand(x0)); 1956 __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex)); 1957 __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex)); 1958 __ st2(v7.V16B(), v8.V16B(), MemOperand(x0)); 1959 __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex)); 1960 __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex)); 1961 __ st2(v14.V2D(), v15.V2D(), MemOperand(x0)); 1962 __ st2(v7.V2D(), 
v8.V2D(), MemOperand(x1, x2, PostIndex)); 1963 __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex)); 1964 __ st2(v22.V2S(), v23.V2S(), MemOperand(x0)); 1965 __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex)); 1966 __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex)); 1967 __ st2(v23.V4H(), v24.V4H(), MemOperand(x0)); 1968 __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex)); 1969 __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex)); 1970 __ st2(v17.V4S(), v18.V4S(), MemOperand(x0)); 1971 __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex)); 1972 __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex)); 1973 __ st2(v31.V8B(), v0.V8B(), MemOperand(x0)); 1974 __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); 1975 __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex)); 1976 __ st2(v7.V8H(), v8.V8H(), MemOperand(x0)); 1977 __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); 1978 __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); 1979 __ st2(v8.B(), v9.B(), 15, MemOperand(x0)); 1980 __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex)); 1981 __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex)); 1982 __ st2(v25.D(), v26.D(), 0, MemOperand(x0)); 1983 __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); 1984 __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex)); 1985 __ st2(v4.H(), v5.H(), 3, MemOperand(x0)); 1986 __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex)); 1987 __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex)); 1988 __ st2(v14.S(), v15.S(), 3, MemOperand(x0)); 1989 __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex)); 1990 __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex)); 1991 __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0)); 1992 __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); 1993 __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex)); 1994 __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 
1995 __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex)); 1996 __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex)); 1997 __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0)); 1998 __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex)); 1999 __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex)); 2000 __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0)); 2001 __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); 2002 __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex)); 2003 __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0)); 2004 __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex)); 2005 __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex)); 2006 __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 2007 __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex)); 2008 __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex)); 2009 __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0)); 2010 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex)); 2011 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); 2012 __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0)); 2013 __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex)); 2014 __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex)); 2015 __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0)); 2016 __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex)); 2017 __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex)); 2018 __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0)); 2019 __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex)); 2020 __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex)); 2021 __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0)); 2022 __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex)); 2023 __ st3(v15.S(), v16.S(), v17.S(), 
0, MemOperand(x1, 12, PostIndex)); 2024 __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0)); 2025 __ st4(v24.V16B(), 2026 v25.V16B(), 2027 v26.V16B(), 2028 v27.V16B(), 2029 MemOperand(x1, x2, PostIndex)); 2030 __ st4(v15.V16B(), 2031 v16.V16B(), 2032 v17.V16B(), 2033 v18.V16B(), 2034 MemOperand(x1, 64, PostIndex)); 2035 __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); 2036 __ st4(v17.V2D(), 2037 v18.V2D(), 2038 v19.V2D(), 2039 v20.V2D(), 2040 MemOperand(x1, x2, PostIndex)); 2041 __ st4(v9.V2D(), 2042 v10.V2D(), 2043 v11.V2D(), 2044 v12.V2D(), 2045 MemOperand(x1, 64, PostIndex)); 2046 __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0)); 2047 __ st4(v15.V2S(), 2048 v16.V2S(), 2049 v17.V2S(), 2050 v18.V2S(), 2051 MemOperand(x1, x2, PostIndex)); 2052 __ st4(v24.V2S(), 2053 v25.V2S(), 2054 v26.V2S(), 2055 v27.V2S(), 2056 MemOperand(x1, 32, PostIndex)); 2057 __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0)); 2058 __ st4(v18.V4H(), 2059 v19.V4H(), 2060 v20.V4H(), 2061 v21.V4H(), 2062 MemOperand(x1, x2, PostIndex)); 2063 __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); 2064 __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0)); 2065 __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex)); 2066 __ st4(v15.V4S(), 2067 v16.V4S(), 2068 v17.V4S(), 2069 v18.V4S(), 2070 MemOperand(x1, 64, PostIndex)); 2071 __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); 2072 __ st4(v25.V8B(), 2073 v26.V8B(), 2074 v27.V8B(), 2075 v28.V8B(), 2076 MemOperand(x1, x2, PostIndex)); 2077 __ st4(v19.V8B(), 2078 v20.V8B(), 2079 v21.V8B(), 2080 v22.V8B(), 2081 MemOperand(x1, 32, PostIndex)); 2082 __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0)); 2083 __ st4(v15.V8H(), 2084 v16.V8H(), 2085 v17.V8H(), 2086 v18.V8H(), 2087 MemOperand(x1, x2, PostIndex)); 2088 __ st4(v31.V8H(), 2089 v0.V8H(), 2090 v1.V8H(), 2091 v2.V8H(), 2092 
MemOperand(x1, 64, PostIndex)); 2093 __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0)); 2094 __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex)); 2095 __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex)); 2096 __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0)); 2097 __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex)); 2098 __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex)); 2099 __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0)); 2100 __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex)); 2101 __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex)); 2102 __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0)); 2103 __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex)); 2104 __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex)); 2105 __ sub(d12, d17, d2); 2106 __ sub(v20.V16B(), v24.V16B(), v8.V16B()); 2107 __ sub(v8.V2D(), v29.V2D(), v5.V2D()); 2108 __ sub(v2.V2S(), v28.V2S(), v24.V2S()); 2109 __ sub(v24.V4H(), v10.V4H(), v4.V4H()); 2110 __ sub(v28.V4S(), v4.V4S(), v17.V4S()); 2111 __ sub(v16.V8B(), v27.V8B(), v2.V8B()); 2112 __ sub(v20.V8H(), v10.V8H(), v13.V8H()); 2113 __ subhn(v5.V2S(), v14.V2D(), v13.V2D()); 2114 __ subhn(v10.V4H(), v5.V4S(), v8.V4S()); 2115 __ subhn(v6.V8B(), v10.V8H(), v22.V8H()); 2116 __ subhn2(v11.V16B(), v6.V8H(), v9.V8H()); 2117 __ subhn2(v25.V4S(), v18.V2D(), v24.V2D()); 2118 __ subhn2(v20.V8H(), v21.V4S(), v1.V4S()); 2119 __ suqadd(b25, b11); 2120 __ suqadd(d13, d1); 2121 __ suqadd(h0, h9); 2122 __ suqadd(s22, s8); 2123 __ suqadd(v24.V16B(), v27.V16B()); 2124 __ suqadd(v26.V2D(), v14.V2D()); 2125 __ suqadd(v7.V2S(), v10.V2S()); 2126 __ suqadd(v25.V4H(), v12.V4H()); 2127 __ suqadd(v4.V4S(), v3.V4S()); 2128 __ suqadd(v14.V8B(), v18.V8B()); 2129 __ suqadd(v31.V8H(), v8.V8H()); 2130 __ sxtl(v16.V2D(), v20.V2S()); 2131 __ sxtl(v27.V4S(), 
v28.V4H()); 2132 __ sxtl(v0.V8H(), v22.V8B()); 2133 __ sxtl2(v6.V2D(), v7.V4S()); 2134 __ sxtl2(v9.V4S(), v27.V8H()); 2135 __ sxtl2(v16.V8H(), v16.V16B()); 2136 __ tbl(v25.V16B(), 2137 v17.V16B(), 2138 v18.V16B(), 2139 v19.V16B(), 2140 v20.V16B(), 2141 v22.V16B()); 2142 __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B()); 2143 __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B()); 2144 __ tbl(v20.V16B(), v15.V16B(), v4.V16B()); 2145 __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B()); 2146 __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B()); 2147 __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B()); 2148 __ tbl(v11.V8B(), v19.V16B(), v30.V8B()); 2149 __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B()); 2150 __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B()); 2151 __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B()); 2152 __ tbx(v13.V16B(), v3.V16B(), v20.V16B()); 2153 __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B()); 2154 __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B()); 2155 __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B()); 2156 __ tbx(v16.V8B(), v11.V16B(), v29.V8B()); 2157 __ trn1(v19.V16B(), v24.V16B(), v12.V16B()); 2158 __ trn1(v2.V2D(), v7.V2D(), v10.V2D()); 2159 __ trn1(v22.V2S(), v0.V2S(), v21.V2S()); 2160 __ trn1(v12.V4H(), v15.V4H(), v20.V4H()); 2161 __ trn1(v30.V4S(), v17.V4S(), v9.V4S()); 2162 __ trn1(v12.V8B(), v19.V8B(), v29.V8B()); 2163 __ trn1(v23.V8H(), v8.V8H(), v9.V8H()); 2164 __ trn2(v28.V16B(), v30.V16B(), v25.V16B()); 2165 __ trn2(v7.V2D(), v27.V2D(), v7.V2D()); 2166 __ trn2(v30.V2S(), v16.V2S(), v19.V2S()); 2167 __ trn2(v24.V4H(), v6.V4H(), v25.V4H()); 2168 __ trn2(v2.V4S(), v19.V4S(), v11.V4S()); 2169 __ trn2(v25.V8B(), v27.V8B(), v18.V8B()); 2170 __ trn2(v12.V8H(), v4.V8H(), v15.V8H()); 2171 __ uaba(v31.V16B(), v12.V16B(), v28.V16B()); 2172 __ uaba(v18.V2S(), v5.V2S(), v14.V2S()); 2173 __ uaba(v9.V4H(), v20.V4H(), 
v21.V4H()); 2174 __ uaba(v6.V4S(), v20.V4S(), v2.V4S()); 2175 __ uaba(v16.V8B(), v12.V8B(), v5.V8B()); 2176 __ uaba(v15.V8H(), v26.V8H(), v30.V8H()); 2177 __ uabal(v10.V2D(), v18.V2S(), v15.V2S()); 2178 __ uabal(v30.V4S(), v19.V4H(), v7.V4H()); 2179 __ uabal(v4.V8H(), v27.V8B(), v0.V8B()); 2180 __ uabal2(v19.V2D(), v12.V4S(), v2.V4S()); 2181 __ uabal2(v26.V4S(), v5.V8H(), v12.V8H()); 2182 __ uabal2(v19.V8H(), v20.V16B(), v28.V16B()); 2183 __ uabd(v18.V16B(), v4.V16B(), v21.V16B()); 2184 __ uabd(v30.V2S(), v21.V2S(), v16.V2S()); 2185 __ uabd(v8.V4H(), v28.V4H(), v25.V4H()); 2186 __ uabd(v28.V4S(), v12.V4S(), v21.V4S()); 2187 __ uabd(v19.V8B(), v16.V8B(), v28.V8B()); 2188 __ uabd(v9.V8H(), v12.V8H(), v29.V8H()); 2189 __ uabdl(v26.V2D(), v0.V2S(), v8.V2S()); 2190 __ uabdl(v29.V4S(), v31.V4H(), v25.V4H()); 2191 __ uabdl(v27.V8H(), v29.V8B(), v14.V8B()); 2192 __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S()); 2193 __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H()); 2194 __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B()); 2195 __ uadalp(v9.V1D(), v15.V2S()); 2196 __ uadalp(v14.V2D(), v12.V4S()); 2197 __ uadalp(v28.V2S(), v12.V4H()); 2198 __ uadalp(v0.V4H(), v17.V8B()); 2199 __ uadalp(v1.V4S(), v29.V8H()); 2200 __ uadalp(v15.V8H(), v22.V16B()); 2201 __ uaddl(v1.V2D(), v20.V2S(), v27.V2S()); 2202 __ uaddl(v31.V4S(), v25.V4H(), v5.V4H()); 2203 __ uaddl(v12.V8H(), v3.V8B(), v3.V8B()); 2204 __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S()); 2205 __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H()); 2206 __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B()); 2207 __ uaddlp(v7.V1D(), v9.V2S()); 2208 __ uaddlp(v26.V2D(), v4.V4S()); 2209 __ uaddlp(v28.V2S(), v1.V4H()); 2210 __ uaddlp(v20.V4H(), v31.V8B()); 2211 __ uaddlp(v16.V4S(), v17.V8H()); 2212 __ uaddlp(v6.V8H(), v2.V16B()); 2213 __ uaddlv(d28, v22.V4S()); 2214 __ uaddlv(h0, v19.V16B()); 2215 __ uaddlv(h30, v30.V8B()); 2216 __ uaddlv(s24, v18.V4H()); 2217 __ uaddlv(s10, v0.V8H()); 2218 __ uaddw(v9.V2D(), v17.V2D(), v14.V2S()); 2219 __ uaddw(v9.V4S(), v25.V4S(), v3.V4H()); 2220 
__ uaddw(v18.V8H(), v1.V8H(), v0.V8B()); 2221 __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S()); 2222 __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H()); 2223 __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B()); 2224 __ uhadd(v13.V16B(), v9.V16B(), v3.V16B()); 2225 __ uhadd(v17.V2S(), v25.V2S(), v24.V2S()); 2226 __ uhadd(v25.V4H(), v23.V4H(), v13.V4H()); 2227 __ uhadd(v0.V4S(), v20.V4S(), v16.V4S()); 2228 __ uhadd(v5.V8B(), v5.V8B(), v25.V8B()); 2229 __ uhadd(v3.V8H(), v29.V8H(), v18.V8H()); 2230 __ uhsub(v1.V16B(), v22.V16B(), v13.V16B()); 2231 __ uhsub(v14.V2S(), v30.V2S(), v30.V2S()); 2232 __ uhsub(v29.V4H(), v14.V4H(), v17.V4H()); 2233 __ uhsub(v26.V4S(), v5.V4S(), v18.V4S()); 2234 __ uhsub(v3.V8B(), v7.V8B(), v12.V8B()); 2235 __ uhsub(v25.V8H(), v21.V8H(), v5.V8H()); 2236 __ umax(v28.V16B(), v12.V16B(), v6.V16B()); 2237 __ umax(v20.V2S(), v19.V2S(), v26.V2S()); 2238 __ umax(v0.V4H(), v31.V4H(), v18.V4H()); 2239 __ umax(v6.V4S(), v21.V4S(), v28.V4S()); 2240 __ umax(v0.V8B(), v2.V8B(), v20.V8B()); 2241 __ umax(v4.V8H(), v11.V8H(), v22.V8H()); 2242 __ umaxp(v1.V16B(), v6.V16B(), v29.V16B()); 2243 __ umaxp(v19.V2S(), v17.V2S(), v27.V2S()); 2244 __ umaxp(v21.V4H(), v16.V4H(), v7.V4H()); 2245 __ umaxp(v9.V4S(), v20.V4S(), v29.V4S()); 2246 __ umaxp(v13.V8B(), v1.V8B(), v16.V8B()); 2247 __ umaxp(v19.V8H(), v23.V8H(), v26.V8H()); 2248 __ umaxv(b17, v30.V16B()); 2249 __ umaxv(b23, v12.V8B()); 2250 __ umaxv(h31, v15.V4H()); 2251 __ umaxv(h15, v25.V8H()); 2252 __ umaxv(s18, v21.V4S()); 2253 __ umin(v22.V16B(), v0.V16B(), v18.V16B()); 2254 __ umin(v1.V2S(), v21.V2S(), v16.V2S()); 2255 __ umin(v17.V4H(), v4.V4H(), v25.V4H()); 2256 __ umin(v24.V4S(), v26.V4S(), v13.V4S()); 2257 __ umin(v20.V8B(), v1.V8B(), v5.V8B()); 2258 __ umin(v26.V8H(), v25.V8H(), v23.V8H()); 2259 __ uminp(v5.V16B(), v1.V16B(), v23.V16B()); 2260 __ uminp(v7.V2S(), v26.V2S(), v30.V2S()); 2261 __ uminp(v9.V4H(), v5.V4H(), v25.V4H()); 2262 __ uminp(v23.V4S(), v10.V4S(), v1.V4S()); 2263 __ uminp(v4.V8B(), v29.V8B(), v14.V8B()); 2264 
__ uminp(v21.V8H(), v0.V8H(), v14.V8H()); 2265 __ uminv(b0, v17.V16B()); 2266 __ uminv(b0, v31.V8B()); 2267 __ uminv(h24, v0.V4H()); 2268 __ uminv(h29, v14.V8H()); 2269 __ uminv(s30, v3.V4S()); 2270 __ umlal(v11.V2D(), v11.V2S(), v24.V2S()); 2271 __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3); 2272 __ umlal(v0.V4S(), v9.V4H(), v26.V4H()); 2273 __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4); 2274 __ umlal(v16.V8H(), v21.V8B(), v6.V8B()); 2275 __ umlal2(v17.V2D(), v19.V4S(), v23.V4S()); 2276 __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0); 2277 __ umlal2(v16.V4S(), v8.V8H(), v15.V8H()); 2278 __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5); 2279 __ umlal2(v30.V8H(), v1.V16B(), v17.V16B()); 2280 __ umlsl(v18.V2D(), v19.V2S(), v28.V2S()); 2281 __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0); 2282 __ umlsl(v24.V4S(), v8.V4H(), v4.V4H()); 2283 __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4); 2284 __ umlsl(v28.V8H(), v14.V8B(), v20.V8B()); 2285 __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S()); 2286 __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2); 2287 __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H()); 2288 __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4); 2289 __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B()); 2290 __ umov(x30, v25.D(), 1); 2291 __ umull(v12.V2D(), v10.V2S(), v29.V2S()); 2292 __ umull(v22.V2D(), v30.V2S(), v5.S(), 3); 2293 __ umull(v7.V4S(), v0.V4H(), v25.V4H()); 2294 __ umull(v11.V4S(), v13.V4H(), v3.H(), 2); 2295 __ umull(v25.V8H(), v16.V8B(), v10.V8B()); 2296 __ umull2(v17.V2D(), v3.V4S(), v26.V4S()); 2297 __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3); 2298 __ umull2(v12.V4S(), v17.V8H(), v23.V8H()); 2299 __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2); 2300 __ umull2(v5.V8H(), v12.V16B(), v17.V16B()); 2301 __ uqadd(b30, b4, b28); 2302 __ uqadd(d27, d20, d16); 2303 __ uqadd(h7, h14, h28); 2304 __ uqadd(s28, s17, s4); 2305 __ uqadd(v19.V16B(), v22.V16B(), v21.V16B()); 2306 __ uqadd(v16.V2D(), v4.V2D(), v11.V2D()); 2307 __ uqadd(v20.V2S(), v14.V2S(), v4.V2S()); 2308 __ uqadd(v5.V4H(), v0.V4H(), v16.V4H()); 2309 __ 
uqadd(v21.V4S(), v31.V4S(), v9.V4S()); 2310 __ uqadd(v23.V8B(), v24.V8B(), v3.V8B()); 2311 __ uqadd(v17.V8H(), v27.V8H(), v11.V8H()); 2312 __ uqrshl(b10, b22, b10); 2313 __ uqrshl(d29, d5, d11); 2314 __ uqrshl(h27, h24, h30); 2315 __ uqrshl(s10, s13, s8); 2316 __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B()); 2317 __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D()); 2318 __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S()); 2319 __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H()); 2320 __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S()); 2321 __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B()); 2322 __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H()); 2323 __ uqrshrn(b11, h26, 4); 2324 __ uqrshrn(h7, s30, 5); 2325 __ uqrshrn(s10, d8, 21); 2326 __ uqrshrn(v15.V2S(), v6.V2D(), 11); 2327 __ uqrshrn(v5.V4H(), v26.V4S(), 12); 2328 __ uqrshrn(v28.V8B(), v25.V8H(), 5); 2329 __ uqrshrn2(v25.V16B(), v30.V8H(), 2); 2330 __ uqrshrn2(v21.V4S(), v14.V2D(), 32); 2331 __ uqrshrn2(v13.V8H(), v7.V4S(), 2); 2332 __ uqshl(b13, b0, b23); 2333 __ uqshl(b9, b17, 4); 2334 __ uqshl(d23, d6, d4); 2335 __ uqshl(d8, d11, 44); 2336 __ uqshl(h19, h13, h15); 2337 __ uqshl(h25, h26, 6); 2338 __ uqshl(s4, s24, s10); 2339 __ uqshl(s19, s14, 1); 2340 __ uqshl(v14.V16B(), v30.V16B(), v25.V16B()); 2341 __ uqshl(v6.V16B(), v10.V16B(), 5); 2342 __ uqshl(v18.V2D(), v8.V2D(), v7.V2D()); 2343 __ uqshl(v25.V2D(), v14.V2D(), 18); 2344 __ uqshl(v25.V2S(), v16.V2S(), v23.V2S()); 2345 __ uqshl(v13.V2S(), v15.V2S(), 31); 2346 __ uqshl(v28.V4H(), v24.V4H(), v15.V4H()); 2347 __ uqshl(v4.V4H(), v17.V4H(), 1); 2348 __ uqshl(v9.V4S(), v31.V4S(), v23.V4S()); 2349 __ uqshl(v18.V4S(), v28.V4S(), 31); 2350 __ uqshl(v31.V8B(), v21.V8B(), v15.V8B()); 2351 __ uqshl(v6.V8B(), v21.V8B(), 1); 2352 __ uqshl(v28.V8H(), v2.V8H(), v17.V8H()); 2353 __ uqshl(v24.V8H(), v8.V8H(), 14); 2354 __ uqshrn(b21, h27, 7); 2355 __ uqshrn(h28, s26, 11); 2356 __ uqshrn(s13, d31, 17); 2357 __ uqshrn(v21.V2S(), v16.V2D(), 8); 2358 __ uqshrn(v24.V4H(), v24.V4S(), 2); 2359 __ uqshrn(v5.V8B(), v1.V8H(), 8); 2360 
__ uqshrn2(v16.V16B(), v29.V8H(), 6); 2361 __ uqshrn2(v2.V4S(), v6.V2D(), 1); 2362 __ uqshrn2(v16.V8H(), v10.V4S(), 14); 2363 __ uqsub(b28, b20, b26); 2364 __ uqsub(d0, d7, d10); 2365 __ uqsub(h26, h24, h7); 2366 __ uqsub(s23, s23, s16); 2367 __ uqsub(v14.V16B(), v16.V16B(), v24.V16B()); 2368 __ uqsub(v11.V2D(), v17.V2D(), v6.V2D()); 2369 __ uqsub(v10.V2S(), v10.V2S(), v8.V2S()); 2370 __ uqsub(v9.V4H(), v15.V4H(), v12.V4H()); 2371 __ uqsub(v23.V4S(), v18.V4S(), v7.V4S()); 2372 __ uqsub(v9.V8B(), v19.V8B(), v17.V8B()); 2373 __ uqsub(v20.V8H(), v2.V8H(), v6.V8H()); 2374 __ uqxtn(b29, h19); 2375 __ uqxtn(h0, s13); 2376 __ uqxtn(s26, d22); 2377 __ uqxtn(v5.V2S(), v31.V2D()); 2378 __ uqxtn(v30.V4H(), v19.V4S()); 2379 __ uqxtn(v15.V8B(), v2.V8H()); 2380 __ uqxtn2(v29.V16B(), v3.V8H()); 2381 __ uqxtn2(v13.V4S(), v17.V2D()); 2382 __ uqxtn2(v28.V8H(), v11.V4S()); 2383 __ urecpe(v23.V2S(), v15.V2S()); 2384 __ urecpe(v27.V4S(), v7.V4S()); 2385 __ urhadd(v2.V16B(), v15.V16B(), v27.V16B()); 2386 __ urhadd(v15.V2S(), v1.V2S(), v18.V2S()); 2387 __ urhadd(v17.V4H(), v4.V4H(), v26.V4H()); 2388 __ urhadd(v2.V4S(), v27.V4S(), v14.V4S()); 2389 __ urhadd(v5.V8B(), v17.V8B(), v14.V8B()); 2390 __ urhadd(v30.V8H(), v2.V8H(), v25.V8H()); 2391 __ urshl(d4, d28, d30); 2392 __ urshl(v13.V16B(), v31.V16B(), v19.V16B()); 2393 __ urshl(v14.V2D(), v23.V2D(), v21.V2D()); 2394 __ urshl(v10.V2S(), v7.V2S(), v8.V2S()); 2395 __ urshl(v15.V4H(), v21.V4H(), v28.V4H()); 2396 __ urshl(v30.V4S(), v8.V4S(), v23.V4S()); 2397 __ urshl(v31.V8B(), v20.V8B(), v5.V8B()); 2398 __ urshl(v30.V8H(), v27.V8H(), v30.V8H()); 2399 __ urshr(d4, d13, 49); 2400 __ urshr(v2.V16B(), v20.V16B(), 1); 2401 __ urshr(v13.V2D(), v11.V2D(), 51); 2402 __ urshr(v21.V2S(), v31.V2S(), 10); 2403 __ urshr(v21.V4H(), v17.V4H(), 11); 2404 __ urshr(v4.V4S(), v22.V4S(), 1); 2405 __ urshr(v0.V8B(), v1.V8B(), 7); 2406 __ urshr(v13.V8H(), v20.V8H(), 1); 2407 __ ursqrte(v20.V2S(), v16.V2S()); 2408 __ ursqrte(v28.V4S(), v8.V4S()); 2409 __ 
ursra(d27, d16, 45); 2410 __ ursra(v18.V16B(), v17.V16B(), 3); 2411 __ ursra(v26.V2D(), v28.V2D(), 58); 2412 __ ursra(v8.V2S(), v22.V2S(), 31); 2413 __ ursra(v31.V4H(), v4.V4H(), 7); 2414 __ ursra(v31.V4S(), v15.V4S(), 2); 2415 __ ursra(v3.V8B(), v1.V8B(), 5); 2416 __ ursra(v18.V8H(), v14.V8H(), 13); 2417 __ ushl(d31, d0, d16); 2418 __ ushl(v0.V16B(), v6.V16B(), v2.V16B()); 2419 __ ushl(v18.V2D(), v1.V2D(), v18.V2D()); 2420 __ ushl(v27.V2S(), v7.V2S(), v29.V2S()); 2421 __ ushl(v14.V4H(), v14.V4H(), v13.V4H()); 2422 __ ushl(v22.V4S(), v4.V4S(), v9.V4S()); 2423 __ ushl(v23.V8B(), v22.V8B(), v27.V8B()); 2424 __ ushl(v21.V8H(), v25.V8H(), v8.V8H()); 2425 __ ushll(v11.V2D(), v0.V2S(), 21); 2426 __ ushll(v2.V4S(), v17.V4H(), 8); 2427 __ ushll(v11.V8H(), v14.V8B(), 1); 2428 __ ushll2(v8.V2D(), v29.V4S(), 7); 2429 __ ushll2(v29.V4S(), v9.V8H(), 2); 2430 __ ushll2(v5.V8H(), v24.V16B(), 6); 2431 __ ushr(d28, d27, 53); 2432 __ ushr(v1.V16B(), v9.V16B(), 7); 2433 __ ushr(v2.V2D(), v24.V2D(), 43); 2434 __ ushr(v30.V2S(), v25.V2S(), 11); 2435 __ ushr(v10.V4H(), v26.V4H(), 12); 2436 __ ushr(v4.V4S(), v5.V4S(), 30); 2437 __ ushr(v30.V8B(), v2.V8B(), 1); 2438 __ ushr(v6.V8H(), v12.V8H(), 2); 2439 __ usqadd(b19, b5); 2440 __ usqadd(d9, d2); 2441 __ usqadd(h2, h16); 2442 __ usqadd(s16, s3); 2443 __ usqadd(v31.V16B(), v29.V16B()); 2444 __ usqadd(v8.V2D(), v10.V2D()); 2445 __ usqadd(v18.V2S(), v9.V2S()); 2446 __ usqadd(v24.V4H(), v14.V4H()); 2447 __ usqadd(v10.V4S(), v30.V4S()); 2448 __ usqadd(v16.V8B(), v20.V8B()); 2449 __ usqadd(v12.V8H(), v16.V8H()); 2450 __ usra(d28, d27, 37); 2451 __ usra(v5.V16B(), v22.V16B(), 5); 2452 __ usra(v2.V2D(), v19.V2D(), 33); 2453 __ usra(v0.V2S(), v0.V2S(), 21); 2454 __ usra(v7.V4H(), v6.V4H(), 12); 2455 __ usra(v4.V4S(), v17.V4S(), 9); 2456 __ usra(v9.V8B(), v12.V8B(), 7); 2457 __ usra(v3.V8H(), v27.V8H(), 14); 2458 __ usubl(v29.V2D(), v12.V2S(), v30.V2S()); 2459 __ usubl(v29.V4S(), v28.V4H(), v6.V4H()); 2460 __ usubl(v12.V8H(), v4.V8B(), v14.V8B()); 
2461 __ usubl2(v1.V2D(), v24.V4S(), v17.V4S()); 2462 __ usubl2(v4.V4S(), v1.V8H(), v3.V8H()); 2463 __ usubl2(v23.V8H(), v4.V16B(), v7.V16B()); 2464 __ usubw(v9.V2D(), v20.V2D(), v30.V2S()); 2465 __ usubw(v20.V4S(), v16.V4S(), v23.V4H()); 2466 __ usubw(v25.V8H(), v8.V8H(), v29.V8B()); 2467 __ usubw2(v18.V2D(), v29.V2D(), v6.V4S()); 2468 __ usubw2(v6.V4S(), v6.V4S(), v20.V8H()); 2469 __ usubw2(v18.V8H(), v4.V8H(), v16.V16B()); 2470 __ uxtl(v27.V2D(), v21.V2S()); 2471 __ uxtl(v0.V4S(), v31.V4H()); 2472 __ uxtl(v27.V8H(), v10.V8B()); 2473 __ uxtl2(v6.V2D(), v16.V4S()); 2474 __ uxtl2(v22.V4S(), v20.V8H()); 2475 __ uxtl2(v20.V8H(), v21.V16B()); 2476 __ uzp1(v30.V16B(), v9.V16B(), v17.V16B()); 2477 __ uzp1(v7.V2D(), v26.V2D(), v28.V2D()); 2478 __ uzp1(v26.V2S(), v16.V2S(), v22.V2S()); 2479 __ uzp1(v14.V4H(), v19.V4H(), v6.V4H()); 2480 __ uzp1(v17.V4S(), v23.V4S(), v30.V4S()); 2481 __ uzp1(v28.V8B(), v27.V8B(), v13.V8B()); 2482 __ uzp1(v17.V8H(), v1.V8H(), v12.V8H()); 2483 __ uzp2(v8.V16B(), v18.V16B(), v26.V16B()); 2484 __ uzp2(v21.V2D(), v22.V2D(), v24.V2D()); 2485 __ uzp2(v20.V2S(), v21.V2S(), v2.V2S()); 2486 __ uzp2(v16.V4H(), v31.V4H(), v6.V4H()); 2487 __ uzp2(v25.V4S(), v11.V4S(), v8.V4S()); 2488 __ uzp2(v31.V8B(), v31.V8B(), v13.V8B()); 2489 __ uzp2(v8.V8H(), v17.V8H(), v1.V8H()); 2490 __ xtn(v17.V2S(), v26.V2D()); 2491 __ xtn(v3.V4H(), v0.V4S()); 2492 __ xtn(v18.V8B(), v8.V8H()); 2493 __ xtn2(v0.V16B(), v0.V8H()); 2494 __ xtn2(v15.V4S(), v4.V2D()); 2495 __ xtn2(v31.V8H(), v18.V4S()); 2496 __ zip1(v22.V16B(), v9.V16B(), v6.V16B()); 2497 __ zip1(v23.V2D(), v11.V2D(), v2.V2D()); 2498 __ zip1(v26.V2S(), v16.V2S(), v9.V2S()); 2499 __ zip1(v1.V4H(), v9.V4H(), v7.V4H()); 2500 __ zip1(v0.V4S(), v30.V4S(), v20.V4S()); 2501 __ zip1(v30.V8B(), v17.V8B(), v15.V8B()); 2502 __ zip1(v17.V8H(), v8.V8H(), v2.V8H()); 2503 __ zip2(v23.V16B(), v10.V16B(), v11.V16B()); 2504 __ zip2(v30.V2D(), v6.V2D(), v14.V2D()); 2505 __ zip2(v9.V2S(), v10.V2S(), v21.V2S()); 2506 __ zip2(v8.V4H(), 
v24.V4H(), v29.V4H());
  __ zip2(v0.V4S(), v21.V4S(), v23.V4S());
  __ zip2(v25.V8B(), v23.V8B(), v30.V8B());
  __ zip2(v7.V8H(), v10.V8H(), v30.V8H());
}  // NOLINT(readability/fn_size)


// Emit the NEON floating-point part of the trace test program into `masm`.
//
// Every instruction is emitted through `masm->` (the `__` macro) inside a
// single ExactAssemblyScope that is given all the bytes remaining in the
// buffer as its maximum size. TraceTestHelper calls this after the Base, FP
// and NEON sequence generators.
// NOTE(review): the register and immediate operands look arbitrary, but they
// presumably determine the contents of the reference traces; confirm before
// editing, reordering or removing any instruction.
static void GenerateTestSequenceNEONFP(MacroAssembler* masm) {
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // NEON floating point instructions.
  __ fabd(v3.V2D(), v25.V2D(), v8.V2D());
  __ fabd(v14.V2S(), v27.V2S(), v11.V2S());
  __ fabd(v9.V4S(), v22.V4S(), v18.V4S());
  __ fabs(v1.V2D(), v29.V2D());
  __ fabs(v6.V2S(), v21.V2S());
  __ fabs(v12.V4S(), v25.V4S());
  __ facge(v18.V2D(), v5.V2D(), v0.V2D());
  __ facge(v15.V2S(), v11.V2S(), v6.V2S());
  __ facge(v30.V4S(), v10.V4S(), v25.V4S());
  __ facgt(v28.V2D(), v16.V2D(), v31.V2D());
  __ facgt(v15.V2S(), v1.V2S(), v4.V2S());
  __ facgt(v22.V4S(), v3.V4S(), v10.V4S());
  __ fadd(v7.V2D(), v10.V2D(), v24.V2D());
  __ fadd(v10.V2S(), v23.V2S(), v7.V2S());
  __ fadd(v16.V4S(), v22.V4S(), v11.V4S());
  __ faddp(d27, v28.V2D());
  __ faddp(s20, v23.V2S());
  __ faddp(v21.V2D(), v4.V2D(), v11.V2D());
  __ faddp(v31.V2S(), v26.V2S(), v1.V2S());
  __ faddp(v13.V4S(), v27.V4S(), v28.V4S());
  __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D());
  __ fcmeq(v24.V2D(), v16.V2D(), 0.0);
  __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S());
  __ fcmeq(v24.V2S(), v4.V2S(), 0.0);
  __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S());
  __ fcmeq(v26.V4S(), v25.V4S(), 0.0);
  __ fcmge(v27.V2D(), v0.V2D(), v0.V2D());
  __ fcmge(v22.V2D(), v30.V2D(), 0.0);
  __ fcmge(v7.V2S(), v21.V2S(), v25.V2S());
  __ fcmge(v15.V2S(), v15.V2S(), 0.0);
  __ fcmge(v29.V4S(), v4.V4S(), v27.V4S());
  __ fcmge(v22.V4S(), v21.V4S(), 0.0);
  __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D());
  __ fcmgt(v15.V2D(), v23.V2D(), 0.0);
  __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S());
  __ fcmgt(v1.V2S(), v13.V2S(), 0.0);
  __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S());
  __ fcmgt(v13.V4S(), v8.V4S(), 0.0);
  __ fcmle(v4.V2D(), v6.V2D(), 0.0);
  __ fcmle(v24.V2S(), v31.V2S(), 0.0);
  __ fcmle(v8.V4S(), v23.V4S(), 0.0);
  __ fcmlt(v7.V2D(), v3.V2D(), 0.0);
  __ fcmlt(v15.V2S(), v21.V2S(), 0.0);
  __ fcmlt(v1.V4S(), v2.V4S(), 0.0);
  __ fcvtas(v6.V2D(), v8.V2D());
  __ fcvtas(v1.V2S(), v9.V2S());
  __ fcvtas(v8.V4S(), v19.V4S());
  __ fcvtau(v5.V2D(), v31.V2D());
  __ fcvtau(v28.V2S(), v29.V2S());
  __ fcvtau(v11.V4S(), v26.V4S());
  __ fcvtl(v8.V2D(), v25.V2S());
  __ fcvtl(v27.V4S(), v14.V4H());
  __ fcvtl2(v1.V2D(), v6.V4S());
  __ fcvtl2(v24.V4S(), v9.V8H());
  __ fcvtms(v9.V2D(), v24.V2D());
  __ fcvtms(v7.V2S(), v11.V2S());
  __ fcvtms(v23.V4S(), v21.V4S());
  __ fcvtmu(v13.V2D(), v1.V2D());
  __ fcvtmu(v26.V2S(), v12.V2S());
  __ fcvtmu(v21.V4S(), v21.V4S());
  __ fcvtn(v11.V2S(), v1.V2D());
  __ fcvtn(v8.V4H(), v2.V4S());
  __ fcvtn2(v24.V4S(), v29.V2D());
  __ fcvtn2(v4.V8H(), v10.V4S());
  __ fcvtns(v25.V2D(), v10.V2D());
  __ fcvtns(v4.V2S(), v8.V2S());
  __ fcvtns(v29.V4S(), v27.V4S());
  __ fcvtnu(v18.V2D(), v27.V2D());
  __ fcvtnu(v11.V2S(), v14.V2S());
  __ fcvtnu(v27.V4S(), v21.V4S());
  __ fcvtps(v23.V2D(), v5.V2D());
  __ fcvtps(v24.V2S(), v15.V2S());
  __ fcvtps(v5.V4S(), v19.V4S());
  __ fcvtpu(v3.V2D(), v21.V2D());
  __ fcvtpu(v3.V2S(), v21.V2S());
  __ fcvtpu(v0.V4S(), v7.V4S());
  __ fcvtxn(v29.V2S(), v11.V2D());
  __ fcvtxn2(v31.V4S(), v25.V2D());
  __ fcvtzs(v19.V2D(), v17.V2D());
  __ fcvtzs(v12.V2D(), v24.V2D(), 64);
  __ fcvtzs(v9.V2S(), v2.V2S());
  __ fcvtzs(v5.V2S(), v20.V2S(), 29);
  __ fcvtzs(v21.V4S(), v25.V4S());
  __ fcvtzs(v26.V4S(), v1.V4S(), 6);
  __ fcvtzu(v13.V2D(), v25.V2D());
  __ fcvtzu(v28.V2D(), v13.V2D(), 32);
  __ fcvtzu(v26.V2S(), v6.V2S());
  __ fcvtzu(v9.V2S(), v10.V2S(), 15);
  __ fcvtzu(v30.V4S(), v6.V4S());
  __ fcvtzu(v19.V4S(), v22.V4S(), 18);
  __ fdiv(v15.V2D(), v8.V2D(), v15.V2D());
  __ fdiv(v12.V2S(), v9.V2S(), v26.V2S());
  __ fdiv(v19.V4S(), v22.V4S(), v19.V4S());
  __ fmax(v19.V2D(), v7.V2D(), v8.V2D());
  __ fmax(v25.V2S(), v12.V2S(), v29.V2S());
  __ fmax(v6.V4S(), v15.V4S(), v5.V4S());
  __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D());
  __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S());
  __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S());
  __ fmaxnmp(d6, v19.V2D());
  __ fmaxnmp(s27, v26.V2S());
  __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D());
  __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S());
  __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S());
  __ fmaxnmv(s27, v19.V4S());
  __ fmaxp(d20, v14.V2D());
  __ fmaxp(s18, v2.V2S());
  __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D());
  __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S());
  __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S());
  __ fmaxv(s31, v29.V4S());
  __ fmin(v2.V2D(), v5.V2D(), v2.V2D());
  __ fmin(v31.V2S(), v17.V2S(), v10.V2S());
  __ fmin(v10.V4S(), v4.V4S(), v16.V4S());
  __ fminnm(v21.V2D(), v6.V2D(), v5.V2D());
  __ fminnm(v22.V2S(), v18.V2S(), v14.V2S());
  __ fminnm(v25.V4S(), v31.V4S(), v3.V4S());
  __ fminnmp(d9, v1.V2D());
  __ fminnmp(s21, v20.V2S());
  __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D());
  __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S());
  __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S());
  __ fminnmv(s3, v4.V4S());
  __ fminp(d24, v26.V2D());
  __ fminp(s7, v17.V2S());
  __ fminp(v23.V2D(), v19.V2D(), v3.V2D());
  __ fminp(v29.V2S(), v21.V2S(), v9.V2S());
  __ fminp(v0.V4S(), v24.V4S(), v21.V4S());
  __ fminv(s25, v8.V4S());
  __ fmla(d23, d0, v9.D(), 1);
  __ fmla(s23, s15, v7.S(), 0);
  __ fmla(v17.V2D(), v11.V2D(), v6.V2D());
  __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0);
  __ fmla(v19.V2S(), v12.V2S(), v6.V2S());
  __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0);
  __ fmla(v16.V4S(), v11.V4S(), v11.V4S());
  __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2);
  __ fmls(d27, d30, v6.D(), 0);
  __ fmls(s21, s16, v2.S(), 0);
  __ fmls(v5.V2D(), v19.V2D(), v21.V2D());
  __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0);
  __ fmls(v5.V2S(), v16.V2S(), v7.V2S());
  __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1);
  __ fmls(v27.V4S(), v5.V4S(), v30.V4S());
  __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3);
  __ fmov(v14.V2D(), -0.34375);
  __ fmov(v26.V2S(), 0.90625f);
  __ fmov(v31.V4S(), -5.0000f);
  __ fmov(v28.D(), 1, x25);
  __ fmov(x18, v2.D(), 1);
  __ fmul(d12, d4, v1.D(), 1);
  __ fmul(s30, s1, v15.S(), 3);
  __ fmul(v25.V2D(), v0.V2D(), v21.V2D());
  __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1);
  __ fmul(v7.V2S(), v24.V2S(), v16.V2S());
  __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2);
  __ fmul(v5.V4S(), v28.V4S(), v25.V4S());
  __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0);
  __ fmulx(d28, d9, v3.D(), 1);
  __ fmulx(s25, s21, v15.S(), 1);
  __ fmulx(v31.V2D(), v28.V2D(), v8.V2D());
  __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0);
  __ fmulx(v9.V2S(), v1.V2S(), v0.V2S());
  __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0);
  __ fmulx(v2.V4S(), v4.V4S(), v5.V4S());
  __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0);
  __ fneg(v1.V2D(), v25.V2D());
  __ fneg(v14.V2S(), v31.V2S());
  __ fneg(v5.V4S(), v4.V4S());
  __ frecpe(v18.V2D(), v12.V2D());
  __ frecpe(v10.V2S(), v22.V2S());
  __ frecpe(v5.V4S(), v6.V4S());
  __ frecps(v22.V2D(), v7.V2D(), v26.V2D());
  __ frecps(v31.V2S(), v27.V2S(), v2.V2S());
  __ frecps(v18.V4S(), v6.V4S(), v27.V4S());
  __ frinta(v26.V2D(), v13.V2D());
  __ frinta(v15.V2S(), v26.V2S());
  __ frinta(v13.V4S(), v16.V4S());
  __ frinti(v9.V2D(), v12.V2D());
  __ frinti(v5.V2S(), v19.V2S());
  __ frinti(v15.V4S(), v11.V4S());
  __ frintm(v17.V2D(), v29.V2D());
  __ frintm(v30.V2S(), v11.V2S());
  __ frintm(v1.V4S(), v20.V4S());
  __ frintn(v24.V2D(), v6.V2D());
  __ frintn(v12.V2S(), v17.V2S());
  __ frintn(v29.V4S(), v11.V4S());
  __ frintp(v10.V2D(), v7.V2D());
  __ frintp(v12.V2S(), v18.V2S());
  __ frintp(v26.V4S(), v31.V4S());
  __ frintx(v24.V2D(), v13.V2D());
  __ frintx(v7.V2S(), v9.V2S());
  __ frintx(v18.V4S(), v21.V4S());
  __ frintz(v19.V2D(), v25.V2D());
  __ frintz(v15.V2S(), v8.V2S());
  __ frintz(v20.V4S(), v3.V4S());
  __ frsqrte(v23.V2D(), v5.V2D());
  __ frsqrte(v9.V2S(), v7.V2S());
  __ frsqrte(v3.V4S(), v9.V4S());
  __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D());
  __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S());
  __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S());
  __ fsqrt(v6.V2D(), v18.V2D());
  __ fsqrt(v6.V2S(), v18.V2S());
  __ fsqrt(v0.V4S(), v31.V4S());
  __ fsub(v31.V2D(), v30.V2D(), v31.V2D());
  __ fsub(v11.V2S(), v8.V2S(), v6.V2S());
  __ fsub(v16.V4S(), v0.V4S(), v31.V4S());
  __ scvtf(v25.V2D(), v31.V2D());
  __ scvtf(v10.V2D(), v13.V2D(), 45);
  __ scvtf(v10.V2S(), v15.V2S());
  __ scvtf(v18.V2S(), v4.V2S(), 27);
  __ scvtf(v17.V4S(), v5.V4S());
  __ scvtf(v11.V4S(), v25.V4S(), 24);
  __ ucvtf(v9.V2D(), v3.V2D());
  __ ucvtf(v26.V2D(), v30.V2D(), 46);
  __ ucvtf(v11.V2S(), v4.V2S());
  __ ucvtf(v29.V2S(), v3.V2S(), 25);
  __ ucvtf(v22.V4S(), v23.V4S());
  __ ucvtf(v18.V4S(), v9.V4S(), 25);
}


// Mask run-dependent values in the trace file named by `trace`.
//
// The file is edited in place by invoking `sed` once per pattern; the
// hexadecimal digits of each matched address are replaced with `~` characters.
static void MaskAddresses(const char* trace) {
// Hexadecimal expressions of the form `\xab` do not work out-of-the box with
// BSD `sed`. So we use ANSI-C quoting to have the regular expressions below
// work both on Linux and BSD (and macOS).
2746#ifdef __APPLE__ 2747#define MAYBE_ANSI_C_QUOTE "$" 2748#define HEX(val) "\\x" #val 2749#define ESCAPE(c) "\\\\" #c 2750 const char* sed_options = "-i \"\" -E"; 2751#else 2752#define MAYBE_ANSI_C_QUOTE 2753#define HEX(val) "\\x" #val 2754#define ESCAPE(c) "\\" #c 2755 const char* sed_options = "--in-place --regexp-extended"; 2756#endif 2757#define COLOUR "(" HEX(1b) ESCAPE([) "[01];([0-9][0-9])?m)?" 2758 struct { 2759 const char* search; 2760 const char* replace; 2761 } patterns[] = 2762 {// Mask registers that hold addresses that change from run to run. 2763 {"((x0|x1|x2|sp): " COLOUR "0x)[0-9a-f]{16}", 2764 ESCAPE(1) "~~~~~~~~~~~~~~~~"}, 2765 // Mask accessed memory addresses. 2766 {"((<-|->) " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, 2767 // Mask instruction addresses. 2768 {"^0x[0-9a-f]{16}", "0x~~~~~~~~~~~~~~~~"}, 2769 // Mask branch targets. 2770 {"(Branch" COLOUR " to 0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, 2771 {"addr 0x[0-9a-f]+", "addr 0x~~~~~~~~~~~~~~~~"}}; 2772 const size_t patterns_length = sizeof(patterns) / sizeof(patterns[0]); 2773 // Rewrite `trace`, masking addresses and other values that legitimately vary 2774 // from run to run. 
2775 char command[1024]; 2776 for (size_t i = 0; i < patterns_length; i++) { 2777 size_t length = snprintf(command, 2778 sizeof(command), 2779 "sed %s " MAYBE_ANSI_C_QUOTE "'s/%s/%s/' '%s'", 2780 sed_options, 2781 patterns[i].search, 2782 patterns[i].replace, 2783 trace); 2784 VIXL_CHECK(length < sizeof(command)); 2785 VIXL_CHECK(system(command) == 0); 2786 } 2787} 2788 2789 2790static void TraceTestHelper(bool coloured_trace, 2791 TraceParameters trace_parameters, 2792 const char* ref_file) { 2793 MacroAssembler masm(12 * KBytes); 2794 2795 char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX"; 2796 FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w"); 2797 2798 Decoder decoder; 2799 Simulator simulator(&decoder, trace_stream); 2800 simulator.SetColouredTrace(coloured_trace); 2801 simulator.SetTraceParameters(trace_parameters); 2802 simulator.SilenceExclusiveAccessWarning(); 2803 2804 // Set up a scratch buffer so we can test loads and stores. 2805 const int kScratchSize = 64 * KBytes; 2806 const int kScratchGuardSize = 128; 2807 char scratch_buffer[kScratchSize + kScratchGuardSize]; 2808 for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0])); 2809 i++) { 2810 scratch_buffer[i] = i & 0xff; 2811 } 2812 // Used for offset addressing. 2813 simulator.WriteRegister(0, scratch_buffer); 2814 // Used for pre-/post-index addressing. 2815 simulator.WriteRegister(1, scratch_buffer); 2816 2817 const int kPostIndexRegisterStep = 13; // Arbitrary interesting value. 2818 // Used for post-index offsets. 2819 simulator.WriteRegister(2, kPostIndexRegisterStep); 2820 2821 // Initialize the other registers with unique values. 
2822 uint64_t initial_base_u64 = 0x0100001000100101; 2823 for (unsigned i = 3; i < kNumberOfRegisters; i++) { 2824 if (i == kLinkRegCode) continue; 2825 if (i == kZeroRegCode) continue; 2826 // NoRegLog suppresses the log now, but the registers will still be logged 2827 // before the first instruction is executed since they have been written but 2828 // not printed. 2829 simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog); 2830 } 2831 float initial_base_f32 = 1.2345f; 2832 double initial_base_f64 = 1.3456f; 2833 for (unsigned i = 0; i < kNumberOfVRegisters; i++) { 2834 // Try to initialise V registers with reasonable FP values. 2835 uint64_t low = (DoubleToRawbits(initial_base_f64 * i) & ~kSRegMask) | 2836 FloatToRawbits(initial_base_f32 * i); 2837 uint64_t high = low ^ 0x0005555500555555; 2838 LogicVRegister reg(simulator.ReadVRegister(i)); 2839 reg.SetUint(kFormat2D, 0, low); 2840 reg.SetUint(kFormat2D, 1, high); 2841 } 2842 2843 GenerateTestSequenceBase(&masm); 2844 GenerateTestSequenceFP(&masm); 2845 GenerateTestSequenceNEON(&masm); 2846 GenerateTestSequenceNEONFP(&masm); 2847 masm.Ret(); 2848 masm.FinalizeCode(); 2849 2850 simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); 2851 2852 fclose(trace_stream); 2853 MaskAddresses(trace_stream_filename); 2854 2855 bool trace_matched_reference; 2856 if (Test::generate_test_trace()) { 2857 // Copy trace_stream to stdout. 2858 trace_stream = fopen(trace_stream_filename, "r"); 2859 VIXL_ASSERT(trace_stream != NULL); 2860 fseek(trace_stream, 0, SEEK_SET); 2861 int c; 2862 while (1) { 2863 c = getc(trace_stream); 2864 if (c == EOF) break; 2865 putc(c, stdout); 2866 } 2867 fclose(trace_stream); 2868 trace_matched_reference = true; 2869 } else { 2870 // Check trace_stream against ref_file. 
2871 char command[1024]; 2872 size_t length = snprintf(command, 2873 sizeof(command), 2874 "diff -u %s %s", 2875 ref_file, 2876 trace_stream_filename); 2877 VIXL_CHECK(length < sizeof(command)); 2878 trace_matched_reference = (system(command) == 0); 2879 } 2880 2881 uint64_t offset_base = simulator.ReadRegister<uint64_t>(0); 2882 uint64_t index_base = simulator.ReadRegister<uint64_t>(1); 2883 2884 // Clean up before checking the result; VIXL_CHECK aborts. 2885 remove(trace_stream_filename); 2886 2887 VIXL_CHECK(trace_matched_reference); 2888 VIXL_CHECK(index_base >= offset_base); 2889 VIXL_CHECK((index_base - offset_base) <= kScratchSize); 2890} 2891 2892 2893#define REF(name) "test/test-trace-reference/" name 2894 2895// Test individual options. 2896TEST(disasm) { TraceTestHelper(false, LOG_DISASM, REF("log-disasm")); } 2897TEST(regs) { TraceTestHelper(false, LOG_REGS, REF("log-regs")); } 2898TEST(vregs) { TraceTestHelper(false, LOG_VREGS, REF("log-vregs")); } 2899TEST(sysregs) { TraceTestHelper(false, LOG_SYSREGS, REF("log-sysregs")); } 2900TEST(write) { TraceTestHelper(false, LOG_WRITE, REF("log-write")); } 2901TEST(branch) { TraceTestHelper(false, LOG_WRITE, REF("log-branch")); } 2902 2903// Test standard combinations. 2904TEST(none) { TraceTestHelper(false, LOG_NONE, REF("log-none")); } 2905TEST(state) { TraceTestHelper(false, LOG_STATE, REF("log-state")); } 2906TEST(all) { TraceTestHelper(false, LOG_ALL, REF("log-all")); } 2907 2908 2909// Test individual options (with colour). 
// Each test below runs TraceTestHelper with coloured trace output enabled
// (first argument `true`) and compares against a "-colour" reference file.
TEST(disasm_colour) {
  TraceTestHelper(true, LOG_DISASM, REF("log-disasm-colour"));
}
TEST(regs_colour) { TraceTestHelper(true, LOG_REGS, REF("log-regs-colour")); }
TEST(vregs_colour) {
  TraceTestHelper(true, LOG_VREGS, REF("log-vregs-colour"));
}
TEST(sysregs_colour) {
  TraceTestHelper(true, LOG_SYSREGS, REF("log-sysregs-colour"));
}
TEST(write_colour) {
  TraceTestHelper(true, LOG_WRITE, REF("log-write-colour"));
}
// NOTE(review): this passes LOG_WRITE, the same parameter as write_colour,
// although the test and its reference file are named "branch". Confirm whether
// a branch-specific trace parameter was intended; changing it would require
// regenerating the reference file.
TEST(branch_colour) {
  TraceTestHelper(true, LOG_WRITE, REF("log-branch-colour"));
}

// Test standard combinations (with colour).
TEST(none_colour) { TraceTestHelper(true, LOG_NONE, REF("log-none-colour")); }
TEST(state_colour) {
  TraceTestHelper(true, LOG_STATE, REF("log-state-colour"));
}
TEST(all_colour) { TraceTestHelper(true, LOG_ALL, REF("log-all-colour")); }


#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
}  // namespace aarch64
}  // namespace vixl