Searched refs:kInstructionSize (Results 1 - 25 of 39) sorted by relevance

12

/external/vixl/src/aarch64/
H A Dsimulator-constants-aarch64.h86 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
87 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
88 const unsigned kPrintfLength = 3 * kInstructionSize;
110 const unsigned kTraceParamsOffset = 1 * kInstructionSize;
111 const unsigned kTraceCommandOffset = 2 * kInstructionSize;
112 const unsigned kTraceLength = 3 * kInstructionSize;
140 const unsigned kLogParamsOffset = 1 * kInstructionSize;
141 const unsigned kLogLength = 2 * kInstructionSize;
146 const unsigned kRuntimeCallWrapperOffset = 1 * kInstructionSize;
H A Dmacro-assembler-aarch64.cc122 if (option == kBranchRequired) emit_size += kInstructionSize;
125 VIXL_ASSERT(emit_size % kInstructionSize == 0);
136 ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
143 ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
271 ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
297 ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
595 EmissionCheckScope guard(this, 2 * kInstructionSize);
621 EmissionCheckScope guard(this, 2 * kInstructionSize);
647 EmissionCheckScope guard(this, 2 * kInstructionSize);
672 EmissionCheckScope guard(this, 2 * kInstructionSize);
[all...]
H A Dinstructions-aarch64.cc229 VIXL_ASSERT((LSSize_offset + LSSize_width) == (kInstructionSize * 8));
279 return encoded_max * kInstructionSize;
305 offset = GetImmBranch() * static_cast<int>(kInstructionSize);
H A Dinstructions-aarch64.h40 const unsigned kInstructionSize = 4; member in namespace:vixl::aarch64
408 // target [instr - range - kInstructionSize, instr + range].
411 (1 << kLoadLiteralImmBitwidth) / 2 - kInstructionSize;
467 return this + kInstructionSize;
H A Ddebugger-aarch64.cc561 from -= (count - 1) * kInstructionSize;
563 const Instruction* to = from + count * kInstructionSize;
1264 debugger->WritePc(debugger->ReadPc() + steps * kInstructionSize);
/external/vixl/test/aarch64/
H A Dtest-fuzz-aarch64.cc50 Instruction buffer[kInstructionSize];
69 Instruction buffer[kInstructionSize];
91 Instruction buffer[kInstructionSize];
107 Instruction buffer[kInstructionSize];
H A Dtest-assembler-aarch64.cc265 instruction += kInstructionSize; \
291 VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize()))
571 ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
2042 VIXL_ASSERT((offset_into_page % kInstructionSize) == 0);
2076 for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) {
2087 for (size_t i = 0; i < (kPageSize / kInstructionSize);) {
2088 if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
2090 if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
2094 for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) {
2121 AdrpPageBoundaryHelper(kInstructionSize *
2041 VIXL_ASSERT(offset_into_page < kPageSize); VIXL_ASSERT((offset_into_page % kInstructionSize) == 0); const uintptr_t kPageOffsetMask = kPageSize - 1; const int kStartPage = -16; const int kEndPage = 16; const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize; SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode); START(); Label test; Label start; { ExactAssemblyScope scope(&masm, kMaxCodeSize, ExactAssemblyScope::kMaximumSize); __ cmp(wzr, wzr); while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) { __ b(&start); } VIXL_STATIC_ASSERT(kStartPage < 0); { ExactAssemblyScope scope_page(&masm, kPageSize); __ bind(&start); __ adrp(x0, &test); __ adrp(x1, &test); for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) { __ ccmp(x0, x1, NoFlag, eq); __ adrp(x1, &test); } } VIXL_STATIC_ASSERT(kEndPage >= 0); for (int page = (kStartPage + 1); page <= kEndPage; page++) { ExactAssemblyScope scope_page(&masm, kPageSize); if (page == 0) { for (size_t i = 0; i < (kPageSize / kInstructionSize);) { if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test); __ ccmp(x0, x1, NoFlag, eq); if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test); __ adrp(x1, &test); } } else { for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) { __ ccmp(x0, x1, NoFlag, eq); __ adrp(x1, &test); } } } } END(); RUN_CUSTOM(); uintptr_t expected = AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize); ASSERT_EQUAL_64(expected, x0); ASSERT_EQUAL_64(expected, x1); ASSERT_EQUAL_NZCV(ZCFlag); TEARDOWN_CUSTOM(); } TEST(adrp_page_boundaries) { VIXL_STATIC_ASSERT(kPageSize == 4096); AdrpPageBoundaryHelper(kInstructionSize * 0); AdrpPageBoundaryHelper(kInstructionSize * 1); AdrpPageBoundaryHelper(kInstructionSize * 512); AdrpPageBoundaryHelper(kInstructionSize * 1022); AdrpPageBoundaryHelper(kInstructionSize * 1023); } static void AdrpOffsetHelper(int64_t offset) { const size_t kPageOffsetMask = kPageSize - 1; const int kMaxCodeSize = 2 * kPageSize; SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode); START(); Label page; { ExactAssemblyScope scope(&masm, kMaxCodeSize, ExactAssemblyScope::kMaximumSize); __ cmp(wzr, wzr); while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) { __ b(&page); } __ bind(&page); { ExactAssemblyScope scope_page(&masm, kPageSize); __ adrp(x0, offset); __ adrp(x1, offset); for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) { __ ccmp(x0, x1, NoFlag, eq); __ adrp(x1, offset); } } } END(); RUN_CUSTOM(); uintptr_t expected = masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset); ASSERT_EQUAL_64(expected, x0); ASSERT_EQUAL_64(expected, x1); ASSERT_EQUAL_NZCV(ZCFlag); TEARDOWN_CUSTOM(); } TEST(adrp_offset) { AdrpOffsetHelper(0); AdrpOffsetHelper(1); AdrpOffsetHelper(-1); AdrpOffsetHelper(4); AdrpOffsetHelper(-4); AdrpOffsetHelper(0x000fffff); AdrpOffsetHelper(-0x000fffff); AdrpOffsetHelper(-0x00100000); } TEST(branch_cond) { SETUP(); Label done, wrong; START(); __ Mov(x0, 0x1); __ Mov(x1, 0x1); __ Mov(x2, 0x8000000000000000); __ Cmp(x1, 0); __ B(&wrong, eq); __ B(&wrong, lo); __ B(&wrong, mi); __ B(&wrong, vs); __ B(&wrong, ls); __ B(&wrong, lt); __ B(&wrong, le); Label ok_1; __ B(&ok_1, ne); __ Mov(x0, 0x0); __ Bind(&ok_1); __ Cmp(x1, 1); __ B(&wrong, ne); __ B(&wrong, lo); __ B(&wrong, mi); __ B(&wrong, vs); __ B(&wrong, hi); __ B(&wrong, lt); __ B(&wrong, gt); Label ok_2; __ B(&ok_2, pl); __ Mov(x0, 0x0); __ Bind(&ok_2); __ Cmp(x1, 2); __ B(&wrong, eq); __ B(&wrong, hs); __ B(&wrong, pl); __ B(&wrong, vs); __ B(&wrong, hi); __ B(&wrong, ge); __ B(&wrong, gt); Label ok_3; __ B(&ok_3, vc); __ Mov(x0, 0x0); __ Bind(&ok_3); __ Cmp(x2, 1); __ B(&wrong, eq); __ B(&wrong, lo); __ B(&wrong, mi); __ B(&wrong, vc); __ B(&wrong, ls); __ B(&wrong, ge); __ B(&wrong, gt); Label ok_4; __ B(&ok_4, le); __ Mov(x0, 0x0); __ Bind(&ok_4); Label ok_5; { ExactAssemblyScope scope(&masm, kInstructionSize); __ b(&ok_5, al); } __ Mov(x0, 0x0); __ Bind(&ok_5); Label ok_6; { ExactAssemblyScope scope(&masm, kInstructionSize); __ b(&ok_6, nv); } __ Mov(x0, 0x0); __ Bind(&ok_6); __ B(&done); __ Bind(&wrong); __ Mov(x0, 0x0); __ Bind(&done); END(); RUN(); ASSERT_EQUAL_64(0x1, x0); TEARDOWN(); } TEST(branch_to_reg) { SETUP(); Label fn1, after_fn1; START(); __ Mov(x29, lr); __ Mov(x1, 0); __ B(&after_fn1); __ Bind(&fn1); __ Mov(x0, lr); __ Mov(x1, 42); __ Br(x0); __ Bind(&after_fn1); __ Bl(&fn1); Label fn2, after_fn2; __ Mov(x2, 0); __ B(&after_fn2); __ Bind(&fn2); __ Mov(x0, lr); __ Mov(x2, 84); __ Blr(x0); __ Bind(&after_fn2); __ Bl(&fn2); __ Mov(x3, lr); __ Mov(lr, x29); END(); RUN(); ASSERT_EQUAL_64(core.xreg(3) + kInstructionSize, x0); ASSERT_EQUAL_64(42, x1); ASSERT_EQUAL_64(84, x2); TEARDOWN(); } TEST(compare_branch) { SETUP(); START(); __ Mov(x0, 0); __ Mov(x1, 0); __ Mov(x2, 0); __ Mov(x3, 0); __ Mov(x4, 0); __ Mov(x5, 0); __ Mov(x16, 0); __ Mov(x17, 42); Label zt, zt_end; __ Cbz(w16, &zt); __ B(&zt_end); __ Bind(&zt); __ Mov(x0, 1); __ Bind(&zt_end); Label zf, zf_end; __ Cbz(x17, &zf); __ B(&zf_end); __ Bind(&zf); __ Mov(x1, 1); __ Bind(&zf_end); Label nzt, nzt_end; __ Cbnz(w17, &nzt); __ B(&nzt_end); __ Bind(&nzt); __ Mov(x2, 1); __ Bind(&nzt_end); Label nzf, nzf_end; __ Cbnz(x16, &nzf); __ B(&nzf_end); __ Bind(&nzf); __ Mov(x3, 1); __ Bind(&nzf_end); __ Mov(x18, 0xffffffff00000000); Label a, a_end; __ Cbz(w18, &a); __ B(&a_end); __ Bind(&a); __ Mov(x4, 1); __ Bind(&a_end); Label b, b_end; __ Cbnz(w18, &b); __ B(&b_end); __ Bind(&b); __ Mov(x5, 1); __ Bind(&b_end); END(); RUN(); ASSERT_EQUAL_64(1, x0); ASSERT_EQUAL_64(0, x1); ASSERT_EQUAL_64(1, x2); ASSERT_EQUAL_64(0, x3); ASSERT_EQUAL_64(1, x4); ASSERT_EQUAL_64(0, x5); TEARDOWN(); } TEST(test_branch) { SETUP(); START(); __ Mov(x0, 0); __ Mov(x1, 0); __ Mov(x2, 0); __ Mov(x3, 0); __ Mov(x16, 0xaaaaaaaaaaaaaaaa); Label bz, bz_end; __ Tbz(w16, 0, &bz); __ B(&bz_end); __ Bind(&bz); __ Mov(x0, 1); __ Bind(&bz_end); Label bo, bo_end; __ Tbz(x16, 63, &bo); __ B(&bo_end); __ Bind(&bo); __ Mov(x1, 1); __ Bind(&bo_end); Label nbz, nbz_end; __ Tbnz(x16, 61, &nbz); __ B(&nbz_end); __ Bind(&nbz); __ Mov(x2, 1); __ Bind(&nbz_end); Label nbo, nbo_end; __ Tbnz(w16, 2, &nbo); __ B(&nbo_end); __ Bind(&nbo); __ Mov(x3, 1); __ Bind(&nbo_end); END(); RUN(); ASSERT_EQUAL_64(1, x0); ASSERT_EQUAL_64(0, x1); ASSERT_EQUAL_64(1, x2); ASSERT_EQUAL_64(0, x3); TEARDOWN(); } TEST(branch_type) { SETUP(); Label fail, done; START(); __ Mov(x0, 0x0); __ Mov(x10, 0x7); __ Mov(x11, 0x0); __ Cmp(x10, 0x7); __ B(&fail, ne); __ B(&fail, never); __ B(&fail, reg_zero, x10); __ B(&fail, reg_not_zero, x11); __ B(&fail, reg_bit_clear, x10, 0); __ B(&fail, reg_bit_set, x10, 3); Label l1, l2, l3, l4, l5; __ Cmp(x10, 0x7); __ B(&l1, eq); __ B(&fail); __ Bind(&l1); __ B(&l2, always); __ B(&fail); __ Bind(&l2); __ B(&l3, reg_not_zero, x10); __ B(&fail); __ Bind(&l3); __ B(&l4, reg_bit_clear, x10, 15); __ B(&fail); __ Bind(&l4); __ B(&l5, reg_bit_set, x10, 1); __ B(&fail); __ Bind(&l5); __ B(&done); __ Bind(&fail); __ Mov(x0, 0x1); __ Bind(&done); END(); RUN(); ASSERT_EQUAL_64(0x0, x0); TEARDOWN(); } TEST(ldr_str_offset) { SETUP(); uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; uint64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Ldr(w0, MemOperand(x17)); __ Str(w0, MemOperand(x18)); __ Ldr(w1, MemOperand(x17, 4)); __ Str(w1, MemOperand(x18, 12)); __ Ldr(x2, MemOperand(x17, 8)); __ Str(x2, MemOperand(x18, 16)); __ Ldrb(w3, MemOperand(x17, 1)); __ Strb(w3, MemOperand(x18, 25)); __ Ldrh(w4, MemOperand(x17, 2)); __ Strh(w4, MemOperand(x18, 33)); END(); RUN(); ASSERT_EQUAL_64(0x76543210, x0); ASSERT_EQUAL_64(0x76543210, dst[0]); ASSERT_EQUAL_64(0xfedcba98, x1); ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); ASSERT_EQUAL_64(0x0123456789abcdef, x2); ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); ASSERT_EQUAL_64(0x32, x3); ASSERT_EQUAL_64(0x3200, dst[3]); ASSERT_EQUAL_64(0x7654, x4); ASSERT_EQUAL_64(0x765400, dst[4]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base, x18); TEARDOWN(); } TEST(ldr_str_wide) { SETUP(); uint32_t src[8192]; uint32_t dst[8192]; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); memset(src, 0xaa, 8192 * sizeof(src[0])); memset(dst, 0xaa, 8192 * sizeof(dst[0])); src[0] = 0; src[6144] = 6144; src[8191] = 8191; START(); __ Mov(x22, src_base); __ Mov(x23, dst_base); __ Mov(x24, src_base); __ Mov(x25, dst_base); __ Mov(x26, src_base); __ Mov(x27, dst_base); __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0]))); __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0]))); __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex)); __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex)); __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex)); __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex)); END(); RUN(); ASSERT_EQUAL_32(8191, w0); ASSERT_EQUAL_32(8191, dst[8191]); ASSERT_EQUAL_64(src_base, x22); ASSERT_EQUAL_64(dst_base, x23); ASSERT_EQUAL_32(0, w1); ASSERT_EQUAL_32(0, dst[0]); ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24); ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25); ASSERT_EQUAL_32(6144, w2); ASSERT_EQUAL_32(6144, dst[6144]); ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26); ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27); TEARDOWN(); } TEST(ldr_str_preindex) { SETUP(); uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base + 16); __ Mov(x22, dst_base + 40); __ Mov(x23, src_base); __ Mov(x24, dst_base); __ Mov(x25, src_base); __ Mov(x26, dst_base); __ Ldr(w0, MemOperand(x17, 4, PreIndex)); __ Str(w0, MemOperand(x18, 12, PreIndex)); __ Ldr(x1, MemOperand(x19, 8, PreIndex)); __ Str(x1, MemOperand(x20, 16, PreIndex)); __ Ldr(w2, MemOperand(x21, -4, PreIndex)); __ Str(w2, MemOperand(x22, -4, PreIndex)); __ Ldrb(w3, MemOperand(x23, 1, PreIndex)); __ Strb(w3, MemOperand(x24, 25, PreIndex)); __ Ldrh(w4, MemOperand(x25, 3, PreIndex)); __ Strh(w4, MemOperand(x26, 41, PreIndex)); END(); RUN(); ASSERT_EQUAL_64(0xfedcba98, x0); ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); ASSERT_EQUAL_64(0x0123456789abcdef, x1); ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); ASSERT_EQUAL_64(0x01234567, x2); ASSERT_EQUAL_64(0x0123456700000000, dst[4]); ASSERT_EQUAL_64(0x32, x3); ASSERT_EQUAL_64(0x3200, dst[3]); ASSERT_EQUAL_64(0x9876, x4); ASSERT_EQUAL_64(0x987600, dst[5]); ASSERT_EQUAL_64(src_base + 4, x17); ASSERT_EQUAL_64(dst_base + 12, x18); ASSERT_EQUAL_64(src_base + 8, x19); ASSERT_EQUAL_64(dst_base + 16, x20); ASSERT_EQUAL_64(src_base + 12, x21); ASSERT_EQUAL_64(dst_base + 36, x22); ASSERT_EQUAL_64(src_base + 1, x23); ASSERT_EQUAL_64(dst_base + 25, x24); ASSERT_EQUAL_64(src_base + 3, x25); ASSERT_EQUAL_64(dst_base + 41, x26); TEARDOWN(); } TEST(ldr_str_postindex) { SETUP(); uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base + 4); __ Mov(x18, dst_base + 12); __ Mov(x19, src_base + 8); __ Mov(x20, dst_base + 16); __ Mov(x21, src_base + 8); __ Mov(x22, dst_base + 32); __ Mov(x23, src_base + 1); __ Mov(x24, dst_base + 25); __ Mov(x25, src_base + 3); __ Mov(x26, dst_base + 41); __ Ldr(w0, MemOperand(x17, 4, PostIndex)); __ Str(w0, MemOperand(x18, 12, PostIndex)); __ Ldr(x1, MemOperand(x19, 8, PostIndex)); __ Str(x1, MemOperand(x20, 16, PostIndex)); __ Ldr(x2, MemOperand(x21, -8, PostIndex)); __ Str(x2, MemOperand(x22, -32, PostIndex)); __ Ldrb(w3, MemOperand(x23, 1, PostIndex)); __ Strb(w3, MemOperand(x24, 5, PostIndex)); __ Ldrh(w4, MemOperand(x25, -3, PostIndex)); __ Strh(w4, MemOperand(x26, -41, PostIndex)); END(); RUN(); ASSERT_EQUAL_64(0xfedcba98, x0); ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); ASSERT_EQUAL_64(0x0123456789abcdef, x1); ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); ASSERT_EQUAL_64(0x0123456789abcdef, x2); ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]); ASSERT_EQUAL_64(0x32, x3); ASSERT_EQUAL_64(0x3200, dst[3]); ASSERT_EQUAL_64(0x9876, x4); ASSERT_EQUAL_64(0x987600, dst[5]); ASSERT_EQUAL_64(src_base + 8, x17); ASSERT_EQUAL_64(dst_base + 24, x18); ASSERT_EQUAL_64(src_base + 16, x19); ASSERT_EQUAL_64(dst_base + 32, x20); ASSERT_EQUAL_64(src_base, x21); ASSERT_EQUAL_64(dst_base, x22); ASSERT_EQUAL_64(src_base + 2, x23); ASSERT_EQUAL_64(dst_base + 30, x24); ASSERT_EQUAL_64(src_base, x25); ASSERT_EQUAL_64(dst_base, x26); TEARDOWN(); } TEST(ldr_str_largeindex) { SETUP(); int largeoffset = 0xabcdef; int64_t data[3] = {0x1122334455667788, 0, 0}; uint64_t base_addr = reinterpret_cast<uintptr_t>(data); uint64_t drifted_addr = base_addr - largeoffset; START(); __ Mov(x19, drifted_addr); __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex)); __ Mov(x20, base_addr); __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex)); __ Mov(x21, drifted_addr); __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex)); __ Mov(x22, base_addr + 16); __ Str(x0, MemOperand(x22, largeoffset, PostIndex)); END(); RUN(); ASSERT_EQUAL_64(0x1122334455667788, data[0]); ASSERT_EQUAL_64(0x1122334455667788, data[1]); ASSERT_EQUAL_64(0x1122334455667788, data[2]); ASSERT_EQUAL_64(0x1122334455667788, x0); ASSERT_EQUAL_64(0x1122334455667788, x1); ASSERT_EQUAL_64(base_addr, x19); ASSERT_EQUAL_64(base_addr + largeoffset, x20); ASSERT_EQUAL_64(base_addr + 8, x21); ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22); TEARDOWN(); } TEST(load_signed) { SETUP(); uint32_t src[2] = {0x80008080, 0x7fff7f7f}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x24, src_base); __ Ldrsb(w0, MemOperand(x24)); __ Ldrsb(w1, MemOperand(x24, 4)); __ Ldrsh(w2, MemOperand(x24)); __ Ldrsh(w3, MemOperand(x24, 4)); __ Ldrsb(x4, MemOperand(x24)); __ Ldrsb(x5, MemOperand(x24, 4)); __ Ldrsh(x6, MemOperand(x24)); __ Ldrsh(x7, MemOperand(x24, 4)); __ Ldrsw(x8, MemOperand(x24)); __ Ldrsw(x9, MemOperand(x24, 4)); END(); RUN(); ASSERT_EQUAL_64(0xffffff80, x0); ASSERT_EQUAL_64(0x0000007f, x1); ASSERT_EQUAL_64(0xffff8080, x2); ASSERT_EQUAL_64(0x00007f7f, x3); ASSERT_EQUAL_64(0xffffffffffffff80, x4); ASSERT_EQUAL_64(0x000000000000007f, x5); ASSERT_EQUAL_64(0xffffffffffff8080, x6); ASSERT_EQUAL_64(0x0000000000007f7f, x7); ASSERT_EQUAL_64(0xffffffff80008080, x8); ASSERT_EQUAL_64(0x000000007fff7f7f, x9); TEARDOWN(); } TEST(load_store_regoffset) { SETUP(); uint32_t src[3] = {1, 2, 3}; uint32_t dst[4] = {0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x16, src_base); __ Mov(x17, dst_base); __ Mov(x18, src_base + 3 * sizeof(src[0])); __ Mov(x19, dst_base + 3 * sizeof(dst[0])); __ Mov(x20, dst_base + 4 * sizeof(dst[0])); __ Mov(x24, 0); __ Mov(x25, 4); __ Mov(x26, -4); __ Mov(x27, 0xfffffffc); __ Mov(x28, 0xfffffffe); __ Mov(x29, 0xffffffff); __ Ldr(w0, MemOperand(x16, x24)); __ Ldr(x1, MemOperand(x16, x25)); __ Ldr(w2, MemOperand(x18, x26)); __ Ldr(w3, MemOperand(x18, x27, SXTW)); __ Ldr(w4, MemOperand(x18, x28, SXTW, 2)); __ Str(w0, MemOperand(x17, x24)); __ Str(x1, MemOperand(x17, x25)); __ Str(w2, MemOperand(x20, x29, SXTW, 2)); END(); RUN(); ASSERT_EQUAL_64(1, x0); ASSERT_EQUAL_64(0x0000000300000002, x1); ASSERT_EQUAL_64(3, x2); ASSERT_EQUAL_64(3, x3); ASSERT_EQUAL_64(2, x4); ASSERT_EQUAL_32(1, dst[0]); ASSERT_EQUAL_32(2, dst[1]); ASSERT_EQUAL_32(3, dst[2]); ASSERT_EQUAL_32(3, dst[3]); TEARDOWN(); } TEST(load_store_float) { SETUP(); float src[3] = {1.0, 2.0, 3.0}; float dst[3] = {0.0, 0.0, 0.0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base); __ Mov(x22, dst_base); __ Ldr(s0, MemOperand(x17, sizeof(src[0]))); __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex)); __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex)); __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); __ Str(s2, MemOperand(x22, sizeof(dst[0]))); END(); RUN(); ASSERT_EQUAL_FP32(2.0, s0); ASSERT_EQUAL_FP32(2.0, dst[0]); ASSERT_EQUAL_FP32(1.0, s1); ASSERT_EQUAL_FP32(1.0, dst[2]); ASSERT_EQUAL_FP32(3.0, s2); ASSERT_EQUAL_FP32(3.0, dst[1]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); ASSERT_EQUAL_64(dst_base, x22); TEARDOWN(); } TEST(load_store_double) { SETUP(); double src[3] = {1.0, 2.0, 3.0}; double dst[3] = {0.0, 0.0, 0.0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base); __ Mov(x22, dst_base); __ Ldr(d0, MemOperand(x17, sizeof(src[0]))); __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex)); __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex)); __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); __ Str(d2, MemOperand(x22, sizeof(dst[0]))); END(); RUN(); ASSERT_EQUAL_FP64(2.0, d0); ASSERT_EQUAL_FP64(2.0, dst[0]); ASSERT_EQUAL_FP64(1.0, d1); ASSERT_EQUAL_FP64(1.0, dst[2]); ASSERT_EQUAL_FP64(3.0, d2); ASSERT_EQUAL_FP64(3.0, dst[1]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); ASSERT_EQUAL_64(dst_base, x22); TEARDOWN(); } TEST(load_store_b) { SETUP(); uint8_t src[3] = {0x12, 0x23, 0x34}; uint8_t dst[3] = {0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base); __ Mov(x22, dst_base); __ Ldr(b0, MemOperand(x17, sizeof(src[0]))); __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex)); __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex)); __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); __ Str(b2, MemOperand(x22, sizeof(dst[0]))); END(); RUN(); ASSERT_EQUAL_128(0, 0x23, q0); ASSERT_EQUAL_64(0x23, dst[0]); ASSERT_EQUAL_128(0, 0x12, q1); ASSERT_EQUAL_64(0x12, dst[2]); ASSERT_EQUAL_128(0, 0x34, q2); ASSERT_EQUAL_64(0x34, dst[1]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); ASSERT_EQUAL_64(dst_base, x22); TEARDOWN(); } TEST(load_store_h) { SETUP(); uint16_t src[3] = {0x1234, 0x2345, 0x3456}; uint16_t dst[3] = {0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base); __ Mov(x22, dst_base); __ Ldr(h0, MemOperand(x17, sizeof(src[0]))); __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex)); __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex)); __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); __ Str(h2, MemOperand(x22, sizeof(dst[0]))); END(); RUN(); ASSERT_EQUAL_128(0, 0x2345, q0); ASSERT_EQUAL_64(0x2345, dst[0]); ASSERT_EQUAL_128(0, 0x1234, q1); ASSERT_EQUAL_64(0x1234, dst[2]); ASSERT_EQUAL_128(0, 0x3456, q2); ASSERT_EQUAL_64(0x3456, dst[1]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); ASSERT_EQUAL_64(dst_base, x22); TEARDOWN(); } TEST(load_store_q) { SETUP(); uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02, 0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20}; uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base); __ Mov(x18, dst_base); __ Mov(x19, src_base); __ Mov(x20, dst_base); __ Mov(x21, src_base); __ Mov(x22, dst_base); __ Ldr(q0, MemOperand(x17, 16)); __ Str(q0, MemOperand(x18, 16, PostIndex)); __ Ldr(q1, MemOperand(x19, 16, PostIndex)); __ Str(q1, MemOperand(x20, 32, PreIndex)); __ Ldr(q2, MemOperand(x21, 32, PreIndex)); __ Str(q2, MemOperand(x22, 16)); END(); RUN(); ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0); ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]); ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]); ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1); ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]); ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]); ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2); ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]); ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]); ASSERT_EQUAL_64(src_base, x17); ASSERT_EQUAL_64(dst_base + 16, x18); ASSERT_EQUAL_64(src_base + 16, x19); ASSERT_EQUAL_64(dst_base + 32, x20); ASSERT_EQUAL_64(src_base + 32, x21); ASSERT_EQUAL_64(dst_base, x22); TEARDOWN(); } TEST(load_store_v_regoffset) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uint8_t dst[64]; memset(dst, 0, sizeof(dst)); uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, src_base + 16); __ Mov(x18, 1); __ Mov(w19, -1); __ Mov(x20, dst_base - 1); __ Ldr(b0, MemOperand(x17, x18)); __ Ldr(b1, MemOperand(x17, x19, SXTW)); __ Ldr(h2, MemOperand(x17, x18)); __ Ldr(h3, MemOperand(x17, x18, UXTW, 1)); __ Ldr(h4, MemOperand(x17, x19, SXTW, 1)); __ Ldr(h5, MemOperand(x17, x18, LSL, 1)); __ Ldr(s16, MemOperand(x17, x18)); __ Ldr(s17, MemOperand(x17, x18, UXTW, 2)); __ Ldr(s18, MemOperand(x17, x19, SXTW, 2)); __ Ldr(s19, MemOperand(x17, x18, LSL, 2)); __ Ldr(d20, MemOperand(x17, x18)); __ Ldr(d21, MemOperand(x17, x18, UXTW, 3)); __ Ldr(d22, MemOperand(x17, x19, SXTW, 3)); __ Ldr(d23, MemOperand(x17, x18, LSL, 3)); __ Ldr(q24, MemOperand(x17, x18)); __ Ldr(q25, MemOperand(x17, x18, UXTW, 4)); __ Ldr(q26, MemOperand(x17, x19, SXTW, 4)); __ Ldr(q27, MemOperand(x17, x18, LSL, 4)); __ Str(b27, MemOperand(x20, x18)); __ Str(h27, MemOperand(x20, x18, UXTW, 1)); __ Add(x20, x20, 8); __ Str(s27, MemOperand(x20, x19, SXTW, 2)); __ Sub(x20, x20, 8); __ Str(d27, MemOperand(x20, x18, LSL, 3)); __ Add(x20, x20, 32); __ Str(q27, MemOperand(x20, x19, SXTW, 4)); __ Sub(x20, x20, 32); __ Ldr(q6, MemOperand(x20, x18)); __ Ldr(q7, MemOperand(x20, x18, LSL, 4)); END(); RUN(); ASSERT_EQUAL_128(0, 0x11, q0); ASSERT_EQUAL_128(0, 0x0f, q1); ASSERT_EQUAL_128(0, 0x1211, q2); ASSERT_EQUAL_128(0, 0x1312, q3); ASSERT_EQUAL_128(0, 0x0f0e, q4); ASSERT_EQUAL_128(0, 0x1312, q5); ASSERT_EQUAL_128(0, 0x14131211, q16); ASSERT_EQUAL_128(0, 0x17161514, q17); ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18); ASSERT_EQUAL_128(0, 0x17161514, q19); ASSERT_EQUAL_128(0, 0x1817161514131211, q20); ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21); ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22); ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23); ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27); ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7); TEARDOWN(); } TEST(neon_ld1_d) { SETUP(); uint8_t src[32 + 5]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ldr(q2, MemOperand(x17)); __ Ld1(v2.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0, 0x0706050403020100, q2); ASSERT_EQUAL_128(0, 0x0807060504030201, q3); ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4); ASSERT_EQUAL_128(0, 0x0908070605040302, q5); ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6); ASSERT_EQUAL_128(0, 0x1918171615141312, q7); ASSERT_EQUAL_128(0, 0x0a09080706050403, q16); ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17); ASSERT_EQUAL_128(0, 0x1a19181716151413, q18); ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19); ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30); ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31); ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0); ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1); ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20); ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21); ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22); ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23); TEARDOWN(); } TEST(neon_ld1_d_postindex) { SETUP(); uint8_t src[32 + 5]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, src_base + 5); __ Mov(x23, 1); __ Ldr(q2, MemOperand(x17)); __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex)); __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex)); __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex)); __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x20, 32, PostIndex)); __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 32, PostIndex)); __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x22, 32, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0, 0x0706050403020100, q2); ASSERT_EQUAL_128(0, 0x0807060504030201, q3); ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4); ASSERT_EQUAL_128(0, 0x0908070605040302, q5); ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6); ASSERT_EQUAL_128(0, 0x1918171615141312, q7); ASSERT_EQUAL_128(0, 0x0a09080706050403, q16); ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17); ASSERT_EQUAL_128(0, 0x1a19181716151413, q18); ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19); ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30); ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31); ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0); ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1); ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20); ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21); ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22); ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 16, x18); ASSERT_EQUAL_64(src_base + 2 + 24, x19); ASSERT_EQUAL_64(src_base + 3 + 32, x20); ASSERT_EQUAL_64(src_base + 4 + 32, x21); ASSERT_EQUAL_64(src_base + 5 + 32, x22); TEARDOWN(); } TEST(neon_ld1_q) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld1(v2.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2); ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3); ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4); ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5); ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6); ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7); ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16); ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17); ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18); ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19); ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0); ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1); TEARDOWN(); } TEST(neon_ld1_q_postindex) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex)); __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex)); __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex)); __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x20, 64, PostIndex)); __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 64, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2); ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3); ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4); ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5); ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6); ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7); ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16); ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17); ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18); ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19); ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0); ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 32, x18); ASSERT_EQUAL_64(src_base + 2 + 48, x19); ASSERT_EQUAL_64(src_base + 3 + 64, x20); ASSERT_EQUAL_64(src_base + 4 + 64, x21); TEARDOWN(); } TEST(neon_ld1_lane) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { __ Ld1(v0.B(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 7; i >= 0; i--) { __ Ld1(v1.H(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 3; i >= 0; i--) { __ Ld1(v2.S(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 1; i >= 0; i--) { __ Ld1(v3.D(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); __ Ldr(q4, MemOperand(x17)); __ Ld1(v4.B(), 4, MemOperand(x17)); __ Ldr(q5, MemOperand(x17)); __ Ld1(v5.H(), 3, MemOperand(x17)); __ Ldr(q6, MemOperand(x17)); __ Ld1(v6.S(), 2, MemOperand(x17)); __ Ldr(q7, MemOperand(x17)); __ Ld1(v7.D(), 1, MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1); ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2); ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7); TEARDOWN(); } TEST(neon_ld2_d) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2); ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3); ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4); ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5); ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6); ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7); ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31); ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0); TEARDOWN(); } TEST(neon_ld2_d_postindex) { SETUP(); uint8_t src[32 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex)); __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex)); __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex)); __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex)); __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2); ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3); ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4); ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5); ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6); ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16); ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17); ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31); ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 16, x18); ASSERT_EQUAL_64(src_base + 2 + 16, x19); ASSERT_EQUAL_64(src_base + 3 + 16, x20); ASSERT_EQUAL_64(src_base + 4 + 16, x21); TEARDOWN(); } TEST(neon_ld2_q) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2); ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3); ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4); ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5); ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6); ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7); ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16); ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17); ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0); TEARDOWN(); } TEST(neon_ld2_q_postindex) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex)); __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex)); __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex)); __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex)); __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2); ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3); ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4); ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5); ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6); ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7); ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16); ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17); ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 32, x18); ASSERT_EQUAL_64(src_base + 2 + 32, x19); ASSERT_EQUAL_64(src_base + 3 + 32, x20); ASSERT_EQUAL_64(src_base + 4 + 32, x21); TEARDOWN(); } TEST(neon_ld2_lane) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { __ Ld2(v0.B(), v1.B(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 7; i >= 0; i--) { __ Ld2(v2.H(), v3.H(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 3; i >= 0; i--) { __ Ld2(v4.S(), v5.S(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 1; i >= 0; i--) { __ Ld2(v6.D(), v7.D(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); __ Mov(x4, x17); __ Ldr(q8, MemOperand(x4, 16, PostIndex)); __ Ldr(q9, MemOperand(x4)); __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17)); __ Mov(x5, x17); __ Ldr(q10, MemOperand(x5, 16, PostIndex)); __ Ldr(q11, MemOperand(x5)); __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17)); __ Mov(x6, x17); __ Ldr(q12, MemOperand(x6, 16, PostIndex)); __ Ldr(q13, MemOperand(x6)); __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17)); __ Mov(x7, x17); __ Ldr(q14, MemOperand(x7, 16, PostIndex)); __ Ldr(q15, MemOperand(x7)); __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2); ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3); ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4); ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5); ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12); ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15); TEARDOWN(); } TEST(neon_ld2_lane_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base); __ Mov(x19, src_base); __ Mov(x20, src_base); __ Mov(x21, src_base); __ Mov(x22, src_base); __ Mov(x23, src_base); __ Mov(x24, src_base); for (int i = 15; i >= 0; i--) { __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex)); } for (int i = 7; i >= 0; i--) { __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex)); } for (int i = 3; i >= 0; i--) { __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex)); } for (int i = 1; i >= 0; i--) { __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex)); } __ Mov(x25, 1); __ Mov(x4, x21); __ Ldr(q8, MemOperand(x4, 16, PostIndex)); __ Ldr(q9, MemOperand(x4)); __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x5, x22); __ Ldr(q10, MemOperand(x5, 16, PostIndex)); __ Ldr(q11, MemOperand(x5)); __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x6, x23); __ Ldr(q12, MemOperand(x6, 16, PostIndex)); __ Ldr(q13, MemOperand(x6)); __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x7, x24); __ Ldr(q14, MemOperand(x7, 16, PostIndex)); __ Ldr(q15, MemOperand(x7)); __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0); ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1); ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2); ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3); ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4); ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5); ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12); ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15); ASSERT_EQUAL_64(src_base + 32, x17); ASSERT_EQUAL_64(src_base + 32, x18); ASSERT_EQUAL_64(src_base + 32, x19); ASSERT_EQUAL_64(src_base + 32, x20); ASSERT_EQUAL_64(src_base + 1, x21); ASSERT_EQUAL_64(src_base + 2, x22); ASSERT_EQUAL_64(src_base + 3, x23); ASSERT_EQUAL_64(src_base + 4, x24); TEARDOWN(); } TEST(neon_ld2_alllanes) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17)); __ Add(x17, x17, 2); __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17)); __ Add(x17, x17, 4); __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17)); __ Add(x17, x17, 8); __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4); ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5); ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6); ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8); ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9); ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10); ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11); ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12); ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13); TEARDOWN(); } TEST(neon_ld2_alllanes_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex)); __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex)); __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex)); __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex)); __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex)); __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex)); __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4); ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5); ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6); ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8); ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9); ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10); ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11); ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12); ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13); ASSERT_EQUAL_64(src_base + 34, x17); TEARDOWN(); } TEST(neon_ld3_d) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2); ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3); ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4); ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5); ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6); ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7); ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8); ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9); ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10); ASSERT_EQUAL_128(0, 0x1211100f06050403, q31); ASSERT_EQUAL_128(0, 0x161514130a090807, q0); ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1); TEARDOWN(); } TEST(neon_ld3_d_postindex) { SETUP(); uint8_t src[32 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex)); __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex)); __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex)); __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex)); __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2); ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3); ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4); ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5); ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6); ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7); ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8); ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9); ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10); ASSERT_EQUAL_128(0, 0x1211100f06050403, q11); ASSERT_EQUAL_128(0, 0x161514130a090807, q12); ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13); ASSERT_EQUAL_128(0, 0x1312111007060504, q31); ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0); ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 24, x18); ASSERT_EQUAL_64(src_base + 2 + 24, x19); ASSERT_EQUAL_64(src_base + 3 + 24, x20); ASSERT_EQUAL_64(src_base + 4 + 24, x21); TEARDOWN(); } TEST(neon_ld3_q) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2); ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3); ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4); ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5); ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6); ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7); ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8); ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9); ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10); ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11); ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12); ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31); ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1); TEARDOWN(); } TEST(neon_ld3_q_postindex) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex)); __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex)); __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex)); __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex)); __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2); ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3); ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4); ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5); ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6); ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7); ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8); ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9); ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10); ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11); ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12); ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13); ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31); ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 48, x18); ASSERT_EQUAL_64(src_base + 2 + 48, x19); ASSERT_EQUAL_64(src_base + 3 + 48, x20); ASSERT_EQUAL_64(src_base + 4 + 48, x21); TEARDOWN(); } TEST(neon_ld3_lane) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 7; i >= 0; i--) { __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 3; i >= 0; i--) { __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 1; i >= 0; i--) { __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); __ Mov(x4, x17); __ Ldr(q12, MemOperand(x4, 16, PostIndex)); __ Ldr(q13, MemOperand(x4, 16, PostIndex)); __ Ldr(q14, MemOperand(x4)); __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17)); __ Mov(x5, x17); __ Ldr(q15, MemOperand(x5, 16, PostIndex)); __ Ldr(q16, MemOperand(x5, 16, PostIndex)); __ Ldr(q17, MemOperand(x5)); __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17)); __ Mov(x6, x17); __ Ldr(q18, MemOperand(x6, 16, PostIndex)); __ Ldr(q19, MemOperand(x6, 16, PostIndex)); __ Ldr(q20, MemOperand(x6)); __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17)); __ Mov(x7, x17); __ Ldr(q21, MemOperand(x7, 16, PostIndex)); __ Ldr(q22, MemOperand(x7, 16, PostIndex)); __ Ldr(q23, MemOperand(x7)); __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2); ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3); ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4); ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5); ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6); ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7); ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8); ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10); ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17); TEARDOWN(); } TEST(neon_ld3_lane_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base); __ Mov(x19, src_base); __ Mov(x20, src_base); __ Mov(x21, src_base); __ Mov(x22, src_base); __ Mov(x23, src_base); __ Mov(x24, src_base); for (int i = 15; i >= 0; i--) { __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex)); } for (int i = 7; i >= 0; i--) { __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex)); } for (int i = 3; i >= 0; i--) { __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex)); } for (int i = 1; i >= 0; i--) { __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex)); } __ Mov(x25, 1); __ Mov(x4, x21); __ Ldr(q12, MemOperand(x4, 16, PostIndex)); __ Ldr(q13, MemOperand(x4, 16, PostIndex)); __ Ldr(q14, MemOperand(x4)); __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x5, x22); __ Ldr(q15, MemOperand(x5, 16, PostIndex)); __ Ldr(q16, MemOperand(x5, 16, PostIndex)); __ Ldr(q17, MemOperand(x5)); __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x6, x23); __ Ldr(q18, MemOperand(x6, 16, PostIndex)); __ Ldr(q19, MemOperand(x6, 16, PostIndex)); __ Ldr(q20, MemOperand(x6)); __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x7, x24); __ Ldr(q21, MemOperand(x7, 16, PostIndex)); __ Ldr(q22, MemOperand(x7, 16, PostIndex)); __ Ldr(q23, MemOperand(x7)); __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0); ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1); ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2); ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3); ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4); ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5); ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6); ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7); ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8); ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10); ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18); ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19); ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22); ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23); ASSERT_EQUAL_64(src_base + 48, x17); ASSERT_EQUAL_64(src_base + 48, x18); ASSERT_EQUAL_64(src_base + 48, x19); ASSERT_EQUAL_64(src_base + 48, x20); ASSERT_EQUAL_64(src_base + 1, x21); ASSERT_EQUAL_64(src_base + 2, x22); ASSERT_EQUAL_64(src_base + 3, x23); ASSERT_EQUAL_64(src_base + 4, x24); TEARDOWN(); } TEST(neon_ld3_alllanes) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17)); __ Add(x17, x17, 3); __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17)); __ Add(x17, x17, 6); __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17)); __ Add(x17, x17, 12); __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4); ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5); ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6); ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8); ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9); ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10); ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11); ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12); ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13); ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14); ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15); ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16); ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17); ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18); ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19); ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20); TEARDOWN(); } TEST(neon_ld3_alllanes_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); __ Mov(x17, src_base + 1); __ Mov(x18, 1); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex)); __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex)); __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex)); __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex)); __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex)); __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex)); __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4); ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5); ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6); ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8); ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9); ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10); ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11); ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12); ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13); ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14); ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15); ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16); ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17); ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18); ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19); ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20); TEARDOWN(); } TEST(neon_ld4_d) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2); ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3); ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4); ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5); ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6); ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7); ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8); ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9); ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10); ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11); ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12); ASSERT_EQUAL_128(0, 0x2120191811100908, q13); ASSERT_EQUAL_128(0, 0x1615141306050403, q30); ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31); ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0); ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1); TEARDOWN(); } TEST(neon_ld4_d_postindex) { SETUP(); uint8_t src[32 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17, x22, PostIndex)); __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x18, 32, PostIndex)); __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x19, 32, PostIndex)); __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(), MemOperand(x20, 32, PostIndex)); __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 32, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2); ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3); ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4); ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5); ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6); ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7); ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8); ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9); ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10); ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11); ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12); ASSERT_EQUAL_128(0, 0x2120191811100908, q13); ASSERT_EQUAL_128(0, 0x1615141306050403, q14); ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15); ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16); ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17); ASSERT_EQUAL_128(0, 0x1716151407060504, q30); ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31); ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0); ASSERT_EQUAL_128(0, 0x2322212013121110, q1); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 32, x18); ASSERT_EQUAL_64(src_base + 2 + 32, x19); ASSERT_EQUAL_64(src_base + 3 + 32, x20); ASSERT_EQUAL_64(src_base + 4 + 32, x21); TEARDOWN(); } TEST(neon_ld4_q) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2); ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3); ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4); ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5); ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6); ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7); ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8); ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9); ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10); ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11); ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12); ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13); ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14); ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15); ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16); ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17); ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19); ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20); ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21); TEARDOWN(); } TEST(neon_ld4_q_postindex) { SETUP(); uint8_t src[64 + 4]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, src_base + 1); __ Mov(x19, src_base + 2); __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x22, PostIndex)); __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x18, 64, PostIndex)); __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x19, 64, PostIndex)); __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x20, 64, PostIndex)); __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 64, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2); ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3); ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4); ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5); ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6); ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7); ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8); ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9); ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10); ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11); ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12); ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13); ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14); ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15); ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16); ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17); ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30); ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31); ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0); ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1); ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 64, x18); ASSERT_EQUAL_64(src_base + 2 + 64, x19); ASSERT_EQUAL_64(src_base + 3 + 64, x20); ASSERT_EQUAL_64(src_base + 4 + 64, x21); TEARDOWN(); } TEST(neon_ld4_lane) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 7; i >= 0; i--) { __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 3; i >= 0; i--) { __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); for (int i = 1; i >= 0; i--) { __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Mov(x17, src_base); __ Mov(x4, x17); __ Ldr(q16, MemOperand(x4, 16, PostIndex)); __ Ldr(q17, MemOperand(x4, 16, PostIndex)); __ Ldr(q18, MemOperand(x4, 16, PostIndex)); __ Ldr(q19, MemOperand(x4)); __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17)); __ Mov(x5, x17); __ Ldr(q20, MemOperand(x5, 16, PostIndex)); __ Ldr(q21, MemOperand(x5, 16, PostIndex)); __ Ldr(q22, MemOperand(x5, 16, PostIndex)); __ Ldr(q23, MemOperand(x5)); __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17)); __ Mov(x6, x17); __ Ldr(q24, MemOperand(x6, 16, PostIndex)); __ Ldr(q25, MemOperand(x6, 16, PostIndex)); __ Ldr(q26, MemOperand(x6, 16, PostIndex)); __ Ldr(q27, MemOperand(x6)); __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17)); __ Mov(x7, x17); __ Ldr(q28, MemOperand(x7, 16, PostIndex)); __ Ldr(q29, MemOperand(x7, 16, PostIndex)); __ Ldr(q30, MemOperand(x7, 16, PostIndex)); __ Ldr(q31, MemOperand(x7)); __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2); ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3); ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4); ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5); ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6); ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7); ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8); ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9); ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10); ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11); ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13); ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24); ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25); ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26); ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29); ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31); TEARDOWN(); } TEST(neon_ld4_lane_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex)); } __ Mov(x18, src_base); for (int i = 7; i >= 0; i--) { __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex)); } __ Mov(x19, src_base); for (int i = 3; i >= 0; i--) { __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex)); } __ Mov(x20, src_base); for (int i = 1; i >= 0; i--) { __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x20, 32, PostIndex)); } __ Mov(x25, 1); __ Mov(x21, src_base); __ Mov(x22, src_base); __ Mov(x23, src_base); __ Mov(x24, src_base); __ Mov(x4, x21); __ Ldr(q16, MemOperand(x4, 16, PostIndex)); __ Ldr(q17, MemOperand(x4, 16, PostIndex)); __ Ldr(q18, MemOperand(x4, 16, PostIndex)); __ Ldr(q19, MemOperand(x4)); __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x21, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x5, x22); __ Ldr(q20, MemOperand(x5, 16, PostIndex)); __ Ldr(q21, MemOperand(x5, 16, PostIndex)); __ Ldr(q22, MemOperand(x5, 16, PostIndex)); __ Ldr(q23, MemOperand(x5)); __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x22, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x6, x23); __ Ldr(q24, MemOperand(x6, 16, PostIndex)); __ Ldr(q25, MemOperand(x6, 16, PostIndex)); __ Ldr(q26, MemOperand(x6, 16, PostIndex)); __ Ldr(q27, MemOperand(x6)); __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x23, x25, PostIndex)); __ Add(x25, x25, 1); __ Mov(x7, x24); __ Ldr(q28, MemOperand(x7, 16, PostIndex)); __ Ldr(q29, MemOperand(x7, 16, PostIndex)); __ Ldr(q30, MemOperand(x7, 16, PostIndex)); __ Ldr(q31, MemOperand(x7)); __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x24, x25, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0); ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1); ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2); ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3); ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4); ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5); ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6); ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7); ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8); ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9); ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10); ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11); ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13); ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23); ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24); ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25); ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26); ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27); ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28); ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29); ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30); ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31); ASSERT_EQUAL_64(src_base + 64, x17); ASSERT_EQUAL_64(src_base + 64, x18); ASSERT_EQUAL_64(src_base + 64, x19); ASSERT_EQUAL_64(src_base + 64, x20); ASSERT_EQUAL_64(src_base + 1, x21); ASSERT_EQUAL_64(src_base + 2, x22); ASSERT_EQUAL_64(src_base + 3, x23); ASSERT_EQUAL_64(src_base + 4, x24); TEARDOWN(); } TEST(neon_ld4_alllanes) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17)); __ Add(x17, x17, 4); __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17)); __ Add(x17, x17, 8); __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17)); __ Add(x17, x17, 1); __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17)); __ Add(x17, x17, 16); __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4); ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5); ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6); ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8); ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9); ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10); ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11); ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12); ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13); ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14); ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15); ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16); ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17); ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18); ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19); ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20); ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21); ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22); ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23); ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25); ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27); TEARDOWN(); } TEST(neon_ld4_alllanes_postindex) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); __ Mov(x17, src_base + 1); __ Mov(x18, 1); START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17, 4, PostIndex)); __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17, x18, PostIndex)); __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17, x18, PostIndex)); __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17, 8, PostIndex)); __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17, x18, PostIndex)); __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17, 16, PostIndex)); __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17, 32, PostIndex)); END(); RUN(); ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2); ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3); ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4); ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5); ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6); ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7); ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8); ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9); ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10); ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11); ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12); ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13); ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14); ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15); ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16); ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17); ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18); ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19); ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20); ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21); ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22); ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23); ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24); ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25); ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26); ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27); ASSERT_EQUAL_64(src_base + 64, x17); TEARDOWN(); } TEST(neon_st1_lane) { SETUP(); uint8_t src[64]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } uintptr_t src_base = reinterpret_cast<uintptr_t>(src); START(); __ Mov(x17, src_base); __ Mov(x18, -16); __ Ldr(q0, MemOperand(x17)); for (int i = 15; i >= 0; i--) { __ St1(v0.B(), i, MemOperand(x17)); __ Add(x17, x17, 1); } __ Ldr(q1, MemOperand(x17, x18)); for (int i = 7; i >= 0; i--) { __ St1(v0.H(), i, MemOperand(x17)); __ Add(x17, x17, 2); } __ Ldr(q2, MemOperand(x17, x18)); for (int i = 3; i >= 0; i--) { __ St1(v0.S(), i, MemOperand(x17)); __ Add(x17, x17, 4); } __ Ldr(q3, MemOperand(x17, x18)); for (int i = 1; i >= 0; i--) { __ St1(v0.D(), i, MemOperand(x17)); __ Add(x17, x17, 8); } __ Ldr(q4, MemOperand(x17, x18)); END(); RUN(); ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1); ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2); ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3); ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4); TEARDOWN(); } TEST(neon_st2_lane) { SETUP(); uint8_t dst[2 * 2 * 4 * 16]; memset(dst, 0, sizeof(dst)); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); START(); __ Mov(x17, dst_base); __ Mov(x18, dst_base); __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f); for (int i = 15; i >= 0; i--) { __ St2(v0.B(), v1.B(), i, MemOperand(x18)); __ Add(x18, x18, 2); } for (int i = 15; i >= 0; i--) { __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex)); } __ Ldr(q2, MemOperand(x17, 0 * 16)); __ Ldr(q3, MemOperand(x17, 1 * 16)); __ Ldr(q4, MemOperand(x17, 2 * 16)); __ Ldr(q5, MemOperand(x17, 3 * 16)); __ Mov(x0, 4); for (int i = 7; i >= 0; i--) { __ St2(v0.H(), v1.H(), i, MemOperand(x18)); __ Add(x18, x18, 4); } for (int i = 7; i >= 0; i--) { __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex)); } __ Ldr(q6, MemOperand(x17, 4 * 16)); __ Ldr(q7, MemOperand(x17, 5 * 16)); __ Ldr(q16, MemOperand(x17, 6 * 16)); __ Ldr(q17, MemOperand(x17, 7 * 16)); for (int i = 3; i >= 0; i--) argument
[all...]
/external/vixl/benchmarks/aarch64/
H A Dbench-branch-link.cc57 MacroAssembler masm(instructions * kInstructionSize);
58 ExactAssemblyScope scope(&masm, instructions * kInstructionSize);
H A Dbench-branch.cc59 const int buffer_instruction_count = buffer_size / kInstructionSize;
82 ExactAssemblyScope scope(&masm, remaining * kInstructionSize);
H A Dbench-dataop.cc59 const unsigned buffer_instruction_count = buffer_size / kInstructionSize;
79 ExactAssemblyScope scope(&masm, remaining * kInstructionSize);
H A Dbench-branch-link-masm.cc53 size_t base_buf_size = iterations * 4 * kInstructionSize;
H A Dbench-branch-masm.cc60 buffer_size / (instructions_per_iteration * kInstructionSize);
/external/v8/src/arm64/
H A Dinstructions-arm64.h125 return InstructionAtOffset(count * static_cast<int>(kInstructionSize));
129 return InstructionAtOffset(count * static_cast<int>(kInstructionSize));
311 kInstructionSize;
392 DCHECK(check == NO_CHECK || IsAligned(offset, kInstructionSize));
399 DCHECK(check == NO_CHECK || IsAligned(offset, kInstructionSize));
465 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
466 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
467 const unsigned kPrintfLength = 3 * kInstructionSize;
488 // kInstructionSize so that subsequent instructions are correctly aligned.
491 const unsigned kDebugCodeOffset = 1 * kInstructionSize;
[all...]
H A Dcodegen-arm64.cc50 young_sequence_.length() / kInstructionSize);
56 const int length = kCodeAgeStubEntryOffset / kInstructionSize;
86 kNoCodeAgeSequenceLength / kInstructionSize);
H A Dinstructions-arm64.cc254 DCHECK(IsAligned(DistanceTo(target), kInstructionSize));
290 DCHECK(IsAligned(DistanceTo(target), kInstructionSize));
305 DCHECK(IsAligned(DistanceTo(source), kInstructionSize));
H A Ddeoptimizer-arm64.cc20 return 4 * kInstructionSize;
53 patch_size() / kInstructionSize);
54 patcher.ldr_pcrel(ip0, (2 * kInstructionSize) >> kLoadLiteralScaleLog2);
298 const int Deoptimizer::table_entry_size_ = 2 * kInstructionSize;
H A Dassembler-arm64-inl.h592 Address candidate = pc - 2 * kInstructionSize;
617 STATIC_ASSERT(Assembler::kCallSizeWithoutRelocation == 4 * kInstructionSize);
618 STATIC_ASSERT(Assembler::kCallSizeWithRelocation == 2 * kInstructionSize);
779 static const int kNoCodeAgeSequenceLength = 5 * kInstructionSize;
780 static const int kCodeAgeStubEntryOffset = 3 * kInstructionSize;
945 DCHECK(IsAligned(offset, kInstructionSize));
1166 DCHECK((SizeLS_offset + SizeLS_width) == (kInstructionSize * 8));
H A Dassembler-arm64.cc366 return 4 * kInstructionSize + EntryCount() * kPointerSize;
378 int prologue_size = require_jump ? kInstructionSize : 0;
379 prologue_size += 2 * kInstructionSize;
381 0 : kInstructionSize;
940 return RoundUp(size, kInstructionSize) / kInstructionSize;
945 return kPrintfLength / kInstructionSize;
2313 DCHECK(RoundUp(len, kInstructionSize) <= static_cast<size_t>(kGap));
2317 STATIC_ASSERT(sizeof(pad) == kInstructionSize);
2318 EmitData(pad, RoundUp(pc_offset(), kInstructionSize)
[all...]
H A Dassembler-arm64.h854 static const int kCallSizeWithoutRelocation = 4 * kInstructionSize;
855 static const int kCallSizeWithRelocation = 2 * kInstructionSize;
884 return SizeOfCodeGeneratedSince(label) / kInstructionSize;
892 kDebugBreakSlotInstructions * kInstructionSize;
1850 static const int kMaxVeneerCodeSize = 1 * kInstructionSize;
2016 next_constant_pool_check_ = pc_offset() + instructions * kInstructionSize;
2022 STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
2200 count * kInstructionSize + kGap) {
2205 : Assembler(isolate, start, count * kInstructionSize + kGap) {
H A Dcode-stubs-arm64.h137 Instruction* instr2 = patcher.InstructionAt(kInstructionSize);
/external/vixl/test/
H A Dtest-code-generation-scopes.cc72 CodeBufferCheckScope scope(&masm, aarch64::kInstructionSize);
101 CodeBufferCheckScope scope(&masm, 2 * aarch64::kInstructionSize);
134 scope.Open(&masm, aarch64::kInstructionSize);
164 CodeBufferCheckScope scope(&masm, aarch64::kInstructionSize);
200 scope.Open(&masm, aarch64::kInstructionSize);
230 EmissionCheckScope scope(&masm, aarch64::kInstructionSize);
262 scope.Open(&masm, aarch64::kInstructionSize);
292 EmissionCheckScope scope(&masm, aarch64::kInstructionSize);
328 scope.Open(&masm, aarch64::kInstructionSize);
386 VIXL_CHECK((expected + aarch64::kInstructionSize)
[all...]
/external/vixl/examples/aarch64/
H A Dnon-const-visitor.cc97 for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
122 for (instr = start; instr < end; instr += kInstructionSize) {
H A Dliteral.cc46 kInstructionSize + sizeof(int64_t),
H A Dcustom-disassembler.cc157 // custom_disasm.MapCodeAddress(0x0, instr_start + 2 * kInstructionSize);
165 for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
/external/v8/src/debug/arm64/
H A Ddebug-arm64.cc59 // ldr ip0, [pc, #(2 * kInstructionSize)]
68 patcher.ldr_pcrel(ip0, (2 * kInstructionSize) >> kLoadLiteralScaleLog2);

Completed in 560 milliseconds

12