// ih264_padding_neon_av8.s revision a2b49e5f0574dee76f81507f288143d83a4b7c1a
//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
// *******************************************************************************
// * @file
// *  ih264_padding_neon_av8.s
// *
// * @brief
// *  Contains function definitions for padding
// *
// * @author
// *  Ittiam
// *
// * @par List of Functions:
// *  - ih264_pad_top_av8()
// *  - ih264_pad_left_luma_av8()
// *  - ih264_pad_left_chroma_av8()
// *  - ih264_pad_right_luma_av8()
// *  - ih264_pad_right_chroma_av8()
// *
// * @remarks
// *  None
// *
// *******************************************************************************
//*/

.text
.p2align 2
.include "ih264_neon_macros.s"
///**
//*******************************************************************************
//*
//* @brief pad at the top of a 2d array
//*
//* @par Description:
//*  The top row of a 2d array is replicated for pad_size times at the top
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] wd
//*  integer width of the array
//*
//* @param[in] pad_size
//*  integer padding size of the array
//*
//* @returns none
//*
//* @remarks
//*  The loops below terminate only on an exact zero count: wd must be a
//*  positive multiple of 16 and pad_size must be at least 1.
//*
//*******************************************************************************
//*/
//void ih264_pad_top(UWORD8 *pu1_src,
//                   WORD32 src_strd,
//                   WORD32 wd,
//                   WORD32 pad_size)
//**************Variables Vs Registers*************************
//    x0 => *pu1_src  (walks along the top row, 16 bytes per outer iteration)
//    x1 => src_strd
//    x2 => wd        (remaining columns)
//    x3 => pad_size
//    x4 => store pointer for the current 16-byte column strip
//    x5 => address of the row directly above pu1_src for the next strip
//    x6 => -src_strd (post-index step that moves the store pointer up one row)
//    x7 => remaining rows to write for the current strip

    .global ih264_pad_top_av8

ih264_pad_top_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here; presumably they save/restore callee-saved SIMD registers -
    // TODO confirm).
    push_v_regs
    stp       x19, x20, [sp, #-16]!     // x20 is used as a scratch below

    // The three WORD32 arguments arrive in w1-w3. AAPCS64 leaves bits 63:32
    // of those registers unspecified, so widen them before they are used in
    // 64-bit address arithmetic and as 64-bit loop counters.
    sxtw      x1, w1                    // src_strd
    sxtw      x2, w2                    // wd
    sxtw      x3, w3                    // pad_size

    sub       x5, x0, x1                // first destination row: pu1_src - src_strd
    sub       x20, x1, #0               // x20 = src_strd
    neg       x6, x20                   // x6  = -src_strd

loop_neon_memcpy_mul_16:
    // Load 16 bytes of the top row and advance the source by 16
    ld1       {v0.8b, v1.8b}, [x0], #16
    mov       x4, x5                    // restart at the row just above the source
    mov       x7, x3                    // pad_size rows to fill for this strip
    add       x5, x5, #16               // next strip starts 16 bytes to the right

loop_neon_pad_top:
    st1       {v0.8b, v1.8b}, [x4], x6  // store 16 bytes, then step up one row
    subs      x7, x7, #1
    bne       loop_neon_pad_top

    subs      x2, x2, #16               // 16 columns done
    bne       loop_neon_memcpy_mul_16

    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret




///**
//*******************************************************************************
//*
//* @brief
//*   Padding (luma block) at the left of a 2d array
//*
//* @par Description:
//*   The left column of a 2d array is replicated for pad_size times at the left
//*
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] ht
//*  integer height of the array
//*
//* @param[in] pad_size
//*  integer padding size of the array
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//#if PAD_LEFT_LUMA == C
//void ih264_pad_left_luma(UWORD8 *pu1_src,
//                         WORD32 src_strd,
//                         WORD32 ht,
//                         WORD32 pad_size)
//**************Variables Vs Registers*************************
//    x0 => *pu1_src  (read pointer: first byte of each row)
//    x1 => src_strd
//    x2 => ht        (remaining rows)
//    x3 => pad_size
//    x4 => store pointer, starts at pu1_src - pad_size
//    x6 => src_strd - 16 (post-index step after the second 16-byte store of
//          a row on the 32-wide path)
//
// Behaviour as implemented:
//    * pad_size == 16 takes the 16-byte-per-row path (loop_16); every other
//      value takes the 32-byte-per-row path (loop_32).
//    * Both paths handle 8 rows per iteration and leave the loop only when
//      the row counter reaches exactly zero, so ht must be a positive
//      multiple of 8.
//    * Each ldrb already zero-extends into the w register; the sxtw that
//      follows it does not change the value and is kept only to preserve
//      the original instruction schedule.
//    * The labels loop_32 and end_func are file-global; keep their names
//      stable in case other code in this file refers to them.



    .global ih264_pad_left_luma_av8

ih264_pad_left_luma_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here; presumably they save/restore callee-saved SIMD registers -
    // TODO confirm).
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    // WORD32 arguments arrive in w1-w3; AAPCS64 leaves bits 63:32
    // unspecified, so widen before 64-bit pointer/counter arithmetic.
    sxtw      x1, w1                    // src_strd
    sxtw      x2, w2                    // ht
    sxtw      x3, w3                    // pad_size

    sub       x4, x0, x3                // destination = pu1_src - pad_size
    sub       x6, x1, #16
    subs      x5, x3, #16               // flags only: pad_size == 16 ?
    bne       loop_32
loop_16:                                // 16 bytes per row, 8 rows per iteration
    ldrb      w8, [x0]                  // row 0 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    ldrb      w9, [x0]                  // row 1 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 2 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], x1        // row 0
    dup       v2.16b, w9
    st1       {v2.16b}, [x4], x1        // row 1
    ldrb      w11, [x0]                 // row 3 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    dup       v4.16b, w10
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x1        // row 2
    ldrb      w8, [x0]                  // row 4 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    st1       {v6.16b}, [x4], x1        // row 3
    ldrb      w9, [x0]                  // row 5 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 6 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], x1        // row 4
    dup       v2.16b, w9
    ldrb      w11, [x0]                 // row 7 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], x1        // row 5
    dup       v4.16b, w10
    dup       v6.16b, w11
    subs      x2, x2, #8                // 8 rows done (stores below keep the flags)
    st1       {v4.16b}, [x4], x1        // row 6
    st1       {v6.16b}, [x4], x1        // row 7
    bne       loop_16
    b         end_func

loop_32:                                // 32 bytes per row, 8 rows per iteration
    ldrb      w8, [x0]                  // row 0 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    ldrb      w9, [x0]                  // row 1 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 2 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 0, bytes 0-15
    dup       v2.16b, w9
    st1       {v0.16b}, [x4], x6        // row 0, bytes 16-31, then next row
    st1       {v2.16b}, [x4], #16       // row 1
    dup       v4.16b, w10
    st1       {v2.16b}, [x4], x6
    ldrb      w11, [x0]                 // row 3 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v4.16b}, [x4], #16       // row 2
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x6
    ldrb      w8, [x0]                  // row 4 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    st1       {v6.16b}, [x4], #16       // row 3
    dup       v0.16b, w8
    ldrb      w9, [x0]                  // row 5 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    st1       {v6.16b}, [x4], x6
    ldrb      w10, [x0]                 // row 6 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 4
    dup       v2.16b, w9
    st1       {v0.16b}, [x4], x6
    ldrb      w11, [x0]                 // row 7 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], #16       // row 5
    dup       v4.16b, w10
    st1       {v2.16b}, [x4], x6
    st1       {v4.16b}, [x4], #16       // row 6
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x6
    subs      x2, x2, #8                // 8 rows done (stores below keep the flags)
    st1       {v6.16b}, [x4], #16       // row 7
    st1       {v6.16b}, [x4], x6
    bne       loop_32



end_func:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//* @brief
//*   Padding (chroma block) at the left of a 2d array
//*
//* @par Description:
//*   The left column of a 2d array is replicated for pad_size times at the left
//*
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] ht
//*  integer height of the array
//*
//* @param[in] pad_size
//*  integer padding size of the array
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//#if PAD_LEFT_CHROMA == C
//void ih264_pad_left_chroma(UWORD8 *pu1_src,
//                           WORD32 src_strd,
//                           WORD32 ht,
//                           WORD32 pad_size)
//{
//    x0 => *pu1_src  (read pointer: first 2 bytes of each row)
//    x1 => src_strd
//    x2 => ht        (remaining rows)
//    x3 => pad_size
//    x4 => store pointer, starts at pu1_src - pad_size
//    x6 => src_strd - 16 (post-index step after the second 16-byte store of
//          a row)
//
// Behaviour as implemented:
//    * The first two bytes of each row are loaded as one halfword and
//      replicated 8 times per vector (presumably an interleaved chroma
//      pair - confirm against the caller).
//    * Every row receives two 16-byte stores, i.e. 32 bytes, whatever the
//      value of pad_size; pad_size only positions the destination.
//    * Rows are processed in groups of 4 and the loop is left only when the
//      row counter reaches exactly zero, so ht must be a positive multiple
//      of 4.
//    * Each ldrh already zero-extends into the w register; the sxtw that
//      follows it does not change the value and is kept only to preserve
//      the original instruction schedule.



    .global ih264_pad_left_chroma_av8

ih264_pad_left_chroma_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here; presumably they save/restore callee-saved SIMD registers -
    // TODO confirm).
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    // WORD32 arguments arrive in w1-w3; AAPCS64 leaves bits 63:32
    // unspecified, so widen before 64-bit pointer/counter arithmetic.
    sxtw      x1, w1                    // src_strd
    sxtw      x2, w2                    // ht
    sxtw      x3, w3                    // pad_size

    sub       x4, x0, x3                // destination = pu1_src - pad_size
    sub       x6, x1, #16


loop_32_l_c:                            // 32 bytes per row, 4 rows per group
    ldrh      w8, [x0]                  // row 0 pair
    add       x0, x0, x1
    sxtw      x8, w8
    ldrh      w9, [x0]                  // row 1 pair
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.8h, w8
    ldrh      w10, [x0]                 // row 2 pair
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 0, bytes 0-15
    dup       v2.8h, w9
    st1       {v0.16b}, [x4], x6        // row 0, bytes 16-31, then next row
    ldrh      w11, [x0]                 // row 3 pair
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], #16       // row 1
    dup       v4.8h, w10
    st1       {v2.16b}, [x4], x6
    dup       v6.8h, w11
    st1       {v4.16b}, [x4], #16       // row 2
    st1       {v4.16b}, [x4], x6
    subs      x2, x2, #4                // 4 rows done (stores below keep the flags)
    st1       {v6.16b}, [x4], #16       // row 3
    st1       {v6.16b}, [x4], x6


    beq       end_func_l_c              // no rows left after the first group

    ldrh      w8, [x0]                  // row 4 pair
    add       x0, x0, x1
    sxtw      x8, w8
    ldrh      w9, [x0]                  // row 5 pair
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.8h, w8
    ldrh      w10, [x0]                 // row 6 pair
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 4
    dup       v2.8h, w9
    st1       {v0.16b}, [x4], x6
    ldrh      w11, [x0]                 // row 7 pair
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], #16       // row 5
    dup       v4.8h, w10
    st1       {v2.16b}, [x4], x6
    dup       v6.8h, w11
    st1       {v4.16b}, [x4], #16       // row 6
    st1       {v4.16b}, [x4], x6
    subs      x2, x2, #4                // 4 more rows done
    st1       {v6.16b}, [x4], #16       // row 7
    st1       {v6.16b}, [x4], x6

    // The original followed this point with "beq end / bne loop" and a third
    // unrolled group that could never execute; taller blocks are handled by
    // looping back to the first group instead.
    bne       loop_32_l_c

end_func_l_c:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//* @brief
//*  Padding (luma block) at the right of a 2d array
//*
//* @par Description:
//* The right column of a 2d array is replicated for pad_size times at the right
//*
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] ht
//*  integer height of the array
//*
//* @param[in] pad_size
//*  integer padding size of the array
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//#if PAD_RIGHT_LUMA == C
//void ih264_pad_right_luma(UWORD8 *pu1_src,
//                          WORD32 src_strd,
//                          WORD32 ht,
//                          WORD32 pad_size)
//{
//    WORD32 row;
//
//    for(row = 0; row < ht; row++)
//    {
//        memset(pu1_src, *(pu1_src -1), pad_size);
//
//        pu1_src += src_strd;
//    }
//}
//
//    x0 => *pu1_src  (on entry; then read pointer at pu1_src - 1)
//    x1 => src_strd
//    x2 => ht        (remaining rows)
//    x3 => pad_size
//    x4 => store pointer, starts at pu1_src
//    x6 => src_strd - 16 (post-index step after the second 16-byte store of
//          a row on the 32-wide path)
//
// Behaviour as implemented:
//    * pad_size == 16 takes the 16-byte-per-row path (loop_16_r); every
//      other value takes the 32-byte-per-row path (loop_32_r).
//    * Both paths handle 8 rows per iteration and leave the loop only when
//      the row counter reaches exactly zero, so ht must be a positive
//      multiple of 8.
//    * Each ldrb already zero-extends into the w register; the sxtw that
//      follows it does not change the value and is kept only to preserve
//      the original instruction schedule.



    .global ih264_pad_right_luma_av8

ih264_pad_right_luma_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here; presumably they save/restore callee-saved SIMD registers -
    // TODO confirm).
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    // WORD32 arguments arrive in w1-w3; AAPCS64 leaves bits 63:32
    // unspecified, so widen before 64-bit pointer/counter arithmetic.
    sxtw      x1, w1                    // src_strd
    sxtw      x2, w2                    // ht
    sxtw      x3, w3                    // pad_size

    mov       x4, x0                    // destination = pu1_src
    sub       x6, x1, #16
    sub       x0, x0, #1                // source = byte just left of pu1_src
    subs      x5, x3, #16               // flags only: pad_size == 16 ?
    // Branch to this routine's own 32-wide loop. The original targeted
    // loop_32, a label inside ih264_pad_left_luma_av8, which only worked
    // because that loop uses the same registers and epilogue.
    bne       loop_32_r
loop_16_r:                              // 16 bytes per row, 8 rows per iteration
    ldrb      w8, [x0]                  // row 0 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    ldrb      w9, [x0]                  // row 1 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 2 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], x1        // row 0
    dup       v2.16b, w9
    st1       {v2.16b}, [x4], x1        // row 1
    ldrb      w11, [x0]                 // row 3 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    dup       v4.16b, w10
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x1        // row 2
    ldrb      w8, [x0]                  // row 4 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    st1       {v6.16b}, [x4], x1        // row 3
    ldrb      w9, [x0]                  // row 5 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 6 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], x1        // row 4
    dup       v2.16b, w9
    ldrb      w11, [x0]                 // row 7 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], x1        // row 5
    dup       v4.16b, w10
    dup       v6.16b, w11
    subs      x2, x2, #8                // 8 rows done (stores below keep the flags)
    st1       {v4.16b}, [x4], x1        // row 6
    st1       {v6.16b}, [x4], x1        // row 7
    bne       loop_16_r
    b         end_func_r

loop_32_r:                              // 32 bytes per row, 8 rows per iteration
    ldrb      w8, [x0]                  // row 0 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    ldrb      w9, [x0]                  // row 1 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    ldrb      w10, [x0]                 // row 2 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 0, bytes 0-15
    dup       v2.16b, w9
    st1       {v0.16b}, [x4], x6        // row 0, bytes 16-31, then next row
    st1       {v2.16b}, [x4], #16       // row 1
    dup       v4.16b, w10
    st1       {v2.16b}, [x4], x6
    ldrb      w11, [x0]                 // row 3 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v4.16b}, [x4], #16       // row 2
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x6
    ldrb      w8, [x0]                  // row 4 pixel
    add       x0, x0, x1
    sxtw      x8, w8
    st1       {v6.16b}, [x4], #16       // row 3
    ldrb      w9, [x0]                  // row 5 pixel
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.16b, w8
    st1       {v6.16b}, [x4], x6
    ldrb      w10, [x0]                 // row 6 pixel
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 4
    dup       v2.16b, w9
    st1       {v0.16b}, [x4], x6
    ldrb      w11, [x0]                 // row 7 pixel
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], #16       // row 5
    dup       v4.16b, w10
    st1       {v2.16b}, [x4], x6
    st1       {v4.16b}, [x4], #16       // row 6
    dup       v6.16b, w11
    st1       {v4.16b}, [x4], x6
    subs      x2, x2, #8                // 8 rows done (stores below keep the flags)
    st1       {v6.16b}, [x4], #16       // row 7
    st1       {v6.16b}, [x4], x6
    bne       loop_32_r



end_func_r:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//* @brief
//*  Padding (chroma block) at the right of a 2d array
//*
//* @par Description:
//* The right column of a 2d array is replicated for pad_size times at the right
//*
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] ht
//*  integer height of the array
//*
//* @param[in] pad_size
//*  integer padding size of the array
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//#if PAD_RIGHT_CHROMA == C
//void ih264_pad_right_chroma(UWORD8 *pu1_src,
//                            WORD32 src_strd,
//                            WORD32 ht,
//                            WORD32 pad_size)
//    x0 => *pu1_src  (on entry; then read pointer at pu1_src - 2)
//    x1 => src_strd
//    x2 => ht        (remaining rows)
//    x3 => pad_size  (not read by this routine)
//    x4 => store pointer, starts at pu1_src
//    x6 => src_strd - 16 (post-index step after the second 16-byte store of
//          a row)
//
// Behaviour as implemented:
//    * The two bytes just left of pu1_src are loaded as one halfword per row
//      and replicated 8 times per vector (presumably an interleaved chroma
//      pair - confirm against the caller).
//    * Every row receives two 16-byte stores, i.e. 32 bytes; pad_size is
//      never consulted.
//    * Rows are processed in groups of 4 and the loop is left only when the
//      row counter reaches exactly zero, so ht must be a positive multiple
//      of 4.
//    * Each ldrh already zero-extends into the w register; the sxtw that
//      follows it does not change the value and is kept only to preserve
//      the original instruction schedule.



    .global ih264_pad_right_chroma_av8

ih264_pad_right_chroma_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here; presumably they save/restore callee-saved SIMD registers -
    // TODO confirm).
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    // WORD32 arguments arrive in w1/w2; AAPCS64 leaves bits 63:32
    // unspecified, so widen before 64-bit pointer/counter arithmetic.
    sxtw      x1, w1                    // src_strd
    sxtw      x2, w2                    // ht

    mov       x4, x0                    // destination = pu1_src
    sub       x6, x1, #16
    sub       x0, x0, #2                // source = 2 bytes just left of pu1_src
loop_32_r_c:                            // 32 bytes per row, 4 rows per group
    ldrh      w8, [x0]                  // row 0 pair
    add       x0, x0, x1
    sxtw      x8, w8
    ldrh      w9, [x0]                  // row 1 pair
    add       x0, x0, x1
    sxtw      x9, w9
    dup       v0.8h, w8
    ldrh      w10, [x0]                 // row 2 pair
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 0, bytes 0-15
    dup       v2.8h, w9
    st1       {v0.16b}, [x4], x6        // row 0, bytes 16-31, then next row
    st1       {v2.16b}, [x4], #16       // row 1
    dup       v4.8h, w10
    st1       {v2.16b}, [x4], x6
    subs      x2, x2, #4                // 4 rows done (nothing below touches the flags)
    ldrh      w11, [x0]                 // row 3 pair
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v4.16b}, [x4], #16       // row 2
    dup       v6.8h, w11
    st1       {v4.16b}, [x4], x6
    st1       {v6.16b}, [x4], #16       // row 3
    st1       {v6.16b}, [x4], x6

    beq       end_func_r_c              // no rows left after the first group

    ldrh      w8, [x0]                  // row 4 pair
    add       x0, x0, x1
    sxtw      x8, w8
    dup       v0.8h, w8
    ldrh      w9, [x0]                  // row 5 pair
    add       x0, x0, x1
    sxtw      x9, w9
    ldrh      w10, [x0]                 // row 6 pair
    add       x0, x0, x1
    sxtw      x10, w10
    st1       {v0.16b}, [x4], #16       // row 4
    dup       v2.8h, w9
    st1       {v0.16b}, [x4], x6
    ldrh      w11, [x0]                 // row 7 pair
    add       x0, x0, x1
    sxtw      x11, w11
    st1       {v2.16b}, [x4], #16       // row 5
    dup       v4.8h, w10
    st1       {v2.16b}, [x4], x6
    st1       {v4.16b}, [x4], #16       // row 6
    dup       v6.8h, w11
    st1       {v4.16b}, [x4], x6
    subs      x2, x2, #4                // 4 more rows done
    st1       {v6.16b}, [x4], #16       // row 7
    st1       {v6.16b}, [x4], x6

    // The original followed this point with "beq end / bne loop" and a third
    // unrolled group that could never execute; taller blocks are handled by
    // looping back to the first group instead.
    bne       loop_32_r_c

end_func_r_c:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret