/* sse2-memset-atom.S revision 81d6a18c69b71288c0ab0f65e0ee594f752febc8 */
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
   run through the C preprocessor.  All arguments are read from the stack
   at the offsets DST, CHR, LEN and CHK_DST_LEN defined below.  */

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#define CFI_PUSH(REG)						\
	cfi_adjust_cfa_offset (4);				\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
	cfi_adjust_cfa_offset (-4);				\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments.  They are relative to ESP *after*
   ENTRANCE has run, because PARMS accounts for the EBX slot that
   ENTRANCE pushes in PIC builds.  */
#define DST		PARMS
#define CHR		DST+4
#define LEN		CHR+4
#define CHK_DST_LEN	(LEN+4)
/* Reload the original destination pointer as the return value.  */
#define SETRTNVAL	movl DST(%esp), %eax

#if (defined SHARED || defined __PIC__)
# define ENTRANCE	PUSH (%ebx);
/* Exact inverse of ENTRANCE, for paths that leave without returning.  */
# define UNDO_ENTRANCE	POP (%ebx)
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function, so the CFI state is put
   back to "EBX pushed" for the code that follows the ret.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
	call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
	add	$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
	add	(%ebx,%ecx,4), %ebx;				\
	add	%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
	jmp	*%ebx

	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
/* Return the caller's return address (its PC) in EBX.  */
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
/* ENTRANCE pushes nothing here, so there is nothing to undo.  */
# define UNDO_ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
	add	%ecx, %edx;					\
	jmp	*TABLE(,%ecx,4)
#endif

/* __memset_chk: length-checked entry point for memset.

   Stack arguments: DST, CHR, LEN as for memset, followed by a fourth
   word CHK_DST_LEN (presumably the known size of the destination
   object -- confirm against the caller).

   If LEN <= CHK_DST_LEN (unsigned) control continues inside memset with
   the prologue already executed and ECX = LEN.  Otherwise the stack is
   restored to its state on entry and control is transferred to
   __memset_chk_fail.

   ENTRANCE must run before any argument is read: the DST/CHR/LEN/
   CHK_DST_LEN offsets are only valid once the PIC build has pushed EBX.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	/* Flags are set from ECX - CHK_DST_LEN, i.e. len - dst_len.  */
	cmpl	CHK_DST_LEN(%esp), %ecx
	/* len <= dst_len: the fill fits, join memset after its prologue.  */
	jbe	L(memset_length_loaded)

	/* len > dst_len: leave with the stack exactly as on entry.  */
	UNDO_ENTRANCE
	jmp	__memset_chk_fail
END(__memset_chk)

/* memset: fill LEN bytes at DST with the low byte of CHR; returns DST
   in EAX (reloaded from the stack by SETRTNVAL).

   Register roles after the setup code:
     EAX  = fill byte replicated into all four bytes
     ECX  = number of bytes still to store
     EDX  = current destination; BRANCH_TO_JMPTBL_ENTRY adds ECX to it,
            so the tail stubs store at negative offsets from the end
     XMM0 = fill byte replicated into all sixteen bytes (LEN >= 32 only)
     EBX  = scratch for PIC addressing and the cache-size comparisons;
            saved by ENTRANCE in PIC builds, by PUSH/POP otherwise.  */
	.section	.text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
/* __memset_chk joins here with EBX saved (PIC) and ECX = LEN.  */
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the exact length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Tail stubs for 0..31 bytes.  EDX points one past the last byte to
   store; each chain falls through 4-byte stores and finishes with the
   0..3 byte remainder.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  The alignment of EDX is tested below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX (negative); adding it shrinks ECX by the
	   number of bytes skipped.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX >= 128 and EDX is 16 byte aligned.  Pick a store loop by
   comparing the length with the shared and data cache sizes.  */
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	/* Length >= shared cache size: take the non-temporal path, which
	   keeps EBX live.  */
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


#ifdef DATA_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE	CFI_PUSH (%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
#  define RESTORE_EBX_STATE
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	POP (%ebx)
#  define RESTORE_EBX_STATE	CFI_PUSH (%ebx)
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that a borrow from the `sub' in the loop
	   means fewer than 128 bytes remain after the current chunk.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	/* lea advances EDX without disturbing the flags from `sub'.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the last subtraction: ECX is now the 0..127 byte tail.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Length >= data cache size but below the shared cache size: same
   128-byte chunks, with prefetches ahead of the store position.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	/* CFI only: the code below is reached with EBX still pushed.  */
	RESTORE_EBX_STATE
/* Length >= shared cache size (EBX).  Store the first EBX bytes, rounded
   down to a multiple of 128, with ordinary stores and the rest with
   non-temporal stores.  */
L(128bytesormore_nt_start):
	/* ECX = length - (EBX rounded down to a multiple of 128).  */
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern.  */
	movd	%xmm0, %eax
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before any later stores.  */
	sfence
L(shared_cache_loop_end):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail stubs for 0..127 bytes following a 16-byte aligned position.
   EDX points one past the last byte to store, so EDX minus the
   remaining length is 16-byte aligned and every movdqa below hits an
   aligned address.  Each chain falls through 16-byte stores and
   finishes with the 0..15 byte remainder.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no CFI state to re-establish.  */
	RETURN_END

END (memset)