/*
 * STRLEN (expands to sse2_strlen_atom)
 *
 * Syntax:   GNU as, AT&T operand order, run through the C preprocessor.
 * Target:   32-bit x86 with SSE2, ELF (.type/.size directives are used).
 * In:       STR(%esp) = address of the byte string to measure.
 * Out:      %eax      = number of bytes preceding the first zero byte.
 * Clobbers: %ecx, %edx, %xmm0-%xmm3, flags.  %esi, %edi, %ebx and %ebp
 *           are used only by the 64-byte loop and are pushed/popped
 *           around it.
 *
 * Outline:
 *   1. Test bytes 0..15 one at a time.
 *   2. Probe sixteen 16-byte aligned blocks, one block per step.
 *   3. Loop over 64-byte aligned groups of four blocks.
 *   4. Convert the byte mask of the block holding the zero byte into
 *      the final length.
 */
#define STRLEN	sse2_strlen_atom

/* Assembler-local label (the .L prefix keeps it out of the symbol table). */
#ifndef L
# define L(label)	.L##label
#endif

/* Wrappers around the GNU as CFI directives.  Every one of them can be
   pre-defined by whoever includes or builds this file.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/* Open a global, 16-byte aligned function symbol and its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* Close the CFI region and record the size of the function.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for one 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop REG together with the matching unwind annotations.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* The only argument sits just above the return address.  */
#define PARMS		4
#define STR		PARMS
#define ENTRANCE
#define RETURN		ret

/* Byte-wise phase: leave through exit_tail<OFF> when the byte at
   OFF(%edx) is zero.  %eax is 0 during this phase, so the tail's
   "add $OFF" produces the length directly.  */
#define CHECK_BYTE(OFF)			\
	cmpb	$0, OFF(%edx);		\
	jz	L(exit_tail##OFF)

/* Block phase: compare the 16 bytes at (%eax) against XMM, which must
   be all-zero on entry, and collect the per-byte result in %edx.
   %eax is advanced past the block before the branch (lea leaves the
   flags from test untouched), so at L(exit) it is always block + 16.
   When no byte matched, XMM is all-zero again and can serve as the
   zero operand of a later probe.  */
#define PROBE16(XMM)			\
	pcmpeqb	(%eax), XMM;		\
	pmovmskb XMM, %edx;		\
	test	%edx, %edx;		\
	lea	16(%eax), %eax;		\
	jnz	L(exit)

/* Same as PROBE16, additionally zeroing NEXT for the following probe.
   pxor does not touch the integer flags, so its position between
   pmovmskb and test is harmless.  */
#define PROBE16_CLEAR(XMM, NEXT)	\
	pcmpeqb	(%eax), XMM;		\
	pmovmskb XMM, %edx;		\
	pxor	NEXT, NEXT;		\
	test	%edx, %edx;		\
	lea	16(%eax), %eax;		\
	jnz	L(exit)

	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* %edx = s */
	xor	%eax, %eax		/* length accumulator = 0 */

	/* Phase 1: bytes s[0] .. s[15], one at a time.  */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	CHECK_BYTE (1)
	CHECK_BYTE (2)
	CHECK_BYTE (3)
	CHECK_BYTE (4)
	CHECK_BYTE (5)
	CHECK_BYTE (6)
	CHECK_BYTE (7)
	CHECK_BYTE (8)
	CHECK_BYTE (9)
	CHECK_BYTE (10)
	CHECK_BYTE (11)
	CHECK_BYTE (12)
	CHECK_BYTE (13)
	CHECK_BYTE (14)
	CHECK_BYTE (15)

	/* Phase 2 setup.
	     %eax = (s & -16) + 16   first aligned block to probe; every
				     byte between s and that block was
				     already found non-zero in phase 1
	     %ecx = s + 16	     bias removed again at L(exit)  */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/* Phase 2: sixteen aligned 16-byte blocks.  The first three
	   probes also clear the register used by the probe after them.  */
	PROBE16_CLEAR (%xmm0, %xmm1)
	PROBE16_CLEAR (%xmm1, %xmm2)
	PROBE16_CLEAR (%xmm2, %xmm3)
	PROBE16 (%xmm3)

	PROBE16 (%xmm0)
	PROBE16 (%xmm1)
	PROBE16 (%xmm2)
	PROBE16 (%xmm3)

	PROBE16 (%xmm0)
	PROBE16 (%xmm1)
	PROBE16 (%xmm2)
	PROBE16 (%xmm3)

	PROBE16 (%xmm0)
	PROBE16 (%xmm1)
	PROBE16 (%xmm2)
	PROBE16 (%xmm3)

	/* Phase 3: round %eax down to a 64-byte boundary (this may step
	   back over blocks that were already probed and found free of
	   zero bytes) and examine four blocks per iteration.  */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* OR of the four masks; still 0
					   whenever the loop is re-entered */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask of block 0 */
	pmovmskb %xmm1, %esi		/* mask of block 1 */
	pmovmskb %xmm2, %edi		/* mask of block 2 */
	pmovmskb %xmm3, %ebx		/* mask of block 3 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp
	lea	64(%eax), %eax		/* lea keeps ZF from the last or */
	jz	L(aligned_64)

	/* A zero byte lies in one of the four blocks.  Pick the first
	   block with a non-empty mask, move that mask into %edx and
	   rewind %eax (currently group + 64) to "that block + 16".  */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* block 3: %eax is already right */
	lea	(%eax), %eax		/* leaves %eax unchanged */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* block 2 */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* block 1 */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* block 0: mask is already in %edx */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/* Phase 4.  Here %eax = block + 16, %ecx = s + 16 and %edx is
	   the byte mask of the block, so %eax - %ecx = block - s.  The
	   index of the lowest set mask bit is added on top of that.  */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* nothing in bytes 0..7 */
	test	$0x01, %dl
	jnz	L(exit_tail0)

	test	$0x02, %dl
	jnz	L(exit_tail1)

	test	$0x04, %dl
	jnz	L(exit_tail2)

	test	$0x08, %dl
	jnz	L(exit_tail3)

	test	$0x10, %dl
	jnz	L(exit_tail4)

	test	$0x20, %dl
	jnz	L(exit_tail5)

	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 7 is left */
L(exit_tail0):
	RETURN

	/* Zero byte is among bytes 8..15 of the block: add 8 up front and
	   reuse the exit_tail0..6 stubs for the remainder.  */
L(exit_high):
	add	$8, %eax
	test	$0x01, %dh
	jnz	L(exit_tail0)

	test	$0x02, %dh
	jnz	L(exit_tail1)

	test	$0x04, %dh
	jnz	L(exit_tail2)

	test	$0x08, %dh
	jnz	L(exit_tail3)

	test	$0x10, %dh
	jnz	L(exit_tail4)

	test	$0x20, %dh
	jnz	L(exit_tail5)

	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 7 is left */
	RETURN

	/* Exit stubs: add the byte index to %eax and return.  */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)