SkJumper_generated_win.S revision 3f81f3703a68755c88f5cc4a87728b98f34c4cd4
; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; This file is generated semi-automatically with this command:
;   $ src/jumper/build_stages.py

; Layout notes (from the code below): every routine is raw machine code emitted
; with DB; the trailing comment on each line is its AT&T-syntax disassembly.
; Stage routines fetch qwords from the stream at %rsi with lods and finish by
; jumping to the last qword fetched (jmpq *%rax).

_text SEGMENT

; _sk_start_pipeline_hsw
; Entry arguments arrive in rcx, rdx, r8, r9 (copied to rbx, rsi, r14, r15).
; Saves r12-r15, rsi, rdi, rbx and xmm6-xmm15, reads the first qword of the
; stream at rdx into r12, then while rbx+8 <= r15: zeroes ymm0-ymm7, sets
; rdi=rbx, rsi=stream after the first qword, rdx=r14, calls *r12 and advances
; rbx by 8.  Returns in rax the rbx value at which the loop stopped.
PUBLIC _sk_start_pipeline_hsw
_sk_start_pipeline_hsw LABEL PROC
  DB  65,87 ; push %r15
  DB  65,86 ; push %r14
  DB  65,85 ; push %r13
  DB  65,84 ; push %r12
  DB  86 ; push %rsi
  DB  87 ; push %rdi
  DB  83 ; push %rbx
  DB  72,129,236,160,0,0,0 ; sub $0xa0,%rsp
  DB  197,120,41,188,36,144,0,0,0 ; vmovaps %xmm15,0x90(%rsp)
  DB  197,120,41,180,36,128,0,0,0 ; vmovaps %xmm14,0x80(%rsp)
  DB  197,120,41,108,36,112 ; vmovaps %xmm13,0x70(%rsp)
  DB  197,120,41,100,36,96 ; vmovaps %xmm12,0x60(%rsp)
  DB  197,120,41,92,36,80 ; vmovaps %xmm11,0x50(%rsp)
  DB  197,120,41,84,36,64 ; vmovaps %xmm10,0x40(%rsp)
  DB  197,120,41,76,36,48 ; vmovaps %xmm9,0x30(%rsp)
  DB  197,120,41,68,36,32 ; vmovaps %xmm8,0x20(%rsp)
  DB  197,248,41,124,36,16 ; vmovaps %xmm7,0x10(%rsp)
  DB  197,248,41,52,36 ; vmovaps %xmm6,(%rsp)
  DB  77,137,207 ; mov %r9,%r15
  DB  77,137,198 ; mov %r8,%r14
  DB  72,137,203 ; mov %rcx,%rbx
  DB  72,137,214 ; mov %rdx,%rsi
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  73,137,196 ; mov %rax,%r12
  DB  73,137,245 ; mov %rsi,%r13
  DB  72,141,67,8 ; lea 0x8(%rbx),%rax
  DB  76,57,248 ; cmp %r15,%rax
  DB  118,5 ; jbe 75 <_sk_start_pipeline_hsw+0x75>
  DB  72,137,216 ; mov %rbx,%rax
  DB  235,60 ; jmp b1 <_sk_start_pipeline_hsw+0xb1>
  ; +0x75: loop body, one call per 8-wide step
  DB  197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
  DB  197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
  DB  197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
  DB  197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
  DB  197,204,87,246 ; vxorps %ymm6,%ymm6,%ymm6
  DB  197,196,87,255 ; vxorps %ymm7,%ymm7,%ymm7
  DB  72,137,223 ; mov %rbx,%rdi
  DB  76,137,238 ; mov %r13,%rsi
  DB  76,137,242 ; mov %r14,%rdx
  DB  65,255,212 ; callq *%r12
  DB  72,141,67,8 ; lea 0x8(%rbx),%rax
  DB  72,131,195,16 ; add $0x10,%rbx
  DB  76,57,251 ; cmp %r15,%rbx
  DB  72,137,195 ; mov %rax,%rbx
  DB  118,196 ; jbe 75 <_sk_start_pipeline_hsw+0x75>
  ; +0xb1: restore saved registers and return
  DB  197,248,40,52,36 ; vmovaps (%rsp),%xmm6
  DB  197,248,40,124,36,16 ; vmovaps 0x10(%rsp),%xmm7
  DB  197,120,40,68,36,32 ; vmovaps 0x20(%rsp),%xmm8
  DB  197,120,40,76,36,48 ; vmovaps 0x30(%rsp),%xmm9
  DB  197,120,40,84,36,64 ; vmovaps 0x40(%rsp),%xmm10
  DB  197,120,40,92,36,80 ; vmovaps 0x50(%rsp),%xmm11
  DB  197,120,40,100,36,96 ; vmovaps 0x60(%rsp),%xmm12
  DB  197,120,40,108,36,112 ; vmovaps 0x70(%rsp),%xmm13
  DB  197,120,40,180,36,128,0,0,0 ; vmovaps 0x80(%rsp),%xmm14
  DB  197,120,40,188,36,144,0,0,0 ; vmovaps 0x90(%rsp),%xmm15
  DB  72,129,196,160,0,0,0 ; add $0xa0,%rsp
  DB  91 ; pop %rbx
  DB  95 ; pop %rdi
  DB  94 ; pop %rsi
  DB  65,92 ; pop %r12
  DB  65,93 ; pop %r13
  DB  65,94 ; pop %r14
  DB  65,95 ; pop %r15
  DB  197,248,119 ; vzeroupper
  DB  195 ; retq

; _sk_just_return_hsw: a single ret.
PUBLIC _sk_just_return_hsw
_sk_just_return_hsw LABEL PROC
  DB  195 ; retq

; _sk_seed_shader_hsw
; ymm0 = float(broadcast edi) + broadcast 0x4(%rdx) + the 8 floats at 0x14(%rdx)
; ymm1 = float(broadcast dword at (%rax)) + broadcast 0x4(%rdx)
; ymm2 = broadcast (%rdx); ymm3-ymm7 = 0.
PUBLIC _sk_seed_shader_hsw
_sk_seed_shader_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,249,110,199 ; vmovd %edi,%xmm0
  DB  196,226,125,24,192 ; vbroadcastss %xmm0,%ymm0
  DB  197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
  DB  196,226,125,24,74,4 ; vbroadcastss 0x4(%rdx),%ymm1
  DB  197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
  DB  197,252,88,66,20 ; vaddps 0x14(%rdx),%ymm0,%ymm0
  DB  196,226,125,24,16 ; vbroadcastss (%rax),%ymm2
  DB  197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
  DB  197,236,88,201 ; vaddps %ymm1,%ymm2,%ymm1
  DB  196,226,125,24,18 ; vbroadcastss (%rdx),%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
  DB  197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
  DB  197,204,87,246 ; vxorps %ymm6,%ymm6,%ymm6
  DB  197,196,87,255 ; vxorps %ymm7,%ymm7,%ymm7
  DB  255,224 ; jmpq *%rax

; _sk_constant_color_hsw: ymm0-ymm3 = broadcasts of the four floats at (%rax).
PUBLIC _sk_constant_color_hsw
_sk_constant_color_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,0 ; vbroadcastss (%rax),%ymm0
  DB  196,226,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm1
  DB  196,226,125,24,80,8 ; vbroadcastss 0x8(%rax),%ymm2
  DB  196,226,125,24,88,12 ; vbroadcastss 0xc(%rax),%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clear_hsw: ymm0-ymm3 = 0.  The first qword fetched by lods is not used.
PUBLIC _sk_clear_hsw
_sk_clear_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
  DB  197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
  DB  197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  255,224 ; jmpq *%rax

; _sk_plus__hsw: ymm0-ymm3 += ymm4-ymm7 (lane-wise, register by register).
PUBLIC _sk_plus__hsw
_sk_plus__hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0
  DB  197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1
  DB  197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2
  DB  197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_srcover_hsw: k = broadcast (%rdx) - ymm3; ymmN += ymm(N+4) * k for N=0..3.
PUBLIC _sk_srcover_hsw
_sk_srcover_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
  DB  196,194,93,184,192 ; vfmadd231ps %ymm8,%ymm4,%ymm0
  DB  196,194,85,184,200 ; vfmadd231ps %ymm8,%ymm5,%ymm1
  DB  196,194,77,184,208 ; vfmadd231ps %ymm8,%ymm6,%ymm2
  DB  196,194,69,184,216 ; vfmadd231ps %ymm8,%ymm7,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_dstover_hsw: k = broadcast (%rdx) - ymm7; ymmN = ymmN * k + ymm(N+4) for N=0..3.
PUBLIC _sk_dstover_hsw
_sk_dstover_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
  DB  196,226,61,168,196 ; vfmadd213ps %ymm4,%ymm8,%ymm0
  DB  196,226,61,168,205 ; vfmadd213ps %ymm5,%ymm8,%ymm1
  DB  196,226,61,168,214 ; vfmadd213ps %ymm6,%ymm8,%ymm2
  DB  196,226,61,168,223 ; vfmadd213ps %ymm7,%ymm8,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clamp_0_hsw: ymm0-ymm3 = max(ymmN, 0).
PUBLIC _sk_clamp_0_hsw
_sk_clamp_0_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
  DB  196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
  DB  196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
  DB  196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
  DB  196,193,100,95,216 ; vmaxps %ymm8,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clamp_1_hsw: ymm0-ymm3 = min(ymmN, broadcast (%rdx)).
PUBLIC _sk_clamp_1_hsw
_sk_clamp_1_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
  DB  196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
  DB  196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
  DB  196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clamp_a_hsw: ymm3 = min(ymm3, broadcast (%rdx)); ymm0-ymm2 = min(ymmN, ymm3).
PUBLIC _sk_clamp_a_hsw
_sk_clamp_a_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
  DB  197,252,93,195 ; vminps %ymm3,%ymm0,%ymm0
  DB  197,244,93,203 ; vminps %ymm3,%ymm1,%ymm1
  DB  197,236,93,211 ; vminps %ymm3,%ymm2,%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_set_rgb_hsw: ymm0-ymm2 = broadcasts of the three floats at (%rax); ymm3 untouched.
PUBLIC _sk_set_rgb_hsw
_sk_set_rgb_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,0 ; vbroadcastss (%rax),%ymm0
  DB  196,226,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm1
  DB  196,226,125,24,80,8 ; vbroadcastss 0x8(%rax),%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_swap_rb_hsw: exchanges ymm0 and ymm2 through ymm8.  First lods result unused.
PUBLIC _sk_swap_rb_hsw
_sk_swap_rb_hsw LABEL PROC
  DB  197,124,40,192 ; vmovaps %ymm0,%ymm8
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,40,194 ; vmovaps %ymm2,%ymm0
  DB  197,124,41,194 ; vmovaps %ymm8,%ymm2
  DB  255,224 ; jmpq *%rax

; _sk_swap_hsw: exchanges ymm0-ymm3 with ymm4-ymm7 (ymm8-ymm11 as temporaries).
PUBLIC _sk_swap_hsw
_sk_swap_hsw LABEL PROC
  DB  197,124,40,195 ; vmovaps %ymm3,%ymm8
  DB  197,124,40,202 ; vmovaps %ymm2,%ymm9
  DB  197,124,40,209 ; vmovaps %ymm1,%ymm10
  DB  197,124,40,216 ; vmovaps %ymm0,%ymm11
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,40,196 ; vmovaps %ymm4,%ymm0
  DB  197,252,40,205 ; vmovaps %ymm5,%ymm1
  DB  197,252,40,214 ; vmovaps %ymm6,%ymm2
  DB  197,252,40,223 ; vmovaps %ymm7,%ymm3
  DB  197,124,41,220 ; vmovaps %ymm11,%ymm4
  DB  197,124,41,213 ; vmovaps %ymm10,%ymm5
  DB  197,124,41,206 ; vmovaps %ymm9,%ymm6
  DB  197,124,41,199 ; vmovaps %ymm8,%ymm7
  DB  255,224 ; jmpq *%rax

; _sk_move_src_dst_hsw: copies ymm0-ymm3 into ymm4-ymm7.
PUBLIC _sk_move_src_dst_hsw
_sk_move_src_dst_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,40,224 ; vmovaps %ymm0,%ymm4
  DB  197,252,40,233 ; vmovaps %ymm1,%ymm5
  DB  197,252,40,242 ; vmovaps %ymm2,%ymm6
  DB  197,252,40,251 ; vmovaps %ymm3,%ymm7
  DB  255,224 ; jmpq *%rax

; _sk_move_dst_src_hsw: copies ymm4-ymm7 into ymm0-ymm3.
PUBLIC _sk_move_dst_src_hsw
_sk_move_dst_src_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,40,196 ; vmovaps %ymm4,%ymm0
  DB  197,252,40,205 ; vmovaps %ymm5,%ymm1
  DB  197,252,40,214 ; vmovaps %ymm6,%ymm2
  DB  197,252,40,223 ; vmovaps %ymm7,%ymm3
  DB  255,224 ; jmpq *%rax

; _sk_premul_hsw: ymm0-ymm2 *= ymm3.
PUBLIC _sk_premul_hsw
_sk_premul_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
  DB  197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
  DB  197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_unpremul_hsw: k = (ymm3 == 0) ? 0 : broadcast (%rdx) / ymm3; ymm0-ymm2 *= k.
PUBLIC _sk_unpremul_hsw
_sk_unpremul_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
  DB  196,65,100,194,200,0 ; vcmpeqps %ymm8,%ymm3,%ymm9
  DB  196,98,125,24,18 ; vbroadcastss (%rdx),%ymm10
  DB  197,44,94,211 ; vdivps %ymm3,%ymm10,%ymm10
  DB  196,67,45,74,192,144 ; vblendvps %ymm9,%ymm8,%ymm10,%ymm8
  DB  197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_from_srgb_hsw
; For each of ymm0-ymm2 (x): if x < c[0x44] the result is x*c[0x40], otherwise
; x*x*(c[0x3c]*x + c[0x38]) + c[0x34], where c[...] are floats at (%rdx)+offset.
PUBLIC _sk_from_srgb_hsw
_sk_from_srgb_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,66,64 ; vbroadcastss 0x40(%rdx),%ymm8
  DB  197,60,89,200 ; vmulps %ymm0,%ymm8,%ymm9
  DB  197,124,89,208 ; vmulps %ymm0,%ymm0,%ymm10
  DB  196,98,125,24,90,60 ; vbroadcastss 0x3c(%rdx),%ymm11
  DB  196,98,125,24,98,56 ; vbroadcastss 0x38(%rdx),%ymm12
  DB  196,65,124,40,235 ; vmovaps %ymm11,%ymm13
  DB  196,66,125,168,236 ; vfmadd213ps %ymm12,%ymm0,%ymm13
  DB  196,98,125,24,114,52 ; vbroadcastss 0x34(%rdx),%ymm14
  DB  196,66,45,168,238 ; vfmadd213ps %ymm14,%ymm10,%ymm13
  DB  196,98,125,24,82,68 ; vbroadcastss 0x44(%rdx),%ymm10
  DB  196,193,124,194,194,1 ; vcmpltps %ymm10,%ymm0,%ymm0
  DB  196,195,21,74,193,0 ; vblendvps %ymm0,%ymm9,%ymm13,%ymm0
  DB  197,60,89,201 ; vmulps %ymm1,%ymm8,%ymm9
  DB  197,116,89,233 ; vmulps %ymm1,%ymm1,%ymm13
  DB  196,65,124,40,251 ; vmovaps %ymm11,%ymm15
  DB  196,66,117,168,252 ; vfmadd213ps %ymm12,%ymm1,%ymm15
  DB  196,66,21,168,254 ; vfmadd213ps %ymm14,%ymm13,%ymm15
  DB  196,193,116,194,202,1 ; vcmpltps %ymm10,%ymm1,%ymm1
  DB  196,195,5,74,201,16 ; vblendvps %ymm1,%ymm9,%ymm15,%ymm1
  DB  197,60,89,194 ; vmulps %ymm2,%ymm8,%ymm8
  DB  197,108,89,202 ; vmulps %ymm2,%ymm2,%ymm9
  DB  196,66,109,168,220 ; vfmadd213ps %ymm12,%ymm2,%ymm11
  DB  196,66,53,168,222 ; vfmadd213ps %ymm14,%ymm9,%ymm11
  DB  196,193,108,194,210,1 ; vcmpltps %ymm10,%ymm2,%ymm2
  DB  196,195,37,74,208,32 ; vblendvps %ymm2,%ymm8,%ymm11,%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_to_srgb_hsw
; For each of ymm0-ymm2 (x), with r = rsqrt(x): if x < c[0x58] the result is
; x*c[0x48], otherwise min(c[0], c[0x50]*rcp(r) + c[0x54] + c[0x4c]*rsqrt(r)).
; c[...] are floats at (%rdx)+offset.  The first of the two trailing lods
; results is not used.
PUBLIC _sk_to_srgb_hsw
_sk_to_srgb_hsw LABEL PROC
  DB  197,124,82,192 ; vrsqrtps %ymm0,%ymm8
  DB  196,65,124,83,200 ; vrcpps %ymm8,%ymm9
  DB  196,65,124,82,208 ; vrsqrtps %ymm8,%ymm10
  DB  196,98,125,24,66,72 ; vbroadcastss 0x48(%rdx),%ymm8
  DB  197,60,89,216 ; vmulps %ymm0,%ymm8,%ymm11
  DB  196,98,125,24,34 ; vbroadcastss (%rdx),%ymm12
  DB  196,98,125,24,106,76 ; vbroadcastss 0x4c(%rdx),%ymm13
  DB  196,98,125,24,114,80 ; vbroadcastss 0x50(%rdx),%ymm14
  DB  196,98,125,24,122,84 ; vbroadcastss 0x54(%rdx),%ymm15
  DB  196,66,13,168,207 ; vfmadd213ps %ymm15,%ymm14,%ymm9
  DB  196,66,21,184,202 ; vfmadd231ps %ymm10,%ymm13,%ymm9
  DB  196,65,28,93,201 ; vminps %ymm9,%ymm12,%ymm9
  DB  196,98,125,24,82,88 ; vbroadcastss 0x58(%rdx),%ymm10
  DB  196,193,124,194,194,1 ; vcmpltps %ymm10,%ymm0,%ymm0
  DB  196,195,53,74,195,0 ; vblendvps %ymm0,%ymm11,%ymm9,%ymm0
  DB  197,124,82,201 ; vrsqrtps %ymm1,%ymm9
  DB  196,65,124,83,217 ; vrcpps %ymm9,%ymm11
  DB  196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
  DB  196,66,13,168,223 ; vfmadd213ps %ymm15,%ymm14,%ymm11
  DB  196,66,21,184,217 ; vfmadd231ps %ymm9,%ymm13,%ymm11
  DB  197,60,89,201 ; vmulps %ymm1,%ymm8,%ymm9
  DB  196,65,28,93,219 ; vminps %ymm11,%ymm12,%ymm11
  DB  196,193,116,194,202,1 ; vcmpltps %ymm10,%ymm1,%ymm1
  DB  196,195,37,74,201,16 ; vblendvps %ymm1,%ymm9,%ymm11,%ymm1
  DB  197,124,82,202 ; vrsqrtps %ymm2,%ymm9
  DB  196,65,124,83,217 ; vrcpps %ymm9,%ymm11
  DB  196,66,13,168,223 ; vfmadd213ps %ymm15,%ymm14,%ymm11
  DB  196,65,124,82,201 ; vrsqrtps %ymm9,%ymm9
  DB  196,66,21,184,217 ; vfmadd231ps %ymm9,%ymm13,%ymm11
  DB  196,65,28,93,203 ; vminps %ymm11,%ymm12,%ymm9
  DB  197,60,89,194 ; vmulps %ymm2,%ymm8,%ymm8
  DB  196,193,108,194,210,1 ; vcmpltps %ymm10,%ymm2,%ymm2
  DB  196,195,53,74,208,32 ; vblendvps %ymm2,%ymm8,%ymm9,%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_scale_u8_hsw
; p = pointer stored at (%rax).  c = float(8 bytes at p+rdi, zero-extended)
; * broadcast 0xc(%rdx); ymm0-ymm3 *= c.
PUBLIC _sk_scale_u8_hsw
_sk_scale_u8_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,98,125,49,4,56 ; vpmovzxbd (%rax,%rdi,1),%ymm8
  DB  196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
  DB  196,98,125,24,74,12 ; vbroadcastss 0xc(%rdx),%ymm9
  DB  196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
  DB  197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2
  DB  197,188,89,219 ; vmulps %ymm3,%ymm8,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_lerp_u8_hsw
; c is computed as in _sk_scale_u8_hsw; ymmN = (ymmN - ymm(N+4)) * c + ymm(N+4)
; for N=0..3.
PUBLIC _sk_lerp_u8_hsw
_sk_lerp_u8_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,98,125,49,4,56 ; vpmovzxbd (%rax,%rdi,1),%ymm8
  DB  196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
  DB  196,98,125,24,74,12 ; vbroadcastss 0xc(%rdx),%ymm9
  DB  196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
  DB  197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
  DB  196,226,61,168,196 ; vfmadd213ps %ymm4,%ymm8,%ymm0
  DB  197,244,92,205 ; vsubps %ymm5,%ymm1,%ymm1
  DB  196,226,61,168,205 ; vfmadd213ps %ymm5,%ymm8,%ymm1
  DB  197,236,92,214 ; vsubps %ymm6,%ymm2,%ymm2
  DB  196,226,61,168,214 ; vfmadd213ps %ymm6,%ymm8,%ymm2
  DB  197,228,92,223 ; vsubps %ymm7,%ymm3,%ymm3
  DB  196,226,61,168,223 ; vfmadd213ps %ymm7,%ymm8,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_load_tables_hsw
; Loads 8 dwords from (pointer at 0(%rax)) + rdi*4.  With m = broadcast
; 0x10(%rdx): ymm0/ymm1/ymm2 are gathered from the tables whose pointers are at
; 0x8/0x10/0x18(%rax), indexed by (dword >> 0/8/16) & m.  ymm3 =
; float(dword >> 24) * broadcast 0xc(%rdx).
PUBLIC _sk_load_tables_hsw
_sk_load_tables_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,8 ; mov (%rax),%rcx
  DB  76,139,64,8 ; mov 0x8(%rax),%r8
  DB  197,252,16,28,185 ; vmovups (%rcx,%rdi,4),%ymm3
  DB  196,226,125,24,82,16 ; vbroadcastss 0x10(%rdx),%ymm2
  DB  197,236,84,203 ; vandps %ymm3,%ymm2,%ymm1
  DB  197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
  DB  197,124,194,192,0 ; vcmpeqps %ymm0,%ymm0,%ymm8
  DB  196,65,124,40,200 ; vmovaps %ymm8,%ymm9
  DB  196,194,53,146,4,136 ; vgatherdps %ymm9,(%r8,%ymm1,4),%ymm0
  DB  72,139,72,16 ; mov 0x10(%rax),%rcx
  DB  197,245,114,211,8 ; vpsrld $0x8,%ymm3,%ymm1
  DB  197,108,84,201 ; vandps %ymm1,%ymm2,%ymm9
  DB  196,65,124,40,208 ; vmovaps %ymm8,%ymm10
  DB  196,162,45,146,12,137 ; vgatherdps %ymm10,(%rcx,%ymm9,4),%ymm1
  DB  72,139,64,24 ; mov 0x18(%rax),%rax
  DB  197,181,114,211,16 ; vpsrld $0x10,%ymm3,%ymm9
  DB  196,65,108,84,201 ; vandps %ymm9,%ymm2,%ymm9
  DB  196,162,61,146,20,136 ; vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
  DB  197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
  DB  197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
  DB  196,98,125,24,66,12 ; vbroadcastss 0xc(%rdx),%ymm8
  DB  196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_load_565_hsw
; Loads 8 words from (pointer at (%rax)) + rdi*2, zero-extended to dwords.
; ymm0/ymm1/ymm2 = float(v & mask) * scale with masks at 0x68/0x6c/0x70(%rdx)
; and scales at 0x74/0x78/0x7c(%rdx); ymm3 = broadcast (%rdx).
PUBLIC _sk_load_565_hsw
_sk_load_565_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,226,125,51,20,120 ; vpmovzxwd (%rax,%rdi,2),%ymm2
  DB  196,226,125,88,66,104 ; vpbroadcastd 0x68(%rdx),%ymm0
  DB  197,253,219,194 ; vpand %ymm2,%ymm0,%ymm0
  DB  197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
  DB  196,226,125,24,74,116 ; vbroadcastss 0x74(%rdx),%ymm1
  DB  197,244,89,192 ; vmulps %ymm0,%ymm1,%ymm0
  DB  196,226,125,88,74,108 ; vpbroadcastd 0x6c(%rdx),%ymm1
  DB  197,245,219,202 ; vpand %ymm2,%ymm1,%ymm1
  DB  197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
  DB  196,226,125,24,90,120 ; vbroadcastss 0x78(%rdx),%ymm3
  DB  197,228,89,201 ; vmulps %ymm1,%ymm3,%ymm1
  DB  196,226,125,88,90,112 ; vpbroadcastd 0x70(%rdx),%ymm3
  DB  197,229,219,210 ; vpand %ymm2,%ymm3,%ymm2
  DB  197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
  DB  196,226,125,24,90,124 ; vbroadcastss 0x7c(%rdx),%ymm3
  DB  197,228,89,210 ; vmulps %ymm2,%ymm3,%ymm2
  DB  196,226,125,24,26 ; vbroadcastss (%rdx),%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_store_565_hsw
; Packs int(ymm0*s)<<11 | int(ymm1*t)<<5 | int(ymm2*s), with s = 0x80(%rdx) and
; t = 0x84(%rdx), narrows to 8 words and stores 16 bytes at (pointer at
; (%rax)) + rdi*2.
PUBLIC _sk_store_565_hsw
_sk_store_565_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,98,125,24,130,128,0,0,0 ; vbroadcastss 0x80(%rdx),%ymm8
  DB  197,60,89,200 ; vmulps %ymm0,%ymm8,%ymm9
  DB  196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
  DB  196,193,53,114,241,11 ; vpslld $0xb,%ymm9,%ymm9
  DB  196,98,125,24,146,132,0,0,0 ; vbroadcastss 0x84(%rdx),%ymm10
  DB  197,44,89,209 ; vmulps %ymm1,%ymm10,%ymm10
  DB  196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,45,114,242,5 ; vpslld $0x5,%ymm10,%ymm10
  DB  196,65,45,235,201 ; vpor %ymm9,%ymm10,%ymm9
  DB  197,60,89,194 ; vmulps %ymm2,%ymm8,%ymm8
  DB  196,65,125,91,192 ; vcvtps2dq %ymm8,%ymm8
  DB  196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
  DB  196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
  DB  196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
  DB  197,122,127,4,120 ; vmovdqu %xmm8,(%rax,%rdi,2)
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_load_8888_hsw
; Loads 8 dwords from (pointer at (%rax)) + rdi*4.  With m = 0x10(%rdx) and
; k = 0xc(%rdx): ymm0/ymm1/ymm2 = float((v >> 0/8/16) & m) * k and
; ymm3 = float(v >> 24) * k.
PUBLIC _sk_load_8888_hsw
_sk_load_8888_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  197,252,16,28,184 ; vmovups (%rax,%rdi,4),%ymm3
  DB  196,226,125,24,82,16 ; vbroadcastss 0x10(%rdx),%ymm2
  DB  197,236,84,195 ; vandps %ymm3,%ymm2,%ymm0
  DB  197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
  DB  196,98,125,24,66,12 ; vbroadcastss 0xc(%rdx),%ymm8
  DB  197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,245,114,211,8 ; vpsrld $0x8,%ymm3,%ymm1
  DB  197,236,84,201 ; vandps %ymm1,%ymm2,%ymm1
  DB  197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
  DB  197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,181,114,211,16 ; vpsrld $0x10,%ymm3,%ymm9
  DB  196,193,108,84,209 ; vandps %ymm9,%ymm2,%ymm2
  DB  197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
  DB  197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2
  DB  197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
  DB  197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
  DB  196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_store_8888_hsw
; With k = 0x8(%rdx): stores int(ymm0*k) | int(ymm1*k)<<8 | int(ymm2*k)<<16 |
; int(ymm3*k)<<24 as 8 dwords at (pointer at (%rax)) + rdi*4.
PUBLIC _sk_store_8888_hsw
_sk_store_8888_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,98,125,24,66,8 ; vbroadcastss 0x8(%rdx),%ymm8
  DB  197,60,89,200 ; vmulps %ymm0,%ymm8,%ymm9
  DB  196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
  DB  197,60,89,209 ; vmulps %ymm1,%ymm8,%ymm10
  DB  196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,45,114,242,8 ; vpslld $0x8,%ymm10,%ymm10
  DB  196,65,45,235,201 ; vpor %ymm9,%ymm10,%ymm9
  DB  197,60,89,210 ; vmulps %ymm2,%ymm8,%ymm10
  DB  196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,45,114,242,16 ; vpslld $0x10,%ymm10,%ymm10
  DB  197,60,89,195 ; vmulps %ymm3,%ymm8,%ymm8
  DB  196,65,125,91,192 ; vcvtps2dq %ymm8,%ymm8
  DB  196,193,61,114,240,24 ; vpslld $0x18,%ymm8,%ymm8
  DB  196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
  DB  196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
  DB  197,126,127,4,184 ; vmovdqu %ymm8,(%rax,%rdi,4)
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_load_f16_hsw
; Loads 64 bytes from (pointer at (%rax)) + rdi*8, regroups the 16-bit words
; with the unpack sequence below and converts each group of 8 halves to floats
; in ymm0-ymm3.
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  197,250,111,4,248 ; vmovdqu (%rax,%rdi,8),%xmm0
  DB  197,250,111,76,248,16 ; vmovdqu 0x10(%rax,%rdi,8),%xmm1
  DB  197,250,111,84,248,32 ; vmovdqu 0x20(%rax,%rdi,8),%xmm2
  DB  197,250,111,92,248,48 ; vmovdqu 0x30(%rax,%rdi,8),%xmm3
  DB  197,121,97,193 ; vpunpcklwd %xmm1,%xmm0,%xmm8
  DB  197,249,105,193 ; vpunpckhwd %xmm1,%xmm0,%xmm0
  DB  197,233,97,203 ; vpunpcklwd %xmm3,%xmm2,%xmm1
  DB  197,233,105,211 ; vpunpckhwd %xmm3,%xmm2,%xmm2
  DB  197,57,97,200 ; vpunpcklwd %xmm0,%xmm8,%xmm9
  DB  197,57,105,192 ; vpunpckhwd %xmm0,%xmm8,%xmm8
  DB  197,241,97,218 ; vpunpcklwd %xmm2,%xmm1,%xmm3
  DB  197,113,105,210 ; vpunpckhwd %xmm2,%xmm1,%xmm10
  DB  197,177,108,195 ; vpunpcklqdq %xmm3,%xmm9,%xmm0
  DB  196,226,125,19,192 ; vcvtph2ps %xmm0,%ymm0
  DB  197,177,109,203 ; vpunpckhqdq %xmm3,%xmm9,%xmm1
  DB  196,226,125,19,201 ; vcvtph2ps %xmm1,%ymm1
  DB  196,193,57,108,210 ; vpunpcklqdq %xmm10,%xmm8,%xmm2
  DB  196,226,125,19,210 ; vcvtph2ps %xmm2,%ymm2
  DB  196,193,57,109,218 ; vpunpckhqdq %xmm10,%xmm8,%xmm3
  DB  196,226,125,19,219 ; vcvtph2ps %xmm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_store_f16_hsw
; Converts ymm0-ymm3 to half floats (vcvtps2ph, imm 4), interleaves them with
; the unpack sequence below and stores 64 bytes at (pointer at (%rax)) + rdi*8.
PUBLIC _sk_store_f16_hsw
_sk_store_f16_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,139,0 ; mov (%rax),%rax
  DB  196,195,125,29,192,4 ; vcvtps2ph $0x4,%ymm0,%xmm8
  DB  196,195,125,29,201,4 ; vcvtps2ph $0x4,%ymm1,%xmm9
  DB  196,195,125,29,210,4 ; vcvtps2ph $0x4,%ymm2,%xmm10
  DB  196,195,125,29,219,4 ; vcvtps2ph $0x4,%ymm3,%xmm11
  DB  196,65,57,97,225 ; vpunpcklwd %xmm9,%xmm8,%xmm12
  DB  196,65,57,105,193 ; vpunpckhwd %xmm9,%xmm8,%xmm8
  DB  196,65,41,97,203 ; vpunpcklwd %xmm11,%xmm10,%xmm9
  DB  196,65,41,105,211 ; vpunpckhwd %xmm11,%xmm10,%xmm10
  DB  196,65,25,98,217 ; vpunpckldq %xmm9,%xmm12,%xmm11
  DB  197,122,127,28,248 ; vmovdqu %xmm11,(%rax,%rdi,8)
  DB  196,65,25,106,201 ; vpunpckhdq %xmm9,%xmm12,%xmm9
  DB  197,122,127,76,248,16 ; vmovdqu %xmm9,0x10(%rax,%rdi,8)
  DB  196,65,57,98,202 ; vpunpckldq %xmm10,%xmm8,%xmm9
  DB  197,122,127,76,248,32 ; vmovdqu %xmm9,0x20(%rax,%rdi,8)
  DB  196,65,57,106,194 ; vpunpckhdq %xmm10,%xmm8,%xmm8
  DB  197,122,127,68,248,48 ; vmovdqu %xmm8,0x30(%rax,%rdi,8)
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clamp_x_hsw
; limit = broadcast dword at (%rax) plus all-ones (integer add of -1 to the bit
; pattern); ymm0 = max(0, min(ymm0, limit)).
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,88,0 ; vpbroadcastd (%rax),%ymm8
  DB  196,65,53,118,201 ; vpcmpeqd %ymm9,%ymm9,%ymm9
  DB  196,65,61,254,193 ; vpaddd %ymm9,%ymm8,%ymm8
  DB  196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
  DB  196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
  DB  197,188,95,192 ; vmaxps %ymm0,%ymm8,%ymm0
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clamp_y_hsw: same computation as _sk_clamp_x_hsw, applied to ymm1.
PUBLIC _sk_clamp_y_hsw
_sk_clamp_y_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,88,0 ; vpbroadcastd (%rax),%ymm8
  DB  196,65,53,118,201 ; vpcmpeqd %ymm9,%ymm9,%ymm9
  DB  196,65,61,254,193 ; vpaddd %ymm9,%ymm8,%ymm8
  DB  196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
  DB  196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
  DB  197,188,95,201 ; vmaxps %ymm1,%ymm8,%ymm1
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_matrix_2x3_hsw
; With m = the six floats at (%rax), x = ymm0, y = ymm1:
;   ymm0 = m[0]*x + m[2]*y + m[4];  ymm1 = m[1]*x + m[3]*y + m[5].
PUBLIC _sk_matrix_2x3_hsw
_sk_matrix_2x3_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,8 ; vbroadcastss (%rax),%ymm9
  DB  196,98,125,24,80,8 ; vbroadcastss 0x8(%rax),%ymm10
  DB  196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8
  DB  196,66,117,184,194 ; vfmadd231ps %ymm10,%ymm1,%ymm8
  DB  196,66,125,184,193 ; vfmadd231ps %ymm9,%ymm0,%ymm8
  DB  196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
  DB  196,98,125,24,88,12 ; vbroadcastss 0xc(%rax),%ymm11
  DB  196,98,125,24,72,20 ; vbroadcastss 0x14(%rax),%ymm9
  DB  196,66,117,184,203 ; vfmadd231ps %ymm11,%ymm1,%ymm9
  DB  196,66,125,184,202 ; vfmadd231ps %ymm10,%ymm0,%ymm9
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,124,41,192 ; vmovaps %ymm8,%ymm0
  DB  197,124,41,201 ; vmovaps %ymm9,%ymm1
  DB  255,224 ; jmpq *%rax

; _sk_matrix_3x4_hsw
; With m = the twelve floats at (%rax), x/y/z = ymm0/ymm1/ymm2:
;   ymm0 = m[0]*x + m[3]*y + m[6]*z + m[9]
;   ymm1 = m[1]*x + m[4]*y + m[7]*z + m[10]
;   ymm2 = m[2]*x + m[5]*y + m[8]*z + m[11]
PUBLIC _sk_matrix_3x4_hsw
_sk_matrix_3x4_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,8 ; vbroadcastss (%rax),%ymm9
  DB  196,98,125,24,80,12 ; vbroadcastss 0xc(%rax),%ymm10
  DB  196,98,125,24,88,24 ; vbroadcastss 0x18(%rax),%ymm11
  DB  196,98,125,24,64,36 ; vbroadcastss 0x24(%rax),%ymm8
  DB  196,66,109,184,195 ; vfmadd231ps %ymm11,%ymm2,%ymm8
  DB  196,66,117,184,194 ; vfmadd231ps %ymm10,%ymm1,%ymm8
  DB  196,66,125,184,193 ; vfmadd231ps %ymm9,%ymm0,%ymm8
  DB  196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
  DB  196,98,125,24,88,16 ; vbroadcastss 0x10(%rax),%ymm11
  DB  196,98,125,24,96,28 ; vbroadcastss 0x1c(%rax),%ymm12
  DB  196,98,125,24,72,40 ; vbroadcastss 0x28(%rax),%ymm9
  DB  196,66,109,184,204 ; vfmadd231ps %ymm12,%ymm2,%ymm9
  DB  196,66,117,184,203 ; vfmadd231ps %ymm11,%ymm1,%ymm9
  DB  196,66,125,184,202 ; vfmadd231ps %ymm10,%ymm0,%ymm9
  DB  196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
  DB  196,98,125,24,96,20 ; vbroadcastss 0x14(%rax),%ymm12
  DB  196,98,125,24,104,32 ; vbroadcastss 0x20(%rax),%ymm13
  DB  196,98,125,24,80,44 ; vbroadcastss 0x2c(%rax),%ymm10
  DB  196,66,109,184,213 ; vfmadd231ps %ymm13,%ymm2,%ymm10
  DB  196,66,117,184,212 ; vfmadd231ps %ymm12,%ymm1,%ymm10
  DB  196,66,125,184,211 ; vfmadd231ps %ymm11,%ymm0,%ymm10
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,124,41,192 ; vmovaps %ymm8,%ymm0
  DB  197,124,41,201 ; vmovaps %ymm9,%ymm1
  DB  197,124,41,210 ; vmovaps %ymm10,%ymm2
  DB  255,224 ; jmpq *%rax

; _sk_linear_gradient_2stops_hsw
; With c = the eight floats at (%rax) and t = ymm0 on entry:
;   ymmN = c[N] + t * c[N+4] for N=0..3.
PUBLIC _sk_linear_gradient_2stops_hsw
_sk_linear_gradient_2stops_hsw LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,72,16 ; vbroadcastss 0x10(%rax),%ymm1
  DB  196,98,125,24,0 ; vbroadcastss (%rax),%ymm8
  DB  196,98,125,184,193 ; vfmadd231ps %ymm1,%ymm0,%ymm8
  DB  196,226,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm2
  DB  196,226,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm1
  DB  196,226,125,184,202 ; vfmadd231ps %ymm2,%ymm0,%ymm1
  DB  196,226,125,24,88,24 ; vbroadcastss 0x18(%rax),%ymm3
  DB  196,226,125,24,80,8 ; vbroadcastss 0x8(%rax),%ymm2
  DB  196,226,125,184,211 ; vfmadd231ps %ymm3,%ymm0,%ymm2
  DB  196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9
  DB  196,226,125,24,88,12 ; vbroadcastss 0xc(%rax),%ymm3
  DB  196,194,125,184,217 ; vfmadd231ps %ymm9,%ymm0,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,124,41,192 ; vmovaps %ymm8,%ymm0
  DB  255,224 ; jmpq *%rax

; _sk_start_pipeline_avx
; Entry arguments arrive in rcx, rdx, r8, r9 (copied to rbx, rsi, r14, r15).
; Saves r12-r15, rsi, rdi, rbx and xmm6-xmm15, reads the first qword of the
; stream at rdx into r12, then while rbx+8 <= r15: zeroes ymm0-ymm7, sets
; rdi=rbx, rsi=stream after the first qword, rdx=r14, calls *r12 and advances
; rbx by 8.  Returns in rax the rbx value at which the loop stopped.
PUBLIC _sk_start_pipeline_avx
_sk_start_pipeline_avx LABEL PROC
  DB  65,87 ; push %r15
  DB  65,86 ; push %r14
  DB  65,85 ; push %r13
  DB  65,84 ; push %r12
  DB  86 ; push %rsi
  DB  87 ; push %rdi
  DB  83 ; push %rbx
  DB  72,129,236,160,0,0,0 ; sub $0xa0,%rsp
  DB  197,120,41,188,36,144,0,0,0 ; vmovaps %xmm15,0x90(%rsp)
  DB  197,120,41,180,36,128,0,0,0 ; vmovaps %xmm14,0x80(%rsp)
  DB  197,120,41,108,36,112 ; vmovaps %xmm13,0x70(%rsp)
  DB  197,120,41,100,36,96 ; vmovaps %xmm12,0x60(%rsp)
  DB  197,120,41,92,36,80 ; vmovaps %xmm11,0x50(%rsp)
  DB  197,120,41,84,36,64 ; vmovaps %xmm10,0x40(%rsp)
  DB  197,120,41,76,36,48 ; vmovaps %xmm9,0x30(%rsp)
  DB  197,120,41,68,36,32 ; vmovaps %xmm8,0x20(%rsp)
  DB  197,248,41,124,36,16 ; vmovaps %xmm7,0x10(%rsp)
  DB  197,248,41,52,36 ; vmovaps %xmm6,(%rsp)
  DB  77,137,207 ; mov %r9,%r15
  DB  77,137,198 ; mov %r8,%r14
  DB  72,137,203 ; mov %rcx,%rbx
  DB  72,137,214 ; mov %rdx,%rsi
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  73,137,196 ; mov %rax,%r12
  DB  73,137,245 ; mov %rsi,%r13
  DB  72,141,67,8 ; lea 0x8(%rbx),%rax
  DB  76,57,248 ; cmp %r15,%rax
  DB  118,5 ; jbe 75 <_sk_start_pipeline_avx+0x75>
  DB  72,137,216 ; mov %rbx,%rax
  DB  235,60 ; jmp b1 <_sk_start_pipeline_avx+0xb1>
  ; +0x75: loop body, one call per 8-wide step
  DB  197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
  DB  197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
  DB  197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
  DB  197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
  DB  197,204,87,246 ; vxorps %ymm6,%ymm6,%ymm6
  DB  197,196,87,255 ; vxorps %ymm7,%ymm7,%ymm7
  DB  72,137,223 ; mov %rbx,%rdi
  DB  76,137,238 ; mov %r13,%rsi
  DB  76,137,242 ; mov %r14,%rdx
  DB  65,255,212 ; callq *%r12
  DB  72,141,67,8 ; lea 0x8(%rbx),%rax
  DB  72,131,195,16 ; add $0x10,%rbx
  DB  76,57,251 ; cmp %r15,%rbx
  DB  72,137,195 ; mov %rax,%rbx
  DB  118,196 ; jbe 75 <_sk_start_pipeline_avx+0x75>
  ; +0xb1: restore saved registers and return
  DB  197,248,40,52,36 ; vmovaps (%rsp),%xmm6
  DB  197,248,40,124,36,16 ; vmovaps 0x10(%rsp),%xmm7
  DB  197,120,40,68,36,32 ; vmovaps 0x20(%rsp),%xmm8
  DB  197,120,40,76,36,48 ; vmovaps 0x30(%rsp),%xmm9
  DB  197,120,40,84,36,64 ; vmovaps 0x40(%rsp),%xmm10
  DB  197,120,40,92,36,80 ; vmovaps 0x50(%rsp),%xmm11
  DB  197,120,40,100,36,96 ; vmovaps 0x60(%rsp),%xmm12
  DB  197,120,40,108,36,112 ; vmovaps 0x70(%rsp),%xmm13
  DB  197,120,40,180,36,128,0,0,0 ; vmovaps 0x80(%rsp),%xmm14
  DB  197,120,40,188,36,144,0,0,0 ; vmovaps 0x90(%rsp),%xmm15
  DB  72,129,196,160,0,0,0 ; add $0xa0,%rsp
  DB  91 ; pop %rbx
  DB  95 ; pop %rdi
  DB  94 ; pop %rsi
  DB  65,92 ; pop %r12
  DB  65,93 ; pop %r13
  DB  65,94 ; pop %r14
  DB  65,95 ; pop %r15
  DB  197,248,119 ; vzeroupper
  DB  195 ; retq

; _sk_just_return_avx: a single ret.
PUBLIC _sk_just_return_avx
_sk_just_return_avx LABEL PROC
  DB  195 ; retq

; _sk_seed_shader_avx
; ymm0 = float(edi splat to 8 lanes) + broadcast 0x4(%rdx) + the 8 floats at
; 0x14(%rdx); ymm1 = float(dword at (%rax) splat) + broadcast 0x4(%rdx);
; ymm2 = broadcast (%rdx); ymm3-ymm7 = 0.  The splats use vpermilps+vinsertf128.
PUBLIC _sk_seed_shader_avx
_sk_seed_shader_avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,249,110,199 ; vmovd %edi,%xmm0
  DB  196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
  DB  196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
  DB  197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
  DB  196,226,125,24,74,4 ; vbroadcastss 0x4(%rdx),%ymm1
  DB  197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
  DB  197,252,88,66,20 ; vaddps 0x14(%rdx),%ymm0,%ymm0
  DB  197,249,110,16 ; vmovd (%rax),%xmm2
  DB  196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
  DB  196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
  DB  197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
  DB  197,236,88,201 ; vaddps %ymm1,%ymm2,%ymm1
  DB  196,226,125,24,18 ; vbroadcastss (%rdx),%ymm2
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
  DB  197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
  DB  197,204,87,246 ; vxorps %ymm6,%ymm6,%ymm6
  DB  197,196,87,255 ; vxorps %ymm7,%ymm7,%ymm7
  DB  255,224 ; jmpq *%rax

; _sk_constant_color_avx: ymm0-ymm3 = broadcasts of the four floats at (%rax).
PUBLIC _sk_constant_color_avx
_sk_constant_color_avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,0 ; vbroadcastss (%rax),%ymm0
  DB  196,226,125,24,72,4 ; vbroadcastss 0x4(%rax),%ymm1
  DB  196,226,125,24,80,8 ; vbroadcastss 0x8(%rax),%ymm2
  DB  196,226,125,24,88,12 ; vbroadcastss 0xc(%rax),%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_clear_avx: ymm0-ymm3 = 0.  The first qword fetched by lods is not used.
PUBLIC _sk_clear_avx
_sk_clear_avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
  DB  197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
  DB  197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
  DB  197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
  DB  255,224 ; jmpq *%rax

; _sk_plus__avx: ymm0-ymm3 += ymm4-ymm7 (lane-wise, register by register).
PUBLIC _sk_plus__avx
_sk_plus__avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0
  DB  197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1
  DB  197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2
  DB  197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_srcover_avx: k = broadcast (%rdx) - ymm3; ymmN += ymm(N+4) * k for N=0..3
; (separate vmulps/vaddps rather than a fused multiply-add).
PUBLIC _sk_srcover_avx
_sk_srcover_avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
  DB  197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9
  DB  197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
  DB  197,60,89,205 ; vmulps %ymm5,%ymm8,%ymm9
  DB  197,180,88,201 ; vaddps %ymm1,%ymm9,%ymm1
  DB  197,60,89,206 ; vmulps %ymm6,%ymm8,%ymm9
  DB  197,180,88,210 ; vaddps %ymm2,%ymm9,%ymm2
  DB  197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
  DB  197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; _sk_dstover_avx: k = broadcast (%rdx) - ymm7; ymmN = ymmN * k + ymm(N+4) for
; N=0..3 (separate vmulps/vaddps rather than a fused multiply-add).
PUBLIC _sk_dstover_avx
_sk_dstover_avx LABEL PROC
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2 ; vbroadcastss (%rdx),%ymm8
  DB  197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
  DB  197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0
  DB  197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1
  DB  197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2
  DB  197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2
  DB  197,188,89,219 ; vmulps %ymm3,%ymm8,%ymm3
  DB  197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3
  DB  72,173 ; lods %ds:(%rsi),%rax
  DB  255,224 ; jmpq *%rax

; ---------------------------------------------------------------------------
; AVX stages, one directive per line. A MASM ';' comment runs to end of line,
; so every DB row must sit on its own line. Byte values are generator output -
; do not hand-edit.
;
; Register usage visible in the stages below:
;   rsi         cursor into a list of 8-byte entries; `lods` reads the next
;               entry into rax and advances rsi by 8. Where a stage takes a
;               context, the first entry is a pointer it dereferences.
;   rdi         element index used to address pixel memory.
;   rdx         base pointer of a table of constants, read at fixed offsets.
;   ymm0-ymm3   first group of four vectors, ymm4-ymm7 second group
;               (presumably src and dst r,g,b,a, judging by the stage names
;               - confirm against the generator's C++ source).
;   ymm8-ymm15  scratch.
; Every stage ends by loading the next entry into rax and jumping to it.
; ---------------------------------------------------------------------------

; clamp_0: ymm0..ymm3 = max(ymmN, 0).
PUBLIC _sk_clamp_0_avx
_sk_clamp_0_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,65,60,87,192            ; vxorps %ymm8,%ymm8,%ymm8
  DB  196,193,124,95,192          ; vmaxps %ymm8,%ymm0,%ymm0
  DB  196,193,116,95,200          ; vmaxps %ymm8,%ymm1,%ymm1
  DB  196,193,108,95,208          ; vmaxps %ymm8,%ymm2,%ymm2
  DB  196,193,100,95,216          ; vmaxps %ymm8,%ymm3,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; clamp_1: ymm0..ymm3 = min(ymmN, k), k = float at (rdx) broadcast.
PUBLIC _sk_clamp_1_avx
_sk_clamp_1_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2             ; vbroadcastss (%rdx),%ymm8
  DB  196,193,124,93,192          ; vminps %ymm8,%ymm0,%ymm0
  DB  196,193,116,93,200          ; vminps %ymm8,%ymm1,%ymm1
  DB  196,193,108,93,208          ; vminps %ymm8,%ymm2,%ymm2
  DB  196,193,100,93,216          ; vminps %ymm8,%ymm3,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; clamp_a: ymm3 = min(ymm3, k) with k = float at (rdx); then ymm0..ymm2 are
; each limited to the new ymm3.
PUBLIC _sk_clamp_a_avx
_sk_clamp_a_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,2             ; vbroadcastss (%rdx),%ymm8
  DB  196,193,100,93,216          ; vminps %ymm8,%ymm3,%ymm3
  DB  197,252,93,195              ; vminps %ymm3,%ymm0,%ymm0
  DB  197,244,93,203              ; vminps %ymm3,%ymm1,%ymm1
  DB  197,236,93,211              ; vminps %ymm3,%ymm2,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; set_rgb: rax = context pointer to three floats; broadcast them into
; ymm0, ymm1, ymm2. ymm3 is left untouched.
PUBLIC _sk_set_rgb_avx
_sk_set_rgb_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,0            ; vbroadcastss (%rax),%ymm0
  DB  196,226,125,24,72,4         ; vbroadcastss 0x4(%rax),%ymm1
  DB  196,226,125,24,80,8         ; vbroadcastss 0x8(%rax),%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; swap_rb: exchange ymm0 and ymm2 through ymm8. The first lods result is
; discarded.
PUBLIC _sk_swap_rb_avx
_sk_swap_rb_avx LABEL PROC
  DB  197,124,40,192              ; vmovaps %ymm0,%ymm8
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,252,40,194              ; vmovaps %ymm2,%ymm0
  DB  197,124,41,194              ; vmovaps %ymm8,%ymm2
  DB  255,224                     ; jmpq *%rax

; swap: exchange ymm0..ymm3 with ymm4..ymm7, staging the old ymm0..ymm3 in
; ymm11..ymm8.
PUBLIC _sk_swap_avx
_sk_swap_avx LABEL PROC
  DB  197,124,40,195              ; vmovaps %ymm3,%ymm8
  DB  197,124,40,202              ; vmovaps %ymm2,%ymm9
  DB  197,124,40,209              ; vmovaps %ymm1,%ymm10
  DB  197,124,40,216              ; vmovaps %ymm0,%ymm11
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,252,40,196              ; vmovaps %ymm4,%ymm0
  DB  197,252,40,205              ; vmovaps %ymm5,%ymm1
  DB  197,252,40,214              ; vmovaps %ymm6,%ymm2
  DB  197,252,40,223              ; vmovaps %ymm7,%ymm3
  DB  197,124,41,220              ; vmovaps %ymm11,%ymm4
  DB  197,124,41,213              ; vmovaps %ymm10,%ymm5
  DB  197,124,41,206              ; vmovaps %ymm9,%ymm6
  DB  197,124,41,199              ; vmovaps %ymm8,%ymm7
  DB  255,224                     ; jmpq *%rax

; move_src_dst: ymm4..ymm7 = ymm0..ymm3.
PUBLIC _sk_move_src_dst_avx
_sk_move_src_dst_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,252,40,224              ; vmovaps %ymm0,%ymm4
  DB  197,252,40,233              ; vmovaps %ymm1,%ymm5
  DB  197,252,40,242              ; vmovaps %ymm2,%ymm6
  DB  197,252,40,251              ; vmovaps %ymm3,%ymm7
  DB  255,224                     ; jmpq *%rax

; move_dst_src: ymm0..ymm3 = ymm4..ymm7.
PUBLIC _sk_move_dst_src_avx
_sk_move_dst_src_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,252,40,196              ; vmovaps %ymm4,%ymm0
  DB  197,252,40,205              ; vmovaps %ymm5,%ymm1
  DB  197,252,40,214              ; vmovaps %ymm6,%ymm2
  DB  197,252,40,223              ; vmovaps %ymm7,%ymm3
  DB  255,224                     ; jmpq *%rax

; premul: ymm0..ymm2 *= ymm3.
PUBLIC _sk_premul_avx
_sk_premul_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,252,89,195              ; vmulps %ymm3,%ymm0,%ymm0
  DB  197,244,89,203              ; vmulps %ymm3,%ymm1,%ymm1
  DB  197,236,89,211              ; vmulps %ymm3,%ymm2,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; unpremul: scale = (ymm3 == 0) ? 0 : k / ymm3, k = float at (rdx);
; ymm0..ymm2 *= scale. The blend keeps lanes with ymm3 == 0 at zero rather
; than at the result of the division.
PUBLIC _sk_unpremul_avx
_sk_unpremul_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,65,60,87,192            ; vxorps %ymm8,%ymm8,%ymm8
  DB  196,65,100,194,200,0        ; vcmpeqps %ymm8,%ymm3,%ymm9
  DB  196,98,125,24,18            ; vbroadcastss (%rdx),%ymm10
  DB  197,44,94,211               ; vdivps %ymm3,%ymm10,%ymm10
  DB  196,67,45,74,192,144        ; vblendvps %ymm9,%ymm8,%ymm10,%ymm8
  DB  197,188,89,192              ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,188,89,201              ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,188,89,210              ; vmulps %ymm2,%ymm8,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; from_srgb: for each of ymm0..ymm2 (x), with constants read from rdx:
;   lo = [0x40] * x
;   hi = [0x34] + x*x * ([0x3c] * x + [0x38])
;   x  = (x < [0x44]) ? lo : hi
; ymm3 is not modified.
PUBLIC _sk_from_srgb_avx
_sk_from_srgb_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,66,64         ; vbroadcastss 0x40(%rdx),%ymm8
  DB  197,60,89,200               ; vmulps %ymm0,%ymm8,%ymm9
  DB  197,124,89,208              ; vmulps %ymm0,%ymm0,%ymm10
  DB  196,98,125,24,90,60         ; vbroadcastss 0x3c(%rdx),%ymm11
  DB  196,98,125,24,98,56         ; vbroadcastss 0x38(%rdx),%ymm12
  DB  197,36,89,232               ; vmulps %ymm0,%ymm11,%ymm13
  DB  196,65,20,88,236            ; vaddps %ymm12,%ymm13,%ymm13
  DB  196,98,125,24,114,52        ; vbroadcastss 0x34(%rdx),%ymm14
  DB  196,65,44,89,213            ; vmulps %ymm13,%ymm10,%ymm10
  DB  196,65,12,88,210            ; vaddps %ymm10,%ymm14,%ymm10
  DB  196,98,125,24,106,68        ; vbroadcastss 0x44(%rdx),%ymm13
  DB  196,193,124,194,197,1       ; vcmpltps %ymm13,%ymm0,%ymm0
  DB  196,195,45,74,193,0         ; vblendvps %ymm0,%ymm9,%ymm10,%ymm0
  DB  197,60,89,201               ; vmulps %ymm1,%ymm8,%ymm9
  DB  197,116,89,209              ; vmulps %ymm1,%ymm1,%ymm10
  DB  197,36,89,249               ; vmulps %ymm1,%ymm11,%ymm15
  DB  196,65,4,88,252             ; vaddps %ymm12,%ymm15,%ymm15
  DB  196,65,44,89,215            ; vmulps %ymm15,%ymm10,%ymm10
  DB  196,65,12,88,210            ; vaddps %ymm10,%ymm14,%ymm10
  DB  196,193,116,194,205,1       ; vcmpltps %ymm13,%ymm1,%ymm1
  DB  196,195,45,74,201,16        ; vblendvps %ymm1,%ymm9,%ymm10,%ymm1
  DB  197,60,89,194               ; vmulps %ymm2,%ymm8,%ymm8
  DB  197,108,89,202              ; vmulps %ymm2,%ymm2,%ymm9
  DB  197,36,89,210               ; vmulps %ymm2,%ymm11,%ymm10
  DB  196,65,44,88,212            ; vaddps %ymm12,%ymm10,%ymm10
  DB  196,65,52,89,202            ; vmulps %ymm10,%ymm9,%ymm9
  DB  196,65,12,88,201            ; vaddps %ymm9,%ymm14,%ymm9
  DB  196,193,108,194,213,1       ; vcmpltps %ymm13,%ymm2,%ymm2
  DB  196,195,53,74,208,32        ; vblendvps %ymm2,%ymm8,%ymm9,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; to_srgb: for each of ymm0..ymm2 (x), with constants read from rdx:
;   rs = rsqrt(x); s = rcp(rs)   (approximates sqrt(x))
;   q  = rsqrt(rs)               (approximates x^(1/4))
;   lo = [0x48] * x
;   hi = min([0x00], [0x4c] * q + [0x50] * s + [0x54])
;   x  = (x < [0x58]) ? lo : hi
; Both lods come at the end; the first result is discarded.
PUBLIC _sk_to_srgb_avx
_sk_to_srgb_avx LABEL PROC
  DB  197,124,82,192              ; vrsqrtps %ymm0,%ymm8
  DB  196,65,124,83,200           ; vrcpps %ymm8,%ymm9
  DB  196,65,124,82,208           ; vrsqrtps %ymm8,%ymm10
  DB  196,98,125,24,66,72         ; vbroadcastss 0x48(%rdx),%ymm8
  DB  197,60,89,216               ; vmulps %ymm0,%ymm8,%ymm11
  DB  196,98,125,24,34            ; vbroadcastss (%rdx),%ymm12
  DB  196,98,125,24,106,76        ; vbroadcastss 0x4c(%rdx),%ymm13
  DB  196,98,125,24,114,80        ; vbroadcastss 0x50(%rdx),%ymm14
  DB  196,98,125,24,122,84        ; vbroadcastss 0x54(%rdx),%ymm15
  DB  196,65,52,89,206            ; vmulps %ymm14,%ymm9,%ymm9
  DB  196,65,52,88,207            ; vaddps %ymm15,%ymm9,%ymm9
  DB  196,65,44,89,213            ; vmulps %ymm13,%ymm10,%ymm10
  DB  196,65,44,88,201            ; vaddps %ymm9,%ymm10,%ymm9
  DB  196,65,28,93,201            ; vminps %ymm9,%ymm12,%ymm9
  DB  196,98,125,24,82,88         ; vbroadcastss 0x58(%rdx),%ymm10
  DB  196,193,124,194,194,1       ; vcmpltps %ymm10,%ymm0,%ymm0
  DB  196,195,53,74,195,0         ; vblendvps %ymm0,%ymm11,%ymm9,%ymm0
  DB  197,124,82,201              ; vrsqrtps %ymm1,%ymm9
  DB  196,65,124,83,217           ; vrcpps %ymm9,%ymm11
  DB  196,65,124,82,201           ; vrsqrtps %ymm9,%ymm9
  DB  196,65,12,89,219            ; vmulps %ymm11,%ymm14,%ymm11
  DB  196,65,4,88,219             ; vaddps %ymm11,%ymm15,%ymm11
  DB  196,65,20,89,201            ; vmulps %ymm9,%ymm13,%ymm9
  DB  196,65,52,88,203            ; vaddps %ymm11,%ymm9,%ymm9
  DB  197,60,89,217               ; vmulps %ymm1,%ymm8,%ymm11
  DB  196,65,28,93,201            ; vminps %ymm9,%ymm12,%ymm9
  DB  196,193,116,194,202,1       ; vcmpltps %ymm10,%ymm1,%ymm1
  DB  196,195,53,74,203,16        ; vblendvps %ymm1,%ymm11,%ymm9,%ymm1
  DB  197,124,82,202              ; vrsqrtps %ymm2,%ymm9
  DB  196,65,124,83,217           ; vrcpps %ymm9,%ymm11
  DB  196,65,12,89,219            ; vmulps %ymm11,%ymm14,%ymm11
  DB  196,65,4,88,219             ; vaddps %ymm11,%ymm15,%ymm11
  DB  196,65,124,82,201           ; vrsqrtps %ymm9,%ymm9
  DB  196,65,20,89,201            ; vmulps %ymm9,%ymm13,%ymm9
  DB  196,65,52,88,203            ; vaddps %ymm11,%ymm9,%ymm9
  DB  196,65,28,93,201            ; vminps %ymm9,%ymm12,%ymm9
  DB  197,60,89,194               ; vmulps %ymm2,%ymm8,%ymm8
  DB  196,193,108,194,210,1       ; vcmpltps %ymm10,%ymm2,%ymm2
  DB  196,195,53,74,208,32        ; vblendvps %ymm2,%ymm8,%ymm9,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; scale_u8: p = *(context); read 8 bytes at p + rdi, zero-extend to dwords,
; convert to float and multiply by the float at 0xc(rdx) to get c; then
; ymm0..ymm3 *= c.
PUBLIC _sk_scale_u8_avx
_sk_scale_u8_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  196,98,121,49,68,56,4       ; vpmovzxbd 0x4(%rax,%rdi,1),%xmm8
  DB  196,98,121,49,12,56         ; vpmovzxbd (%rax,%rdi,1),%xmm9
  DB  196,67,53,24,192,1          ; vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
  DB  196,65,124,91,192           ; vcvtdq2ps %ymm8,%ymm8
  DB  196,98,125,24,74,12         ; vbroadcastss 0xc(%rdx),%ymm9
  DB  196,65,60,89,193            ; vmulps %ymm9,%ymm8,%ymm8
  DB  197,188,89,192              ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,188,89,201              ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,188,89,210              ; vmulps %ymm2,%ymm8,%ymm2
  DB  197,188,89,219              ; vmulps %ymm3,%ymm8,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; lerp_u8: c is computed exactly as in scale_u8; then
;   ymmN = (ymmN - ymm(N+4)) * c + ymm(N+4)   for N = 0..3.
PUBLIC _sk_lerp_u8_avx
_sk_lerp_u8_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  196,98,121,49,68,56,4       ; vpmovzxbd 0x4(%rax,%rdi,1),%xmm8
  DB  196,98,121,49,12,56         ; vpmovzxbd (%rax,%rdi,1),%xmm9
  DB  196,67,53,24,192,1          ; vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
  DB  196,65,124,91,192           ; vcvtdq2ps %ymm8,%ymm8
  DB  196,98,125,24,74,12         ; vbroadcastss 0xc(%rdx),%ymm9
  DB  196,65,60,89,193            ; vmulps %ymm9,%ymm8,%ymm8
  DB  197,252,92,196              ; vsubps %ymm4,%ymm0,%ymm0
  DB  196,193,124,89,192          ; vmulps %ymm8,%ymm0,%ymm0
  DB  197,252,88,196              ; vaddps %ymm4,%ymm0,%ymm0
  DB  197,244,92,205              ; vsubps %ymm5,%ymm1,%ymm1
  DB  196,193,116,89,200          ; vmulps %ymm8,%ymm1,%ymm1
  DB  197,244,88,205              ; vaddps %ymm5,%ymm1,%ymm1
  DB  197,236,92,214              ; vsubps %ymm6,%ymm2,%ymm2
  DB  196,193,108,89,208          ; vmulps %ymm8,%ymm2,%ymm2
  DB  197,236,88,214              ; vaddps %ymm6,%ymm2,%ymm2
  DB  197,228,92,223              ; vsubps %ymm7,%ymm3,%ymm3
  DB  196,193,100,89,216          ; vmulps %ymm8,%ymm3,%ymm3
  DB  197,228,88,223              ; vaddps %ymm7,%ymm3,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; load_tables: the context holds four pointers: +0 pixels, +8 / +0x10 / +0x18
; three float tables. Reads 8 dwords at pixels + rdi*4 and, with mask = dword
; at 0x10(rdx):
;   ymm0[i] = table@+8   [ px[i]         & mask ]
;   ymm1[i] = table@+0x10[ (px[i] >> 8)  & mask ]
;   ymm2[i] = table@+0x18[ (px[i] >> 16) & mask ]
;   ymm3[i] = float(px[i] >> 24) * [0xc(rdx)]
; Plain AVX has no gather instruction, so each index is moved to a general
; register and the table entry is loaded individually. r15, r14, r12 and rbx
; serve as extra index registers and are saved and restored around the body.
PUBLIC _sk_load_tables_avx
_sk_load_tables_avx LABEL PROC
  DB  65,87                       ; push %r15
  DB  65,86                       ; push %r14
  DB  65,84                       ; push %r12
  DB  83                          ; push %rbx
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  76,139,0                    ; mov (%rax),%r8
  DB  72,139,72,8                 ; mov 0x8(%rax),%rcx
  DB  196,65,124,16,20,184        ; vmovups (%r8,%rdi,4),%ymm10
  DB  197,249,110,66,16           ; vmovd 0x10(%rdx),%xmm0
  DB  196,227,121,4,192,0         ; vpermilps $0x0,%xmm0,%xmm0
  DB  196,99,125,24,200,1         ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm9
  DB  196,193,52,84,194           ; vandps %ymm10,%ymm9,%ymm0
  DB  196,193,249,126,192         ; vmovq %xmm0,%r8
  DB  69,137,193                  ; mov %r8d,%r9d
  DB  196,195,249,22,194,1        ; vpextrq $0x1,%xmm0,%r10
  DB  69,137,211                  ; mov %r10d,%r11d
  DB  73,193,234,32               ; shr $0x20,%r10
  DB  73,193,232,32               ; shr $0x20,%r8
  DB  196,227,125,25,192,1        ; vextractf128 $0x1,%ymm0,%xmm0
  DB  196,193,249,126,199         ; vmovq %xmm0,%r15
  DB  69,137,254                  ; mov %r15d,%r14d
  DB  196,227,249,22,195,1        ; vpextrq $0x1,%xmm0,%rbx
  DB  65,137,220                  ; mov %ebx,%r12d
  DB  72,193,235,32               ; shr $0x20,%rbx
  DB  73,193,239,32               ; shr $0x20,%r15
  DB  196,161,122,16,4,177        ; vmovss (%rcx,%r14,4),%xmm0
  DB  196,163,121,33,4,185,16     ; vinsertps $0x10,(%rcx,%r15,4),%xmm0,%xmm0
  DB  196,163,121,33,4,161,32     ; vinsertps $0x20,(%rcx,%r12,4),%xmm0,%xmm0
  DB  196,227,121,33,4,153,48     ; vinsertps $0x30,(%rcx,%rbx,4),%xmm0,%xmm0
  DB  196,161,122,16,12,137       ; vmovss (%rcx,%r9,4),%xmm1
  DB  196,163,113,33,12,129,16    ; vinsertps $0x10,(%rcx,%r8,4),%xmm1,%xmm1
  DB  196,163,113,33,12,153,32    ; vinsertps $0x20,(%rcx,%r11,4),%xmm1,%xmm1
  DB  196,163,113,33,12,145,48    ; vinsertps $0x30,(%rcx,%r10,4),%xmm1,%xmm1
  DB  196,227,117,24,192,1        ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
  DB  76,139,120,16               ; mov 0x10(%rax),%r15
  DB  196,193,113,114,210,8       ; vpsrld $0x8,%xmm10,%xmm1
  DB  196,67,125,25,208,1         ; vextractf128 $0x1,%ymm10,%xmm8
  DB  196,193,105,114,208,8       ; vpsrld $0x8,%xmm8,%xmm2
  DB  196,227,117,24,202,1        ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
  DB  197,180,84,201              ; vandps %ymm1,%ymm9,%ymm1
  DB  196,193,249,126,200         ; vmovq %xmm1,%r8
  DB  69,137,194                  ; mov %r8d,%r10d
  DB  196,195,249,22,201,1        ; vpextrq $0x1,%xmm1,%r9
  DB  69,137,203                  ; mov %r9d,%r11d
  DB  73,193,233,32               ; shr $0x20,%r9
  DB  73,193,232,32               ; shr $0x20,%r8
  DB  196,227,125,25,201,1        ; vextractf128 $0x1,%ymm1,%xmm1
  DB  196,225,249,126,203         ; vmovq %xmm1,%rbx
  DB  65,137,222                  ; mov %ebx,%r14d
  DB  196,227,249,22,201,1        ; vpextrq $0x1,%xmm1,%rcx
  DB  65,137,204                  ; mov %ecx,%r12d
  DB  72,193,233,32               ; shr $0x20,%rcx
  DB  72,193,235,32               ; shr $0x20,%rbx
  DB  196,129,122,16,12,183       ; vmovss (%r15,%r14,4),%xmm1
  DB  196,195,113,33,12,159,16    ; vinsertps $0x10,(%r15,%rbx,4),%xmm1,%xmm1
  DB  196,129,122,16,20,167       ; vmovss (%r15,%r12,4),%xmm2
  DB  196,227,113,33,202,32       ; vinsertps $0x20,%xmm2,%xmm1,%xmm1
  DB  196,193,122,16,20,143       ; vmovss (%r15,%rcx,4),%xmm2
  DB  196,227,113,33,202,48       ; vinsertps $0x30,%xmm2,%xmm1,%xmm1
  DB  196,129,122,16,20,151       ; vmovss (%r15,%r10,4),%xmm2
  DB  196,131,105,33,20,135,16    ; vinsertps $0x10,(%r15,%r8,4),%xmm2,%xmm2
  DB  196,129,122,16,28,159       ; vmovss (%r15,%r11,4),%xmm3
  DB  196,227,105,33,211,32       ; vinsertps $0x20,%xmm3,%xmm2,%xmm2
  DB  196,129,122,16,28,143       ; vmovss (%r15,%r9,4),%xmm3
  DB  196,227,105,33,211,48       ; vinsertps $0x30,%xmm3,%xmm2,%xmm2
  DB  196,227,109,24,201,1        ; vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
  DB  72,139,64,24                ; mov 0x18(%rax),%rax
  DB  196,193,105,114,210,16      ; vpsrld $0x10,%xmm10,%xmm2
  DB  196,193,97,114,208,16       ; vpsrld $0x10,%xmm8,%xmm3
  DB  196,227,109,24,211,1        ; vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
  DB  197,180,84,210              ; vandps %ymm2,%ymm9,%ymm2
  DB  196,193,249,126,208         ; vmovq %xmm2,%r8
  DB  69,137,193                  ; mov %r8d,%r9d
  DB  196,195,249,22,214,1        ; vpextrq $0x1,%xmm2,%r14
  DB  69,137,242                  ; mov %r14d,%r10d
  DB  73,193,238,32               ; shr $0x20,%r14
  DB  73,193,232,32               ; shr $0x20,%r8
  DB  196,227,125,25,210,1        ; vextractf128 $0x1,%ymm2,%xmm2
  DB  196,225,249,126,211         ; vmovq %xmm2,%rbx
  DB  65,137,219                  ; mov %ebx,%r11d
  DB  196,227,249,22,209,1        ; vpextrq $0x1,%xmm2,%rcx
  DB  65,137,207                  ; mov %ecx,%r15d
  DB  72,193,233,32               ; shr $0x20,%rcx
  DB  72,193,235,32               ; shr $0x20,%rbx
  DB  196,161,122,16,20,152       ; vmovss (%rax,%r11,4),%xmm2
  DB  196,227,105,33,20,152,16    ; vinsertps $0x10,(%rax,%rbx,4),%xmm2,%xmm2
  DB  196,161,122,16,28,184       ; vmovss (%rax,%r15,4),%xmm3
  DB  196,227,105,33,211,32       ; vinsertps $0x20,%xmm3,%xmm2,%xmm2
  DB  197,250,16,28,136            ; vmovss (%rax,%rcx,4),%xmm3
  DB  196,99,105,33,203,48        ; vinsertps $0x30,%xmm3,%xmm2,%xmm9
  DB  196,161,122,16,28,136       ; vmovss (%rax,%r9,4),%xmm3
  DB  196,163,97,33,28,128,16     ; vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
  DB  196,161,122,16,20,144       ; vmovss (%rax,%r10,4),%xmm2
  DB  196,227,97,33,210,32        ; vinsertps $0x20,%xmm2,%xmm3,%xmm2
  DB  196,161,122,16,28,176       ; vmovss (%rax,%r14,4),%xmm3
  DB  196,227,105,33,211,48       ; vinsertps $0x30,%xmm3,%xmm2,%xmm2
  DB  196,195,109,24,209,1        ; vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
  DB  196,193,49,114,210,24       ; vpsrld $0x18,%xmm10,%xmm9
  DB  196,193,97,114,208,24       ; vpsrld $0x18,%xmm8,%xmm3
  DB  196,227,53,24,219,1         ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
  DB  197,252,91,219              ; vcvtdq2ps %ymm3,%ymm3
  DB  196,98,125,24,66,12         ; vbroadcastss 0xc(%rdx),%ymm8
  DB  196,193,100,89,216          ; vmulps %ymm8,%ymm3,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  91                          ; pop %rbx
  DB  65,92                       ; pop %r12
  DB  65,94                       ; pop %r14
  DB  65,95                       ; pop %r15
  DB  255,224                     ; jmpq *%rax

; load_565: p = *(context); read 8 words at p + rdi*2, zero-extended to
; dwords (v). With masks and scales read from rdx:
;   ymm0 = float(v & [0x68]) * [0x74]
;   ymm1 = float(v & [0x6c]) * [0x78]
;   ymm2 = float(v & [0x70]) * [0x7c]
;   ymm3 = broadcast of the float at (rdx)
PUBLIC _sk_load_565_avx
_sk_load_565_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  196,226,121,51,68,120,8     ; vpmovzxwd 0x8(%rax,%rdi,2),%xmm0
  DB  196,226,121,51,12,120       ; vpmovzxwd (%rax,%rdi,2),%xmm1
  DB  196,227,117,24,208,1        ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm2
  DB  197,249,110,66,104          ; vmovd 0x68(%rdx),%xmm0
  DB  196,227,121,4,192,0         ; vpermilps $0x0,%xmm0,%xmm0
  DB  196,227,125,24,192,1        ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
  DB  197,252,84,194              ; vandps %ymm2,%ymm0,%ymm0
  DB  197,252,91,192              ; vcvtdq2ps %ymm0,%ymm0
  DB  196,226,125,24,74,116       ; vbroadcastss 0x74(%rdx),%ymm1
  DB  197,244,89,192              ; vmulps %ymm0,%ymm1,%ymm0
  DB  197,249,110,74,108          ; vmovd 0x6c(%rdx),%xmm1
  DB  196,227,121,4,201,0         ; vpermilps $0x0,%xmm1,%xmm1
  DB  196,227,117,24,201,1        ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
  DB  197,244,84,202              ; vandps %ymm2,%ymm1,%ymm1
  DB  197,252,91,201              ; vcvtdq2ps %ymm1,%ymm1
  DB  196,226,125,24,90,120       ; vbroadcastss 0x78(%rdx),%ymm3
  DB  197,228,89,201              ; vmulps %ymm1,%ymm3,%ymm1
  DB  197,249,110,90,112          ; vmovd 0x70(%rdx),%xmm3
  DB  196,227,121,4,219,0         ; vpermilps $0x0,%xmm3,%xmm3
  DB  196,227,101,24,219,1        ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
  DB  197,228,84,210              ; vandps %ymm2,%ymm3,%ymm2
  DB  197,252,91,210              ; vcvtdq2ps %ymm2,%ymm2
  DB  196,226,125,24,90,124       ; vbroadcastss 0x7c(%rdx),%ymm3
  DB  197,228,89,210              ; vmulps %ymm2,%ymm3,%ymm2
  DB  196,226,125,24,26           ; vbroadcastss (%rdx),%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; store_565: p = *(context). Converts to integers and packs
;   (int(ymm0 * [0x80]) << 11) | (int(ymm1 * [0x84]) << 5) | int(ymm2 * [0x80])
; then narrows the 8 dwords to words (vpackusdw) and stores 16 bytes at
; p + rdi*2. Integer shifts are done per 128-bit half because AVX (without
; AVX2) has no 256-bit integer shift.
PUBLIC _sk_store_565_avx
_sk_store_565_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  196,98,125,24,130,128,0,0,0 ; vbroadcastss 0x80(%rdx),%ymm8
  DB  197,60,89,200               ; vmulps %ymm0,%ymm8,%ymm9
  DB  196,65,125,91,201           ; vcvtps2dq %ymm9,%ymm9
  DB  196,193,41,114,241,11       ; vpslld $0xb,%xmm9,%xmm10
  DB  196,67,125,25,201,1         ; vextractf128 $0x1,%ymm9,%xmm9
  DB  196,193,49,114,241,11       ; vpslld $0xb,%xmm9,%xmm9
  DB  196,67,45,24,201,1          ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
  DB  196,98,125,24,146,132,0,0,0 ; vbroadcastss 0x84(%rdx),%ymm10
  DB  197,44,89,209               ; vmulps %ymm1,%ymm10,%ymm10
  DB  196,65,125,91,210           ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,33,114,242,5        ; vpslld $0x5,%xmm10,%xmm11
  DB  196,67,125,25,210,1         ; vextractf128 $0x1,%ymm10,%xmm10
  DB  196,193,41,114,242,5        ; vpslld $0x5,%xmm10,%xmm10
  DB  196,67,37,24,210,1          ; vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
  DB  196,65,45,86,201            ; vorpd %ymm9,%ymm10,%ymm9
  DB  197,60,89,194               ; vmulps %ymm2,%ymm8,%ymm8
  DB  196,65,125,91,192           ; vcvtps2dq %ymm8,%ymm8
  DB  196,65,53,86,192            ; vorpd %ymm8,%ymm9,%ymm8
  DB  196,67,125,25,193,1         ; vextractf128 $0x1,%ymm8,%xmm9
  DB  196,66,57,43,193            ; vpackusdw %xmm9,%xmm8,%xmm8
  DB  197,122,127,4,120           ; vmovdqu %xmm8,(%rax,%rdi,2)
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; load_8888: p = *(context); read 8 dwords at p + rdi*4 (px). With
; mask = dword at 0x10(rdx) and scale = float at 0xc(rdx):
;   ymm0 = float( px        & mask) * scale
;   ymm1 = float((px >> 8)  & mask) * scale
;   ymm2 = float((px >> 16) & mask) * scale
;   ymm3 = float( px >> 24        ) * scale
PUBLIC _sk_load_8888_avx
_sk_load_8888_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  197,252,16,28,184           ; vmovups (%rax,%rdi,4),%ymm3
  DB  197,249,110,66,16           ; vmovd 0x10(%rdx),%xmm0
  DB  196,227,121,4,192,0         ; vpermilps $0x0,%xmm0,%xmm0
  DB  196,99,125,24,216,1         ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm11
  DB  197,164,84,195              ; vandps %ymm3,%ymm11,%ymm0
  DB  197,252,91,192              ; vcvtdq2ps %ymm0,%ymm0
  DB  196,98,125,24,66,12         ; vbroadcastss 0xc(%rdx),%ymm8
  DB  197,188,89,192              ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,169,114,211,8           ; vpsrld $0x8,%xmm3,%xmm10
  DB  196,195,125,25,217,1        ; vextractf128 $0x1,%ymm3,%xmm9
  DB  196,193,113,114,209,8       ; vpsrld $0x8,%xmm9,%xmm1
  DB  196,227,45,24,201,1         ; vinsertf128 $0x1,%xmm1,%ymm10,%ymm1
  DB  197,164,84,201              ; vandps %ymm1,%ymm11,%ymm1
  DB  197,252,91,201              ; vcvtdq2ps %ymm1,%ymm1
  DB  197,188,89,201              ; vmulps %ymm1,%ymm8,%ymm1
  DB  197,169,114,211,16          ; vpsrld $0x10,%xmm3,%xmm10
  DB  196,193,105,114,209,16      ; vpsrld $0x10,%xmm9,%xmm2
  DB  196,227,45,24,210,1         ; vinsertf128 $0x1,%xmm2,%ymm10,%ymm2
  DB  197,164,84,210              ; vandps %ymm2,%ymm11,%ymm2
  DB  197,252,91,210              ; vcvtdq2ps %ymm2,%ymm2
  DB  197,188,89,210              ; vmulps %ymm2,%ymm8,%ymm2
  DB  197,169,114,211,24          ; vpsrld $0x18,%xmm3,%xmm10
  DB  196,193,97,114,209,24       ; vpsrld $0x18,%xmm9,%xmm3
  DB  196,227,45,24,219,1         ; vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
  DB  197,252,91,219              ; vcvtdq2ps %ymm3,%ymm3
  DB  196,193,100,89,216          ; vmulps %ymm8,%ymm3,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; store_8888: p = *(context); with scale = float at 0x8(rdx), stores 8 dwords
;   int(ymm0*scale) | int(ymm1*scale) << 8 | int(ymm2*scale) << 16
;                   | int(ymm3*scale) << 24
; at p + rdi*4 (32 bytes, unaligned store).
PUBLIC _sk_store_8888_avx
_sk_store_8888_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  196,98,125,24,66,8          ; vbroadcastss 0x8(%rdx),%ymm8
  DB  197,60,89,200               ; vmulps %ymm0,%ymm8,%ymm9
  DB  196,65,125,91,201           ; vcvtps2dq %ymm9,%ymm9
  DB  197,60,89,209               ; vmulps %ymm1,%ymm8,%ymm10
  DB  196,65,125,91,210           ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,33,114,242,8        ; vpslld $0x8,%xmm10,%xmm11
  DB  196,67,125,25,210,1         ; vextractf128 $0x1,%ymm10,%xmm10
  DB  196,193,41,114,242,8        ; vpslld $0x8,%xmm10,%xmm10
  DB  196,67,37,24,210,1          ; vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
  DB  196,65,45,86,201            ; vorpd %ymm9,%ymm10,%ymm9
  DB  197,60,89,210               ; vmulps %ymm2,%ymm8,%ymm10
  DB  196,65,125,91,210           ; vcvtps2dq %ymm10,%ymm10
  DB  196,193,33,114,242,16       ; vpslld $0x10,%xmm10,%xmm11
  DB  196,67,125,25,210,1         ; vextractf128 $0x1,%ymm10,%xmm10
  DB  196,193,41,114,242,16       ; vpslld $0x10,%xmm10,%xmm10
  DB  196,67,37,24,210,1          ; vinsertf128 $0x1,%xmm10,%ymm11,%ymm10
  DB  196,65,53,86,202            ; vorpd %ymm10,%ymm9,%ymm9
  DB  197,60,89,195               ; vmulps %ymm3,%ymm8,%ymm8
  DB  196,65,125,91,192           ; vcvtps2dq %ymm8,%ymm8
  DB  196,193,41,114,240,24       ; vpslld $0x18,%xmm8,%xmm10
  DB  196,67,125,25,192,1         ; vextractf128 $0x1,%ymm8,%xmm8
  DB  196,193,57,114,240,24       ; vpslld $0x18,%xmm8,%xmm8
  DB  196,67,45,24,192,1          ; vinsertf128 $0x1,%xmm8,%ymm10,%ymm8
  DB  196,65,53,86,192            ; vorpd %ymm8,%ymm9,%ymm8
  DB  197,125,17,4,184            ; vmovupd %ymm8,(%rax,%rdi,4)
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; load_f16: p = *(context); read 64 bytes at p + rdi*8 (8 elements of four
; 16-bit values each). The vpunpck{l,h}wd sequence regroups the words by
; component. Each word that the threshold at 0x64(rdx) exceeds (signed 16-bit
; compare) is zeroed by vpcmpgtw/vpandn. The surviving words are zero-extended
; to 32 bits, shifted left by 13 and multiplied by the float at 0x5c(rdx),
; giving ymm0..ymm3. (Presumably the usual half->float bit trick with
; denormals flushed to zero - confirm against the generator's C++ source.)
PUBLIC _sk_load_f16_avx
_sk_load_f16_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  197,250,111,4,248           ; vmovdqu (%rax,%rdi,8),%xmm0
  DB  197,250,111,76,248,16       ; vmovdqu 0x10(%rax,%rdi,8),%xmm1
  DB  197,250,111,84,248,32       ; vmovdqu 0x20(%rax,%rdi,8),%xmm2
  DB  197,250,111,92,248,48       ; vmovdqu 0x30(%rax,%rdi,8),%xmm3
  DB  197,121,97,193              ; vpunpcklwd %xmm1,%xmm0,%xmm8
  DB  197,249,105,193             ; vpunpckhwd %xmm1,%xmm0,%xmm0
  DB  197,233,97,203              ; vpunpcklwd %xmm3,%xmm2,%xmm1
  DB  197,233,105,211             ; vpunpckhwd %xmm3,%xmm2,%xmm2
  DB  197,185,97,216              ; vpunpcklwd %xmm0,%xmm8,%xmm3
  DB  197,185,105,192             ; vpunpckhwd %xmm0,%xmm8,%xmm0
  DB  197,113,97,194              ; vpunpcklwd %xmm2,%xmm1,%xmm8
  DB  197,113,105,202             ; vpunpckhwd %xmm2,%xmm1,%xmm9
  DB  197,249,110,82,100          ; vmovd 0x64(%rdx),%xmm2
  DB  197,249,112,210,0           ; vpshufd $0x0,%xmm2,%xmm2
  DB  197,233,101,203             ; vpcmpgtw %xmm3,%xmm2,%xmm1
  DB  197,241,223,203             ; vpandn %xmm3,%xmm1,%xmm1
  DB  197,233,101,216             ; vpcmpgtw %xmm0,%xmm2,%xmm3
  DB  197,225,223,192             ; vpandn %xmm0,%xmm3,%xmm0
  DB  196,193,105,101,216         ; vpcmpgtw %xmm8,%xmm2,%xmm3
  DB  196,193,97,223,216          ; vpandn %xmm8,%xmm3,%xmm3
  DB  196,193,105,101,209         ; vpcmpgtw %xmm9,%xmm2,%xmm2
  DB  196,193,105,223,209         ; vpandn %xmm9,%xmm2,%xmm2
  DB  196,98,121,51,193           ; vpmovzxwd %xmm1,%xmm8
  DB  196,98,121,51,203           ; vpmovzxwd %xmm3,%xmm9
  DB  196,65,41,239,210           ; vpxor %xmm10,%xmm10,%xmm10
  DB  196,193,113,105,202         ; vpunpckhwd %xmm10,%xmm1,%xmm1
  DB  196,193,97,105,218          ; vpunpckhwd %xmm10,%xmm3,%xmm3
  DB  196,98,121,51,216           ; vpmovzxwd %xmm0,%xmm11
  DB  196,98,121,51,226           ; vpmovzxwd %xmm2,%xmm12
  DB  196,65,121,105,234          ; vpunpckhwd %xmm10,%xmm0,%xmm13
  DB  196,65,105,105,210          ; vpunpckhwd %xmm10,%xmm2,%xmm10
  DB  196,193,121,114,240,13      ; vpslld $0xd,%xmm8,%xmm0
  DB  196,193,105,114,241,13      ; vpslld $0xd,%xmm9,%xmm2
  DB  196,227,125,24,194,1        ; vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
  DB  197,249,110,82,92           ; vmovd 0x5c(%rdx),%xmm2
  DB  196,227,121,4,210,0         ; vpermilps $0x0,%xmm2,%xmm2
  DB  196,99,109,24,194,1         ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm8
  DB  197,188,89,192              ; vmulps %ymm0,%ymm8,%ymm0
  DB  197,241,114,241,13          ; vpslld $0xd,%xmm1,%xmm1
  DB  197,233,114,243,13          ; vpslld $0xd,%xmm3,%xmm2
  DB  196,227,117,24,202,1        ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
  DB  197,188,89,201              ; vmulps %ymm1,%ymm8,%ymm1
  DB  196,193,105,114,243,13      ; vpslld $0xd,%xmm11,%xmm2
  DB  196,193,97,114,244,13       ; vpslld $0xd,%xmm12,%xmm3
  DB  196,227,109,24,211,1        ; vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
  DB  197,188,89,210              ; vmulps %ymm2,%ymm8,%ymm2
  DB  196,193,49,114,245,13       ; vpslld $0xd,%xmm13,%xmm9
  DB  196,193,97,114,242,13       ; vpslld $0xd,%xmm10,%xmm3
  DB  196,227,53,24,219,1         ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
  DB  197,188,89,219              ; vmulps %ymm3,%ymm8,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; store_f16: p = *(context). Each of ymm0..ymm3 is multiplied by the float at
; 0x60(rdx) and its bit pattern shifted right by 13, leaving a 16-bit value
; in each dword. vpslldq/vpor merge pairs of components (ymm1 over ymm0,
; ymm3 over ymm2) and vpunpck{l,h}dq interleave them; four 16-byte stores
; write 64 bytes at p + rdi*8.
PUBLIC _sk_store_f16_avx
_sk_store_f16_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  72,139,0                    ; mov (%rax),%rax
  DB  197,121,110,66,96           ; vmovd 0x60(%rdx),%xmm8
  DB  196,67,121,4,192,0          ; vpermilps $0x0,%xmm8,%xmm8
  DB  196,67,61,24,192,1          ; vinsertf128 $0x1,%xmm8,%ymm8,%ymm8
  DB  197,60,89,200               ; vmulps %ymm0,%ymm8,%ymm9
  DB  196,67,125,25,202,1         ; vextractf128 $0x1,%ymm9,%xmm10
  DB  196,193,41,114,210,13       ; vpsrld $0xd,%xmm10,%xmm10
  DB  196,193,49,114,209,13       ; vpsrld $0xd,%xmm9,%xmm9
  DB  197,60,89,217               ; vmulps %ymm1,%ymm8,%ymm11
  DB  196,67,125,25,220,1         ; vextractf128 $0x1,%ymm11,%xmm12
  DB  196,193,25,114,212,13       ; vpsrld $0xd,%xmm12,%xmm12
  DB  196,193,33,114,211,13       ; vpsrld $0xd,%xmm11,%xmm11
  DB  197,60,89,234               ; vmulps %ymm2,%ymm8,%ymm13
  DB  196,67,125,25,238,1         ; vextractf128 $0x1,%ymm13,%xmm14
  DB  196,193,9,114,214,13        ; vpsrld $0xd,%xmm14,%xmm14
  DB  196,193,17,114,213,13       ; vpsrld $0xd,%xmm13,%xmm13
  DB  197,60,89,195               ; vmulps %ymm3,%ymm8,%ymm8
  DB  196,67,125,25,199,1         ; vextractf128 $0x1,%ymm8,%xmm15
  DB  196,193,1,114,215,13        ; vpsrld $0xd,%xmm15,%xmm15
  DB  196,193,57,114,208,13       ; vpsrld $0xd,%xmm8,%xmm8
  DB  196,193,33,115,251,2        ; vpslldq $0x2,%xmm11,%xmm11
  DB  196,65,33,235,201           ; vpor %xmm9,%xmm11,%xmm9
  DB  196,193,33,115,252,2        ; vpslldq $0x2,%xmm12,%xmm11
  DB  196,65,33,235,210           ; vpor %xmm10,%xmm11,%xmm10
  DB  196,193,57,115,248,2        ; vpslldq $0x2,%xmm8,%xmm8
  DB  196,65,57,235,197           ; vpor %xmm13,%xmm8,%xmm8
  DB  196,193,33,115,255,2        ; vpslldq $0x2,%xmm15,%xmm11
  DB  196,65,33,235,222           ; vpor %xmm14,%xmm11,%xmm11
  DB  196,65,49,98,224            ; vpunpckldq %xmm8,%xmm9,%xmm12
  DB  197,122,127,36,248          ; vmovdqu %xmm12,(%rax,%rdi,8)
  DB  196,65,49,106,192           ; vpunpckhdq %xmm8,%xmm9,%xmm8
  DB  197,122,127,68,248,16       ; vmovdqu %xmm8,0x10(%rax,%rdi,8)
  DB  196,65,41,98,195            ; vpunpckldq %xmm11,%xmm10,%xmm8
  DB  197,122,127,68,248,32       ; vmovdqu %xmm8,0x20(%rax,%rdi,8)
  DB  196,65,41,106,195           ; vpunpckhdq %xmm11,%xmm10,%xmm8
  DB  197,122,127,68,248,48       ; vmovdqu %xmm8,0x30(%rax,%rdi,8)
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; clamp_x: rax = context pointer to one float (limit). vpcmpeqd/vpaddd add
; -1 to the limit's integer bit pattern, i.e. decrement its bits by one;
; then ymm0 = max(0, min(ymm0, that value)).
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,0             ; vbroadcastss (%rax),%ymm8
  DB  196,67,125,25,193,1         ; vextractf128 $0x1,%ymm8,%xmm9
  DB  196,65,41,118,210           ; vpcmpeqd %xmm10,%xmm10,%xmm10
  DB  196,65,49,254,202           ; vpaddd %xmm10,%xmm9,%xmm9
  DB  196,65,57,254,194           ; vpaddd %xmm10,%xmm8,%xmm8
  DB  196,67,61,24,193,1          ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
  DB  196,193,124,93,192          ; vminps %ymm8,%ymm0,%ymm0
  DB  196,65,60,87,192            ; vxorps %ymm8,%ymm8,%ymm8
  DB  197,188,95,192              ; vmaxps %ymm0,%ymm8,%ymm0
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; ---------------------------------------------------------------------------
; One directive per line. A MASM ';' comment runs to end of line, so every DB
; row must sit on its own line. Byte values are generator output - do not
; hand-edit.
;
; Stage register usage, as set up by _sk_start_pipeline_sse41 below:
;   rdi  current element index       rsi  cursor into the 8-byte entry list
;   rdx  third caller argument, used by stages as a constants table
; `lods` reads the next entry into rax and advances rsi by 8; every stage
; ends by loading the next entry and jumping to it.
; ---------------------------------------------------------------------------

; clamp_y: rax = context pointer to one float (limit). vpcmpeqd/vpaddd add
; -1 to the limit's integer bit pattern, i.e. decrement its bits by one;
; then ymm1 = max(0, min(ymm1, that value)).
PUBLIC _sk_clamp_y_avx
_sk_clamp_y_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,0             ; vbroadcastss (%rax),%ymm8
  DB  196,67,125,25,193,1         ; vextractf128 $0x1,%ymm8,%xmm9
  DB  196,65,41,118,210           ; vpcmpeqd %xmm10,%xmm10,%xmm10
  DB  196,65,49,254,202           ; vpaddd %xmm10,%xmm9,%xmm9
  DB  196,65,57,254,194           ; vpaddd %xmm10,%xmm8,%xmm8
  DB  196,67,61,24,193,1          ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
  DB  196,193,116,93,200          ; vminps %ymm8,%ymm1,%ymm1
  DB  196,65,60,87,192            ; vxorps %ymm8,%ymm8,%ymm8
  DB  197,188,95,201              ; vmaxps %ymm1,%ymm8,%ymm1
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  255,224                     ; jmpq *%rax

; matrix_2x3: m = context pointer to six floats (byte offsets shown):
;   ymm0' = m[0x00]*ymm0 + m[0x08]*ymm1 + m[0x10]
;   ymm1' = m[0x04]*ymm0 + m[0x0c]*ymm1 + m[0x14]
; The new ymm0 is held in ymm8 until the old ymm0 is no longer needed.
PUBLIC _sk_matrix_2x3_avx
_sk_matrix_2x3_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,0             ; vbroadcastss (%rax),%ymm8
  DB  196,98,125,24,72,8          ; vbroadcastss 0x8(%rax),%ymm9
  DB  196,98,125,24,80,16         ; vbroadcastss 0x10(%rax),%ymm10
  DB  197,52,89,201               ; vmulps %ymm1,%ymm9,%ymm9
  DB  196,65,52,88,202            ; vaddps %ymm10,%ymm9,%ymm9
  DB  197,60,89,192               ; vmulps %ymm0,%ymm8,%ymm8
  DB  196,65,60,88,193            ; vaddps %ymm9,%ymm8,%ymm8
  DB  196,98,125,24,72,4          ; vbroadcastss 0x4(%rax),%ymm9
  DB  196,98,125,24,80,12         ; vbroadcastss 0xc(%rax),%ymm10
  DB  196,98,125,24,88,20         ; vbroadcastss 0x14(%rax),%ymm11
  DB  197,172,89,201              ; vmulps %ymm1,%ymm10,%ymm1
  DB  196,193,116,88,203          ; vaddps %ymm11,%ymm1,%ymm1
  DB  197,180,89,192              ; vmulps %ymm0,%ymm9,%ymm0
  DB  197,252,88,201              ; vaddps %ymm1,%ymm0,%ymm1
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,124,41,192              ; vmovaps %ymm8,%ymm0
  DB  255,224                     ; jmpq *%rax

; matrix_3x4: m = context pointer to twelve floats (byte offsets shown):
;   ymm0' = m[0x00]*ymm0 + m[0x0c]*ymm1 + m[0x18]*ymm2 + m[0x24]
;   ymm1' = m[0x04]*ymm0 + m[0x10]*ymm1 + m[0x1c]*ymm2 + m[0x28]
;   ymm2' = m[0x08]*ymm0 + m[0x14]*ymm1 + m[0x20]*ymm2 + m[0x2c]
; New ymm0/ymm1 are held in ymm8/ymm9 until all three rows are computed.
PUBLIC _sk_matrix_3x4_avx
_sk_matrix_3x4_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,98,125,24,0             ; vbroadcastss (%rax),%ymm8
  DB  196,98,125,24,72,12         ; vbroadcastss 0xc(%rax),%ymm9
  DB  196,98,125,24,80,24         ; vbroadcastss 0x18(%rax),%ymm10
  DB  196,98,125,24,88,36         ; vbroadcastss 0x24(%rax),%ymm11
  DB  197,44,89,210               ; vmulps %ymm2,%ymm10,%ymm10
  DB  196,65,44,88,211            ; vaddps %ymm11,%ymm10,%ymm10
  DB  197,52,89,201               ; vmulps %ymm1,%ymm9,%ymm9
  DB  196,65,52,88,202            ; vaddps %ymm10,%ymm9,%ymm9
  DB  197,60,89,192               ; vmulps %ymm0,%ymm8,%ymm8
  DB  196,65,60,88,193            ; vaddps %ymm9,%ymm8,%ymm8
  DB  196,98,125,24,72,4          ; vbroadcastss 0x4(%rax),%ymm9
  DB  196,98,125,24,80,16         ; vbroadcastss 0x10(%rax),%ymm10
  DB  196,98,125,24,88,28         ; vbroadcastss 0x1c(%rax),%ymm11
  DB  196,98,125,24,96,40         ; vbroadcastss 0x28(%rax),%ymm12
  DB  197,36,89,218               ; vmulps %ymm2,%ymm11,%ymm11
  DB  196,65,36,88,220            ; vaddps %ymm12,%ymm11,%ymm11
  DB  197,44,89,209               ; vmulps %ymm1,%ymm10,%ymm10
  DB  196,65,44,88,211            ; vaddps %ymm11,%ymm10,%ymm10
  DB  197,52,89,200               ; vmulps %ymm0,%ymm9,%ymm9
  DB  196,65,52,88,202            ; vaddps %ymm10,%ymm9,%ymm9
  DB  196,98,125,24,80,8          ; vbroadcastss 0x8(%rax),%ymm10
  DB  196,98,125,24,88,20         ; vbroadcastss 0x14(%rax),%ymm11
  DB  196,98,125,24,96,32         ; vbroadcastss 0x20(%rax),%ymm12
  DB  196,98,125,24,104,44        ; vbroadcastss 0x2c(%rax),%ymm13
  DB  197,156,89,210              ; vmulps %ymm2,%ymm12,%ymm2
  DB  196,193,108,88,213          ; vaddps %ymm13,%ymm2,%ymm2
  DB  197,164,89,201              ; vmulps %ymm1,%ymm11,%ymm1
  DB  197,244,88,202              ; vaddps %ymm2,%ymm1,%ymm1
  DB  197,172,89,192              ; vmulps %ymm0,%ymm10,%ymm0
  DB  197,252,88,209              ; vaddps %ymm1,%ymm0,%ymm2
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,124,41,192              ; vmovaps %ymm8,%ymm0
  DB  197,124,41,201              ; vmovaps %ymm9,%ymm1
  DB  255,224                     ; jmpq *%rax

; linear_gradient_2stops: c = context pointer to eight floats, t = ymm0:
;   ymm0' = c[0x00] + c[0x10]*t      ymm1 = c[0x04] + c[0x14]*t
;   ymm2  = c[0x08] + c[0x18]*t      ymm3 = c[0x0c] + c[0x1c]*t
; The new ymm0 is held in ymm8 because t is read by all four rows.
PUBLIC _sk_linear_gradient_2stops_avx
_sk_linear_gradient_2stops_avx LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  196,226,125,24,72,16        ; vbroadcastss 0x10(%rax),%ymm1
  DB  196,226,125,24,16           ; vbroadcastss (%rax),%ymm2
  DB  197,244,89,200              ; vmulps %ymm0,%ymm1,%ymm1
  DB  197,108,88,193              ; vaddps %ymm1,%ymm2,%ymm8
  DB  196,226,125,24,72,20        ; vbroadcastss 0x14(%rax),%ymm1
  DB  196,226,125,24,80,4         ; vbroadcastss 0x4(%rax),%ymm2
  DB  197,244,89,200              ; vmulps %ymm0,%ymm1,%ymm1
  DB  197,236,88,201              ; vaddps %ymm1,%ymm2,%ymm1
  DB  196,226,125,24,80,24        ; vbroadcastss 0x18(%rax),%ymm2
  DB  196,226,125,24,88,8         ; vbroadcastss 0x8(%rax),%ymm3
  DB  197,236,89,208              ; vmulps %ymm0,%ymm2,%ymm2
  DB  197,228,88,210              ; vaddps %ymm2,%ymm3,%ymm2
  DB  196,226,125,24,88,28        ; vbroadcastss 0x1c(%rax),%ymm3
  DB  196,98,125,24,72,12         ; vbroadcastss 0xc(%rax),%ymm9
  DB  197,228,89,192              ; vmulps %ymm0,%ymm3,%ymm0
  DB  197,180,88,216              ; vaddps %ymm0,%ymm9,%ymm3
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  197,124,41,192              ; vmovaps %ymm8,%ymm0
  DB  255,224                     ; jmpq *%rax

; start_pipeline (SSE4.1), entered with the Microsoft x64 convention:
;   rcx = starting index x, rdx = pointer to the entry list,
;   r8  = value handed to every stage in rdx, r9 = limit.
; Saves r15, r14, r13, r12, rsi, rdi, rbx and xmm6..xmm15 (all callee-saved
; on Win64). 7 pushes + 0xa0 bytes keep rsp 16-byte aligned for the movaps
; spills and the call. The first entry of the list is the first stage's
; address (kept in r12); r13 points just past it.
; Loop: while x + 4 <= limit (unsigned): zero xmm0..xmm7, call the first
; stage with rdi = x, rsi = r13, rdx = r8, then x += 4.
; Returns in rax the first index that was not processed.
; The jbe/jmp rows are relative branches inside this routine; their byte
; displacements depend on the exact instruction layout.
PUBLIC _sk_start_pipeline_sse41
_sk_start_pipeline_sse41 LABEL PROC
  DB  65,87                       ; push %r15
  DB  65,86                       ; push %r14
  DB  65,85                       ; push %r13
  DB  65,84                       ; push %r12
  DB  86                          ; push %rsi
  DB  87                          ; push %rdi
  DB  83                          ; push %rbx
  DB  72,129,236,160,0,0,0        ; sub $0xa0,%rsp
  DB  68,15,41,188,36,144,0,0,0   ; movaps %xmm15,0x90(%rsp)
  DB  68,15,41,180,36,128,0,0,0   ; movaps %xmm14,0x80(%rsp)
  DB  68,15,41,108,36,112         ; movaps %xmm13,0x70(%rsp)
  DB  68,15,41,100,36,96          ; movaps %xmm12,0x60(%rsp)
  DB  68,15,41,92,36,80           ; movaps %xmm11,0x50(%rsp)
  DB  68,15,41,84,36,64           ; movaps %xmm10,0x40(%rsp)
  DB  68,15,41,76,36,48           ; movaps %xmm9,0x30(%rsp)
  DB  68,15,41,68,36,32           ; movaps %xmm8,0x20(%rsp)
  DB  15,41,124,36,16             ; movaps %xmm7,0x10(%rsp)
  DB  15,41,52,36                 ; movaps %xmm6,(%rsp)
  DB  77,137,207                  ; mov %r9,%r15
  DB  77,137,198                  ; mov %r8,%r14
  DB  72,137,203                  ; mov %rcx,%rbx
  DB  72,137,214                  ; mov %rdx,%rsi
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  73,137,196                  ; mov %rax,%r12
  DB  73,137,245                  ; mov %rsi,%r13
  DB  72,141,67,4                 ; lea 0x4(%rbx),%rax
  DB  76,57,248                   ; cmp %r15,%rax
  DB  118,5                       ; jbe 73 <_sk_start_pipeline_sse41+0x73>
  DB  72,137,216                  ; mov %rbx,%rax
  DB  235,52                      ; jmp a7 <_sk_start_pipeline_sse41+0xa7>
  DB  15,87,192                   ; xorps %xmm0,%xmm0
  DB  15,87,201                   ; xorps %xmm1,%xmm1
  DB  15,87,210                   ; xorps %xmm2,%xmm2
  DB  15,87,219                   ; xorps %xmm3,%xmm3
  DB  15,87,228                   ; xorps %xmm4,%xmm4
  DB  15,87,237                   ; xorps %xmm5,%xmm5
  DB  15,87,246                   ; xorps %xmm6,%xmm6
  DB  15,87,255                   ; xorps %xmm7,%xmm7
  DB  72,137,223                  ; mov %rbx,%rdi
  DB  76,137,238                  ; mov %r13,%rsi
  DB  76,137,242                  ; mov %r14,%rdx
  DB  65,255,212                  ; callq *%r12
  DB  72,141,67,4                 ; lea 0x4(%rbx),%rax
  DB  72,131,195,8                ; add $0x8,%rbx
  DB  76,57,251                   ; cmp %r15,%rbx
  DB  72,137,195                  ; mov %rax,%rbx
  DB  118,204                     ; jbe 73 <_sk_start_pipeline_sse41+0x73>
  DB  15,40,52,36                 ; movaps (%rsp),%xmm6
  DB  15,40,124,36,16             ; movaps 0x10(%rsp),%xmm7
  DB  68,15,40,68,36,32           ; movaps 0x20(%rsp),%xmm8
  DB  68,15,40,76,36,48           ; movaps 0x30(%rsp),%xmm9
  DB  68,15,40,84,36,64           ; movaps 0x40(%rsp),%xmm10
  DB  68,15,40,92,36,80           ; movaps 0x50(%rsp),%xmm11
  DB  68,15,40,100,36,96          ; movaps 0x60(%rsp),%xmm12
  DB  68,15,40,108,36,112         ; movaps 0x70(%rsp),%xmm13
  DB  68,15,40,180,36,128,0,0,0   ; movaps 0x80(%rsp),%xmm14
  DB  68,15,40,188,36,144,0,0,0   ; movaps 0x90(%rsp),%xmm15
  DB  72,129,196,160,0,0,0        ; add $0xa0,%rsp
  DB  91                          ; pop %rbx
  DB  95                          ; pop %rdi
  DB  94                          ; pop %rsi
  DB  65,92                       ; pop %r12
  DB  65,93                       ; pop %r13
  DB  65,94                       ; pop %r14
  DB  65,95                       ; pop %r15
  DB  195                         ; retq

; just_return: a single ret. Since stages chain with jmp, this returns
; directly to the callq in the start_pipeline routine.
PUBLIC _sk_just_return_sse41
_sk_just_return_sse41 LABEL PROC
  DB  195                         ; retq

; seed_shader (SSE4.1). NOTE: this routine continues past the rows below;
; only its opening instructions are reproduced here.
PUBLIC _sk_seed_shader_sse41
_sk_seed_shader_sse41 LABEL PROC
  DB  72,173                      ; lods %ds:(%rsi),%rax
  DB  102,15,110,199              ; movd %edi,%xmm0
  DB  102,15,112,192,0            ; pshufd $0x0,%xmm0,%xmm0
  DB  15,91,200                   ; cvtdq2ps %xmm0,%xmm1
  DB  243,15,16,18                ; movss (%rdx),%xmm2
  DB  243,15,16,90,4              ; movss 0x4(%rdx),%xmm3
  DB  15,198,219,0                ; shufps $0x0,%xmm3,%xmm3
  DB  15,88,203                   ; addps %xmm3,%xmm1
  DB  15,16,66,20                 ; movups 0x14(%rdx),%xmm0
  DB  15,88,193                   ; addps %xmm1,%xmm0
1565 DB 102,15,110,8 ; movd (%rax),%xmm1 1566 DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1 1567 DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1 1568 DB 15,88,203 ; addps %xmm3,%xmm1 1569 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 1570 DB 72,173 ; lods %ds:(%rsi),%rax 1571 DB 15,87,219 ; xorps %xmm3,%xmm3 1572 DB 15,87,228 ; xorps %xmm4,%xmm4 1573 DB 15,87,237 ; xorps %xmm5,%xmm5 1574 DB 15,87,246 ; xorps %xmm6,%xmm6 1575 DB 15,87,255 ; xorps %xmm7,%xmm7 1576 DB 255,224 ; jmpq *%rax 1577 1578PUBLIC _sk_constant_color_sse41 1579_sk_constant_color_sse41 LABEL PROC 1580 DB 72,173 ; lods %ds:(%rsi),%rax 1581 DB 15,16,24 ; movups (%rax),%xmm3 1582 DB 15,40,195 ; movaps %xmm3,%xmm0 1583 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 1584 DB 15,40,203 ; movaps %xmm3,%xmm1 1585 DB 15,198,201,85 ; shufps $0x55,%xmm1,%xmm1 1586 DB 15,40,211 ; movaps %xmm3,%xmm2 1587 DB 15,198,210,170 ; shufps $0xaa,%xmm2,%xmm2 1588 DB 15,198,219,255 ; shufps $0xff,%xmm3,%xmm3 1589 DB 72,173 ; lods %ds:(%rsi),%rax 1590 DB 255,224 ; jmpq *%rax 1591 1592PUBLIC _sk_clear_sse41 1593_sk_clear_sse41 LABEL PROC 1594 DB 72,173 ; lods %ds:(%rsi),%rax 1595 DB 72,173 ; lods %ds:(%rsi),%rax 1596 DB 15,87,192 ; xorps %xmm0,%xmm0 1597 DB 15,87,201 ; xorps %xmm1,%xmm1 1598 DB 15,87,210 ; xorps %xmm2,%xmm2 1599 DB 15,87,219 ; xorps %xmm3,%xmm3 1600 DB 255,224 ; jmpq *%rax 1601 1602PUBLIC _sk_plus__sse41 1603_sk_plus__sse41 LABEL PROC 1604 DB 72,173 ; lods %ds:(%rsi),%rax 1605 DB 15,88,196 ; addps %xmm4,%xmm0 1606 DB 15,88,205 ; addps %xmm5,%xmm1 1607 DB 15,88,214 ; addps %xmm6,%xmm2 1608 DB 15,88,223 ; addps %xmm7,%xmm3 1609 DB 72,173 ; lods %ds:(%rsi),%rax 1610 DB 255,224 ; jmpq *%rax 1611 1612PUBLIC _sk_srcover_sse41 1613_sk_srcover_sse41 LABEL PROC 1614 DB 72,173 ; lods %ds:(%rsi),%rax 1615 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 1616 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 1617 DB 68,15,92,195 ; subps %xmm3,%xmm8 1618 DB 69,15,40,200 ; movaps %xmm8,%xmm9 1619 DB 68,15,89,204 ; mulps %xmm4,%xmm9 1620 DB 65,15,88,193 ; 
addps %xmm9,%xmm0 1621 DB 69,15,40,200 ; movaps %xmm8,%xmm9 1622 DB 68,15,89,205 ; mulps %xmm5,%xmm9 1623 DB 65,15,88,201 ; addps %xmm9,%xmm1 1624 DB 69,15,40,200 ; movaps %xmm8,%xmm9 1625 DB 68,15,89,206 ; mulps %xmm6,%xmm9 1626 DB 65,15,88,209 ; addps %xmm9,%xmm2 1627 DB 68,15,89,199 ; mulps %xmm7,%xmm8 1628 DB 65,15,88,216 ; addps %xmm8,%xmm3 1629 DB 72,173 ; lods %ds:(%rsi),%rax 1630 DB 255,224 ; jmpq *%rax 1631 1632PUBLIC _sk_dstover_sse41 1633_sk_dstover_sse41 LABEL PROC 1634 DB 72,173 ; lods %ds:(%rsi),%rax 1635 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 1636 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 1637 DB 68,15,92,199 ; subps %xmm7,%xmm8 1638 DB 65,15,89,192 ; mulps %xmm8,%xmm0 1639 DB 15,88,196 ; addps %xmm4,%xmm0 1640 DB 65,15,89,200 ; mulps %xmm8,%xmm1 1641 DB 15,88,205 ; addps %xmm5,%xmm1 1642 DB 65,15,89,208 ; mulps %xmm8,%xmm2 1643 DB 15,88,214 ; addps %xmm6,%xmm2 1644 DB 65,15,89,216 ; mulps %xmm8,%xmm3 1645 DB 15,88,223 ; addps %xmm7,%xmm3 1646 DB 72,173 ; lods %ds:(%rsi),%rax 1647 DB 255,224 ; jmpq *%rax 1648 1649PUBLIC _sk_clamp_0_sse41 1650_sk_clamp_0_sse41 LABEL PROC 1651 DB 72,173 ; lods %ds:(%rsi),%rax 1652 DB 69,15,87,192 ; xorps %xmm8,%xmm8 1653 DB 65,15,95,192 ; maxps %xmm8,%xmm0 1654 DB 65,15,95,200 ; maxps %xmm8,%xmm1 1655 DB 65,15,95,208 ; maxps %xmm8,%xmm2 1656 DB 65,15,95,216 ; maxps %xmm8,%xmm3 1657 DB 72,173 ; lods %ds:(%rsi),%rax 1658 DB 255,224 ; jmpq *%rax 1659 1660PUBLIC _sk_clamp_1_sse41 1661_sk_clamp_1_sse41 LABEL PROC 1662 DB 72,173 ; lods %ds:(%rsi),%rax 1663 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 1664 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 1665 DB 65,15,93,192 ; minps %xmm8,%xmm0 1666 DB 65,15,93,200 ; minps %xmm8,%xmm1 1667 DB 65,15,93,208 ; minps %xmm8,%xmm2 1668 DB 65,15,93,216 ; minps %xmm8,%xmm3 1669 DB 72,173 ; lods %ds:(%rsi),%rax 1670 DB 255,224 ; jmpq *%rax 1671 1672PUBLIC _sk_clamp_a_sse41 1673_sk_clamp_a_sse41 LABEL PROC 1674 DB 72,173 ; lods %ds:(%rsi),%rax 1675 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 1676 
DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 1677 DB 65,15,93,216 ; minps %xmm8,%xmm3 1678 DB 15,93,195 ; minps %xmm3,%xmm0 1679 DB 15,93,203 ; minps %xmm3,%xmm1 1680 DB 15,93,211 ; minps %xmm3,%xmm2 1681 DB 72,173 ; lods %ds:(%rsi),%rax 1682 DB 255,224 ; jmpq *%rax 1683 1684PUBLIC _sk_set_rgb_sse41 1685_sk_set_rgb_sse41 LABEL PROC 1686 DB 72,173 ; lods %ds:(%rsi),%rax 1687 DB 243,15,16,0 ; movss (%rax),%xmm0 1688 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 1689 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 1690 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 1691 DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2 1692 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 1693 DB 72,173 ; lods %ds:(%rsi),%rax 1694 DB 255,224 ; jmpq *%rax 1695 1696PUBLIC _sk_swap_rb_sse41 1697_sk_swap_rb_sse41 LABEL PROC 1698 DB 68,15,40,192 ; movaps %xmm0,%xmm8 1699 DB 72,173 ; lods %ds:(%rsi),%rax 1700 DB 72,173 ; lods %ds:(%rsi),%rax 1701 DB 15,40,194 ; movaps %xmm2,%xmm0 1702 DB 65,15,40,208 ; movaps %xmm8,%xmm2 1703 DB 255,224 ; jmpq *%rax 1704 1705PUBLIC _sk_swap_sse41 1706_sk_swap_sse41 LABEL PROC 1707 DB 68,15,40,195 ; movaps %xmm3,%xmm8 1708 DB 68,15,40,202 ; movaps %xmm2,%xmm9 1709 DB 68,15,40,209 ; movaps %xmm1,%xmm10 1710 DB 68,15,40,216 ; movaps %xmm0,%xmm11 1711 DB 72,173 ; lods %ds:(%rsi),%rax 1712 DB 72,173 ; lods %ds:(%rsi),%rax 1713 DB 15,40,196 ; movaps %xmm4,%xmm0 1714 DB 15,40,205 ; movaps %xmm5,%xmm1 1715 DB 15,40,214 ; movaps %xmm6,%xmm2 1716 DB 15,40,223 ; movaps %xmm7,%xmm3 1717 DB 65,15,40,227 ; movaps %xmm11,%xmm4 1718 DB 65,15,40,234 ; movaps %xmm10,%xmm5 1719 DB 65,15,40,241 ; movaps %xmm9,%xmm6 1720 DB 65,15,40,248 ; movaps %xmm8,%xmm7 1721 DB 255,224 ; jmpq *%rax 1722 1723PUBLIC _sk_move_src_dst_sse41 1724_sk_move_src_dst_sse41 LABEL PROC 1725 DB 72,173 ; lods %ds:(%rsi),%rax 1726 DB 72,173 ; lods %ds:(%rsi),%rax 1727 DB 15,40,224 ; movaps %xmm0,%xmm4 1728 DB 15,40,233 ; movaps %xmm1,%xmm5 1729 DB 15,40,242 ; movaps %xmm2,%xmm6 1730 DB 15,40,251 ; movaps %xmm3,%xmm7 1731 DB 255,224 ; 
jmpq *%rax 1732 1733PUBLIC _sk_move_dst_src_sse41 1734_sk_move_dst_src_sse41 LABEL PROC 1735 DB 72,173 ; lods %ds:(%rsi),%rax 1736 DB 72,173 ; lods %ds:(%rsi),%rax 1737 DB 15,40,196 ; movaps %xmm4,%xmm0 1738 DB 15,40,205 ; movaps %xmm5,%xmm1 1739 DB 15,40,214 ; movaps %xmm6,%xmm2 1740 DB 15,40,223 ; movaps %xmm7,%xmm3 1741 DB 255,224 ; jmpq *%rax 1742 1743PUBLIC _sk_premul_sse41 1744_sk_premul_sse41 LABEL PROC 1745 DB 72,173 ; lods %ds:(%rsi),%rax 1746 DB 15,89,195 ; mulps %xmm3,%xmm0 1747 DB 15,89,203 ; mulps %xmm3,%xmm1 1748 DB 15,89,211 ; mulps %xmm3,%xmm2 1749 DB 72,173 ; lods %ds:(%rsi),%rax 1750 DB 255,224 ; jmpq *%rax 1751 1752PUBLIC _sk_unpremul_sse41 1753_sk_unpremul_sse41 LABEL PROC 1754 DB 68,15,40,192 ; movaps %xmm0,%xmm8 1755 DB 72,173 ; lods %ds:(%rsi),%rax 1756 DB 69,15,87,201 ; xorps %xmm9,%xmm9 1757 DB 243,68,15,16,18 ; movss (%rdx),%xmm10 1758 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 1759 DB 68,15,94,211 ; divps %xmm3,%xmm10 1760 DB 15,40,195 ; movaps %xmm3,%xmm0 1761 DB 65,15,194,193,0 ; cmpeqps %xmm9,%xmm0 1762 DB 102,69,15,56,20,209 ; blendvps %xmm0,%xmm9,%xmm10 1763 DB 69,15,89,194 ; mulps %xmm10,%xmm8 1764 DB 65,15,89,202 ; mulps %xmm10,%xmm1 1765 DB 65,15,89,210 ; mulps %xmm10,%xmm2 1766 DB 72,173 ; lods %ds:(%rsi),%rax 1767 DB 65,15,40,192 ; movaps %xmm8,%xmm0 1768 DB 255,224 ; jmpq *%rax 1769 1770PUBLIC _sk_from_srgb_sse41 1771_sk_from_srgb_sse41 LABEL PROC 1772 DB 72,173 ; lods %ds:(%rsi),%rax 1773 DB 243,68,15,16,90,64 ; movss 0x40(%rdx),%xmm11 1774 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 1775 DB 69,15,40,211 ; movaps %xmm11,%xmm10 1776 DB 68,15,89,208 ; mulps %xmm0,%xmm10 1777 DB 68,15,40,240 ; movaps %xmm0,%xmm14 1778 DB 69,15,89,246 ; mulps %xmm14,%xmm14 1779 DB 243,68,15,16,66,60 ; movss 0x3c(%rdx),%xmm8 1780 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 1781 DB 243,68,15,16,98,52 ; movss 0x34(%rdx),%xmm12 1782 DB 243,68,15,16,106,56 ; movss 0x38(%rdx),%xmm13 1783 DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 1784 DB 
69,15,40,200 ; movaps %xmm8,%xmm9 1785 DB 68,15,89,200 ; mulps %xmm0,%xmm9 1786 DB 69,15,88,205 ; addps %xmm13,%xmm9 1787 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 1788 DB 69,15,89,206 ; mulps %xmm14,%xmm9 1789 DB 69,15,88,204 ; addps %xmm12,%xmm9 1790 DB 243,68,15,16,114,68 ; movss 0x44(%rdx),%xmm14 1791 DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 1792 DB 65,15,194,198,1 ; cmpltps %xmm14,%xmm0 1793 DB 102,69,15,56,20,202 ; blendvps %xmm0,%xmm10,%xmm9 1794 DB 69,15,40,251 ; movaps %xmm11,%xmm15 1795 DB 68,15,89,249 ; mulps %xmm1,%xmm15 1796 DB 15,40,193 ; movaps %xmm1,%xmm0 1797 DB 15,89,192 ; mulps %xmm0,%xmm0 1798 DB 69,15,40,208 ; movaps %xmm8,%xmm10 1799 DB 68,15,89,209 ; mulps %xmm1,%xmm10 1800 DB 69,15,88,213 ; addps %xmm13,%xmm10 1801 DB 68,15,89,208 ; mulps %xmm0,%xmm10 1802 DB 69,15,88,212 ; addps %xmm12,%xmm10 1803 DB 65,15,194,206,1 ; cmpltps %xmm14,%xmm1 1804 DB 15,40,193 ; movaps %xmm1,%xmm0 1805 DB 102,69,15,56,20,215 ; blendvps %xmm0,%xmm15,%xmm10 1806 DB 68,15,89,218 ; mulps %xmm2,%xmm11 1807 DB 15,40,194 ; movaps %xmm2,%xmm0 1808 DB 15,89,192 ; mulps %xmm0,%xmm0 1809 DB 68,15,89,194 ; mulps %xmm2,%xmm8 1810 DB 69,15,88,197 ; addps %xmm13,%xmm8 1811 DB 68,15,89,192 ; mulps %xmm0,%xmm8 1812 DB 69,15,88,196 ; addps %xmm12,%xmm8 1813 DB 65,15,194,214,1 ; cmpltps %xmm14,%xmm2 1814 DB 15,40,194 ; movaps %xmm2,%xmm0 1815 DB 102,69,15,56,20,195 ; blendvps %xmm0,%xmm11,%xmm8 1816 DB 72,173 ; lods %ds:(%rsi),%rax 1817 DB 65,15,40,193 ; movaps %xmm9,%xmm0 1818 DB 65,15,40,202 ; movaps %xmm10,%xmm1 1819 DB 65,15,40,208 ; movaps %xmm8,%xmm2 1820 DB 255,224 ; jmpq *%rax 1821 1822PUBLIC _sk_to_srgb_sse41 1823_sk_to_srgb_sse41 LABEL PROC 1824 DB 72,131,236,24 ; sub $0x18,%rsp 1825 DB 15,41,60,36 ; movaps %xmm7,(%rsp) 1826 DB 15,40,254 ; movaps %xmm6,%xmm7 1827 DB 15,40,245 ; movaps %xmm5,%xmm6 1828 DB 15,40,236 ; movaps %xmm4,%xmm5 1829 DB 15,40,227 ; movaps %xmm3,%xmm4 1830 DB 68,15,40,194 ; movaps %xmm2,%xmm8 1831 DB 15,40,217 ; movaps %xmm1,%xmm3 1832 
DB 15,82,208 ; rsqrtps %xmm0,%xmm2 1833 DB 68,15,83,202 ; rcpps %xmm2,%xmm9 1834 DB 68,15,82,210 ; rsqrtps %xmm2,%xmm10 1835 DB 243,15,16,18 ; movss (%rdx),%xmm2 1836 DB 243,68,15,16,90,72 ; movss 0x48(%rdx),%xmm11 1837 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 1838 DB 65,15,40,203 ; movaps %xmm11,%xmm1 1839 DB 15,89,200 ; mulps %xmm0,%xmm1 1840 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 1841 DB 243,68,15,16,98,76 ; movss 0x4c(%rdx),%xmm12 1842 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 1843 DB 243,68,15,16,106,80 ; movss 0x50(%rdx),%xmm13 1844 DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 1845 DB 243,68,15,16,114,84 ; movss 0x54(%rdx),%xmm14 1846 DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 1847 DB 69,15,89,205 ; mulps %xmm13,%xmm9 1848 DB 69,15,88,206 ; addps %xmm14,%xmm9 1849 DB 69,15,89,212 ; mulps %xmm12,%xmm10 1850 DB 69,15,88,209 ; addps %xmm9,%xmm10 1851 DB 68,15,40,202 ; movaps %xmm2,%xmm9 1852 DB 69,15,93,202 ; minps %xmm10,%xmm9 1853 DB 243,68,15,16,122,88 ; movss 0x58(%rdx),%xmm15 1854 DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15 1855 DB 65,15,194,199,1 ; cmpltps %xmm15,%xmm0 1856 DB 102,68,15,56,20,201 ; blendvps %xmm0,%xmm1,%xmm9 1857 DB 15,82,195 ; rsqrtps %xmm3,%xmm0 1858 DB 15,83,200 ; rcpps %xmm0,%xmm1 1859 DB 15,82,192 ; rsqrtps %xmm0,%xmm0 1860 DB 65,15,89,205 ; mulps %xmm13,%xmm1 1861 DB 65,15,88,206 ; addps %xmm14,%xmm1 1862 DB 65,15,89,196 ; mulps %xmm12,%xmm0 1863 DB 15,88,193 ; addps %xmm1,%xmm0 1864 DB 68,15,40,210 ; movaps %xmm2,%xmm10 1865 DB 68,15,93,208 ; minps %xmm0,%xmm10 1866 DB 65,15,40,203 ; movaps %xmm11,%xmm1 1867 DB 15,89,203 ; mulps %xmm3,%xmm1 1868 DB 65,15,194,223,1 ; cmpltps %xmm15,%xmm3 1869 DB 15,40,195 ; movaps %xmm3,%xmm0 1870 DB 102,68,15,56,20,209 ; blendvps %xmm0,%xmm1,%xmm10 1871 DB 65,15,82,192 ; rsqrtps %xmm8,%xmm0 1872 DB 15,83,200 ; rcpps %xmm0,%xmm1 1873 DB 65,15,89,205 ; mulps %xmm13,%xmm1 1874 DB 65,15,88,206 ; addps %xmm14,%xmm1 1875 DB 15,82,192 ; rsqrtps %xmm0,%xmm0 1876 DB 65,15,89,196 ; 
mulps %xmm12,%xmm0 1877 DB 15,88,193 ; addps %xmm1,%xmm0 1878 DB 15,93,208 ; minps %xmm0,%xmm2 1879 DB 69,15,89,216 ; mulps %xmm8,%xmm11 1880 DB 69,15,194,199,1 ; cmpltps %xmm15,%xmm8 1881 DB 65,15,40,192 ; movaps %xmm8,%xmm0 1882 DB 102,65,15,56,20,211 ; blendvps %xmm0,%xmm11,%xmm2 1883 DB 72,173 ; lods %ds:(%rsi),%rax 1884 DB 72,173 ; lods %ds:(%rsi),%rax 1885 DB 65,15,40,193 ; movaps %xmm9,%xmm0 1886 DB 65,15,40,202 ; movaps %xmm10,%xmm1 1887 DB 15,40,220 ; movaps %xmm4,%xmm3 1888 DB 15,40,229 ; movaps %xmm5,%xmm4 1889 DB 15,40,238 ; movaps %xmm6,%xmm5 1890 DB 15,40,247 ; movaps %xmm7,%xmm6 1891 DB 15,40,60,36 ; movaps (%rsp),%xmm7 1892 DB 72,131,196,24 ; add $0x18,%rsp 1893 DB 255,224 ; jmpq *%rax 1894 1895PUBLIC _sk_scale_u8_sse41 1896_sk_scale_u8_sse41 LABEL PROC 1897 DB 72,173 ; lods %ds:(%rsi),%rax 1898 DB 72,139,0 ; mov (%rax),%rax 1899 DB 102,68,15,56,49,4,56 ; pmovzxbd (%rax,%rdi,1),%xmm8 1900 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 1901 DB 243,68,15,16,74,12 ; movss 0xc(%rdx),%xmm9 1902 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 1903 DB 69,15,89,200 ; mulps %xmm8,%xmm9 1904 DB 65,15,89,193 ; mulps %xmm9,%xmm0 1905 DB 65,15,89,201 ; mulps %xmm9,%xmm1 1906 DB 65,15,89,209 ; mulps %xmm9,%xmm2 1907 DB 65,15,89,217 ; mulps %xmm9,%xmm3 1908 DB 72,173 ; lods %ds:(%rsi),%rax 1909 DB 255,224 ; jmpq *%rax 1910 1911PUBLIC _sk_lerp_u8_sse41 1912_sk_lerp_u8_sse41 LABEL PROC 1913 DB 72,173 ; lods %ds:(%rsi),%rax 1914 DB 72,139,0 ; mov (%rax),%rax 1915 DB 102,68,15,56,49,4,56 ; pmovzxbd (%rax,%rdi,1),%xmm8 1916 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 1917 DB 243,68,15,16,74,12 ; movss 0xc(%rdx),%xmm9 1918 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 1919 DB 69,15,89,200 ; mulps %xmm8,%xmm9 1920 DB 15,92,196 ; subps %xmm4,%xmm0 1921 DB 65,15,89,193 ; mulps %xmm9,%xmm0 1922 DB 15,88,196 ; addps %xmm4,%xmm0 1923 DB 15,92,205 ; subps %xmm5,%xmm1 1924 DB 65,15,89,201 ; mulps %xmm9,%xmm1 1925 DB 15,88,205 ; addps %xmm5,%xmm1 1926 DB 15,92,214 ; subps %xmm6,%xmm2 1927 DB 
65,15,89,209 ; mulps %xmm9,%xmm2 1928 DB 15,88,214 ; addps %xmm6,%xmm2 1929 DB 15,92,223 ; subps %xmm7,%xmm3 1930 DB 65,15,89,217 ; mulps %xmm9,%xmm3 1931 DB 15,88,223 ; addps %xmm7,%xmm3 1932 DB 72,173 ; lods %ds:(%rsi),%rax 1933 DB 255,224 ; jmpq *%rax 1934 1935PUBLIC _sk_load_tables_sse41 1936_sk_load_tables_sse41 LABEL PROC 1937 DB 72,173 ; lods %ds:(%rsi),%rax 1938 DB 72,139,8 ; mov (%rax),%rcx 1939 DB 76,139,64,8 ; mov 0x8(%rax),%r8 1940 DB 243,68,15,111,4,185 ; movdqu (%rcx,%rdi,4),%xmm8 1941 DB 102,15,110,66,16 ; movd 0x10(%rdx),%xmm0 1942 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 1943 DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1 1944 DB 102,15,114,209,8 ; psrld $0x8,%xmm1 1945 DB 102,15,219,200 ; pand %xmm0,%xmm1 1946 DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2 1947 DB 102,15,114,210,16 ; psrld $0x10,%xmm2 1948 DB 102,15,219,208 ; pand %xmm0,%xmm2 1949 DB 102,65,15,219,192 ; pand %xmm8,%xmm0 1950 DB 102,72,15,58,22,193,1 ; pextrq $0x1,%xmm0,%rcx 1951 DB 65,137,201 ; mov %ecx,%r9d 1952 DB 72,193,233,32 ; shr $0x20,%rcx 1953 DB 102,73,15,126,194 ; movq %xmm0,%r10 1954 DB 69,137,211 ; mov %r10d,%r11d 1955 DB 73,193,234,32 ; shr $0x20,%r10 1956 DB 243,67,15,16,4,152 ; movss (%r8,%r11,4),%xmm0 1957 DB 102,67,15,58,33,4,144,16 ; insertps $0x10,(%r8,%r10,4),%xmm0 1958 DB 102,67,15,58,33,4,136,32 ; insertps $0x20,(%r8,%r9,4),%xmm0 1959 DB 102,65,15,58,33,4,136,48 ; insertps $0x30,(%r8,%rcx,4),%xmm0 1960 DB 72,139,72,16 ; mov 0x10(%rax),%rcx 1961 DB 102,73,15,58,22,200,1 ; pextrq $0x1,%xmm1,%r8 1962 DB 69,137,193 ; mov %r8d,%r9d 1963 DB 73,193,232,32 ; shr $0x20,%r8 1964 DB 102,73,15,126,202 ; movq %xmm1,%r10 1965 DB 69,137,211 ; mov %r10d,%r11d 1966 DB 73,193,234,32 ; shr $0x20,%r10 1967 DB 243,66,15,16,12,153 ; movss (%rcx,%r11,4),%xmm1 1968 DB 102,66,15,58,33,12,145,16 ; insertps $0x10,(%rcx,%r10,4),%xmm1 1969 DB 243,66,15,16,28,137 ; movss (%rcx,%r9,4),%xmm3 1970 DB 102,15,58,33,203,32 ; insertps $0x20,%xmm3,%xmm1 1971 DB 243,66,15,16,28,129 ; movss 
(%rcx,%r8,4),%xmm3 1972 DB 102,15,58,33,203,48 ; insertps $0x30,%xmm3,%xmm1 1973 DB 72,139,64,24 ; mov 0x18(%rax),%rax 1974 DB 102,72,15,58,22,209,1 ; pextrq $0x1,%xmm2,%rcx 1975 DB 65,137,200 ; mov %ecx,%r8d 1976 DB 72,193,233,32 ; shr $0x20,%rcx 1977 DB 102,73,15,126,209 ; movq %xmm2,%r9 1978 DB 69,137,202 ; mov %r9d,%r10d 1979 DB 73,193,233,32 ; shr $0x20,%r9 1980 DB 243,66,15,16,20,144 ; movss (%rax,%r10,4),%xmm2 1981 DB 102,66,15,58,33,20,136,16 ; insertps $0x10,(%rax,%r9,4),%xmm2 1982 DB 243,66,15,16,28,128 ; movss (%rax,%r8,4),%xmm3 1983 DB 102,15,58,33,211,32 ; insertps $0x20,%xmm3,%xmm2 1984 DB 243,15,16,28,136 ; movss (%rax,%rcx,4),%xmm3 1985 DB 102,15,58,33,211,48 ; insertps $0x30,%xmm3,%xmm2 1986 DB 102,65,15,114,208,24 ; psrld $0x18,%xmm8 1987 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 1988 DB 243,15,16,90,12 ; movss 0xc(%rdx),%xmm3 1989 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 1990 DB 65,15,89,216 ; mulps %xmm8,%xmm3 1991 DB 72,173 ; lods %ds:(%rsi),%rax 1992 DB 255,224 ; jmpq *%rax 1993 1994PUBLIC _sk_load_565_sse41 1995_sk_load_565_sse41 LABEL PROC 1996 DB 72,173 ; lods %ds:(%rsi),%rax 1997 DB 72,139,0 ; mov (%rax),%rax 1998 DB 102,68,15,56,51,12,120 ; pmovzxwd (%rax,%rdi,2),%xmm9 1999 DB 102,15,110,66,104 ; movd 0x68(%rdx),%xmm0 2000 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2001 DB 102,65,15,219,193 ; pand %xmm9,%xmm0 2002 DB 15,91,200 ; cvtdq2ps %xmm0,%xmm1 2003 DB 243,15,16,26 ; movss (%rdx),%xmm3 2004 DB 243,15,16,66,116 ; movss 0x74(%rdx),%xmm0 2005 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2006 DB 15,89,193 ; mulps %xmm1,%xmm0 2007 DB 102,15,110,74,108 ; movd 0x6c(%rdx),%xmm1 2008 DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1 2009 DB 102,65,15,219,201 ; pand %xmm9,%xmm1 2010 DB 68,15,91,193 ; cvtdq2ps %xmm1,%xmm8 2011 DB 243,15,16,74,120 ; movss 0x78(%rdx),%xmm1 2012 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2013 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2014 DB 102,15,110,82,112 ; movd 0x70(%rdx),%xmm2 2015 DB 102,15,112,210,0 ; pshufd 
$0x0,%xmm2,%xmm2 2016 DB 102,65,15,219,209 ; pand %xmm9,%xmm2 2017 DB 68,15,91,194 ; cvtdq2ps %xmm2,%xmm8 2018 DB 243,15,16,82,124 ; movss 0x7c(%rdx),%xmm2 2019 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 2020 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2021 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 2022 DB 72,173 ; lods %ds:(%rsi),%rax 2023 DB 255,224 ; jmpq *%rax 2024 2025PUBLIC _sk_store_565_sse41 2026_sk_store_565_sse41 LABEL PROC 2027 DB 72,173 ; lods %ds:(%rsi),%rax 2028 DB 72,139,0 ; mov (%rax),%rax 2029 DB 243,68,15,16,130,128,0,0,0 ; movss 0x80(%rdx),%xmm8 2030 DB 243,68,15,16,138,132,0,0,0 ; movss 0x84(%rdx),%xmm9 2031 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2032 DB 69,15,40,208 ; movaps %xmm8,%xmm10 2033 DB 68,15,89,208 ; mulps %xmm0,%xmm10 2034 DB 102,69,15,91,210 ; cvtps2dq %xmm10,%xmm10 2035 DB 102,65,15,114,242,11 ; pslld $0xb,%xmm10 2036 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2037 DB 68,15,89,201 ; mulps %xmm1,%xmm9 2038 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2039 DB 102,65,15,114,241,5 ; pslld $0x5,%xmm9 2040 DB 102,69,15,235,202 ; por %xmm10,%xmm9 2041 DB 68,15,89,194 ; mulps %xmm2,%xmm8 2042 DB 102,69,15,91,192 ; cvtps2dq %xmm8,%xmm8 2043 DB 102,69,15,86,193 ; orpd %xmm9,%xmm8 2044 DB 102,69,15,56,43,192 ; packusdw %xmm8,%xmm8 2045 DB 102,68,15,214,4,120 ; movq %xmm8,(%rax,%rdi,2) 2046 DB 72,173 ; lods %ds:(%rsi),%rax 2047 DB 255,224 ; jmpq *%rax 2048 2049PUBLIC _sk_load_8888_sse41 2050_sk_load_8888_sse41 LABEL PROC 2051 DB 72,173 ; lods %ds:(%rsi),%rax 2052 DB 72,139,0 ; mov (%rax),%rax 2053 DB 243,15,111,28,184 ; movdqu (%rax,%rdi,4),%xmm3 2054 DB 102,15,110,66,16 ; movd 0x10(%rdx),%xmm0 2055 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2056 DB 102,15,111,203 ; movdqa %xmm3,%xmm1 2057 DB 102,15,114,209,8 ; psrld $0x8,%xmm1 2058 DB 102,15,219,200 ; pand %xmm0,%xmm1 2059 DB 102,15,111,211 ; movdqa %xmm3,%xmm2 2060 DB 102,15,114,210,16 ; psrld $0x10,%xmm2 2061 DB 102,15,219,208 ; pand %xmm0,%xmm2 2062 DB 102,15,219,195 ; pand %xmm3,%xmm0 
2063 DB 15,91,192 ; cvtdq2ps %xmm0,%xmm0 2064 DB 243,68,15,16,66,12 ; movss 0xc(%rdx),%xmm8 2065 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2066 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2067 DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1 2068 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2069 DB 15,91,210 ; cvtdq2ps %xmm2,%xmm2 2070 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2071 DB 102,15,114,211,24 ; psrld $0x18,%xmm3 2072 DB 15,91,219 ; cvtdq2ps %xmm3,%xmm3 2073 DB 65,15,89,216 ; mulps %xmm8,%xmm3 2074 DB 72,173 ; lods %ds:(%rsi),%rax 2075 DB 255,224 ; jmpq *%rax 2076 2077PUBLIC _sk_store_8888_sse41 2078_sk_store_8888_sse41 LABEL PROC 2079 DB 72,173 ; lods %ds:(%rsi),%rax 2080 DB 72,139,0 ; mov (%rax),%rax 2081 DB 243,68,15,16,66,8 ; movss 0x8(%rdx),%xmm8 2082 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2083 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2084 DB 68,15,89,200 ; mulps %xmm0,%xmm9 2085 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2086 DB 69,15,40,208 ; movaps %xmm8,%xmm10 2087 DB 68,15,89,209 ; mulps %xmm1,%xmm10 2088 DB 102,69,15,91,210 ; cvtps2dq %xmm10,%xmm10 2089 DB 102,65,15,114,242,8 ; pslld $0x8,%xmm10 2090 DB 102,69,15,235,209 ; por %xmm9,%xmm10 2091 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2092 DB 68,15,89,202 ; mulps %xmm2,%xmm9 2093 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2094 DB 102,65,15,114,241,16 ; pslld $0x10,%xmm9 2095 DB 68,15,89,195 ; mulps %xmm3,%xmm8 2096 DB 102,69,15,91,192 ; cvtps2dq %xmm8,%xmm8 2097 DB 102,65,15,114,240,24 ; pslld $0x18,%xmm8 2098 DB 102,69,15,235,193 ; por %xmm9,%xmm8 2099 DB 102,69,15,235,194 ; por %xmm10,%xmm8 2100 DB 243,68,15,127,4,184 ; movdqu %xmm8,(%rax,%rdi,4) 2101 DB 72,173 ; lods %ds:(%rsi),%rax 2102 DB 255,224 ; jmpq *%rax 2103 2104PUBLIC _sk_load_f16_sse41 2105_sk_load_f16_sse41 LABEL PROC 2106 DB 72,173 ; lods %ds:(%rsi),%rax 2107 DB 72,139,0 ; mov (%rax),%rax 2108 DB 243,15,111,4,248 ; movdqu (%rax,%rdi,8),%xmm0 2109 DB 243,15,111,76,248,16 ; movdqu 0x10(%rax,%rdi,8),%xmm1 2110 DB 102,15,111,208 ; movdqa %xmm0,%xmm2 2111 DB 
102,15,97,209 ; punpcklwd %xmm1,%xmm2 2112 DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0 2113 DB 102,68,15,111,194 ; movdqa %xmm2,%xmm8 2114 DB 102,68,15,97,192 ; punpcklwd %xmm0,%xmm8 2115 DB 102,15,105,208 ; punpckhwd %xmm0,%xmm2 2116 DB 102,15,110,66,100 ; movd 0x64(%rdx),%xmm0 2117 DB 102,15,112,216,0 ; pshufd $0x0,%xmm0,%xmm3 2118 DB 102,15,111,203 ; movdqa %xmm3,%xmm1 2119 DB 102,65,15,101,200 ; pcmpgtw %xmm8,%xmm1 2120 DB 102,65,15,223,200 ; pandn %xmm8,%xmm1 2121 DB 102,15,101,218 ; pcmpgtw %xmm2,%xmm3 2122 DB 102,15,223,218 ; pandn %xmm2,%xmm3 2123 DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0 2124 DB 102,15,114,240,13 ; pslld $0xd,%xmm0 2125 DB 102,15,110,82,92 ; movd 0x5c(%rdx),%xmm2 2126 DB 102,68,15,112,194,0 ; pshufd $0x0,%xmm2,%xmm8 2127 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2128 DB 102,69,15,239,201 ; pxor %xmm9,%xmm9 2129 DB 102,65,15,105,201 ; punpckhwd %xmm9,%xmm1 2130 DB 102,15,114,241,13 ; pslld $0xd,%xmm1 2131 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2132 DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2 2133 DB 102,15,114,242,13 ; pslld $0xd,%xmm2 2134 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2135 DB 102,65,15,105,217 ; punpckhwd %xmm9,%xmm3 2136 DB 102,15,114,243,13 ; pslld $0xd,%xmm3 2137 DB 65,15,89,216 ; mulps %xmm8,%xmm3 2138 DB 72,173 ; lods %ds:(%rsi),%rax 2139 DB 255,224 ; jmpq *%rax 2140 2141PUBLIC _sk_store_f16_sse41 2142_sk_store_f16_sse41 LABEL PROC 2143 DB 72,173 ; lods %ds:(%rsi),%rax 2144 DB 72,139,0 ; mov (%rax),%rax 2145 DB 102,68,15,110,66,96 ; movd 0x60(%rdx),%xmm8 2146 DB 102,69,15,112,192,0 ; pshufd $0x0,%xmm8,%xmm8 2147 DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 2148 DB 68,15,89,200 ; mulps %xmm0,%xmm9 2149 DB 102,65,15,114,209,13 ; psrld $0xd,%xmm9 2150 DB 102,69,15,111,208 ; movdqa %xmm8,%xmm10 2151 DB 68,15,89,209 ; mulps %xmm1,%xmm10 2152 DB 102,65,15,114,210,13 ; psrld $0xd,%xmm10 2153 DB 102,69,15,111,216 ; movdqa %xmm8,%xmm11 2154 DB 68,15,89,218 ; mulps %xmm2,%xmm11 2155 DB 102,65,15,114,211,13 ; psrld $0xd,%xmm11 2156 DB 68,15,89,195 ; 
mulps %xmm3,%xmm8 2157 DB 102,65,15,114,208,13 ; psrld $0xd,%xmm8 2158 DB 102,65,15,115,250,2 ; pslldq $0x2,%xmm10 2159 DB 102,69,15,235,209 ; por %xmm9,%xmm10 2160 DB 102,65,15,115,248,2 ; pslldq $0x2,%xmm8 2161 DB 102,69,15,235,195 ; por %xmm11,%xmm8 2162 DB 102,69,15,111,202 ; movdqa %xmm10,%xmm9 2163 DB 102,69,15,98,200 ; punpckldq %xmm8,%xmm9 2164 DB 243,68,15,127,12,248 ; movdqu %xmm9,(%rax,%rdi,8) 2165 DB 102,69,15,106,208 ; punpckhdq %xmm8,%xmm10 2166 DB 243,68,15,127,84,248,16 ; movdqu %xmm10,0x10(%rax,%rdi,8) 2167 DB 72,173 ; lods %ds:(%rsi),%rax 2168 DB 255,224 ; jmpq *%rax 2169 2170PUBLIC _sk_clamp_x_sse41 2171_sk_clamp_x_sse41 LABEL PROC 2172 DB 72,173 ; lods %ds:(%rsi),%rax 2173 DB 243,68,15,16,0 ; movss (%rax),%xmm8 2174 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2175 DB 102,69,15,118,201 ; pcmpeqd %xmm9,%xmm9 2176 DB 102,69,15,254,200 ; paddd %xmm8,%xmm9 2177 DB 65,15,93,193 ; minps %xmm9,%xmm0 2178 DB 69,15,87,192 ; xorps %xmm8,%xmm8 2179 DB 68,15,95,192 ; maxps %xmm0,%xmm8 2180 DB 72,173 ; lods %ds:(%rsi),%rax 2181 DB 65,15,40,192 ; movaps %xmm8,%xmm0 2182 DB 255,224 ; jmpq *%rax 2183 2184PUBLIC _sk_clamp_y_sse41 2185_sk_clamp_y_sse41 LABEL PROC 2186 DB 72,173 ; lods %ds:(%rsi),%rax 2187 DB 243,68,15,16,0 ; movss (%rax),%xmm8 2188 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2189 DB 102,69,15,118,201 ; pcmpeqd %xmm9,%xmm9 2190 DB 102,69,15,254,200 ; paddd %xmm8,%xmm9 2191 DB 65,15,93,201 ; minps %xmm9,%xmm1 2192 DB 69,15,87,192 ; xorps %xmm8,%xmm8 2193 DB 68,15,95,193 ; maxps %xmm1,%xmm8 2194 DB 72,173 ; lods %ds:(%rsi),%rax 2195 DB 65,15,40,200 ; movaps %xmm8,%xmm1 2196 DB 255,224 ; jmpq *%rax 2197 2198PUBLIC _sk_matrix_2x3_sse41 2199_sk_matrix_2x3_sse41 LABEL PROC 2200 DB 68,15,40,201 ; movaps %xmm1,%xmm9 2201 DB 68,15,40,192 ; movaps %xmm0,%xmm8 2202 DB 72,173 ; lods %ds:(%rsi),%rax 2203 DB 243,15,16,0 ; movss (%rax),%xmm0 2204 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 2205 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2206 DB 243,68,15,16,80,8 ; 
movss 0x8(%rax),%xmm10 2207 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2208 DB 243,68,15,16,88,16 ; movss 0x10(%rax),%xmm11 2209 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2210 DB 69,15,89,209 ; mulps %xmm9,%xmm10 2211 DB 69,15,88,211 ; addps %xmm11,%xmm10 2212 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2213 DB 65,15,88,194 ; addps %xmm10,%xmm0 2214 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2215 DB 243,68,15,16,80,12 ; movss 0xc(%rax),%xmm10 2216 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2217 DB 243,68,15,16,88,20 ; movss 0x14(%rax),%xmm11 2218 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2219 DB 69,15,89,209 ; mulps %xmm9,%xmm10 2220 DB 69,15,88,211 ; addps %xmm11,%xmm10 2221 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2222 DB 65,15,88,202 ; addps %xmm10,%xmm1 2223 DB 72,173 ; lods %ds:(%rsi),%rax 2224 DB 255,224 ; jmpq *%rax 2225 2226PUBLIC _sk_matrix_3x4_sse41 2227_sk_matrix_3x4_sse41 LABEL PROC 2228 DB 68,15,40,201 ; movaps %xmm1,%xmm9 2229 DB 68,15,40,192 ; movaps %xmm0,%xmm8 2230 DB 72,173 ; lods %ds:(%rsi),%rax 2231 DB 243,15,16,0 ; movss (%rax),%xmm0 2232 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 2233 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2234 DB 243,68,15,16,80,12 ; movss 0xc(%rax),%xmm10 2235 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2236 DB 243,68,15,16,88,24 ; movss 0x18(%rax),%xmm11 2237 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2238 DB 243,68,15,16,96,36 ; movss 0x24(%rax),%xmm12 2239 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 2240 DB 68,15,89,218 ; mulps %xmm2,%xmm11 2241 DB 69,15,88,220 ; addps %xmm12,%xmm11 2242 DB 69,15,89,209 ; mulps %xmm9,%xmm10 2243 DB 69,15,88,211 ; addps %xmm11,%xmm10 2244 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2245 DB 65,15,88,194 ; addps %xmm10,%xmm0 2246 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2247 DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 2248 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2249 DB 243,68,15,16,88,28 ; movss 0x1c(%rax),%xmm11 2250 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2251 
DB 243,68,15,16,96,40 ; movss 0x28(%rax),%xmm12 2252 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 2253 DB 68,15,89,218 ; mulps %xmm2,%xmm11 2254 DB 69,15,88,220 ; addps %xmm12,%xmm11 2255 DB 69,15,89,209 ; mulps %xmm9,%xmm10 2256 DB 69,15,88,211 ; addps %xmm11,%xmm10 2257 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2258 DB 65,15,88,202 ; addps %xmm10,%xmm1 2259 DB 243,68,15,16,80,8 ; movss 0x8(%rax),%xmm10 2260 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2261 DB 243,68,15,16,88,20 ; movss 0x14(%rax),%xmm11 2262 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2263 DB 243,68,15,16,96,32 ; movss 0x20(%rax),%xmm12 2264 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 2265 DB 243,68,15,16,104,44 ; movss 0x2c(%rax),%xmm13 2266 DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 2267 DB 68,15,89,226 ; mulps %xmm2,%xmm12 2268 DB 69,15,88,229 ; addps %xmm13,%xmm12 2269 DB 69,15,89,217 ; mulps %xmm9,%xmm11 2270 DB 69,15,88,220 ; addps %xmm12,%xmm11 2271 DB 69,15,89,208 ; mulps %xmm8,%xmm10 2272 DB 69,15,88,211 ; addps %xmm11,%xmm10 2273 DB 72,173 ; lods %ds:(%rsi),%rax 2274 DB 65,15,40,210 ; movaps %xmm10,%xmm2 2275 DB 255,224 ; jmpq *%rax 2276 2277PUBLIC _sk_linear_gradient_2stops_sse41 2278_sk_linear_gradient_2stops_sse41 LABEL PROC 2279 DB 72,173 ; lods %ds:(%rsi),%rax 2280 DB 68,15,16,8 ; movups (%rax),%xmm9 2281 DB 15,16,88,16 ; movups 0x10(%rax),%xmm3 2282 DB 68,15,40,195 ; movaps %xmm3,%xmm8 2283 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2284 DB 65,15,40,201 ; movaps %xmm9,%xmm1 2285 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2286 DB 68,15,89,192 ; mulps %xmm0,%xmm8 2287 DB 68,15,88,193 ; addps %xmm1,%xmm8 2288 DB 15,40,203 ; movaps %xmm3,%xmm1 2289 DB 15,198,201,85 ; shufps $0x55,%xmm1,%xmm1 2290 DB 65,15,40,209 ; movaps %xmm9,%xmm2 2291 DB 15,198,210,85 ; shufps $0x55,%xmm2,%xmm2 2292 DB 15,89,200 ; mulps %xmm0,%xmm1 2293 DB 15,88,202 ; addps %xmm2,%xmm1 2294 DB 15,40,211 ; movaps %xmm3,%xmm2 2295 DB 15,198,210,170 ; shufps $0xaa,%xmm2,%xmm2 2296 DB 69,15,40,209 ; movaps 
%xmm9,%xmm10 2297 DB 69,15,198,210,170 ; shufps $0xaa,%xmm10,%xmm10 2298 DB 15,89,208 ; mulps %xmm0,%xmm2 2299 DB 65,15,88,210 ; addps %xmm10,%xmm2 2300 DB 15,198,219,255 ; shufps $0xff,%xmm3,%xmm3 2301 DB 69,15,198,201,255 ; shufps $0xff,%xmm9,%xmm9 2302 DB 15,89,216 ; mulps %xmm0,%xmm3 2303 DB 65,15,88,217 ; addps %xmm9,%xmm3 2304 DB 72,173 ; lods %ds:(%rsi),%rax 2305 DB 65,15,40,192 ; movaps %xmm8,%xmm0 2306 DB 255,224 ; jmpq *%rax 2307 2308PUBLIC _sk_start_pipeline_sse2 2309_sk_start_pipeline_sse2 LABEL PROC 2310 DB 65,87 ; push %r15 2311 DB 65,86 ; push %r14 2312 DB 65,85 ; push %r13 2313 DB 65,84 ; push %r12 2314 DB 86 ; push %rsi 2315 DB 87 ; push %rdi 2316 DB 83 ; push %rbx 2317 DB 72,129,236,160,0,0,0 ; sub $0xa0,%rsp 2318 DB 68,15,41,188,36,144,0,0,0 ; movaps %xmm15,0x90(%rsp) 2319 DB 68,15,41,180,36,128,0,0,0 ; movaps %xmm14,0x80(%rsp) 2320 DB 68,15,41,108,36,112 ; movaps %xmm13,0x70(%rsp) 2321 DB 68,15,41,100,36,96 ; movaps %xmm12,0x60(%rsp) 2322 DB 68,15,41,92,36,80 ; movaps %xmm11,0x50(%rsp) 2323 DB 68,15,41,84,36,64 ; movaps %xmm10,0x40(%rsp) 2324 DB 68,15,41,76,36,48 ; movaps %xmm9,0x30(%rsp) 2325 DB 68,15,41,68,36,32 ; movaps %xmm8,0x20(%rsp) 2326 DB 15,41,124,36,16 ; movaps %xmm7,0x10(%rsp) 2327 DB 15,41,52,36 ; movaps %xmm6,(%rsp) 2328 DB 77,137,207 ; mov %r9,%r15 2329 DB 77,137,198 ; mov %r8,%r14 2330 DB 72,137,203 ; mov %rcx,%rbx 2331 DB 72,137,214 ; mov %rdx,%rsi 2332 DB 72,173 ; lods %ds:(%rsi),%rax 2333 DB 73,137,196 ; mov %rax,%r12 2334 DB 73,137,245 ; mov %rsi,%r13 2335 DB 72,141,67,4 ; lea 0x4(%rbx),%rax 2336 DB 76,57,248 ; cmp %r15,%rax 2337 DB 118,5 ; jbe 73 <_sk_start_pipeline_sse2+0x73> 2338 DB 72,137,216 ; mov %rbx,%rax 2339 DB 235,52 ; jmp a7 <_sk_start_pipeline_sse2+0xa7> 2340 DB 15,87,192 ; xorps %xmm0,%xmm0 2341 DB 15,87,201 ; xorps %xmm1,%xmm1 2342 DB 15,87,210 ; xorps %xmm2,%xmm2 2343 DB 15,87,219 ; xorps %xmm3,%xmm3 2344 DB 15,87,228 ; xorps %xmm4,%xmm4 2345 DB 15,87,237 ; xorps %xmm5,%xmm5 2346 DB 15,87,246 ; xorps 
%xmm6,%xmm6 2347 DB 15,87,255 ; xorps %xmm7,%xmm7 2348 DB 72,137,223 ; mov %rbx,%rdi 2349 DB 76,137,238 ; mov %r13,%rsi 2350 DB 76,137,242 ; mov %r14,%rdx 2351 DB 65,255,212 ; callq *%r12 2352 DB 72,141,67,4 ; lea 0x4(%rbx),%rax 2353 DB 72,131,195,8 ; add $0x8,%rbx 2354 DB 76,57,251 ; cmp %r15,%rbx 2355 DB 72,137,195 ; mov %rax,%rbx 2356 DB 118,204 ; jbe 73 <_sk_start_pipeline_sse2+0x73> 2357 DB 15,40,52,36 ; movaps (%rsp),%xmm6 2358 DB 15,40,124,36,16 ; movaps 0x10(%rsp),%xmm7 2359 DB 68,15,40,68,36,32 ; movaps 0x20(%rsp),%xmm8 2360 DB 68,15,40,76,36,48 ; movaps 0x30(%rsp),%xmm9 2361 DB 68,15,40,84,36,64 ; movaps 0x40(%rsp),%xmm10 2362 DB 68,15,40,92,36,80 ; movaps 0x50(%rsp),%xmm11 2363 DB 68,15,40,100,36,96 ; movaps 0x60(%rsp),%xmm12 2364 DB 68,15,40,108,36,112 ; movaps 0x70(%rsp),%xmm13 2365 DB 68,15,40,180,36,128,0,0,0 ; movaps 0x80(%rsp),%xmm14 2366 DB 68,15,40,188,36,144,0,0,0 ; movaps 0x90(%rsp),%xmm15 2367 DB 72,129,196,160,0,0,0 ; add $0xa0,%rsp 2368 DB 91 ; pop %rbx 2369 DB 95 ; pop %rdi 2370 DB 94 ; pop %rsi 2371 DB 65,92 ; pop %r12 2372 DB 65,93 ; pop %r13 2373 DB 65,94 ; pop %r14 2374 DB 65,95 ; pop %r15 2375 DB 195 ; retq 2376 2377PUBLIC _sk_just_return_sse2 2378_sk_just_return_sse2 LABEL PROC 2379 DB 195 ; retq 2380 2381PUBLIC _sk_seed_shader_sse2 2382_sk_seed_shader_sse2 LABEL PROC 2383 DB 72,173 ; lods %ds:(%rsi),%rax 2384 DB 102,15,110,199 ; movd %edi,%xmm0 2385 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2386 DB 15,91,200 ; cvtdq2ps %xmm0,%xmm1 2387 DB 243,15,16,18 ; movss (%rdx),%xmm2 2388 DB 243,15,16,90,4 ; movss 0x4(%rdx),%xmm3 2389 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 2390 DB 15,88,203 ; addps %xmm3,%xmm1 2391 DB 15,16,66,20 ; movups 0x14(%rdx),%xmm0 2392 DB 15,88,193 ; addps %xmm1,%xmm0 2393 DB 102,15,110,8 ; movd (%rax),%xmm1 2394 DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1 2395 DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1 2396 DB 15,88,203 ; addps %xmm3,%xmm1 2397 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 2398 DB 72,173 ; lods 
%ds:(%rsi),%rax 2399 DB 15,87,219 ; xorps %xmm3,%xmm3 2400 DB 15,87,228 ; xorps %xmm4,%xmm4 2401 DB 15,87,237 ; xorps %xmm5,%xmm5 2402 DB 15,87,246 ; xorps %xmm6,%xmm6 2403 DB 15,87,255 ; xorps %xmm7,%xmm7 2404 DB 255,224 ; jmpq *%rax 2405 2406PUBLIC _sk_constant_color_sse2 2407_sk_constant_color_sse2 LABEL PROC 2408 DB 72,173 ; lods %ds:(%rsi),%rax 2409 DB 15,16,24 ; movups (%rax),%xmm3 2410 DB 15,40,195 ; movaps %xmm3,%xmm0 2411 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2412 DB 15,40,203 ; movaps %xmm3,%xmm1 2413 DB 15,198,201,85 ; shufps $0x55,%xmm1,%xmm1 2414 DB 15,40,211 ; movaps %xmm3,%xmm2 2415 DB 15,198,210,170 ; shufps $0xaa,%xmm2,%xmm2 2416 DB 15,198,219,255 ; shufps $0xff,%xmm3,%xmm3 2417 DB 72,173 ; lods %ds:(%rsi),%rax 2418 DB 255,224 ; jmpq *%rax 2419 2420PUBLIC _sk_clear_sse2 2421_sk_clear_sse2 LABEL PROC 2422 DB 72,173 ; lods %ds:(%rsi),%rax 2423 DB 72,173 ; lods %ds:(%rsi),%rax 2424 DB 15,87,192 ; xorps %xmm0,%xmm0 2425 DB 15,87,201 ; xorps %xmm1,%xmm1 2426 DB 15,87,210 ; xorps %xmm2,%xmm2 2427 DB 15,87,219 ; xorps %xmm3,%xmm3 2428 DB 255,224 ; jmpq *%rax 2429 2430PUBLIC _sk_plus__sse2 2431_sk_plus__sse2 LABEL PROC 2432 DB 72,173 ; lods %ds:(%rsi),%rax 2433 DB 15,88,196 ; addps %xmm4,%xmm0 2434 DB 15,88,205 ; addps %xmm5,%xmm1 2435 DB 15,88,214 ; addps %xmm6,%xmm2 2436 DB 15,88,223 ; addps %xmm7,%xmm3 2437 DB 72,173 ; lods %ds:(%rsi),%rax 2438 DB 255,224 ; jmpq *%rax 2439 2440PUBLIC _sk_srcover_sse2 2441_sk_srcover_sse2 LABEL PROC 2442 DB 72,173 ; lods %ds:(%rsi),%rax 2443 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 2444 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2445 DB 68,15,92,195 ; subps %xmm3,%xmm8 2446 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2447 DB 68,15,89,204 ; mulps %xmm4,%xmm9 2448 DB 65,15,88,193 ; addps %xmm9,%xmm0 2449 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2450 DB 68,15,89,205 ; mulps %xmm5,%xmm9 2451 DB 65,15,88,201 ; addps %xmm9,%xmm1 2452 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2453 DB 68,15,89,206 ; mulps %xmm6,%xmm9 2454 DB 65,15,88,209 ; 
addps %xmm9,%xmm2 2455 DB 68,15,89,199 ; mulps %xmm7,%xmm8 2456 DB 65,15,88,216 ; addps %xmm8,%xmm3 2457 DB 72,173 ; lods %ds:(%rsi),%rax 2458 DB 255,224 ; jmpq *%rax 2459 2460PUBLIC _sk_dstover_sse2 2461_sk_dstover_sse2 LABEL PROC 2462 DB 72,173 ; lods %ds:(%rsi),%rax 2463 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 2464 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2465 DB 68,15,92,199 ; subps %xmm7,%xmm8 2466 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2467 DB 15,88,196 ; addps %xmm4,%xmm0 2468 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2469 DB 15,88,205 ; addps %xmm5,%xmm1 2470 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2471 DB 15,88,214 ; addps %xmm6,%xmm2 2472 DB 65,15,89,216 ; mulps %xmm8,%xmm3 2473 DB 15,88,223 ; addps %xmm7,%xmm3 2474 DB 72,173 ; lods %ds:(%rsi),%rax 2475 DB 255,224 ; jmpq *%rax 2476 2477PUBLIC _sk_clamp_0_sse2 2478_sk_clamp_0_sse2 LABEL PROC 2479 DB 72,173 ; lods %ds:(%rsi),%rax 2480 DB 69,15,87,192 ; xorps %xmm8,%xmm8 2481 DB 65,15,95,192 ; maxps %xmm8,%xmm0 2482 DB 65,15,95,200 ; maxps %xmm8,%xmm1 2483 DB 65,15,95,208 ; maxps %xmm8,%xmm2 2484 DB 65,15,95,216 ; maxps %xmm8,%xmm3 2485 DB 72,173 ; lods %ds:(%rsi),%rax 2486 DB 255,224 ; jmpq *%rax 2487 2488PUBLIC _sk_clamp_1_sse2 2489_sk_clamp_1_sse2 LABEL PROC 2490 DB 72,173 ; lods %ds:(%rsi),%rax 2491 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 2492 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2493 DB 65,15,93,192 ; minps %xmm8,%xmm0 2494 DB 65,15,93,200 ; minps %xmm8,%xmm1 2495 DB 65,15,93,208 ; minps %xmm8,%xmm2 2496 DB 65,15,93,216 ; minps %xmm8,%xmm3 2497 DB 72,173 ; lods %ds:(%rsi),%rax 2498 DB 255,224 ; jmpq *%rax 2499 2500PUBLIC _sk_clamp_a_sse2 2501_sk_clamp_a_sse2 LABEL PROC 2502 DB 72,173 ; lods %ds:(%rsi),%rax 2503 DB 243,68,15,16,2 ; movss (%rdx),%xmm8 2504 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2505 DB 65,15,93,216 ; minps %xmm8,%xmm3 2506 DB 15,93,195 ; minps %xmm3,%xmm0 2507 DB 15,93,203 ; minps %xmm3,%xmm1 2508 DB 15,93,211 ; minps %xmm3,%xmm2 2509 DB 72,173 ; lods %ds:(%rsi),%rax 2510 DB 255,224 ; 
jmpq *%rax 2511 2512PUBLIC _sk_set_rgb_sse2 2513_sk_set_rgb_sse2 LABEL PROC 2514 DB 72,173 ; lods %ds:(%rsi),%rax 2515 DB 243,15,16,0 ; movss (%rax),%xmm0 2516 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 2517 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2518 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2519 DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2 2520 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 2521 DB 72,173 ; lods %ds:(%rsi),%rax 2522 DB 255,224 ; jmpq *%rax 2523 2524PUBLIC _sk_swap_rb_sse2 2525_sk_swap_rb_sse2 LABEL PROC 2526 DB 68,15,40,192 ; movaps %xmm0,%xmm8 2527 DB 72,173 ; lods %ds:(%rsi),%rax 2528 DB 72,173 ; lods %ds:(%rsi),%rax 2529 DB 15,40,194 ; movaps %xmm2,%xmm0 2530 DB 65,15,40,208 ; movaps %xmm8,%xmm2 2531 DB 255,224 ; jmpq *%rax 2532 2533PUBLIC _sk_swap_sse2 2534_sk_swap_sse2 LABEL PROC 2535 DB 68,15,40,195 ; movaps %xmm3,%xmm8 2536 DB 68,15,40,202 ; movaps %xmm2,%xmm9 2537 DB 68,15,40,209 ; movaps %xmm1,%xmm10 2538 DB 68,15,40,216 ; movaps %xmm0,%xmm11 2539 DB 72,173 ; lods %ds:(%rsi),%rax 2540 DB 72,173 ; lods %ds:(%rsi),%rax 2541 DB 15,40,196 ; movaps %xmm4,%xmm0 2542 DB 15,40,205 ; movaps %xmm5,%xmm1 2543 DB 15,40,214 ; movaps %xmm6,%xmm2 2544 DB 15,40,223 ; movaps %xmm7,%xmm3 2545 DB 65,15,40,227 ; movaps %xmm11,%xmm4 2546 DB 65,15,40,234 ; movaps %xmm10,%xmm5 2547 DB 65,15,40,241 ; movaps %xmm9,%xmm6 2548 DB 65,15,40,248 ; movaps %xmm8,%xmm7 2549 DB 255,224 ; jmpq *%rax 2550 2551PUBLIC _sk_move_src_dst_sse2 2552_sk_move_src_dst_sse2 LABEL PROC 2553 DB 72,173 ; lods %ds:(%rsi),%rax 2554 DB 72,173 ; lods %ds:(%rsi),%rax 2555 DB 15,40,224 ; movaps %xmm0,%xmm4 2556 DB 15,40,233 ; movaps %xmm1,%xmm5 2557 DB 15,40,242 ; movaps %xmm2,%xmm6 2558 DB 15,40,251 ; movaps %xmm3,%xmm7 2559 DB 255,224 ; jmpq *%rax 2560 2561PUBLIC _sk_move_dst_src_sse2 2562_sk_move_dst_src_sse2 LABEL PROC 2563 DB 72,173 ; lods %ds:(%rsi),%rax 2564 DB 72,173 ; lods %ds:(%rsi),%rax 2565 DB 15,40,196 ; movaps %xmm4,%xmm0 2566 DB 15,40,205 ; movaps %xmm5,%xmm1 2567 DB 15,40,214 ; 
movaps %xmm6,%xmm2 2568 DB 15,40,223 ; movaps %xmm7,%xmm3 2569 DB 255,224 ; jmpq *%rax 2570 2571PUBLIC _sk_premul_sse2 2572_sk_premul_sse2 LABEL PROC 2573 DB 72,173 ; lods %ds:(%rsi),%rax 2574 DB 15,89,195 ; mulps %xmm3,%xmm0 2575 DB 15,89,203 ; mulps %xmm3,%xmm1 2576 DB 15,89,211 ; mulps %xmm3,%xmm2 2577 DB 72,173 ; lods %ds:(%rsi),%rax 2578 DB 255,224 ; jmpq *%rax 2579 2580PUBLIC _sk_unpremul_sse2 2581_sk_unpremul_sse2 LABEL PROC 2582 DB 72,173 ; lods %ds:(%rsi),%rax 2583 DB 69,15,87,192 ; xorps %xmm8,%xmm8 2584 DB 68,15,194,195,0 ; cmpeqps %xmm3,%xmm8 2585 DB 243,68,15,16,10 ; movss (%rdx),%xmm9 2586 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2587 DB 68,15,94,203 ; divps %xmm3,%xmm9 2588 DB 69,15,85,193 ; andnps %xmm9,%xmm8 2589 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2590 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2591 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2592 DB 72,173 ; lods %ds:(%rsi),%rax 2593 DB 255,224 ; jmpq *%rax 2594 2595PUBLIC _sk_from_srgb_sse2 2596_sk_from_srgb_sse2 LABEL PROC 2597 DB 72,173 ; lods %ds:(%rsi),%rax 2598 DB 243,68,15,16,66,64 ; movss 0x40(%rdx),%xmm8 2599 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2600 DB 69,15,40,232 ; movaps %xmm8,%xmm13 2601 DB 68,15,89,232 ; mulps %xmm0,%xmm13 2602 DB 68,15,40,224 ; movaps %xmm0,%xmm12 2603 DB 69,15,89,228 ; mulps %xmm12,%xmm12 2604 DB 243,68,15,16,74,60 ; movss 0x3c(%rdx),%xmm9 2605 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2606 DB 243,68,15,16,82,52 ; movss 0x34(%rdx),%xmm10 2607 DB 243,68,15,16,90,56 ; movss 0x38(%rdx),%xmm11 2608 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2609 DB 69,15,40,241 ; movaps %xmm9,%xmm14 2610 DB 68,15,89,240 ; mulps %xmm0,%xmm14 2611 DB 69,15,88,243 ; addps %xmm11,%xmm14 2612 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2613 DB 69,15,89,244 ; mulps %xmm12,%xmm14 2614 DB 69,15,88,242 ; addps %xmm10,%xmm14 2615 DB 243,68,15,16,98,68 ; movss 0x44(%rdx),%xmm12 2616 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 2617 DB 65,15,194,196,1 ; cmpltps %xmm12,%xmm0 2618 DB 
68,15,84,232 ; andps %xmm0,%xmm13 2619 DB 65,15,85,198 ; andnps %xmm14,%xmm0 2620 DB 65,15,86,197 ; orps %xmm13,%xmm0 2621 DB 69,15,40,232 ; movaps %xmm8,%xmm13 2622 DB 68,15,89,233 ; mulps %xmm1,%xmm13 2623 DB 68,15,40,241 ; movaps %xmm1,%xmm14 2624 DB 69,15,89,246 ; mulps %xmm14,%xmm14 2625 DB 69,15,40,249 ; movaps %xmm9,%xmm15 2626 DB 68,15,89,249 ; mulps %xmm1,%xmm15 2627 DB 69,15,88,251 ; addps %xmm11,%xmm15 2628 DB 69,15,89,254 ; mulps %xmm14,%xmm15 2629 DB 69,15,88,250 ; addps %xmm10,%xmm15 2630 DB 65,15,194,204,1 ; cmpltps %xmm12,%xmm1 2631 DB 68,15,84,233 ; andps %xmm1,%xmm13 2632 DB 65,15,85,207 ; andnps %xmm15,%xmm1 2633 DB 65,15,86,205 ; orps %xmm13,%xmm1 2634 DB 68,15,89,194 ; mulps %xmm2,%xmm8 2635 DB 68,15,40,234 ; movaps %xmm2,%xmm13 2636 DB 69,15,89,237 ; mulps %xmm13,%xmm13 2637 DB 68,15,89,202 ; mulps %xmm2,%xmm9 2638 DB 69,15,88,203 ; addps %xmm11,%xmm9 2639 DB 69,15,89,205 ; mulps %xmm13,%xmm9 2640 DB 69,15,88,202 ; addps %xmm10,%xmm9 2641 DB 65,15,194,212,1 ; cmpltps %xmm12,%xmm2 2642 DB 68,15,84,194 ; andps %xmm2,%xmm8 2643 DB 65,15,85,209 ; andnps %xmm9,%xmm2 2644 DB 65,15,86,208 ; orps %xmm8,%xmm2 2645 DB 72,173 ; lods %ds:(%rsi),%rax 2646 DB 255,224 ; jmpq *%rax 2647 2648PUBLIC _sk_to_srgb_sse2 2649_sk_to_srgb_sse2 LABEL PROC 2650 DB 72,131,236,40 ; sub $0x28,%rsp 2651 DB 15,41,124,36,16 ; movaps %xmm7,0x10(%rsp) 2652 DB 15,41,52,36 ; movaps %xmm6,(%rsp) 2653 DB 15,40,245 ; movaps %xmm5,%xmm6 2654 DB 15,40,236 ; movaps %xmm4,%xmm5 2655 DB 15,40,227 ; movaps %xmm3,%xmm4 2656 DB 68,15,82,192 ; rsqrtps %xmm0,%xmm8 2657 DB 69,15,83,232 ; rcpps %xmm8,%xmm13 2658 DB 69,15,82,248 ; rsqrtps %xmm8,%xmm15 2659 DB 243,15,16,26 ; movss (%rdx),%xmm3 2660 DB 243,68,15,16,66,72 ; movss 0x48(%rdx),%xmm8 2661 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2662 DB 69,15,40,240 ; movaps %xmm8,%xmm14 2663 DB 68,15,89,240 ; mulps %xmm0,%xmm14 2664 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 2665 DB 243,68,15,16,82,76 ; movss 0x4c(%rdx),%xmm10 2666 DB 
69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 2667 DB 243,68,15,16,90,80 ; movss 0x50(%rdx),%xmm11 2668 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 2669 DB 243,68,15,16,98,84 ; movss 0x54(%rdx),%xmm12 2670 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 2671 DB 69,15,89,235 ; mulps %xmm11,%xmm13 2672 DB 69,15,88,236 ; addps %xmm12,%xmm13 2673 DB 69,15,89,250 ; mulps %xmm10,%xmm15 2674 DB 69,15,88,253 ; addps %xmm13,%xmm15 2675 DB 68,15,40,203 ; movaps %xmm3,%xmm9 2676 DB 69,15,93,207 ; minps %xmm15,%xmm9 2677 DB 243,68,15,16,106,88 ; movss 0x58(%rdx),%xmm13 2678 DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 2679 DB 65,15,194,197,1 ; cmpltps %xmm13,%xmm0 2680 DB 68,15,84,240 ; andps %xmm0,%xmm14 2681 DB 65,15,85,193 ; andnps %xmm9,%xmm0 2682 DB 65,15,86,198 ; orps %xmm14,%xmm0 2683 DB 68,15,82,201 ; rsqrtps %xmm1,%xmm9 2684 DB 69,15,83,241 ; rcpps %xmm9,%xmm14 2685 DB 69,15,82,201 ; rsqrtps %xmm9,%xmm9 2686 DB 69,15,89,243 ; mulps %xmm11,%xmm14 2687 DB 69,15,88,244 ; addps %xmm12,%xmm14 2688 DB 69,15,89,202 ; mulps %xmm10,%xmm9 2689 DB 69,15,88,206 ; addps %xmm14,%xmm9 2690 DB 68,15,40,243 ; movaps %xmm3,%xmm14 2691 DB 69,15,93,241 ; minps %xmm9,%xmm14 2692 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2693 DB 68,15,89,201 ; mulps %xmm1,%xmm9 2694 DB 65,15,194,205,1 ; cmpltps %xmm13,%xmm1 2695 DB 68,15,84,201 ; andps %xmm1,%xmm9 2696 DB 65,15,85,206 ; andnps %xmm14,%xmm1 2697 DB 65,15,86,201 ; orps %xmm9,%xmm1 2698 DB 68,15,82,202 ; rsqrtps %xmm2,%xmm9 2699 DB 69,15,83,241 ; rcpps %xmm9,%xmm14 2700 DB 69,15,89,243 ; mulps %xmm11,%xmm14 2701 DB 69,15,88,244 ; addps %xmm12,%xmm14 2702 DB 65,15,82,249 ; rsqrtps %xmm9,%xmm7 2703 DB 65,15,89,250 ; mulps %xmm10,%xmm7 2704 DB 65,15,88,254 ; addps %xmm14,%xmm7 2705 DB 15,93,223 ; minps %xmm7,%xmm3 2706 DB 68,15,89,194 ; mulps %xmm2,%xmm8 2707 DB 65,15,194,213,1 ; cmpltps %xmm13,%xmm2 2708 DB 68,15,84,194 ; andps %xmm2,%xmm8 2709 DB 15,85,211 ; andnps %xmm3,%xmm2 2710 DB 65,15,86,208 ; orps %xmm8,%xmm2 2711 DB 72,173 ; lods 
%ds:(%rsi),%rax 2712 DB 72,173 ; lods %ds:(%rsi),%rax 2713 DB 15,40,220 ; movaps %xmm4,%xmm3 2714 DB 15,40,229 ; movaps %xmm5,%xmm4 2715 DB 15,40,238 ; movaps %xmm6,%xmm5 2716 DB 15,40,52,36 ; movaps (%rsp),%xmm6 2717 DB 15,40,124,36,16 ; movaps 0x10(%rsp),%xmm7 2718 DB 72,131,196,40 ; add $0x28,%rsp 2719 DB 255,224 ; jmpq *%rax 2720 2721PUBLIC _sk_scale_u8_sse2 2722_sk_scale_u8_sse2 LABEL PROC 2723 DB 72,173 ; lods %ds:(%rsi),%rax 2724 DB 72,139,0 ; mov (%rax),%rax 2725 DB 102,68,15,110,4,56 ; movd (%rax,%rdi,1),%xmm8 2726 DB 102,69,15,239,201 ; pxor %xmm9,%xmm9 2727 DB 102,69,15,96,193 ; punpcklbw %xmm9,%xmm8 2728 DB 102,69,15,97,193 ; punpcklwd %xmm9,%xmm8 2729 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 2730 DB 243,68,15,16,74,12 ; movss 0xc(%rdx),%xmm9 2731 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2732 DB 69,15,89,200 ; mulps %xmm8,%xmm9 2733 DB 65,15,89,193 ; mulps %xmm9,%xmm0 2734 DB 65,15,89,201 ; mulps %xmm9,%xmm1 2735 DB 65,15,89,209 ; mulps %xmm9,%xmm2 2736 DB 65,15,89,217 ; mulps %xmm9,%xmm3 2737 DB 72,173 ; lods %ds:(%rsi),%rax 2738 DB 255,224 ; jmpq *%rax 2739 2740PUBLIC _sk_lerp_u8_sse2 2741_sk_lerp_u8_sse2 LABEL PROC 2742 DB 72,173 ; lods %ds:(%rsi),%rax 2743 DB 72,139,0 ; mov (%rax),%rax 2744 DB 102,68,15,110,4,56 ; movd (%rax,%rdi,1),%xmm8 2745 DB 102,69,15,239,201 ; pxor %xmm9,%xmm9 2746 DB 102,69,15,96,193 ; punpcklbw %xmm9,%xmm8 2747 DB 102,69,15,97,193 ; punpcklwd %xmm9,%xmm8 2748 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 2749 DB 243,68,15,16,74,12 ; movss 0xc(%rdx),%xmm9 2750 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2751 DB 69,15,89,200 ; mulps %xmm8,%xmm9 2752 DB 15,92,196 ; subps %xmm4,%xmm0 2753 DB 65,15,89,193 ; mulps %xmm9,%xmm0 2754 DB 15,88,196 ; addps %xmm4,%xmm0 2755 DB 15,92,205 ; subps %xmm5,%xmm1 2756 DB 65,15,89,201 ; mulps %xmm9,%xmm1 2757 DB 15,88,205 ; addps %xmm5,%xmm1 2758 DB 15,92,214 ; subps %xmm6,%xmm2 2759 DB 65,15,89,209 ; mulps %xmm9,%xmm2 2760 DB 15,88,214 ; addps %xmm6,%xmm2 2761 DB 15,92,223 ; subps %xmm7,%xmm3 2762 
DB 65,15,89,217 ; mulps %xmm9,%xmm3 2763 DB 15,88,223 ; addps %xmm7,%xmm3 2764 DB 72,173 ; lods %ds:(%rsi),%rax 2765 DB 255,224 ; jmpq *%rax 2766 2767PUBLIC _sk_load_tables_sse2 2768_sk_load_tables_sse2 LABEL PROC 2769 DB 72,173 ; lods %ds:(%rsi),%rax 2770 DB 72,139,8 ; mov (%rax),%rcx 2771 DB 76,139,64,8 ; mov 0x8(%rax),%r8 2772 DB 243,68,15,111,4,185 ; movdqu (%rcx,%rdi,4),%xmm8 2773 DB 102,15,110,66,16 ; movd 0x10(%rdx),%xmm0 2774 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2775 DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 2776 DB 102,65,15,114,209,8 ; psrld $0x8,%xmm9 2777 DB 102,68,15,219,200 ; pand %xmm0,%xmm9 2778 DB 102,69,15,111,208 ; movdqa %xmm8,%xmm10 2779 DB 102,65,15,114,210,16 ; psrld $0x10,%xmm10 2780 DB 102,68,15,219,208 ; pand %xmm0,%xmm10 2781 DB 102,65,15,219,192 ; pand %xmm8,%xmm0 2782 DB 102,15,112,216,78 ; pshufd $0x4e,%xmm0,%xmm3 2783 DB 102,72,15,126,217 ; movq %xmm3,%rcx 2784 DB 65,137,201 ; mov %ecx,%r9d 2785 DB 72,193,233,32 ; shr $0x20,%rcx 2786 DB 102,73,15,126,194 ; movq %xmm0,%r10 2787 DB 69,137,211 ; mov %r10d,%r11d 2788 DB 73,193,234,32 ; shr $0x20,%r10 2789 DB 243,67,15,16,28,144 ; movss (%r8,%r10,4),%xmm3 2790 DB 243,65,15,16,4,136 ; movss (%r8,%rcx,4),%xmm0 2791 DB 15,20,216 ; unpcklps %xmm0,%xmm3 2792 DB 243,67,15,16,4,152 ; movss (%r8,%r11,4),%xmm0 2793 DB 243,67,15,16,12,136 ; movss (%r8,%r9,4),%xmm1 2794 DB 15,20,193 ; unpcklps %xmm1,%xmm0 2795 DB 15,20,195 ; unpcklps %xmm3,%xmm0 2796 DB 72,139,72,16 ; mov 0x10(%rax),%rcx 2797 DB 102,65,15,112,201,78 ; pshufd $0x4e,%xmm9,%xmm1 2798 DB 102,73,15,126,200 ; movq %xmm1,%r8 2799 DB 69,137,193 ; mov %r8d,%r9d 2800 DB 73,193,232,32 ; shr $0x20,%r8 2801 DB 102,77,15,126,202 ; movq %xmm9,%r10 2802 DB 69,137,211 ; mov %r10d,%r11d 2803 DB 73,193,234,32 ; shr $0x20,%r10 2804 DB 243,66,15,16,28,145 ; movss (%rcx,%r10,4),%xmm3 2805 DB 243,66,15,16,12,129 ; movss (%rcx,%r8,4),%xmm1 2806 DB 15,20,217 ; unpcklps %xmm1,%xmm3 2807 DB 243,66,15,16,12,153 ; movss (%rcx,%r11,4),%xmm1 2808 DB 
243,66,15,16,20,137 ; movss (%rcx,%r9,4),%xmm2 2809 DB 15,20,202 ; unpcklps %xmm2,%xmm1 2810 DB 15,20,203 ; unpcklps %xmm3,%xmm1 2811 DB 72,139,64,24 ; mov 0x18(%rax),%rax 2812 DB 102,65,15,112,210,78 ; pshufd $0x4e,%xmm10,%xmm2 2813 DB 102,72,15,126,209 ; movq %xmm2,%rcx 2814 DB 65,137,200 ; mov %ecx,%r8d 2815 DB 72,193,233,32 ; shr $0x20,%rcx 2816 DB 102,77,15,126,209 ; movq %xmm10,%r9 2817 DB 69,137,202 ; mov %r9d,%r10d 2818 DB 73,193,233,32 ; shr $0x20,%r9 2819 DB 243,70,15,16,12,136 ; movss (%rax,%r9,4),%xmm9 2820 DB 243,15,16,20,136 ; movss (%rax,%rcx,4),%xmm2 2821 DB 68,15,20,202 ; unpcklps %xmm2,%xmm9 2822 DB 243,66,15,16,20,144 ; movss (%rax,%r10,4),%xmm2 2823 DB 243,66,15,16,28,128 ; movss (%rax,%r8,4),%xmm3 2824 DB 15,20,211 ; unpcklps %xmm3,%xmm2 2825 DB 65,15,20,209 ; unpcklps %xmm9,%xmm2 2826 DB 102,65,15,114,208,24 ; psrld $0x18,%xmm8 2827 DB 69,15,91,192 ; cvtdq2ps %xmm8,%xmm8 2828 DB 243,15,16,90,12 ; movss 0xc(%rdx),%xmm3 2829 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 2830 DB 65,15,89,216 ; mulps %xmm8,%xmm3 2831 DB 72,173 ; lods %ds:(%rsi),%rax 2832 DB 255,224 ; jmpq *%rax 2833 2834PUBLIC _sk_load_565_sse2 2835_sk_load_565_sse2 LABEL PROC 2836 DB 72,173 ; lods %ds:(%rsi),%rax 2837 DB 72,139,0 ; mov (%rax),%rax 2838 DB 243,68,15,126,12,120 ; movq (%rax,%rdi,2),%xmm9 2839 DB 102,15,239,192 ; pxor %xmm0,%xmm0 2840 DB 102,68,15,97,200 ; punpcklwd %xmm0,%xmm9 2841 DB 102,15,110,66,104 ; movd 0x68(%rdx),%xmm0 2842 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2843 DB 102,65,15,219,193 ; pand %xmm9,%xmm0 2844 DB 15,91,200 ; cvtdq2ps %xmm0,%xmm1 2845 DB 243,15,16,26 ; movss (%rdx),%xmm3 2846 DB 243,15,16,66,116 ; movss 0x74(%rdx),%xmm0 2847 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 2848 DB 15,89,193 ; mulps %xmm1,%xmm0 2849 DB 102,15,110,74,108 ; movd 0x6c(%rdx),%xmm1 2850 DB 102,15,112,201,0 ; pshufd $0x0,%xmm1,%xmm1 2851 DB 102,65,15,219,201 ; pand %xmm9,%xmm1 2852 DB 68,15,91,193 ; cvtdq2ps %xmm1,%xmm8 2853 DB 243,15,16,74,120 ; movss 0x78(%rdx),%xmm1 
2854 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 2855 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2856 DB 102,15,110,82,112 ; movd 0x70(%rdx),%xmm2 2857 DB 102,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm2 2858 DB 102,65,15,219,209 ; pand %xmm9,%xmm2 2859 DB 68,15,91,194 ; cvtdq2ps %xmm2,%xmm8 2860 DB 243,15,16,82,124 ; movss 0x7c(%rdx),%xmm2 2861 DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 2862 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2863 DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 2864 DB 72,173 ; lods %ds:(%rsi),%rax 2865 DB 255,224 ; jmpq *%rax 2866 2867PUBLIC _sk_store_565_sse2 2868_sk_store_565_sse2 LABEL PROC 2869 DB 72,173 ; lods %ds:(%rsi),%rax 2870 DB 72,139,0 ; mov (%rax),%rax 2871 DB 243,68,15,16,130,128,0,0,0 ; movss 0x80(%rdx),%xmm8 2872 DB 243,68,15,16,138,132,0,0,0 ; movss 0x84(%rdx),%xmm9 2873 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2874 DB 69,15,40,208 ; movaps %xmm8,%xmm10 2875 DB 68,15,89,208 ; mulps %xmm0,%xmm10 2876 DB 102,69,15,91,210 ; cvtps2dq %xmm10,%xmm10 2877 DB 102,65,15,114,242,11 ; pslld $0xb,%xmm10 2878 DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 2879 DB 68,15,89,201 ; mulps %xmm1,%xmm9 2880 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2881 DB 102,65,15,114,241,5 ; pslld $0x5,%xmm9 2882 DB 102,69,15,235,202 ; por %xmm10,%xmm9 2883 DB 68,15,89,194 ; mulps %xmm2,%xmm8 2884 DB 102,69,15,91,192 ; cvtps2dq %xmm8,%xmm8 2885 DB 102,69,15,86,193 ; orpd %xmm9,%xmm8 2886 DB 102,65,15,114,240,16 ; pslld $0x10,%xmm8 2887 DB 102,65,15,114,224,16 ; psrad $0x10,%xmm8 2888 DB 102,69,15,107,192 ; packssdw %xmm8,%xmm8 2889 DB 102,68,15,214,4,120 ; movq %xmm8,(%rax,%rdi,2) 2890 DB 72,173 ; lods %ds:(%rsi),%rax 2891 DB 255,224 ; jmpq *%rax 2892 2893PUBLIC _sk_load_8888_sse2 2894_sk_load_8888_sse2 LABEL PROC 2895 DB 72,173 ; lods %ds:(%rsi),%rax 2896 DB 72,139,0 ; mov (%rax),%rax 2897 DB 243,15,111,28,184 ; movdqu (%rax,%rdi,4),%xmm3 2898 DB 102,15,110,66,16 ; movd 0x10(%rdx),%xmm0 2899 DB 102,15,112,192,0 ; pshufd $0x0,%xmm0,%xmm0 2900 DB 102,15,111,203 ; movdqa %xmm3,%xmm1 
2901 DB 102,15,114,209,8 ; psrld $0x8,%xmm1 2902 DB 102,15,219,200 ; pand %xmm0,%xmm1 2903 DB 102,15,111,211 ; movdqa %xmm3,%xmm2 2904 DB 102,15,114,210,16 ; psrld $0x10,%xmm2 2905 DB 102,15,219,208 ; pand %xmm0,%xmm2 2906 DB 102,15,219,195 ; pand %xmm3,%xmm0 2907 DB 15,91,192 ; cvtdq2ps %xmm0,%xmm0 2908 DB 243,68,15,16,66,12 ; movss 0xc(%rdx),%xmm8 2909 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2910 DB 65,15,89,192 ; mulps %xmm8,%xmm0 2911 DB 15,91,201 ; cvtdq2ps %xmm1,%xmm1 2912 DB 65,15,89,200 ; mulps %xmm8,%xmm1 2913 DB 15,91,210 ; cvtdq2ps %xmm2,%xmm2 2914 DB 65,15,89,208 ; mulps %xmm8,%xmm2 2915 DB 102,15,114,211,24 ; psrld $0x18,%xmm3 2916 DB 15,91,219 ; cvtdq2ps %xmm3,%xmm3 2917 DB 65,15,89,216 ; mulps %xmm8,%xmm3 2918 DB 72,173 ; lods %ds:(%rsi),%rax 2919 DB 255,224 ; jmpq *%rax 2920 2921PUBLIC _sk_store_8888_sse2 2922_sk_store_8888_sse2 LABEL PROC 2923 DB 72,173 ; lods %ds:(%rsi),%rax 2924 DB 72,139,0 ; mov (%rax),%rax 2925 DB 243,68,15,16,66,8 ; movss 0x8(%rdx),%xmm8 2926 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 2927 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2928 DB 68,15,89,200 ; mulps %xmm0,%xmm9 2929 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2930 DB 69,15,40,208 ; movaps %xmm8,%xmm10 2931 DB 68,15,89,209 ; mulps %xmm1,%xmm10 2932 DB 102,69,15,91,210 ; cvtps2dq %xmm10,%xmm10 2933 DB 102,65,15,114,242,8 ; pslld $0x8,%xmm10 2934 DB 102,69,15,235,209 ; por %xmm9,%xmm10 2935 DB 69,15,40,200 ; movaps %xmm8,%xmm9 2936 DB 68,15,89,202 ; mulps %xmm2,%xmm9 2937 DB 102,69,15,91,201 ; cvtps2dq %xmm9,%xmm9 2938 DB 102,65,15,114,241,16 ; pslld $0x10,%xmm9 2939 DB 68,15,89,195 ; mulps %xmm3,%xmm8 2940 DB 102,69,15,91,192 ; cvtps2dq %xmm8,%xmm8 2941 DB 102,65,15,114,240,24 ; pslld $0x18,%xmm8 2942 DB 102,69,15,235,193 ; por %xmm9,%xmm8 2943 DB 102,69,15,235,194 ; por %xmm10,%xmm8 2944 DB 243,68,15,127,4,184 ; movdqu %xmm8,(%rax,%rdi,4) 2945 DB 72,173 ; lods %ds:(%rsi),%rax 2946 DB 255,224 ; jmpq *%rax 2947 2948PUBLIC _sk_load_f16_sse2 2949_sk_load_f16_sse2 LABEL 
PROC 2950 DB 72,173 ; lods %ds:(%rsi),%rax 2951 DB 72,139,0 ; mov (%rax),%rax 2952 DB 243,15,111,4,248 ; movdqu (%rax,%rdi,8),%xmm0 2953 DB 243,15,111,76,248,16 ; movdqu 0x10(%rax,%rdi,8),%xmm1 2954 DB 102,15,111,208 ; movdqa %xmm0,%xmm2 2955 DB 102,15,97,209 ; punpcklwd %xmm1,%xmm2 2956 DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0 2957 DB 102,68,15,111,194 ; movdqa %xmm2,%xmm8 2958 DB 102,68,15,97,192 ; punpcklwd %xmm0,%xmm8 2959 DB 102,15,105,208 ; punpckhwd %xmm0,%xmm2 2960 DB 102,15,110,66,100 ; movd 0x64(%rdx),%xmm0 2961 DB 102,15,112,216,0 ; pshufd $0x0,%xmm0,%xmm3 2962 DB 102,15,111,203 ; movdqa %xmm3,%xmm1 2963 DB 102,65,15,101,200 ; pcmpgtw %xmm8,%xmm1 2964 DB 102,65,15,223,200 ; pandn %xmm8,%xmm1 2965 DB 102,15,101,218 ; pcmpgtw %xmm2,%xmm3 2966 DB 102,15,223,218 ; pandn %xmm2,%xmm3 2967 DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 2968 DB 102,15,111,193 ; movdqa %xmm1,%xmm0 2969 DB 102,65,15,97,192 ; punpcklwd %xmm8,%xmm0 2970 DB 102,15,114,240,13 ; pslld $0xd,%xmm0 2971 DB 102,15,110,82,92 ; movd 0x5c(%rdx),%xmm2 2972 DB 102,68,15,112,202,0 ; pshufd $0x0,%xmm2,%xmm9 2973 DB 65,15,89,193 ; mulps %xmm9,%xmm0 2974 DB 102,65,15,105,200 ; punpckhwd %xmm8,%xmm1 2975 DB 102,15,114,241,13 ; pslld $0xd,%xmm1 2976 DB 65,15,89,201 ; mulps %xmm9,%xmm1 2977 DB 102,15,111,211 ; movdqa %xmm3,%xmm2 2978 DB 102,65,15,97,208 ; punpcklwd %xmm8,%xmm2 2979 DB 102,15,114,242,13 ; pslld $0xd,%xmm2 2980 DB 65,15,89,209 ; mulps %xmm9,%xmm2 2981 DB 102,65,15,105,216 ; punpckhwd %xmm8,%xmm3 2982 DB 102,15,114,243,13 ; pslld $0xd,%xmm3 2983 DB 65,15,89,217 ; mulps %xmm9,%xmm3 2984 DB 72,173 ; lods %ds:(%rsi),%rax 2985 DB 255,224 ; jmpq *%rax 2986 2987PUBLIC _sk_store_f16_sse2 2988_sk_store_f16_sse2 LABEL PROC 2989 DB 72,173 ; lods %ds:(%rsi),%rax 2990 DB 72,139,0 ; mov (%rax),%rax 2991 DB 102,68,15,110,66,96 ; movd 0x60(%rdx),%xmm8 2992 DB 102,69,15,112,192,0 ; pshufd $0x0,%xmm8,%xmm8 2993 DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 2994 DB 68,15,89,200 ; mulps %xmm0,%xmm9 2995 DB 
102,65,15,114,209,13 ; psrld $0xd,%xmm9 2996 DB 102,69,15,111,208 ; movdqa %xmm8,%xmm10 2997 DB 68,15,89,209 ; mulps %xmm1,%xmm10 2998 DB 102,65,15,114,210,13 ; psrld $0xd,%xmm10 2999 DB 102,69,15,111,216 ; movdqa %xmm8,%xmm11 3000 DB 68,15,89,218 ; mulps %xmm2,%xmm11 3001 DB 102,65,15,114,211,13 ; psrld $0xd,%xmm11 3002 DB 68,15,89,195 ; mulps %xmm3,%xmm8 3003 DB 102,65,15,114,208,13 ; psrld $0xd,%xmm8 3004 DB 102,65,15,115,250,2 ; pslldq $0x2,%xmm10 3005 DB 102,69,15,235,209 ; por %xmm9,%xmm10 3006 DB 102,65,15,115,248,2 ; pslldq $0x2,%xmm8 3007 DB 102,69,15,235,195 ; por %xmm11,%xmm8 3008 DB 102,69,15,111,202 ; movdqa %xmm10,%xmm9 3009 DB 102,69,15,98,200 ; punpckldq %xmm8,%xmm9 3010 DB 243,68,15,127,12,248 ; movdqu %xmm9,(%rax,%rdi,8) 3011 DB 102,69,15,106,208 ; punpckhdq %xmm8,%xmm10 3012 DB 243,68,15,127,84,248,16 ; movdqu %xmm10,0x10(%rax,%rdi,8) 3013 DB 72,173 ; lods %ds:(%rsi),%rax 3014 DB 255,224 ; jmpq *%rax 3015 3016PUBLIC _sk_clamp_x_sse2 3017_sk_clamp_x_sse2 LABEL PROC 3018 DB 72,173 ; lods %ds:(%rsi),%rax 3019 DB 243,68,15,16,0 ; movss (%rax),%xmm8 3020 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 3021 DB 102,69,15,118,201 ; pcmpeqd %xmm9,%xmm9 3022 DB 102,69,15,254,200 ; paddd %xmm8,%xmm9 3023 DB 65,15,93,193 ; minps %xmm9,%xmm0 3024 DB 69,15,87,192 ; xorps %xmm8,%xmm8 3025 DB 68,15,95,192 ; maxps %xmm0,%xmm8 3026 DB 72,173 ; lods %ds:(%rsi),%rax 3027 DB 65,15,40,192 ; movaps %xmm8,%xmm0 3028 DB 255,224 ; jmpq *%rax 3029 3030PUBLIC _sk_clamp_y_sse2 3031_sk_clamp_y_sse2 LABEL PROC 3032 DB 72,173 ; lods %ds:(%rsi),%rax 3033 DB 243,68,15,16,0 ; movss (%rax),%xmm8 3034 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 3035 DB 102,69,15,118,201 ; pcmpeqd %xmm9,%xmm9 3036 DB 102,69,15,254,200 ; paddd %xmm8,%xmm9 3037 DB 65,15,93,201 ; minps %xmm9,%xmm1 3038 DB 69,15,87,192 ; xorps %xmm8,%xmm8 3039 DB 68,15,95,193 ; maxps %xmm1,%xmm8 3040 DB 72,173 ; lods %ds:(%rsi),%rax 3041 DB 65,15,40,200 ; movaps %xmm8,%xmm1 3042 DB 255,224 ; jmpq *%rax 3043 3044PUBLIC 
_sk_matrix_2x3_sse2 3045_sk_matrix_2x3_sse2 LABEL PROC 3046 DB 68,15,40,201 ; movaps %xmm1,%xmm9 3047 DB 68,15,40,192 ; movaps %xmm0,%xmm8 3048 DB 72,173 ; lods %ds:(%rsi),%rax 3049 DB 243,15,16,0 ; movss (%rax),%xmm0 3050 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 3051 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 3052 DB 243,68,15,16,80,8 ; movss 0x8(%rax),%xmm10 3053 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 3054 DB 243,68,15,16,88,16 ; movss 0x10(%rax),%xmm11 3055 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 3056 DB 69,15,89,209 ; mulps %xmm9,%xmm10 3057 DB 69,15,88,211 ; addps %xmm11,%xmm10 3058 DB 65,15,89,192 ; mulps %xmm8,%xmm0 3059 DB 65,15,88,194 ; addps %xmm10,%xmm0 3060 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 3061 DB 243,68,15,16,80,12 ; movss 0xc(%rax),%xmm10 3062 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 3063 DB 243,68,15,16,88,20 ; movss 0x14(%rax),%xmm11 3064 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 3065 DB 69,15,89,209 ; mulps %xmm9,%xmm10 3066 DB 69,15,88,211 ; addps %xmm11,%xmm10 3067 DB 65,15,89,200 ; mulps %xmm8,%xmm1 3068 DB 65,15,88,202 ; addps %xmm10,%xmm1 3069 DB 72,173 ; lods %ds:(%rsi),%rax 3070 DB 255,224 ; jmpq *%rax 3071 3072PUBLIC _sk_matrix_3x4_sse2 3073_sk_matrix_3x4_sse2 LABEL PROC 3074 DB 68,15,40,201 ; movaps %xmm1,%xmm9 3075 DB 68,15,40,192 ; movaps %xmm0,%xmm8 3076 DB 72,173 ; lods %ds:(%rsi),%rax 3077 DB 243,15,16,0 ; movss (%rax),%xmm0 3078 DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 3079 DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 3080 DB 243,68,15,16,80,12 ; movss 0xc(%rax),%xmm10 3081 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 3082 DB 243,68,15,16,88,24 ; movss 0x18(%rax),%xmm11 3083 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 3084 DB 243,68,15,16,96,36 ; movss 0x24(%rax),%xmm12 3085 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 3086 DB 68,15,89,218 ; mulps %xmm2,%xmm11 3087 DB 69,15,88,220 ; addps %xmm12,%xmm11 3088 DB 69,15,89,209 ; mulps %xmm9,%xmm10 3089 DB 69,15,88,211 ; addps %xmm11,%xmm10 3090 DB 
65,15,89,192 ; mulps %xmm8,%xmm0 3091 DB 65,15,88,194 ; addps %xmm10,%xmm0 3092 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 3093 DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 3094 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 3095 DB 243,68,15,16,88,28 ; movss 0x1c(%rax),%xmm11 3096 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 3097 DB 243,68,15,16,96,40 ; movss 0x28(%rax),%xmm12 3098 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 3099 DB 68,15,89,218 ; mulps %xmm2,%xmm11 3100 DB 69,15,88,220 ; addps %xmm12,%xmm11 3101 DB 69,15,89,209 ; mulps %xmm9,%xmm10 3102 DB 69,15,88,211 ; addps %xmm11,%xmm10 3103 DB 65,15,89,200 ; mulps %xmm8,%xmm1 3104 DB 65,15,88,202 ; addps %xmm10,%xmm1 3105 DB 243,68,15,16,80,8 ; movss 0x8(%rax),%xmm10 3106 DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 3107 DB 243,68,15,16,88,20 ; movss 0x14(%rax),%xmm11 3108 DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 3109 DB 243,68,15,16,96,32 ; movss 0x20(%rax),%xmm12 3110 DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 3111 DB 243,68,15,16,104,44 ; movss 0x2c(%rax),%xmm13 3112 DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 3113 DB 68,15,89,226 ; mulps %xmm2,%xmm12 3114 DB 69,15,88,229 ; addps %xmm13,%xmm12 3115 DB 69,15,89,217 ; mulps %xmm9,%xmm11 3116 DB 69,15,88,220 ; addps %xmm12,%xmm11 3117 DB 69,15,89,208 ; mulps %xmm8,%xmm10 3118 DB 69,15,88,211 ; addps %xmm11,%xmm10 3119 DB 72,173 ; lods %ds:(%rsi),%rax 3120 DB 65,15,40,210 ; movaps %xmm10,%xmm2 3121 DB 255,224 ; jmpq *%rax 3122 3123PUBLIC _sk_linear_gradient_2stops_sse2 3124_sk_linear_gradient_2stops_sse2 LABEL PROC 3125 DB 72,173 ; lods %ds:(%rsi),%rax 3126 DB 68,15,16,8 ; movups (%rax),%xmm9 3127 DB 15,16,88,16 ; movups 0x10(%rax),%xmm3 3128 DB 68,15,40,195 ; movaps %xmm3,%xmm8 3129 DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 3130 DB 65,15,40,201 ; movaps %xmm9,%xmm1 3131 DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 3132 DB 68,15,89,192 ; mulps %xmm0,%xmm8 3133 DB 68,15,88,193 ; addps %xmm1,%xmm8 3134 DB 15,40,203 ; movaps %xmm3,%xmm1 
3135 DB 15,198,201,85 ; shufps $0x55,%xmm1,%xmm1 3136 DB 65,15,40,209 ; movaps %xmm9,%xmm2 3137 DB 15,198,210,85 ; shufps $0x55,%xmm2,%xmm2 3138 DB 15,89,200 ; mulps %xmm0,%xmm1 3139 DB 15,88,202 ; addps %xmm2,%xmm1 3140 DB 15,40,211 ; movaps %xmm3,%xmm2 3141 DB 15,198,210,170 ; shufps $0xaa,%xmm2,%xmm2 3142 DB 69,15,40,209 ; movaps %xmm9,%xmm10 3143 DB 69,15,198,210,170 ; shufps $0xaa,%xmm10,%xmm10 3144 DB 15,89,208 ; mulps %xmm0,%xmm2 3145 DB 65,15,88,210 ; addps %xmm10,%xmm2 3146 DB 15,198,219,255 ; shufps $0xff,%xmm3,%xmm3 3147 DB 69,15,198,201,255 ; shufps $0xff,%xmm9,%xmm9 3148 DB 15,89,216 ; mulps %xmm0,%xmm3 3149 DB 65,15,88,217 ; addps %xmm9,%xmm3 3150 DB 72,173 ; lods %ds:(%rsi),%rax 3151 DB 65,15,40,192 ; movaps %xmm8,%xmm0 3152 DB 255,224 ; jmpq *%rax 3153END 3154