SkJumper_generated_win.S revision 3f81f3703a68755c88f5cc4a87728b98f34c4cd4
1; Copyright 2017 Google Inc.
2;
3; Use of this source code is governed by a BSD-style license that can be
4; found in the LICENSE file.
5
6; This file is generated semi-automatically with this command:
7;   $ src/jumper/build_stages.py
8
; Open the _text segment; the DB-encoded routines that follow are placed in it.
9_text SEGMENT
10
; _sk_start_pipeline_hsw: entry point that runs a chain of stages over a range of indices.
;
; The four integer arguments arrive in rcx, rdx, r8 and r9 (the Microsoft x64 argument
; registers) and are repacked into the rdi/rsi/rdx registers that each stage is called with:
;   rcx -> rbx : current index; copied to rdi before every call
;   rdx -> rsi : pointer to the stage list; its first qword (the first stage's address)
;                is kept in r12, and the pointer just past it is kept in r13 and copied
;                to rsi before every call
;   r8  -> r14 : copied unchanged to rdx before every call
;   r9  -> r15 : upper limit for the index
;
; While index + 8 <= limit, ymm0-ymm7 are zeroed, the first stage is called, and the index
; advances by 8. On return rax holds the first index that was not processed.
; rbx, rsi, rdi, r12-r15 and xmm6-xmm15 are saved on entry and restored before returning.
11PUBLIC _sk_start_pipeline_hsw
12_sk_start_pipeline_hsw LABEL PROC
; Prologue: save the integer registers this routine overwrites.
13  DB  65,87                               ; push          %r15
14  DB  65,86                               ; push          %r14
15  DB  65,85                               ; push          %r13
16  DB  65,84                               ; push          %r12
17  DB  86                                  ; push          %rsi
18  DB  87                                  ; push          %rdi
19  DB  83                                  ; push          %rbx
; Reserve 0xa0 bytes (ten 16-byte slots) and spill xmm6-xmm15 into them.
20  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
21  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
22  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
23  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
24  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
25  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
26  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
27  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
28  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
29  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
30  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
; Move the incoming arguments into registers that survive the stage calls.
31  DB  77,137,207                          ; mov           %r9,%r15
32  DB  77,137,198                          ; mov           %r8,%r14
33  DB  72,137,203                          ; mov           %rcx,%rbx
34  DB  72,137,214                          ; mov           %rdx,%rsi
; r12 = first stage address, r13 = stage-list pointer just past that entry.
35  DB  72,173                              ; lods          %ds:(%rsi),%rax
36  DB  73,137,196                          ; mov           %rax,%r12
37  DB  73,137,245                          ; mov           %rsi,%r13
; Skip the loop entirely (returning the starting index) unless index + 8 <= limit.
38  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
39  DB  76,57,248                           ; cmp           %r15,%rax
40  DB  118,5                               ; jbe           75 <_sk_start_pipeline_hsw+0x75>
41  DB  72,137,216                          ; mov           %rbx,%rax
42  DB  235,60                              ; jmp           b1 <_sk_start_pipeline_hsw+0xb1>
; Loop body (+0x75): clear ymm0-ymm7, load the stage arguments, call the first stage.
43  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
44  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
45  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
46  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
47  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
48  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
49  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
50  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
51  DB  72,137,223                          ; mov           %rbx,%rdi
52  DB  76,137,238                          ; mov           %r13,%rsi
53  DB  76,137,242                          ; mov           %r14,%rdx
54  DB  65,255,212                          ; callq         *%r12
; rax = index + 8. The flags come from comparing index + 16 with the limit; the following
; mov leaves them intact, so the jbe loops only if another full group of 8 fits.
55  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
56  DB  72,131,195,16                       ; add           $0x10,%rbx
57  DB  76,57,251                           ; cmp           %r15,%rbx
58  DB  72,137,195                          ; mov           %rax,%rbx
59  DB  118,196                             ; jbe           75 <_sk_start_pipeline_hsw+0x75>
; Epilogue (+0xb1): restore xmm6-xmm15 and the pushed registers in reverse order.
60  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
61  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
62  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
63  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
64  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
65  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
66  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
67  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
68  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
69  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
70  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
71  DB  91                                  ; pop           %rbx
72  DB  95                                  ; pop           %rdi
73  DB  94                                  ; pop           %rsi
74  DB  65,92                               ; pop           %r12
75  DB  65,93                               ; pop           %r13
76  DB  65,94                               ; pop           %r14
77  DB  65,95                               ; pop           %r15
; Clear the upper halves of the ymm registers before handing control back to the caller.
78  DB  197,248,119                         ; vzeroupper
79  DB  195                                 ; retq
80
; _sk_just_return_hsw: a stage consisting of a single ret. Other stages in this file leave
; by jumping to the next stage, so this ret goes back to whoever issued the original call.
81PUBLIC _sk_just_return_hsw
82_sk_just_return_hsw LABEL PROC
83  DB  195                                 ; retq
84
; _sk_seed_shader_hsw: initialises ymm0-ymm7 from the index in edi and one context value.
; In:  edi = index, rsi = stage-list cursor (next qword = context pointer), rdx = constants base
; Out: ymm0 = float(edi) + float at 0x4(%rdx) + the eight floats at 0x14(%rdx)
;      ymm1 = float(int32 at (context)) + float at 0x4(%rdx)
;      ymm2 = broadcast of the float at (%rdx)
;      ymm3-ymm7 = 0
; NOTE(review): 0x4(%rdx) is presumably 0.5f and 0x14(%rdx) presumably a per-lane 0..7 ramp,
; making ymm0/ymm1 pixel-centre x/y coordinates - confirm against the constants table.
85PUBLIC _sk_seed_shader_hsw
86_sk_seed_shader_hsw LABEL PROC
87  DB  72,173                              ; lods          %ds:(%rsi),%rax
88  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
89  DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
90  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
91  DB  196,226,125,24,74,4                 ; vbroadcastss  0x4(%rdx),%ymm1
92  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
93  DB  197,252,88,66,20                    ; vaddps        0x14(%rdx),%ymm0,%ymm0
94  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
95  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
96  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
97  DB  196,226,125,24,18                   ; vbroadcastss  (%rdx),%ymm2
; Fetch the next stage's address and tail-jump to it after zeroing ymm3-ymm7.
98  DB  72,173                              ; lods          %ds:(%rsi),%rax
99  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
100  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
101  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
102  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
103  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
104  DB  255,224                             ; jmpq          *%rax
105
; _sk_constant_color_hsw: loads the context pointer from the stage list and broadcasts the
; four consecutive floats it points at into ymm0, ymm1, ymm2 and ymm3 respectively, then
; jumps to the next stage.
106PUBLIC _sk_constant_color_hsw
107_sk_constant_color_hsw LABEL PROC
108  DB  72,173                              ; lods          %ds:(%rsi),%rax
109  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
110  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
111  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
112  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
113  DB  72,173                              ; lods          %ds:(%rsi),%rax
114  DB  255,224                             ; jmpq          *%rax
115
; _sk_clear_hsw: sets ymm0-ymm3 to zero and jumps to the next stage.
; The first lods only advances rsi past this stage's context slot; the value it loads is
; overwritten by the second lods, which fetches the next stage's address.
116PUBLIC _sk_clear_hsw
117_sk_clear_hsw LABEL PROC
118  DB  72,173                              ; lods          %ds:(%rsi),%rax
119  DB  72,173                              ; lods          %ds:(%rsi),%rax
120  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
121  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
122  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
123  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
124  DB  255,224                             ; jmpq          *%rax
125
; _sk_plus__hsw: lane-wise addition, ymm0..ymm3 += ymm4..ymm7 (ymm0+=ymm4, ymm1+=ymm5, ...),
; then jumps to the next stage. The first lods skips this stage's unused context slot.
126PUBLIC _sk_plus__hsw
127_sk_plus__hsw LABEL PROC
128  DB  72,173                              ; lods          %ds:(%rsi),%rax
129  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
130  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
131  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
132  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
133  DB  72,173                              ; lods          %ds:(%rsi),%rax
134  DB  255,224                             ; jmpq          *%rax
135
; _sk_srcover_hsw: with k = (float at (%rdx)) - ymm3, computes
;   ymm0 += ymm4*k, ymm1 += ymm5*k, ymm2 += ymm6*k, ymm3 += ymm7*k
; using fused multiply-adds, then jumps to the next stage.
; k is formed from the incoming ymm3, before ymm3 itself is updated by the last FMA.
; NOTE(review): the float at (%rdx) is presumably 1.0f, making k the inverse of the value
; in ymm3 - confirm against the constants table.
136PUBLIC _sk_srcover_hsw
137_sk_srcover_hsw LABEL PROC
138  DB  72,173                              ; lods          %ds:(%rsi),%rax
139  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
140  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
141  DB  196,194,93,184,192                  ; vfmadd231ps   %ymm8,%ymm4,%ymm0
142  DB  196,194,85,184,200                  ; vfmadd231ps   %ymm8,%ymm5,%ymm1
143  DB  196,194,77,184,208                  ; vfmadd231ps   %ymm8,%ymm6,%ymm2
144  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
145  DB  72,173                              ; lods          %ds:(%rsi),%rax
146  DB  255,224                             ; jmpq          *%rax
147
; _sk_dstover_hsw: with k = (float at (%rdx)) - ymm7, computes
;   ymm0 = ymm0*k + ymm4, ymm1 = ymm1*k + ymm5, ymm2 = ymm2*k + ymm6, ymm3 = ymm3*k + ymm7
; using fused multiply-adds, then jumps to the next stage.
; NOTE(review): the float at (%rdx) is presumably 1.0f - confirm against the constants table.
148PUBLIC _sk_dstover_hsw
149_sk_dstover_hsw LABEL PROC
150  DB  72,173                              ; lods          %ds:(%rsi),%rax
151  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
152  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
153  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
154  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
155  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
156  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
157  DB  72,173                              ; lods          %ds:(%rsi),%rax
158  DB  255,224                             ; jmpq          *%rax
159
; _sk_clamp_0_hsw: clamps from below, replacing each lane of ymm0-ymm3 with max(lane, 0),
; then jumps to the next stage. ymm8 is used as the zero vector.
160PUBLIC _sk_clamp_0_hsw
161_sk_clamp_0_hsw LABEL PROC
162  DB  72,173                              ; lods          %ds:(%rsi),%rax
163  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
164  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
165  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
166  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
167  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
168  DB  72,173                              ; lods          %ds:(%rsi),%rax
169  DB  255,224                             ; jmpq          *%rax
170
; _sk_clamp_1_hsw: clamps from above, replacing each lane of ymm0-ymm3 with
; min(lane, float at (%rdx)), then jumps to the next stage.
; NOTE(review): the float at (%rdx) is presumably 1.0f, per the stage name - confirm.
171PUBLIC _sk_clamp_1_hsw
172_sk_clamp_1_hsw LABEL PROC
173  DB  72,173                              ; lods          %ds:(%rsi),%rax
174  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
175  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
176  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
177  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
178  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
179  DB  72,173                              ; lods          %ds:(%rsi),%rax
180  DB  255,224                             ; jmpq          *%rax
181
; _sk_clamp_a_hsw: first limits ymm3 to at most the float at (%rdx), then limits each of
; ymm0, ymm1 and ymm2 to at most the (already limited) ymm3, and jumps to the next stage.
; NOTE(review): the float at (%rdx) is presumably 1.0f - confirm against the constants table.
182PUBLIC _sk_clamp_a_hsw
183_sk_clamp_a_hsw LABEL PROC
184  DB  72,173                              ; lods          %ds:(%rsi),%rax
185  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
186  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
187  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
188  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
189  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
190  DB  72,173                              ; lods          %ds:(%rsi),%rax
191  DB  255,224                             ; jmpq          *%rax
192
; _sk_set_rgb_hsw: loads the context pointer from the stage list and broadcasts the three
; consecutive floats it points at into ymm0, ymm1 and ymm2. ymm3 is left untouched.
; Then jumps to the next stage.
193PUBLIC _sk_set_rgb_hsw
194_sk_set_rgb_hsw LABEL PROC
195  DB  72,173                              ; lods          %ds:(%rsi),%rax
196  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
197  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
198  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
199  DB  72,173                              ; lods          %ds:(%rsi),%rax
200  DB  255,224                             ; jmpq          *%rax
201
; _sk_swap_rb_hsw: exchanges ymm0 and ymm2, using ymm8 as the temporary, then jumps to the
; next stage. The first lods skips this stage's unused context slot; the second fetches the
; next stage's address.
202PUBLIC _sk_swap_rb_hsw
203_sk_swap_rb_hsw LABEL PROC
204  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
205  DB  72,173                              ; lods          %ds:(%rsi),%rax
206  DB  72,173                              ; lods          %ds:(%rsi),%rax
207  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
208  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
209  DB  255,224                             ; jmpq          *%rax
210
; _sk_swap_hsw: exchanges the register group ymm0-ymm3 with ymm4-ymm7
; (ymm0<->ymm4, ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7), then jumps to the next stage.
; ymm8-ymm11 hold the old ymm3..ymm0 values while the groups are exchanged.
211PUBLIC _sk_swap_hsw
212_sk_swap_hsw LABEL PROC
213  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
214  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
215  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
216  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
; First lods skips the unused context slot; second fetches the next stage's address.
217  DB  72,173                              ; lods          %ds:(%rsi),%rax
218  DB  72,173                              ; lods          %ds:(%rsi),%rax
219  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
220  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
221  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
222  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
223  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
224  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
225  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
226  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
227  DB  255,224                             ; jmpq          *%rax
228
; _sk_move_src_dst_hsw: copies ymm0-ymm3 into ymm4-ymm7 (ymm0->ymm4, ..., ymm3->ymm7) and
; jumps to the next stage. ymm0-ymm3 keep their values.
; The first lods skips this stage's unused context slot.
229PUBLIC _sk_move_src_dst_hsw
230_sk_move_src_dst_hsw LABEL PROC
231  DB  72,173                              ; lods          %ds:(%rsi),%rax
232  DB  72,173                              ; lods          %ds:(%rsi),%rax
233  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
234  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
235  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
236  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
237  DB  255,224                             ; jmpq          *%rax
238
; _sk_move_dst_src_hsw: copies ymm4-ymm7 into ymm0-ymm3 (ymm4->ymm0, ..., ymm7->ymm3) and
; jumps to the next stage. ymm4-ymm7 keep their values.
; The first lods skips this stage's unused context slot.
239PUBLIC _sk_move_dst_src_hsw
240_sk_move_dst_src_hsw LABEL PROC
241  DB  72,173                              ; lods          %ds:(%rsi),%rax
242  DB  72,173                              ; lods          %ds:(%rsi),%rax
243  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
244  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
245  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
246  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
247  DB  255,224                             ; jmpq          *%rax
248
; _sk_premul_hsw: multiplies ymm0, ymm1 and ymm2 lane-wise by ymm3 (ymm3 itself is
; unchanged), then jumps to the next stage.
249PUBLIC _sk_premul_hsw
250_sk_premul_hsw LABEL PROC
251  DB  72,173                              ; lods          %ds:(%rsi),%rax
252  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
253  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
254  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
255  DB  72,173                              ; lods          %ds:(%rsi),%rax
256  DB  255,224                             ; jmpq          *%rax
257
; _sk_unpremul_hsw: multiplies ymm0, ymm1 and ymm2 by a per-lane scale derived from ymm3:
;   scale = (ymm3 == 0) ? 0 : (float at (%rdx)) / ymm3
; The blend substitutes 0 in lanes where ymm3 is zero, so the result of the division in
; those lanes is discarded. ymm3 is unchanged. Then jumps to the next stage.
; NOTE(review): the float at (%rdx) is presumably 1.0f - confirm against the constants table.
258PUBLIC _sk_unpremul_hsw
259_sk_unpremul_hsw LABEL PROC
260  DB  72,173                              ; lods          %ds:(%rsi),%rax
261  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
262  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
263  DB  196,98,125,24,18                    ; vbroadcastss  (%rdx),%ymm10
264  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
265  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
266  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
267  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
268  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
269  DB  72,173                              ; lods          %ds:(%rsi),%rax
270  DB  255,224                             ; jmpq          *%rax
271
; _sk_from_srgb_hsw: applies the same piecewise function to each of ymm0, ymm1 and ymm2
; (ymm3 is untouched). With constants read relative to rdx and c the incoming lane value:
;   lo = K[0x40] * c
;   hi = (K[0x3c] * c + K[0x38]) * c*c + K[0x34]
;   c  = (c < K[0x44]) ? lo : hi
; NOTE(review): by the stage name this is presumably an sRGB-to-linear approximation with a
; linear segment below the K[0x44] threshold - confirm the constant values in the table.
272PUBLIC _sk_from_srgb_hsw
273_sk_from_srgb_hsw LABEL PROC
274  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Channel in ymm0. ymm8, ymm11, ymm12, ymm14 and ymm10 hold constants reused below.
275  DB  196,98,125,24,66,64                 ; vbroadcastss  0x40(%rdx),%ymm8
276  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
277  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
278  DB  196,98,125,24,90,60                 ; vbroadcastss  0x3c(%rdx),%ymm11
279  DB  196,98,125,24,98,56                 ; vbroadcastss  0x38(%rdx),%ymm12
280  DB  196,65,124,40,235                   ; vmovaps       %ymm11,%ymm13
281  DB  196,66,125,168,236                  ; vfmadd213ps   %ymm12,%ymm0,%ymm13
282  DB  196,98,125,24,114,52                ; vbroadcastss  0x34(%rdx),%ymm14
283  DB  196,66,45,168,238                   ; vfmadd213ps   %ymm14,%ymm10,%ymm13
284  DB  196,98,125,24,82,68                 ; vbroadcastss  0x44(%rdx),%ymm10
285  DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
286  DB  196,195,21,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
; Channel in ymm1 (ymm15 takes the copy of the K[0x3c] constant this time).
287  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
288  DB  197,116,89,233                      ; vmulps        %ymm1,%ymm1,%ymm13
289  DB  196,65,124,40,251                   ; vmovaps       %ymm11,%ymm15
290  DB  196,66,117,168,252                  ; vfmadd213ps   %ymm12,%ymm1,%ymm15
291  DB  196,66,21,168,254                   ; vfmadd213ps   %ymm14,%ymm13,%ymm15
292  DB  196,193,116,194,202,1               ; vcmpltps      %ymm10,%ymm1,%ymm1
293  DB  196,195,5,74,201,16                 ; vblendvps     %ymm1,%ymm9,%ymm15,%ymm1
; Channel in ymm2 (last use of the constants, so ymm8 and ymm11 are overwritten in place).
294  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
295  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
296  DB  196,66,109,168,220                  ; vfmadd213ps   %ymm12,%ymm2,%ymm11
297  DB  196,66,53,168,222                   ; vfmadd213ps   %ymm14,%ymm9,%ymm11
298  DB  196,193,108,194,210,1               ; vcmpltps      %ymm10,%ymm2,%ymm2
299  DB  196,195,37,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm11,%ymm2
300  DB  72,173                              ; lods          %ds:(%rsi),%rax
301  DB  255,224                             ; jmpq          *%rax
302
; _sk_to_srgb_hsw: applies the same piecewise function to each of ymm0, ymm1 and ymm2
; (ymm3 is untouched). With constants read relative to rdx and c the incoming lane value:
;   rs = vrsqrtps(c)      (approximate reciprocal square root)
;   s  = vrcpps(rs)       (approximate reciprocal of rs)
;   q  = vrsqrtps(rs)     (approximate reciprocal square root of rs)
;   lo = K[0x48] * c
;   hi = min(K[0x00], K[0x50]*s + K[0x54] + K[0x4c]*q)
;   c  = (c < K[0x58]) ? lo : hi
; NOTE(review): by the stage name this is presumably a linear-to-sRGB approximation built
; from sqrt(c) and c^(1/4) estimates, capped at K[0x00] (presumably 1.0f) - confirm.
; Both lods instructions sit at the end: the first skips the unused context slot, the
; second fetches the next stage's address.
303PUBLIC _sk_to_srgb_hsw
304_sk_to_srgb_hsw LABEL PROC
; Channel in ymm0. ymm8 and ymm12-ymm15 hold constants reused by all three channels.
305  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
306  DB  196,65,124,83,200                   ; vrcpps        %ymm8,%ymm9
307  DB  196,65,124,82,208                   ; vrsqrtps      %ymm8,%ymm10
308  DB  196,98,125,24,66,72                 ; vbroadcastss  0x48(%rdx),%ymm8
309  DB  197,60,89,216                       ; vmulps        %ymm0,%ymm8,%ymm11
310  DB  196,98,125,24,34                    ; vbroadcastss  (%rdx),%ymm12
311  DB  196,98,125,24,106,76                ; vbroadcastss  0x4c(%rdx),%ymm13
312  DB  196,98,125,24,114,80                ; vbroadcastss  0x50(%rdx),%ymm14
313  DB  196,98,125,24,122,84                ; vbroadcastss  0x54(%rdx),%ymm15
314  DB  196,66,13,168,207                   ; vfmadd213ps   %ymm15,%ymm14,%ymm9
315  DB  196,66,21,184,202                   ; vfmadd231ps   %ymm10,%ymm13,%ymm9
316  DB  196,65,28,93,201                    ; vminps        %ymm9,%ymm12,%ymm9
317  DB  196,98,125,24,82,88                 ; vbroadcastss  0x58(%rdx),%ymm10
318  DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
319  DB  196,195,53,74,195,0                 ; vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
; Channel in ymm1.
320  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
321  DB  196,65,124,83,217                   ; vrcpps        %ymm9,%ymm11
322  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
323  DB  196,66,13,168,223                   ; vfmadd213ps   %ymm15,%ymm14,%ymm11
324  DB  196,66,21,184,217                   ; vfmadd231ps   %ymm9,%ymm13,%ymm11
325  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
326  DB  196,65,28,93,219                    ; vminps        %ymm11,%ymm12,%ymm11
327  DB  196,193,116,194,202,1               ; vcmpltps      %ymm10,%ymm1,%ymm1
328  DB  196,195,37,74,201,16                ; vblendvps     %ymm1,%ymm9,%ymm11,%ymm1
; Channel in ymm2.
329  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
330  DB  196,65,124,83,217                   ; vrcpps        %ymm9,%ymm11
331  DB  196,66,13,168,223                   ; vfmadd213ps   %ymm15,%ymm14,%ymm11
332  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
333  DB  196,66,21,184,217                   ; vfmadd231ps   %ymm9,%ymm13,%ymm11
334  DB  196,65,28,93,203                    ; vminps        %ymm11,%ymm12,%ymm9
335  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
336  DB  196,193,108,194,210,1               ; vcmpltps      %ymm10,%ymm2,%ymm2
337  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
338  DB  72,173                              ; lods          %ds:(%rsi),%rax
339  DB  72,173                              ; lods          %ds:(%rsi),%rax
340  DB  255,224                             ; jmpq          *%rax
341
; _sk_scale_u8_hsw: scales ymm0-ymm3 by a per-lane factor read from a byte buffer.
; The context entry points at a pointer; 8 bytes are read from that pointer + rdi,
; zero-extended to 32 bits, converted to float and multiplied by the float at 0xc(%rdx).
; Each of ymm0-ymm3 is then multiplied by the resulting vector. Jumps to the next stage.
; NOTE(review): 0xc(%rdx) is presumably 1/255, mapping bytes to [0,1] - confirm.
; Always reads a full 8 bytes regardless of how many indices remain.
342PUBLIC _sk_scale_u8_hsw
343_sk_scale_u8_hsw LABEL PROC
344  DB  72,173                              ; lods          %ds:(%rsi),%rax
345  DB  72,139,0                            ; mov           (%rax),%rax
346  DB  196,98,125,49,4,56                  ; vpmovzxbd     (%rax,%rdi,1),%ymm8
347  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
348  DB  196,98,125,24,74,12                 ; vbroadcastss  0xc(%rdx),%ymm9
349  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
350  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
351  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
352  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
353  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
354  DB  72,173                              ; lods          %ds:(%rsi),%rax
355  DB  255,224                             ; jmpq          *%rax
356
; _sk_lerp_u8_hsw: interpolates between ymm4-ymm7 and ymm0-ymm3 with a per-lane factor
; read from a byte buffer.
; The context entry points at a pointer; 8 bytes are read from that pointer + rdi,
; zero-extended, converted to float and multiplied by the float at 0xc(%rdx), giving t.
; Then for each pair: ymm0 = (ymm0 - ymm4)*t + ymm4, and likewise ymm1/ymm5, ymm2/ymm6,
; ymm3/ymm7. Jumps to the next stage.
; NOTE(review): 0xc(%rdx) is presumably 1/255, mapping bytes to [0,1] - confirm.
357PUBLIC _sk_lerp_u8_hsw
358_sk_lerp_u8_hsw LABEL PROC
359  DB  72,173                              ; lods          %ds:(%rsi),%rax
360  DB  72,139,0                            ; mov           (%rax),%rax
361  DB  196,98,125,49,4,56                  ; vpmovzxbd     (%rax,%rdi,1),%ymm8
362  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
363  DB  196,98,125,24,74,12                 ; vbroadcastss  0xc(%rdx),%ymm9
364  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
365  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
366  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
367  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
368  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
369  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
370  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
371  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
372  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
373  DB  72,173                              ; lods          %ds:(%rsi),%rax
374  DB  255,224                             ; jmpq          *%rax
375
; _sk_load_tables_hsw: loads eight 32-bit values and maps three of their byte fields
; through float lookup tables.
; The context entry is a struct of four pointers:
;   +0x00 source of 32-bit values (eight are read at source + rdi*4)
;   +0x08 table indexed by bits 0-7   -> ymm0
;   +0x10 table indexed by bits 8-15  -> ymm1
;   +0x18 table indexed by bits 16-23 -> ymm2
; Table entries are fetched with vgatherdps (4-byte elements). Bits 24-31 are not looked
; up: they are converted to float and multiplied by the float at 0xc(%rdx) -> ymm3.
; The field mask is the 32-bit value at 0x10(%rdx).
; NOTE(review): 0x10(%rdx) is presumably 0x000000ff and 0xc(%rdx) presumably 1/255 - confirm.
; Overwrites rcx and r8 in addition to rax.
376PUBLIC _sk_load_tables_hsw
377_sk_load_tables_hsw LABEL PROC
378  DB  72,173                              ; lods          %ds:(%rsi),%rax
379  DB  72,139,8                            ; mov           (%rax),%rcx
380  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
381  DB  197,252,16,28,185                   ; vmovups       (%rcx,%rdi,4),%ymm3
382  DB  196,226,125,24,82,16                ; vbroadcastss  0x10(%rdx),%ymm2
383  DB  197,236,84,203                      ; vandps        %ymm3,%ymm2,%ymm1
; Build an all-ones gather mask in ymm8 (0 == 0 in every lane). Each gather is given its
; own copy because vgatherdps clears the mask register it is handed; the last gather
; uses ymm8 itself.
384  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
385  DB  197,124,194,192,0                   ; vcmpeqps      %ymm0,%ymm0,%ymm8
386  DB  196,65,124,40,200                   ; vmovaps       %ymm8,%ymm9
387  DB  196,194,53,146,4,136                ; vgatherdps    %ymm9,(%r8,%ymm1,4),%ymm0
388  DB  72,139,72,16                        ; mov           0x10(%rax),%rcx
389  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
390  DB  197,108,84,201                      ; vandps        %ymm1,%ymm2,%ymm9
391  DB  196,65,124,40,208                   ; vmovaps       %ymm8,%ymm10
392  DB  196,162,45,146,12,137               ; vgatherdps    %ymm10,(%rcx,%ymm9,4),%ymm1
393  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
394  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
395  DB  196,65,108,84,201                   ; vandps        %ymm9,%ymm2,%ymm9
396  DB  196,162,61,146,20,136               ; vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
397  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
398  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
399  DB  196,98,125,24,66,12                 ; vbroadcastss  0xc(%rdx),%ymm8
400  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
401  DB  72,173                              ; lods          %ds:(%rsi),%rax
402  DB  255,224                             ; jmpq          *%rax
403
; _sk_load_565_hsw: loads eight 16-bit values and splits each into three float channels.
; The context entry points at a pointer; eight words are read from that pointer + rdi*2
; and zero-extended to 32 bits (v). Then, with constants read relative to rdx:
;   ymm0 = float(v & M[0x68]) * K[0x74]
;   ymm1 = float(v & M[0x6c]) * K[0x78]
;   ymm2 = float(v & M[0x70]) * K[0x7c]
;   ymm3 = broadcast of the float at (%rdx)
; NOTE(review): per the stage name the masks presumably select the 5/6/5-bit fields and
; the K values rescale each field to [0,1], with (%rdx) presumably 1.0f - confirm.
404PUBLIC _sk_load_565_hsw
405_sk_load_565_hsw LABEL PROC
406  DB  72,173                              ; lods          %ds:(%rsi),%rax
407  DB  72,139,0                            ; mov           (%rax),%rax
408  DB  196,226,125,51,20,120               ; vpmovzxwd     (%rax,%rdi,2),%ymm2
409  DB  196,226,125,88,66,104               ; vpbroadcastd  0x68(%rdx),%ymm0
410  DB  197,253,219,194                     ; vpand         %ymm2,%ymm0,%ymm0
411  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
412  DB  196,226,125,24,74,116               ; vbroadcastss  0x74(%rdx),%ymm1
413  DB  197,244,89,192                      ; vmulps        %ymm0,%ymm1,%ymm0
414  DB  196,226,125,88,74,108               ; vpbroadcastd  0x6c(%rdx),%ymm1
415  DB  197,245,219,202                     ; vpand         %ymm2,%ymm1,%ymm1
416  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
417  DB  196,226,125,24,90,120               ; vbroadcastss  0x78(%rdx),%ymm3
418  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
419  DB  196,226,125,88,90,112               ; vpbroadcastd  0x70(%rdx),%ymm3
420  DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
421  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
422  DB  196,226,125,24,90,124               ; vbroadcastss  0x7c(%rdx),%ymm3
423  DB  197,228,89,210                      ; vmulps        %ymm2,%ymm3,%ymm2
424  DB  196,226,125,24,26                   ; vbroadcastss  (%rdx),%ymm3
425  DB  72,173                              ; lods          %ds:(%rsi),%rax
426  DB  255,224                             ; jmpq          *%rax
427
; _sk_store_565_hsw: packs ymm0, ymm1 and ymm2 into eight 16-bit values and stores them.
; With constants read relative to rdx:
;   a = int32(ymm0 * K[0x80]) << 11
;   b = int32(ymm1 * K[0x84]) << 5
;   c = int32(ymm2 * K[0x80])
; The three are ORed together, the eight dwords are narrowed to words with vpackusdw
; (unsigned saturation), and the 16 bytes are written to (context pointer) + rdi*2.
; ymm0-ymm3 are not modified; ymm3 is not read. Jumps to the next stage.
; NOTE(review): K[0x80] and K[0x84] are presumably 31.0f and 63.0f - confirm.
; Always writes a full 16 bytes regardless of how many indices remain.
428PUBLIC _sk_store_565_hsw
429_sk_store_565_hsw LABEL PROC
430  DB  72,173                              ; lods          %ds:(%rsi),%rax
431  DB  72,139,0                            ; mov           (%rax),%rax
432  DB  196,98,125,24,130,128,0,0,0         ; vbroadcastss  0x80(%rdx),%ymm8
433  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
434  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
435  DB  196,193,53,114,241,11               ; vpslld        $0xb,%ymm9,%ymm9
436  DB  196,98,125,24,146,132,0,0,0         ; vbroadcastss  0x84(%rdx),%ymm10
437  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
438  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
439  DB  196,193,45,114,242,5                ; vpslld        $0x5,%ymm10,%ymm10
440  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
441  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
442  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
443  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
; Narrow the low and high 128-bit halves together into eight words in xmm8.
444  DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
445  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
446  DB  197,122,127,4,120                   ; vmovdqu       %xmm8,(%rax,%rdi,2)
447  DB  72,173                              ; lods          %ds:(%rsi),%rax
448  DB  255,224                             ; jmpq          *%rax
449
; _sk_load_8888_hsw: loads eight 32-bit values and splits each into four float channels.
; The context entry points at a pointer; eight dwords (v) are read from that pointer +
; rdi*4. With mask M = 32-bit value at 0x10(%rdx) and scale K = float at 0xc(%rdx):
;   ymm0 = float( v        & M) * K
;   ymm1 = float((v >> 8)  & M) * K
;   ymm2 = float((v >> 16) & M) * K
;   ymm3 = float( v >> 24     ) * K
; NOTE(review): M is presumably 0x000000ff and K presumably 1/255 - confirm.
; Always reads a full 32 bytes regardless of how many indices remain.
450PUBLIC _sk_load_8888_hsw
451_sk_load_8888_hsw LABEL PROC
452  DB  72,173                              ; lods          %ds:(%rsi),%rax
453  DB  72,139,0                            ; mov           (%rax),%rax
454  DB  197,252,16,28,184                   ; vmovups       (%rax,%rdi,4),%ymm3
455  DB  196,226,125,24,82,16                ; vbroadcastss  0x10(%rdx),%ymm2
456  DB  197,236,84,195                      ; vandps        %ymm3,%ymm2,%ymm0
457  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
458  DB  196,98,125,24,66,12                 ; vbroadcastss  0xc(%rdx),%ymm8
459  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
460  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
461  DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
462  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
463  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
464  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
465  DB  196,193,108,84,209                  ; vandps        %ymm9,%ymm2,%ymm2
466  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
467  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
468  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
469  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
470  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
471  DB  72,173                              ; lods          %ds:(%rsi),%rax
472  DB  255,224                             ; jmpq          *%rax
473
; _sk_store_8888_hsw: packs ymm0-ymm3 into eight 32-bit values and stores them.
; Each register is multiplied by the float at 0x8(%rdx) and converted to int32; the
; results are shifted left by 0 (ymm0), 8 (ymm1), 16 (ymm2) and 24 (ymm3) bits and ORed
; together. The 32 bytes are written to (context pointer) + rdi*4.
; ymm0-ymm3 are not modified. Jumps to the next stage.
; NOTE(review): 0x8(%rdx) is presumably 255.0f. No masking is applied before the shifts,
; so inputs presumably must already lie in [0,1] - confirm with the callers.
; Always writes a full 32 bytes regardless of how many indices remain.
474PUBLIC _sk_store_8888_hsw
475_sk_store_8888_hsw LABEL PROC
476  DB  72,173                              ; lods          %ds:(%rsi),%rax
477  DB  72,139,0                            ; mov           (%rax),%rax
478  DB  196,98,125,24,66,8                  ; vbroadcastss  0x8(%rdx),%ymm8
479  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
480  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
481  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
482  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
483  DB  196,193,45,114,242,8                ; vpslld        $0x8,%ymm10,%ymm10
484  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
485  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
486  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
487  DB  196,193,45,114,242,16               ; vpslld        $0x10,%ymm10,%ymm10
488  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
489  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
490  DB  196,193,61,114,240,24               ; vpslld        $0x18,%ymm8,%ymm8
491  DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
492  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
493  DB  197,126,127,4,184                   ; vmovdqu       %ymm8,(%rax,%rdi,4)
494  DB  72,173                              ; lods          %ds:(%rsi),%rax
495  DB  255,224                             ; jmpq          *%rax
496
; _sk_load_f16_hsw
; Reads 64 bytes (four unaligned 16-byte loads) from ptr + rdi*8, where ptr is
; loaded through the pointer fetched from the list at rsi.  Viewing the data as
; eight consecutive groups of four 16-bit values, the unpack sequence below
; transposes it so that each result vector holds the same position of every
; group:
;   xmm0 = value 0 of groups 0..7, xmm1 = value 1, xmm2 = value 2, xmm3 = value 3
; Each of those is then widened from half to single precision (vcvtph2ps) into
; ymm0..ymm3.  xmm8..xmm10 are scratch.
; Finishes by fetching the next list entry into rax and jumping to it.
497PUBLIC _sk_load_f16_hsw
498_sk_load_f16_hsw LABEL PROC
499  DB  72,173                              ; lods          %ds:(%rsi),%rax
500  DB  72,139,0                            ; mov           (%rax),%rax
501  DB  197,250,111,4,248                   ; vmovdqu       (%rax,%rdi,8),%xmm0
502  DB  197,250,111,76,248,16               ; vmovdqu       0x10(%rax,%rdi,8),%xmm1
503  DB  197,250,111,84,248,32               ; vmovdqu       0x20(%rax,%rdi,8),%xmm2
504  DB  197,250,111,92,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm3
; Two rounds of 16-bit interleaves followed by 64-bit interleaves perform the
; transpose described in the header.
505  DB  197,121,97,193                      ; vpunpcklwd    %xmm1,%xmm0,%xmm8
506  DB  197,249,105,193                     ; vpunpckhwd    %xmm1,%xmm0,%xmm0
507  DB  197,233,97,203                      ; vpunpcklwd    %xmm3,%xmm2,%xmm1
508  DB  197,233,105,211                     ; vpunpckhwd    %xmm3,%xmm2,%xmm2
509  DB  197,57,97,200                       ; vpunpcklwd    %xmm0,%xmm8,%xmm9
510  DB  197,57,105,192                      ; vpunpckhwd    %xmm0,%xmm8,%xmm8
511  DB  197,241,97,218                      ; vpunpcklwd    %xmm2,%xmm1,%xmm3
512  DB  197,113,105,210                     ; vpunpckhwd    %xmm2,%xmm1,%xmm10
513  DB  197,177,108,195                     ; vpunpcklqdq   %xmm3,%xmm9,%xmm0
514  DB  196,226,125,19,192                  ; vcvtph2ps     %xmm0,%ymm0
515  DB  197,177,109,203                     ; vpunpckhqdq   %xmm3,%xmm9,%xmm1
516  DB  196,226,125,19,201                  ; vcvtph2ps     %xmm1,%ymm1
517  DB  196,193,57,108,210                  ; vpunpcklqdq   %xmm10,%xmm8,%xmm2
518  DB  196,226,125,19,210                  ; vcvtph2ps     %xmm2,%ymm2
519  DB  196,193,57,109,218                  ; vpunpckhqdq   %xmm10,%xmm8,%xmm3
520  DB  196,226,125,19,219                  ; vcvtph2ps     %xmm3,%ymm3
521  DB  72,173                              ; lods          %ds:(%rsi),%rax
522  DB  255,224                             ; jmpq          *%rax
523
; _sk_store_f16_hsw
; Narrows ymm0..ymm3 from single to half precision (vcvtps2ph, immediate 0x4)
; into xmm8..xmm11, then interleaves them so that the four halves belonging to
; the same lane end up adjacent (lane i -> values from ymm0,ymm1,ymm2,ymm3 in
; that order).  The resulting 64 bytes are written with four unaligned 16-byte
; stores to ptr + rdi*8 (+0x00, +0x10, +0x20, +0x30), where ptr is loaded
; through the pointer fetched from the list at rsi.
; ymm0..ymm3 are not modified; xmm8..xmm12 are scratch.
; Finishes by fetching the next list entry into rax and jumping to it.
524PUBLIC _sk_store_f16_hsw
525_sk_store_f16_hsw LABEL PROC
526  DB  72,173                              ; lods          %ds:(%rsi),%rax
527  DB  72,139,0                            ; mov           (%rax),%rax
528  DB  196,195,125,29,192,4                ; vcvtps2ph     $0x4,%ymm0,%xmm8
529  DB  196,195,125,29,201,4                ; vcvtps2ph     $0x4,%ymm1,%xmm9
530  DB  196,195,125,29,210,4                ; vcvtps2ph     $0x4,%ymm2,%xmm10
531  DB  196,195,125,29,219,4                ; vcvtps2ph     $0x4,%ymm3,%xmm11
; 16-bit interleave pairs up (ymm0,ymm1) and (ymm2,ymm3) values per lane.
532  DB  196,65,57,97,225                    ; vpunpcklwd    %xmm9,%xmm8,%xmm12
533  DB  196,65,57,105,193                   ; vpunpckhwd    %xmm9,%xmm8,%xmm8
534  DB  196,65,41,97,203                    ; vpunpcklwd    %xmm11,%xmm10,%xmm9
535  DB  196,65,41,105,211                   ; vpunpckhwd    %xmm11,%xmm10,%xmm10
; 32-bit interleave joins the two pairs; each store below covers two lanes.
536  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
537  DB  197,122,127,28,248                  ; vmovdqu       %xmm11,(%rax,%rdi,8)
538  DB  196,65,25,106,201                   ; vpunpckhdq    %xmm9,%xmm12,%xmm9
539  DB  197,122,127,76,248,16               ; vmovdqu       %xmm9,0x10(%rax,%rdi,8)
540  DB  196,65,57,98,202                    ; vpunpckldq    %xmm10,%xmm8,%xmm9
541  DB  197,122,127,76,248,32               ; vmovdqu       %xmm9,0x20(%rax,%rdi,8)
542  DB  196,65,57,106,194                   ; vpunpckhdq    %xmm10,%xmm8,%xmm8
543  DB  197,122,127,68,248,48               ; vmovdqu       %xmm8,0x30(%rax,%rdi,8)
544  DB  72,173                              ; lods          %ds:(%rsi),%rax
545  DB  255,224                             ; jmpq          *%rax
546
; _sk_clamp_x_hsw
; Clamps every lane of ymm0 to the range [0, limit'].
; limit is the 32-bit value at the address fetched from the list at rsi; it is
; broadcast and then decremented by one as an INTEGER (vpaddd with all-ones),
; i.e. its bit pattern is reduced by 1 (for a positive finite float that is the
; next representable value below it).  ymm0 = max(0, min(ymm0, limit')).
; ymm8 and ymm9 are scratch.
; Finishes by fetching the next list entry into rax and jumping to it.
547PUBLIC _sk_clamp_x_hsw
548_sk_clamp_x_hsw LABEL PROC
549  DB  72,173                              ; lods          %ds:(%rsi),%rax
550  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
551  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
552  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
553  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
554  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
555  DB  197,188,95,192                      ; vmaxps        %ymm0,%ymm8,%ymm0
556  DB  72,173                              ; lods          %ds:(%rsi),%rax
557  DB  255,224                             ; jmpq          *%rax
558
; _sk_clamp_y_hsw
; Clamps every lane of ymm1 to the range [0, limit'].
; limit is the 32-bit value at the address fetched from the list at rsi; it is
; broadcast and then decremented by one as an INTEGER (vpaddd with all-ones),
; i.e. its bit pattern is reduced by 1 (for a positive finite float that is the
; next representable value below it).  ymm1 = max(0, min(ymm1, limit')).
; ymm8 and ymm9 are scratch.
; Finishes by fetching the next list entry into rax and jumping to it.
559PUBLIC _sk_clamp_y_hsw
560_sk_clamp_y_hsw LABEL PROC
561  DB  72,173                              ; lods          %ds:(%rsi),%rax
562  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
563  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
564  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
565  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
566  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
567  DB  197,188,95,201                      ; vmaxps        %ymm1,%ymm8,%ymm1
568  DB  72,173                              ; lods          %ds:(%rsi),%rax
569  DB  255,224                             ; jmpq          *%rax
570
; _sk_matrix_2x3_hsw
; Applies a 2x3 affine transform to (ymm0, ymm1) using six floats m[0..5] at
; the address fetched from the list at rsi:
;   new ymm0 = ymm0*m[0] + ymm1*m[2] + m[4]
;   new ymm1 = ymm0*m[1] + ymm1*m[3] + m[5]
; Both results are accumulated with fused multiply-adds in ymm8/ymm9 and only
; copied back at the end, so the second row still sees the original ymm0.
; ymm8..ymm11 are scratch.
; Finishes by jumping to the next list entry, which is fetched into rax before
; the copy-back.
571PUBLIC _sk_matrix_2x3_hsw
572_sk_matrix_2x3_hsw LABEL PROC
573  DB  72,173                              ; lods          %ds:(%rsi),%rax
574  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
575  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
576  DB  196,98,125,24,64,16                 ; vbroadcastss  0x10(%rax),%ymm8
577  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
578  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
579  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
580  DB  196,98,125,24,88,12                 ; vbroadcastss  0xc(%rax),%ymm11
581  DB  196,98,125,24,72,20                 ; vbroadcastss  0x14(%rax),%ymm9
582  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
583  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
584  DB  72,173                              ; lods          %ds:(%rsi),%rax
585  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
586  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
587  DB  255,224                             ; jmpq          *%rax
588
; _sk_matrix_3x4_hsw
; Applies a 3x4 affine transform to (ymm0, ymm1, ymm2) using twelve floats
; m[0..11] at the address fetched from the list at rsi:
;   new ymm0 = ymm0*m[0] + ymm1*m[3] + ymm2*m[6] + m[9]
;   new ymm1 = ymm0*m[1] + ymm1*m[4] + ymm2*m[7] + m[10]
;   new ymm2 = ymm0*m[2] + ymm1*m[5] + ymm2*m[8] + m[11]
; Results are accumulated with fused multiply-adds in ymm8/ymm9/ymm10 and only
; copied back at the end, so every row sees the original inputs.
; ymm8..ymm13 are scratch.
; Finishes by jumping to the next list entry, which is fetched into rax before
; the copy-back.
589PUBLIC _sk_matrix_3x4_hsw
590_sk_matrix_3x4_hsw LABEL PROC
591  DB  72,173                              ; lods          %ds:(%rsi),%rax
592  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
593  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
594  DB  196,98,125,24,88,24                 ; vbroadcastss  0x18(%rax),%ymm11
595  DB  196,98,125,24,64,36                 ; vbroadcastss  0x24(%rax),%ymm8
596  DB  196,66,109,184,195                  ; vfmadd231ps   %ymm11,%ymm2,%ymm8
597  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
598  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
599  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
600  DB  196,98,125,24,88,16                 ; vbroadcastss  0x10(%rax),%ymm11
601  DB  196,98,125,24,96,28                 ; vbroadcastss  0x1c(%rax),%ymm12
602  DB  196,98,125,24,72,40                 ; vbroadcastss  0x28(%rax),%ymm9
603  DB  196,66,109,184,204                  ; vfmadd231ps   %ymm12,%ymm2,%ymm9
604  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
605  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
606  DB  196,98,125,24,88,8                  ; vbroadcastss  0x8(%rax),%ymm11
607  DB  196,98,125,24,96,20                 ; vbroadcastss  0x14(%rax),%ymm12
608  DB  196,98,125,24,104,32                ; vbroadcastss  0x20(%rax),%ymm13
609  DB  196,98,125,24,80,44                 ; vbroadcastss  0x2c(%rax),%ymm10
610  DB  196,66,109,184,213                  ; vfmadd231ps   %ymm13,%ymm2,%ymm10
611  DB  196,66,117,184,212                  ; vfmadd231ps   %ymm12,%ymm1,%ymm10
612  DB  196,66,125,184,211                  ; vfmadd231ps   %ymm11,%ymm0,%ymm10
613  DB  72,173                              ; lods          %ds:(%rsi),%rax
614  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
615  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
616  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
617  DB  255,224                             ; jmpq          *%rax
618
; _sk_linear_gradient_2stops_hsw
; Evaluates four linear functions of ymm0 using eight floats c[0..7] at the
; address fetched from the list at rsi:
;   new ymm0 = c[0] + ymm0*c[4]
;   new ymm1 = c[1] + ymm0*c[5]
;   new ymm2 = c[2] + ymm0*c[6]
;   new ymm3 = c[3] + ymm0*c[7]
; The first result is built in ymm8 and copied into ymm0 last, because ymm0 is
; still needed as the multiplier for the other three.  The incoming values of
; ymm1..ymm3 are overwritten without being read.  ymm8 and ymm9 are scratch.
; Finishes by jumping to the next list entry, which is fetched into rax before
; the copy-back.
619PUBLIC _sk_linear_gradient_2stops_hsw
620_sk_linear_gradient_2stops_hsw LABEL PROC
621  DB  72,173                              ; lods          %ds:(%rsi),%rax
622  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
623  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
624  DB  196,98,125,184,193                  ; vfmadd231ps   %ymm1,%ymm0,%ymm8
625  DB  196,226,125,24,80,20                ; vbroadcastss  0x14(%rax),%ymm2
626  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
627  DB  196,226,125,184,202                 ; vfmadd231ps   %ymm2,%ymm0,%ymm1
628  DB  196,226,125,24,88,24                ; vbroadcastss  0x18(%rax),%ymm3
629  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
630  DB  196,226,125,184,211                 ; vfmadd231ps   %ymm3,%ymm0,%ymm2
631  DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
632  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
633  DB  196,194,125,184,217                 ; vfmadd231ps   %ymm9,%ymm0,%ymm3
634  DB  72,173                              ; lods          %ds:(%rsi),%rax
635  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
636  DB  255,224                             ; jmpq          *%rax
637
; _sk_start_pipeline_avx
; Driver loop that repeatedly calls the first entry of a list of code pointers.
; Inputs (as read by the code below):
;   rcx = starting index x          (kept in rbx)
;   rdx = pointer to the list       (first entry -> r12, rest of list -> r13)
;   r8  = value forwarded to every call in rdx   (kept in r14)
;   r9  = limit                     (kept in r15)
; While x + 8 <= limit (unsigned compare) it zeroes ymm0..ymm7, sets
; rdi = x, rsi = r13, rdx = r14, calls *r12, and advances x by 8.
; On return rax holds the first x that was NOT processed (the original x if the
; loop body never ran).
; Preserves r12..r15, rsi, rdi, rbx and xmm6..xmm15 by saving them on entry and
; restoring them before returning; executes vzeroupper just before ret.
; NOTE(review): the register usage matches the Microsoft x64 convention for the
; caller and a rdi/rsi/rdx convention for the callee -- confirm.  No unwind
; metadata (PROC FRAME / .pdata) is visible here for this non-leaf routine;
; verify whether that is intentional.
638PUBLIC _sk_start_pipeline_avx
639_sk_start_pipeline_avx LABEL PROC
; --- save registers that are restored before returning ---
640  DB  65,87                               ; push          %r15
641  DB  65,86                               ; push          %r14
642  DB  65,85                               ; push          %r13
643  DB  65,84                               ; push          %r12
644  DB  86                                  ; push          %rsi
645  DB  87                                  ; push          %rdi
646  DB  83                                  ; push          %rbx
647  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
648  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
649  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
650  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
651  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
652  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
653  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
654  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
655  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
656  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
657  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
; --- stash arguments; split the list into first entry (r12) and remainder (r13) ---
658  DB  77,137,207                          ; mov           %r9,%r15
659  DB  77,137,198                          ; mov           %r8,%r14
660  DB  72,137,203                          ; mov           %rcx,%rbx
661  DB  72,137,214                          ; mov           %rdx,%rsi
662  DB  72,173                              ; lods          %ds:(%rsi),%rax
663  DB  73,137,196                          ; mov           %rax,%r12
664  DB  73,137,245                          ; mov           %rsi,%r13
; --- initial check: enter the loop only if x + 8 <= limit, else rax = x and exit ---
665  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
666  DB  76,57,248                           ; cmp           %r15,%rax
667  DB  118,5                               ; jbe           75 <_sk_start_pipeline_avx+0x75>
668  DB  72,137,216                          ; mov           %rbx,%rax
669  DB  235,60                              ; jmp           b1 <_sk_start_pipeline_avx+0xb1>
; --- loop body (offset 0x75): clear ymm0..ymm7, set up rdi/rsi/rdx, call *r12 ---
670  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
671  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
672  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
673  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
674  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
675  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
676  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
677  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
678  DB  72,137,223                          ; mov           %rbx,%rdi
679  DB  76,137,238                          ; mov           %r13,%rsi
680  DB  76,137,242                          ; mov           %r14,%rdx
681  DB  65,255,212                          ; callq         *%r12
; rax = x + 8 becomes the new x; the compare uses x + 16, i.e. new x + 8, so
; the loop repeats only when another full step of 8 fits below the limit.
682  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
683  DB  72,131,195,16                       ; add           $0x10,%rbx
684  DB  76,57,251                           ; cmp           %r15,%rbx
685  DB  72,137,195                          ; mov           %rax,%rbx
686  DB  118,196                             ; jbe           75 <_sk_start_pipeline_avx+0x75>
; --- exit (offset 0xb1): restore saved registers in reverse order and return ---
687  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
688  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
689  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
690  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
691  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
692  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
693  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
694  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
695  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
696  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
697  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
698  DB  91                                  ; pop           %rbx
699  DB  95                                  ; pop           %rdi
700  DB  94                                  ; pop           %rsi
701  DB  65,92                               ; pop           %r12
702  DB  65,93                               ; pop           %r13
703  DB  65,94                               ; pop           %r14
704  DB  65,95                               ; pop           %r15
705  DB  197,248,119                         ; vzeroupper
706  DB  195                                 ; retq
707
; _sk_just_return_avx
; Consists of a single ret: it returns to whoever issued the last call instead
; of fetching and jumping to another list entry as the other stages do.
; NOTE(review): presumably used as the final entry of a stage list so control
; returns to _sk_start_pipeline_avx -- confirm against the caller.
708PUBLIC _sk_just_return_avx
709_sk_just_return_avx LABEL PROC
710  DB  195                                 ; retq
711
; _sk_seed_shader_avx
; Initialises ymm0..ymm7 from edi, a 32-bit integer read from the address
; fetched from the list at rsi, and constants addressed through rdx:
;   ymm0 = float(edi) + k4 + v      (k4 = float at 0x4(%rdx) broadcast,
;                                    v  = the eight floats at 0x14(%rdx))
;   ymm1 = float(*(int32 *)ctx) + k4
;   ymm2 = float at (%rdx), broadcast
;   ymm3..ymm7 = 0
; The integer values are splatted to all eight lanes with vmovd + vpermilps +
; vinsertf128 before the int-to-float conversion.
; Finishes by jumping to the next list entry, which is fetched into rax before
; ymm3..ymm7 are cleared.
; NOTE(review): k4 is presumably 0.5f, v presumably 0..7 and (%rdx) presumably
; 1.0f -- confirm against the constants table definition.
712PUBLIC _sk_seed_shader_avx
713_sk_seed_shader_avx LABEL PROC
714  DB  72,173                              ; lods          %ds:(%rsi),%rax
715  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
716  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
717  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
718  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
719  DB  196,226,125,24,74,4                 ; vbroadcastss  0x4(%rdx),%ymm1
720  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
721  DB  197,252,88,66,20                    ; vaddps        0x14(%rdx),%ymm0,%ymm0
722  DB  197,249,110,16                      ; vmovd         (%rax),%xmm2
723  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
724  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
725  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
726  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
727  DB  196,226,125,24,18                   ; vbroadcastss  (%rdx),%ymm2
728  DB  72,173                              ; lods          %ds:(%rsi),%rax
729  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
730  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
731  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
732  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
733  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
734  DB  255,224                             ; jmpq          *%rax
735
; _sk_constant_color_avx
; Broadcasts four consecutive floats at the address fetched from the list at
; rsi into ymm0, ymm1, ymm2 and ymm3 respectively (offsets 0, 4, 8, 12).
; Finishes by fetching the next list entry into rax and jumping to it.
736PUBLIC _sk_constant_color_avx
737_sk_constant_color_avx LABEL PROC
738  DB  72,173                              ; lods          %ds:(%rsi),%rax
739  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
740  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
741  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
742  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
743  DB  72,173                              ; lods          %ds:(%rsi),%rax
744  DB  255,224                             ; jmpq          *%rax
745
; _sk_clear_avx
; Sets ymm0..ymm3 to zero.
; The first lods value is discarded (immediately overwritten by the second);
; the second is the next list entry, which is jumped to at the end.
746PUBLIC _sk_clear_avx
747_sk_clear_avx LABEL PROC
748  DB  72,173                              ; lods          %ds:(%rsi),%rax
749  DB  72,173                              ; lods          %ds:(%rsi),%rax
750  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
751  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
752  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
753  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
754  DB  255,224                             ; jmpq          *%rax
755
; _sk_plus__avx
; Lane-wise addition: ymm0 += ymm4, ymm1 += ymm5, ymm2 += ymm6, ymm3 += ymm7.
; ymm4..ymm7 are not modified.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
756PUBLIC _sk_plus__avx
757_sk_plus__avx LABEL PROC
758  DB  72,173                              ; lods          %ds:(%rsi),%rax
759  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
760  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
761  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
762  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
763  DB  72,173                              ; lods          %ds:(%rsi),%rax
764  DB  255,224                             ; jmpq          *%rax
765
; _sk_srcover_avx
; With k = float at (%rdx) broadcast and s = k - ymm3 (computed once, from the
; incoming ymm3):
;   ymm0 = ymm0 + ymm4*s
;   ymm1 = ymm1 + ymm5*s
;   ymm2 = ymm2 + ymm6*s
;   ymm3 = ymm3 + ymm7*s
; Uses separate multiply and add instructions (no FMA).  ymm4..ymm7 are not
; modified; ymm8 and ymm9 are scratch.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): k is presumably 1.0f, making s = 1 - src alpha -- confirm.
766PUBLIC _sk_srcover_avx
767_sk_srcover_avx LABEL PROC
768  DB  72,173                              ; lods          %ds:(%rsi),%rax
769  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
770  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
771  DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
772  DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
773  DB  197,60,89,205                       ; vmulps        %ymm5,%ymm8,%ymm9
774  DB  197,180,88,201                      ; vaddps        %ymm1,%ymm9,%ymm1
775  DB  197,60,89,206                       ; vmulps        %ymm6,%ymm8,%ymm9
776  DB  197,180,88,210                      ; vaddps        %ymm2,%ymm9,%ymm2
777  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
778  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
779  DB  72,173                              ; lods          %ds:(%rsi),%rax
780  DB  255,224                             ; jmpq          *%rax
781
; _sk_dstover_avx
; With k = float at (%rdx) broadcast and s = k - ymm7:
;   ymm0 = ymm0*s + ymm4
;   ymm1 = ymm1*s + ymm5
;   ymm2 = ymm2*s + ymm6
;   ymm3 = ymm3*s + ymm7
; Uses separate multiply and add instructions (no FMA).  ymm4..ymm7 are not
; modified; ymm8 is scratch.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): k is presumably 1.0f, making s = 1 - dst alpha -- confirm.
782PUBLIC _sk_dstover_avx
783_sk_dstover_avx LABEL PROC
784  DB  72,173                              ; lods          %ds:(%rsi),%rax
785  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
786  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
787  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
788  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
789  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
790  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
791  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
792  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
793  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
794  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
795  DB  72,173                              ; lods          %ds:(%rsi),%rax
796  DB  255,224                             ; jmpq          *%rax
797
; _sk_clamp_0_avx
; Clamps from below at zero: ymm0..ymm3 = max(ymmN, 0) lane-wise.
; ymm8 holds the zero vector and is clobbered.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
798PUBLIC _sk_clamp_0_avx
799_sk_clamp_0_avx LABEL PROC
800  DB  72,173                              ; lods          %ds:(%rsi),%rax
801  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
802  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
803  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
804  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
805  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
806  DB  72,173                              ; lods          %ds:(%rsi),%rax
807  DB  255,224                             ; jmpq          *%rax
808
; _sk_clamp_1_avx
; Clamps from above: ymm0..ymm3 = min(ymmN, k) lane-wise, where k is the float
; at (%rdx) broadcast into ymm8 (clobbered).
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): k is presumably 1.0f, as the stage name suggests -- confirm.
809PUBLIC _sk_clamp_1_avx
810_sk_clamp_1_avx LABEL PROC
811  DB  72,173                              ; lods          %ds:(%rsi),%rax
812  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
813  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
814  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
815  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
816  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
817  DB  72,173                              ; lods          %ds:(%rsi),%rax
818  DB  255,224                             ; jmpq          *%rax
819
; _sk_clamp_a_avx
; First limits ymm3 to at most k (the float at (%rdx), broadcast into ymm8),
; then limits ymm0, ymm1 and ymm2 to at most the already-clamped ymm3:
;   ymm3 = min(ymm3, k);  ymm0..ymm2 = min(ymmN, ymm3)
; ymm8 is clobbered.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): k is presumably 1.0f and ymm3 the alpha channel -- confirm.
820PUBLIC _sk_clamp_a_avx
821_sk_clamp_a_avx LABEL PROC
822  DB  72,173                              ; lods          %ds:(%rsi),%rax
823  DB  196,98,125,24,2                     ; vbroadcastss  (%rdx),%ymm8
824  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
825  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
826  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
827  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
828  DB  72,173                              ; lods          %ds:(%rsi),%rax
829  DB  255,224                             ; jmpq          *%rax
830
; _sk_set_rgb_avx
; Broadcasts three consecutive floats at the address fetched from the list at
; rsi into ymm0, ymm1 and ymm2 (offsets 0, 4, 8).  ymm3 is left untouched.
; Finishes by fetching the next list entry into rax and jumping to it.
831PUBLIC _sk_set_rgb_avx
832_sk_set_rgb_avx LABEL PROC
833  DB  72,173                              ; lods          %ds:(%rsi),%rax
834  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
835  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
836  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
837  DB  72,173                              ; lods          %ds:(%rsi),%rax
838  DB  255,224                             ; jmpq          *%rax
839
; _sk_swap_rb_avx
; Exchanges ymm0 and ymm2, using ymm8 as the temporary.  ymm1 and ymm3 are
; left untouched.
; The first lods value is discarded (immediately overwritten by the second);
; the second is the next list entry, which is jumped to at the end.
840PUBLIC _sk_swap_rb_avx
841_sk_swap_rb_avx LABEL PROC
842  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
843  DB  72,173                              ; lods          %ds:(%rsi),%rax
844  DB  72,173                              ; lods          %ds:(%rsi),%rax
845  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
846  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
847  DB  255,224                             ; jmpq          *%rax
848
; _sk_swap_avx
; Exchanges the register group ymm0..ymm3 with ymm4..ymm7
; (ymm0<->ymm4, ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7).
; The old ymm0..ymm3 are parked in ymm11, ymm10, ymm9, ymm8 respectively while
; the copy is made; those four scratch registers are clobbered.
; The first lods value is discarded (immediately overwritten by the second);
; the second is the next list entry, which is jumped to at the end.
849PUBLIC _sk_swap_avx
850_sk_swap_avx LABEL PROC
851  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
852  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
853  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
854  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
855  DB  72,173                              ; lods          %ds:(%rsi),%rax
856  DB  72,173                              ; lods          %ds:(%rsi),%rax
857  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
858  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
859  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
860  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
861  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
862  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
863  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
864  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
865  DB  255,224                             ; jmpq          *%rax
866
; _sk_move_src_dst_avx
; Copies ymm0..ymm3 into ymm4..ymm7 (ymm4 = ymm0, ..., ymm7 = ymm3).
; ymm0..ymm3 are not modified.
; The first lods value is discarded (immediately overwritten by the second);
; the second is the next list entry, which is jumped to at the end.
867PUBLIC _sk_move_src_dst_avx
868_sk_move_src_dst_avx LABEL PROC
869  DB  72,173                              ; lods          %ds:(%rsi),%rax
870  DB  72,173                              ; lods          %ds:(%rsi),%rax
871  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
872  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
873  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
874  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
875  DB  255,224                             ; jmpq          *%rax
876
; _sk_move_dst_src_avx
; Copies ymm4..ymm7 into ymm0..ymm3 (ymm0 = ymm4, ..., ymm3 = ymm7).
; ymm4..ymm7 are not modified.
; The first lods value is discarded (immediately overwritten by the second);
; the second is the next list entry, which is jumped to at the end.
877PUBLIC _sk_move_dst_src_avx
878_sk_move_dst_src_avx LABEL PROC
879  DB  72,173                              ; lods          %ds:(%rsi),%rax
880  DB  72,173                              ; lods          %ds:(%rsi),%rax
881  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
882  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
883  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
884  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
885  DB  255,224                             ; jmpq          *%rax
886
; _sk_premul_avx
; Multiplies ymm0, ymm1 and ymm2 lane-wise by ymm3.  ymm3 is not modified.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
887PUBLIC _sk_premul_avx
888_sk_premul_avx LABEL PROC
889  DB  72,173                              ; lods          %ds:(%rsi),%rax
890  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
891  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
892  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
893  DB  72,173                              ; lods          %ds:(%rsi),%rax
894  DB  255,224                             ; jmpq          *%rax
895
; _sk_unpremul_avx
; Computes a per-lane scale and multiplies ymm0, ymm1 and ymm2 by it:
;   scale = (ymm3 == 0) ? 0 : k / ymm3      (k = float at (%rdx), broadcast)
; The compare mask (ymm9) selects the zero vector in lanes where ymm3 is zero,
; so the result of the division in those lanes is discarded rather than used.
; ymm3 is not modified; ymm8..ymm10 are scratch.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): k is presumably 1.0f, making scale = 1/alpha -- confirm.
896PUBLIC _sk_unpremul_avx
897_sk_unpremul_avx LABEL PROC
898  DB  72,173                              ; lods          %ds:(%rsi),%rax
899  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
900  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
901  DB  196,98,125,24,18                    ; vbroadcastss  (%rdx),%ymm10
902  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
903  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
904  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
905  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
906  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
907  DB  72,173                              ; lods          %ds:(%rsi),%rax
908  DB  255,224                             ; jmpq          *%rax
909
; _sk_from_srgb_avx
; Applies the same piecewise function independently to ymm0, ymm1 and ymm2,
; using five float constants addressed through rdx:
;   lo = K40 * c
;   hi = K34 + (c*c) * (K3c*c + K38)
;   c  = (c < K44) ? lo : hi
; where Knn is the float at offset 0xnn(%rdx), broadcast.  The compare result
; overwrites the channel register and is then used as the blend mask.
; ymm3 is left untouched; ymm8..ymm15 are scratch.
; The first lods value is not used; the second is the next list entry, which
; is jumped to at the end.
; NOTE(review): presumably a linear segment plus polynomial approximation of
; the sRGB-to-linear transfer curve -- confirm against the C++ stage source.
910PUBLIC _sk_from_srgb_avx
911_sk_from_srgb_avx LABEL PROC
912  DB  72,173                              ; lods          %ds:(%rsi),%rax
; --- channel in ymm0 (constants are loaded here and reused below) ---
913  DB  196,98,125,24,66,64                 ; vbroadcastss  0x40(%rdx),%ymm8
914  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
915  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
916  DB  196,98,125,24,90,60                 ; vbroadcastss  0x3c(%rdx),%ymm11
917  DB  196,98,125,24,98,56                 ; vbroadcastss  0x38(%rdx),%ymm12
918  DB  197,36,89,232                       ; vmulps        %ymm0,%ymm11,%ymm13
919  DB  196,65,20,88,236                    ; vaddps        %ymm12,%ymm13,%ymm13
920  DB  196,98,125,24,114,52                ; vbroadcastss  0x34(%rdx),%ymm14
921  DB  196,65,44,89,213                    ; vmulps        %ymm13,%ymm10,%ymm10
922  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
923  DB  196,98,125,24,106,68                ; vbroadcastss  0x44(%rdx),%ymm13
924  DB  196,193,124,194,197,1               ; vcmpltps      %ymm13,%ymm0,%ymm0
925  DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
; --- channel in ymm1 ---
926  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
927  DB  197,116,89,209                      ; vmulps        %ymm1,%ymm1,%ymm10
928  DB  197,36,89,249                       ; vmulps        %ymm1,%ymm11,%ymm15
929  DB  196,65,4,88,252                     ; vaddps        %ymm12,%ymm15,%ymm15
930  DB  196,65,44,89,215                    ; vmulps        %ymm15,%ymm10,%ymm10
931  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
932  DB  196,193,116,194,205,1               ; vcmpltps      %ymm13,%ymm1,%ymm1
933  DB  196,195,45,74,201,16                ; vblendvps     %ymm1,%ymm9,%ymm10,%ymm1
; --- channel in ymm2 (ymm8 is reused for lo since K40 is no longer needed) ---
934  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
935  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
936  DB  197,36,89,210                       ; vmulps        %ymm2,%ymm11,%ymm10
937  DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
938  DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
939  DB  196,65,12,88,201                    ; vaddps        %ymm9,%ymm14,%ymm9
940  DB  196,193,108,194,213,1               ; vcmpltps      %ymm13,%ymm2,%ymm2
941  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
942  DB  72,173                              ; lods          %ds:(%rsi),%rax
943  DB  255,224                             ; jmpq          *%rax
944
; _sk_to_srgb_avx
;
; Pipeline stage (AVX, 8 float lanes per register). Applies the same
; piecewise curve to ymm0, ymm1 and ymm2 independently; ymm3 is not written.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rdx  base of a table of 32-bit float constants (offsets used below:
;        0x00, 0x48, 0x4c, 0x50, 0x54, 0x58)
;   rsi  cursor into the stream of stage words consumed by lods
;
; Per channel v, the instruction sequence computes:
;   t  = rsqrt(v)
;   s  = rcp(t)                 ; approximately sqrt(v)
;   q  = rsqrt(t)               ; approximately v^(1/4)
;   lo = v * C[0x48]
;   hi = min(C[0x00], q*C[0x4c] + (s*C[0x50] + C[0x54]))
;   v' = (v < C[0x58]) ? lo : hi
; NOTE(review): the constants are presumably the linear->sRGB transfer
; coefficients; their values are not visible here - confirm against the
; constants struct.
;
; This stage reads no context word, so the epilogue must consume exactly ONE
; word from the stream (the next stage's address) before jumping to it. The
; previous revision executed lods twice, which discarded the next stage
; pointer and jumped to whatever word followed it.
;
; Clobbers: rax, ymm8-ymm15.
945PUBLIC _sk_to_srgb_avx
946_sk_to_srgb_avx LABEL PROC
  ; --- channel in ymm0 ---
947  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
948  DB  196,65,124,83,200                   ; vrcpps        %ymm8,%ymm9
949  DB  196,65,124,82,208                   ; vrsqrtps      %ymm8,%ymm10
  ; ymm8 = C[0x48] is reused as the linear-segment scale for all three channels.
950  DB  196,98,125,24,66,72                 ; vbroadcastss  0x48(%rdx),%ymm8
951  DB  197,60,89,216                       ; vmulps        %ymm0,%ymm8,%ymm11
  ; ymm12..ymm15 hold C[0x00], C[0x4c], C[0x50], C[0x54] for the rest of the stage.
952  DB  196,98,125,24,34                    ; vbroadcastss  (%rdx),%ymm12
953  DB  196,98,125,24,106,76                ; vbroadcastss  0x4c(%rdx),%ymm13
954  DB  196,98,125,24,114,80                ; vbroadcastss  0x50(%rdx),%ymm14
955  DB  196,98,125,24,122,84                ; vbroadcastss  0x54(%rdx),%ymm15
956  DB  196,65,52,89,206                    ; vmulps        %ymm14,%ymm9,%ymm9
957  DB  196,65,52,88,207                    ; vaddps        %ymm15,%ymm9,%ymm9
958  DB  196,65,44,89,213                    ; vmulps        %ymm13,%ymm10,%ymm10
959  DB  196,65,44,88,201                    ; vaddps        %ymm9,%ymm10,%ymm9
960  DB  196,65,28,93,201                    ; vminps        %ymm9,%ymm12,%ymm9
  ; ymm10 = C[0x58], the threshold selecting between the two segments.
961  DB  196,98,125,24,82,88                 ; vbroadcastss  0x58(%rdx),%ymm10
962  DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
  ; Lanes where v < threshold take ymm11 (lo); the others take ymm9 (hi).
963  DB  196,195,53,74,195,0                 ; vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
  ; --- channel in ymm1 (same math, constants already loaded) ---
964  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
965  DB  196,65,124,83,217                   ; vrcpps        %ymm9,%ymm11
966  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
967  DB  196,65,12,89,219                    ; vmulps        %ymm11,%ymm14,%ymm11
968  DB  196,65,4,88,219                     ; vaddps        %ymm11,%ymm15,%ymm11
969  DB  196,65,20,89,201                    ; vmulps        %ymm9,%ymm13,%ymm9
970  DB  196,65,52,88,203                    ; vaddps        %ymm11,%ymm9,%ymm9
971  DB  197,60,89,217                       ; vmulps        %ymm1,%ymm8,%ymm11
972  DB  196,65,28,93,201                    ; vminps        %ymm9,%ymm12,%ymm9
973  DB  196,193,116,194,202,1               ; vcmpltps      %ymm10,%ymm1,%ymm1
974  DB  196,195,53,74,203,16                ; vblendvps     %ymm1,%ymm11,%ymm9,%ymm1
  ; --- channel in ymm2 (same math; ymm8 is overwritten with lo at the end) ---
975  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
976  DB  196,65,124,83,217                   ; vrcpps        %ymm9,%ymm11
977  DB  196,65,12,89,219                    ; vmulps        %ymm11,%ymm14,%ymm11
978  DB  196,65,4,88,219                     ; vaddps        %ymm11,%ymm15,%ymm11
979  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
980  DB  196,65,20,89,201                    ; vmulps        %ymm9,%ymm13,%ymm9
981  DB  196,65,52,88,203                    ; vaddps        %ymm11,%ymm9,%ymm9
982  DB  196,65,28,93,201                    ; vminps        %ymm9,%ymm12,%ymm9
983  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
984  DB  196,193,108,194,210,1               ; vcmpltps      %ymm10,%ymm2,%ymm2
985  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
  ; Fetch the next stage's address (one word only) and tail-jump to it.
986  DB  72,173                              ; lods          %ds:(%rsi),%rax
988  DB  255,224                             ; jmpq          *%rax
989
; _sk_scale_u8_avx
;
; Pipeline stage (AVX): multiplies ymm0, ymm1, ymm2 and ymm3 by a per-lane
; factor derived from 8 consecutive bytes in memory.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  byte index added to the source pointer
;   rdx  base of the float constant table (offset 0xc used)
;
; The context word is dereferenced once to obtain the byte pointer.
; factor[i] = float(byte[rdi + i]) * C[0xc]   for i = 0..7
; NOTE(review): C[0xc] is presumably 1/255 so that factor lies in [0,1] -
; confirm against the constants struct.
;
; Clobbers: rax, ymm8, ymm9.
990PUBLIC _sk_scale_u8_avx
991_sk_scale_u8_avx LABEL PROC
992  DB  72,173                              ; lods          %ds:(%rsi),%rax
993  DB  72,139,0                            ; mov           (%rax),%rax
  ; Two 4-byte loads, each zero-extended to four dwords, joined into ymm8
  ; (bytes 0-3 in the low half, bytes 4-7 in the high half).
994  DB  196,98,121,49,68,56,4               ; vpmovzxbd     0x4(%rax,%rdi,1),%xmm8
995  DB  196,98,121,49,12,56                 ; vpmovzxbd     (%rax,%rdi,1),%xmm9
996  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
997  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
998  DB  196,98,125,24,74,12                 ; vbroadcastss  0xc(%rdx),%ymm9
999  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
  ; Scale all four channel registers by the factor.
1000  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1001  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1002  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1003  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Fetch the next stage's address and tail-jump to it.
1004  DB  72,173                              ; lods          %ds:(%rsi),%rax
1005  DB  255,224                             ; jmpq          *%rax
1006
; _sk_lerp_u8_avx
;
; Pipeline stage (AVX): interpolates each of ymm0..ymm3 toward/away from
; ymm4..ymm7 using a per-lane factor derived from 8 consecutive bytes.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  byte index added to the source pointer
;   rdx  base of the float constant table (offset 0xc used)
;   ymm4..ymm7  second operand of the interpolation (not modified)
;
; t[i]   = float(byte[rdi + i]) * C[0xc]          for i = 0..7
; ymmN   = (ymmN - ymm(N+4)) * t + ymm(N+4)       for N = 0..3
; NOTE(review): C[0xc] is presumably 1/255 - confirm against the constants
; struct.
;
; Clobbers: rax, ymm8, ymm9.
1007PUBLIC _sk_lerp_u8_avx
1008_sk_lerp_u8_avx LABEL PROC
1009  DB  72,173                              ; lods          %ds:(%rsi),%rax
1010  DB  72,139,0                            ; mov           (%rax),%rax
  ; Build t in ymm8: 8 bytes -> 8 dwords -> floats -> scaled by C[0xc].
1011  DB  196,98,121,49,68,56,4               ; vpmovzxbd     0x4(%rax,%rdi,1),%xmm8
1012  DB  196,98,121,49,12,56                 ; vpmovzxbd     (%rax,%rdi,1),%xmm9
1013  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
1014  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
1015  DB  196,98,125,24,74,12                 ; vbroadcastss  0xc(%rdx),%ymm9
1016  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
  ; ymm0 = (ymm0 - ymm4) * t + ymm4
1017  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1018  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
1019  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
  ; ymm1 = (ymm1 - ymm5) * t + ymm5
1020  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1021  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
1022  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
  ; ymm2 = (ymm2 - ymm6) * t + ymm6
1023  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1024  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
1025  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
  ; ymm3 = (ymm3 - ymm7) * t + ymm7
1026  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
1027  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
1028  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
  ; Fetch the next stage's address and tail-jump to it.
1029  DB  72,173                              ; lods          %ds:(%rsi),%rax
1030  DB  255,224                             ; jmpq          *%rax
1031
; _sk_load_tables_avx
;
; Pipeline stage (AVX): loads 8 packed 32-bit values and produces four float
; registers, three of them via table lookups. AVX1 has no gather, so every
; lookup is done with scalar index extraction plus vmovss/vinsertps.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context pointer, the second the next stage's address
;   rdi  element index (scaled by 4) into the packed source array
;   rdx  base of the constant table (offsets 0xc and 0x10 used)
;
; Context layout as accessed here (rax = context pointer):
;   0x00  pointer to the packed 32-bit source values
;   0x08  pointer to the float table indexed by (value        & mask) -> ymm0
;   0x10  pointer to the float table indexed by ((value >> 8) & mask) -> ymm1
;   0x18  pointer to the float table indexed by ((value >> 16) & mask) -> ymm2
; ymm3 = float(value >> 24) * C[0xc]
; NOTE(review): mask = C[0x10] is presumably 0x000000ff and C[0xc] presumably
; 1/255 - confirm against the constants struct.
;
; r15, r14, r12 and rbx are callee-saved in the Microsoft x64 convention, so
; they are pushed on entry and popped (in reverse order) before the tail jump.
; Clobbers: rax, rcx, r8-r11, ymm8-ymm10.
1032PUBLIC _sk_load_tables_avx
1033_sk_load_tables_avx LABEL PROC
1034  DB  65,87                               ; push          %r15
1035  DB  65,86                               ; push          %r14
1036  DB  65,84                               ; push          %r12
1037  DB  83                                  ; push          %rbx
1038  DB  72,173                              ; lods          %ds:(%rsi),%rax
1039  DB  76,139,0                            ; mov           (%rax),%r8
1040  DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
  ; ymm10 = the 8 packed source values; kept live for all four channels.
1041  DB  196,65,124,16,20,184                ; vmovups       (%r8,%rdi,4),%ymm10
  ; ymm9 = mask broadcast to all 8 lanes (vmovd + vpermilps + vinsertf128).
1042  DB  197,249,110,66,16                   ; vmovd         0x10(%rdx),%xmm0
1043  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
1044  DB  196,99,125,24,200,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm9
  ; --- first table (rcx): indices = value & mask ---
1045  DB  196,193,52,84,194                   ; vandps        %ymm10,%ymm9,%ymm0
  ; Split the 8 dword indices into GPRs: each 64-bit extract carries two
  ; indices; the 32-bit mov keeps the low one and shr $32 leaves the high one.
  ; Lanes 0..3 -> r9, r8, r11, r10.
1046  DB  196,193,249,126,192                 ; vmovq         %xmm0,%r8
1047  DB  69,137,193                          ; mov           %r8d,%r9d
1048  DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
1049  DB  69,137,211                          ; mov           %r10d,%r11d
1050  DB  73,193,234,32                       ; shr           $0x20,%r10
1051  DB  73,193,232,32                       ; shr           $0x20,%r8
  ; Lanes 4..7 -> r14, r15, r12, rbx.
1052  DB  196,227,125,25,192,1                ; vextractf128  $0x1,%ymm0,%xmm0
1053  DB  196,193,249,126,199                 ; vmovq         %xmm0,%r15
1054  DB  69,137,254                          ; mov           %r15d,%r14d
1055  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1056  DB  65,137,220                          ; mov           %ebx,%r12d
1057  DB  72,193,235,32                       ; shr           $0x20,%rbx
1058  DB  73,193,239,32                       ; shr           $0x20,%r15
  ; Look up lanes 4..7 into xmm0, lanes 0..3 into xmm1, then join as ymm0.
1059  DB  196,161,122,16,4,177                ; vmovss        (%rcx,%r14,4),%xmm0
1060  DB  196,163,121,33,4,185,16             ; vinsertps     $0x10,(%rcx,%r15,4),%xmm0,%xmm0
1061  DB  196,163,121,33,4,161,32             ; vinsertps     $0x20,(%rcx,%r12,4),%xmm0,%xmm0
1062  DB  196,227,121,33,4,153,48             ; vinsertps     $0x30,(%rcx,%rbx,4),%xmm0,%xmm0
1063  DB  196,161,122,16,12,137               ; vmovss        (%rcx,%r9,4),%xmm1
1064  DB  196,163,113,33,12,129,16            ; vinsertps     $0x10,(%rcx,%r8,4),%xmm1,%xmm1
1065  DB  196,163,113,33,12,153,32            ; vinsertps     $0x20,(%rcx,%r11,4),%xmm1,%xmm1
1066  DB  196,163,113,33,12,145,48            ; vinsertps     $0x30,(%rcx,%r10,4),%xmm1,%xmm1
1067  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
  ; --- second table (r15): indices = (value >> 8) & mask ---
1068  DB  76,139,120,16                       ; mov           0x10(%rax),%r15
  ; AVX1 integer shifts are 128-bit only, so each half is shifted separately.
  ; xmm8 keeps the upper four source values for the later shifts.
1069  DB  196,193,113,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm1
1070  DB  196,67,125,25,208,1                 ; vextractf128  $0x1,%ymm10,%xmm8
1071  DB  196,193,105,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm2
1072  DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
1073  DB  197,180,84,201                      ; vandps        %ymm1,%ymm9,%ymm1
  ; Lanes 0..3 -> r10, r8, r11, r9.
1074  DB  196,193,249,126,200                 ; vmovq         %xmm1,%r8
1075  DB  69,137,194                          ; mov           %r8d,%r10d
1076  DB  196,195,249,22,201,1                ; vpextrq       $0x1,%xmm1,%r9
1077  DB  69,137,203                          ; mov           %r9d,%r11d
1078  DB  73,193,233,32                       ; shr           $0x20,%r9
1079  DB  73,193,232,32                       ; shr           $0x20,%r8
  ; Lanes 4..7 -> r14, rbx, r12, rcx (rcx is free: the first table is done).
1080  DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
1081  DB  196,225,249,126,203                 ; vmovq         %xmm1,%rbx
1082  DB  65,137,222                          ; mov           %ebx,%r14d
1083  DB  196,227,249,22,201,1                ; vpextrq       $0x1,%xmm1,%rcx
1084  DB  65,137,204                          ; mov           %ecx,%r12d
1085  DB  72,193,233,32                       ; shr           $0x20,%rcx
1086  DB  72,193,235,32                       ; shr           $0x20,%rbx
  ; Look up lanes 4..7 into xmm1, lanes 0..3 into xmm2, then join as ymm1.
1087  DB  196,129,122,16,12,183               ; vmovss        (%r15,%r14,4),%xmm1
1088  DB  196,195,113,33,12,159,16            ; vinsertps     $0x10,(%r15,%rbx,4),%xmm1,%xmm1
1089  DB  196,129,122,16,20,167               ; vmovss        (%r15,%r12,4),%xmm2
1090  DB  196,227,113,33,202,32               ; vinsertps     $0x20,%xmm2,%xmm1,%xmm1
1091  DB  196,193,122,16,20,143               ; vmovss        (%r15,%rcx,4),%xmm2
1092  DB  196,227,113,33,202,48               ; vinsertps     $0x30,%xmm2,%xmm1,%xmm1
1093  DB  196,129,122,16,20,151               ; vmovss        (%r15,%r10,4),%xmm2
1094  DB  196,131,105,33,20,135,16            ; vinsertps     $0x10,(%r15,%r8,4),%xmm2,%xmm2
1095  DB  196,129,122,16,28,159               ; vmovss        (%r15,%r11,4),%xmm3
1096  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
1097  DB  196,129,122,16,28,143               ; vmovss        (%r15,%r9,4),%xmm3
1098  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
1099  DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
  ; --- third table (rax, overwriting the context pointer): (value >> 16) & mask ---
1100  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
1101  DB  196,193,105,114,210,16              ; vpsrld        $0x10,%xmm10,%xmm2
1102  DB  196,193,97,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm3
1103  DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
1104  DB  197,180,84,210                      ; vandps        %ymm2,%ymm9,%ymm2
  ; Lanes 0..3 -> r9, r8, r10, r14.
1105  DB  196,193,249,126,208                 ; vmovq         %xmm2,%r8
1106  DB  69,137,193                          ; mov           %r8d,%r9d
1107  DB  196,195,249,22,214,1                ; vpextrq       $0x1,%xmm2,%r14
1108  DB  69,137,242                          ; mov           %r14d,%r10d
1109  DB  73,193,238,32                       ; shr           $0x20,%r14
1110  DB  73,193,232,32                       ; shr           $0x20,%r8
  ; Lanes 4..7 -> r11, rbx, r15, rcx.
1111  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
1112  DB  196,225,249,126,211                 ; vmovq         %xmm2,%rbx
1113  DB  65,137,219                          ; mov           %ebx,%r11d
1114  DB  196,227,249,22,209,1                ; vpextrq       $0x1,%xmm2,%rcx
1115  DB  65,137,207                          ; mov           %ecx,%r15d
1116  DB  72,193,233,32                       ; shr           $0x20,%rcx
1117  DB  72,193,235,32                       ; shr           $0x20,%rbx
  ; Look up lanes 4..7 into xmm9 (the mask is no longer needed), lanes 0..3
  ; into xmm2, then join as ymm2.
1118  DB  196,161,122,16,20,152               ; vmovss        (%rax,%r11,4),%xmm2
1119  DB  196,227,105,33,20,152,16            ; vinsertps     $0x10,(%rax,%rbx,4),%xmm2,%xmm2
1120  DB  196,161,122,16,28,184               ; vmovss        (%rax,%r15,4),%xmm3
1121  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
1122  DB  197,250,16,28,136                   ; vmovss        (%rax,%rcx,4),%xmm3
1123  DB  196,99,105,33,203,48                ; vinsertps     $0x30,%xmm3,%xmm2,%xmm9
1124  DB  196,161,122,16,28,136               ; vmovss        (%rax,%r9,4),%xmm3
1125  DB  196,163,97,33,28,128,16             ; vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
1126  DB  196,161,122,16,20,144               ; vmovss        (%rax,%r10,4),%xmm2
1127  DB  196,227,97,33,210,32                ; vinsertps     $0x20,%xmm2,%xmm3,%xmm2
1128  DB  196,161,122,16,28,176               ; vmovss        (%rax,%r14,4),%xmm3
1129  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
1130  DB  196,195,109,24,209,1                ; vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
  ; --- ymm3: top byte converted directly (no table, and no mask needed
  ;     after a logical shift right by 24) ---
1131  DB  196,193,49,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm9
1132  DB  196,193,97,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm3
1133  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
1134  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
1135  DB  196,98,125,24,66,12                 ; vbroadcastss  0xc(%rdx),%ymm8
1136  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
  ; Fetch the next stage's address, restore callee-saved GPRs, tail-jump.
1137  DB  72,173                              ; lods          %ds:(%rsi),%rax
1138  DB  91                                  ; pop           %rbx
1139  DB  65,92                               ; pop           %r12
1140  DB  65,94                               ; pop           %r14
1141  DB  65,95                               ; pop           %r15
1142  DB  255,224                             ; jmpq          *%rax
1143
; _sk_load_565_avx
;
; Pipeline stage (AVX): loads 8 packed 16-bit values and expands them into
; three float channel registers; the fourth is filled with a constant.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 2) into the 16-bit source array
;   rdx  base of the constant table (offsets 0x00, 0x68..0x7c used)
;
; The context word is dereferenced once to obtain the source pointer.
;   ymm0 = float(px & C[0x68]) * C[0x74]
;   ymm1 = float(px & C[0x6c]) * C[0x78]
;   ymm2 = float(px & C[0x70]) * C[0x7c]
;   ymm3 = C[0x00] broadcast to all lanes
; The masked field is converted without shifting it down, so each scale
; constant must account for the field's bit position.
; NOTE(review): the masks are presumably the 5/6/5-bit field masks and
; C[0x00] presumably 1.0f - confirm against the constants struct.
;
; Clobbers: rax.
1144PUBLIC _sk_load_565_avx
1145_sk_load_565_avx LABEL PROC
1146  DB  72,173                              ; lods          %ds:(%rsi),%rax
1147  DB  72,139,0                            ; mov           (%rax),%rax
  ; ymm2 = 8 source values zero-extended to dwords (elements 4-7 in the
  ; upper half, 0-3 in the lower half).
1148  DB  196,226,121,51,68,120,8             ; vpmovzxwd     0x8(%rax,%rdi,2),%xmm0
1149  DB  196,226,121,51,12,120               ; vpmovzxwd     (%rax,%rdi,2),%xmm1
1150  DB  196,227,117,24,208,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm2
  ; First field -> ymm0. Each mask is broadcast with vmovd/vpermilps/vinsertf128.
1151  DB  197,249,110,66,104                  ; vmovd         0x68(%rdx),%xmm0
1152  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
1153  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
1154  DB  197,252,84,194                      ; vandps        %ymm2,%ymm0,%ymm0
1155  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1156  DB  196,226,125,24,74,116               ; vbroadcastss  0x74(%rdx),%ymm1
1157  DB  197,244,89,192                      ; vmulps        %ymm0,%ymm1,%ymm0
  ; Second field -> ymm1.
1158  DB  197,249,110,74,108                  ; vmovd         0x6c(%rdx),%xmm1
1159  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
1160  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
1161  DB  197,244,84,202                      ; vandps        %ymm2,%ymm1,%ymm1
1162  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1163  DB  196,226,125,24,90,120               ; vbroadcastss  0x78(%rdx),%ymm3
1164  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
  ; Third field -> ymm2 (overwrites the raw source values, no longer needed).
1165  DB  197,249,110,90,112                  ; vmovd         0x70(%rdx),%xmm3
1166  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
1167  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1168  DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
1169  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1170  DB  196,226,125,24,90,124               ; vbroadcastss  0x7c(%rdx),%ymm3
1171  DB  197,228,89,210                      ; vmulps        %ymm2,%ymm3,%ymm2
  ; Fourth channel is a constant.
1172  DB  196,226,125,24,26                   ; vbroadcastss  (%rdx),%ymm3
  ; Fetch the next stage's address and tail-jump to it.
1173  DB  72,173                              ; lods          %ds:(%rsi),%rax
1174  DB  255,224                             ; jmpq          *%rax
1175
; _sk_store_565_avx
;
; Pipeline stage (AVX): packs ymm0, ymm1 and ymm2 into 8 16-bit values and
; stores them (16 bytes) to memory. ymm0..ymm3 are left unchanged.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 2) into the 16-bit destination array
;   rdx  base of the constant table (offsets 0x80 and 0x84 used)
;
; The context word is dereferenced once to obtain the destination pointer.
;   px = (int(ymm0 * C[0x80]) << 11) | (int(ymm1 * C[0x84]) << 5)
;      |  int(ymm2 * C[0x80])
; NOTE(review): C[0x80] / C[0x84] are presumably 31.0f / 63.0f - confirm
; against the constants struct.
;
; Clobbers: rax, ymm8-ymm11.
1176PUBLIC _sk_store_565_avx
1177_sk_store_565_avx LABEL PROC
1178  DB  72,173                              ; lods          %ds:(%rsi),%rax
1179  DB  72,139,0                            ; mov           (%rax),%rax
  ; First field: scale, convert to int32, shift left 11. AVX1 integer
  ; shifts are 128-bit only, so each half is shifted separately and rejoined.
1180  DB  196,98,125,24,130,128,0,0,0         ; vbroadcastss  0x80(%rdx),%ymm8
1181  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1182  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
1183  DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
1184  DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
1185  DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
1186  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
  ; Second field: scale by C[0x84], convert, shift left 5, OR in.
1187  DB  196,98,125,24,146,132,0,0,0         ; vbroadcastss  0x84(%rdx),%ymm10
1188  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
1189  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1190  DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
1191  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
1192  DB  196,193,41,114,242,5                ; vpslld        $0x5,%xmm10,%xmm10
1193  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
1194  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
  ; Third field: reuses the C[0x80] scale still in ymm8, no shift.
1195  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
1196  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
1197  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
  ; Narrow 8 dwords to 8 words (unsigned saturation) and store 16 bytes.
1198  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
1199  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
1200  DB  197,122,127,4,120                   ; vmovdqu       %xmm8,(%rax,%rdi,2)
  ; Fetch the next stage's address and tail-jump to it.
1201  DB  72,173                              ; lods          %ds:(%rsi),%rax
1202  DB  255,224                             ; jmpq          *%rax
1203
; _sk_load_8888_avx
;
; Pipeline stage (AVX): loads 8 packed 32-bit values and splits each into
; four 8-bit fields converted to scaled floats.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 4) into the 32-bit source array
;   rdx  base of the constant table (offsets 0xc and 0x10 used)
;
; The context word is dereferenced once to obtain the source pointer.
;   ymm0 = float( px        & C[0x10]) * C[0xc]
;   ymm1 = float((px >> 8)  & C[0x10]) * C[0xc]
;   ymm2 = float((px >> 16) & C[0x10]) * C[0xc]
;   ymm3 = float( px >> 24)            * C[0xc]
; NOTE(review): C[0x10] is presumably 0x000000ff and C[0xc] presumably
; 1/255 - confirm against the constants struct.
;
; Clobbers: rax, ymm8-ymm11.
1204PUBLIC _sk_load_8888_avx
1205_sk_load_8888_avx LABEL PROC
1206  DB  72,173                              ; lods          %ds:(%rsi),%rax
1207  DB  72,139,0                            ; mov           (%rax),%rax
  ; ymm3 temporarily holds the raw packed values until the last field.
1208  DB  197,252,16,28,184                   ; vmovups       (%rax,%rdi,4),%ymm3
  ; ymm11 = mask broadcast to all 8 lanes.
1209  DB  197,249,110,66,16                   ; vmovd         0x10(%rdx),%xmm0
1210  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
1211  DB  196,99,125,24,216,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm11
  ; Bits 0-7 -> ymm0. ymm8 = C[0xc] is reused for every field.
1212  DB  197,164,84,195                      ; vandps        %ymm3,%ymm11,%ymm0
1213  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1214  DB  196,98,125,24,66,12                 ; vbroadcastss  0xc(%rdx),%ymm8
1215  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
  ; Bits 8-15 -> ymm1. AVX1 integer shifts are 128-bit only, so the halves
  ; are shifted separately; xmm9 keeps the upper four raw values.
1216  DB  197,169,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm10
1217  DB  196,195,125,25,217,1                ; vextractf128  $0x1,%ymm3,%xmm9
1218  DB  196,193,113,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm1
1219  DB  196,227,45,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm10,%ymm1
1220  DB  197,164,84,201                      ; vandps        %ymm1,%ymm11,%ymm1
1221  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1222  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
  ; Bits 16-23 -> ymm2.
1223  DB  197,169,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm10
1224  DB  196,193,105,114,209,16              ; vpsrld        $0x10,%xmm9,%xmm2
1225  DB  196,227,45,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm10,%ymm2
1226  DB  197,164,84,210                      ; vandps        %ymm2,%ymm11,%ymm2
1227  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1228  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
  ; Bits 24-31 -> ymm3 (no mask needed after a logical shift right by 24).
1229  DB  197,169,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm10
1230  DB  196,193,97,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm3
1231  DB  196,227,45,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm10,%ymm3
1232  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
1233  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
  ; Fetch the next stage's address and tail-jump to it.
1234  DB  72,173                              ; lods          %ds:(%rsi),%rax
1235  DB  255,224                             ; jmpq          *%rax
1236
; _sk_store_8888_avx
;
; Pipeline stage (AVX): packs ymm0..ymm3 into 8 32-bit values and stores
; them (32 bytes) to memory. ymm0..ymm3 are left unchanged.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 4) into the 32-bit destination array
;   rdx  base of the constant table (offset 0x8 used)
;
; The context word is dereferenced once to obtain the destination pointer.
;   px =  int(ymm0 * C[0x8])
;      | (int(ymm1 * C[0x8]) << 8)
;      | (int(ymm2 * C[0x8]) << 16)
;      | (int(ymm3 * C[0x8]) << 24)
; No clamping is performed here; inputs outside the expected range would
; spill into neighbouring fields.
; NOTE(review): C[0x8] is presumably 255.0f - confirm against the constants
; struct.
;
; Clobbers: rax, ymm8-ymm11.
1237PUBLIC _sk_store_8888_avx
1238_sk_store_8888_avx LABEL PROC
1239  DB  72,173                              ; lods          %ds:(%rsi),%rax
1240  DB  72,139,0                            ; mov           (%rax),%rax
  ; ymm8 = scale, reused for all four channels. ymm9 accumulates the result.
1241  DB  196,98,125,24,66,8                  ; vbroadcastss  0x8(%rdx),%ymm8
1242  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1243  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
  ; ymm1 -> bits 8-15. AVX1 integer shifts are 128-bit only, so each half
  ; is shifted separately and rejoined.
1244  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
1245  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1246  DB  196,193,33,114,242,8                ; vpslld        $0x8,%xmm10,%xmm11
1247  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
1248  DB  196,193,41,114,242,8                ; vpslld        $0x8,%xmm10,%xmm10
1249  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
1250  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
  ; ymm2 -> bits 16-23.
1251  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
1252  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1253  DB  196,193,33,114,242,16               ; vpslld        $0x10,%xmm10,%xmm11
1254  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
1255  DB  196,193,41,114,242,16               ; vpslld        $0x10,%xmm10,%xmm10
1256  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
1257  DB  196,65,53,86,202                    ; vorpd         %ymm10,%ymm9,%ymm9
  ; ymm3 -> bits 24-31 (the scale register itself is consumed here).
1258  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
1259  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
1260  DB  196,193,41,114,240,24               ; vpslld        $0x18,%xmm8,%xmm10
1261  DB  196,67,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm8
1262  DB  196,193,57,114,240,24               ; vpslld        $0x18,%xmm8,%xmm8
1263  DB  196,67,45,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm10,%ymm8
1264  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
  ; Unaligned 32-byte store of the 8 packed values.
1265  DB  197,125,17,4,184                    ; vmovupd       %ymm8,(%rax,%rdi,4)
  ; Fetch the next stage's address and tail-jump to it.
1266  DB  72,173                              ; lods          %ds:(%rsi),%rax
1267  DB  255,224                             ; jmpq          *%rax
1268
; _sk_load_f16_avx
;
; Pipeline stage (AVX): loads 8 elements of four interleaved 16-bit values
; each (64 bytes total), de-interleaves them into four planes and widens
; each 16-bit value to a 32-bit float using integer shifts plus one multiply
; (no F16C instructions are used).
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 8) into the source array
;   rdx  base of the constant table (offsets 0x5c and 0x64 used)
;
; The context word is dereferenced once to obtain the source pointer.
; Per 16-bit value h:
;   h' = (C16[0x64] > h, signed 16-bit compare) ? 0 : h
;   f  = reinterpret_as_float(uint32(h') << 13) * C[0x5c]
; Plane k (k = 0..3, position within each element) ends up in ymm<k>.
; NOTE(review): the compare presumably flushes half-precision denormals to
; zero and C[0x5c] presumably rebias the exponent (a power of two) - confirm
; against the constants struct. Because the compare is signed, values with
; the top bit set also compare below the threshold and become zero.
;
; Clobbers: rax, ymm8-ymm13.
1269PUBLIC _sk_load_f16_avx
1270_sk_load_f16_avx LABEL PROC
1271  DB  72,173                              ; lods          %ds:(%rsi),%rax
1272  DB  72,139,0                            ; mov           (%rax),%rax
  ; Four 16-byte loads: two elements (8 halves) per register.
1273  DB  197,250,111,4,248                   ; vmovdqu       (%rax,%rdi,8),%xmm0
1274  DB  197,250,111,76,248,16               ; vmovdqu       0x10(%rax,%rdi,8),%xmm1
1275  DB  197,250,111,84,248,32               ; vmovdqu       0x20(%rax,%rdi,8),%xmm2
1276  DB  197,250,111,92,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm3
  ; Two rounds of 16-bit interleaves transpose the data so that
  ;   xmm3 = planes 0,1 of elements 0-3     xmm0 = planes 2,3 of elements 0-3
  ;   xmm8 = planes 0,1 of elements 4-7     xmm9 = planes 2,3 of elements 4-7
1277  DB  197,121,97,193                      ; vpunpcklwd    %xmm1,%xmm0,%xmm8
1278  DB  197,249,105,193                     ; vpunpckhwd    %xmm1,%xmm0,%xmm0
1279  DB  197,233,97,203                      ; vpunpcklwd    %xmm3,%xmm2,%xmm1
1280  DB  197,233,105,211                     ; vpunpckhwd    %xmm3,%xmm2,%xmm2
1281  DB  197,185,97,216                      ; vpunpcklwd    %xmm0,%xmm8,%xmm3
1282  DB  197,185,105,192                     ; vpunpckhwd    %xmm0,%xmm8,%xmm0
1283  DB  197,113,97,194                      ; vpunpcklwd    %xmm2,%xmm1,%xmm8
1284  DB  197,113,105,202                     ; vpunpckhwd    %xmm2,%xmm1,%xmm9
  ; xmm2 = threshold: the dword at 0x64 replicated; compared per 16-bit lane.
1285  DB  197,249,110,82,100                  ; vmovd         0x64(%rdx),%xmm2
1286  DB  197,249,112,210,0                   ; vpshufd       $0x0,%xmm2,%xmm2
  ; For each register: mask = (threshold > h); h' = ~mask & h.
1287  DB  197,233,101,203                     ; vpcmpgtw      %xmm3,%xmm2,%xmm1
1288  DB  197,241,223,203                     ; vpandn        %xmm3,%xmm1,%xmm1
1289  DB  197,233,101,216                     ; vpcmpgtw      %xmm0,%xmm2,%xmm3
1290  DB  197,225,223,192                     ; vpandn        %xmm0,%xmm3,%xmm0
1291  DB  196,193,105,101,216                 ; vpcmpgtw      %xmm8,%xmm2,%xmm3
1292  DB  196,193,97,223,216                  ; vpandn        %xmm8,%xmm3,%xmm3
1293  DB  196,193,105,101,209                 ; vpcmpgtw      %xmm9,%xmm2,%xmm2
1294  DB  196,193,105,223,209                 ; vpandn        %xmm9,%xmm2,%xmm2
  ; Zero-extend each plane's four halves to dwords: vpmovzxwd takes the low
  ; four words, vpunpckhwd against zero (xmm10) takes the high four.
  ;   plane 0: xmm8 (elts 0-3), xmm9 (elts 4-7)
  ;   plane 1: xmm1, xmm3      plane 2: xmm11, xmm12     plane 3: xmm13, xmm10
1295  DB  196,98,121,51,193                   ; vpmovzxwd     %xmm1,%xmm8
1296  DB  196,98,121,51,203                   ; vpmovzxwd     %xmm3,%xmm9
1297  DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
1298  DB  196,193,113,105,202                 ; vpunpckhwd    %xmm10,%xmm1,%xmm1
1299  DB  196,193,97,105,218                  ; vpunpckhwd    %xmm10,%xmm3,%xmm3
1300  DB  196,98,121,51,216                   ; vpmovzxwd     %xmm0,%xmm11
1301  DB  196,98,121,51,226                   ; vpmovzxwd     %xmm2,%xmm12
1302  DB  196,65,121,105,234                  ; vpunpckhwd    %xmm10,%xmm0,%xmm13
1303  DB  196,65,105,105,210                  ; vpunpckhwd    %xmm10,%xmm2,%xmm10
  ; Plane 0 -> ymm0: shift left 13, join halves, multiply by C[0x5c] (ymm8).
1304  DB  196,193,121,114,240,13              ; vpslld        $0xd,%xmm8,%xmm0
1305  DB  196,193,105,114,241,13              ; vpslld        $0xd,%xmm9,%xmm2
1306  DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
1307  DB  197,249,110,82,92                   ; vmovd         0x5c(%rdx),%xmm2
1308  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
1309  DB  196,99,109,24,194,1                 ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm8
1310  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
  ; Plane 1 -> ymm1.
1311  DB  197,241,114,241,13                  ; vpslld        $0xd,%xmm1,%xmm1
1312  DB  197,233,114,243,13                  ; vpslld        $0xd,%xmm3,%xmm2
1313  DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
1314  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
  ; Plane 2 -> ymm2.
1315  DB  196,193,105,114,243,13              ; vpslld        $0xd,%xmm11,%xmm2
1316  DB  196,193,97,114,244,13               ; vpslld        $0xd,%xmm12,%xmm3
1317  DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
1318  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
  ; Plane 3 -> ymm3.
1319  DB  196,193,49,114,245,13               ; vpslld        $0xd,%xmm13,%xmm9
1320  DB  196,193,97,114,242,13               ; vpslld        $0xd,%xmm10,%xmm3
1321  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
1322  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Fetch the next stage's address and tail-jump to it.
1323  DB  72,173                              ; lods          %ds:(%rsi),%rax
1324  DB  255,224                             ; jmpq          *%rax
1325
; _sk_store_f16_avx
;
; Pipeline stage (AVX): narrows ymm0..ymm3 to 16-bit values using one
; multiply plus integer shifts (no F16C instructions), interleaves the four
; planes per element and stores 8 elements (64 bytes). ymm0..ymm3 are left
; unchanged.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;   rdi  element index (scaled by 8) into the destination array
;   rdx  base of the constant table (offset 0x60 used)
;
; The context word is dereferenced once to obtain the destination pointer.
; Per float f:  h = reinterpret_as_uint32(f * C[0x60]) >> 13
; NOTE(review): C[0x60] is presumably a power of two that rebias the float
; exponent into half-precision range - confirm against the constants struct.
; NOTE(review): the packing below uses vpslldq, a whole-register BYTE shift,
; then ORs; that is only correct if every shifted dword fits in 16 bits
; (otherwise bits leak into the neighbouring element). Negative or
; out-of-range inputs are therefore presumably excluded by earlier stages -
; confirm.
;
; Clobbers: rax, ymm8-ymm15.
1326PUBLIC _sk_store_f16_avx
1327_sk_store_f16_avx LABEL PROC
1328  DB  72,173                              ; lods          %ds:(%rsi),%rax
1329  DB  72,139,0                            ; mov           (%rax),%rax
  ; ymm8 = scale broadcast to all 8 lanes.
1330  DB  197,121,110,66,96                   ; vmovd         0x60(%rdx),%xmm8
1331  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1332  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
  ; Plane 0 (ymm0): low half -> xmm9, high half -> xmm10, each >> 13.
1333  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1334  DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
1335  DB  196,193,41,114,210,13               ; vpsrld        $0xd,%xmm10,%xmm10
1336  DB  196,193,49,114,209,13               ; vpsrld        $0xd,%xmm9,%xmm9
  ; Plane 1 (ymm1): low -> xmm11, high -> xmm12.
1337  DB  197,60,89,217                       ; vmulps        %ymm1,%ymm8,%ymm11
1338  DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
1339  DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
1340  DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
  ; Plane 2 (ymm2): low -> xmm13, high -> xmm14.
1341  DB  197,60,89,234                       ; vmulps        %ymm2,%ymm8,%ymm13
1342  DB  196,67,125,25,238,1                 ; vextractf128  $0x1,%ymm13,%xmm14
1343  DB  196,193,9,114,214,13                ; vpsrld        $0xd,%xmm14,%xmm14
1344  DB  196,193,17,114,213,13               ; vpsrld        $0xd,%xmm13,%xmm13
  ; Plane 3 (ymm3): low -> xmm8 (scale no longer needed), high -> xmm15.
1345  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
1346  DB  196,67,125,25,199,1                 ; vextractf128  $0x1,%ymm8,%xmm15
1347  DB  196,193,1,114,215,13                ; vpsrld        $0xd,%xmm15,%xmm15
1348  DB  196,193,57,114,208,13               ; vpsrld        $0xd,%xmm8,%xmm8
  ; Pair planes into dwords: (plane1 << 16) | plane0 and (plane3 << 16) | plane2.
  ;   xmm9  = planes 0,1 of elements 0-3    xmm10 = planes 0,1 of elements 4-7
  ;   xmm8  = planes 2,3 of elements 0-3    xmm11 = planes 2,3 of elements 4-7
1349  DB  196,193,33,115,251,2                ; vpslldq       $0x2,%xmm11,%xmm11
1350  DB  196,65,33,235,201                   ; vpor          %xmm9,%xmm11,%xmm9
1351  DB  196,193,33,115,252,2                ; vpslldq       $0x2,%xmm12,%xmm11
1352  DB  196,65,33,235,210                   ; vpor          %xmm10,%xmm11,%xmm10
1353  DB  196,193,57,115,248,2                ; vpslldq       $0x2,%xmm8,%xmm8
1354  DB  196,65,57,235,197                   ; vpor          %xmm13,%xmm8,%xmm8
1355  DB  196,193,33,115,255,2                ; vpslldq       $0x2,%xmm15,%xmm11
1356  DB  196,65,33,235,222                   ; vpor          %xmm14,%xmm11,%xmm11
  ; Interleave the dword pairs into whole 8-byte elements and store two
  ; elements per 16-byte write: elements 0-1, 2-3, 4-5, 6-7.
1357  DB  196,65,49,98,224                    ; vpunpckldq    %xmm8,%xmm9,%xmm12
1358  DB  197,122,127,36,248                  ; vmovdqu       %xmm12,(%rax,%rdi,8)
1359  DB  196,65,49,106,192                   ; vpunpckhdq    %xmm8,%xmm9,%xmm8
1360  DB  197,122,127,68,248,16               ; vmovdqu       %xmm8,0x10(%rax,%rdi,8)
1361  DB  196,65,41,98,195                    ; vpunpckldq    %xmm11,%xmm10,%xmm8
1362  DB  197,122,127,68,248,32               ; vmovdqu       %xmm8,0x20(%rax,%rdi,8)
1363  DB  196,65,41,106,195                   ; vpunpckhdq    %xmm11,%xmm10,%xmm8
1364  DB  197,122,127,68,248,48               ; vmovdqu       %xmm8,0x30(%rax,%rdi,8)
  ; Fetch the next stage's address and tail-jump to it.
1365  DB  72,173                              ; lods          %ds:(%rsi),%rax
1366  DB  255,224                             ; jmpq          *%rax
1367
; _sk_clamp_x_avx
;
; Pipeline stage (AVX): clamps every lane of ymm0 to the range
; [0, limit_minus_1ulp].
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;
; The context word is used directly as the address of a 32-bit float limit
; (no extra dereference). The limit's bit pattern is decremented by one as
; an integer (adding the all-ones value from vpcmpeqd), which for a positive
; finite float yields the next representable value below it.
; NOTE(review): presumably this keeps the clamped coordinate strictly below
; the limit - confirm the limit is always positive.
;   ymm0 = max(0, min(ymm0, limit_minus_1ulp))
;
; Clobbers: rax, ymm8-ymm10.
1368PUBLIC _sk_clamp_x_avx
1369_sk_clamp_x_avx LABEL PROC
1370  DB  72,173                              ; lods          %ds:(%rsi),%rax
1371  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
  ; Integer add of -1 per dword; AVX1 integer ops are 128-bit only, so the
  ; two halves are processed separately and rejoined.
1372  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
1373  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
1374  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
1375  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
1376  DB  196,67,61,24,193,1                  ; vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
  ; Upper bound first, then lower bound against zero.
1377  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
1378  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1379  DB  197,188,95,192                      ; vmaxps        %ymm0,%ymm8,%ymm0
  ; Fetch the next stage's address and tail-jump to it.
1380  DB  72,173                              ; lods          %ds:(%rsi),%rax
1381  DB  255,224                             ; jmpq          *%rax
1382
; _sk_clamp_y_avx
;
; Pipeline stage (AVX): clamps every lane of ymm1 to the range
; [0, limit_minus_1ulp]. Same instruction sequence as the ymm0 clamp stage,
; applied to ymm1.
; Disassembly comments are in AT&T operand order (destination last).
;
; Registers read:
;   rsi  cursor into the stage-word stream; the first lods yields this
;        stage's context word, the second the next stage's address
;
; The context word is used directly as the address of a 32-bit float limit
; (no extra dereference). The limit's bit pattern is decremented by one as
; an integer (adding the all-ones value from vpcmpeqd), which for a positive
; finite float yields the next representable value below it.
; NOTE(review): presumably this keeps the clamped coordinate strictly below
; the limit - confirm the limit is always positive.
;   ymm1 = max(0, min(ymm1, limit_minus_1ulp))
;
; Clobbers: rax, ymm8-ymm10.
1383PUBLIC _sk_clamp_y_avx
1384_sk_clamp_y_avx LABEL PROC
1385  DB  72,173                              ; lods          %ds:(%rsi),%rax
1386  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
  ; Integer add of -1 per dword; AVX1 integer ops are 128-bit only, so the
  ; two halves are processed separately and rejoined.
1387  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
1388  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
1389  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
1390  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
1391  DB  196,67,61,24,193,1                  ; vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
  ; Upper bound first, then lower bound against zero.
1392  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
1393  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1394  DB  197,188,95,201                      ; vmaxps        %ymm1,%ymm8,%ymm1
  ; Fetch the next stage's address and tail-jump to it.
1395  DB  72,173                              ; lods          %ds:(%rsi),%rax
1396  DB  255,224                             ; jmpq          *%rax
1397
; _sk_matrix_2x3_avx: apply a 2x3 affine transform to the lane pairs (ymm0, ymm1).
;   In:   ymm0 = x, ymm1 = y.
;         rsi  = program pointer; the first qword read points at six floats m[0..5],
;                the second qword read is the address this stage tail-jumps to.
;   Out:  ymm0 = m[0]*x + (m[2]*y + m[4])
;         ymm1 = m[1]*x + (m[3]*y + m[5])
;   Uses: rax, ymm8-ymm11 as scratch; rsi is advanced by the two lods.
1398PUBLIC _sk_matrix_2x3_avx
1399_sk_matrix_2x3_avx LABEL PROC
1400  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First output row, accumulated in ymm8 because the original x and y are still needed below.
1401  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1402  DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
1403  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
1404  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
1405  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
1406  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
1407  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
  ; Second output row, written straight into ymm1 (x and y are consumed here).
1408  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
1409  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
1410  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
1411  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
1412  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
1413  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
1414  DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
  ; Fetch the next program entry, move the first row into ymm0, and tail-jump.
1415  DB  72,173                              ; lods          %ds:(%rsi),%rax
1416  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1417  DB  255,224                             ; jmpq          *%rax
1418
; _sk_matrix_3x4_avx: apply a 3x4 affine transform to the lane triples (ymm0, ymm1, ymm2).
;   In:   ymm0 = a, ymm1 = b, ymm2 = c.
;         rsi  = program pointer; the first qword read points at twelve floats m[0..11],
;                the second qword read is the address this stage tail-jumps to.
;   Out:  ymm0 = m[0]*a + (m[3]*b + (m[6]*c + m[9]))
;         ymm1 = m[1]*a + (m[4]*b + (m[7]*c + m[10]))
;         ymm2 = m[2]*a + (m[5]*b + (m[8]*c + m[11]))
;   Uses: rax, ymm8-ymm13 as scratch; rsi is advanced by the two lods.
1419PUBLIC _sk_matrix_3x4_avx
1420_sk_matrix_3x4_avx LABEL PROC
1421  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; Row 0 -> ymm8 (held in scratch; the inputs are still needed for rows 1 and 2).
1422  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1423  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
1424  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
1425  DB  196,98,125,24,88,36                 ; vbroadcastss  0x24(%rax),%ymm11
1426  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
1427  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
1428  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
1429  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
1430  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
1431  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
  ; Row 1 -> ymm9.
1432  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
1433  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
1434  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
1435  DB  196,98,125,24,96,40                 ; vbroadcastss  0x28(%rax),%ymm12
1436  DB  197,36,89,218                       ; vmulps        %ymm2,%ymm11,%ymm11
1437  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
1438  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
1439  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
1440  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
1441  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
  ; Row 2 -> ymm2 directly (this is the last use of the original inputs).
1442  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
1443  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
1444  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
1445  DB  196,98,125,24,104,44                ; vbroadcastss  0x2c(%rax),%ymm13
1446  DB  197,156,89,210                      ; vmulps        %ymm2,%ymm12,%ymm2
1447  DB  196,193,108,88,213                  ; vaddps        %ymm13,%ymm2,%ymm2
1448  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
1449  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
1450  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
1451  DB  197,252,88,209                      ; vaddps        %ymm1,%ymm0,%ymm2
  ; Fetch the next program entry, publish rows 0 and 1, and tail-jump.
1452  DB  72,173                              ; lods          %ds:(%rsi),%rax
1453  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1454  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
1455  DB  255,224                             ; jmpq          *%rax
1456
; _sk_linear_gradient_2stops_avx: evaluate four linear functions of ymm0, one per output register.
;   In:   ymm0 = t.
;         rsi  = program pointer; the first qword read points at eight floats c[0..7],
;                the second qword read is the address this stage tail-jumps to.
;   Out:  ymm0 = c[0] + c[4]*t
;         ymm1 = c[1] + c[5]*t
;         ymm2 = c[2] + c[6]*t
;         ymm3 = c[3] + c[7]*t
;         The incoming values of ymm1-ymm3 are overwritten without being read.
;   Uses: rax, ymm8, ymm9 as scratch; rsi is advanced by the two lods.
1457PUBLIC _sk_linear_gradient_2stops_avx
1458_sk_linear_gradient_2stops_avx LABEL PROC
1459  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First output goes to ymm8 because t (ymm0) is still needed for the other three.
1460  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
1461  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
1462  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
1463  DB  197,108,88,193                      ; vaddps        %ymm1,%ymm2,%ymm8
1464  DB  196,226,125,24,72,20                ; vbroadcastss  0x14(%rax),%ymm1
1465  DB  196,226,125,24,80,4                 ; vbroadcastss  0x4(%rax),%ymm2
1466  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
1467  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
1468  DB  196,226,125,24,80,24                ; vbroadcastss  0x18(%rax),%ymm2
1469  DB  196,226,125,24,88,8                 ; vbroadcastss  0x8(%rax),%ymm3
1470  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
1471  DB  197,228,88,210                      ; vaddps        %ymm2,%ymm3,%ymm2
1472  DB  196,226,125,24,88,28                ; vbroadcastss  0x1c(%rax),%ymm3
1473  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
1474  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
1475  DB  197,180,88,216                      ; vaddps        %ymm0,%ymm9,%ymm3
  ; Fetch the next program entry, publish the first output, and tail-jump.
1476  DB  72,173                              ; lods          %ds:(%rsi),%rax
1477  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1478  DB  255,224                             ; jmpq          *%rax
1479
; _sk_start_pipeline_sse41: driver loop that repeatedly calls the first entry of a program,
; stepping an index by 4 per call.
;   ABI:  entered with the Microsoft x64 register assignment (rcx, rdx, r8, r9).
;   In:   rcx = starting index.
;         rdx = program pointer (array of qwords); the first qword is the address to call.
;         r8  = value forwarded unchanged to every call in rdx.
;         r9  = limit; a call is made only while index + 4 <= limit (unsigned compare).
;   Out:  rax = index at which the loop stopped (the starting index if no call was made).
;   Each call receives: rdi = index, rsi = program pointer past the first qword, rdx = r8 value,
;   and xmm0-xmm7 zeroed.
;   Saves/restores: rbx, rsi, rdi, r12-r15 and xmm6-xmm15 (0xa0 bytes of stack for the xmm saves).
;   NOTE(review): declared with LABEL PROC and raw bytes, so no unwind metadata is visible for this
;   non-leaf function - confirm that unwinding through it is never required.
1480PUBLIC _sk_start_pipeline_sse41
1481_sk_start_pipeline_sse41 LABEL PROC
  ; Prologue: save callee-saved integer registers, then xmm6-xmm15.
1482  DB  65,87                               ; push          %r15
1483  DB  65,86                               ; push          %r14
1484  DB  65,85                               ; push          %r13
1485  DB  65,84                               ; push          %r12
1486  DB  86                                  ; push          %rsi
1487  DB  87                                  ; push          %rdi
1488  DB  83                                  ; push          %rbx
1489  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
1490  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
1491  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
1492  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
1493  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
1494  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
1495  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
1496  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
1497  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
1498  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
1499  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
  ; Park the arguments in registers that survive the calls:
  ; r15 = limit, r14 = forwarded value, rbx = index, rsi = program pointer.
1500  DB  77,137,207                          ; mov           %r9,%r15
1501  DB  77,137,198                          ; mov           %r8,%r14
1502  DB  72,137,203                          ; mov           %rcx,%rbx
1503  DB  72,137,214                          ; mov           %rdx,%rsi
  ; r12 = first program entry (the call target), r13 = program pointer just past it.
1504  DB  72,173                              ; lods          %ds:(%rsi),%rax
1505  DB  73,137,196                          ; mov           %rax,%r12
1506  DB  73,137,245                          ; mov           %rsi,%r13
  ; If index + 4 > limit there is nothing to do: return the starting index.
1507  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
1508  DB  76,57,248                           ; cmp           %r15,%rax
1509  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse41+0x73>
1510  DB  72,137,216                          ; mov           %rbx,%rax
1511  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse41+0xa7>
  ; Loop body (+0x73): zero xmm0-xmm7, set up rdi/rsi/rdx, and call the first entry.
1512  DB  15,87,192                           ; xorps         %xmm0,%xmm0
1513  DB  15,87,201                           ; xorps         %xmm1,%xmm1
1514  DB  15,87,210                           ; xorps         %xmm2,%xmm2
1515  DB  15,87,219                           ; xorps         %xmm3,%xmm3
1516  DB  15,87,228                           ; xorps         %xmm4,%xmm4
1517  DB  15,87,237                           ; xorps         %xmm5,%xmm5
1518  DB  15,87,246                           ; xorps         %xmm6,%xmm6
1519  DB  15,87,255                           ; xorps         %xmm7,%xmm7
1520  DB  72,137,223                          ; mov           %rbx,%rdi
1521  DB  76,137,238                          ; mov           %r13,%rsi
1522  DB  76,137,242                          ; mov           %r14,%rdx
1523  DB  65,255,212                          ; callq         *%r12
  ; rax = index + 4 (the next index); the compare uses index + 8 so that the loop repeats
  ; only when next + 4 <= limit. mov does not touch flags, so jbe still sees the cmp result.
1524  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
1525  DB  72,131,195,8                        ; add           $0x8,%rbx
1526  DB  76,57,251                           ; cmp           %r15,%rbx
1527  DB  72,137,195                          ; mov           %rax,%rbx
1528  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse41+0x73>
  ; Epilogue (+0xa7): restore xmm6-xmm15 and the integer registers in reverse push order.
1529  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
1530  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
1531  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
1532  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
1533  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
1534  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
1535  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
1536  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
1537  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
1538  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
1539  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
1540  DB  91                                  ; pop           %rbx
1541  DB  95                                  ; pop           %rdi
1542  DB  94                                  ; pop           %rsi
1543  DB  65,92                               ; pop           %r12
1544  DB  65,93                               ; pop           %r13
1545  DB  65,94                               ; pop           %r14
1546  DB  65,95                               ; pop           %r15
1547  DB  195                                 ; retq
1548
; _sk_just_return_sse41: a stage consisting of a single ret; it reads nothing and changes no
; registers other than popping the return address.
;   NOTE(review): presumably placed as the last program entry so that a chain of tail-jumping
;   stages returns to the callq in the pipeline driver - confirm against the program builder.
1549PUBLIC _sk_just_return_sse41
1550_sk_just_return_sse41 LABEL PROC
1551  DB  195                                 ; retq
1552
; _sk_seed_shader_sse41: initialise xmm0-xmm7 from the index in edi, one integer from the stage
; context, and values read through rdx.
;   In:   edi = integer index.
;         rdx = pointer to a block of floats; offsets 0x0, 0x4 and 0x14..0x23 are read.
;         rsi = program pointer; the first qword read points at one 32-bit integer n,
;               the second qword read is the address this stage tail-jumps to.
;   Out:  xmm0 = float(edi) + f[0x4] + the four floats at rdx+0x14 (lane-wise)
;         xmm1 = float(n) + f[0x4]
;         xmm2 = f[0x0] broadcast
;         xmm3 = xmm4 = xmm5 = xmm6 = xmm7 = 0
;   NOTE(review): f[0x0] looks like 1.0f, f[0x4] like 0.5f and rdx+0x14 like per-lane offsets
;   {0,1,2,3} - these meanings are not visible here; confirm against the constants definition.
1553PUBLIC _sk_seed_shader_sse41
1554_sk_seed_shader_sse41 LABEL PROC
1555  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm1 = float(edi) in every lane; xmm3 = f[0x4] in every lane.
1556  DB  102,15,110,199                      ; movd          %edi,%xmm0
1557  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
1558  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
1559  DB  243,15,16,18                        ; movss         (%rdx),%xmm2
1560  DB  243,15,16,90,4                      ; movss         0x4(%rdx),%xmm3
1561  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
1562  DB  15,88,203                           ; addps         %xmm3,%xmm1
  ; Unaligned load: rdx+0x14 is not 16-byte aligned relative to rdx.
1563  DB  15,16,66,20                         ; movups        0x14(%rdx),%xmm0
1564  DB  15,88,193                           ; addps         %xmm1,%xmm0
  ; xmm1 = float(n) + f[0x4], with n read from the stage context.
1565  DB  102,15,110,8                        ; movd          (%rax),%xmm1
1566  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
1567  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
1568  DB  15,88,203                           ; addps         %xmm3,%xmm1
1569  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
  ; Fetch the next program entry, clear xmm3-xmm7, and tail-jump.
1570  DB  72,173                              ; lods          %ds:(%rsi),%rax
1571  DB  15,87,219                           ; xorps         %xmm3,%xmm3
1572  DB  15,87,228                           ; xorps         %xmm4,%xmm4
1573  DB  15,87,237                           ; xorps         %xmm5,%xmm5
1574  DB  15,87,246                           ; xorps         %xmm6,%xmm6
1575  DB  15,87,255                           ; xorps         %xmm7,%xmm7
1576  DB  255,224                             ; jmpq          *%rax
1577
; _sk_constant_color_sse41: load four floats from the stage context and splat one into each of
; xmm0-xmm3.
;   In:   rsi = program pointer; the first qword read points at four floats c[0..3]
;               (read with an unaligned load), the second is the tail-jump target.
;   Out:  xmm0 = c[0] in all lanes, xmm1 = c[1], xmm2 = c[2], xmm3 = c[3].
;   Uses: rax; rsi is advanced by the two lods.
1578PUBLIC _sk_constant_color_sse41
1579_sk_constant_color_sse41 LABEL PROC
1580  DB  72,173                              ; lods          %ds:(%rsi),%rax
1581  DB  15,16,24                            ; movups        (%rax),%xmm3
  ; Copy the loaded vector and broadcast lane 0, 1, 2 respectively.
1582  DB  15,40,195                           ; movaps        %xmm3,%xmm0
1583  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
1584  DB  15,40,203                           ; movaps        %xmm3,%xmm1
1585  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
1586  DB  15,40,211                           ; movaps        %xmm3,%xmm2
1587  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
  ; xmm3 is splatted last, in place, since it held the source vector.
1588  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
1589  DB  72,173                              ; lods          %ds:(%rsi),%rax
1590  DB  255,224                             ; jmpq          *%rax
1591
; _sk_clear_sse41: set xmm0-xmm3 to zero.
;   In:   rsi = program pointer; two qwords are consumed.
;   Out:  xmm0 = xmm1 = xmm2 = xmm3 = 0; xmm4-xmm7 untouched.
;   Uses: rax; rsi is advanced by the two lods.
1592PUBLIC _sk_clear_sse41
1593_sk_clear_sse41 LABEL PROC
  ; The first qword is loaded and immediately overwritten (its value is unused here);
  ; the second is the tail-jump target.
1594  DB  72,173                              ; lods          %ds:(%rsi),%rax
1595  DB  72,173                              ; lods          %ds:(%rsi),%rax
1596  DB  15,87,192                           ; xorps         %xmm0,%xmm0
1597  DB  15,87,201                           ; xorps         %xmm1,%xmm1
1598  DB  15,87,210                           ; xorps         %xmm2,%xmm2
1599  DB  15,87,219                           ; xorps         %xmm3,%xmm3
1600  DB  255,224                             ; jmpq          *%rax
1601
; _sk_plus__sse41: lane-wise add of xmm4-xmm7 into xmm0-xmm3.
;   In:   xmm0-xmm3 and xmm4-xmm7; rsi = program pointer (two qwords consumed).
;   Out:  xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7; xmm4-xmm7 unchanged.
;   Uses: rax. The first qword read is unused here; the second is the tail-jump target.
1602PUBLIC _sk_plus__sse41
1603_sk_plus__sse41 LABEL PROC
1604  DB  72,173                              ; lods          %ds:(%rsi),%rax
1605  DB  15,88,196                           ; addps         %xmm4,%xmm0
1606  DB  15,88,205                           ; addps         %xmm5,%xmm1
1607  DB  15,88,214                           ; addps         %xmm6,%xmm2
1608  DB  15,88,223                           ; addps         %xmm7,%xmm3
1609  DB  72,173                              ; lods          %ds:(%rsi),%rax
1610  DB  255,224                             ; jmpq          *%rax
1611
; _sk_srcover_sse41: xmm0-xmm3 += xmm4-xmm7 * (k - xmm3), where k is the float at (rdx).
;   In:   xmm0-xmm3, xmm4-xmm7; rdx = pointer whose first float is k;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  with w = k - xmm3 (using the incoming xmm3):
;         xmm0 += xmm4*w, xmm1 += xmm5*w, xmm2 += xmm6*w, xmm3 += xmm7*w.
;   Uses: rax, xmm8, xmm9 as scratch.
;   NOTE(review): k is presumably 1.0f, making w = 1 - alpha of xmm3 - confirm.
1612PUBLIC _sk_srcover_sse41
1613_sk_srcover_sse41 LABEL PROC
1614  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = w, computed once before xmm3 is modified.
1615  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
1616  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
1617  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
  ; mulps is destructive, so w is copied to xmm9 for each of the first three products.
1618  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
1619  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
1620  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
1621  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
1622  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
1623  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
1624  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
1625  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
1626  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
  ; Last product may consume xmm8 directly.
1627  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
1628  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
1629  DB  72,173                              ; lods          %ds:(%rsi),%rax
1630  DB  255,224                             ; jmpq          *%rax
1631
; _sk_dstover_sse41: xmm0-xmm3 = xmm0-xmm3 * (k - xmm7) + xmm4-xmm7, where k is the float at (rdx).
;   In:   xmm0-xmm3, xmm4-xmm7; rdx = pointer whose first float is k;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  with w = k - xmm7:
;         xmm0 = xmm0*w + xmm4, xmm1 = xmm1*w + xmm5, xmm2 = xmm2*w + xmm6, xmm3 = xmm3*w + xmm7.
;         xmm4-xmm7 are unchanged.
;   Uses: rax, xmm8 as scratch.
;   NOTE(review): k is presumably 1.0f, making w = 1 - alpha of xmm7 - confirm.
1632PUBLIC _sk_dstover_sse41
1633_sk_dstover_sse41 LABEL PROC
1634  DB  72,173                              ; lods          %ds:(%rsi),%rax
1635  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
1636  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
1637  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
1638  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
1639  DB  15,88,196                           ; addps         %xmm4,%xmm0
1640  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
1641  DB  15,88,205                           ; addps         %xmm5,%xmm1
1642  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
1643  DB  15,88,214                           ; addps         %xmm6,%xmm2
1644  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
1645  DB  15,88,223                           ; addps         %xmm7,%xmm3
1646  DB  72,173                              ; lods          %ds:(%rsi),%rax
1647  DB  255,224                             ; jmpq          *%rax
1648
; _sk_clamp_0_sse41: lower-clamp xmm0-xmm3 at zero.
;   In:   xmm0-xmm3; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmmN = max(xmmN, 0) for N = 0..3.
;   Uses: rax, xmm8 (zeroed) as scratch.
1649PUBLIC _sk_clamp_0_sse41
1650_sk_clamp_0_sse41 LABEL PROC
1651  DB  72,173                              ; lods          %ds:(%rsi),%rax
1652  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
1653  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
1654  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
1655  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
1656  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
1657  DB  72,173                              ; lods          %ds:(%rsi),%rax
1658  DB  255,224                             ; jmpq          *%rax
1659
; _sk_clamp_1_sse41: upper-clamp xmm0-xmm3 at k, the float stored at (rdx).
;   In:   xmm0-xmm3; rdx = pointer whose first float is k;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmmN = min(xmmN, k) for N = 0..3.
;   Uses: rax, xmm8 (k broadcast) as scratch.
;   NOTE(review): k is presumably 1.0f, per the stage name - confirm.
1660PUBLIC _sk_clamp_1_sse41
1661_sk_clamp_1_sse41 LABEL PROC
1662  DB  72,173                              ; lods          %ds:(%rsi),%rax
1663  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
1664  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
1665  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
1666  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
1667  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
1668  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
1669  DB  72,173                              ; lods          %ds:(%rsi),%rax
1670  DB  255,224                             ; jmpq          *%rax
1671
; _sk_clamp_a_sse41: upper-clamp xmm3 at k (the float at (rdx)), then upper-clamp xmm0-xmm2 at
; the clamped xmm3.
;   In:   xmm0-xmm3; rdx = pointer whose first float is k;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm3 = min(xmm3, k); xmmN = min(xmmN, xmm3) for N = 0..2.
;   Uses: rax, xmm8 (k broadcast) as scratch.
;   NOTE(review): k is presumably 1.0f - confirm.
1672PUBLIC _sk_clamp_a_sse41
1673_sk_clamp_a_sse41 LABEL PROC
1674  DB  72,173                              ; lods          %ds:(%rsi),%rax
1675  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
1676  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm3 must be clamped first: the three clamps below use its new value.
1677  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
1678  DB  15,93,195                           ; minps         %xmm3,%xmm0
1679  DB  15,93,203                           ; minps         %xmm3,%xmm1
1680  DB  15,93,211                           ; minps         %xmm3,%xmm2
1681  DB  72,173                              ; lods          %ds:(%rsi),%rax
1682  DB  255,224                             ; jmpq          *%rax
1683
; _sk_set_rgb_sse41: overwrite xmm0-xmm2 with three floats from the stage context, each
; broadcast to all lanes.
;   In:   rsi = program pointer; the first qword read points at three floats c[0..2],
;               the second qword read is the tail-jump target.
;   Out:  xmm0 = c[0], xmm1 = c[1], xmm2 = c[2] (all lanes); xmm3 is left unchanged.
;   Uses: rax; rsi is advanced by the two lods.
1684PUBLIC _sk_set_rgb_sse41
1685_sk_set_rgb_sse41 LABEL PROC
1686  DB  72,173                              ; lods          %ds:(%rsi),%rax
1687  DB  243,15,16,0                         ; movss         (%rax),%xmm0
1688  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
1689  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
1690  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
1691  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
1692  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
1693  DB  72,173                              ; lods          %ds:(%rsi),%rax
1694  DB  255,224                             ; jmpq          *%rax
1695
; _sk_swap_rb_sse41: exchange the contents of xmm0 and xmm2.
;   In:   xmm0, xmm2; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm0 = old xmm2, xmm2 = old xmm0; xmm1 and xmm3 unchanged.
;   Uses: rax, xmm8 as the swap temporary.
1696PUBLIC _sk_swap_rb_sse41
1697_sk_swap_rb_sse41 LABEL PROC
1698  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
1699  DB  72,173                              ; lods          %ds:(%rsi),%rax
1700  DB  72,173                              ; lods          %ds:(%rsi),%rax
1701  DB  15,40,194                           ; movaps        %xmm2,%xmm0
1702  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
1703  DB  255,224                             ; jmpq          *%rax
1704
; _sk_swap_sse41: exchange the register group xmm0-xmm3 with the group xmm4-xmm7.
;   In:   xmm0-xmm7; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm0..xmm3 = old xmm4..xmm7, xmm4..xmm7 = old xmm0..xmm3.
;   Uses: rax, xmm8-xmm11 as temporaries.
1705PUBLIC _sk_swap_sse41
1706_sk_swap_sse41 LABEL PROC
  ; Stash the old xmm0-xmm3 in xmm11..xmm8 (note the reversed numbering).
1707  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
1708  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
1709  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
1710  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
1711  DB  72,173                              ; lods          %ds:(%rsi),%rax
1712  DB  72,173                              ; lods          %ds:(%rsi),%rax
1713  DB  15,40,196                           ; movaps        %xmm4,%xmm0
1714  DB  15,40,205                           ; movaps        %xmm5,%xmm1
1715  DB  15,40,214                           ; movaps        %xmm6,%xmm2
1716  DB  15,40,223                           ; movaps        %xmm7,%xmm3
1717  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
1718  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
1719  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
1720  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
1721  DB  255,224                             ; jmpq          *%rax
1722
; _sk_move_src_dst_sse41: copy xmm0-xmm3 into xmm4-xmm7.
;   In:   xmm0-xmm3; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm4 = xmm0, xmm5 = xmm1, xmm6 = xmm2, xmm7 = xmm3; xmm0-xmm3 unchanged.
;   Uses: rax.
1723PUBLIC _sk_move_src_dst_sse41
1724_sk_move_src_dst_sse41 LABEL PROC
1725  DB  72,173                              ; lods          %ds:(%rsi),%rax
1726  DB  72,173                              ; lods          %ds:(%rsi),%rax
1727  DB  15,40,224                           ; movaps        %xmm0,%xmm4
1728  DB  15,40,233                           ; movaps        %xmm1,%xmm5
1729  DB  15,40,242                           ; movaps        %xmm2,%xmm6
1730  DB  15,40,251                           ; movaps        %xmm3,%xmm7
1731  DB  255,224                             ; jmpq          *%rax
1732
; _sk_move_dst_src_sse41: copy xmm4-xmm7 into xmm0-xmm3.
;   In:   xmm4-xmm7; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm0 = xmm4, xmm1 = xmm5, xmm2 = xmm6, xmm3 = xmm7; xmm4-xmm7 unchanged.
;   Uses: rax.
1733PUBLIC _sk_move_dst_src_sse41
1734_sk_move_dst_src_sse41 LABEL PROC
1735  DB  72,173                              ; lods          %ds:(%rsi),%rax
1736  DB  72,173                              ; lods          %ds:(%rsi),%rax
1737  DB  15,40,196                           ; movaps        %xmm4,%xmm0
1738  DB  15,40,205                           ; movaps        %xmm5,%xmm1
1739  DB  15,40,214                           ; movaps        %xmm6,%xmm2
1740  DB  15,40,223                           ; movaps        %xmm7,%xmm3
1741  DB  255,224                             ; jmpq          *%rax
1742
; _sk_premul_sse41: multiply xmm0-xmm2 lane-wise by xmm3.
;   In:   xmm0-xmm3; rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  xmm0 *= xmm3, xmm1 *= xmm3, xmm2 *= xmm3; xmm3 unchanged.
;   Uses: rax.
1743PUBLIC _sk_premul_sse41
1744_sk_premul_sse41 LABEL PROC
1745  DB  72,173                              ; lods          %ds:(%rsi),%rax
1746  DB  15,89,195                           ; mulps         %xmm3,%xmm0
1747  DB  15,89,203                           ; mulps         %xmm3,%xmm1
1748  DB  15,89,211                           ; mulps         %xmm3,%xmm2
1749  DB  72,173                              ; lods          %ds:(%rsi),%rax
1750  DB  255,224                             ; jmpq          *%rax
1751
; _sk_unpremul_sse41: multiply xmm0-xmm2 by k / xmm3, using 0 as the scale in lanes where
; xmm3 == 0. k is the float at (rdx).
;   In:   xmm0-xmm3; rdx = pointer whose first float is k;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  with s = (xmm3 == 0) ? 0 : k / xmm3 per lane:
;         xmm0 *= s, xmm1 *= s, xmm2 *= s; xmm3 unchanged.
;   Uses: rax, xmm8-xmm10 as scratch.
;   NOTE(review): k is presumably 1.0f, making s the reciprocal of xmm3 - confirm.
1752PUBLIC _sk_unpremul_sse41
1753_sk_unpremul_sse41 LABEL PROC
  ; Park xmm0 in xmm8: this blendvps encoding takes its mask implicitly from xmm0.
1754  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
1755  DB  72,173                              ; lods          %ds:(%rsi),%rax
1756  DB  69,15,87,201                        ; xorps         %xmm9,%xmm9
1757  DB  243,68,15,16,18                     ; movss         (%rdx),%xmm10
1758  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
1759  DB  68,15,94,211                        ; divps         %xmm3,%xmm10
  ; xmm0 = mask of lanes where xmm3 == 0; those lanes of xmm10 are replaced by 0 (xmm9).
1760  DB  15,40,195                           ; movaps        %xmm3,%xmm0
1761  DB  65,15,194,193,0                     ; cmpeqps       %xmm9,%xmm0
1762  DB  102,69,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm10
1763  DB  69,15,89,194                        ; mulps         %xmm10,%xmm8
1764  DB  65,15,89,202                        ; mulps         %xmm10,%xmm1
1765  DB  65,15,89,210                        ; mulps         %xmm10,%xmm2
1766  DB  72,173                              ; lods          %ds:(%rsi),%rax
1767  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
1768  DB  255,224                             ; jmpq          *%rax
1769
; _sk_from_srgb_sse41: apply the same piecewise function to each of xmm0, xmm1 and xmm2.
;   In:   xmm0-xmm2; rdx = pointer to a block of floats, read at offsets 0x34..0x44;
;         rsi = program pointer (two qwords consumed; the first is unused here).
;   Out:  for each v in {xmm0, xmm1, xmm2}, per lane:
;           lo = f[0x40] * v
;           hi = (f[0x3c] * v + f[0x38]) * (v * v) + f[0x34]
;           v  = (v < f[0x44]) ? lo : hi
;         xmm3 is left unchanged.
;   Uses: rax, xmm8-xmm15 as scratch. xmm0 doubles as the implicit blendvps mask, so results are
;         built in xmm9 / xmm10 / xmm8 and moved into xmm0-xmm2 at the end.
;   NOTE(review): presumably the sRGB-to-linear transfer function (linear segment below a
;   threshold, polynomial approximation above) - the constants' values are not visible here.
1770PUBLIC _sk_from_srgb_sse41
1771_sk_from_srgb_sse41 LABEL PROC
1772  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; --- xmm0 channel: lo in xmm10, hi in xmm9; also loads the shared constants
  ; (xmm11 = f[0x40], xmm8 = f[0x3c], xmm13 = f[0x38], xmm12 = f[0x34]).
1773  DB  243,68,15,16,90,64                  ; movss         0x40(%rdx),%xmm11
1774  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
1775  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
1776  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
1777  DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
1778  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
1779  DB  243,68,15,16,66,60                  ; movss         0x3c(%rdx),%xmm8
1780  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
1781  DB  243,68,15,16,98,52                  ; movss         0x34(%rdx),%xmm12
1782  DB  243,68,15,16,106,56                 ; movss         0x38(%rdx),%xmm13
1783  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
1784  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
1785  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
1786  DB  69,15,88,205                        ; addps         %xmm13,%xmm9
1787  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
1788  DB  69,15,89,206                        ; mulps         %xmm14,%xmm9
1789  DB  69,15,88,204                        ; addps         %xmm12,%xmm9
  ; xmm14 is reused from here on for the threshold f[0x44].
1790  DB  243,68,15,16,114,68                 ; movss         0x44(%rdx),%xmm14
1791  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
1792  DB  65,15,194,198,1                     ; cmpltps       %xmm14,%xmm0
1793  DB  102,69,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm9
  ; --- xmm1 channel: lo in xmm15, hi in xmm10, xmm0 holds v*v and then the mask.
1794  DB  69,15,40,251                        ; movaps        %xmm11,%xmm15
1795  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
1796  DB  15,40,193                           ; movaps        %xmm1,%xmm0
1797  DB  15,89,192                           ; mulps         %xmm0,%xmm0
1798  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
1799  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
1800  DB  69,15,88,213                        ; addps         %xmm13,%xmm10
1801  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
1802  DB  69,15,88,212                        ; addps         %xmm12,%xmm10
1803  DB  65,15,194,206,1                     ; cmpltps       %xmm14,%xmm1
1804  DB  15,40,193                           ; movaps        %xmm1,%xmm0
1805  DB  102,69,15,56,20,215                 ; blendvps      %xmm0,%xmm15,%xmm10
  ; --- xmm2 channel: last use of the constants, so xmm11 and xmm8 are consumed in place
  ; (lo in xmm11, hi in xmm8).
1806  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
1807  DB  15,40,194                           ; movaps        %xmm2,%xmm0
1808  DB  15,89,192                           ; mulps         %xmm0,%xmm0
1809  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
1810  DB  69,15,88,197                        ; addps         %xmm13,%xmm8
1811  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
1812  DB  69,15,88,196                        ; addps         %xmm12,%xmm8
1813  DB  65,15,194,214,1                     ; cmpltps       %xmm14,%xmm2
1814  DB  15,40,194                           ; movaps        %xmm2,%xmm0
1815  DB  102,69,15,56,20,195                 ; blendvps      %xmm0,%xmm11,%xmm8
  ; Fetch the next program entry, publish the three results, and tail-jump.
1816  DB  72,173                              ; lods          %ds:(%rsi),%rax
1817  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
1818  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
1819  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
1820  DB  255,224                             ; jmpq          *%rax
1821
1822PUBLIC _sk_to_srgb_sse41
1823_sk_to_srgb_sse41 LABEL PROC
1824  DB  72,131,236,24                       ; sub           $0x18,%rsp
1825  DB  15,41,60,36                         ; movaps        %xmm7,(%rsp)
1826  DB  15,40,254                           ; movaps        %xmm6,%xmm7
1827  DB  15,40,245                           ; movaps        %xmm5,%xmm6
1828  DB  15,40,236                           ; movaps        %xmm4,%xmm5
1829  DB  15,40,227                           ; movaps        %xmm3,%xmm4
1830  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
1831  DB  15,40,217                           ; movaps        %xmm1,%xmm3
1832  DB  15,82,208                           ; rsqrtps       %xmm0,%xmm2
1833  DB  68,15,83,202                        ; rcpps         %xmm2,%xmm9
1834  DB  68,15,82,210                        ; rsqrtps       %xmm2,%xmm10
1835  DB  243,15,16,18                        ; movss         (%rdx),%xmm2
1836  DB  243,68,15,16,90,72                  ; movss         0x48(%rdx),%xmm11
1837  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
1838  DB  65,15,40,203                        ; movaps        %xmm11,%xmm1
1839  DB  15,89,200                           ; mulps         %xmm0,%xmm1
1840  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
1841  DB  243,68,15,16,98,76                  ; movss         0x4c(%rdx),%xmm12
1842  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
1843  DB  243,68,15,16,106,80                 ; movss         0x50(%rdx),%xmm13
1844  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
1845  DB  243,68,15,16,114,84                 ; movss         0x54(%rdx),%xmm14
1846  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
1847  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
1848  DB  69,15,88,206                        ; addps         %xmm14,%xmm9
1849  DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
1850  DB  69,15,88,209                        ; addps         %xmm9,%xmm10
1851  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
1852  DB  69,15,93,202                        ; minps         %xmm10,%xmm9
1853  DB  243,68,15,16,122,88                 ; movss         0x58(%rdx),%xmm15
1854  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
1855  DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
1856  DB  102,68,15,56,20,201                 ; blendvps      %xmm0,%xmm1,%xmm9
1857  DB  15,82,195                           ; rsqrtps       %xmm3,%xmm0
1858  DB  15,83,200                           ; rcpps         %xmm0,%xmm1
1859  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
1860  DB  65,15,89,205                        ; mulps         %xmm13,%xmm1
1861  DB  65,15,88,206                        ; addps         %xmm14,%xmm1
1862  DB  65,15,89,196                        ; mulps         %xmm12,%xmm0
1863  DB  15,88,193                           ; addps         %xmm1,%xmm0
1864  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
1865  DB  68,15,93,208                        ; minps         %xmm0,%xmm10
1866  DB  65,15,40,203                        ; movaps        %xmm11,%xmm1
1867  DB  15,89,203                           ; mulps         %xmm3,%xmm1
1868  DB  65,15,194,223,1                     ; cmpltps       %xmm15,%xmm3
1869  DB  15,40,195                           ; movaps        %xmm3,%xmm0
1870  DB  102,68,15,56,20,209                 ; blendvps      %xmm0,%xmm1,%xmm10
1871  DB  65,15,82,192                        ; rsqrtps       %xmm8,%xmm0
1872  DB  15,83,200                           ; rcpps         %xmm0,%xmm1
1873  DB  65,15,89,205                        ; mulps         %xmm13,%xmm1
1874  DB  65,15,88,206                        ; addps         %xmm14,%xmm1
1875  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
1876  DB  65,15,89,196                        ; mulps         %xmm12,%xmm0
1877  DB  15,88,193                           ; addps         %xmm1,%xmm0
1878  DB  15,93,208                           ; minps         %xmm0,%xmm2
1879  DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
1880  DB  69,15,194,199,1                     ; cmpltps       %xmm15,%xmm8
1881  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
1882  DB  102,65,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm2
1883  DB  72,173                              ; lods          %ds:(%rsi),%rax
1884  DB  72,173                              ; lods          %ds:(%rsi),%rax
1885  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
1886  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
1887  DB  15,40,220                           ; movaps        %xmm4,%xmm3
1888  DB  15,40,229                           ; movaps        %xmm5,%xmm4
1889  DB  15,40,238                           ; movaps        %xmm6,%xmm5
1890  DB  15,40,247                           ; movaps        %xmm7,%xmm6
1891  DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
1892  DB  72,131,196,24                       ; add           $0x18,%rsp
1893  DB  255,224                             ; jmpq          *%rax
1894
; _sk_scale_u8_sse41
; Multiplies xmm0..xmm3 by a per-lane factor derived from four bytes in memory.
;   rsi         cursor into a list of 8-byte entries; each lods loads one into
;               rax and steps rsi (forward by 8 when the direction flag is clear)
;   rdi         byte index into the buffer whose address is read from (entry)
;   rdx         base of a constants table; only the float at +0xc is read here
;   xmm0..xmm3  in/out, four float lanes each
; Scratch: rax, xmm8, xmm9.
; NOTE(review): the float at 0xc(%rdx) is presumably 1/255 so the bytes map to
; [0,1] - confirm against the constants table definition.
1895PUBLIC _sk_scale_u8_sse41
1896_sk_scale_u8_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
1897  DB  72,173                              ; lods          %ds:(%rsi),%rax
1898  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = four bytes at rax+rdi, zero-extended to dwords and converted to float.
1899  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
1900  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
; xmm9 = broadcast(float at rdx+0xc) * xmm8  -> the per-lane scale factor.
1901  DB  243,68,15,16,74,12                  ; movss         0xc(%rdx),%xmm9
1902  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
1903  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
; Apply the same factor to all four registers.
1904  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
1905  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
1906  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
1907  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
; Load the next list entry and tail-jump to it.
1908  DB  72,173                              ; lods          %ds:(%rsi),%rax
1909  DB  255,224                             ; jmpq          *%rax
1910
; _sk_lerp_u8_sse41
; Linearly interpolates between xmm4..xmm7 and xmm0..xmm3 using a per-lane
; factor c derived from four bytes in memory:
;     xmmN = xmm(N+4) + (xmmN - xmm(N+4)) * c      for N = 0..3
;   rsi         cursor into a list of 8-byte entries consumed with lods
;   rdi         byte index into the buffer whose address is read from (entry)
;   rdx         base of a constants table; only the float at +0xc is read here
;   xmm0..xmm3  in/out;  xmm4..xmm7  read only
; Scratch: rax, xmm8, xmm9.
; NOTE(review): the float at 0xc(%rdx) is presumably 1/255 - confirm against
; the constants table definition.
1911PUBLIC _sk_lerp_u8_sse41
1912_sk_lerp_u8_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
1913  DB  72,173                              ; lods          %ds:(%rsi),%rax
1914  DB  72,139,0                            ; mov           (%rax),%rax
; xmm9 = c = float(four zero-extended bytes at rax+rdi) * broadcast(rdx+0xc).
1915  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
1916  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
1917  DB  243,68,15,16,74,12                  ; movss         0xc(%rdx),%xmm9
1918  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
1919  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
; xmm0 = xmm4 + (xmm0 - xmm4) * c
1920  DB  15,92,196                           ; subps         %xmm4,%xmm0
1921  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
1922  DB  15,88,196                           ; addps         %xmm4,%xmm0
; xmm1 = xmm5 + (xmm1 - xmm5) * c
1923  DB  15,92,205                           ; subps         %xmm5,%xmm1
1924  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
1925  DB  15,88,205                           ; addps         %xmm5,%xmm1
; xmm2 = xmm6 + (xmm2 - xmm6) * c
1926  DB  15,92,214                           ; subps         %xmm6,%xmm2
1927  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
1928  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = xmm7 + (xmm3 - xmm7) * c
1929  DB  15,92,223                           ; subps         %xmm7,%xmm3
1930  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
1931  DB  15,88,223                           ; addps         %xmm7,%xmm3
; Load the next list entry and tail-jump to it.
1932  DB  72,173                              ; lods          %ds:(%rsi),%rax
1933  DB  255,224                             ; jmpq          *%rax
1934
; _sk_load_tables_sse41
; Loads four 32-bit values, splits each into four bit-fields, and uses the
; three low fields as indices into three float tables.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 4) into the 32-bit source buffer
;   rdx   base of a constants table; reads the dword at +0x10 and float at +0xc
; The first list entry points at four consecutive pointers:
;   +0x00 source buffer, +0x08 table for xmm0, +0x10 table for xmm1,
;   +0x18 table for xmm2 (tables are indexed with a 4-byte stride).
; Outputs:
;   xmm0 = table0[ v        & mask]
;   xmm1 = table1[(v >> 8)  & mask]
;   xmm2 = table2[(v >> 16) & mask]
;   xmm3 = float(v >> 24) * broadcast(float at rdx+0xc)
; Scratch: rax, rcx, r8, r9, r10, r11, xmm8 (xmm3 is also used as a temporary
; before it receives its final value).
; NOTE(review): mask (dword at 0x10(%rdx)) is presumably 0xff and the float at
; 0xc(%rdx) presumably 1/255 - confirm against the constants table definition.
1935PUBLIC _sk_load_tables_sse41
1936_sk_load_tables_sse41 LABEL PROC
; rax = first list entry; rcx = source pointer, r8 = first table pointer.
1937  DB  72,173                              ; lods          %ds:(%rsi),%rax
1938  DB  72,139,8                            ; mov           (%rax),%rcx
1939  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
; xmm8 = four 32-bit values at rcx + rdi*4 (unaligned load).
1940  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
; xmm0 = mask broadcast to all lanes; xmm1/xmm2/xmm0 = the three index vectors.
1941  DB  102,15,110,66,16                    ; movd          0x10(%rdx),%xmm0
1942  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
1943  DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
1944  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
1945  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
1946  DB  102,65,15,111,208                   ; movdqa        %xmm8,%xmm2
1947  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
1948  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
1949  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; Unpack the four indices in xmm0 into general registers:
;   r11 = lane 0, r10 = lane 1, r9 = lane 2, rcx = lane 3 (each zero-extended).
1950  DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
1951  DB  65,137,201                          ; mov           %ecx,%r9d
1952  DB  72,193,233,32                       ; shr           $0x20,%rcx
1953  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
1954  DB  69,137,211                          ; mov           %r10d,%r11d
1955  DB  73,193,234,32                       ; shr           $0x20,%r10
; Gather four floats from the table at r8 into xmm0, lane by lane.
1956  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
1957  DB  102,67,15,58,33,4,144,16            ; insertps      $0x10,(%r8,%r10,4),%xmm0
1958  DB  102,67,15,58,33,4,136,32            ; insertps      $0x20,(%r8,%r9,4),%xmm0
1959  DB  102,65,15,58,33,4,136,48            ; insertps      $0x30,(%r8,%rcx,4),%xmm0
; Second table (rcx = pointer at +0x10): unpack the indices in xmm1
;   (r11 = lane 0, r10 = lane 1, r9 = lane 2, r8 = lane 3) and gather into xmm1.
1960  DB  72,139,72,16                        ; mov           0x10(%rax),%rcx
1961  DB  102,73,15,58,22,200,1               ; pextrq        $0x1,%xmm1,%r8
1962  DB  69,137,193                          ; mov           %r8d,%r9d
1963  DB  73,193,232,32                       ; shr           $0x20,%r8
1964  DB  102,73,15,126,202                   ; movq          %xmm1,%r10
1965  DB  69,137,211                          ; mov           %r10d,%r11d
1966  DB  73,193,234,32                       ; shr           $0x20,%r10
1967  DB  243,66,15,16,12,153                 ; movss         (%rcx,%r11,4),%xmm1
1968  DB  102,66,15,58,33,12,145,16           ; insertps      $0x10,(%rcx,%r10,4),%xmm1
; Lanes 2 and 3 go through xmm3 as a temporary.
1969  DB  243,66,15,16,28,137                 ; movss         (%rcx,%r9,4),%xmm3
1970  DB  102,15,58,33,203,32                 ; insertps      $0x20,%xmm3,%xmm1
1971  DB  243,66,15,16,28,129                 ; movss         (%rcx,%r8,4),%xmm3
1972  DB  102,15,58,33,203,48                 ; insertps      $0x30,%xmm3,%xmm1
; Third table (rax = pointer at +0x18; the entry pointer in rax is no longer
; needed): unpack the indices in xmm2
;   (r10 = lane 0, r9 = lane 1, r8 = lane 2, rcx = lane 3) and gather into xmm2.
1973  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
1974  DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
1975  DB  65,137,200                          ; mov           %ecx,%r8d
1976  DB  72,193,233,32                       ; shr           $0x20,%rcx
1977  DB  102,73,15,126,209                   ; movq          %xmm2,%r9
1978  DB  69,137,202                          ; mov           %r9d,%r10d
1979  DB  73,193,233,32                       ; shr           $0x20,%r9
1980  DB  243,66,15,16,20,144                 ; movss         (%rax,%r10,4),%xmm2
1981  DB  102,66,15,58,33,20,136,16           ; insertps      $0x10,(%rax,%r9,4),%xmm2
1982  DB  243,66,15,16,28,128                 ; movss         (%rax,%r8,4),%xmm3
1983  DB  102,15,58,33,211,32                 ; insertps      $0x20,%xmm3,%xmm2
1984  DB  243,15,16,28,136                    ; movss         (%rax,%rcx,4),%xmm3
1985  DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
; xmm3 = float(top byte of each source value) * broadcast(float at rdx+0xc);
; this field is scaled directly rather than looked up in a table.
1986  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
1987  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
1988  DB  243,15,16,90,12                     ; movss         0xc(%rdx),%xmm3
1989  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
1990  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Load the next list entry and tail-jump to it.
1991  DB  72,173                              ; lods          %ds:(%rsi),%rax
1992  DB  255,224                             ; jmpq          *%rax
1993
; _sk_load_565_sse41
; Loads four 16-bit values and expands three masked bit-fields of each into
; floats in xmm0..xmm2; xmm3 is filled with a broadcast constant.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 2) into the 16-bit source buffer
;   rdx   base of a constants table:
;           +0x68/+0x6c/+0x70  integer masks for xmm0/xmm1/xmm2
;           +0x74/+0x78/+0x7c  float scale factors for xmm0/xmm1/xmm2
;           +0x00              float broadcast into xmm3
; Outputs: xmmN = float(v & mask_N) * scale_N for N = 0..2; xmm3 = broadcast.
; Scratch: rax, xmm8, xmm9.
; NOTE(review): the masks are applied without a shift, so the scale factors
; presumably fold in each field's bit position, and the float at (%rdx) is
; presumably 1.0 - confirm against the constants table definition.
1994PUBLIC _sk_load_565_sse41
1995_sk_load_565_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
1996  DB  72,173                              ; lods          %ds:(%rsi),%rax
1997  DB  72,139,0                            ; mov           (%rax),%rax
; xmm9 = four 16-bit values at rax + rdi*2, zero-extended to dwords.
1998  DB  102,68,15,56,51,12,120              ; pmovzxwd      (%rax,%rdi,2),%xmm9
; xmm0 = float(xmm9 & mask@0x68) * scale@0x74
1999  DB  102,15,110,66,104                   ; movd          0x68(%rdx),%xmm0
2000  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
2001  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
2002  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
; xmm3 = scalar float at rdx+0; it is broadcast just before the exit.
2003  DB  243,15,16,26                        ; movss         (%rdx),%xmm3
2004  DB  243,15,16,66,116                    ; movss         0x74(%rdx),%xmm0
2005  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2006  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; xmm1 = float(xmm9 & mask@0x6c) * scale@0x78
2007  DB  102,15,110,74,108                   ; movd          0x6c(%rdx),%xmm1
2008  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
2009  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
2010  DB  68,15,91,193                        ; cvtdq2ps      %xmm1,%xmm8
2011  DB  243,15,16,74,120                    ; movss         0x78(%rdx),%xmm1
2012  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2013  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
; xmm2 = float(xmm9 & mask@0x70) * scale@0x7c
2014  DB  102,15,110,82,112                   ; movd          0x70(%rdx),%xmm2
2015  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
2016  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
2017  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
2018  DB  243,15,16,82,124                    ; movss         0x7c(%rdx),%xmm2
2019  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
2020  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Broadcast the scalar loaded earlier across xmm3.
2021  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Load the next list entry and tail-jump to it.
2022  DB  72,173                              ; lods          %ds:(%rsi),%rax
2023  DB  255,224                             ; jmpq          *%rax
2024
; _sk_store_565_sse41
; Packs xmm0..xmm2 into four 16-bit values and stores them.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 2) into the 16-bit destination buffer
;   rdx   base of a constants table: float at +0x80 scales xmm0 and xmm2,
;         float at +0x84 scales xmm1
;   xmm0..xmm2  read only; xmm3 is not read
; Stored value per lane:
;     (int(xmm0 * k80) << 11) | (int(xmm1 * k84) << 5) | int(xmm2 * k80)
; where int() is cvtps2dq (rounds according to the current MXCSR mode).
; Scratch: rax, xmm8, xmm9, xmm10.
; NOTE(review): k80/k84 are presumably 31.0 and 63.0 - confirm against the
; constants table definition.
2025PUBLIC _sk_store_565_sse41
2026_sk_store_565_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
2027  DB  72,173                              ; lods          %ds:(%rsi),%rax
2028  DB  72,139,0                            ; mov           (%rax),%rax
2029  DB  243,68,15,16,130,128,0,0,0          ; movss         0x80(%rdx),%xmm8
2030  DB  243,68,15,16,138,132,0,0,0          ; movss         0x84(%rdx),%xmm9
; xmm10 = int(xmm0 * k80) << 11
2031  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2032  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
2033  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
2034  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
2035  DB  102,65,15,114,242,11                ; pslld         $0xb,%xmm10
; xmm9 = (int(xmm1 * k84) << 5) | xmm10
2036  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2037  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
2038  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
2039  DB  102,65,15,114,241,5                 ; pslld         $0x5,%xmm9
2040  DB  102,69,15,235,202                   ; por           %xmm10,%xmm9
; xmm8 = int(xmm2 * k80) | xmm9   (orpd is used here purely as a bitwise OR)
2041  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
2042  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
2043  DB  102,69,15,86,193                    ; orpd          %xmm9,%xmm8
; Narrow the four dwords to words with unsigned saturation and store the low
; 8 bytes (four 16-bit values) at rax + rdi*2.
2044  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
2045  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
; Load the next list entry and tail-jump to it.
2046  DB  72,173                              ; lods          %ds:(%rsi),%rax
2047  DB  255,224                             ; jmpq          *%rax
2048
; _sk_load_8888_sse41
; Loads four 32-bit values and expands their four bit-fields into scaled
; floats in xmm0..xmm3.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 4) into the 32-bit source buffer
;   rdx   base of a constants table: dword mask at +0x10, float scale at +0xc
; Outputs (s = broadcast float at rdx+0xc, mask = broadcast dword at rdx+0x10):
;   xmm0 = float( v        & mask) * s
;   xmm1 = float((v >> 8)  & mask) * s
;   xmm2 = float((v >> 16) & mask) * s
;   xmm3 = float( v >> 24        ) * s
; Scratch: rax, xmm8.
; NOTE(review): mask is presumably 0xff and s presumably 1/255 - confirm
; against the constants table definition.
2049PUBLIC _sk_load_8888_sse41
2050_sk_load_8888_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
2051  DB  72,173                              ; lods          %ds:(%rsi),%rax
2052  DB  72,139,0                            ; mov           (%rax),%rax
; xmm3 = four 32-bit values at rax + rdi*4 (unaligned load).
2053  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
; xmm0 = mask in every lane; derive the three masked fields.
2054  DB  102,15,110,66,16                    ; movd          0x10(%rdx),%xmm0
2055  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
2056  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
2057  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
2058  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
2059  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
2060  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
2061  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
2062  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each field to float and multiply by the broadcast scale in xmm8.
2063  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
2064  DB  243,68,15,16,66,12                  ; movss         0xc(%rdx),%xmm8
2065  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2066  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2067  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
2068  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2069  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
2070  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; The top field needs no mask: the logical right shift by 24 leaves only it.
2071  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
2072  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
2073  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Load the next list entry and tail-jump to it.
2074  DB  72,173                              ; lods          %ds:(%rsi),%rax
2075  DB  255,224                             ; jmpq          *%rax
2076
; _sk_store_8888_sse41
; Packs xmm0..xmm3 into four 32-bit values and stores them.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 4) into the 32-bit destination buffer
;   rdx   base of a constants table; only the float at +0x8 is read here
;   xmm0..xmm3  read only
; Stored value per lane (k = broadcast float at rdx+0x8, int() = cvtps2dq,
; which rounds according to the current MXCSR mode):
;     int(xmm0*k) | int(xmm1*k) << 8 | int(xmm2*k) << 16 | int(xmm3*k) << 24
; Scratch: rax, xmm8, xmm9, xmm10.
; NOTE(review): no clamping or masking is done here, so the inputs are
; presumably already limited to a range where each product fits in 8 bits,
; and k is presumably 255.0 - confirm with the callers / constants table.
2077PUBLIC _sk_store_8888_sse41
2078_sk_store_8888_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
2079  DB  72,173                              ; lods          %ds:(%rsi),%rax
2080  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = k broadcast to all lanes.
2081  DB  243,68,15,16,66,8                   ; movss         0x8(%rdx),%xmm8
2082  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int(xmm0 * k)
2083  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2084  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
2085  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
; xmm10 = (int(xmm1 * k) << 8) | xmm9
2086  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
2087  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
2088  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
2089  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
2090  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = int(xmm2 * k) << 16
2091  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2092  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
2093  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
2094  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
; xmm8 = (int(xmm3 * k) << 24) | xmm9 | xmm10
2095  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
2096  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
2097  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
2098  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
2099  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
; Store 16 bytes (four 32-bit values) at rax + rdi*4 (unaligned store).
2100  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
; Load the next list entry and tail-jump to it.
2101  DB  72,173                              ; lods          %ds:(%rsi),%rax
2102  DB  255,224                             ; jmpq          *%rax
2103
; _sk_load_f16_sse41
; Loads four 8-byte elements (four 16-bit words each), transposes them so each
; output register holds one word position from all four elements, and widens
; the words to 32-bit floats.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 8) into the source buffer
;   rdx   base of a constants table: dword at +0x64 is a 16-bit-lane compare
;         threshold, dword at +0x5c is the multiplier applied after widening
; Outputs: xmm0..xmm3 = word 0..3 of each element, converted.
; Conversion per word h:  if (threshold > h, signed 16-bit compare) h = 0;
;                         result = as_float(h << 13) * multiplier
; Scratch: rax, xmm8, xmm9.
; NOTE(review): this looks like an IEEE half -> float conversion in which the
; multiply rebiases the exponent and the threshold test flushes small inputs
; to zero; because the compare is signed, words with bit 15 set also compare
; below the threshold - confirm intended handling of negative halves.
2104PUBLIC _sk_load_f16_sse41
2105_sk_load_f16_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
2106  DB  72,173                              ; lods          %ds:(%rsi),%rax
2107  DB  72,139,0                            ; mov           (%rax),%rax
; xmm0 = elements 0,1 and xmm1 = elements 2,3 (32 bytes at rax + rdi*8).
2108  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
2109  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of word interleaving transpose the 4x4 block of words:
;   xmm8 = [word0 of e0..e3 | word1 of e0..e3]
;   xmm2 = [word2 of e0..e3 | word3 of e0..e3]
2110  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
2111  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
2112  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
2113  DB  102,68,15,111,194                   ; movdqa        %xmm2,%xmm8
2114  DB  102,68,15,97,192                    ; punpcklwd     %xmm0,%xmm8
2115  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
; xmm3 = threshold dword broadcast; zero every word that the threshold exceeds:
;   xmm1 = xmm8 with small words cleared, xmm3 = xmm2 with small words cleared.
2116  DB  102,15,110,66,100                   ; movd          0x64(%rdx),%xmm0
2117  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
2118  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
2119  DB  102,65,15,101,200                   ; pcmpgtw       %xmm8,%xmm1
2120  DB  102,65,15,223,200                   ; pandn         %xmm8,%xmm1
2121  DB  102,15,101,218                      ; pcmpgtw       %xmm2,%xmm3
2122  DB  102,15,223,218                      ; pandn         %xmm2,%xmm3
; xmm0 = low four words of xmm1, zero-extended, shifted left 13, times xmm8
; (xmm8 is reloaded here with the broadcast multiplier from rdx+0x5c).
2123  DB  102,15,56,51,193                    ; pmovzxwd      %xmm1,%xmm0
2124  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
2125  DB  102,15,110,82,92                    ; movd          0x5c(%rdx),%xmm2
2126  DB  102,68,15,112,194,0                 ; pshufd        $0x0,%xmm2,%xmm8
2127  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
; xmm1 = high four words of xmm1 (interleaved with zeros from xmm9), same math.
2128  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
2129  DB  102,65,15,105,201                   ; punpckhwd     %xmm9,%xmm1
2130  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
2131  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
; xmm2 = low four words of xmm3, same math.
2132  DB  102,15,56,51,211                    ; pmovzxwd      %xmm3,%xmm2
2133  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
2134  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; xmm3 = high four words of xmm3, same math.
2135  DB  102,65,15,105,217                   ; punpckhwd     %xmm9,%xmm3
2136  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
2137  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Load the next list entry and tail-jump to it.
2138  DB  72,173                              ; lods          %ds:(%rsi),%rax
2139  DB  255,224                             ; jmpq          *%rax
2140
; _sk_store_f16_sse41
; Narrows xmm0..xmm3 to 16-bit words and stores them interleaved as four
; 8-byte elements (word order within an element: xmm0, xmm1, xmm2, xmm3).
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   rdi   element index (scaled by 8) into the destination buffer
;   rdx   base of a constants table; only the dword at +0x60 is read here
;   xmm0..xmm3  read only
; Narrowing per lane:  bits(xmmN * multiplier) >> 13  (logical shift).
; Scratch: rax, xmm8, xmm9, xmm10, xmm11.
; NOTE(review): this looks like a float -> IEEE half conversion where the
; multiply rebiases the exponent. The packing below assumes every shifted
; result fits in 16 bits; bits above bit 15 (e.g. from a set sign bit or a
; large magnitude) would leak into the neighbouring word - confirm that
; callers guarantee the input range.
2141PUBLIC _sk_store_f16_sse41
2142_sk_store_f16_sse41 LABEL PROC
; rax = first list entry, then rax = the pointer stored at that address.
2143  DB  72,173                              ; lods          %ds:(%rsi),%rax
2144  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = multiplier (dword at rdx+0x60) broadcast to all lanes.
2145  DB  102,68,15,110,66,96                 ; movd          0x60(%rdx),%xmm8
2146  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
; xmm9 = bits(xmm0 * m) >> 13
2147  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
2148  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
2149  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
; xmm10 = bits(xmm1 * m) >> 13
2150  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
2151  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
2152  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
; xmm11 = bits(xmm2 * m) >> 13
2153  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
2154  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
2155  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
; xmm8 = bits(xmm3 * m) >> 13
2156  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
2157  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
; Shift the whole register left by two bytes so each lane's low word moves to
; the lane's high word, then OR in the other register's low word:
;   xmm10 lanes = {xmm0 word, xmm1 word},  xmm8 lanes = {xmm2 word, xmm3 word}.
2158  DB  102,65,15,115,250,2                 ; pslldq        $0x2,%xmm10
2159  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
2160  DB  102,65,15,115,248,2                 ; pslldq        $0x2,%xmm8
2161  DB  102,69,15,235,195                   ; por           %xmm11,%xmm8
; Interleave dwords so each element's four words are contiguous, and store:
; elements 0,1 at rax + rdi*8 and elements 2,3 at rax + rdi*8 + 16.
2162  DB  102,69,15,111,202                   ; movdqa        %xmm10,%xmm9
2163  DB  102,69,15,98,200                    ; punpckldq     %xmm8,%xmm9
2164  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
2165  DB  102,69,15,106,208                   ; punpckhdq     %xmm8,%xmm10
2166  DB  243,68,15,127,84,248,16             ; movdqu        %xmm10,0x10(%rax,%rdi,8)
; Load the next list entry and tail-jump to it.
2167  DB  72,173                              ; lods          %ds:(%rsi),%rax
2168  DB  255,224                             ; jmpq          *%rax
2169
; _sk_clamp_x_sse41
; Clamps every lane of xmm0 to the range [0, limit'] where limit' is the float
; whose bit pattern is one less than the limit's bit pattern.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   xmm0  in/out
; The first list entry is used directly as the address of the float limit.
; Scratch: rax, xmm8, xmm9.
; NOTE(review): subtracting 1 from the bit pattern yields the next float below
; the limit only for positive finite limits - presumably that is the only case
; callers supply; confirm.
2170PUBLIC _sk_clamp_x_sse41
2171_sk_clamp_x_sse41 LABEL PROC
; xmm8 = limit (float at the address in the first list entry), broadcast.
2172  DB  72,173                              ; lods          %ds:(%rsi),%rax
2173  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
2174  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = all-ones (integer -1 per lane), then xmm9 = bits(limit) - 1.
2175  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
2176  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
; xmm0 = min(xmm0, xmm9);  xmm8 = max(0, xmm0)
2177  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
2178  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
2179  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
; Load the next list entry, move the result back into xmm0, and tail-jump.
2180  DB  72,173                              ; lods          %ds:(%rsi),%rax
2181  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
2182  DB  255,224                             ; jmpq          *%rax
2183
; _sk_clamp_y_sse41
; Clamps every lane of xmm1 to the range [0, limit'] where limit' is the float
; whose bit pattern is one less than the limit's bit pattern.
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   xmm1  in/out
; The first list entry is used directly as the address of the float limit.
; Scratch: rax, xmm8, xmm9.
; NOTE(review): subtracting 1 from the bit pattern yields the next float below
; the limit only for positive finite limits - presumably that is the only case
; callers supply; confirm.
2184PUBLIC _sk_clamp_y_sse41
2185_sk_clamp_y_sse41 LABEL PROC
; xmm8 = limit (float at the address in the first list entry), broadcast.
2186  DB  72,173                              ; lods          %ds:(%rsi),%rax
2187  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
2188  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = all-ones (integer -1 per lane), then xmm9 = bits(limit) - 1.
2189  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
2190  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
; xmm1 = min(xmm1, xmm9);  xmm8 = max(0, xmm1)
2191  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
2192  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
2193  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
; Load the next list entry, move the result back into xmm1, and tail-jump.
2194  DB  72,173                              ; lods          %ds:(%rsi),%rax
2195  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
2196  DB  255,224                             ; jmpq          *%rax
2197
; _sk_matrix_2x3_sse41
; Applies a 2x3 affine transform to (xmm0, xmm1). With m = the six floats at
; the address in the first list entry, and x/y = the incoming xmm0/xmm1:
;     xmm0 = m[0]*x + (m[2]*y + m[4])
;     xmm1 = m[1]*x + (m[3]*y + m[5])
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   xmm0, xmm1  in/out
; Scratch: rax, xmm8, xmm9, xmm10, xmm11.
2198PUBLIC _sk_matrix_2x3_sse41
2199_sk_matrix_2x3_sse41 LABEL PROC
; Save the inputs: xmm9 = y, xmm8 = x.
2200  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
2201  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
; rax = address of m.
2202  DB  72,173                              ; lods          %ds:(%rsi),%rax
; First output: xmm0 = m[0]*x + (m[2]*y + m[4]).
; (m[1] is loaded into xmm1 here and broadcast later.)
2203  DB  243,15,16,0                         ; movss         (%rax),%xmm0
2204  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
2205  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2206  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
2207  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2208  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
2209  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2210  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
2211  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
2212  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2213  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output: xmm1 = m[1]*x + (m[3]*y + m[5]).
2214  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2215  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
2216  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2217  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
2218  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2219  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
2220  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
2221  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2222  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Load the next list entry and tail-jump to it.
2223  DB  72,173                              ; lods          %ds:(%rsi),%rax
2224  DB  255,224                             ; jmpq          *%rax
2225
; _sk_matrix_3x4_sse41
; Applies a 3x4 affine transform to (xmm0, xmm1, xmm2). With m = the twelve
; floats at the address in the first list entry, and a/b/c = the incoming
; xmm0/xmm1/xmm2:
;     xmm0 = m[0]*a + (m[3]*b + (m[6]*c + m[9]))
;     xmm1 = m[1]*a + (m[4]*b + (m[7]*c + m[10]))
;     xmm2 = m[2]*a + (m[5]*b + (m[8]*c + m[11]))
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   xmm0..xmm2  in/out; xmm3 is not touched
; Scratch: rax, xmm8..xmm13.
2226PUBLIC _sk_matrix_3x4_sse41
2227_sk_matrix_3x4_sse41 LABEL PROC
; Save the inputs: xmm9 = b, xmm8 = a. c stays in xmm2 until the very end.
2228  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
2229  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
; rax = address of m.
2230  DB  72,173                              ; lods          %ds:(%rsi),%rax
; First output: xmm0 = m[0]*a + (m[3]*b + (m[6]*c + m[9])).
; (m[1] is loaded into xmm1 here and broadcast later.)
2231  DB  243,15,16,0                         ; movss         (%rax),%xmm0
2232  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
2233  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2234  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
2235  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2236  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
2237  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2238  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
2239  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
2240  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
2241  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
2242  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
2243  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
2244  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2245  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output: xmm1 = m[1]*a + (m[4]*b + (m[7]*c + m[10])).
2246  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2247  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
2248  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2249  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
2250  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2251  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
2252  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
2253  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
2254  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
2255  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
2256  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
2257  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2258  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output, built in xmm10 because xmm2 still holds the input c:
;   xmm10 = m[2]*a + (m[5]*b + (m[8]*c + m[11])).
2259  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
2260  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2261  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
2262  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2263  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
2264  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
2265  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
2266  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
2267  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
2268  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
2269  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
2270  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
2271  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
2272  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Load the next list entry, move the third result into xmm2, and tail-jump.
2273  DB  72,173                              ; lods          %ds:(%rsi),%rax
2274  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
2275  DB  255,224                             ; jmpq          *%rax
2276
; _sk_linear_gradient_2stops_sse41
; Evaluates four linear functions of the incoming xmm0 (call it t). With
; c0 = the four floats at the address in the first list entry and
; c1 = the four floats at that address + 16:
;     xmm0 = c1[0]*t + c0[0]
;     xmm1 = c1[1]*t + c0[1]
;     xmm2 = c1[2]*t + c0[2]
;     xmm3 = c1[3]*t + c0[3]
;   rsi   cursor into a list of 8-byte entries consumed with lods
;   xmm0  in (t) / out;  xmm1..xmm3  out (previous contents are overwritten)
; Scratch: rax, xmm8, xmm9, xmm10.
2277PUBLIC _sk_linear_gradient_2stops_sse41
2278_sk_linear_gradient_2stops_sse41 LABEL PROC
; rax = address of c0; xmm9 = c0, xmm3 = c1 (unaligned loads).
2279  DB  72,173                              ; lods          %ds:(%rsi),%rax
2280  DB  68,15,16,8                          ; movups        (%rax),%xmm9
2281  DB  15,16,88,16                         ; movups        0x10(%rax),%xmm3
; xmm8 = c1[0]*t + c0[0]  (kept in xmm8 because xmm0 still holds t).
2282  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
2283  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2284  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
2285  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2286  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
2287  DB  68,15,88,193                        ; addps         %xmm1,%xmm8
; xmm1 = c1[1]*t + c0[1]  (xmm2 is a temporary for the broadcast of c0[1]).
2288  DB  15,40,203                           ; movaps        %xmm3,%xmm1
2289  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
2290  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
2291  DB  15,198,210,85                       ; shufps        $0x55,%xmm2,%xmm2
2292  DB  15,89,200                           ; mulps         %xmm0,%xmm1
2293  DB  15,88,202                           ; addps         %xmm2,%xmm1
; xmm2 = c1[2]*t + c0[2]
2294  DB  15,40,211                           ; movaps        %xmm3,%xmm2
2295  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
2296  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
2297  DB  69,15,198,210,170                   ; shufps        $0xaa,%xmm10,%xmm10
2298  DB  15,89,208                           ; mulps         %xmm0,%xmm2
2299  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
; xmm3 = c1[3]*t + c0[3]  (c1 and c0 are consumed in place; last use of each).
2300  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
2301  DB  69,15,198,201,255                   ; shufps        $0xff,%xmm9,%xmm9
2302  DB  15,89,216                           ; mulps         %xmm0,%xmm3
2303  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
; Load the next list entry, move the first result into xmm0, and tail-jump.
2304  DB  72,173                              ; lods          %ds:(%rsi),%rax
2305  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
2306  DB  255,224                             ; jmpq          *%rax
2307
; _sk_start_pipeline_sse2
; Driver loop. Arguments arrive in the Windows x64 argument registers and are
; re-homed before the loop:
;   rcx -> rbx : running index, handed to every call in rdi
;   rdx -> rsi : pointer to a stream of qwords; the first qword is the code
;                address that gets called (kept in r12), the advanced stream
;                pointer is kept in r13 and handed to every call in rsi
;   r8  -> r14 : handed to every call in rdx, otherwise untouched
;   r9  -> r15 : upper bound for the index
; While index+4 <= bound (unsigned compare) it zeroes xmm0-xmm7, calls *r12 and
; advances the index by 4.  rax holds the index at which the loop stopped when
; the function returns.
; Saves/restores rbx, rdi, rsi, r12-r15 and xmm6-xmm15 around the loop.  The loop
; reads rbx and r12-r15 after the call without reloading them, so it relies on
; the callee leaving those registers intact.
; NOTE(review): the arguments look like (x, program, constants, limit) - confirm
; against the C++ source this was generated from.
2308PUBLIC _sk_start_pipeline_sse2
2309_sk_start_pipeline_sse2 LABEL PROC
2310  DB  65,87                               ; push          %r15
2311  DB  65,86                               ; push          %r14
2312  DB  65,85                               ; push          %r13
2313  DB  65,84                               ; push          %r12
2314  DB  86                                  ; push          %rsi
2315  DB  87                                  ; push          %rdi
2316  DB  83                                  ; push          %rbx
; 0xa0 bytes = ten 16-byte slots for xmm6-xmm15.
2317  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
2318  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
2319  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
2320  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
2321  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
2322  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
2323  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
2324  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
2325  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
2326  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
2327  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
; Re-home the four incoming arguments (see header).
2328  DB  77,137,207                          ; mov           %r9,%r15
2329  DB  77,137,198                          ; mov           %r8,%r14
2330  DB  72,137,203                          ; mov           %rcx,%rbx
2331  DB  72,137,214                          ; mov           %rdx,%rsi
; r12 = first qword of the stream (call target), r13 = stream pointer after it.
2332  DB  72,173                              ; lods          %ds:(%rsi),%rax
2333  DB  73,137,196                          ; mov           %rax,%r12
2334  DB  73,137,245                          ; mov           %rsi,%r13
; Pre-check: skip the loop entirely when index+4 > bound; rax = index then.
2335  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
2336  DB  76,57,248                           ; cmp           %r15,%rax
2337  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse2+0x73>
2338  DB  72,137,216                          ; mov           %rbx,%rax
2339  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse2+0xa7>
; Loop body (+0x73): clear xmm0-xmm7, load rdi/rsi/rdx, call the target.
2340  DB  15,87,192                           ; xorps         %xmm0,%xmm0
2341  DB  15,87,201                           ; xorps         %xmm1,%xmm1
2342  DB  15,87,210                           ; xorps         %xmm2,%xmm2
2343  DB  15,87,219                           ; xorps         %xmm3,%xmm3
2344  DB  15,87,228                           ; xorps         %xmm4,%xmm4
2345  DB  15,87,237                           ; xorps         %xmm5,%xmm5
2346  DB  15,87,246                           ; xorps         %xmm6,%xmm6
2347  DB  15,87,255                           ; xorps         %xmm7,%xmm7
2348  DB  72,137,223                          ; mov           %rbx,%rdi
2349  DB  76,137,238                          ; mov           %r13,%rsi
2350  DB  76,137,242                          ; mov           %r14,%rdx
2351  DB  65,255,212                          ; callq         *%r12
; rax = index+4 (the next index); the compare uses index+8, i.e. next+4, and
; the mov (which leaves flags alone) commits next into rbx before the branch.
2352  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
2353  DB  72,131,195,8                        ; add           $0x8,%rbx
2354  DB  76,57,251                           ; cmp           %r15,%rbx
2355  DB  72,137,195                          ; mov           %rax,%rbx
2356  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse2+0x73>
; Epilogue (+0xa7): restore xmm6-xmm15 and the pushed registers; rax untouched.
2357  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
2358  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
2359  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
2360  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
2361  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
2362  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
2363  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
2364  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
2365  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
2366  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
2367  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
2368  DB  91                                  ; pop           %rbx
2369  DB  95                                  ; pop           %rdi
2370  DB  94                                  ; pop           %rsi
2371  DB  65,92                               ; pop           %r12
2372  DB  65,93                               ; pop           %r13
2373  DB  65,94                               ; pop           %r14
2374  DB  65,95                               ; pop           %r15
2375  DB  195                                 ; retq
2376
; _sk_just_return_sse2
; Consists of a single ret: no registers are read or written.
; NOTE(review): presumably the terminating entry of a stage stream, returning
; to the callq in the driver loop - confirm against the pipeline builder.
2377PUBLIC _sk_just_return_sse2
2378_sk_just_return_sse2 LABEL PROC
2379  DB  195                                 ; retq
2380
; _sk_seed_shader_sse2
; Reads: edi (integer index), rdx (pointer to a block of constants), and one
;        qword from the rsi stream used as a pointer to a 32-bit integer.
; Writes:
;   xmm0 = float(edi) broadcast + float at 4(rdx) broadcast + 4 floats at 0x14(rdx)
;   xmm1 = float(int at that pointer) broadcast + float at 4(rdx) broadcast
;   xmm2 = float at (rdx) broadcast
;   xmm3..xmm7 = 0
; Then fetches the next qword from the rsi stream and jumps to it.
; NOTE(review): the constants look like 1.0f at +0, 0.5f at +4 and a
; {0,1,2,3} ramp at +0x14 - confirm against the constants struct.
2381PUBLIC _sk_seed_shader_sse2
2382_sk_seed_shader_sse2 LABEL PROC
; rax = pointer argument for this stage (dereferenced below).
2383  DB  72,173                              ; lods          %ds:(%rsi),%rax
2384  DB  102,15,110,199                      ; movd          %edi,%xmm0
2385  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
2386  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
2387  DB  243,15,16,18                        ; movss         (%rdx),%xmm2
2388  DB  243,15,16,90,4                      ; movss         0x4(%rdx),%xmm3
2389  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
2390  DB  15,88,203                           ; addps         %xmm3,%xmm1
2391  DB  15,16,66,20                         ; movups        0x14(%rdx),%xmm0
2392  DB  15,88,193                           ; addps         %xmm1,%xmm0
2393  DB  102,15,110,8                        ; movd          (%rax),%xmm1
2394  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
2395  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
2396  DB  15,88,203                           ; addps         %xmm3,%xmm1
2397  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
; rax = address of the next stage.
2398  DB  72,173                              ; lods          %ds:(%rsi),%rax
2399  DB  15,87,219                           ; xorps         %xmm3,%xmm3
2400  DB  15,87,228                           ; xorps         %xmm4,%xmm4
2401  DB  15,87,237                           ; xorps         %xmm5,%xmm5
2402  DB  15,87,246                           ; xorps         %xmm6,%xmm6
2403  DB  15,87,255                           ; xorps         %xmm7,%xmm7
2404  DB  255,224                             ; jmpq          *%rax
2405
; _sk_constant_color_sse2
; Takes one qword from the rsi stream as a pointer to four floats (loaded with
; an unaligned 16-byte load) and broadcasts them:
;   xmm0 = float[0], xmm1 = float[1], xmm2 = float[2], xmm3 = float[3]
; Then fetches the next qword from the stream and jumps to it.
; NOTE(review): presumably the four floats are r,g,b,a of a color - confirm.
2406PUBLIC _sk_constant_color_sse2
2407_sk_constant_color_sse2 LABEL PROC
2408  DB  72,173                              ; lods          %ds:(%rsi),%rax
2409  DB  15,16,24                            ; movups        (%rax),%xmm3
2410  DB  15,40,195                           ; movaps        %xmm3,%xmm0
2411  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2412  DB  15,40,203                           ; movaps        %xmm3,%xmm1
2413  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
2414  DB  15,40,211                           ; movaps        %xmm3,%xmm2
2415  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
; xmm3 is broadcast last because it is the source of the other three.
2416  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
2417  DB  72,173                              ; lods          %ds:(%rsi),%rax
2418  DB  255,224                             ; jmpq          *%rax
2419
; _sk_clear_sse2
; Sets xmm0..xmm3 to zero; xmm4..xmm7 are not touched.
; Two qwords are consumed from the rsi stream: the first is loaded and
; immediately overwritten (unused by this stage), the second is the address
; jumped to at the end.
2420PUBLIC _sk_clear_sse2
2421_sk_clear_sse2 LABEL PROC
2422  DB  72,173                              ; lods          %ds:(%rsi),%rax
2423  DB  72,173                              ; lods          %ds:(%rsi),%rax
2424  DB  15,87,192                           ; xorps         %xmm0,%xmm0
2425  DB  15,87,201                           ; xorps         %xmm1,%xmm1
2426  DB  15,87,210                           ; xorps         %xmm2,%xmm2
2427  DB  15,87,219                           ; xorps         %xmm3,%xmm3
2428  DB  255,224                             ; jmpq          *%rax
2429
; _sk_plus__sse2
; Lane-wise float add: xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7.
; Consumes two qwords from the rsi stream; the first is unused here, the
; second is the address jumped to at the end.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a - confirm against the stage calling convention.
2430PUBLIC _sk_plus__sse2
2431_sk_plus__sse2 LABEL PROC
2432  DB  72,173                              ; lods          %ds:(%rsi),%rax
2433  DB  15,88,196                           ; addps         %xmm4,%xmm0
2434  DB  15,88,205                           ; addps         %xmm5,%xmm1
2435  DB  15,88,214                           ; addps         %xmm6,%xmm2
2436  DB  15,88,223                           ; addps         %xmm7,%xmm3
2437  DB  72,173                              ; lods          %ds:(%rsi),%rax
2438  DB  255,224                             ; jmpq          *%rax
2439
; _sk_srcover_sse2
; With c = float at (rdx) broadcast to all lanes and t = c - xmm3:
;   xmm0 += xmm4 * t
;   xmm1 += xmm5 * t
;   xmm2 += xmm6 * t
;   xmm3 += xmm7 * t      (t is computed from xmm3 before this update)
; Scratch: xmm8, xmm9.  Consumes two qwords from the rsi stream (first unused,
; second is the jump target).
; NOTE(review): c is presumably 1.0f, making t = 1 - source alpha - confirm.
2440PUBLIC _sk_srcover_sse2
2441_sk_srcover_sse2 LABEL PROC
2442  DB  72,173                              ; lods          %ds:(%rsi),%rax
2443  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
2444  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2445  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
2446  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2447  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
2448  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
2449  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2450  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
2451  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
2452  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2453  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
2454  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
2455  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
2456  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
2457  DB  72,173                              ; lods          %ds:(%rsi),%rax
2458  DB  255,224                             ; jmpq          *%rax
2459
; _sk_dstover_sse2
; With c = float at (rdx) broadcast to all lanes and t = c - xmm7:
;   xmm0 = xmm0 * t + xmm4
;   xmm1 = xmm1 * t + xmm5
;   xmm2 = xmm2 * t + xmm6
;   xmm3 = xmm3 * t + xmm7
; Scratch: xmm8.  Consumes two qwords from the rsi stream (first unused,
; second is the jump target).
; NOTE(review): c is presumably 1.0f, making t = 1 - destination alpha - confirm.
2460PUBLIC _sk_dstover_sse2
2461_sk_dstover_sse2 LABEL PROC
2462  DB  72,173                              ; lods          %ds:(%rsi),%rax
2463  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
2464  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2465  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
2466  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2467  DB  15,88,196                           ; addps         %xmm4,%xmm0
2468  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2469  DB  15,88,205                           ; addps         %xmm5,%xmm1
2470  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
2471  DB  15,88,214                           ; addps         %xmm6,%xmm2
2472  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
2473  DB  15,88,223                           ; addps         %xmm7,%xmm3
2474  DB  72,173                              ; lods          %ds:(%rsi),%rax
2475  DB  255,224                             ; jmpq          *%rax
2476
; _sk_clamp_0_sse2
; Lower clamp: each of xmm0..xmm3 becomes max(value, 0.0) per lane.
; Scratch: xmm8 (zeroed).  Consumes two qwords from the rsi stream (first
; unused, second is the jump target).
2477PUBLIC _sk_clamp_0_sse2
2478_sk_clamp_0_sse2 LABEL PROC
2479  DB  72,173                              ; lods          %ds:(%rsi),%rax
2480  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
2481  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
2482  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
2483  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
2484  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
2485  DB  72,173                              ; lods          %ds:(%rsi),%rax
2486  DB  255,224                             ; jmpq          *%rax
2487
; _sk_clamp_1_sse2
; Upper clamp: each of xmm0..xmm3 becomes min(value, c) per lane, where c is
; the float at (rdx) broadcast to all lanes.
; Scratch: xmm8.  Consumes two qwords from the rsi stream (first unused,
; second is the jump target).
; NOTE(review): c is presumably 1.0f, as the stage name suggests - confirm.
2488PUBLIC _sk_clamp_1_sse2
2489_sk_clamp_1_sse2 LABEL PROC
2490  DB  72,173                              ; lods          %ds:(%rsi),%rax
2491  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
2492  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2493  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
2494  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
2495  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
2496  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
2497  DB  72,173                              ; lods          %ds:(%rsi),%rax
2498  DB  255,224                             ; jmpq          *%rax
2499
; _sk_clamp_a_sse2
; First xmm3 = min(xmm3, c), with c = float at (rdx) broadcast; then
; xmm0, xmm1 and xmm2 are each limited to at most the new xmm3 (per lane).
; Scratch: xmm8.  Consumes two qwords from the rsi stream (first unused,
; second is the jump target).
; NOTE(review): presumably clamps alpha to 1.0f and color to alpha - confirm.
2500PUBLIC _sk_clamp_a_sse2
2501_sk_clamp_a_sse2 LABEL PROC
2502  DB  72,173                              ; lods          %ds:(%rsi),%rax
2503  DB  243,68,15,16,2                      ; movss         (%rdx),%xmm8
2504  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2505  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
2506  DB  15,93,195                           ; minps         %xmm3,%xmm0
2507  DB  15,93,203                           ; minps         %xmm3,%xmm1
2508  DB  15,93,211                           ; minps         %xmm3,%xmm2
2509  DB  72,173                              ; lods          %ds:(%rsi),%rax
2510  DB  255,224                             ; jmpq          *%rax
2511
; _sk_set_rgb_sse2
; Takes one qword from the rsi stream as a pointer to three consecutive
; floats and broadcasts them: xmm0 = [ptr], xmm1 = [ptr+4], xmm2 = [ptr+8].
; xmm3 is left unchanged.  Then fetches the next qword from the stream and
; jumps to it.
2512PUBLIC _sk_set_rgb_sse2
2513_sk_set_rgb_sse2 LABEL PROC
2514  DB  72,173                              ; lods          %ds:(%rsi),%rax
2515  DB  243,15,16,0                         ; movss         (%rax),%xmm0
2516  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
2517  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2518  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2519  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
2520  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
2521  DB  72,173                              ; lods          %ds:(%rsi),%rax
2522  DB  255,224                             ; jmpq          *%rax
2523
; _sk_swap_rb_sse2
; Exchanges xmm0 and xmm2, using xmm8 as the temporary; xmm1 and xmm3 are
; untouched.  Consumes two qwords from the rsi stream (first unused, second
; is the jump target).
2524PUBLIC _sk_swap_rb_sse2
2525_sk_swap_rb_sse2 LABEL PROC
2526  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
2527  DB  72,173                              ; lods          %ds:(%rsi),%rax
2528  DB  72,173                              ; lods          %ds:(%rsi),%rax
2529  DB  15,40,194                           ; movaps        %xmm2,%xmm0
2530  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
2531  DB  255,224                             ; jmpq          *%rax
2532
; _sk_swap_sse2
; Exchanges the register group xmm0..xmm3 with xmm4..xmm7 (xmm0<->xmm4,
; xmm1<->xmm5, xmm2<->xmm6, xmm3<->xmm7).  xmm8..xmm11 hold the old
; xmm3..xmm0 while the copy is in flight.  Consumes two qwords from the rsi
; stream (first unused, second is the jump target).
2533PUBLIC _sk_swap_sse2
2534_sk_swap_sse2 LABEL PROC
2535  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
2536  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
2537  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
2538  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
2539  DB  72,173                              ; lods          %ds:(%rsi),%rax
2540  DB  72,173                              ; lods          %ds:(%rsi),%rax
2541  DB  15,40,196                           ; movaps        %xmm4,%xmm0
2542  DB  15,40,205                           ; movaps        %xmm5,%xmm1
2543  DB  15,40,214                           ; movaps        %xmm6,%xmm2
2544  DB  15,40,223                           ; movaps        %xmm7,%xmm3
2545  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
2546  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
2547  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
2548  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
2549  DB  255,224                             ; jmpq          *%rax
2550
; _sk_move_src_dst_sse2
; Copies xmm0..xmm3 into xmm4..xmm7; xmm0..xmm3 keep their values.
; Consumes two qwords from the rsi stream (first unused, second is the jump
; target).
2551PUBLIC _sk_move_src_dst_sse2
2552_sk_move_src_dst_sse2 LABEL PROC
2553  DB  72,173                              ; lods          %ds:(%rsi),%rax
2554  DB  72,173                              ; lods          %ds:(%rsi),%rax
2555  DB  15,40,224                           ; movaps        %xmm0,%xmm4
2556  DB  15,40,233                           ; movaps        %xmm1,%xmm5
2557  DB  15,40,242                           ; movaps        %xmm2,%xmm6
2558  DB  15,40,251                           ; movaps        %xmm3,%xmm7
2559  DB  255,224                             ; jmpq          *%rax
2560
; _sk_move_dst_src_sse2
; Copies xmm4..xmm7 into xmm0..xmm3; xmm4..xmm7 keep their values.
; Consumes two qwords from the rsi stream (first unused, second is the jump
; target).
2561PUBLIC _sk_move_dst_src_sse2
2562_sk_move_dst_src_sse2 LABEL PROC
2563  DB  72,173                              ; lods          %ds:(%rsi),%rax
2564  DB  72,173                              ; lods          %ds:(%rsi),%rax
2565  DB  15,40,196                           ; movaps        %xmm4,%xmm0
2566  DB  15,40,205                           ; movaps        %xmm5,%xmm1
2567  DB  15,40,214                           ; movaps        %xmm6,%xmm2
2568  DB  15,40,223                           ; movaps        %xmm7,%xmm3
2569  DB  255,224                             ; jmpq          *%rax
2570
; _sk_premul_sse2
; Multiplies xmm0, xmm1 and xmm2 by xmm3 (per lane); xmm3 itself is unchanged.
; Consumes two qwords from the rsi stream (first unused, second is the jump
; target).
2571PUBLIC _sk_premul_sse2
2572_sk_premul_sse2 LABEL PROC
2573  DB  72,173                              ; lods          %ds:(%rsi),%rax
2574  DB  15,89,195                           ; mulps         %xmm3,%xmm0
2575  DB  15,89,203                           ; mulps         %xmm3,%xmm1
2576  DB  15,89,211                           ; mulps         %xmm3,%xmm2
2577  DB  72,173                              ; lods          %ds:(%rsi),%rax
2578  DB  255,224                             ; jmpq          *%rax
2579
; _sk_unpremul_sse2
; Per lane: scale = (xmm3 == 0.0) ? 0.0 : c / xmm3, with c = float at (rdx)
; broadcast; then xmm0, xmm1 and xmm2 are multiplied by scale.  xmm3 is
; unchanged.  Scratch: xmm8, xmm9.  Consumes two qwords from the rsi stream
; (first unused, second is the jump target).
; NOTE(review): c is presumably 1.0f - confirm against the constants struct.
2580PUBLIC _sk_unpremul_sse2
2581_sk_unpremul_sse2 LABEL PROC
2582  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = all-ones mask in lanes where xmm3 == 0.
2583  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
2584  DB  68,15,194,195,0                     ; cmpeqps       %xmm3,%xmm8
2585  DB  243,68,15,16,10                     ; movss         (%rdx),%xmm9
2586  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2587  DB  68,15,94,203                        ; divps         %xmm3,%xmm9
; andnps clears the quotient in the masked lanes, so a zero divisor yields 0.
2588  DB  69,15,85,193                        ; andnps        %xmm9,%xmm8
2589  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2590  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2591  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
2592  DB  72,173                              ; lods          %ds:(%rsi),%rax
2593  DB  255,224                             ; jmpq          *%rax
2594
; _sk_from_srgb_sse2
; Applies the same piecewise function to each of xmm0, xmm1, xmm2 (xmm3 is not
; touched).  With broadcast float constants read from rdx:
;   A = 0x40(rdx), B = 0x3c(rdx), C = 0x38(rdx), D = 0x34(rdx), T = 0x44(rdx)
; each lane v becomes
;   v < T ?  A*v  :  (B*v + C) * v*v + D
; The select is done branch-free with cmpltps / andps / andnps / orps.
; Scratch: xmm8..xmm15.  Consumes two qwords from the rsi stream (first unused,
; second is the jump target).
; NOTE(review): presumably a linear segment plus polynomial approximation of
; the sRGB-to-linear curve - confirm constants against the constants struct.
2595PUBLIC _sk_from_srgb_sse2
2596_sk_from_srgb_sse2 LABEL PROC
2597  DB  72,173                              ; lods          %ds:(%rsi),%rax
; --- xmm0: xmm13 = A*v, xmm12 = v*v, xmm14 = polynomial branch ---
2598  DB  243,68,15,16,66,64                  ; movss         0x40(%rdx),%xmm8
2599  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2600  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
2601  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
2602  DB  68,15,40,224                        ; movaps        %xmm0,%xmm12
2603  DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
2604  DB  243,68,15,16,74,60                  ; movss         0x3c(%rdx),%xmm9
2605  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2606  DB  243,68,15,16,82,52                  ; movss         0x34(%rdx),%xmm10
2607  DB  243,68,15,16,90,56                  ; movss         0x38(%rdx),%xmm11
2608  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2609  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
2610  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
2611  DB  69,15,88,243                        ; addps         %xmm11,%xmm14
2612  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2613  DB  69,15,89,244                        ; mulps         %xmm12,%xmm14
2614  DB  69,15,88,242                        ; addps         %xmm10,%xmm14
; xmm12 is reused from here on for the threshold T.
2615  DB  243,68,15,16,98,68                  ; movss         0x44(%rdx),%xmm12
2616  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
; xmm0 becomes the (v < T) mask, then the blended result.
2617  DB  65,15,194,196,1                     ; cmpltps       %xmm12,%xmm0
2618  DB  68,15,84,232                        ; andps         %xmm0,%xmm13
2619  DB  65,15,85,198                        ; andnps        %xmm14,%xmm0
2620  DB  65,15,86,197                        ; orps          %xmm13,%xmm0
; --- xmm1: same computation, using xmm13/xmm14/xmm15 as temporaries ---
2621  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
2622  DB  68,15,89,233                        ; mulps         %xmm1,%xmm13
2623  DB  68,15,40,241                        ; movaps        %xmm1,%xmm14
2624  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
2625  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
2626  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
2627  DB  69,15,88,251                        ; addps         %xmm11,%xmm15
2628  DB  69,15,89,254                        ; mulps         %xmm14,%xmm15
2629  DB  69,15,88,250                        ; addps         %xmm10,%xmm15
2630  DB  65,15,194,204,1                     ; cmpltps       %xmm12,%xmm1
2631  DB  68,15,84,233                        ; andps         %xmm1,%xmm13
2632  DB  65,15,85,207                        ; andnps        %xmm15,%xmm1
2633  DB  65,15,86,205                        ; orps          %xmm13,%xmm1
; --- xmm2: last use, so A (xmm8) and B (xmm9) are consumed in place ---
2634  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
2635  DB  68,15,40,234                        ; movaps        %xmm2,%xmm13
2636  DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
2637  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
2638  DB  69,15,88,203                        ; addps         %xmm11,%xmm9
2639  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
2640  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
2641  DB  65,15,194,212,1                     ; cmpltps       %xmm12,%xmm2
2642  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
2643  DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
2644  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
2645  DB  72,173                              ; lods          %ds:(%rsi),%rax
2646  DB  255,224                             ; jmpq          *%rax
2647
; _sk_to_srgb_sse2
; Applies the same piecewise function to each of xmm0, xmm1, xmm2.  With
; broadcast float constants read from rdx:
;   ONE = (rdx), A = 0x48(rdx), P = 0x4c(rdx), Q = 0x50(rdx), R = 0x54(rdx),
;   T = 0x58(rdx)
; and, per lane v,  rs = rsqrtps(v),  s = rcpps(rs),  f = rsqrtps(rs):
;   v < T ?  A*v  :  min(ONE, P*f + (Q*s + R))
; rsqrtps/rcpps are the hardware approximations, so s and f are approximate.
; xmm3..xmm7 have the same values on exit as on entry: xmm3/xmm4/xmm5 are
; parked in xmm4/xmm5/xmm6 while xmm3 and xmm7 serve as scratch, and
; xmm6/xmm7 are spilled to a 0x28-byte stack area that is released before the
; final jump.  Other scratch: xmm8..xmm15.
; Consumes two qwords from the rsi stream (first unused, second is the jump
; target).
; NOTE(review): ONE is presumably 1.0f, and s/f approximate sqrt(v) and
; v^(1/4) for a linear-to-sRGB curve fit - confirm against the C++ source.
2648PUBLIC _sk_to_srgb_sse2
2649_sk_to_srgb_sse2 LABEL PROC
; Spill xmm6/xmm7 and shift xmm3..xmm5 up one register to free xmm3.
2650  DB  72,131,236,40                       ; sub           $0x28,%rsp
2651  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
2652  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
2653  DB  15,40,245                           ; movaps        %xmm5,%xmm6
2654  DB  15,40,236                           ; movaps        %xmm4,%xmm5
2655  DB  15,40,227                           ; movaps        %xmm3,%xmm4
; --- xmm0: xmm13 = s, xmm15 = f, xmm14 = A*v ---
2656  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
2657  DB  69,15,83,232                        ; rcpps         %xmm8,%xmm13
2658  DB  69,15,82,248                        ; rsqrtps       %xmm8,%xmm15
2659  DB  243,15,16,26                        ; movss         (%rdx),%xmm3
2660  DB  243,68,15,16,66,72                  ; movss         0x48(%rdx),%xmm8
2661  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2662  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
2663  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
2664  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
2665  DB  243,68,15,16,82,76                  ; movss         0x4c(%rdx),%xmm10
2666  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
2667  DB  243,68,15,16,90,80                  ; movss         0x50(%rdx),%xmm11
2668  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
2669  DB  243,68,15,16,98,84                  ; movss         0x54(%rdx),%xmm12
2670  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
2671  DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
2672  DB  69,15,88,236                        ; addps         %xmm12,%xmm13
2673  DB  69,15,89,250                        ; mulps         %xmm10,%xmm15
2674  DB  69,15,88,253                        ; addps         %xmm13,%xmm15
2675  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
2676  DB  69,15,93,207                        ; minps         %xmm15,%xmm9
2677  DB  243,68,15,16,106,88                 ; movss         0x58(%rdx),%xmm13
2678  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
; xmm0 becomes the (v < T) mask, then the blended result.
2679  DB  65,15,194,197,1                     ; cmpltps       %xmm13,%xmm0
2680  DB  68,15,84,240                        ; andps         %xmm0,%xmm14
2681  DB  65,15,85,193                        ; andnps        %xmm9,%xmm0
2682  DB  65,15,86,198                        ; orps          %xmm14,%xmm0
; --- xmm1: same computation with xmm9/xmm14 as temporaries ---
2683  DB  68,15,82,201                        ; rsqrtps       %xmm1,%xmm9
2684  DB  69,15,83,241                        ; rcpps         %xmm9,%xmm14
2685  DB  69,15,82,201                        ; rsqrtps       %xmm9,%xmm9
2686  DB  69,15,89,243                        ; mulps         %xmm11,%xmm14
2687  DB  69,15,88,244                        ; addps         %xmm12,%xmm14
2688  DB  69,15,89,202                        ; mulps         %xmm10,%xmm9
2689  DB  69,15,88,206                        ; addps         %xmm14,%xmm9
2690  DB  68,15,40,243                        ; movaps        %xmm3,%xmm14
2691  DB  69,15,93,241                        ; minps         %xmm9,%xmm14
2692  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2693  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
2694  DB  65,15,194,205,1                     ; cmpltps       %xmm13,%xmm1
2695  DB  68,15,84,201                        ; andps         %xmm1,%xmm9
2696  DB  65,15,85,206                        ; andnps        %xmm14,%xmm1
2697  DB  65,15,86,201                        ; orps          %xmm9,%xmm1
; --- xmm2: last use, so ONE (xmm3) and A (xmm8) are consumed in place;
;     xmm7 is the extra temporary that required the spill above ---
2698  DB  68,15,82,202                        ; rsqrtps       %xmm2,%xmm9
2699  DB  69,15,83,241                        ; rcpps         %xmm9,%xmm14
2700  DB  69,15,89,243                        ; mulps         %xmm11,%xmm14
2701  DB  69,15,88,244                        ; addps         %xmm12,%xmm14
2702  DB  65,15,82,249                        ; rsqrtps       %xmm9,%xmm7
2703  DB  65,15,89,250                        ; mulps         %xmm10,%xmm7
2704  DB  65,15,88,254                        ; addps         %xmm14,%xmm7
2705  DB  15,93,223                           ; minps         %xmm7,%xmm3
2706  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
2707  DB  65,15,194,213,1                     ; cmpltps       %xmm13,%xmm2
2708  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
2709  DB  15,85,211                           ; andnps        %xmm3,%xmm2
2710  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
2711  DB  72,173                              ; lods          %ds:(%rsi),%rax
2712  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Undo the register shuffle and reload the spilled xmm6/xmm7.
2713  DB  15,40,220                           ; movaps        %xmm4,%xmm3
2714  DB  15,40,229                           ; movaps        %xmm5,%xmm4
2715  DB  15,40,238                           ; movaps        %xmm6,%xmm5
2716  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
2717  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
2718  DB  72,131,196,40                       ; add           $0x28,%rsp
2719  DB  255,224                             ; jmpq          *%rax
2720
; _sk_scale_u8_sse2
; Takes one qword from the rsi stream as a pointer to a pointer; the inner
; pointer plus rdi addresses four consecutive bytes.  Each byte is
; zero-extended to 32 bits, converted to float and multiplied by the float at
; 0xc(rdx) (broadcast), giving a per-lane factor c.  Then xmm0..xmm3 are each
; multiplied by c.  Scratch: xmm8, xmm9.  Finally fetches the next qword from
; the stream and jumps to it.
; NOTE(review): the constant at 0xc(rdx) is presumably 1/255.0f, making c an
; 8-bit coverage value normalised to [0,1] - confirm.
2721PUBLIC _sk_scale_u8_sse2
2722_sk_scale_u8_sse2 LABEL PROC
2723  DB  72,173                              ; lods          %ds:(%rsi),%rax
2724  DB  72,139,0                            ; mov           (%rax),%rax
; Load 4 bytes, then widen u8 -> u16 -> u32 by interleaving with zero.
2725  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
2726  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
2727  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
2728  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
2729  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
2730  DB  243,68,15,16,74,12                  ; movss         0xc(%rdx),%xmm9
2731  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2732  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
2733  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
2734  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
2735  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
2736  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
2737  DB  72,173                              ; lods          %ds:(%rsi),%rax
2738  DB  255,224                             ; jmpq          *%rax
2739
; _sk_lerp_u8_sse2
; Computes the per-lane factor c from four bytes at (inner pointer + rdi):
; each byte is zero-extended, converted to float and multiplied by the float
; at 0xc(rdx) (broadcast).  The inner pointer is read through the pointer
; taken from the rsi stream.  Then, for each pair (xmm0,xmm4), (xmm1,xmm5),
; (xmm2,xmm6), (xmm3,xmm7):
;   lo = (lo - hi) * c + hi
; xmm4..xmm7 are unchanged.  Scratch: xmm8, xmm9.  Finally fetches the next
; qword from the stream and jumps to it.
; NOTE(review): the constant at 0xc(rdx) is presumably 1/255.0f, making this a
; coverage-weighted blend between the two register groups - confirm.
2740PUBLIC _sk_lerp_u8_sse2
2741_sk_lerp_u8_sse2 LABEL PROC
2742  DB  72,173                              ; lods          %ds:(%rsi),%rax
2743  DB  72,139,0                            ; mov           (%rax),%rax
; Load 4 bytes, then widen u8 -> u16 -> u32 by interleaving with zero.
2744  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
2745  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
2746  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
2747  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
2748  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
2749  DB  243,68,15,16,74,12                  ; movss         0xc(%rdx),%xmm9
2750  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2751  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
2752  DB  15,92,196                           ; subps         %xmm4,%xmm0
2753  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
2754  DB  15,88,196                           ; addps         %xmm4,%xmm0
2755  DB  15,92,205                           ; subps         %xmm5,%xmm1
2756  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
2757  DB  15,88,205                           ; addps         %xmm5,%xmm1
2758  DB  15,92,214                           ; subps         %xmm6,%xmm2
2759  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
2760  DB  15,88,214                           ; addps         %xmm6,%xmm2
2761  DB  15,92,223                           ; subps         %xmm7,%xmm3
2762  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
2763  DB  15,88,223                           ; addps         %xmm7,%xmm3
2764  DB  72,173                              ; lods          %ds:(%rsi),%rax
2765  DB  255,224                             ; jmpq          *%rax
2766
; ---------------------------------------------------------------------------
; _sk_load_tables_sse2
; Loads four packed 32-bit pixels, splits each into four 8-bit fields, and
; uses the low three fields as indices into three separate float tables.
; The top field is converted to float and scaled.
;
; In:    rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 4 for the pixel load)
;        rdx -> constants block: dword mask at 0x10, float scale at 0xc
;               (presumably 0xff and 1/255 -- confirm against the C++ source)
;        ctx (qword fetched by the first lods) holds four pointers:
;          0x00 pixel source, 0x08 table for xmm0,
;          0x10 table for xmm1, 0x18 table for xmm2
; Out:   xmm0, xmm1, xmm2 = table lookups, xmm3 = scaled top field
; Clobb: rax, rcx, r8-r11, xmm8-xmm10
; ---------------------------------------------------------------------------
2767PUBLIC _sk_load_tables_sse2
2768_sk_load_tables_sse2 LABEL PROC
2769  DB  72,173                              ; lods          %ds:(%rsi),%rax
2770  DB  72,139,8                            ; mov           (%rax),%rcx
2771  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
2772  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
2773  DB  102,15,110,66,16                    ; movd          0x10(%rdx),%xmm0
2774  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm9 = (px >> 8) & mask, xmm10 = (px >> 16) & mask, xmm0 = px & mask.
2775  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
2776  DB  102,65,15,114,209,8                 ; psrld         $0x8,%xmm9
2777  DB  102,68,15,219,200                   ; pand          %xmm0,%xmm9
2778  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
2779  DB  102,65,15,114,210,16                ; psrld         $0x10,%xmm10
2780  DB  102,68,15,219,208                   ; pand          %xmm0,%xmm10
2781  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; Manual gather through the table at r8. SSE2 has no gather instruction, so
; the four lane indices are moved to GPRs two at a time: pshufd $0x4e swaps
; the 64-bit halves so lanes 2,3 can be read with movq, while lanes 0,1 come
; straight from the low half. The three unpcklps rebuild lane order 0,1,2,3.
2782  DB  102,15,112,216,78                   ; pshufd        $0x4e,%xmm0,%xmm3
2783  DB  102,72,15,126,217                   ; movq          %xmm3,%rcx
2784  DB  65,137,201                          ; mov           %ecx,%r9d
2785  DB  72,193,233,32                       ; shr           $0x20,%rcx
2786  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
2787  DB  69,137,211                          ; mov           %r10d,%r11d
2788  DB  73,193,234,32                       ; shr           $0x20,%r10
2789  DB  243,67,15,16,28,144                 ; movss         (%r8,%r10,4),%xmm3
2790  DB  243,65,15,16,4,136                  ; movss         (%r8,%rcx,4),%xmm0
2791  DB  15,20,216                           ; unpcklps      %xmm0,%xmm3
2792  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
2793  DB  243,67,15,16,12,136                 ; movss         (%r8,%r9,4),%xmm1
2794  DB  15,20,193                           ; unpcklps      %xmm1,%xmm0
2795  DB  15,20,195                           ; unpcklps      %xmm3,%xmm0
; Same gather pattern: indices from xmm9, table pointer at ctx+0x10 -> xmm1.
2796  DB  72,139,72,16                        ; mov           0x10(%rax),%rcx
2797  DB  102,65,15,112,201,78                ; pshufd        $0x4e,%xmm9,%xmm1
2798  DB  102,73,15,126,200                   ; movq          %xmm1,%r8
2799  DB  69,137,193                          ; mov           %r8d,%r9d
2800  DB  73,193,232,32                       ; shr           $0x20,%r8
2801  DB  102,77,15,126,202                   ; movq          %xmm9,%r10
2802  DB  69,137,211                          ; mov           %r10d,%r11d
2803  DB  73,193,234,32                       ; shr           $0x20,%r10
2804  DB  243,66,15,16,28,145                 ; movss         (%rcx,%r10,4),%xmm3
2805  DB  243,66,15,16,12,129                 ; movss         (%rcx,%r8,4),%xmm1
2806  DB  15,20,217                           ; unpcklps      %xmm1,%xmm3
2807  DB  243,66,15,16,12,153                 ; movss         (%rcx,%r11,4),%xmm1
2808  DB  243,66,15,16,20,137                 ; movss         (%rcx,%r9,4),%xmm2
2809  DB  15,20,202                           ; unpcklps      %xmm2,%xmm1
2810  DB  15,20,203                           ; unpcklps      %xmm3,%xmm1
; Same gather pattern: indices from xmm10, table pointer at ctx+0x18 -> xmm2.
; rax is overwritten with the table pointer; ctx is not needed afterwards.
2811  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
2812  DB  102,65,15,112,210,78                ; pshufd        $0x4e,%xmm10,%xmm2
2813  DB  102,72,15,126,209                   ; movq          %xmm2,%rcx
2814  DB  65,137,200                          ; mov           %ecx,%r8d
2815  DB  72,193,233,32                       ; shr           $0x20,%rcx
2816  DB  102,77,15,126,209                   ; movq          %xmm10,%r9
2817  DB  69,137,202                          ; mov           %r9d,%r10d
2818  DB  73,193,233,32                       ; shr           $0x20,%r9
2819  DB  243,70,15,16,12,136                 ; movss         (%rax,%r9,4),%xmm9
2820  DB  243,15,16,20,136                    ; movss         (%rax,%rcx,4),%xmm2
2821  DB  68,15,20,202                        ; unpcklps      %xmm2,%xmm9
2822  DB  243,66,15,16,20,144                 ; movss         (%rax,%r10,4),%xmm2
2823  DB  243,66,15,16,28,128                 ; movss         (%rax,%r8,4),%xmm3
2824  DB  15,20,211                           ; unpcklps      %xmm3,%xmm2
2825  DB  65,15,20,209                        ; unpcklps      %xmm9,%xmm2
; xmm3 = float(px >> 24) * broadcast(float at rdx+0xc). No table lookup here.
2826  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
2827  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
2828  DB  243,15,16,90,12                     ; movss         0xc(%rdx),%xmm3
2829  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
2830  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next stage address from the program and tail-jump to it.
2831  DB  72,173                              ; lods          %ds:(%rsi),%rax
2832  DB  255,224                             ; jmpq          *%rax
2833
; ---------------------------------------------------------------------------
; _sk_load_565_sse2
; Loads four 16-bit pixels and expands them into three float channels by
; masking each field in place and multiplying by a per-field scale. The
; fourth output register is filled with a broadcast constant.
;
; In:    rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 2 for the pixel load)
;        rdx -> constants block:
;               masks  at 0x68 / 0x6c / 0x70 (for xmm0 / xmm1 / xmm2)
;               scales at 0x74 / 0x78 / 0x7c (for xmm0 / xmm1 / xmm2)
;               float at 0x00 broadcast into xmm3 (presumably 1.0 -- confirm)
;        ctx (qword fetched by the first lods) -> pointer to the pixel source
; Out:   xmm0-xmm3
; Clobb: rax, xmm8, xmm9
; ---------------------------------------------------------------------------
2834PUBLIC _sk_load_565_sse2
2835_sk_load_565_sse2 LABEL PROC
2836  DB  72,173                              ; lods          %ds:(%rsi),%rax
2837  DB  72,139,0                            ; mov           (%rax),%rax
; Load 8 bytes (four 16-bit pixels) and zero-extend each to a 32-bit lane by
; interleaving with a zeroed register.
2838  DB  243,68,15,126,12,120                ; movq          (%rax,%rdi,2),%xmm9
2839  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
2840  DB  102,68,15,97,200                    ; punpcklwd     %xmm0,%xmm9
; xmm0 = float(px & mask@0x68) * scale@0x74. The field is not shifted down;
; the scale constant is expected to account for the field's bit position.
2841  DB  102,15,110,66,104                   ; movd          0x68(%rdx),%xmm0
2842  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
2843  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
2844  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
; Scalar load for xmm3 is issued early; it is broadcast just before the exit.
2845  DB  243,15,16,26                        ; movss         (%rdx),%xmm3
2846  DB  243,15,16,66,116                    ; movss         0x74(%rdx),%xmm0
2847  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
2848  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; xmm1 = float(px & mask@0x6c) * scale@0x78.
2849  DB  102,15,110,74,108                   ; movd          0x6c(%rdx),%xmm1
2850  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
2851  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
2852  DB  68,15,91,193                        ; cvtdq2ps      %xmm1,%xmm8
2853  DB  243,15,16,74,120                    ; movss         0x78(%rdx),%xmm1
2854  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
2855  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
; xmm2 = float(px & mask@0x70) * scale@0x7c.
2856  DB  102,15,110,82,112                   ; movd          0x70(%rdx),%xmm2
2857  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
2858  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
2859  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
2860  DB  243,15,16,82,124                    ; movss         0x7c(%rdx),%xmm2
2861  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
2862  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; xmm3 = broadcast of the float loaded from rdx+0 above.
2863  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next stage address from the program and tail-jump to it.
2864  DB  72,173                              ; lods          %ds:(%rsi),%rax
2865  DB  255,224                             ; jmpq          *%rax
2866
; ---------------------------------------------------------------------------
; _sk_store_565_sse2
; Scales xmm0/xmm1/xmm2 to integers, packs them into one 16-bit value per
; lane as (xmm0 << 11) | (xmm1 << 5) | xmm2, and stores four such values.
; xmm3 is not read.
;
; In:    xmm0-xmm2 = channel values (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 2 for the store)
;        rdx -> constants block: float scale at 0x80 (used for xmm0 and xmm2)
;               and at 0x84 (used for xmm1); presumably 31.0 and 63.0 -- confirm
;        ctx (qword fetched by the first lods) -> pointer to the destination
; Out:   8 bytes written at dst + rdi*2; xmm0-xmm3 unchanged
; Clobb: rax, xmm8-xmm10
; ---------------------------------------------------------------------------
2867PUBLIC _sk_store_565_sse2
2868_sk_store_565_sse2 LABEL PROC
2869  DB  72,173                              ; lods          %ds:(%rsi),%rax
2870  DB  72,139,0                            ; mov           (%rax),%rax
2871  DB  243,68,15,16,130,128,0,0,0          ; movss         0x80(%rdx),%xmm8
2872  DB  243,68,15,16,138,132,0,0,0          ; movss         0x84(%rdx),%xmm9
; xmm10 = int(xmm0 * scale@0x80) << 11. cvtps2dq rounds per the MXCSR mode.
2873  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2874  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
2875  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
2876  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
2877  DB  102,65,15,114,242,11                ; pslld         $0xb,%xmm10
; xmm9 = (int(xmm1 * scale@0x84) << 5) | xmm10.
2878  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
2879  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
2880  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
2881  DB  102,65,15,114,241,5                 ; pslld         $0x5,%xmm9
2882  DB  102,69,15,235,202                   ; por           %xmm10,%xmm9
; xmm8 = int(xmm2 * scale@0x80) | xmm9.
2883  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
2884  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
2885  DB  102,69,15,86,193                    ; orpd          %xmm9,%xmm8
; SSE2 only has a signed-saturating 32->16 pack. Sign-extending the low 16
; bits of every lane first (shift left 16, arithmetic shift right 16) keeps
; each value inside the int16 range, so packssdw passes the low 16 bits
; through without saturating.
2886  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
2887  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
2888  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
2889  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
; Fetch the next stage address from the program and tail-jump to it.
2890  DB  72,173                              ; lods          %ds:(%rsi),%rax
2891  DB  255,224                             ; jmpq          *%rax
2892
; ---------------------------------------------------------------------------
; _sk_load_8888_sse2
; Loads four packed 32-bit pixels and unpacks the four 8-bit fields of each
; into xmm0 (bits 0-7), xmm1 (8-15), xmm2 (16-23) and xmm3 (24-31), each
; converted to float and multiplied by a common scale.
;
; In:    rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 4 for the pixel load)
;        rdx -> constants block: dword mask at 0x10, float scale at 0xc
;               (presumably 0xff and 1/255 -- confirm against the C++ source)
;        ctx (qword fetched by the first lods) -> pointer to the pixel source
; Out:   xmm0-xmm3
; Clobb: rax, xmm8
; ---------------------------------------------------------------------------
2893PUBLIC _sk_load_8888_sse2
2894_sk_load_8888_sse2 LABEL PROC
2895  DB  72,173                              ; lods          %ds:(%rsi),%rax
2896  DB  72,139,0                            ; mov           (%rax),%rax
; Unaligned 16-byte load of the four pixels; xmm0 = broadcast mask.
2897  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
2898  DB  102,15,110,66,16                    ; movd          0x10(%rdx),%xmm0
2899  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm1 = (px >> 8) & mask, xmm2 = (px >> 16) & mask, xmm0 = px & mask.
2900  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
2901  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
2902  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
2903  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
2904  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
2905  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
2906  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each field to float and multiply by the broadcast scale in xmm8.
2907  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
2908  DB  243,68,15,16,66,12                  ; movss         0xc(%rdx),%xmm8
2909  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
2910  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
2911  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
2912  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
2913  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
2914  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; The top field needs no mask: the logical shift by 24 already isolates it.
2915  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
2916  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
2917  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next stage address from the program and tail-jump to it.
2918  DB  72,173                              ; lods          %ds:(%rsi),%rax
2919  DB  255,224                             ; jmpq          *%rax
2920
; ---------------------------------------------------------------------------
; _sk_store_8888_sse2
; Scales xmm0-xmm3 by a common factor, converts them to integers and packs
; them into one 32-bit value per lane as
;   xmm0 | (xmm1 << 8) | (xmm2 << 16) | (xmm3 << 24)
; then stores the four resulting pixels.
;
; In:    xmm0-xmm3 = channel values (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 4 for the store)
;        rdx -> constants block: float scale at 0x8 (presumably 255.0 -- confirm)
;        ctx (qword fetched by the first lods) -> pointer to the destination
; Out:   16 bytes written at dst + rdi*4; xmm0-xmm3 unchanged
; Clobb: rax, xmm8-xmm10
; Note:  no clamping or masking is done here; values are assumed to convert
;        into the 0..255 range already -- TODO confirm an earlier stage
;        guarantees that.
; ---------------------------------------------------------------------------
2921PUBLIC _sk_store_8888_sse2
2922_sk_store_8888_sse2 LABEL PROC
2923  DB  72,173                              ; lods          %ds:(%rsi),%rax
2924  DB  72,139,0                            ; mov           (%rax),%rax
2925  DB  243,68,15,16,66,8                   ; movss         0x8(%rdx),%xmm8
2926  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int(xmm0 * scale). cvtps2dq rounds per the MXCSR mode.
2927  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2928  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
2929  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
; xmm10 = (int(xmm1 * scale) << 8) | xmm9.
2930  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
2931  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
2932  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
2933  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
2934  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = int(xmm2 * scale) << 16.
2935  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
2936  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
2937  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
2938  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
; xmm8 = int(xmm3 * scale) << 24, then OR in the other three fields.
2939  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
2940  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
2941  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
2942  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
2943  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
2944  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
; Fetch the next stage address from the program and tail-jump to it.
2945  DB  72,173                              ; lods          %ds:(%rsi),%rax
2946  DB  255,224                             ; jmpq          *%rax
2947
; ---------------------------------------------------------------------------
; _sk_load_f16_sse2
; Loads four pixels of four 16-bit values each (32 bytes), de-interleaves
; them into one register per channel, and widens every 16-bit value to a
; 32-bit float by shifting it left 13 bits and multiplying by a constant.
;
; In:    rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 8 for the pixel load)
;        rdx -> constants block: dword threshold at 0x64, float multiplier
;               at 0x5c (presumably the half->float exponent re-bias -- confirm)
;        ctx (qword fetched by the first lods) -> pointer to the pixel source
; Out:   xmm0-xmm3 = channels 0..3 as floats
; Clobb: rax, xmm8, xmm9
; Note:  16-bit inputs that compare below the threshold (signed compare) are
;        replaced by zero before conversion. This looks like a denormal
;        flush, and it also zeroes inputs with the sign bit set -- confirm
;        that is intended.
; ---------------------------------------------------------------------------
2948PUBLIC _sk_load_f16_sse2
2949_sk_load_f16_sse2 LABEL PROC
2950  DB  72,173                              ; lods          %ds:(%rsi),%rax
2951  DB  72,139,0                            ; mov           (%rax),%rax
; xmm0 = pixels 0,1 and xmm1 = pixels 2,3 (four 16-bit words per pixel).
2952  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
2953  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of word interleaving transpose the 4x4 block of words:
;   xmm8 = {c0 of px0..3, c1 of px0..3}, xmm2 = {c2 of px0..3, c3 of px0..3}.
2954  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
2955  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
2956  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
2957  DB  102,68,15,111,194                   ; movdqa        %xmm2,%xmm8
2958  DB  102,68,15,97,192                    ; punpcklwd     %xmm0,%xmm8
2959  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
; Broadcast the threshold. pcmpgtw sets a word mask where threshold > value,
; and pandn keeps the value only where that mask is clear.
2960  DB  102,15,110,66,100                   ; movd          0x64(%rdx),%xmm0
2961  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
2962  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
2963  DB  102,65,15,101,200                   ; pcmpgtw       %xmm8,%xmm1
2964  DB  102,65,15,223,200                   ; pandn         %xmm8,%xmm1
2965  DB  102,15,101,218                      ; pcmpgtw       %xmm2,%xmm3
2966  DB  102,15,223,218                      ; pandn         %xmm2,%xmm3
; For each channel: zero-extend its four words to dwords (interleave with
; zero), shift left 13 bits, then multiply by the broadcast float in xmm9.
2967  DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
2968  DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
2969  DB  102,65,15,97,192                    ; punpcklwd     %xmm8,%xmm0
2970  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
2971  DB  102,15,110,82,92                    ; movd          0x5c(%rdx),%xmm2
2972  DB  102,68,15,112,202,0                 ; pshufd        $0x0,%xmm2,%xmm9
2973  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
2974  DB  102,65,15,105,200                   ; punpckhwd     %xmm8,%xmm1
2975  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
2976  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
2977  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
2978  DB  102,65,15,97,208                    ; punpcklwd     %xmm8,%xmm2
2979  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
2980  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
2981  DB  102,65,15,105,216                   ; punpckhwd     %xmm8,%xmm3
2982  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
2983  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
; Fetch the next stage address from the program and tail-jump to it.
2984  DB  72,173                              ; lods          %ds:(%rsi),%rax
2985  DB  255,224                             ; jmpq          *%rax
2986
; ---------------------------------------------------------------------------
; _sk_store_f16_sse2
; Narrows xmm0-xmm3 from 32-bit floats to 16-bit values by multiplying by a
; constant and shifting the resulting bit pattern right 13 bits, interleaves
; the four channels per pixel, and stores four pixels (32 bytes).
;
; In:    xmm0-xmm3 = channels 0..3 (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        rdi =  element index (scaled by 8 for the store)
;        rdx -> constants block: float multiplier at 0x60
;               (presumably the float->half exponent re-bias -- confirm)
;        ctx (qword fetched by the first lods) -> pointer to the destination
; Out:   32 bytes written at dst + rdi*8; xmm0-xmm3 unchanged
; Clobb: rax, xmm8-xmm11
; Note:  the packing below is only correct if every shifted value fits in 16
;        bits; nothing here masks or clamps to enforce that -- TODO confirm
;        the input range guarantees it.
; ---------------------------------------------------------------------------
2987PUBLIC _sk_store_f16_sse2
2988_sk_store_f16_sse2 LABEL PROC
2989  DB  72,173                              ; lods          %ds:(%rsi),%rax
2990  DB  72,139,0                            ; mov           (%rax),%rax
2991  DB  102,68,15,110,66,96                 ; movd          0x60(%rdx),%xmm8
2992  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
; xmm9 / xmm10 / xmm11 / xmm8 = bits(channel 0 / 1 / 2 / 3 * multiplier) >> 13.
2993  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
2994  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
2995  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
2996  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
2997  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
2998  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
2999  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
3000  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
3001  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
3002  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
3003  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
; pslldq shifts the whole register left by 2 bytes. With 16-bit values in
; each dword this moves channel 1 (and channel 3) into the upper half of its
; own lane, so the OR yields {c0 | c1 << 16} in xmm10 and {c2 | c3 << 16}
; in xmm8.
3004  DB  102,65,15,115,250,2                 ; pslldq        $0x2,%xmm10
3005  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
3006  DB  102,65,15,115,248,2                 ; pslldq        $0x2,%xmm8
3007  DB  102,69,15,235,195                   ; por           %xmm11,%xmm8
; Interleave the dword pairs so each pixel's four channels are contiguous:
; the low halves give pixels 0,1 and the high halves give pixels 2,3.
3008  DB  102,69,15,111,202                   ; movdqa        %xmm10,%xmm9
3009  DB  102,69,15,98,200                    ; punpckldq     %xmm8,%xmm9
3010  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
3011  DB  102,69,15,106,208                   ; punpckhdq     %xmm8,%xmm10
3012  DB  243,68,15,127,84,248,16             ; movdqu        %xmm10,0x10(%rax,%rdi,8)
; Fetch the next stage address from the program and tail-jump to it.
3013  DB  72,173                              ; lods          %ds:(%rsi),%rax
3014  DB  255,224                             ; jmpq          *%rax
3015
; ---------------------------------------------------------------------------
; _sk_clamp_x_sse2
; Clamps every lane of xmm0 to the range [0, limit'], where limit' is the
; limit float with its bit pattern decremented by one.
;
; In:    xmm0 = values to clamp (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        ctx (qword fetched by the first lods) -> a single float limit
; Out:   xmm0 = max(0, min(xmm0, limit'))
; Clobb: rax, xmm8, xmm9
; ---------------------------------------------------------------------------
3016PUBLIC _sk_clamp_x_sse2
3017_sk_clamp_x_sse2 LABEL PROC
3018  DB  72,173                              ; lods          %ds:(%rsi),%rax
3019  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3020  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; pcmpeqd of a register with itself gives all-ones (-1 per lane). Adding that
; to the limit's bit pattern subtracts 1 from it as an integer; for a
; positive finite limit this is the next representable float below it,
; which makes the upper bound exclusive.
3021  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
3022  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
3023  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
; Lower bound: xmm8 = max(0, xmm0).
3024  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
3025  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
; Fetch the next stage address, move the result back into xmm0, and jump.
3026  DB  72,173                              ; lods          %ds:(%rsi),%rax
3027  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
3028  DB  255,224                             ; jmpq          *%rax
3029
; ---------------------------------------------------------------------------
; _sk_clamp_y_sse2
; Same operation as the xmm0 clamp stage, applied to xmm1: clamps every lane
; to [0, limit'], where limit' is the limit float with its bit pattern
; decremented by one.
;
; In:    xmm1 = values to clamp (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        ctx (qword fetched by the first lods) -> a single float limit
; Out:   xmm1 = max(0, min(xmm1, limit'))
; Clobb: rax, xmm8, xmm9
; ---------------------------------------------------------------------------
3030PUBLIC _sk_clamp_y_sse2
3031_sk_clamp_y_sse2 LABEL PROC
3032  DB  72,173                              ; lods          %ds:(%rsi),%rax
3033  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3034  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = all-ones + limit bits, i.e. the limit's bit pattern minus 1. For a
; positive finite limit that is the next representable float below it.
3035  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
3036  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
3037  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
; Lower bound: xmm8 = max(0, xmm1).
3038  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
3039  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
; Fetch the next stage address, move the result back into xmm1, and jump.
3040  DB  72,173                              ; lods          %ds:(%rsi),%rax
3041  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
3042  DB  255,224                             ; jmpq          *%rax
3043
; ---------------------------------------------------------------------------
; _sk_matrix_2x3_sse2
; Applies a 2x3 affine transform to (xmm0, xmm1). With m[] the six floats
; the context points at (byte offset = 4 * index):
;   xmm0' = m[0]*xmm0 + m[2]*xmm1 + m[4]
;   xmm1' = m[1]*xmm0 + m[3]*xmm1 + m[5]
; i.e. the coefficients are read in column-major order.
;
; In:    xmm0, xmm1 = input coordinates (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        ctx (qword fetched by the first lods) -> six floats m[0..5]
; Out:   xmm0, xmm1 = transformed coordinates
; Clobb: rax, xmm8-xmm11
; ---------------------------------------------------------------------------
3044PUBLIC _sk_matrix_2x3_sse2
3045_sk_matrix_2x3_sse2 LABEL PROC
; Keep copies of the inputs, since xmm0/xmm1 are reused for coefficients.
3046  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
3047  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
3048  DB  72,173                              ; lods          %ds:(%rsi),%rax
3049  DB  243,15,16,0                         ; movss         (%rax),%xmm0
3050  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
; First output: xmm0 = m[0]*x + (m[2]*y + m[4]).
3051  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
3052  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
3053  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3054  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
3055  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3056  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
3057  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
3058  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3059  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output: xmm1 = m[1]*x + (m[3]*y + m[5]).
3060  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
3061  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
3062  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3063  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
3064  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3065  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
3066  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
3067  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3068  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Fetch the next stage address from the program and tail-jump to it.
3069  DB  72,173                              ; lods          %ds:(%rsi),%rax
3070  DB  255,224                             ; jmpq          *%rax
3071
; ---------------------------------------------------------------------------
; _sk_matrix_3x4_sse2
; Applies a 3x4 affine transform to (xmm0, xmm1, xmm2). With m[] the twelve
; floats the context points at (byte offset = 4 * index):
;   xmm0' = m[0]*xmm0 + m[3]*xmm1 + m[6]*xmm2 + m[9]
;   xmm1' = m[1]*xmm0 + m[4]*xmm1 + m[7]*xmm2 + m[10]
;   xmm2' = m[2]*xmm0 + m[5]*xmm1 + m[8]*xmm2 + m[11]
; i.e. the coefficients are read in column-major order. xmm3 is untouched.
;
; In:    xmm0-xmm2 = input vector components (float)
;        rsi -> next qword of the stage program (consumed by lods)
;        ctx (qword fetched by the first lods) -> twelve floats m[0..11]
; Out:   xmm0-xmm2 = transformed components
; Clobb: rax, xmm8-xmm13
; ---------------------------------------------------------------------------
3072PUBLIC _sk_matrix_3x4_sse2
3073_sk_matrix_3x4_sse2 LABEL PROC
; Keep copies of the first two inputs; the third input stays live in xmm2
; until the very end, so the third output is built in xmm10 instead.
3074  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
3075  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
3076  DB  72,173                              ; lods          %ds:(%rsi),%rax
3077  DB  243,15,16,0                         ; movss         (%rax),%xmm0
3078  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
; First output: xmm0 = m[0]*x + (m[3]*y + (m[6]*z + m[9])).
3079  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
3080  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
3081  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3082  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
3083  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3084  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
3085  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
3086  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
3087  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
3088  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
3089  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
3090  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3091  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output: xmm1 = m[1]*x + (m[4]*y + (m[7]*z + m[10])).
3092  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
3093  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
3094  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3095  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
3096  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3097  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
3098  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
3099  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
3100  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
3101  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
3102  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
3103  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3104  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output, in xmm10: m[2]*x + (m[5]*y + (m[8]*z + m[11])).
3105  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
3106  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3107  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
3108  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3109  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
3110  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
3111  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
3112  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
3113  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
3114  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
3115  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
3116  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
3117  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
3118  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Fetch the next stage address, move the third output into xmm2, and jump.
3119  DB  72,173                              ; lods          %ds:(%rsi),%rax
3120  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
3121  DB  255,224                             ; jmpq          *%rax
3122
; ---------------------------------------------------------------------------
; _sk_linear_gradient_2stops_sse2
; Evaluates a per-channel linear function of xmm0. With c0[] the four floats
; at ctx+0x00 and c1[] the four floats at ctx+0x10, and t = incoming xmm0:
;   xmm0' = t*c1[0] + c0[0]      xmm1' = t*c1[1] + c0[1]
;   xmm2' = t*c1[2] + c0[2]      xmm3' = t*c1[3] + c0[3]
; The incoming xmm1-xmm3 are overwritten without being read.
;
; In:    xmm0 = t (float, one value per lane)
;        rsi -> next qword of the stage program (consumed by lods)
;        ctx (qword fetched by the first lods) -> eight floats: c0[0..3]
;             then c1[0..3], read with unaligned loads
;             (presumably start colour and colour delta -- confirm)
; Out:   xmm0-xmm3
; Clobb: rax, xmm8-xmm10
; ---------------------------------------------------------------------------
3123PUBLIC _sk_linear_gradient_2stops_sse2
3124_sk_linear_gradient_2stops_sse2 LABEL PROC
3125  DB  72,173                              ; lods          %ds:(%rsi),%rax
3126  DB  68,15,16,8                          ; movups        (%rax),%xmm9
3127  DB  15,16,88,16                         ; movups        0x10(%rax),%xmm3
; Lane 0 (shufps $0x00): built in xmm8 because xmm0 still holds t.
3128  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
3129  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3130  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
3131  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
3132  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
3133  DB  68,15,88,193                        ; addps         %xmm1,%xmm8
; Lane 1 (shufps $0x55) -> xmm1.
3134  DB  15,40,203                           ; movaps        %xmm3,%xmm1
3135  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
3136  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
3137  DB  15,198,210,85                       ; shufps        $0x55,%xmm2,%xmm2
3138  DB  15,89,200                           ; mulps         %xmm0,%xmm1
3139  DB  15,88,202                           ; addps         %xmm2,%xmm1
; Lane 2 (shufps $0xaa) -> xmm2.
3140  DB  15,40,211                           ; movaps        %xmm3,%xmm2
3141  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
3142  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
3143  DB  69,15,198,210,170                   ; shufps        $0xaa,%xmm10,%xmm10
3144  DB  15,89,208                           ; mulps         %xmm0,%xmm2
3145  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
; Lane 3 (shufps $0xff) -> xmm3. Done last and in place, since xmm3 and xmm9
; are no longer needed as sources afterwards.
3146  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
3147  DB  69,15,198,201,255                   ; shufps        $0xff,%xmm9,%xmm9
3148  DB  15,89,216                           ; mulps         %xmm0,%xmm3
3149  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
; Fetch the next stage address, move the lane-0 result into xmm0, and jump.
3150  DB  72,173                              ; lods          %ds:(%rsi),%rax
3151  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
3152  DB  255,224                             ; jmpq          *%rax
3153END
3154