SkJumper_generated_win.S revision 7d3d8723319038d16456137ba932f238c1e65dbf
1; Copyright 2017 Google Inc.
2;
3; Use of this source code is governed by a BSD-style license that can be
4; found in the LICENSE file.
5
6; This file is generated semi-automatically with this command:
7;   $ src/jumper/build_stages.py
8
9IFDEF RAX
10_text SEGMENT
11
; _sk_start_pipeline_hsw: entry point that saves registers, then calls the first
; stage function repeatedly over a range, 8 units at a time, plus one tail call.
;   In (as read by the code): rcx = start index, rdx = pointer to a qword list
;       (first qword is the function to call), r8 = value forwarded in rdx,
;       r9 = limit.
;   Out: rax = limit (r9 as passed in).
;   Each call is made with rdi = current index, rsi = list pointer advanced past
;   the first entry, rdx = forwarded r8 value, rcx = 0 (full step) or the
;   remaining count (tail), and ymm0-ymm7 zeroed.
;   NOTE(review): the callee argument registers (rdi, rsi, rdx, rcx) look like a
;   System V-style convention for the stage functions -- confirm with the generator.
12PUBLIC _sk_start_pipeline_hsw
13_sk_start_pipeline_hsw LABEL PROC
; Save the Microsoft x64 callee-saved integer registers this function overwrites.
14  DB  65,87                               ; push          %r15
15  DB  65,86                               ; push          %r14
16  DB  65,85                               ; push          %r13
17  DB  65,84                               ; push          %r12
18  DB  86                                  ; push          %rsi
19  DB  87                                  ; push          %rdi
20  DB  83                                  ; push          %rbx
; Reserve 0xa0 bytes and spill xmm6-xmm15 (callee-saved on Microsoft x64).
21  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
22  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
23  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
24  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
25  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
26  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
27  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
28  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
29  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
30  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
31  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
; Stash the incoming arguments: r13 = limit, r14 = forwarded value, rbx = index.
32  DB  77,137,205                          ; mov           %r9,%r13
33  DB  77,137,198                          ; mov           %r8,%r14
34  DB  72,137,203                          ; mov           %rcx,%rbx
; r15 = first qword of the list (function to call); r12 = list pointer after it.
35  DB  72,137,214                          ; mov           %rdx,%rsi
36  DB  72,173                              ; lods          %ds:(%rsi),%rax
37  DB  73,137,199                          ; mov           %rax,%r15
38  DB  73,137,244                          ; mov           %rsi,%r12
; If index + 8 <= limit (unsigned) enter the full-step loop at +0x75;
; otherwise set rdi = index and go straight to the tail handling at +0xb6.
39  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
40  DB  76,57,232                           ; cmp           %r13,%rax
41  DB  118,5                               ; jbe           75 <_sk_start_pipeline_hsw+0x75>
42  DB  72,137,223                          ; mov           %rbx,%rdi
43  DB  235,65                              ; jmp           b6 <_sk_start_pipeline_hsw+0xb6>
; +0x75: full-step loop body. rcx = 0, ymm0-ymm7 cleared, then call the function.
44  DB  185,0,0,0,0                         ; mov           $0x0,%ecx
45  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
46  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
47  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
48  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
49  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
50  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
51  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
52  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
53  DB  72,137,223                          ; mov           %rbx,%rdi
54  DB  76,137,230                          ; mov           %r12,%rsi
55  DB  76,137,242                          ; mov           %r14,%rdx
56  DB  65,255,215                          ; callq         *%r15
; Advance: rdi = index + 8; compare index + 16 against the limit, then commit
; rbx = index + 8 (mov does not disturb the flags) and loop while it still fits.
57  DB  72,141,123,8                        ; lea           0x8(%rbx),%rdi
58  DB  72,131,195,16                       ; add           $0x10,%rbx
59  DB  76,57,235                           ; cmp           %r13,%rbx
60  DB  72,137,251                          ; mov           %rdi,%rbx
61  DB  118,191                             ; jbe           75 <_sk_start_pipeline_hsw+0x75>
; +0xb6: tail. rcx = limit - rdi; skip the extra call when nothing remains.
62  DB  76,137,233                          ; mov           %r13,%rcx
63  DB  72,41,249                           ; sub           %rdi,%rcx
64  DB  116,41                              ; je            e7 <_sk_start_pipeline_hsw+0xe7>
65  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
66  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
67  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
68  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
69  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
70  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
71  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
72  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
73  DB  76,137,230                          ; mov           %r12,%rsi
74  DB  76,137,242                          ; mov           %r14,%rdx
75  DB  65,255,215                          ; callq         *%r15
; +0xe7: return the limit in rax, then restore xmm6-xmm15 and the integer
; registers in the reverse order of the prologue.
76  DB  76,137,232                          ; mov           %r13,%rax
77  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
78  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
79  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
80  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
81  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
82  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
83  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
84  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
85  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
86  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
87  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
88  DB  91                                  ; pop           %rbx
89  DB  95                                  ; pop           %rdi
90  DB  94                                  ; pop           %rsi
91  DB  65,92                               ; pop           %r12
92  DB  65,93                               ; pop           %r13
93  DB  65,94                               ; pop           %r14
94  DB  65,95                               ; pop           %r15
; Clear the upper halves of the ymm registers before returning to the caller.
95  DB  197,248,119                         ; vzeroupper
96  DB  195                                 ; retq
97
; _sk_just_return_hsw: does nothing but return to its caller (a single retq).
; NOTE(review): presumably used as the final entry of a stage list so the chain
; of tail jumps ends with a return -- confirm against the caller.
98PUBLIC _sk_just_return_hsw
99_sk_just_return_hsw LABEL PROC
100  DB  195                                 ; retq
101
; _sk_seed_shader_hsw: initialises ymm0-ymm7 from edi, [rdx] and a context pointer.
;   rax  = qword loaded from [rsi] (rsi advances by 8); used as a pointer below.
;   ymm0 = float(edi) in all lanes + 0.5 + the 8 floats stored at [rdx]
;   ymm1 = float(32-bit integer at [rax]) in all lanes + 0.5
;   ymm2 = 1.0 in all lanes
;   ymm3..ymm7 = 0
; Ends by loading the next qword from [rsi] and jumping to it.
; NOTE(review): presumably edi is an x coordinate, [rax] a y coordinate and
; [rdx] a per-lane offset table -- inferred from the name, confirm with the generator.
102PUBLIC _sk_seed_shader_hsw
103_sk_seed_shader_hsw LABEL PROC
104  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0 = float(edi) broadcast to every lane.
105  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
106  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
107  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; ymm1 = 0.5 (0x3f000000 is the IEEE-754 single-precision encoding of 0.5).
108  DB  65,184,0,0,0,63                     ; mov           $0x3f000000,%r8d
109  DB  196,193,121,110,200                 ; vmovd         %r8d,%xmm1
110  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
111  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
112  DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
; ymm1 = float(int32 at [rax]) + 0.5.
113  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
114  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
115  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
; ymm2 = 1.0 (0x3f800000 is the IEEE-754 single-precision encoding of 1.0).
116  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
117  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
118  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
; Fetch the next pointer, clear ymm3-ymm7, and tail-jump to it.
119  DB  72,173                              ; lods          %ds:(%rsi),%rax
120  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
121  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
122  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
123  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
124  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
125  DB  255,224                             ; jmpq          *%rax
126
; _sk_constant_color_hsw: loads a pointer from [rsi] (rsi advances by 8) and
; broadcasts the four consecutive 32-bit values at offsets 0, 4, 8 and 12 from it
; into every lane of ymm0, ymm1, ymm2 and ymm3 respectively.
; Ends by loading the next qword from [rsi] and jumping to it.
; NOTE(review): presumably the four values are r, g, b, a floats -- confirm.
127PUBLIC _sk_constant_color_hsw
128_sk_constant_color_hsw LABEL PROC
129  DB  72,173                              ; lods          %ds:(%rsi),%rax
130  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
131  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
132  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
133  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
134  DB  72,173                              ; lods          %ds:(%rsi),%rax
135  DB  255,224                             ; jmpq          *%rax
136
; _sk_clear_hsw: sets ymm0, ymm1, ymm2 and ymm3 to zero, then jumps to the qword
; loaded from [rsi] (rsi advances by 8). ymm4-ymm7 are left untouched.
137PUBLIC _sk_clear_hsw
138_sk_clear_hsw LABEL PROC
139  DB  72,173                              ; lods          %ds:(%rsi),%rax
140  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
141  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
142  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
143  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
144  DB  255,224                             ; jmpq          *%rax
145
; _sk_srcatop_hsw: for N = 0..3, ymmN = ymmN*ymm7 + ymm(N+4)*(1 - ymm3),
; using the value of ymm3 on entry. Scratch: eax, ymm8, ymm9.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
146PUBLIC _sk_srcatop_hsw
147_sk_srcatop_hsw LABEL PROC
; ymm8 = 1.0 - ymm3 (0x3f800000 is 1.0f).
148  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
149  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
150  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
151  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
; Each pair: ymm9 = ymm(N+4)*ymm8, then ymmN = ymm7*ymmN + ymm9.
152  DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
153  DB  196,194,69,168,193                  ; vfmadd213ps   %ymm9,%ymm7,%ymm0
154  DB  197,60,89,205                       ; vmulps        %ymm5,%ymm8,%ymm9
155  DB  196,194,69,168,201                  ; vfmadd213ps   %ymm9,%ymm7,%ymm1
156  DB  197,60,89,206                       ; vmulps        %ymm6,%ymm8,%ymm9
157  DB  196,194,69,168,209                  ; vfmadd213ps   %ymm9,%ymm7,%ymm2
158  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
159  DB  196,194,69,168,216                  ; vfmadd213ps   %ymm8,%ymm7,%ymm3
160  DB  72,173                              ; lods          %ds:(%rsi),%rax
161  DB  255,224                             ; jmpq          *%rax
162
; _sk_dstatop_hsw: for N = 0..3, ymmN = ymmN*(1 - ymm7) + ymm3*ymm(N+4),
; using the value of ymm3 on entry. Scratch: eax, ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
163PUBLIC _sk_dstatop_hsw
164_sk_dstatop_hsw LABEL PROC
; ymm8 = 1.0 - ymm7 (0x3f800000 is 1.0f).
165  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
166  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
167  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
168  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
; Each pair: ymmN *= ymm8, then ymmN += ymm3*ymm(N+4).
169  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
170  DB  196,226,101,184,196                 ; vfmadd231ps   %ymm4,%ymm3,%ymm0
171  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
172  DB  196,226,101,184,205                 ; vfmadd231ps   %ymm5,%ymm3,%ymm1
173  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
174  DB  196,226,101,184,214                 ; vfmadd231ps   %ymm6,%ymm3,%ymm2
; ymm3 is updated last because the three lines above still read its old value.
175  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
176  DB  196,194,69,168,216                  ; vfmadd213ps   %ymm8,%ymm7,%ymm3
177  DB  72,173                              ; lods          %ds:(%rsi),%rax
178  DB  255,224                             ; jmpq          *%rax
179
; _sk_srcin_hsw: for N = 0..3, ymmN = ymmN * ymm7.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 are source r,g,b,a and ymm7 is destination
; alpha (inferred from the stage name) -- confirm with the generator.
180PUBLIC _sk_srcin_hsw
181_sk_srcin_hsw LABEL PROC
182  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
183  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
184  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
185  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
186  DB  72,173                              ; lods          %ds:(%rsi),%rax
187  DB  255,224                             ; jmpq          *%rax
188
; _sk_dstin_hsw: for N = 0..3, ymmN = ymm3 * ymm(N+4), using the value of ymm3
; on entry (ymm3 itself is overwritten last). Prior ymm0-ymm2 contents are discarded.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm3 is source alpha and ymm4-ymm7 are destination
; r,g,b,a (inferred from the stage name) -- confirm with the generator.
189PUBLIC _sk_dstin_hsw
190_sk_dstin_hsw LABEL PROC
191  DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
192  DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
193  DB  197,228,89,214                      ; vmulps        %ymm6,%ymm3,%ymm2
194  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
195  DB  72,173                              ; lods          %ds:(%rsi),%rax
196  DB  255,224                             ; jmpq          *%rax
197
; _sk_srcout_hsw: for N = 0..3, ymmN = ymmN * (1 - ymm7). Scratch: eax, ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 are source r,g,b,a and ymm7 is destination
; alpha (inferred from the stage name) -- confirm with the generator.
198PUBLIC _sk_srcout_hsw
199_sk_srcout_hsw LABEL PROC
; ymm8 = 1.0 - ymm7 (0x3f800000 is 1.0f).
200  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
201  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
202  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
203  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
204  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
205  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
206  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
207  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
208  DB  72,173                              ; lods          %ds:(%rsi),%rax
209  DB  255,224                             ; jmpq          *%rax
210
; _sk_dstout_hsw: for N = 0..3, ymmN = (1 - ymm3) * ymm(N+4), using the value of
; ymm3 on entry. Prior ymm0-ymm2 contents are discarded. Scratch: eax.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm3 is source alpha and ymm4-ymm7 are destination
; r,g,b,a (inferred from the stage name) -- confirm with the generator.
211PUBLIC _sk_dstout_hsw
212_sk_dstout_hsw LABEL PROC
; ymm0 is used as the temporary for 1.0 because it is overwritten below anyway;
; ymm3 becomes 1.0 - ymm3.
213  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
214  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
215  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
216  DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
217  DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
218  DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
219  DB  197,228,89,214                      ; vmulps        %ymm6,%ymm3,%ymm2
220  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
221  DB  72,173                              ; lods          %ds:(%rsi),%rax
222  DB  255,224                             ; jmpq          *%rax
223
; _sk_srcover_hsw: for N = 0..3, ymmN = ymmN + ymm(N+4)*(1 - ymm3), using the
; value of ymm3 on entry. Scratch: eax, ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
224PUBLIC _sk_srcover_hsw
225_sk_srcover_hsw LABEL PROC
; ymm8 = 1.0 - ymm3 (0x3f800000 is 1.0f).
226  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
227  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
228  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
229  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
; ymmN += ymm(N+4) * ymm8.
230  DB  196,194,93,184,192                  ; vfmadd231ps   %ymm8,%ymm4,%ymm0
231  DB  196,194,85,184,200                  ; vfmadd231ps   %ymm8,%ymm5,%ymm1
232  DB  196,194,77,184,208                  ; vfmadd231ps   %ymm8,%ymm6,%ymm2
233  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
234  DB  72,173                              ; lods          %ds:(%rsi),%rax
235  DB  255,224                             ; jmpq          *%rax
236
; _sk_dstover_hsw: for N = 0..3, ymmN = ymmN*(1 - ymm7) + ymm(N+4).
; Scratch: eax, ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
237PUBLIC _sk_dstover_hsw
238_sk_dstover_hsw LABEL PROC
; ymm8 = 1.0 - ymm7 (0x3f800000 is 1.0f).
239  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
240  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
241  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
242  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
; ymmN = ymm8*ymmN + ymm(N+4).
243  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
244  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
245  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
246  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
247  DB  72,173                              ; lods          %ds:(%rsi),%rax
248  DB  255,224                             ; jmpq          *%rax
249
; _sk_modulate_hsw: for N = 0..3, ymmN = ymmN * ymm(N+4).
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
250PUBLIC _sk_modulate_hsw
251_sk_modulate_hsw LABEL PROC
252  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
253  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
254  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
255  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
256  DB  72,173                              ; lods          %ds:(%rsi),%rax
257  DB  255,224                             ; jmpq          *%rax
258
; _sk_multiply_hsw: for N = 0..3,
;   ymmN = ymmN*(1 - ymm7) + ymm(N+4)*(1 - ymm3) + ymmN*ymm(N+4),
; using the value of ymm3 on entry. Scratch: eax, ymm8, ymm9, ymm10.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
259PUBLIC _sk_multiply_hsw
260_sk_multiply_hsw LABEL PROC
; ymm9 = 1.0 - ymm7, ymm8 = 1.0 - ymm3 (0x3f800000 is 1.0f).
261  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
262  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
263  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
264  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
265  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
; Per channel: ymm10 accumulates the two (1 - x) products, then
; ymmN = ymm(N+4)*ymmN + ymm10.
266  DB  197,60,89,212                       ; vmulps        %ymm4,%ymm8,%ymm10
267  DB  196,98,53,184,208                   ; vfmadd231ps   %ymm0,%ymm9,%ymm10
268  DB  196,194,93,168,194                  ; vfmadd213ps   %ymm10,%ymm4,%ymm0
269  DB  197,52,89,209                       ; vmulps        %ymm1,%ymm9,%ymm10
270  DB  196,98,61,184,213                   ; vfmadd231ps   %ymm5,%ymm8,%ymm10
271  DB  196,194,85,168,202                  ; vfmadd213ps   %ymm10,%ymm5,%ymm1
272  DB  197,52,89,210                       ; vmulps        %ymm2,%ymm9,%ymm10
273  DB  196,98,61,184,214                   ; vfmadd231ps   %ymm6,%ymm8,%ymm10
274  DB  196,194,77,168,210                  ; vfmadd213ps   %ymm10,%ymm6,%ymm2
; Fourth channel reuses ymm9/ymm8 as accumulators since they are not needed afterwards.
275  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
276  DB  196,66,69,168,193                   ; vfmadd213ps   %ymm9,%ymm7,%ymm8
277  DB  196,194,69,168,216                  ; vfmadd213ps   %ymm8,%ymm7,%ymm3
278  DB  72,173                              ; lods          %ds:(%rsi),%rax
279  DB  255,224                             ; jmpq          *%rax
280
; _sk_plus__hsw: for N = 0..3, ymmN = ymmN + ymm(N+4). No clamping is applied here.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
281PUBLIC _sk_plus__hsw
282_sk_plus__hsw LABEL PROC
283  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
284  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
285  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
286  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
287  DB  72,173                              ; lods          %ds:(%rsi),%rax
288  DB  255,224                             ; jmpq          *%rax
289
; _sk_screen_hsw: for N = 0..3, ymmN = ymmN + ymm(N+4) - ymmN*ymm(N+4).
; Scratch: ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
290PUBLIC _sk_screen_hsw
291_sk_screen_hsw LABEL PROC
; Each pair: ymm8 = ymmN + ymm(N+4), then ymmN = -(ymm(N+4)*ymmN) + ymm8.
292  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
293  DB  196,194,93,172,192                  ; vfnmadd213ps  %ymm8,%ymm4,%ymm0
294  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
295  DB  196,194,85,172,200                  ; vfnmadd213ps  %ymm8,%ymm5,%ymm1
296  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
297  DB  196,194,77,172,208                  ; vfnmadd213ps  %ymm8,%ymm6,%ymm2
298  DB  197,100,88,199                      ; vaddps        %ymm7,%ymm3,%ymm8
299  DB  196,194,69,172,216                  ; vfnmadd213ps  %ymm8,%ymm7,%ymm3
300  DB  72,173                              ; lods          %ds:(%rsi),%rax
301  DB  255,224                             ; jmpq          *%rax
302
; _sk_xor__hsw: for N = 0..3, ymmN = ymmN*(1 - ymm7) + ymm(N+4)*(1 - ymm3),
; using the value of ymm3 on entry. Scratch: eax, ymm8, ymm9, ymm10.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
303PUBLIC _sk_xor__hsw
304_sk_xor__hsw LABEL PROC
; ymm9 = 1.0 - ymm7, ymm8 = 1.0 - ymm3 (0x3f800000 is 1.0f).
305  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
306  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
307  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
308  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
309  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
310  DB  197,60,89,212                       ; vmulps        %ymm4,%ymm8,%ymm10
311  DB  196,194,53,168,194                  ; vfmadd213ps   %ymm10,%ymm9,%ymm0
312  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
313  DB  196,226,61,184,205                  ; vfmadd231ps   %ymm5,%ymm8,%ymm1
314  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
315  DB  196,226,61,184,214                  ; vfmadd231ps   %ymm6,%ymm8,%ymm2
; Fourth channel is accumulated in ymm8 and copied back into ymm3 just before the jump.
316  DB  197,180,89,219                      ; vmulps        %ymm3,%ymm9,%ymm3
317  DB  196,98,69,168,195                   ; vfmadd213ps   %ymm3,%ymm7,%ymm8
318  DB  72,173                              ; lods          %ds:(%rsi),%rax
319  DB  197,124,41,195                      ; vmovaps       %ymm8,%ymm3
320  DB  255,224                             ; jmpq          *%rax
321
; _sk_darken_hsw:
;   for N = 0..2: ymmN = ymmN + ymm(N+4) - max(ymmN*ymm7, ymm3*ymm(N+4))
;   ymm3 = ymm3 + ymm7*(1 - ymm3)
; Scratch: eax, ymm8, ymm9.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
322PUBLIC _sk_darken_hsw
323_sk_darken_hsw LABEL PROC
; First channel (ymm0 / ymm4).
324  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
325  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
326  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
327  DB  196,193,124,95,193                  ; vmaxps        %ymm9,%ymm0,%ymm0
328  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; Second channel (ymm1 / ymm5).
329  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
330  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
331  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
332  DB  196,193,116,95,201                  ; vmaxps        %ymm9,%ymm1,%ymm1
333  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; Third channel (ymm2 / ymm6).
334  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
335  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
336  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
337  DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
338  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Fourth channel: ymm8 = 1.0 - ymm3, then ymm3 += ymm7*ymm8.
339  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
340  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
341  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
342  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
343  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
344  DB  72,173                              ; lods          %ds:(%rsi),%rax
345  DB  255,224                             ; jmpq          *%rax
346
; _sk_lighten_hsw:
;   for N = 0..2: ymmN = ymmN + ymm(N+4) - min(ymmN*ymm7, ymm3*ymm(N+4))
;   ymm3 = ymm3 + ymm7*(1 - ymm3)
; Scratch: eax, ymm8, ymm9.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
347PUBLIC _sk_lighten_hsw
348_sk_lighten_hsw LABEL PROC
; First channel (ymm0 / ymm4).
349  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
350  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
351  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
352  DB  196,193,124,93,193                  ; vminps        %ymm9,%ymm0,%ymm0
353  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; Second channel (ymm1 / ymm5).
354  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
355  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
356  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
357  DB  196,193,116,93,201                  ; vminps        %ymm9,%ymm1,%ymm1
358  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; Third channel (ymm2 / ymm6).
359  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
360  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
361  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
362  DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
363  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Fourth channel: ymm8 = 1.0 - ymm3, then ymm3 += ymm7*ymm8.
364  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
365  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
366  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
367  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
368  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
369  DB  72,173                              ; lods          %ds:(%rsi),%rax
370  DB  255,224                             ; jmpq          *%rax
371
; _sk_difference_hsw:
;   for N = 0..2: ymmN = ymmN + ymm(N+4) - 2*min(ymmN*ymm7, ymm3*ymm(N+4))
;   ymm3 = ymm3 + ymm7*(1 - ymm3)
; Scratch: eax, ymm8, ymm9.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
372PUBLIC _sk_difference_hsw
373_sk_difference_hsw LABEL PROC
; First channel (ymm0 / ymm4); the self-add doubles the min before subtracting.
374  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
375  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
376  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
377  DB  196,193,124,93,193                  ; vminps        %ymm9,%ymm0,%ymm0
378  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
379  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; Second channel (ymm1 / ymm5).
380  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
381  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
382  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
383  DB  196,193,116,93,201                  ; vminps        %ymm9,%ymm1,%ymm1
384  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
385  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; Third channel (ymm2 / ymm6).
386  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
387  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
388  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
389  DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
390  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
391  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Fourth channel: ymm8 = 1.0 - ymm3, then ymm3 += ymm7*ymm8.
392  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
393  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
394  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
395  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
396  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
397  DB  72,173                              ; lods          %ds:(%rsi),%rax
398  DB  255,224                             ; jmpq          *%rax
399
; _sk_exclusion_hsw:
;   for N = 0..2: ymmN = ymmN + ymm(N+4) - 2*ymmN*ymm(N+4)
;   ymm3 = ymm3 + ymm7*(1 - ymm3)
; Scratch: eax, ymm8.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (inferred from the stage name) -- confirm with the generator.
400PUBLIC _sk_exclusion_hsw
401_sk_exclusion_hsw LABEL PROC
; First channel (ymm0 / ymm4); the self-add doubles the product before subtracting.
402  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
403  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
404  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
405  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; Second channel (ymm1 / ymm5).
406  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
407  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
408  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
409  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; Third channel (ymm2 / ymm6).
410  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
411  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
412  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
413  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Fourth channel: ymm8 = 1.0 - ymm3, then ymm3 += ymm7*ymm8.
414  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
415  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
416  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
417  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
418  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
419  DB  72,173                              ; lods          %ds:(%rsi),%rax
420  DB  255,224                             ; jmpq          *%rax
421
; _sk_colorburn_hsw: per lane, for N = 0..2 (with s = ymmN, d = ymm(N+4),
; sa = ymm3, da = ymm7, all as on entry):
;   if d == da:      result = d + s*(1 - da)
;   else if s == 0:  result = d*(1 - sa) + s
;   else:            result = sa*(da - min(da, (da - d)*sa/s)) + s*(1 - da) + d*(1 - sa)
; and ymm3 = sa + da*(1 - sa).
; All three candidates are computed unconditionally and selected with vblendvps,
; so the division by s is executed even in lanes where s == 0 (its result is
; then discarded by the blend).
; Scratch: eax, ymm8-ymm13.
; Ends by loading the next qword from [rsi] (rsi advances by 8) and jumping to it.
; NOTE(review): the s/d/sa/da naming assumes ymm0-ymm3 are source r,g,b,a and
; ymm4-ymm7 destination r,g,b,a (inferred from the stage name) -- confirm.
422PUBLIC _sk_colorburn_hsw
423_sk_colorburn_hsw LABEL PROC
; Constants: ymm9 = 1 - ymm7, ymm8 = 1 - ymm3, ymm10 = 0.
424  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
425  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
426  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
427  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
; First channel (ymm0 / ymm4): ymm11 = s*(1-da), ymm12 = d*(1-sa).
428  DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
429  DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
430  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
431  DB  197,60,89,228                       ; vmulps        %ymm4,%ymm8,%ymm12
; ymm13 = general-case value built from (da - d)*sa/s.
432  DB  197,68,92,236                       ; vsubps        %ymm4,%ymm7,%ymm13
433  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
434  DB  197,20,94,232                       ; vdivps        %ymm0,%ymm13,%ymm13
435  DB  196,65,68,93,237                    ; vminps        %ymm13,%ymm7,%ymm13
436  DB  196,65,68,92,237                    ; vsubps        %ymm13,%ymm7,%ymm13
437  DB  196,66,101,168,235                  ; vfmadd213ps   %ymm11,%ymm3,%ymm13
438  DB  196,65,28,88,237                    ; vaddps        %ymm13,%ymm12,%ymm13
; Select the s == 0 candidate (ymm12 + s) where the compare mask is set.
439  DB  197,28,88,224                       ; vaddps        %ymm0,%ymm12,%ymm12
440  DB  196,193,124,194,194,0               ; vcmpeqps      %ymm10,%ymm0,%ymm0
441  DB  196,195,21,74,196,0                 ; vblendvps     %ymm0,%ymm12,%ymm13,%ymm0
; Then override with d + s*(1-da) where d == da.
442  DB  197,92,194,231,0                    ; vcmpeqps      %ymm7,%ymm4,%ymm12
443  DB  197,36,88,220                       ; vaddps        %ymm4,%ymm11,%ymm11
444  DB  196,195,125,74,195,192              ; vblendvps     %ymm12,%ymm11,%ymm0,%ymm0
; Second channel (ymm1 / ymm5): same sequence as above.
445  DB  197,52,89,217                       ; vmulps        %ymm1,%ymm9,%ymm11
446  DB  197,60,89,229                       ; vmulps        %ymm5,%ymm8,%ymm12
447  DB  197,68,92,237                       ; vsubps        %ymm5,%ymm7,%ymm13
448  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
449  DB  197,20,94,233                       ; vdivps        %ymm1,%ymm13,%ymm13
450  DB  196,65,68,93,237                    ; vminps        %ymm13,%ymm7,%ymm13
451  DB  196,65,68,92,237                    ; vsubps        %ymm13,%ymm7,%ymm13
452  DB  196,66,101,168,235                  ; vfmadd213ps   %ymm11,%ymm3,%ymm13
453  DB  196,65,28,88,237                    ; vaddps        %ymm13,%ymm12,%ymm13
454  DB  197,28,88,225                       ; vaddps        %ymm1,%ymm12,%ymm12
455  DB  196,193,116,194,202,0               ; vcmpeqps      %ymm10,%ymm1,%ymm1
456  DB  196,195,21,74,204,16                ; vblendvps     %ymm1,%ymm12,%ymm13,%ymm1
457  DB  197,84,194,231,0                    ; vcmpeqps      %ymm7,%ymm5,%ymm12
458  DB  197,36,88,221                       ; vaddps        %ymm5,%ymm11,%ymm11
459  DB  196,195,117,74,203,192              ; vblendvps     %ymm12,%ymm11,%ymm1,%ymm1
; Third channel (ymm2 / ymm6): same computation with a different register
; assignment -- ymm9 and ymm10 are consumed here because they are not needed afterwards.
460  DB  197,52,89,202                       ; vmulps        %ymm2,%ymm9,%ymm9
461  DB  196,65,108,194,210,0                ; vcmpeqps      %ymm10,%ymm2,%ymm10
462  DB  197,60,89,222                       ; vmulps        %ymm6,%ymm8,%ymm11
463  DB  197,68,92,230                       ; vsubps        %ymm6,%ymm7,%ymm12
464  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
465  DB  197,28,94,226                       ; vdivps        %ymm2,%ymm12,%ymm12
466  DB  197,164,88,210                      ; vaddps        %ymm2,%ymm11,%ymm2
467  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
468  DB  196,65,68,92,228                    ; vsubps        %ymm12,%ymm7,%ymm12
469  DB  196,66,101,168,225                  ; vfmadd213ps   %ymm9,%ymm3,%ymm12
470  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
471  DB  196,227,37,74,210,160               ; vblendvps     %ymm10,%ymm2,%ymm11,%ymm2
472  DB  197,76,194,215,0                    ; vcmpeqps      %ymm7,%ymm6,%ymm10
473  DB  197,52,88,206                       ; vaddps        %ymm6,%ymm9,%ymm9
474  DB  196,195,109,74,209,160              ; vblendvps     %ymm10,%ymm9,%ymm2,%ymm2
; Fourth channel: ymm3 += ymm7*(1 - ymm3), with ymm8 still holding 1 - ymm3.
475  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
476  DB  72,173                              ; lods          %ds:(%rsi),%rax
477  DB  255,224                             ; jmpq          *%rax
478
; _sk_colordodge_hsw: color-dodge style blend, 8 float lanes per ymm register.
; Shorthand used below: s = ymm0/ymm1/ymm2, sa = ymm3, d = ymm4/ymm5/ymm6, da = ymm7.
; NOTE(review): the s/d names assume ymm0-3 are source r,g,b,a and ymm4-7 are
; destination r,g,b,a -- confirm against the stage's C++ source.
; For each of the three channels (s is replaced in place):
;   d == 0   -> d + s*(1-da)
;   s == sa  -> s + d*(1-sa)
;   else     -> sa*min(da, sa*d/(sa-s)) + s*(1-da) + d*(1-sa)
; The d == 0 blend is applied last, so it wins when both conditions hold.
; Finally sa = sa + da*(1-sa).
; Scratch: eax, ymm8-ymm13. Exits by loading the next quadword at (%rsi) into
; rax and jumping to it.
479PUBLIC _sk_colordodge_hsw
480_sk_colordodge_hsw LABEL PROC
  ; Constants shared by all channels: ymm8 = 0, ymm10 = 1-da, ymm9 = 1-sa.
481  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
482  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
483  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
484  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
485  DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
  ; First channel: s = ymm0, d = ymm4. ymm11 = s*(1-da).
486  DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
487  DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
488  DB  197,100,89,228                      ; vmulps        %ymm4,%ymm3,%ymm12
489  DB  197,100,92,232                      ; vsubps        %ymm0,%ymm3,%ymm13
490  DB  196,65,28,94,229                    ; vdivps        %ymm13,%ymm12,%ymm12
491  DB  197,52,89,236                       ; vmulps        %ymm4,%ymm9,%ymm13
492  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
493  DB  196,66,101,168,227                  ; vfmadd213ps   %ymm11,%ymm3,%ymm12
494  DB  196,65,20,88,228                    ; vaddps        %ymm12,%ymm13,%ymm12
495  DB  197,20,88,232                       ; vaddps        %ymm0,%ymm13,%ymm13
  ; Select: (s == sa) picks ymm13, then (d == 0) picks ymm11 + d.
496  DB  197,252,194,195,0                   ; vcmpeqps      %ymm3,%ymm0,%ymm0
497  DB  196,195,29,74,197,0                 ; vblendvps     %ymm0,%ymm13,%ymm12,%ymm0
498  DB  196,65,92,194,224,0                 ; vcmpeqps      %ymm8,%ymm4,%ymm12
499  DB  197,36,88,220                       ; vaddps        %ymm4,%ymm11,%ymm11
500  DB  196,195,125,74,195,192              ; vblendvps     %ymm12,%ymm11,%ymm0,%ymm0
  ; Second channel: s = ymm1, d = ymm5 (same sequence).
501  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
502  DB  197,100,89,229                      ; vmulps        %ymm5,%ymm3,%ymm12
503  DB  197,100,92,233                      ; vsubps        %ymm1,%ymm3,%ymm13
504  DB  196,65,28,94,229                    ; vdivps        %ymm13,%ymm12,%ymm12
505  DB  197,52,89,237                       ; vmulps        %ymm5,%ymm9,%ymm13
506  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
507  DB  196,66,101,168,227                  ; vfmadd213ps   %ymm11,%ymm3,%ymm12
508  DB  196,65,20,88,228                    ; vaddps        %ymm12,%ymm13,%ymm12
509  DB  197,20,88,233                       ; vaddps        %ymm1,%ymm13,%ymm13
510  DB  197,244,194,203,0                   ; vcmpeqps      %ymm3,%ymm1,%ymm1
511  DB  196,195,29,74,205,16                ; vblendvps     %ymm1,%ymm13,%ymm12,%ymm1
512  DB  196,65,84,194,224,0                 ; vcmpeqps      %ymm8,%ymm5,%ymm12
513  DB  197,36,88,221                       ; vaddps        %ymm5,%ymm11,%ymm11
514  DB  196,195,117,74,203,192              ; vblendvps     %ymm12,%ymm11,%ymm1,%ymm1
  ; Third channel: s = ymm2, d = ymm6. ymm10 and ymm8 are consumed here
  ; because no later channel needs them.
515  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
516  DB  197,100,89,222                      ; vmulps        %ymm6,%ymm3,%ymm11
517  DB  197,100,92,226                      ; vsubps        %ymm2,%ymm3,%ymm12
518  DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
519  DB  197,52,89,230                       ; vmulps        %ymm6,%ymm9,%ymm12
520  DB  196,65,68,93,219                    ; vminps        %ymm11,%ymm7,%ymm11
521  DB  196,66,101,168,218                  ; vfmadd213ps   %ymm10,%ymm3,%ymm11
522  DB  196,65,28,88,219                    ; vaddps        %ymm11,%ymm12,%ymm11
523  DB  197,28,88,226                       ; vaddps        %ymm2,%ymm12,%ymm12
524  DB  197,236,194,211,0                   ; vcmpeqps      %ymm3,%ymm2,%ymm2
525  DB  196,195,37,74,212,32                ; vblendvps     %ymm2,%ymm12,%ymm11,%ymm2
526  DB  196,65,76,194,192,0                 ; vcmpeqps      %ymm8,%ymm6,%ymm8
527  DB  197,44,88,214                       ; vaddps        %ymm6,%ymm10,%ymm10
528  DB  196,195,109,74,210,128              ; vblendvps     %ymm8,%ymm10,%ymm2,%ymm2
  ; sa = sa + da*(1-sa), then dispatch to the next stage pointer.
529  DB  196,194,69,184,217                  ; vfmadd231ps   %ymm9,%ymm7,%ymm3
530  DB  72,173                              ; lods          %ds:(%rsi),%rax
531  DB  255,224                             ; jmpq          *%rax
532
; _sk_hardlight_hsw: hard-light style blend, 8 float lanes per ymm register.
; Shorthand used below: s = ymm0/ymm1/ymm2, sa = ymm3, d = ymm4/ymm5/ymm6, da = ymm7.
; NOTE(review): the s/d names assume ymm0-3 are source r,g,b,a and ymm4-7 are
; destination r,g,b,a -- confirm against the stage's C++ source.
; For each of the three channels (s is replaced in place):
;   s = s*(1-da) + d*(1-sa) + (2*s <= sa ? 2*s*d : sa*da - 2*(sa-s)*(da-d))
; Finally sa = sa + da*(1-sa).
; Scratch: eax, ymm8-ymm14. Exits by loading the next quadword at (%rsi) into
; rax and jumping to it.
533PUBLIC _sk_hardlight_hsw
534_sk_hardlight_hsw LABEL PROC
  ; Shared constants: ymm10 = 1-da, ymm8 = 1-sa.
535  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
536  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
537  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
538  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
539  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
  ; First channel (ymm0/ymm4): ymm11 = d*(1-sa) + s*(1-da).
540  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
541  DB  196,98,45,184,216                   ; vfmadd231ps   %ymm0,%ymm10,%ymm11
  ; ymm12 = mask(2*s <= sa), ymm13 = 2*s*d.
542  DB  197,124,88,200                      ; vaddps        %ymm0,%ymm0,%ymm9
543  DB  197,52,194,227,2                    ; vcmpleps      %ymm3,%ymm9,%ymm12
544  DB  197,124,89,204                      ; vmulps        %ymm4,%ymm0,%ymm9
545  DB  196,65,52,88,233                    ; vaddps        %ymm9,%ymm9,%ymm13
  ; ymm9 = sa*da; it is kept for the remaining channels.
546  DB  197,100,89,207                      ; vmulps        %ymm7,%ymm3,%ymm9
547  DB  197,68,92,244                       ; vsubps        %ymm4,%ymm7,%ymm14
548  DB  197,228,92,192                      ; vsubps        %ymm0,%ymm3,%ymm0
549  DB  196,193,124,89,198                  ; vmulps        %ymm14,%ymm0,%ymm0
550  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
551  DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
552  DB  196,195,125,74,197,192              ; vblendvps     %ymm12,%ymm13,%ymm0,%ymm0
553  DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
  ; Second channel (ymm1/ymm5).
554  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
555  DB  196,98,61,184,221                   ; vfmadd231ps   %ymm5,%ymm8,%ymm11
556  DB  197,116,88,225                      ; vaddps        %ymm1,%ymm1,%ymm12
557  DB  197,28,194,227,2                    ; vcmpleps      %ymm3,%ymm12,%ymm12
558  DB  197,116,89,237                      ; vmulps        %ymm5,%ymm1,%ymm13
559  DB  196,65,20,88,237                    ; vaddps        %ymm13,%ymm13,%ymm13
560  DB  197,68,92,245                       ; vsubps        %ymm5,%ymm7,%ymm14
561  DB  197,228,92,201                      ; vsubps        %ymm1,%ymm3,%ymm1
562  DB  196,193,116,89,206                  ; vmulps        %ymm14,%ymm1,%ymm1
563  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
564  DB  197,180,92,201                      ; vsubps        %ymm1,%ymm9,%ymm1
565  DB  196,195,117,74,205,192              ; vblendvps     %ymm12,%ymm13,%ymm1,%ymm1
566  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
  ; Third channel (ymm2/ymm6); ymm10 is consumed as the accumulator.
567  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
568  DB  196,98,61,184,214                   ; vfmadd231ps   %ymm6,%ymm8,%ymm10
569  DB  197,108,88,218                      ; vaddps        %ymm2,%ymm2,%ymm11
570  DB  197,36,194,219,2                    ; vcmpleps      %ymm3,%ymm11,%ymm11
571  DB  197,108,89,230                      ; vmulps        %ymm6,%ymm2,%ymm12
572  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
573  DB  197,68,92,238                       ; vsubps        %ymm6,%ymm7,%ymm13
574  DB  197,228,92,210                      ; vsubps        %ymm2,%ymm3,%ymm2
575  DB  196,193,108,89,213                  ; vmulps        %ymm13,%ymm2,%ymm2
576  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
577  DB  197,180,92,210                      ; vsubps        %ymm2,%ymm9,%ymm2
578  DB  196,195,109,74,212,176              ; vblendvps     %ymm11,%ymm12,%ymm2,%ymm2
579  DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
  ; sa = sa + da*(1-sa), then dispatch to the next stage pointer.
580  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
581  DB  72,173                              ; lods          %ds:(%rsi),%rax
582  DB  255,224                             ; jmpq          *%rax
583
; _sk_overlay_hsw: overlay style blend, 8 float lanes per ymm register.
; Shorthand used below: s = ymm0/ymm1/ymm2, sa = ymm3, d = ymm4/ymm5/ymm6, da = ymm7.
; NOTE(review): the s/d names assume ymm0-3 are source r,g,b,a and ymm4-7 are
; destination r,g,b,a -- confirm against the stage's C++ source.
; For each of the three channels (s is replaced in place):
;   s = s*(1-da) + d*(1-sa) + (2*d <= da ? 2*s*d : sa*da - 2*(sa-s)*(da-d))
; Note the selector compares 2*d against da (ymm7), not 2*s against sa.
; Finally sa = sa + da*(1-sa).
; Scratch: eax, ymm8-ymm14. Exits by loading the next quadword at (%rsi) into
; rax and jumping to it.
584PUBLIC _sk_overlay_hsw
585_sk_overlay_hsw LABEL PROC
  ; Shared constants: ymm10 = 1-da, ymm8 = 1-sa.
586  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
587  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
588  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
589  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
590  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
  ; First channel (ymm0/ymm4): ymm11 = d*(1-sa) + s*(1-da).
591  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
592  DB  196,98,45,184,216                   ; vfmadd231ps   %ymm0,%ymm10,%ymm11
  ; ymm12 = mask(2*d <= da), ymm13 = 2*s*d.
593  DB  197,92,88,204                       ; vaddps        %ymm4,%ymm4,%ymm9
594  DB  197,52,194,231,2                    ; vcmpleps      %ymm7,%ymm9,%ymm12
595  DB  197,124,89,204                      ; vmulps        %ymm4,%ymm0,%ymm9
596  DB  196,65,52,88,233                    ; vaddps        %ymm9,%ymm9,%ymm13
  ; ymm9 = sa*da; it is kept for the remaining channels.
597  DB  197,100,89,207                      ; vmulps        %ymm7,%ymm3,%ymm9
598  DB  197,68,92,244                       ; vsubps        %ymm4,%ymm7,%ymm14
599  DB  197,228,92,192                      ; vsubps        %ymm0,%ymm3,%ymm0
600  DB  196,193,124,89,198                  ; vmulps        %ymm14,%ymm0,%ymm0
601  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
602  DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
603  DB  196,195,125,74,197,192              ; vblendvps     %ymm12,%ymm13,%ymm0,%ymm0
604  DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
  ; Second channel (ymm1/ymm5).
605  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
606  DB  196,98,61,184,221                   ; vfmadd231ps   %ymm5,%ymm8,%ymm11
607  DB  197,84,88,229                       ; vaddps        %ymm5,%ymm5,%ymm12
608  DB  197,28,194,231,2                    ; vcmpleps      %ymm7,%ymm12,%ymm12
609  DB  197,116,89,237                      ; vmulps        %ymm5,%ymm1,%ymm13
610  DB  196,65,20,88,237                    ; vaddps        %ymm13,%ymm13,%ymm13
611  DB  197,68,92,245                       ; vsubps        %ymm5,%ymm7,%ymm14
612  DB  197,228,92,201                      ; vsubps        %ymm1,%ymm3,%ymm1
613  DB  196,193,116,89,206                  ; vmulps        %ymm14,%ymm1,%ymm1
614  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
615  DB  197,180,92,201                      ; vsubps        %ymm1,%ymm9,%ymm1
616  DB  196,195,117,74,205,192              ; vblendvps     %ymm12,%ymm13,%ymm1,%ymm1
617  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
  ; Third channel (ymm2/ymm6); ymm10 is consumed as the accumulator.
618  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
619  DB  196,98,61,184,214                   ; vfmadd231ps   %ymm6,%ymm8,%ymm10
620  DB  197,76,88,222                       ; vaddps        %ymm6,%ymm6,%ymm11
621  DB  197,36,194,223,2                    ; vcmpleps      %ymm7,%ymm11,%ymm11
622  DB  197,108,89,230                      ; vmulps        %ymm6,%ymm2,%ymm12
623  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
624  DB  197,68,92,238                       ; vsubps        %ymm6,%ymm7,%ymm13
625  DB  197,228,92,210                      ; vsubps        %ymm2,%ymm3,%ymm2
626  DB  196,193,108,89,213                  ; vmulps        %ymm13,%ymm2,%ymm2
627  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
628  DB  197,180,92,210                      ; vsubps        %ymm2,%ymm9,%ymm2
629  DB  196,195,109,74,212,176              ; vblendvps     %ymm11,%ymm12,%ymm2,%ymm2
630  DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
  ; sa = sa + da*(1-sa), then dispatch to the next stage pointer.
631  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
632  DB  72,173                              ; lods          %ds:(%rsi),%rax
633  DB  255,224                             ; jmpq          *%rax
634
; _sk_softlight_hsw: soft-light style blend, 8 float lanes per ymm register.
; Shorthand used below: s = ymm0/ymm1/ymm2, sa = ymm3, d = ymm4/ymm5/ymm6, da = ymm7.
; NOTE(review): the s/d names assume ymm0-3 are source r,g,b,a and ymm4-7 are
; destination r,g,b,a -- confirm against the stage's C++ source.
; Per channel:
;   m    = (0 < da) ? d/da : 0
;   dark = (16*m*m + 4*m)*(m-1) + 7*m
;   lite = rcp(rsqrt(m)) - m            (approximate sqrt(m) - m)
;   term = (2*s <= sa) ? d*(sa + (2*s-sa)*(1-m))
;                      : sa*d + da*(2*s-sa)*((4*d <= da) ? dark : lite)
;   s    = s*(1-da) + d*(1-sa) + term
; Finally sa = sa + da*(1-sa).
; Stack: reserves 0x38 bytes and spills the incoming ymm2 to (%rsp), because
; ymm2 is overwritten with the first channel's term before the third channel
; is computed. Scratch: eax, ymm8-ymm15.
; Exits by loading the next quadword at (%rsi) into rax, releasing the stack
; space, and jumping to rax.
635PUBLIC _sk_softlight_hsw
636_sk_softlight_hsw LABEL PROC
637  DB  72,131,236,56                       ; sub           $0x38,%rsp
638  DB  197,252,17,20,36                    ; vmovups       %ymm2,(%rsp)
  ; ymm10 = 0, ymm11 = mask(0 < da); both are reused for every channel.
639  DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
640  DB  197,44,194,223,1                    ; vcmpltps      %ymm7,%ymm10,%ymm11
  ; First channel (ymm0/ymm4): ymm12 = m, ymm13 = 4*m.
641  DB  197,92,94,199                       ; vdivps        %ymm7,%ymm4,%ymm8
642  DB  196,67,45,74,224,176                ; vblendvps     %ymm11,%ymm8,%ymm10,%ymm12
643  DB  196,65,28,88,196                    ; vaddps        %ymm12,%ymm12,%ymm8
644  DB  196,65,60,88,232                    ; vaddps        %ymm8,%ymm8,%ymm13
  ; ymm8 = 1.0 (kept for all channels).
645  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
646  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
647  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
  ; ymm13 = (4m)^2 + 4m, ymm14 = m-1.
648  DB  196,66,21,168,237                   ; vfmadd213ps   %ymm13,%ymm13,%ymm13
649  DB  196,65,28,92,240                    ; vsubps        %ymm8,%ymm12,%ymm14
  ; ymm9 = 7.0 (kept for all channels); ymm15 = dark.
650  DB  184,0,0,224,64                      ; mov           $0x40e00000,%eax
651  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
652  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
653  DB  196,65,28,89,249                    ; vmulps        %ymm9,%ymm12,%ymm15
654  DB  196,66,21,184,254                   ; vfmadd231ps   %ymm14,%ymm13,%ymm15
  ; ymm13 = lite.
655  DB  196,65,124,82,236                   ; vrsqrtps      %ymm12,%ymm13
656  DB  196,65,124,83,237                   ; vrcpps        %ymm13,%ymm13
657  DB  196,65,20,92,236                    ; vsubps        %ymm12,%ymm13,%ymm13
  ; ymm13 = (4*d <= da) ? dark : lite.
658  DB  197,92,88,244                       ; vaddps        %ymm4,%ymm4,%ymm14
659  DB  196,65,12,88,246                    ; vaddps        %ymm14,%ymm14,%ymm14
660  DB  197,12,194,247,2                    ; vcmpleps      %ymm7,%ymm14,%ymm14
661  DB  196,67,21,74,239,224                ; vblendvps     %ymm14,%ymm15,%ymm13,%ymm13
  ; ymm14 = 2*s, ymm15 = 2*s - sa, ymm12 = d*(sa + (2*s-sa)*(1-m)).
662  DB  197,124,88,240                      ; vaddps        %ymm0,%ymm0,%ymm14
663  DB  197,12,92,251                       ; vsubps        %ymm3,%ymm14,%ymm15
664  DB  196,65,60,92,228                    ; vsubps        %ymm12,%ymm8,%ymm12
665  DB  196,98,5,168,227                    ; vfmadd213ps   %ymm3,%ymm15,%ymm12
666  DB  197,28,89,228                       ; vmulps        %ymm4,%ymm12,%ymm12
  ; ymm13 = sa*d + da*(2*s-sa)*ymm13.
667  DB  197,4,89,255                        ; vmulps        %ymm7,%ymm15,%ymm15
668  DB  196,65,4,89,237                     ; vmulps        %ymm13,%ymm15,%ymm13
669  DB  196,98,101,184,236                  ; vfmadd231ps   %ymm4,%ymm3,%ymm13
  ; First-channel term goes to ymm2 (the original ymm2 is on the stack).
670  DB  197,12,194,243,2                    ; vcmpleps      %ymm3,%ymm14,%ymm14
671  DB  196,195,21,74,212,224               ; vblendvps     %ymm14,%ymm12,%ymm13,%ymm2
  ; Second channel (ymm1/ymm5): ymm13 = m; the term ends up in ymm13.
672  DB  197,84,94,239                       ; vdivps        %ymm7,%ymm5,%ymm13
673  DB  196,67,45,74,237,176                ; vblendvps     %ymm11,%ymm13,%ymm10,%ymm13
674  DB  196,65,20,88,245                    ; vaddps        %ymm13,%ymm13,%ymm14
675  DB  196,65,12,88,246                    ; vaddps        %ymm14,%ymm14,%ymm14
676  DB  196,66,13,168,246                   ; vfmadd213ps   %ymm14,%ymm14,%ymm14
677  DB  196,65,20,92,248                    ; vsubps        %ymm8,%ymm13,%ymm15
678  DB  196,65,4,89,246                     ; vmulps        %ymm14,%ymm15,%ymm14
679  DB  196,66,53,184,245                   ; vfmadd231ps   %ymm13,%ymm9,%ymm14
680  DB  196,65,124,82,253                   ; vrsqrtps      %ymm13,%ymm15
681  DB  196,65,124,83,255                   ; vrcpps        %ymm15,%ymm15
682  DB  196,65,4,92,253                     ; vsubps        %ymm13,%ymm15,%ymm15
683  DB  197,84,88,229                       ; vaddps        %ymm5,%ymm5,%ymm12
684  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
685  DB  197,28,194,231,2                    ; vcmpleps      %ymm7,%ymm12,%ymm12
686  DB  196,67,5,74,230,192                 ; vblendvps     %ymm12,%ymm14,%ymm15,%ymm12
687  DB  197,116,88,241                      ; vaddps        %ymm1,%ymm1,%ymm14
688  DB  196,65,60,92,237                    ; vsubps        %ymm13,%ymm8,%ymm13
689  DB  197,12,92,251                       ; vsubps        %ymm3,%ymm14,%ymm15
690  DB  196,98,5,168,235                    ; vfmadd213ps   %ymm3,%ymm15,%ymm13
691  DB  197,4,89,255                        ; vmulps        %ymm7,%ymm15,%ymm15
692  DB  196,65,4,89,228                     ; vmulps        %ymm12,%ymm15,%ymm12
693  DB  197,20,89,237                       ; vmulps        %ymm5,%ymm13,%ymm13
694  DB  196,98,101,184,229                  ; vfmadd231ps   %ymm5,%ymm3,%ymm12
695  DB  197,12,194,243,2                    ; vcmpleps      %ymm3,%ymm14,%ymm14
696  DB  196,67,29,74,237,224                ; vblendvps     %ymm14,%ymm13,%ymm12,%ymm13
  ; Third channel (d = ymm6): ymm10 = m; ymm10, ymm11 and ymm9 are consumed.
697  DB  197,76,94,231                       ; vdivps        %ymm7,%ymm6,%ymm12
698  DB  196,67,45,74,212,176                ; vblendvps     %ymm11,%ymm12,%ymm10,%ymm10
699  DB  196,65,44,88,218                    ; vaddps        %ymm10,%ymm10,%ymm11
700  DB  196,65,36,88,219                    ; vaddps        %ymm11,%ymm11,%ymm11
701  DB  196,66,37,168,219                   ; vfmadd213ps   %ymm11,%ymm11,%ymm11
702  DB  196,65,44,92,224                    ; vsubps        %ymm8,%ymm10,%ymm12
703  DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
704  DB  196,66,45,168,203                   ; vfmadd213ps   %ymm11,%ymm10,%ymm9
705  DB  196,65,124,82,218                   ; vrsqrtps      %ymm10,%ymm11
706  DB  196,65,124,83,219                   ; vrcpps        %ymm11,%ymm11
707  DB  196,65,36,92,218                    ; vsubps        %ymm10,%ymm11,%ymm11
708  DB  197,76,88,230                       ; vaddps        %ymm6,%ymm6,%ymm12
709  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
710  DB  197,28,194,231,2                    ; vcmpleps      %ymm7,%ymm12,%ymm12
711  DB  196,67,37,74,201,192                ; vblendvps     %ymm12,%ymm9,%ymm11,%ymm9
  ; Reload the spilled third-channel s into ymm14.
712  DB  197,124,16,52,36                    ; vmovups       (%rsp),%ymm14
713  DB  196,65,12,88,222                    ; vaddps        %ymm14,%ymm14,%ymm11
714  DB  197,36,92,227                       ; vsubps        %ymm3,%ymm11,%ymm12
715  DB  196,65,60,92,210                    ; vsubps        %ymm10,%ymm8,%ymm10
716  DB  196,98,29,168,211                   ; vfmadd213ps   %ymm3,%ymm12,%ymm10
717  DB  197,28,89,231                       ; vmulps        %ymm7,%ymm12,%ymm12
718  DB  196,65,28,89,201                    ; vmulps        %ymm9,%ymm12,%ymm9
719  DB  197,44,89,214                       ; vmulps        %ymm6,%ymm10,%ymm10
720  DB  196,98,101,184,206                  ; vfmadd231ps   %ymm6,%ymm3,%ymm9
721  DB  197,36,194,219,2                    ; vcmpleps      %ymm3,%ymm11,%ymm11
722  DB  196,67,53,74,202,176                ; vblendvps     %ymm11,%ymm10,%ymm9,%ymm9
  ; Combine: ymm10 = 1-da, ymm8 = 1-sa; add s*(1-da) + d*(1-sa) to each term
  ; (terms are in ymm2, ymm13, ymm9 for the three channels).
723  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
724  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
725  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
726  DB  196,98,45,184,216                   ; vfmadd231ps   %ymm0,%ymm10,%ymm11
727  DB  196,193,108,88,195                  ; vaddps        %ymm11,%ymm2,%ymm0
728  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
729  DB  196,226,61,184,205                  ; vfmadd231ps   %ymm5,%ymm8,%ymm1
730  DB  196,193,116,88,205                  ; vaddps        %ymm13,%ymm1,%ymm1
731  DB  196,193,44,89,214                   ; vmulps        %ymm14,%ymm10,%ymm2
732  DB  196,226,61,184,214                  ; vfmadd231ps   %ymm6,%ymm8,%ymm2
733  DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
  ; sa = sa + da*(1-sa); restore rsp before jumping to the next stage pointer.
734  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
735  DB  72,173                              ; lods          %ds:(%rsi),%rax
736  DB  72,131,196,56                       ; add           $0x38,%rsp
737  DB  255,224                             ; jmpq          *%rax
738
; _sk_clamp_0_hsw: clamps every lane of ymm0-ymm3 from below at 0.0
; (ymmN = max(ymmN, 0)). Scratch: ymm8 (zero). Exits by loading the next
; quadword at (%rsi) into rax and jumping to it.
739PUBLIC _sk_clamp_0_hsw
740_sk_clamp_0_hsw LABEL PROC
741  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
742  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
743  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
744  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
745  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
746  DB  72,173                              ; lods          %ds:(%rsi),%rax
747  DB  255,224                             ; jmpq          *%rax
748
; _sk_clamp_1_hsw: clamps every lane of ymm0-ymm3 from above at 1.0
; (ymmN = min(ymmN, 1)). Scratch: eax, ymm8 (1.0 broadcast from 0x3f800000).
; Exits by loading the next quadword at (%rsi) into rax and jumping to it.
749PUBLIC _sk_clamp_1_hsw
750_sk_clamp_1_hsw LABEL PROC
751  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
752  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
753  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
754  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
755  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
756  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
757  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
758  DB  72,173                              ; lods          %ds:(%rsi),%rax
759  DB  255,224                             ; jmpq          *%rax
760
; _sk_clamp_a_hsw: first clamps ymm3 from above at 1.0, then clamps ymm0-ymm2
; from above at the (already clamped) ymm3:
;   ymm3 = min(ymm3, 1);  ymm0/1/2 = min(ymm0/1/2, ymm3)
; NOTE(review): ymm3 is presumably alpha and ymm0-2 the colour channels, which
; would make this a "colour <= alpha" clamp -- confirm.
; Scratch: eax, ymm8. Exits by loading the next quadword at (%rsi) into rax
; and jumping to it.
761PUBLIC _sk_clamp_a_hsw
762_sk_clamp_a_hsw LABEL PROC
763  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
764  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
765  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
766  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
767  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
768  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
769  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
770  DB  72,173                              ; lods          %ds:(%rsi),%rax
771  DB  255,224                             ; jmpq          *%rax
772
; _sk_set_rgb_hsw: consumes one quadword from (%rsi) as a pointer and
; broadcasts the three 32-bit values at offsets 0, 4 and 8 from it into all
; lanes of ymm0, ymm1 and ymm2 respectively. ymm3 is left untouched.
; NOTE(review): the pointer presumably addresses three consecutive floats
; (r, g, b) -- confirm against the caller.
; Then loads the following quadword from (%rsi) into rax and jumps to it.
773PUBLIC _sk_set_rgb_hsw
774_sk_set_rgb_hsw LABEL PROC
775  DB  72,173                              ; lods          %ds:(%rsi),%rax
776  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
777  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
778  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
779  DB  72,173                              ; lods          %ds:(%rsi),%rax
780  DB  255,224                             ; jmpq          *%rax
781
; _sk_swap_rb_hsw: exchanges ymm0 and ymm2, using ymm8 as the temporary.
; ymm1 and ymm3 are untouched. The next-stage pointer is loaded from (%rsi)
; into rax in the middle of the swap and jumped to at the end.
782PUBLIC _sk_swap_rb_hsw
783_sk_swap_rb_hsw LABEL PROC
784  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
785  DB  72,173                              ; lods          %ds:(%rsi),%rax
786  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
787  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
788  DB  255,224                             ; jmpq          *%rax
789
; _sk_swap_hsw: exchanges the register group ymm0-ymm3 with ymm4-ymm7
; (ymm0<->ymm4, ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7).
; Scratch: ymm8-ymm11 hold the old ymm3..ymm0 during the exchange.
; The next-stage pointer is loaded from (%rsi) into rax and jumped to at the end.
790PUBLIC _sk_swap_hsw
791_sk_swap_hsw LABEL PROC
  ; Save ymm0-ymm3 in ymm11..ymm8.
792  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
793  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
794  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
795  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
796  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; ymm4-ymm7 -> ymm0-ymm3.
797  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
798  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
799  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
800  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
  ; Saved values -> ymm4-ymm7.
801  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
802  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
803  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
804  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
805  DB  255,224                             ; jmpq          *%rax
806
; _sk_move_src_dst_hsw: copies ymm0-ymm3 into ymm4-ymm7 (ymm0-ymm3 are left
; unchanged). Loads the next quadword at (%rsi) into rax first and jumps to
; it after the copies.
807PUBLIC _sk_move_src_dst_hsw
808_sk_move_src_dst_hsw LABEL PROC
809  DB  72,173                              ; lods          %ds:(%rsi),%rax
810  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
811  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
812  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
813  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
814  DB  255,224                             ; jmpq          *%rax
815
; _sk_move_dst_src_hsw: copies ymm4-ymm7 into ymm0-ymm3 (ymm4-ymm7 are left
; unchanged). Loads the next quadword at (%rsi) into rax first and jumps to
; it after the copies.
816PUBLIC _sk_move_dst_src_hsw
817_sk_move_dst_src_hsw LABEL PROC
818  DB  72,173                              ; lods          %ds:(%rsi),%rax
819  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
820  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
821  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
822  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
823  DB  255,224                             ; jmpq          *%rax
824
; _sk_premul_hsw: multiplies ymm0, ymm1 and ymm2 lane-wise by ymm3
; (ymm3 itself is unchanged). No scratch vector registers.
; NOTE(review): presumably colour *= alpha, with ymm3 as alpha -- confirm.
; Exits by loading the next quadword at (%rsi) into rax and jumping to it.
825PUBLIC _sk_premul_hsw
826_sk_premul_hsw LABEL PROC
827  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
828  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
829  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
830  DB  72,173                              ; lods          %ds:(%rsi),%rax
831  DB  255,224                             ; jmpq          *%rax
832
; _sk_unpremul_hsw: scales ymm0, ymm1 and ymm2 lane-wise by
;   scale = (ymm3 == 0) ? 0 : 1.0/ymm3
; so lanes where ymm3 is zero produce 0 instead of a division result.
; ymm3 is unchanged. Scratch: eax, ymm8-ymm10.
; NOTE(review): presumably colour /= alpha, with ymm3 as alpha -- confirm.
; Exits by loading the next quadword at (%rsi) into rax and jumping to it.
833PUBLIC _sk_unpremul_hsw
834_sk_unpremul_hsw LABEL PROC
  ; ymm8 = 0, ymm9 = mask(ymm3 == 0).
835  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
836  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
  ; ymm10 = 1.0 / ymm3 (full-precision divide).
837  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
838  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
839  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
840  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
  ; ymm8 = mask ? 0 : 1/ymm3.
841  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
842  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
843  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
844  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
845  DB  72,173                              ; lods          %ds:(%rsi),%rax
846  DB  255,224                             ; jmpq          *%rax
847
; _sk_from_srgb_hsw: applies a piecewise transfer curve to each lane x of
; ymm0, ymm1 and ymm2 (ymm3 is untouched):
;   x < c4 ? c0*x : x*x*(c1*x + c2) + c3
; with float constants (bit patterns are in the mov immediates):
;   c0 = 0x3d9e8391 (~0.0774)   c1 = 0x3e99999a (~0.3)
;   c2 = 0x3f328f5c (~0.6975)   c3 = 0x3b23d70a (~0.0025)
;   c4 = 0x3d6147ae (~0.055)
; NOTE(review): by its name this approximates the sRGB-to-linear decode with
; a cubic polynomial -- confirm the intended accuracy against the C++ source.
; Scratch: eax, ymm8-ymm15. Exits by loading the next quadword at (%rsi) into
; rax and jumping to it.
848PUBLIC _sk_from_srgb_hsw
849_sk_from_srgb_hsw LABEL PROC
  ; ymm8 = c0; ymm9 = c0*x; ymm10 = x*x (x = ymm0).
850  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
851  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
852  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
853  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
854  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
  ; ymm11 = c1, ymm12 = c2.
855  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
856  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
857  DB  196,66,125,88,219                   ; vpbroadcastd  %xmm11,%ymm11
858  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
859  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
860  DB  196,66,125,88,228                   ; vpbroadcastd  %xmm12,%ymm12
  ; ymm13 = c1*x + c2 (on a copy of c1, so ymm11 survives for later channels).
861  DB  196,65,125,111,235                  ; vmovdqa       %ymm11,%ymm13
862  DB  196,66,125,168,236                  ; vfmadd213ps   %ymm12,%ymm0,%ymm13
  ; ymm14 = c3; ymm13 = x*x*ymm13 + c3.
863  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
864  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
865  DB  196,66,125,88,246                   ; vpbroadcastd  %xmm14,%ymm14
866  DB  196,66,45,168,238                   ; vfmadd213ps   %ymm14,%ymm10,%ymm13
  ; ymm10 = c4; select the linear piece where x < c4.
867  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
868  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
869  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
870  DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
871  DB  196,195,21,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
  ; Same curve for ymm1.
872  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
873  DB  197,116,89,233                      ; vmulps        %ymm1,%ymm1,%ymm13
874  DB  196,65,125,111,251                  ; vmovdqa       %ymm11,%ymm15
875  DB  196,66,117,168,252                  ; vfmadd213ps   %ymm12,%ymm1,%ymm15
876  DB  196,66,21,168,254                   ; vfmadd213ps   %ymm14,%ymm13,%ymm15
877  DB  196,193,116,194,202,1               ; vcmpltps      %ymm10,%ymm1,%ymm1
878  DB  196,195,5,74,201,16                 ; vblendvps     %ymm1,%ymm9,%ymm15,%ymm1
  ; Same curve for ymm2; ymm8 and ymm11 are consumed in place here.
879  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
880  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
881  DB  196,66,109,168,220                  ; vfmadd213ps   %ymm12,%ymm2,%ymm11
882  DB  196,66,53,168,222                   ; vfmadd213ps   %ymm14,%ymm9,%ymm11
883  DB  196,193,108,194,210,1               ; vcmpltps      %ymm10,%ymm2,%ymm2
884  DB  196,195,37,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm11,%ymm2
885  DB  72,173                              ; lods          %ds:(%rsi),%rax
886  DB  255,224                             ; jmpq          *%rax
887
; _sk_to_srgb_hsw: applies a piecewise transfer curve to each lane x of
; ymm0, ymm1 and ymm2 (ymm3 is untouched):
;   r    = rsqrt(x)
;   sqrt = rcp(r)          (approximate x^(1/2))
;   ftrt = rsqrt(r)        (approximate x^(1/4))
;   x < c5 ? c0*x : min(1.0, c2*sqrt + c3 + c1*ftrt)
; with float constants (bit patterns are in the mov immediates):
;   c0 = 0x41475c29 (~12.46)    c1 = 0x3ed287c2 (~0.4112)
;   c2 = 0x3f306fce (~0.6892)   c3 = 0x3dca57a8 with the sign bit flipped (~-0.0988)
;   c5 = 0x3b8ce704 (~0.0043)
; vrsqrtps/vrcpps are hardware approximations, so results are not exact roots.
; NOTE(review): by its name this approximates the linear-to-sRGB encode --
; confirm the intended accuracy against the C++ source.
; Scratch: eax, ymm8-ymm15. Exits by loading the next quadword at (%rsi) into
; rax and jumping to it.
888PUBLIC _sk_to_srgb_hsw
889_sk_to_srgb_hsw LABEL PROC
  ; First channel (x = ymm0): ymm11 = sqrt, ymm12 = ftrt.
890  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
891  DB  196,65,124,83,216                   ; vrcpps        %ymm8,%ymm11
892  DB  196,65,124,82,224                   ; vrsqrtps      %ymm8,%ymm12
  ; ymm8 = c0; ymm13 = c0*x (linear piece).
893  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
894  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
895  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
896  DB  197,60,89,232                       ; vmulps        %ymm0,%ymm8,%ymm13
  ; ymm9 = 1.0, ymm10 = c1, ymm14 = c2.
897  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
898  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
899  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
900  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
901  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
902  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
903  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
904  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
905  DB  196,66,125,88,246                   ; vpbroadcastd  %xmm14,%ymm14
  ; ymm15 = c3: the xor flips the float sign bit of the loaded constant.
906  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
907  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
908  DB  197,121,110,248                     ; vmovd         %eax,%xmm15
909  DB  196,66,125,88,255                   ; vpbroadcastd  %xmm15,%ymm15
  ; ymm11 = min(1.0, c2*sqrt + c3 + c1*ftrt).
910  DB  196,66,13,168,223                   ; vfmadd213ps   %ymm15,%ymm14,%ymm11
911  DB  196,66,45,184,220                   ; vfmadd231ps   %ymm12,%ymm10,%ymm11
912  DB  196,65,52,93,219                    ; vminps        %ymm11,%ymm9,%ymm11
  ; ymm12 = c5; select the linear piece where x < c5.
913  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
914  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
915  DB  196,66,125,88,228                   ; vpbroadcastd  %xmm12,%ymm12
916  DB  196,193,124,194,196,1               ; vcmpltps      %ymm12,%ymm0,%ymm0
917  DB  196,195,37,74,197,0                 ; vblendvps     %ymm0,%ymm13,%ymm11,%ymm0
  ; Second channel (x = ymm1).
918  DB  197,124,82,217                      ; vrsqrtps      %ymm1,%ymm11
919  DB  196,65,124,83,235                   ; vrcpps        %ymm11,%ymm13
920  DB  196,65,124,82,219                   ; vrsqrtps      %ymm11,%ymm11
921  DB  196,66,13,168,239                   ; vfmadd213ps   %ymm15,%ymm14,%ymm13
922  DB  196,66,45,184,235                   ; vfmadd231ps   %ymm11,%ymm10,%ymm13
923  DB  197,60,89,217                       ; vmulps        %ymm1,%ymm8,%ymm11
924  DB  196,65,52,93,237                    ; vminps        %ymm13,%ymm9,%ymm13
925  DB  196,193,116,194,204,1               ; vcmpltps      %ymm12,%ymm1,%ymm1
926  DB  196,195,21,74,203,16                ; vblendvps     %ymm1,%ymm11,%ymm13,%ymm1
  ; Third channel (x = ymm2); ymm9 and ymm8 are consumed in place here.
927  DB  197,124,82,218                      ; vrsqrtps      %ymm2,%ymm11
928  DB  196,65,124,83,235                   ; vrcpps        %ymm11,%ymm13
929  DB  196,66,13,168,239                   ; vfmadd213ps   %ymm15,%ymm14,%ymm13
930  DB  196,65,124,82,219                   ; vrsqrtps      %ymm11,%ymm11
931  DB  196,66,45,184,235                   ; vfmadd231ps   %ymm11,%ymm10,%ymm13
932  DB  196,65,52,93,205                    ; vminps        %ymm13,%ymm9,%ymm9
933  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
934  DB  196,193,108,194,212,1               ; vcmpltps      %ymm12,%ymm2,%ymm2
935  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
936  DB  72,173                              ; lods          %ds:(%rsi),%rax
937  DB  255,224                             ; jmpq          *%rax
938
; _sk_from_2dot2_hsw
; Raises every lane of ymm0, ymm1 and ymm2 to approximately the power 2.2 and
; clamps the result to be >= 0.  Only vrsqrtps/vmulps approximations are used:
;   x^2 * x^(3/16) * x^(1/64) = x^(141/64) ~= x^2.203
; ymm3 is left untouched.  Clobbers ymm8-ymm11 and rax.
; Ends by loading the next 8-byte value from (rsi) into rax (rsi advances by
; the lods) and jumping to it.
; NOTE(review): presumably ymm0..ymm2 are the r,g,b channels and this is a
; gamma-2.2 decode - confirm against the generator's source.
939PUBLIC _sk_from_2dot2_hsw
940_sk_from_2dot2_hsw LABEL PROC
  ; --- ymm0 ---
  ; Four chained rsqrt steps leave ymm8 ~= x^(1/16); two more give ymm9 ~= x^(1/64).
941  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
942  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
943  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
944  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
945  DB  196,65,124,82,200                   ; vrsqrtps      %ymm8,%ymm9
946  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
  ; ymm0 = x^2, ymm8 = (x^(1/16))^3 = x^(3/16); multiply all three factors together.
947  DB  197,252,89,192                      ; vmulps        %ymm0,%ymm0,%ymm0
948  DB  196,65,60,89,208                    ; vmulps        %ymm8,%ymm8,%ymm10
949  DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
950  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
951  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
  ; ymm8 = 0.0 from here on; used as the lower clamp for all three registers.
952  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
953  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
  ; --- ymm1: same computation, with ymm9/ymm10/ymm11 as temporaries ---
954  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
955  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
956  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
957  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
958  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
959  DB  196,65,124,82,210                   ; vrsqrtps      %ymm10,%ymm10
960  DB  197,244,89,201                      ; vmulps        %ymm1,%ymm1,%ymm1
961  DB  196,65,52,89,217                    ; vmulps        %ymm9,%ymm9,%ymm11
962  DB  196,65,52,89,203                    ; vmulps        %ymm11,%ymm9,%ymm9
963  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
964  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
965  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
  ; --- ymm2: same computation again ---
966  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
967  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
968  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
969  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
970  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
971  DB  196,65,124,82,210                   ; vrsqrtps      %ymm10,%ymm10
972  DB  197,236,89,210                      ; vmulps        %ymm2,%ymm2,%ymm2
973  DB  196,65,52,89,217                    ; vmulps        %ymm9,%ymm9,%ymm11
974  DB  196,65,52,89,203                    ; vmulps        %ymm11,%ymm9,%ymm9
975  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
976  DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
977  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
  ; Fetch the next 8-byte entry at (rsi) and jump to it.
978  DB  72,173                              ; lods          %ds:(%rsi),%rax
979  DB  255,224                             ; jmpq          *%rax
980
; _sk_to_2dot2_hsw
; Raises every lane of ymm0, ymm1 and ymm2 to approximately the power 1/2.2 and
; clamps the result to be >= 0.  Built from vrsqrtps/vrcpps approximations:
;   x^(1/2) * x^(-1/32) * x^(-1/64) = x^(29/64) ~= x^0.453
; ymm3 is left untouched.  Clobbers ymm8-ymm10 and rax.
; Ends by loading the next 8-byte value from (rsi) into rax (rsi advances by
; the lods) and jumping to it.
; NOTE(review): presumably the inverse of the 2.2 decode stage, applied to the
; r,g,b channels - confirm against the generator's source.
981PUBLIC _sk_to_2dot2_hsw
982_sk_to_2dot2_hsw LABEL PROC
  ; --- ymm0 ---
  ; ymm0 ~= x^(-1/2); four more rsqrt steps leave ymm8 ~= x^(-1/32); ymm9 ~= x^(1/64).
983  DB  197,252,82,192                      ; vrsqrtps      %ymm0,%ymm0
984  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
985  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
986  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
987  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
988  DB  196,65,124,82,200                   ; vrsqrtps      %ymm8,%ymm9
  ; rcp turns x^(-1/2) into x^(1/2) and x^(1/64) into x^(-1/64); multiply the factors.
989  DB  197,252,83,192                      ; vrcpps        %ymm0,%ymm0
990  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
991  DB  196,65,124,83,193                   ; vrcpps        %ymm9,%ymm8
992  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
  ; ymm8 = 0.0 from here on; used as the lower clamp for all three registers.
993  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
994  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
  ; --- ymm1: same computation, with ymm9/ymm10 as temporaries ---
995  DB  197,252,82,201                      ; vrsqrtps      %ymm1,%ymm1
996  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
997  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
998  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
999  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
1000  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
1001  DB  197,252,83,201                      ; vrcpps        %ymm1,%ymm1
1002  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
1003  DB  196,65,124,83,202                   ; vrcpps        %ymm10,%ymm9
1004  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
1005  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
  ; --- ymm2: same computation again ---
1006  DB  197,252,82,210                      ; vrsqrtps      %ymm2,%ymm2
1007  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
1008  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
1009  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
1010  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
1011  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
1012  DB  197,252,83,210                      ; vrcpps        %ymm2,%ymm2
1013  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
1014  DB  196,65,124,83,202                   ; vrcpps        %ymm10,%ymm9
1015  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
1016  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
  ; Fetch the next 8-byte entry at (rsi) and jump to it.
1017  DB  72,173                              ; lods          %ds:(%rsi),%rax
1018  DB  255,224                             ; jmpq          *%rax
1019
; _sk_scale_1_float_hsw
; Multiplies every lane of ymm0..ymm3 by a single float.
; The first lods reads a pointer from (rsi); the float it points to is
; broadcast to all lanes of ymm8.  Clobbers ymm8 and rax.
; Ends by loading the next 8-byte value from (rsi) and jumping to it.
1020PUBLIC _sk_scale_1_float_hsw
1021_sk_scale_1_float_hsw LABEL PROC
  ; rax = pointer taken from the (rsi) stream; ymm8 = broadcast of *(float*)rax.
1022  DB  72,173                              ; lods          %ds:(%rsi),%rax
1023  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1024  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1025  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1026  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1027  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Fetch the next 8-byte entry at (rsi) and jump to it.
1028  DB  72,173                              ; lods          %ds:(%rsi),%rax
1029  DB  255,224                             ; jmpq          *%rax
1030
; _sk_scale_u8_hsw
; Multiplies ymm0..ymm3 by per-lane factors read from a byte buffer.
; The lods reads a pointer from (rsi); the qword it points to is the buffer
; base, and rdi is added to it as a byte offset.  Eight bytes are widened to
; floats and multiplied by 0x3b808081 (about 1/255 as a float).
; rcx == 0 : load all 8 bytes with one vmovq.
; rcx != 0 : load exactly rcx bytes one at a time (code at +0x48), leaving the
;            remaining lanes zero, then rejoin the main path at +0x14.
; rcx is parked in r8 on entry and restored before the final jump.
; Clobbers rax, r8, ymm8, ymm9; the partial-load path also clobbers r9-r11.
; NOTE(review): presumably rdi is the current pixel index and rcx the number of
; pixels in a short final run - confirm against the caller.
1031PUBLIC _sk_scale_u8_hsw
1032_sk_scale_u8_hsw LABEL PROC
1033  DB  73,137,200                          ; mov           %rcx,%r8
1034  DB  72,173                              ; lods          %ds:(%rsi),%rax
1035  DB  72,139,0                            ; mov           (%rax),%rax
1036  DB  72,1,248                            ; add           %rdi,%rax
1037  DB  77,133,192                          ; test          %r8,%r8
1038  DB  117,56                              ; jne           f3a <_sk_scale_u8_hsw+0x48>
1039  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
  ; +0x14: common path. xmm8 holds up to 8 bytes; zero-extend them to dwords, then to float.
1040  DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
1041  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
1042  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1043  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
1044  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
1045  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
1046  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1047  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1048  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1049  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Fetch the next entry, restore the caller's rcx, and jump.
1050  DB  72,173                              ; lods          %ds:(%rsi),%rax
1051  DB  76,137,193                          ; mov           %r8,%rcx
1052  DB  255,224                             ; jmpq          *%rax
  ; +0x48: partial load. rcx = bit shift (0,8,16,...), r10 = bytes left, r9 = packed result.
1053  DB  49,201                              ; xor           %ecx,%ecx
1054  DB  77,137,194                          ; mov           %r8,%r10
1055  DB  69,49,201                           ; xor           %r9d,%r9d
  ; +0x50: loop body - r9 |= (uint64)byte << shift, so bytes land in memory order.
1056  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1057  DB  72,255,192                          ; inc           %rax
1058  DB  73,211,227                          ; shl           %cl,%r11
1059  DB  77,9,217                            ; or            %r11,%r9
1060  DB  72,131,193,8                        ; add           $0x8,%rcx
1061  DB  73,255,202                          ; dec           %r10
1062  DB  117,234                             ; jne           f42 <_sk_scale_u8_hsw+0x50>
1063  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
1064  DB  235,167                             ; jmp           f06 <_sk_scale_u8_hsw+0x14>
1065
; _sk_lerp_1_float_hsw
; Linear interpolation with a single float t:
;   ymm0 = ymm4 + (ymm0 - ymm4) * t     (likewise ymm1/ymm5, ymm2/ymm6, ymm3/ymm7)
; The first lods reads a pointer from (rsi); t is the float it points to,
; broadcast to all lanes of ymm8.  ymm4..ymm7 are read only.
; Clobbers ymm8 and rax.  Ends by jumping to the next entry read from (rsi).
; NOTE(review): presumably ymm0..3 are the source colour and ymm4..7 the
; destination colour - confirm against the generator's source.
1066PUBLIC _sk_lerp_1_float_hsw
1067_sk_lerp_1_float_hsw LABEL PROC
1068  DB  72,173                              ; lods          %ds:(%rsi),%rax
1069  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
  ; Each pair below: subtract, then fused multiply-add: ymmN = ymm8*ymmN + ymm(N+4).
1070  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1071  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
1072  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1073  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
1074  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1075  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
1076  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
1077  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
  ; Fetch the next 8-byte entry at (rsi) and jump to it.
1078  DB  72,173                              ; lods          %ds:(%rsi),%rax
1079  DB  255,224                             ; jmpq          *%rax
1080
; _sk_lerp_u8_hsw
; Linear interpolation with per-lane factors t read from a byte buffer:
;   ymm0 = ymm4 + (ymm0 - ymm4) * t     (likewise ymm1/ymm5, ymm2/ymm6, ymm3/ymm7)
; The lods reads a pointer from (rsi); the qword it points to is the buffer
; base, and rdi is added to it as a byte offset.  Eight bytes are widened to
; floats and multiplied by 0x3b808081 (about 1/255 as a float) to form t.
; rcx == 0 : load all 8 bytes with one vmovq.
; rcx != 0 : load exactly rcx bytes one at a time (code at +0x5c), leaving the
;            remaining lanes zero, then rejoin the main path at +0x14.
; rcx is parked in r8 on entry and restored before the final jump.
; Clobbers rax, r8, ymm8, ymm9; the partial-load path also clobbers r9-r11.
; NOTE(review): presumably rdi is the current pixel index and rcx the number of
; pixels in a short final run - confirm against the caller.
1081PUBLIC _sk_lerp_u8_hsw
1082_sk_lerp_u8_hsw LABEL PROC
1083  DB  73,137,200                          ; mov           %rcx,%r8
1084  DB  72,173                              ; lods          %ds:(%rsi),%rax
1085  DB  72,139,0                            ; mov           (%rax),%rax
1086  DB  72,1,248                            ; add           %rdi,%rax
1087  DB  77,133,192                          ; test          %r8,%r8
1088  DB  117,76                              ; jne           fea <_sk_lerp_u8_hsw+0x5c>
1089  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
  ; +0x14: common path. Widen the bytes in xmm8 to floats and scale by ~1/255.
1090  DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
1091  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
1092  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1093  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
1094  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
1095  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
  ; Each pair below: subtract, then fused multiply-add: ymmN = ymm8*ymmN + ymm(N+4).
1096  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1097  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
1098  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1099  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
1100  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1101  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
1102  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
1103  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
  ; Fetch the next entry, restore the caller's rcx, and jump.
1104  DB  72,173                              ; lods          %ds:(%rsi),%rax
1105  DB  76,137,193                          ; mov           %r8,%rcx
1106  DB  255,224                             ; jmpq          *%rax
  ; +0x5c: partial load. rcx = bit shift (0,8,16,...), r10 = bytes left, r9 = packed result.
1107  DB  49,201                              ; xor           %ecx,%ecx
1108  DB  77,137,194                          ; mov           %r8,%r10
1109  DB  69,49,201                           ; xor           %r9d,%r9d
  ; +0x64: loop body - r9 |= (uint64)byte << shift, so bytes land in memory order.
1110  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1111  DB  72,255,192                          ; inc           %rax
1112  DB  73,211,227                          ; shl           %cl,%r11
1113  DB  77,9,217                            ; or            %r11,%r9
1114  DB  72,131,193,8                        ; add           $0x8,%rcx
1115  DB  73,255,202                          ; dec           %r10
1116  DB  117,234                             ; jne           ff2 <_sk_lerp_u8_hsw+0x64>
1117  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
1118  DB  235,147                             ; jmp           fa2 <_sk_lerp_u8_hsw+0x14>
1119
; _sk_lerp_565_hsw
; Linear interpolation of ymm0..ymm2 towards ymm4..ymm6 with three separate
; factors unpacked from 16-bit values:
;   ymm0 = ymm4 + (ymm0 - ymm4) * t0,  t0 = (v & 0xf800) * 0x37842108 (float bits)
;   ymm1 = ymm5 + (ymm1 - ymm5) * t1,  t1 = (v & 0x07e0) * 0x3a020821 (float bits)
;   ymm2 = ymm6 + (ymm2 - ymm6) * t2,  t2 = (v & 0x001f) * 0x3d042108 (float bits)
; The three scale constants are approximately 1/63488, 1/2016 and 1/31, so each
; masked field is mapped to roughly 0..1.  ymm3 is overwritten with 1.0f.
; The lods reads a pointer from (rsi); the qword it points to is the buffer
; base (r10), and the 16-bit values are read from r10 + rdi*2.
; rcx == 0 : load 8 values (16 bytes) with one vmovdqu.
; rcx != 0 : code at +0xc1 loads (rcx & 7) values through a jump table into an
;            otherwise zeroed xmm3, then rejoins the main path at +0x14.
; Clobbers rax, r10, ymm3, ymm8-ymm10; the partial path also clobbers r8, r9.
; NOTE(review): presumably a 5-6-5 coverage mask, with rdi the pixel index and
; rcx the length of a short final run - confirm against the caller.
1120PUBLIC _sk_lerp_565_hsw
1121_sk_lerp_565_hsw LABEL PROC
1122  DB  72,173                              ; lods          %ds:(%rsi),%rax
1123  DB  76,139,16                           ; mov           (%rax),%r10
1124  DB  72,133,201                          ; test          %rcx,%rcx
1125  DB  15,133,179,0,0,0                    ; jne           10d0 <_sk_lerp_565_hsw+0xc1>
1126  DB  196,193,122,111,28,122              ; vmovdqu       (%r10,%rdi,2),%xmm3
  ; +0x14: common path. ymm8 = the eight 16-bit values zero-extended to dwords.
1127  DB  196,98,125,51,195                   ; vpmovzxwd     %xmm3,%ymm8
  ; ymm9 = float(v & 0xf800) * 0x37842108
1128  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
1129  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1130  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1131  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
1132  DB  197,124,91,203                      ; vcvtdq2ps     %ymm3,%ymm9
1133  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
1134  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1135  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1136  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
  ; ymm10 = float(v & 0x7e0) * 0x3a020821
1137  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
1138  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1139  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1140  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
1141  DB  197,124,91,211                      ; vcvtdq2ps     %ymm3,%ymm10
1142  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
1143  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1144  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1145  DB  197,44,89,211                       ; vmulps        %ymm3,%ymm10,%ymm10
  ; ymm3 = float(v & 0x1f) * 0x3d042108
1146  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
1147  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1148  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1149  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
1150  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
1151  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
1152  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1153  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1154  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Interpolate: ymmN = t*(ymmN - ymm(N+4)) + ymm(N+4), with t = ymm9, ymm10, ymm3.
1155  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1156  DB  196,226,53,168,196                  ; vfmadd213ps   %ymm4,%ymm9,%ymm0
1157  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1158  DB  196,226,45,168,205                  ; vfmadd213ps   %ymm5,%ymm10,%ymm1
1159  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1160  DB  196,226,101,168,214                 ; vfmadd213ps   %ymm6,%ymm3,%ymm2
  ; ymm3 = 1.0f in every lane (0x3f800000).
1161  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1162  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1163  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
  ; Fetch the next 8-byte entry at (rsi) and jump to it.
1164  DB  72,173                              ; lods          %ds:(%rsi),%rax
1165  DB  255,224                             ; jmpq          *%rax
  ; +0xc1: partial load. r8b = (rcx & 7) - 1; if that wraps above 6 (rcx & 7 == 0)
  ; continue at +0x14 with xmm3 = 0, otherwise dispatch through the table at +0x12d.
1166  DB  65,137,200                          ; mov           %ecx,%r8d
1167  DB  65,128,224,7                        ; and           $0x7,%r8b
1168  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
1169  DB  65,254,200                          ; dec           %r8b
1170  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1171  DB  15,135,59,255,255,255               ; ja            1023 <_sk_lerp_565_hsw+0x14>
1172  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
  ; r9 = table base; each entry is a signed 32-bit offset relative to that base.
1173  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 113c <_sk_lerp_565_hsw+0x12d>
1174  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
1175  DB  76,1,200                            ; add           %r9,%rax
1176  DB  255,224                             ; jmpq          *%rax
  ; Fall-through chain: entering at the insert for lane k-1 loads lanes k-1..0,
  ; i.e. k 16-bit values.  The 7-value entry starts at the vpxor just below.
1177  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
1178  DB  196,193,97,196,92,122,12,6          ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
1179  DB  196,193,97,196,92,122,10,5          ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
1180  DB  196,193,97,196,92,122,8,4           ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
1181  DB  196,193,97,196,92,122,6,3           ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
1182  DB  196,193,97,196,92,122,4,2           ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
1183  DB  196,193,97,196,92,122,2,1           ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
1184  DB  196,193,97,196,28,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm3,%xmm3
1185  DB  233,231,254,255,255                 ; jmpq          1023 <_sk_lerp_565_hsw+0x14>
  ; +0x12d: jump table DATA, not code.  The 28 bytes below are seven little-endian
  ; signed 32-bit offsets (-12, -20, -28, -36, -44, -52, -64) from the table base,
  ; indexed by (rcx & 7) - 1.  The mnemonics in the trailing comments are artifacts
  ; of disassembling data and carry no meaning.
1186  DB  244                                 ; hlt
1187  DB  255                                 ; (bad)
1188  DB  255                                 ; (bad)
1189  DB  255                                 ; (bad)
1190  DB  236                                 ; in            (%dx),%al
1191  DB  255                                 ; (bad)
1192  DB  255                                 ; (bad)
1193  DB  255,228                             ; jmpq          *%rsp
1194  DB  255                                 ; (bad)
1195  DB  255                                 ; (bad)
1196  DB  255                                 ; (bad)
1197  DB  220,255                             ; fdivr         %st,%st(7)
1198  DB  255                                 ; (bad)
1199  DB  255,212                             ; callq         *%rsp
1200  DB  255                                 ; (bad)
1201  DB  255                                 ; (bad)
1202  DB  255,204                             ; dec           %esp
1203  DB  255                                 ; (bad)
1204  DB  255                                 ; (bad)
1205  DB  255,192                             ; inc           %eax
1206  DB  255                                 ; (bad)
1207  DB  255                                 ; (bad)
1208  DB  255                                 ; .byte         0xff
1209
; _sk_load_tables_hsw
; Loads eight 32-bit values and converts each byte field through a lookup:
;   ymm0 = table1[ v        & 0xff]   table1 = qword at 0x08(ctx)
;   ymm1 = table2[(v >>  8) & 0xff]   table2 = qword at 0x10(ctx)
;   ymm2 = table3[(v >> 16) & 0xff]   table3 = qword at 0x18(ctx)
;   ymm3 = float(v >> 24) * 0x3b808081 (about 1/255 as a float)
; ctx is the pointer read from (rsi) by the first lods; the qword at 0x00(ctx)
; is the source base and values are read from base + rdi*4.  Table entries are
; 4 bytes wide (the gathers use scale 4) and are gathered as floats.
; rcx == 0 : load all 8 values with one vmovdqu.
; rcx != 0 : code at +0x8e builds a lane mask with the first rcx lanes set and
;            uses a masked load, then rejoins the main path at +0x1a.
; rcx is parked in r8 on entry and restored before the final jump.
; Clobbers rax, r8, r9, ymm8-ymm10; the partial path also clobbers r10.
; NOTE(review): presumably v is an 8888 pixel and the three tables are per-channel
; r,g,b lookup tables - confirm against the generator's source.
1210PUBLIC _sk_load_tables_hsw
1211_sk_load_tables_hsw LABEL PROC
1212  DB  73,137,200                          ; mov           %rcx,%r8
1213  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; r9 = rdi*4 + source base pointer.
1214  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
1215  DB  76,3,8                              ; add           (%rax),%r9
1216  DB  77,133,192                          ; test          %r8,%r8
1217  DB  117,121                             ; jne           11e6 <_sk_load_tables_hsw+0x8e>
1218  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
  ; +0x1a: common path. ymm2 = 0xff in every lane (byte mask); ymm1 = low-byte indices.
1219  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
1220  DB  197,249,110,193                     ; vmovd         %ecx,%xmm0
1221  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
1222  DB  197,237,219,203                     ; vpand         %ymm3,%ymm2,%ymm1
  ; Each gather needs its own all-ones mask register (ymm8, ymm9, ymm10) because
  ; vgatherdps clears its mask operand as it completes.
1223  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
1224  DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
1225  DB  76,139,72,16                        ; mov           0x10(%rax),%r9
1226  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
1227  DB  196,226,53,146,4,137                ; vgatherdps    %ymm9,(%rcx,%ymm1,4),%ymm0
  ; Second byte field -> ymm1.
1228  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
1229  DB  197,109,219,201                     ; vpand         %ymm1,%ymm2,%ymm9
1230  DB  196,65,45,118,210                   ; vpcmpeqd      %ymm10,%ymm10,%ymm10
1231  DB  196,130,45,146,12,137               ; vgatherdps    %ymm10,(%r9,%ymm9,4),%ymm1
  ; Third byte field -> ymm2 (the 0xff mask in ymm2 is consumed before being overwritten).
1232  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
1233  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
1234  DB  196,65,109,219,201                  ; vpand         %ymm9,%ymm2,%ymm9
1235  DB  196,162,61,146,20,136               ; vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
  ; Top byte is not looked up: convert to float and scale by ~1/255.
1236  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
1237  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
1238  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1239  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1240  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1241  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
  ; Fetch the next entry, restore the caller's rcx, and jump.
1242  DB  72,173                              ; lods          %ds:(%rsi),%rax
1243  DB  76,137,193                          ; mov           %r8,%rcx
1244  DB  255,224                             ; jmpq          *%rax
  ; +0x8e: partial load. r10 = ~0 >> ((8 - n) * 8) leaves n low bytes of 0xff;
  ; sign-extending those bytes to dwords yields a mask with the first n lanes set.
1245  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
1246  DB  68,41,193                           ; sub           %r8d,%ecx
1247  DB  192,225,3                           ; shl           $0x3,%cl
1248  DB  73,199,194,255,255,255,255          ; mov           $0xffffffffffffffff,%r10
1249  DB  73,211,234                          ; shr           %cl,%r10
1250  DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
1251  DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
  ; Masked load: lanes whose mask is clear are not read and come back as zero.
1252  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
1253  DB  233,99,255,255,255                  ; jmpq          1172 <_sk_load_tables_hsw+0x1a>
1254
; _sk_load_a8_hsw
; Loads eight bytes, converts them to floats scaled by 0x3b808081 (about 1/255
; as a float) into ymm3, and zeroes ymm0, ymm1 and ymm2.
; The lods reads a pointer from (rsi); the qword it points to is the buffer
; base, and rdi is added to it as a byte offset.
; rcx == 0 : load all 8 bytes with one vmovq.
; rcx != 0 : load exactly rcx bytes one at a time (code at +0x42), leaving the
;            remaining lanes zero, then rejoin the main path at +0x14.
; rcx is parked in r8 on entry and restored before the final jump.
; Clobbers rax, r8; the partial-load path also clobbers r9-r11.
; NOTE(review): presumably an 8-bit alpha-only load, with rdi the pixel index and
; rcx the length of a short final run - confirm against the caller.
1255PUBLIC _sk_load_a8_hsw
1256_sk_load_a8_hsw LABEL PROC
1257  DB  73,137,200                          ; mov           %rcx,%r8
1258  DB  72,173                              ; lods          %ds:(%rsi),%rax
1259  DB  72,139,0                            ; mov           (%rax),%rax
1260  DB  72,1,248                            ; add           %rdi,%rax
1261  DB  77,133,192                          ; test          %r8,%r8
1262  DB  117,50                              ; jne           1251 <_sk_load_a8_hsw+0x42>
1263  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
  ; +0x14: common path. Widen the bytes in xmm0 to floats; ymm3 = value * ~1/255.
1264  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
1265  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1266  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1267  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1268  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1269  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
  ; Fetch the next entry, clear ymm0..ymm2, restore the caller's rcx, and jump.
1270  DB  72,173                              ; lods          %ds:(%rsi),%rax
1271  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
1272  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1273  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
1274  DB  76,137,193                          ; mov           %r8,%rcx
1275  DB  255,224                             ; jmpq          *%rax
  ; +0x42: partial load. rcx = bit shift (0,8,16,...), r10 = bytes left, r9 = packed result.
1276  DB  49,201                              ; xor           %ecx,%ecx
1277  DB  77,137,194                          ; mov           %r8,%r10
1278  DB  69,49,201                           ; xor           %r9d,%r9d
  ; +0x4a: loop body - r9 |= (uint64)byte << shift, so bytes land in memory order.
1279  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1280  DB  72,255,192                          ; inc           %rax
1281  DB  73,211,227                          ; shl           %cl,%r11
1282  DB  77,9,217                            ; or            %r11,%r9
1283  DB  72,131,193,8                        ; add           $0x8,%rcx
1284  DB  73,255,202                          ; dec           %r10
1285  DB  117,234                             ; jne           1259 <_sk_load_a8_hsw+0x4a>
1286  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
1287  DB  235,173                             ; jmp           1223 <_sk_load_a8_hsw+0x14>
1288
; _sk_gather_a8_hsw
; Gathers one byte per lane from a buffer at a computed index and returns it
; as a float scaled by 0x3b808081 (about 1/255 as a float) in ymm3; ymm0, ymm1
; and ymm2 are zeroed.
;   index = trunc(ymm1) * dword at 0x10(ctx) + trunc(ymm0)     (32-bit arithmetic)
;   byte  = *(base + zero-extended index), base = qword at 0x00(ctx)
; ctx is the pointer read from (rsi) by the first lods.
; There is no byte-gather instruction, so the eight indices are moved to
; general-purpose registers and the bytes are inserted one at a time.
; r15, r14, r12 and rbx are pushed on entry and popped before the final jump.
; Clobbers rax, r8-r11.
; NOTE(review): presumably ymm0/ymm1 are x/y coordinates and the dword at
; 0x10(ctx) is the row stride in pixels - confirm against the generator's source.
1289PUBLIC _sk_gather_a8_hsw
1290_sk_gather_a8_hsw LABEL PROC
1291  DB  65,87                               ; push          %r15
1292  DB  65,86                               ; push          %r14
1293  DB  65,84                               ; push          %r12
1294  DB  83                                  ; push          %rbx
1295  DB  72,173                              ; lods          %ds:(%rsi),%rax
1296  DB  76,139,0                            ; mov           (%rax),%r8
  ; ymm0 = trunc(ymm1) * broadcast(dword 0x10(ctx)) + trunc(ymm0): eight 32-bit indices.
1297  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
1298  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
1299  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
1300  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
1301  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
  ; Split the low 128 bits: r11 = lane 0, r10 = lane 1, r9 = lane 2, rax = lane 3.
1302  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
1303  DB  65,137,193                          ; mov           %eax,%r9d
1304  DB  72,193,232,32                       ; shr           $0x20,%rax
1305  DB  196,193,249,126,194                 ; vmovq         %xmm0,%r10
1306  DB  69,137,211                          ; mov           %r10d,%r11d
1307  DB  73,193,234,32                       ; shr           $0x20,%r10
  ; Split the high 128 bits: r12 = lane 4, r15 = lane 5, r14 = lane 6, rbx = lane 7.
1308  DB  196,227,125,57,192,1                ; vextracti128  $0x1,%ymm0,%xmm0
1309  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1310  DB  65,137,222                          ; mov           %ebx,%r14d
1311  DB  72,193,235,32                       ; shr           $0x20,%rbx
1312  DB  196,193,249,126,199                 ; vmovq         %xmm0,%r15
1313  DB  69,137,252                          ; mov           %r15d,%r12d
1314  DB  73,193,239,32                       ; shr           $0x20,%r15
  ; Fetch the eight bytes into bytes 0..7 of xmm0 in lane order.  Bytes 8..15 keep
  ; stale index data but are ignored by the vpmovzxbd below.
1315  DB  196,131,121,32,4,24,0               ; vpinsrb       $0x0,(%r8,%r11,1),%xmm0,%xmm0
1316  DB  196,131,121,32,4,16,1               ; vpinsrb       $0x1,(%r8,%r10,1),%xmm0,%xmm0
1317  DB  71,15,182,12,8                      ; movzbl        (%r8,%r9,1),%r9d
1318  DB  196,195,121,32,193,2                ; vpinsrb       $0x2,%r9d,%xmm0,%xmm0
1319  DB  65,15,182,4,0                       ; movzbl        (%r8,%rax,1),%eax
1320  DB  196,227,121,32,192,3                ; vpinsrb       $0x3,%eax,%xmm0,%xmm0
1321  DB  67,15,182,4,32                      ; movzbl        (%r8,%r12,1),%eax
1322  DB  196,227,121,32,192,4                ; vpinsrb       $0x4,%eax,%xmm0,%xmm0
1323  DB  67,15,182,4,56                      ; movzbl        (%r8,%r15,1),%eax
1324  DB  196,227,121,32,192,5                ; vpinsrb       $0x5,%eax,%xmm0,%xmm0
1325  DB  67,15,182,4,48                      ; movzbl        (%r8,%r14,1),%eax
1326  DB  196,227,121,32,192,6                ; vpinsrb       $0x6,%eax,%xmm0,%xmm0
1327  DB  65,15,182,4,24                      ; movzbl        (%r8,%rbx,1),%eax
1328  DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
  ; Widen to floats; ymm3 = value * ~1/255.
1329  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
1330  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1331  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1332  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1333  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1334  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
  ; Fetch the next entry, clear ymm0..ymm2, restore the saved registers, and jump.
1335  DB  72,173                              ; lods          %ds:(%rsi),%rax
1336  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
1337  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1338  DB  197,237,239,210                     ; vpxor         %ymm2,%ymm2,%ymm2
1339  DB  91                                  ; pop           %rbx
1340  DB  65,92                               ; pop           %r12
1341  DB  65,94                               ; pop           %r14
1342  DB  65,95                               ; pop           %r15
1343  DB  255,224                             ; jmpq          *%rax
1344
; ---------------------------------------------------------------------------
; _sk_store_a8_hsw
; Converts the eight floats in %ymm3 to bytes (multiply by 0x437f0000 = 255.0f,
; convert to int32, pack down to bytes with saturation) and stores them at
; base + %rdi, where base is read through the pointer fetched from the program
; stream at (%rsi).
;   %rcx == 0 : all eight bytes are written with a single 8-byte store.
;   %rcx != 0 : only (%rcx & 7) bytes are written, one vpextrb each, entered
;               through the jump table at the end of this routine.
; Scratch: %rax, %r8, %r9, %ymm8, %xmm9, flags.
; Leaves by loading the next program word into %rax and jumping to it.
; NOTE(review): %ymm3 is presumably the alpha lane and %rcx the tail count;
; this is inferred from the stage name only -- confirm against the generator.
; ---------------------------------------------------------------------------
1345PUBLIC _sk_store_a8_hsw
1346_sk_store_a8_hsw LABEL PROC
1347  DB  72,173                              ; lods          %ds:(%rsi),%rax
1348  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = 255.0f in every lane, then ymm8 = int32(ymm3 * 255.0f).
1349  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
1350  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1351  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
1352  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
1353  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
; Narrow 8 x int32 -> 8 x uint16 -> bytes; the eight results end up in the low
; 8 bytes of xmm8.
1354  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
1355  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
1356  DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
1357  DB  72,133,201                          ; test          %rcx,%rcx
1358  DB  117,10                              ; jne           138e <_sk_store_a8_hsw+0x3b>
; Full path: write the low 8 bytes of xmm8 in one store.
1359  DB  196,65,123,17,4,57                  ; vmovsd        %xmm8,(%r9,%rdi,1)
; Common exit (also the target of the two jumps to +0x37 below).
1360  DB  72,173                              ; lods          %ds:(%rsi),%rax
1361  DB  255,224                             ; jmpq          *%rax
; Partial path: r8b = (%rcx & 7) - 1. The unsigned compare sends anything above
; 6 (which includes the wrapped value produced when %rcx & 7 == 0) straight to
; the exit without storing.
1362  DB  65,137,200                          ; mov           %ecx,%r8d
1363  DB  65,128,224,7                        ; and           $0x7,%r8b
1364  DB  65,254,200                          ; dec           %r8b
1365  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1366  DB  119,236                             ; ja            138a <_sk_store_a8_hsw+0x37>
; Widen bytes to words so result i sits at byte offset 2*i; that is why the
; vpextrb immediates below are even.
1367  DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
; Dispatch: rax = table + sign-extended 32-bit entry at table[index].
1368  DB  65,15,182,192                       ; movzbl        %r8b,%eax
1369  DB  76,141,5,66,0,0,0                   ; lea           0x42(%rip),%r8        # 13f0 <_sk_store_a8_hsw+0x9d>
1370  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
1371  DB  76,1,192                            ; add           %r8,%rax
1372  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entering at the store for byte k also performs the
; stores for bytes k-1 .. 0.
1373  DB  196,67,121,20,68,57,6,12            ; vpextrb       $0xc,%xmm8,0x6(%r9,%rdi,1)
1374  DB  196,67,121,20,68,57,5,10            ; vpextrb       $0xa,%xmm8,0x5(%r9,%rdi,1)
1375  DB  196,67,121,20,68,57,4,8             ; vpextrb       $0x8,%xmm8,0x4(%r9,%rdi,1)
1376  DB  196,67,121,20,68,57,3,6             ; vpextrb       $0x6,%xmm8,0x3(%r9,%rdi,1)
1377  DB  196,67,121,20,68,57,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r9,%rdi,1)
1378  DB  196,67,121,20,68,57,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r9,%rdi,1)
1379  DB  196,67,121,20,4,57,0                ; vpextrb       $0x0,%xmm8,(%r9,%rdi,1)
1380  DB  235,154                             ; jmp           138a <_sk_store_a8_hsw+0x37>
; Jump table (data, not code): the 28 bytes below are seven little-endian
; signed 32-bit offsets relative to the table's own address (the address the
; lea above loads). Their values are -9, -17, -25, -33, -41, -49, -57, i.e.
; entry k lands on the vpextrb that stores byte k. The mnemonics printed in the
; trailing comments are an artifact of disassembling data and carry no meaning.
1381  DB  247,255                             ; idiv          %edi
1382  DB  255                                 ; (bad)
1383  DB  255                                 ; (bad)
1384  DB  239                                 ; out           %eax,(%dx)
1385  DB  255                                 ; (bad)
1386  DB  255                                 ; (bad)
1387  DB  255,231                             ; jmpq          *%rdi
1388  DB  255                                 ; (bad)
1389  DB  255                                 ; (bad)
1390  DB  255                                 ; (bad)
1391  DB  223,255                             ; (bad)
1392  DB  255                                 ; (bad)
1393  DB  255,215                             ; callq         *%rdi
1394  DB  255                                 ; (bad)
1395  DB  255                                 ; (bad)
1396  DB  255,207                             ; dec           %edi
1397  DB  255                                 ; (bad)
1398  DB  255                                 ; (bad)
1399  DB  255,199                             ; inc           %edi
1400  DB  255                                 ; (bad)
1401  DB  255                                 ; (bad)
1402  DB  255                                 ; .byte         0xff
1403
; ---------------------------------------------------------------------------
; _sk_load_g8_hsw
; Reads up to eight bytes starting at base + %rdi (base is read through the
; pointer fetched from the program stream at (%rsi)), zero-extends each to
; 32 bits, converts to float and multiplies by 0x3b808081 (~1/255).
; The result is left in %ymm0, %ymm1 and %ymm2 (three copies); %ymm3 is filled
; with 0x3f800000 = 1.0f.
;   %rcx == 0 : one 8-byte load.
;   %rcx != 0 : %rcx bytes are read one at a time and assembled in %r9 with
;               byte i at bit 8*i; bytes that are not read stay zero.
; %rcx doubles as the shift count inside the byte loop, so its incoming value
; is parked in %r8 on entry and restored just before leaving.
; Scratch: %rax, %r8, %r9, %r10, %r11, flags.
; Leaves by loading the next program word into %rax and jumping to it.
; ---------------------------------------------------------------------------
1404PUBLIC _sk_load_g8_hsw
1405_sk_load_g8_hsw LABEL PROC
1406  DB  73,137,200                          ; mov           %rcx,%r8
1407  DB  72,173                              ; lods          %ds:(%rsi),%rax
; rax = base + %rdi = address of the first byte to read.
1408  DB  72,139,0                            ; mov           (%rax),%rax
1409  DB  72,1,248                            ; add           %rdi,%rax
1410  DB  77,133,192                          ; test          %r8,%r8
1411  DB  117,60                              ; jne           1458 <_sk_load_g8_hsw+0x4c>
1412  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
; Shared expansion (+0x14): the partial path rejoins here with the assembled
; bytes already in xmm0.
1413  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
1414  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1415  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1416  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1417  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1418  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm3 = 1.0f in every lane.
1419  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1420  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1421  DB  196,226,125,88,217                  ; vpbroadcastd  %xmm1,%ymm3
1422  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Restore the caller's %rcx, then replicate ymm0 into ymm1 and ymm2.
1423  DB  76,137,193                          ; mov           %r8,%rcx
1424  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
1425  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
1426  DB  255,224                             ; jmpq          *%rax
; Partial path (+0x4c): rcx = bit offset (starts at 0), r10 = bytes remaining,
; r9 = accumulator.
1427  DB  49,201                              ; xor           %ecx,%ecx
1428  DB  77,137,194                          ; mov           %r8,%r10
1429  DB  69,49,201                           ; xor           %r9d,%r9d
; Loop body (+0x54): r9 |= (uint64)*rax++ << rcx; rcx += 8; repeat r10 times.
1430  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1431  DB  72,255,192                          ; inc           %rax
1432  DB  73,211,227                          ; shl           %cl,%r11
1433  DB  77,9,217                            ; or            %r11,%r9
1434  DB  72,131,193,8                        ; add           $0x8,%rcx
1435  DB  73,255,202                          ; dec           %r10
1436  DB  117,234                             ; jne           1460 <_sk_load_g8_hsw+0x54>
1437  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
1438  DB  235,163                             ; jmp           1420 <_sk_load_g8_hsw+0x14>
1439
; ---------------------------------------------------------------------------
; _sk_gather_g8_hsw
; Gathers eight bytes from a table, converts each to float, multiplies by
; 0x3b808081 (~1/255) and leaves the result in %ymm0, %ymm1 and %ymm2 (three
; copies); %ymm3 is filled with 0x3f800000 = 1.0f.
; The context pointer is fetched from the program stream at (%rsi):
;     (ctx)      table base address            -> %r8
;     0x10(ctx)  32-bit multiplier, broadcast  -> %ymm2
; Per lane: index = trunc(%ymm1) * multiplier + trunc(%ymm0), computed as
; 32-bit integers and zero-extended before being added to the base.
; NOTE(review): %ymm0/%ymm1 are presumably x/y coordinates and the multiplier
; a row stride -- inferred from the stage name; confirm against the generator.
; %rbx, %r12, %r14 and %r15 are pushed on entry and popped before the exit
; jump. Scratch: %rax, %r8-%r11, flags.
; ---------------------------------------------------------------------------
1440PUBLIC _sk_gather_g8_hsw
1441_sk_gather_g8_hsw LABEL PROC
1442  DB  65,87                               ; push          %r15
1443  DB  65,86                               ; push          %r14
1444  DB  65,84                               ; push          %r12
1445  DB  83                                  ; push          %rbx
1446  DB  72,173                              ; lods          %ds:(%rsi),%rax
1447  DB  76,139,0                            ; mov           (%rax),%r8
; ymm0 = eight 32-bit indices.
1448  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
1449  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
1450  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
1451  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
1452  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
; Split the indices into general registers, two per 64-bit extract:
;   lane 0 -> r11, lane 1 -> r10, lane 2 -> r9,  lane 3 -> rax,
;   lane 4 -> r12, lane 5 -> r15, lane 6 -> r14, lane 7 -> rbx.
1453  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
1454  DB  65,137,193                          ; mov           %eax,%r9d
1455  DB  72,193,232,32                       ; shr           $0x20,%rax
1456  DB  196,193,249,126,194                 ; vmovq         %xmm0,%r10
1457  DB  69,137,211                          ; mov           %r10d,%r11d
1458  DB  73,193,234,32                       ; shr           $0x20,%r10
1459  DB  196,227,125,57,192,1                ; vextracti128  $0x1,%ymm0,%xmm0
1460  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1461  DB  65,137,222                          ; mov           %ebx,%r14d
1462  DB  72,193,235,32                       ; shr           $0x20,%rbx
1463  DB  196,193,249,126,199                 ; vmovq         %xmm0,%r15
1464  DB  69,137,252                          ; mov           %r15d,%r12d
1465  DB  73,193,239,32                       ; shr           $0x20,%r15
; Fetch table[index] for each lane into bytes 0..7 of xmm0. Bytes 8..15 still
; hold leftover index data, but the vpmovzxbd below reads only the low 8 bytes.
1466  DB  196,131,121,32,4,24,0               ; vpinsrb       $0x0,(%r8,%r11,1),%xmm0,%xmm0
1467  DB  196,131,121,32,4,16,1               ; vpinsrb       $0x1,(%r8,%r10,1),%xmm0,%xmm0
1468  DB  71,15,182,12,8                      ; movzbl        (%r8,%r9,1),%r9d
1469  DB  196,195,121,32,193,2                ; vpinsrb       $0x2,%r9d,%xmm0,%xmm0
1470  DB  65,15,182,4,0                       ; movzbl        (%r8,%rax,1),%eax
1471  DB  196,227,121,32,192,3                ; vpinsrb       $0x3,%eax,%xmm0,%xmm0
1472  DB  67,15,182,4,32                      ; movzbl        (%r8,%r12,1),%eax
1473  DB  196,227,121,32,192,4                ; vpinsrb       $0x4,%eax,%xmm0,%xmm0
1474  DB  67,15,182,4,56                      ; movzbl        (%r8,%r15,1),%eax
1475  DB  196,227,121,32,192,5                ; vpinsrb       $0x5,%eax,%xmm0,%xmm0
1476  DB  67,15,182,4,48                      ; movzbl        (%r8,%r14,1),%eax
1477  DB  196,227,121,32,192,6                ; vpinsrb       $0x6,%eax,%xmm0,%xmm0
1478  DB  65,15,182,4,24                      ; movzbl        (%r8,%rbx,1),%eax
1479  DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
; Expand bytes -> int32 -> float, then scale by ~1/255.
1480  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
1481  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1482  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1483  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1484  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1485  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm3 = 1.0f in every lane.
1486  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1487  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1488  DB  196,226,125,88,217                  ; vpbroadcastd  %xmm1,%ymm3
1489  DB  72,173                              ; lods          %ds:(%rsi),%rax
1490  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
1491  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
; Restore the saved registers in reverse push order, then jump to the address
; just loaded into %rax.
1492  DB  91                                  ; pop           %rbx
1493  DB  65,92                               ; pop           %r12
1494  DB  65,94                               ; pop           %r14
1495  DB  65,95                               ; pop           %r15
1496  DB  255,224                             ; jmpq          *%rax
1497
; ---------------------------------------------------------------------------
; _sk_gather_i8_hsw
; Two-level lookup: gathers eight bytes from a table, uses each byte as an
; index into a second table of 32-bit entries, and unpacks every entry into
; four floats scaled by 0x3b808081 (~1/255):
;     %ymm0 = (entry       & 0xff) * scale
;     %ymm1 = (entry >>  8 & 0xff) * scale
;     %ymm2 = (entry >> 16 & 0xff) * scale
;     %ymm3 = (entry >> 24)        * scale
; Context layout as used here:
;     (ctx)      first-level table base (bytes)        -> %r9
;     0x8(ctx)   second-level table base (32-bit)      -> %rax for vpgatherdd
;     0x10(ctx)  32-bit multiplier, broadcast          -> %ymm2
; Per lane: index = trunc(%ymm1) * multiplier + trunc(%ymm0), zero-extended.
; NOTE(review): presumably x/y coordinates, a row stride and a colour palette
; -- inferred from the stage name; confirm against the generator source.
; %rbx and %r12-%r15 are pushed and popped around the body. %ymm8 and %ymm9
; are used as scratch and are not restored here.
; ---------------------------------------------------------------------------
1498PUBLIC _sk_gather_i8_hsw
1499_sk_gather_i8_hsw LABEL PROC
; Fetch the context word; if it is zero, fetch the following program word and
; use that in %rax instead.
; NOTE(review): on that zero path %r8 is left at zero, yet 0x8(%r8) is
; dereferenced further down. Presumably the first word is never zero in
; practice -- confirm with the generator before relying on this path.
1500  DB  72,173                              ; lods          %ds:(%rsi),%rax
1501  DB  73,137,192                          ; mov           %rax,%r8
1502  DB  77,133,192                          ; test          %r8,%r8
1503  DB  116,5                               ; je            1573 <_sk_gather_i8_hsw+0xf>
1504  DB  76,137,192                          ; mov           %r8,%rax
1505  DB  235,2                               ; jmp           1575 <_sk_gather_i8_hsw+0x11>
1506  DB  72,173                              ; lods          %ds:(%rsi),%rax
1507  DB  65,87                               ; push          %r15
1508  DB  65,86                               ; push          %r14
1509  DB  65,85                               ; push          %r13
1510  DB  65,84                               ; push          %r12
1511  DB  83                                  ; push          %rbx
1512  DB  76,139,8                            ; mov           (%rax),%r9
; ymm0 = eight 32-bit first-level indices.
1513  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
1514  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
1515  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
1516  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
1517  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
; Split the indices into general registers:
;   lane 0 -> r14, lane 1 -> r11, lane 2 -> r10, lane 3 -> rax,
;   lane 4 -> r13, lane 5 -> r12, lane 6 -> r15, lane 7 -> rbx.
1518  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
1519  DB  65,137,194                          ; mov           %eax,%r10d
1520  DB  72,193,232,32                       ; shr           $0x20,%rax
1521  DB  196,193,249,126,195                 ; vmovq         %xmm0,%r11
1522  DB  69,137,222                          ; mov           %r11d,%r14d
1523  DB  73,193,235,32                       ; shr           $0x20,%r11
1524  DB  196,227,125,57,192,1                ; vextracti128  $0x1,%ymm0,%xmm0
1525  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1526  DB  65,137,223                          ; mov           %ebx,%r15d
1527  DB  72,193,235,32                       ; shr           $0x20,%rbx
1528  DB  196,193,249,126,196                 ; vmovq         %xmm0,%r12
1529  DB  69,137,229                          ; mov           %r12d,%r13d
1530  DB  73,193,236,32                       ; shr           $0x20,%r12
; First-level fetch: one byte per lane into bytes 0..7 of xmm0, then widen to
; eight 32-bit second-level indices.
1531  DB  196,131,121,32,4,49,0               ; vpinsrb       $0x0,(%r9,%r14,1),%xmm0,%xmm0
1532  DB  196,131,121,32,4,25,1               ; vpinsrb       $0x1,(%r9,%r11,1),%xmm0,%xmm0
1533  DB  196,131,121,32,4,17,2               ; vpinsrb       $0x2,(%r9,%r10,1),%xmm0,%xmm0
1534  DB  196,195,121,32,4,1,3                ; vpinsrb       $0x3,(%r9,%rax,1),%xmm0,%xmm0
1535  DB  196,131,121,32,4,41,4               ; vpinsrb       $0x4,(%r9,%r13,1),%xmm0,%xmm0
1536  DB  196,131,121,32,4,33,5               ; vpinsrb       $0x5,(%r9,%r12,1),%xmm0,%xmm0
1537  DB  196,131,121,32,4,57,6               ; vpinsrb       $0x6,(%r9,%r15,1),%xmm0,%xmm0
1538  DB  196,195,121,32,4,25,7               ; vpinsrb       $0x7,(%r9,%rbx,1),%xmm0,%xmm0
1539  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
; Second-level fetch: ymm3 = table2[ymm0] for all eight lanes. ymm1 is set to
; all ones so every lane of the gather is enabled.
1540  DB  73,139,64,8                         ; mov           0x8(%r8),%rax
1541  DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
1542  DB  196,226,117,144,28,128              ; vpgatherdd    %ymm1,(%rax,%ymm0,4),%ymm3
; ymm2 = 0xff mask, ymm8 = scale; unpack the four bytes of each entry.
1543  DB  184,255,0,0,0                       ; mov           $0xff,%eax
1544  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1545  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
1546  DB  197,237,219,195                     ; vpand         %ymm3,%ymm2,%ymm0
1547  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1548  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1549  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1550  DB  196,98,125,88,193                   ; vpbroadcastd  %xmm1,%ymm8
1551  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
1552  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
1553  DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
1554  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1555  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
1556  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
1557  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
1558  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1559  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
; The top byte needs no mask: the logical shift already clears the upper bits.
1560  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
1561  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
1562  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
1563  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Restore the saved registers in reverse push order, then jump to the address
; just loaded into %rax.
1564  DB  91                                  ; pop           %rbx
1565  DB  65,92                               ; pop           %r12
1566  DB  65,93                               ; pop           %r13
1567  DB  65,94                               ; pop           %r14
1568  DB  65,95                               ; pop           %r15
1569  DB  255,224                             ; jmpq          *%rax
1570
; ---------------------------------------------------------------------------
; _sk_load_565_hsw
; Reads up to eight 16-bit values starting at base + 2*%rdi (base is read
; through the pointer fetched from the program stream at (%rsi)) and unpacks
; each value v into three floats:
;     %ymm0 = (v & 0xf800) * 0x37842108   (~1/63488, i.e. 1/0xf800)
;     %ymm1 = (v & 0x07e0) * 0x3a020821   (~1/2016,  i.e. 1/0x07e0)
;     %ymm2 = (v & 0x001f) * 0x3d042108   (~1/31)
;     %ymm3 = 0x3f800000 = 1.0f
; Masking in place and folding the shift into the scale avoids a separate
; shift per field.
;   %rcx == 0 : one unaligned 16-byte load.
;   %rcx != 0 : (%rcx & 7) values are inserted one by one into a zeroed xmm0,
;               entered through the jump table at the end of this routine.
; Scratch: %rax, %r8, %r9, %r10, flags.
; Leaves by loading the next program word into %rax and jumping to it.
; ---------------------------------------------------------------------------
1571PUBLIC _sk_load_565_hsw
1572_sk_load_565_hsw LABEL PROC
1573  DB  72,173                              ; lods          %ds:(%rsi),%rax
1574  DB  76,139,16                           ; mov           (%rax),%r10
1575  DB  72,133,201                          ; test          %rcx,%rcx
1576  DB  15,133,149,0,0,0                    ; jne           1727 <_sk_load_565_hsw+0xa3>
1577  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
; Shared unpack (+0x14): the partial path rejoins here with xmm0 filled in.
; ymm2 = the eight 16-bit values widened to 32 bits.
1578  DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
; ymm0 = float(v & 0xf800) * (1/0xf800)
1579  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
1580  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1581  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
1582  DB  197,253,219,194                     ; vpand         %ymm2,%ymm0,%ymm0
1583  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1584  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
1585  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1586  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1587  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x7e0) * (1/0x7e0)
1588  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
1589  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1590  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1591  DB  197,245,219,202                     ; vpand         %ymm2,%ymm1,%ymm1
1592  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1593  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
1594  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1595  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1596  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
; ymm2 = float(v & 0x1f) * (1/0x1f)
1597  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
1598  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1599  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1600  DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
1601  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1602  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
1603  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1604  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1605  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; ymm3 = 1.0f in every lane.
1606  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1607  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1608  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1609  DB  72,173                              ; lods          %ds:(%rsi),%rax
1610  DB  255,224                             ; jmpq          *%rax
; Partial path (+0xa3): r8b = (%rcx & 7) - 1 with xmm0 cleared. The unsigned
; compare sends anything above 6 (including the wrapped value produced when
; %rcx & 7 == 0) to the shared unpack with xmm0 still all zero.
1611  DB  65,137,200                          ; mov           %ecx,%r8d
1612  DB  65,128,224,7                        ; and           $0x7,%r8b
1613  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
1614  DB  65,254,200                          ; dec           %r8b
1615  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1616  DB  15,135,89,255,255,255               ; ja            1698 <_sk_load_565_hsw+0x14>
; Dispatch: rax = table + sign-extended 32-bit entry at table[index].
1617  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
1618  DB  76,141,13,74,0,0,0                  ; lea           0x4a(%rip),%r9        # 1794 <_sk_load_565_hsw+0x110>
1619  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
1620  DB  76,1,200                            ; add           %r9,%rax
1621  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entering at the insert for word k also performs the
; inserts for words k-1 .. 0. The highest entry starts at the vpxor below,
; which clears xmm0 a second time before the first insert.
1622  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
1623  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
1624  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
1625  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
1626  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
1627  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
1628  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
1629  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
1630  DB  233,5,255,255,255                   ; jmpq          1698 <_sk_load_565_hsw+0x14>
; One byte of padding ahead of the table.
1631  DB  144                                 ; nop
; Jump table (data, not code): the 28 bytes below are seven little-endian
; signed 32-bit offsets relative to the table's own address (the address the
; lea above loads). Their values are -13, -21, -29, -37, -45, -53, -65: entries
; 0..5 land on the vpinsrw for word 0..5, and entry 6 lands on the vpxor that
; precedes the vpinsrw for word 6. The mnemonics printed in the trailing
; comments are an artifact of disassembling data and carry no meaning.
1632  DB  243,255                             ; repz          (bad)
1633  DB  255                                 ; (bad)
1634  DB  255                                 ; (bad)
1635  DB  235,255                             ; jmp           1799 <_sk_load_565_hsw+0x115>
1636  DB  255                                 ; (bad)
1637  DB  255,227                             ; jmpq          *%rbx
1638  DB  255                                 ; (bad)
1639  DB  255                                 ; (bad)
1640  DB  255                                 ; (bad)
1641  DB  219,255                             ; (bad)
1642  DB  255                                 ; (bad)
1643  DB  255,211                             ; callq         *%rbx
1644  DB  255                                 ; (bad)
1645  DB  255                                 ; (bad)
1646  DB  255,203                             ; dec           %ebx
1647  DB  255                                 ; (bad)
1648  DB  255                                 ; (bad)
1649  DB  255                                 ; (bad)
1650  DB  191                                 ; .byte         0xbf
1651  DB  255                                 ; (bad)
1652  DB  255                                 ; (bad)
1653  DB  255                                 ; .byte         0xff
1654
; ---------------------------------------------------------------------------
; _sk_gather_565_hsw
; Gathers eight 16-bit values from a table and unpacks each value v into
; three floats:
;     %ymm0 = (v & 0xf800) * 0x37842108   (~1/63488, i.e. 1/0xf800)
;     %ymm1 = (v & 0x07e0) * 0x3a020821   (~1/2016,  i.e. 1/0x07e0)
;     %ymm2 = (v & 0x001f) * 0x3d042108   (~1/31)
;     %ymm3 = 0x3f800000 = 1.0f
; The context pointer is fetched from the program stream at (%rsi):
;     (ctx)      table base address            -> %r8
;     0x10(ctx)  32-bit multiplier, broadcast  -> %ymm2
; Per lane: index = trunc(%ymm1) * multiplier + trunc(%ymm0), computed as
; 32-bit integers, zero-extended, and scaled by 2 when addressing the table.
; NOTE(review): %ymm0/%ymm1 are presumably x/y coordinates and the multiplier
; a row stride -- inferred from the stage name; confirm against the generator.
; %rbx, %r12, %r14 and %r15 are pushed on entry and popped before the exit
; jump. Scratch: %rax, %r8-%r11, flags.
; ---------------------------------------------------------------------------
1655PUBLIC _sk_gather_565_hsw
1656_sk_gather_565_hsw LABEL PROC
1657  DB  65,87                               ; push          %r15
1658  DB  65,86                               ; push          %r14
1659  DB  65,84                               ; push          %r12
1660  DB  83                                  ; push          %rbx
1661  DB  72,173                              ; lods          %ds:(%rsi),%rax
1662  DB  76,139,0                            ; mov           (%rax),%r8
; ymm0 = eight 32-bit indices.
1663  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
1664  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
1665  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
1666  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
1667  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
; Split the indices into general registers, two per 64-bit extract:
;   lane 0 -> r11, lane 1 -> r10, lane 2 -> r9,  lane 3 -> rax,
;   lane 4 -> r12, lane 5 -> r15, lane 6 -> r14, lane 7 -> rbx.
1668  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
1669  DB  65,137,193                          ; mov           %eax,%r9d
1670  DB  72,193,232,32                       ; shr           $0x20,%rax
1671  DB  196,193,249,126,194                 ; vmovq         %xmm0,%r10
1672  DB  69,137,211                          ; mov           %r10d,%r11d
1673  DB  73,193,234,32                       ; shr           $0x20,%r10
1674  DB  196,227,125,57,192,1                ; vextracti128  $0x1,%ymm0,%xmm0
1675  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1676  DB  65,137,222                          ; mov           %ebx,%r14d
1677  DB  72,193,235,32                       ; shr           $0x20,%rbx
1678  DB  196,193,249,126,199                 ; vmovq         %xmm0,%r15
1679  DB  69,137,252                          ; mov           %r15d,%r12d
1680  DB  73,193,239,32                       ; shr           $0x20,%r15
; Fetch table[index] (16 bits each). The vmovd for lane 0 clears the rest of
; xmm0, and the seven vpinsrw fill words 1..7.
1681  DB  71,15,183,20,80                     ; movzwl        (%r8,%r10,2),%r10d
1682  DB  71,15,183,28,88                     ; movzwl        (%r8,%r11,2),%r11d
1683  DB  196,193,121,110,195                 ; vmovd         %r11d,%xmm0
1684  DB  196,193,121,196,194,1               ; vpinsrw       $0x1,%r10d,%xmm0,%xmm0
1685  DB  71,15,183,12,72                     ; movzwl        (%r8,%r9,2),%r9d
1686  DB  196,193,121,196,193,2               ; vpinsrw       $0x2,%r9d,%xmm0,%xmm0
1687  DB  65,15,183,4,64                      ; movzwl        (%r8,%rax,2),%eax
1688  DB  197,249,196,192,3                   ; vpinsrw       $0x3,%eax,%xmm0,%xmm0
1689  DB  67,15,183,4,96                      ; movzwl        (%r8,%r12,2),%eax
1690  DB  197,249,196,192,4                   ; vpinsrw       $0x4,%eax,%xmm0,%xmm0
1691  DB  67,15,183,4,120                     ; movzwl        (%r8,%r15,2),%eax
1692  DB  197,249,196,192,5                   ; vpinsrw       $0x5,%eax,%xmm0,%xmm0
1693  DB  67,15,183,4,112                     ; movzwl        (%r8,%r14,2),%eax
1694  DB  197,249,196,192,6                   ; vpinsrw       $0x6,%eax,%xmm0,%xmm0
1695  DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
1696  DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
; ymm2 = the eight 16-bit values widened to 32 bits.
1697  DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
; ymm0 = float(v & 0xf800) * (1/0xf800)
1698  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
1699  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1700  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
1701  DB  197,253,219,194                     ; vpand         %ymm2,%ymm0,%ymm0
1702  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1703  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
1704  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1705  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1706  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x7e0) * (1/0x7e0)
1707  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
1708  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1709  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1710  DB  197,245,219,202                     ; vpand         %ymm2,%ymm1,%ymm1
1711  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1712  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
1713  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1714  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1715  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
; ymm2 = float(v & 0x1f) * (1/0x1f)
1716  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
1717  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1718  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1719  DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
1720  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1721  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
1722  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1723  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1724  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; ymm3 = 1.0f in every lane.
1725  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1726  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1727  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1728  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Restore the saved registers in reverse push order, then jump to the address
; just loaded into %rax.
1729  DB  91                                  ; pop           %rbx
1730  DB  65,92                               ; pop           %r12
1731  DB  65,94                               ; pop           %r14
1732  DB  65,95                               ; pop           %r15
1733  DB  255,224                             ; jmpq          *%rax
1734
; ---------------------------------------------------------------------------
; _sk_store_565_hsw
; Packs three float vectors into eight 16-bit values and stores them at
; base + 2*%rdi, where base is read through the pointer fetched from the
; program stream at (%rsi). Per lane:
;     v = (int32(%ymm0 * 31.0f) << 11)
;       | (int32(%ymm1 * 63.0f) <<  5)
;       |  int32(%ymm2 * 31.0f)
; (0x41f80000 = 31.0f, 0x427c0000 = 63.0f.) The 32-bit results are narrowed to
; 16 bits with unsigned saturation before being written.
;   %rcx == 0 : all eight values are written with one unaligned 16-byte store.
;   %rcx != 0 : only (%rcx & 7) values are written, one vpextrw each, entered
;               through the jump table at the end of this routine.
; Scratch: %rax, %r8, %r9, %ymm8, %ymm9, %ymm10, flags.
; Leaves by loading the next program word into %rax and jumping to it.
; ---------------------------------------------------------------------------
1735PUBLIC _sk_store_565_hsw
1736_sk_store_565_hsw LABEL PROC
1737  DB  72,173                              ; lods          %ds:(%rsi),%rax
1738  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = 31.0f (kept for the %ymm2 term); ymm9 = int32(ymm0 * 31.0f) << 11.
1739  DB  184,0,0,248,65                      ; mov           $0x41f80000,%eax
1740  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1741  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
1742  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1743  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
1744  DB  196,193,53,114,241,11               ; vpslld        $0xb,%ymm9,%ymm9
; ymm10 = int32(ymm1 * 63.0f) << 5, merged into ymm9.
1745  DB  184,0,0,124,66                      ; mov           $0x427c0000,%eax
1746  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
1747  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
1748  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
1749  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1750  DB  196,193,45,114,242,5                ; vpslld        $0x5,%ymm10,%ymm10
1751  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
; ymm8 = ymm9 | int32(ymm2 * 31.0f).
1752  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
1753  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
1754  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
; Narrow 8 x int32 to 8 x uint16 in xmm8.
1755  DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
1756  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
1757  DB  72,133,201                          ; test          %rcx,%rcx
1758  DB  117,10                              ; jne           195f <_sk_store_565_hsw+0x6c>
; Full path: write all 16 bytes of xmm8.
1759  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; Common exit (also the target of the two jumps to +0x68 below).
1760  DB  72,173                              ; lods          %ds:(%rsi),%rax
1761  DB  255,224                             ; jmpq          *%rax
; Partial path: r8b = (%rcx & 7) - 1. The unsigned compare sends anything above
; 6 (which includes the wrapped value produced when %rcx & 7 == 0) straight to
; the exit without storing.
1762  DB  65,137,200                          ; mov           %ecx,%r8d
1763  DB  65,128,224,7                        ; and           $0x7,%r8b
1764  DB  65,254,200                          ; dec           %r8b
1765  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1766  DB  119,236                             ; ja            195b <_sk_store_565_hsw+0x68>
; Dispatch: rax = table + sign-extended 32-bit entry at table[index].
1767  DB  65,15,182,192                       ; movzbl        %r8b,%eax
1768  DB  76,141,5,66,0,0,0                   ; lea           0x42(%rip),%r8        # 19bc <_sk_store_565_hsw+0xc9>
1769  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
1770  DB  76,1,192                            ; add           %r8,%rax
1771  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entering at the store for word k also performs the
; stores for words k-1 .. 0.
1772  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
1773  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
1774  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
1775  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
1776  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
1777  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
1778  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
1779  DB  235,159                             ; jmp           195b <_sk_store_565_hsw+0x68>
; Jump table (data, not code): the 28 bytes below are seven little-endian
; signed 32-bit offsets relative to the table's own address (the address the
; lea above loads). Their values are -9, -17, -25, -33, -41, -49, -57, i.e.
; entry k lands on the vpextrw that stores word k. The mnemonics printed in the
; trailing comments are an artifact of disassembling data and carry no meaning.
1780  DB  247,255                             ; idiv          %edi
1781  DB  255                                 ; (bad)
1782  DB  255                                 ; (bad)
1783  DB  239                                 ; out           %eax,(%dx)
1784  DB  255                                 ; (bad)
1785  DB  255                                 ; (bad)
1786  DB  255,231                             ; jmpq          *%rdi
1787  DB  255                                 ; (bad)
1788  DB  255                                 ; (bad)
1789  DB  255                                 ; (bad)
1790  DB  223,255                             ; (bad)
1791  DB  255                                 ; (bad)
1792  DB  255,215                             ; callq         *%rdi
1793  DB  255                                 ; (bad)
1794  DB  255                                 ; (bad)
1795  DB  255,207                             ; dec           %edi
1796  DB  255                                 ; (bad)
1797  DB  255                                 ; (bad)
1798  DB  255,199                             ; inc           %edi
1799  DB  255                                 ; (bad)
1800  DB  255                                 ; (bad)
1801  DB  255                                 ; .byte         0xff
1802
; _sk_load_4444_hsw
;   Loads up to eight 16-bit values and splits each into its four 4-bit fields,
;   producing one float vector per field.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           source base address (kept in %r10); the second is the address jumped to
;           on exit.
;     %rdi  element index; the source address is %r10 + 2*%rdi.
;     %rcx  zero takes the full 16-byte load; non-zero takes the partial-load path,
;           which reads (%rcx & 7) values and leaves the remaining lanes zero.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;   Outputs (v = zero-extended 16-bit value per lane):
;     %ymm0 = float(v & 0xf000) * 0x37888889   (float bits, ~1/(15*4096))
;     %ymm1 = float(v & 0x0f00) * 0x39888889   (float bits, ~1/(15*256))
;     %ymm2 = float(v & 0x00f0) * 0x3b888889   (float bits, ~1/(15*16))
;     %ymm3 = float(v & 0x000f) * 0x3d888889   (float bits, ~1/15)
;   Also written: %rax, %r10, %ymm8, %ymm9; %r8 and %r9 on the partial-load path.
1803PUBLIC _sk_load_4444_hsw
1804_sk_load_4444_hsw LABEL PROC
1805  DB  72,173                              ; lods          %ds:(%rsi),%rax
1806  DB  76,139,16                           ; mov           (%rax),%r10
1807  DB  72,133,201                          ; test          %rcx,%rcx
1808  DB  15,133,179,0,0,0                    ; jne           1a99 <_sk_load_4444_hsw+0xc1>
1809  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
; +0x14: both load paths rejoin here with the eight 16-bit values in %xmm0.
1810  DB  196,98,125,51,200                   ; vpmovzxwd     %xmm0,%ymm9
1811  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
1812  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1813  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
1814  DB  196,193,125,219,193                 ; vpand         %ymm9,%ymm0,%ymm0
1815  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1816  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
1817  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1818  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1819  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
1820  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
1821  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1822  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1823  DB  196,193,117,219,201                 ; vpand         %ymm9,%ymm1,%ymm1
1824  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1825  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
1826  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1827  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1828  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
1829  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
1830  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1831  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1832  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
1833  DB  197,124,91,194                      ; vcvtdq2ps     %ymm2,%ymm8
1834  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
1835  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1836  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1837  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1838  DB  184,15,0,0,0                        ; mov           $0xf,%eax
1839  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1840  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1841  DB  196,193,101,219,217                 ; vpand         %ymm9,%ymm3,%ymm3
1842  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
1843  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
1844  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1845  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1846  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Exit: fetch the next qword from the stream and jump to it.
1847  DB  72,173                              ; lods          %ds:(%rsi),%rax
1848  DB  255,224                             ; jmpq          *%rax
; +0xc1: partial load. %r8 = (%rcx & 7) - 1 as an unsigned byte; a value above 6
; (i.e. %rcx & 7 == 0) rejoins at +0x14 with %xmm0 all zero. Otherwise %r8 indexes the
; offset table at +0x130 and control enters the vpinsrw chain below part-way through,
; so exactly (%rcx & 7) low lanes are filled and the rest stay zero.
1849  DB  65,137,200                          ; mov           %ecx,%r8d
1850  DB  65,128,224,7                        ; and           $0x7,%r8b
1851  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
1852  DB  65,254,200                          ; dec           %r8b
1853  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1854  DB  15,135,59,255,255,255               ; ja            19ec <_sk_load_4444_hsw+0x14>
1855  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
1856  DB  76,141,13,76,0,0,0                  ; lea           0x4c(%rip),%r9        # 1b08 <_sk_load_4444_hsw+0x130>
1857  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
1858  DB  76,1,200                            ; add           %r9,%rax
1859  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: the table entry for index 6 lands on the vpxor, and each lower
; index lands one vpinsrw later.
1860  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
1861  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
1862  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
1863  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
1864  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
1865  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
1866  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
1867  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
1868  DB  233,231,254,255,255                 ; jmpq          19ec <_sk_load_4444_hsw+0x14>
; The 3-byte nop below is never reached (it follows an unconditional jump); it pads
; the code so the table starts at +0x130.
1869  DB  15,31,0                             ; nopl          (%rax)
; +0x130: jump table - DATA, not code. Seven little-endian signed 32-bit offsets
; relative to the table base: -15, -23, -31, -39, -47, -55, -67. The mnemonics shown
; on the following lines are disassembler noise and should be ignored.
1870  DB  241                                 ; icebp
1871  DB  255                                 ; (bad)
1872  DB  255                                 ; (bad)
1873  DB  255                                 ; (bad)
1874  DB  233,255,255,255,225                 ; jmpq          ffffffffe2001b10 <_sk_linear_gradient_2stops_hsw+0xffffffffe1fff2bc>
1875  DB  255                                 ; (bad)
1876  DB  255                                 ; (bad)
1877  DB  255                                 ; (bad)
1878  DB  217,255                             ; fcos
1879  DB  255                                 ; (bad)
1880  DB  255,209                             ; callq         *%rcx
1881  DB  255                                 ; (bad)
1882  DB  255                                 ; (bad)
1883  DB  255,201                             ; dec           %ecx
1884  DB  255                                 ; (bad)
1885  DB  255                                 ; (bad)
1886  DB  255                                 ; (bad)
1887  DB  189                                 ; .byte         0xbd
1888  DB  255                                 ; (bad)
1889  DB  255                                 ; (bad)
1890  DB  255                                 ; .byte         0xff
1891
; _sk_gather_4444_hsw
;   Gathers eight 16-bit values from computed indices and splits each into its four
;   4-bit fields, producing one float vector per field.
;
;   Inputs as used by the code below:
;     %rsi   qword stream read with lods: the first qword is a context pointer, the
;            second is the address jumped to on exit.
;     ctx+0  source base address (kept in %r8).
;     ctx+16 32-bit multiplier broadcast to all lanes.
;     %ymm0, %ymm1  floats truncated to int32; per lane the index is
;            trunc(%ymm1) * [ctx+16] + trunc(%ymm0).
;            NOTE(review): presumably y * stride + x - confirm against the context struct.
;   Each 32-bit index is zero-extended and used as base + 2*index for a 16-bit load.
;   Outputs (v = gathered 16-bit value per lane):
;     %ymm0 = float(v & 0xf000) * 0x37888889   (float bits, ~1/(15*4096))
;     %ymm1 = float(v & 0x0f00) * 0x39888889   (float bits, ~1/(15*256))
;     %ymm2 = float(v & 0x00f0) * 0x3b888889   (float bits, ~1/(15*16))
;     %ymm3 = float(v & 0x000f) * 0x3d888889   (float bits, ~1/15)
;   %rbx, %r12, %r14 and %r15 are pushed on entry and popped before the exit jump.
;   Also written: %rax, %r8-%r11, %ymm8, %ymm9.
1892PUBLIC _sk_gather_4444_hsw
1893_sk_gather_4444_hsw LABEL PROC
1894  DB  65,87                               ; push          %r15
1895  DB  65,86                               ; push          %r14
1896  DB  65,84                               ; push          %r12
1897  DB  83                                  ; push          %rbx
1898  DB  72,173                              ; lods          %ds:(%rsi),%rax
1899  DB  76,139,0                            ; mov           (%rax),%r8
; Index vector: %ymm0 = trunc(%ymm1) * [ctx+16] + trunc(%ymm0).
1900  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
1901  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
1902  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
1903  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
1904  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
; Spill the eight indices to general registers, two per 64-bit extract:
;   lane0=%r11 lane1=%r10 lane2=%r9 lane3=%rax lane4=%r12 lane5=%r15 lane6=%r14 lane7=%rbx
1905  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
1906  DB  65,137,193                          ; mov           %eax,%r9d
1907  DB  72,193,232,32                       ; shr           $0x20,%rax
1908  DB  196,193,249,126,194                 ; vmovq         %xmm0,%r10
1909  DB  69,137,211                          ; mov           %r10d,%r11d
1910  DB  73,193,234,32                       ; shr           $0x20,%r10
1911  DB  196,227,125,57,192,1                ; vextracti128  $0x1,%ymm0,%xmm0
1912  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1913  DB  65,137,222                          ; mov           %ebx,%r14d
1914  DB  72,193,235,32                       ; shr           $0x20,%rbx
1915  DB  196,193,249,126,199                 ; vmovq         %xmm0,%r15
1916  DB  69,137,252                          ; mov           %r15d,%r12d
1917  DB  73,193,239,32                       ; shr           $0x20,%r15
; Scalar gather: load each 16-bit value and insert it into its lane of %xmm0.
1918  DB  71,15,183,20,80                     ; movzwl        (%r8,%r10,2),%r10d
1919  DB  71,15,183,28,88                     ; movzwl        (%r8,%r11,2),%r11d
1920  DB  196,193,121,110,195                 ; vmovd         %r11d,%xmm0
1921  DB  196,193,121,196,194,1               ; vpinsrw       $0x1,%r10d,%xmm0,%xmm0
1922  DB  71,15,183,12,72                     ; movzwl        (%r8,%r9,2),%r9d
1923  DB  196,193,121,196,193,2               ; vpinsrw       $0x2,%r9d,%xmm0,%xmm0
1924  DB  65,15,183,4,64                      ; movzwl        (%r8,%rax,2),%eax
1925  DB  197,249,196,192,3                   ; vpinsrw       $0x3,%eax,%xmm0,%xmm0
1926  DB  67,15,183,4,96                      ; movzwl        (%r8,%r12,2),%eax
1927  DB  197,249,196,192,4                   ; vpinsrw       $0x4,%eax,%xmm0,%xmm0
1928  DB  67,15,183,4,120                     ; movzwl        (%r8,%r15,2),%eax
1929  DB  197,249,196,192,5                   ; vpinsrw       $0x5,%eax,%xmm0,%xmm0
1930  DB  67,15,183,4,112                     ; movzwl        (%r8,%r14,2),%eax
1931  DB  197,249,196,192,6                   ; vpinsrw       $0x6,%eax,%xmm0,%xmm0
1932  DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
1933  DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
; Unpack: widen to 32-bit lanes in %ymm9, then mask, convert and scale each field.
1934  DB  196,98,125,51,200                   ; vpmovzxwd     %xmm0,%ymm9
1935  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
1936  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1937  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
1938  DB  196,193,125,219,193                 ; vpand         %ymm9,%ymm0,%ymm0
1939  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
1940  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
1941  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1942  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1943  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
1944  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
1945  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
1946  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
1947  DB  196,193,117,219,201                 ; vpand         %ymm9,%ymm1,%ymm1
1948  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
1949  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
1950  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1951  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1952  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
1953  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
1954  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1955  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1956  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
1957  DB  197,124,91,194                      ; vcvtdq2ps     %ymm2,%ymm8
1958  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
1959  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1960  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
1961  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1962  DB  184,15,0,0,0                        ; mov           $0xf,%eax
1963  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1964  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1965  DB  196,193,101,219,217                 ; vpand         %ymm9,%ymm3,%ymm3
1966  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
1967  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
1968  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1969  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1970  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Exit: fetch the next address, restore the saved registers (reverse push order),
; then jump.
1971  DB  72,173                              ; lods          %ds:(%rsi),%rax
1972  DB  91                                  ; pop           %rbx
1973  DB  65,92                               ; pop           %r12
1974  DB  65,94                               ; pop           %r14
1975  DB  65,95                               ; pop           %r15
1976  DB  255,224                             ; jmpq          *%rax
1977
; _sk_store_4444_hsw
;   Packs four float vectors into eight 16-bit 4:4:4:4 values and stores them.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           destination base address (kept in %r9); the second is the address jumped
;           to on exit.
;     %rdi  element index; the destination address is %r9 + 2*%rdi.
;     %rcx  zero stores all eight values (16 bytes); non-zero stores only (%rcx & 7)
;           values, and nothing at all when %rcx & 7 == 0.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;     %ymm0-%ymm3  each is multiplied by 0x41700000 (float 15.0) and converted with
;           vcvtps2dq (rounding per MXCSR); inputs are not clamped here.
;   Packed value per lane: (c0 << 12) | (c1 << 8) | (c2 << 4) | c3, then narrowed to
;   16 bits with unsigned saturation (vpackusdw).
;   %ymm0-%ymm3 are left unmodified. Written: %rax, %r8, %r9, %ymm8-%ymm10.
1978PUBLIC _sk_store_4444_hsw
1979_sk_store_4444_hsw LABEL PROC
1980  DB  72,173                              ; lods          %ds:(%rsi),%rax
1981  DB  76,139,8                            ; mov           (%rax),%r9
1982  DB  184,0,0,112,65                      ; mov           $0x41700000,%eax
1983  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1984  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
1985  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1986  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
1987  DB  196,193,53,114,241,12               ; vpslld        $0xc,%ymm9,%ymm9
1988  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
1989  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1990  DB  196,193,45,114,242,8                ; vpslld        $0x8,%ymm10,%ymm10
1991  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
1992  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
1993  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
1994  DB  196,193,45,114,242,4                ; vpslld        $0x4,%ymm10,%ymm10
1995  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
1996  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
1997  DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
1998  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
; Narrow the eight 32-bit lanes to eight 16-bit lanes in %xmm8.
1999  DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
2000  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
2001  DB  72,133,201                          ; test          %rcx,%rcx
2002  DB  117,10                              ; jne           1cf7 <_sk_store_4444_hsw+0x72>
2003  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; +0x6e: exit, shared by the full and partial store paths.
2004  DB  72,173                              ; lods          %ds:(%rsi),%rax
2005  DB  255,224                             ; jmpq          *%rax
; +0x72: partial store. %r8 = (%rcx & 7) - 1 as an unsigned byte; a value above 6
; (i.e. %rcx & 7 == 0) exits without storing. Otherwise it indexes the offset table at
; +0xcf and control enters the vpextrw chain below part-way through, so exactly
; (%rcx & 7) low lanes are written.
2006  DB  65,137,200                          ; mov           %ecx,%r8d
2007  DB  65,128,224,7                        ; and           $0x7,%r8b
2008  DB  65,254,200                          ; dec           %r8b
2009  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2010  DB  119,236                             ; ja            1cf3 <_sk_store_4444_hsw+0x6e>
2011  DB  65,15,182,192                       ; movzbl        %r8b,%eax
2012  DB  76,141,5,66,0,0,0                   ; lea           0x42(%rip),%r8        # 1d54 <_sk_store_4444_hsw+0xcf>
2013  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
2014  DB  76,1,192                            ; add           %r8,%rax
2015  DB  255,224                             ; jmpq          *%rax
2016  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
2017  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
2018  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
2019  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
2020  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
2021  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
2022  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
2023  DB  235,159                             ; jmp           1cf3 <_sk_store_4444_hsw+0x6e>
; +0xcf: jump table - DATA, not code. Seven little-endian signed 32-bit offsets
; relative to the table base: -9, -17, -25, -33, -41, -49, -57 (one per vpextrw above,
; last store first). The mnemonics shown on the following lines are disassembler noise
; and should be ignored.
2024  DB  247,255                             ; idiv          %edi
2025  DB  255                                 ; (bad)
2026  DB  255                                 ; (bad)
2027  DB  239                                 ; out           %eax,(%dx)
2028  DB  255                                 ; (bad)
2029  DB  255                                 ; (bad)
2030  DB  255,231                             ; jmpq          *%rdi
2031  DB  255                                 ; (bad)
2032  DB  255                                 ; (bad)
2033  DB  255                                 ; (bad)
2034  DB  223,255                             ; (bad)
2035  DB  255                                 ; (bad)
2036  DB  255,215                             ; callq         *%rdi
2037  DB  255                                 ; (bad)
2038  DB  255                                 ; (bad)
2039  DB  255,207                             ; dec           %edi
2040  DB  255                                 ; (bad)
2041  DB  255                                 ; (bad)
2042  DB  255,199                             ; inc           %edi
2043  DB  255                                 ; (bad)
2044  DB  255                                 ; (bad)
2045  DB  255                                 ; .byte         0xff
2046
; _sk_load_8888_hsw
;   Loads up to eight 32-bit values and splits each into its four bytes, producing
;   one float vector per byte.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           source base address; the second is the address jumped to on exit.
;     %rdi  element index; the source address (%r9) is base + 4*%rdi.
;     %rcx  zero takes the full 32-byte load; non-zero takes a masked load of the low
;           lanes. It is copied to %r8 on entry because the masked path needs %cl as
;           a shift count, and is restored from %r8 before the exit jump.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;   Outputs (v = 32-bit value per lane; 0x3b808081 is the float bit pattern ~1/255):
;     %ymm0 = float( v        & 0xff) * ~1/255
;     %ymm1 = float((v >> 8)  & 0xff) * ~1/255
;     %ymm2 = float((v >> 16) & 0xff) * ~1/255
;     %ymm3 = float( v >> 24        ) * ~1/255
;   Also written: %rax, %r8, %r9, %ymm8, %ymm9.
2047PUBLIC _sk_load_8888_hsw
2048_sk_load_8888_hsw LABEL PROC
2049  DB  73,137,200                          ; mov           %rcx,%r8
2050  DB  72,173                              ; lods          %ds:(%rsi),%rax
2051  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
2052  DB  76,3,8                              ; add           (%rax),%r9
2053  DB  77,133,192                          ; test          %r8,%r8
2054  DB  117,104                             ; jne           1ded <_sk_load_8888_hsw+0x7d>
2055  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
; +0x1a: both load paths rejoin here with the packed values in %ymm3.
2056  DB  184,255,0,0,0                       ; mov           $0xff,%eax
2057  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2058  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
2059  DB  197,237,219,195                     ; vpand         %ymm3,%ymm2,%ymm0
2060  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
2061  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
2062  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2063  DB  196,98,125,88,193                   ; vpbroadcastd  %xmm1,%ymm8
2064  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
2065  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
2066  DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
2067  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
2068  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
2069  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
2070  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
2071  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2072  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
2073  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
2074  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
2075  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
; Exit: fetch the next address, put the saved %rcx back, and jump.
2076  DB  72,173                              ; lods          %ds:(%rsi),%rax
2077  DB  76,137,193                          ; mov           %r8,%rcx
2078  DB  255,224                             ; jmpq          *%rax
; +0x7d: masked load. %rax = all-ones >> (8 * (8 - %r8d)), which for a count of 1..7
; leaves that many low bytes equal to 0xff. vpmovsxbd widens each byte to a 32-bit
; lane mask, and vpmaskmovd loads only the enabled lanes (the others read as zero).
2079  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
2080  DB  68,41,193                           ; sub           %r8d,%ecx
2081  DB  192,225,3                           ; shl           $0x3,%cl
2082  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
2083  DB  72,211,232                          ; shr           %cl,%rax
2084  DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
2085  DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
2086  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
2087  DB  233,116,255,255,255                 ; jmpq          1d8a <_sk_load_8888_hsw+0x1a>
2088
; _sk_gather_8888_hsw
;   Gathers eight 32-bit values from computed indices and splits each into its four
;   bytes, producing one float vector per byte.
;
;   Inputs as used by the code below:
;     %rsi   qword stream read with lods: the first qword is a context pointer, the
;            second is the address jumped to on exit.
;     ctx+0  source base address (kept in %r8).
;     ctx+16 32-bit multiplier broadcast to all lanes.
;     %ymm0, %ymm1  floats truncated to int32; per lane the index is
;            trunc(%ymm1) * [ctx+16] + trunc(%ymm0).
;            NOTE(review): presumably y * stride + x - confirm against the context struct.
;   All eight lanes are gathered unconditionally (the mask is all-ones); %rcx is not
;   consulted.
;   Outputs (v = gathered 32-bit value; 0x3b808081 is the float bit pattern ~1/255):
;     %ymm0 = float( v        & 0xff) * ~1/255
;     %ymm1 = float((v >> 8)  & 0xff) * ~1/255
;     %ymm2 = float((v >> 16) & 0xff) * ~1/255
;     %ymm3 = float( v >> 24        ) * ~1/255
;   Also written: %rax, %r8, %ymm8, %ymm9.
2089PUBLIC _sk_gather_8888_hsw
2090_sk_gather_8888_hsw LABEL PROC
2091  DB  72,173                              ; lods          %ds:(%rsi),%rax
2092  DB  76,139,0                            ; mov           (%rax),%r8
2093  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
2094  DB  196,226,125,88,80,16                ; vpbroadcastd  0x10(%rax),%ymm2
2095  DB  196,226,109,64,201                  ; vpmulld       %ymm1,%ymm2,%ymm1
2096  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
2097  DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
; %ymm1 = all-ones gather mask; gather dwords from %r8 + 4*index into %ymm3.
2098  DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
2099  DB  196,194,117,144,28,128              ; vpgatherdd    %ymm1,(%r8,%ymm0,4),%ymm3
2100  DB  184,255,0,0,0                       ; mov           $0xff,%eax
2101  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2102  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
2103  DB  197,237,219,195                     ; vpand         %ymm3,%ymm2,%ymm0
2104  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
2105  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
2106  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2107  DB  196,98,125,88,193                   ; vpbroadcastd  %xmm1,%ymm8
2108  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
2109  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
2110  DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
2111  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
2112  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
2113  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
2114  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
2115  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2116  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
2117  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
2118  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
2119  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
; Exit: fetch the next qword from the stream and jump to it.
2120  DB  72,173                              ; lods          %ds:(%rsi),%rax
2121  DB  255,224                             ; jmpq          *%rax
2122
; _sk_store_8888_hsw
;   Packs four float vectors into eight 32-bit values (one byte per input vector)
;   and stores them.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           destination base address; the second is the address jumped to on exit.
;     %rdi  element index; the destination address (%r9) is base + 4*%rdi.
;     %rcx  zero stores all eight values (32 bytes); non-zero does a masked store of
;           the low lanes. It is copied to %r8 on entry because the masked path needs
;           %cl as a shift count, and is restored from %r8 before the exit jump.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;     %ymm0-%ymm3  each is multiplied by 0x437f0000 (float 255.0) and converted with
;           vcvtps2dq (rounding per MXCSR); inputs are not clamped here.
;   Packed value per lane: c0 | (c1 << 8) | (c2 << 16) | (c3 << 24).
;   %ymm0-%ymm3 are left unmodified. Written: %rax, %r8, %r9, %ymm8-%ymm10.
2123PUBLIC _sk_store_8888_hsw
2124_sk_store_8888_hsw LABEL PROC
2125  DB  73,137,200                          ; mov           %rcx,%r8
2126  DB  72,173                              ; lods          %ds:(%rsi),%rax
2127  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
2128  DB  76,3,8                              ; add           (%rax),%r9
2129  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
2130  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2131  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
2132  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
2133  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
2134  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
2135  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2136  DB  196,193,45,114,242,8                ; vpslld        $0x8,%ymm10,%ymm10
2137  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
2138  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
2139  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2140  DB  196,193,45,114,242,16               ; vpslld        $0x10,%ymm10,%ymm10
2141  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
2142  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
2143  DB  196,193,61,114,240,24               ; vpslld        $0x18,%ymm8,%ymm8
2144  DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
2145  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
2146  DB  77,133,192                          ; test          %r8,%r8
2147  DB  117,12                              ; jne           1f10 <_sk_store_8888_hsw+0x74>
2148  DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
; +0x6d: exit, shared by the full and masked store paths; %rcx is restored here.
2149  DB  72,173                              ; lods          %ds:(%rsi),%rax
2150  DB  76,137,193                          ; mov           %r8,%rcx
2151  DB  255,224                             ; jmpq          *%rax
; +0x74: masked store. %rax = all-ones >> (8 * (8 - %r8d)), which for a count of 1..7
; leaves that many low bytes equal to 0xff. vpmovsxbd widens each byte to a 32-bit
; lane mask, and vpmaskmovd writes only the enabled lanes.
2152  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
2153  DB  68,41,193                           ; sub           %r8d,%ecx
2154  DB  192,225,3                           ; shl           $0x3,%cl
2155  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
2156  DB  72,211,232                          ; shr           %cl,%rax
2157  DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
2158  DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
2159  DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
2160  DB  235,211                             ; jmp           1f09 <_sk_store_8888_hsw+0x6d>
2161
; _sk_load_f16_hsw
;   Loads up to eight 8-byte elements (four half-precision floats each), transposes
;   them, and converts to single precision with vcvtph2ps.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           source base address (then held in %rax); the second is the address jumped
;           to on exit.
;     %rdi  element index; the source address is base + 8*%rdi.
;     %rcx  zero loads all eight elements (64 bytes); non-zero loads elements one at a
;           time, at most seven, and zero-fills the rest.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;   Outputs: %ymm0, %ymm1, %ymm2, %ymm3 hold the 1st, 2nd, 3rd and 4th half of every
;   element respectively, widened to float.
;   Also written: %rax, %xmm8, %xmm9.
2162PUBLIC _sk_load_f16_hsw
2163_sk_load_f16_hsw LABEL PROC
2164  DB  72,173                              ; lods          %ds:(%rsi),%rax
2165  DB  72,139,0                            ; mov           (%rax),%rax
2166  DB  72,133,201                          ; test          %rcx,%rcx
2167  DB  117,97                              ; jne           1fa1 <_sk_load_f16_hsw+0x6b>
; Full load: elements 0-1 -> %xmm8, 2-3 -> %xmm2, 4-5 -> %xmm3, 6-7 -> %xmm9.
2168  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
2169  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
2170  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
2171  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
; +0x21: both load paths rejoin here. Two rounds of 16-bit interleaves followed by
; 64-bit interleaves turn the element-major layout into one vector per component.
2172  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
2173  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
2174  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
2175  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
2176  DB  197,121,97,194                      ; vpunpcklwd    %xmm2,%xmm0,%xmm8
2177  DB  197,121,105,202                     ; vpunpckhwd    %xmm2,%xmm0,%xmm9
2178  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
2179  DB  197,241,105,219                     ; vpunpckhwd    %xmm3,%xmm1,%xmm3
2180  DB  197,185,108,194                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm0
2181  DB  196,226,125,19,192                  ; vcvtph2ps     %xmm0,%ymm0
2182  DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
2183  DB  196,226,125,19,201                  ; vcvtph2ps     %xmm1,%ymm1
2184  DB  197,177,108,211                     ; vpunpcklqdq   %xmm3,%xmm9,%xmm2
2185  DB  196,226,125,19,210                  ; vcvtph2ps     %xmm2,%ymm2
2186  DB  197,177,109,219                     ; vpunpckhqdq   %xmm3,%xmm9,%xmm3
2187  DB  196,226,125,19,219                  ; vcvtph2ps     %xmm3,%ymm3
; Exit: fetch the next qword from the stream and jump to it.
2188  DB  72,173                              ; lods          %ds:(%rsi),%rax
2189  DB  255,224                             ; jmpq          *%rax
; +0x6b: partial load. Elements are read one 8-byte piece at a time into the same
; registers the full load uses, stopping once %rcx elements have been read. %xmm9 is
; zeroed up front; the stubs at +0xca / +0xd7 zero whichever of %xmm2 / %xmm3 were
; never loaded before rejoining at +0x21.
2190  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
2191  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
2192  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2193  DB  116,79                              ; je            2000 <_sk_load_f16_hsw+0xca>
2194  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
2195  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2196  DB  114,67                              ; jb            2000 <_sk_load_f16_hsw+0xca>
2197  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
2198  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2199  DB  116,68                              ; je            200d <_sk_load_f16_hsw+0xd7>
2200  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
2201  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2202  DB  114,56                              ; jb            200d <_sk_load_f16_hsw+0xd7>
2203  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
2204  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2205  DB  15,132,114,255,255,255              ; je            1f57 <_sk_load_f16_hsw+0x21>
2206  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
2207  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2208  DB  15,130,98,255,255,255               ; jb            1f57 <_sk_load_f16_hsw+0x21>
2209  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
2210  DB  233,87,255,255,255                  ; jmpq          1f57 <_sk_load_f16_hsw+0x21>
; +0xca: at most two elements were loaded - zero %xmm3 and %xmm2.
2211  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2212  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
2213  DB  233,74,255,255,255                  ; jmpq          1f57 <_sk_load_f16_hsw+0x21>
; +0xd7: three or four elements were loaded - zero %xmm3 only.
2214  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2215  DB  233,65,255,255,255                  ; jmpq          1f57 <_sk_load_f16_hsw+0x21>
2216
; _sk_store_f16_hsw
;   Converts four float vectors to half precision, interleaves them into 8-byte
;   elements (four halfs each, in %ymm0..%ymm3 order), and stores them.
;
;   Inputs as used by the code below:
;     %rsi  qword stream read with lods: the first qword points at a slot holding the
;           destination base address (then held in %rax); the second is the address
;           jumped to on exit.
;     %rdi  element index; the destination address is base + 8*%rdi.
;     %rcx  zero stores all eight elements (64 bytes); non-zero stores elements one at
;           a time, at most seven.
;           NOTE(review): presumably the pipeline's tail count - confirm in the driver.
;     %ymm0-%ymm3  converted with vcvtps2ph $4 (imm bit 2 set: rounding per MXCSR).
;   %ymm0-%ymm3 are left unmodified. Written: %rax, %xmm8-%xmm13.
2217PUBLIC _sk_store_f16_hsw
2218_sk_store_f16_hsw LABEL PROC
2219  DB  72,173                              ; lods          %ds:(%rsi),%rax
2220  DB  72,139,0                            ; mov           (%rax),%rax
2221  DB  196,195,125,29,192,4                ; vcvtps2ph     $0x4,%ymm0,%xmm8
2222  DB  196,195,125,29,201,4                ; vcvtps2ph     $0x4,%ymm1,%xmm9
2223  DB  196,195,125,29,210,4                ; vcvtps2ph     $0x4,%ymm2,%xmm10
2224  DB  196,195,125,29,219,4                ; vcvtps2ph     $0x4,%ymm3,%xmm11
; Interleave 16-bit then 32-bit pieces so each register holds two whole elements:
;   %xmm11 = elements 0-1, %xmm10 = 2-3, %xmm9 = 4-5, %xmm8 = 6-7.
2225  DB  196,65,57,97,225                    ; vpunpcklwd    %xmm9,%xmm8,%xmm12
2226  DB  196,65,57,105,193                   ; vpunpckhwd    %xmm9,%xmm8,%xmm8
2227  DB  196,65,41,97,203                    ; vpunpcklwd    %xmm11,%xmm10,%xmm9
2228  DB  196,65,41,105,235                   ; vpunpckhwd    %xmm11,%xmm10,%xmm13
2229  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
2230  DB  196,65,25,106,209                   ; vpunpckhdq    %xmm9,%xmm12,%xmm10
2231  DB  196,65,57,98,205                    ; vpunpckldq    %xmm13,%xmm8,%xmm9
2232  DB  196,65,57,106,197                   ; vpunpckhdq    %xmm13,%xmm8,%xmm8
2233  DB  72,133,201                          ; test          %rcx,%rcx
2234  DB  117,27                              ; jne           207b <_sk_store_f16_hsw+0x65>
2235  DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
2236  DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
2237  DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
2238  DB  197,122,127,68,248,48               ; vmovdqu       %xmm8,0x30(%rax,%rdi,8)
; +0x61: exit, shared by the full and partial store paths.
2239  DB  72,173                              ; lods          %ds:(%rsi),%rax
2240  DB  255,224                             ; jmpq          *%rax
; +0x65: partial store, one 8-byte element at a time until %rcx elements are written.
; Each je that directly follows a vector store reuses the flags from the preceding
; cmp: the vmovq stores in between do not modify flags.
2241  DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
2242  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2243  DB  116,241                             ; je            2077 <_sk_store_f16_hsw+0x61>
2244  DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
2245  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2246  DB  114,229                             ; jb            2077 <_sk_store_f16_hsw+0x61>
2247  DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
2248  DB  116,221                             ; je            2077 <_sk_store_f16_hsw+0x61>
2249  DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
2250  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2251  DB  114,209                             ; jb            2077 <_sk_store_f16_hsw+0x61>
2252  DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
2253  DB  116,201                             ; je            2077 <_sk_store_f16_hsw+0x61>
2254  DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
2255  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2256  DB  114,189                             ; jb            2077 <_sk_store_f16_hsw+0x61>
2257  DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
2258  DB  235,181                             ; jmp           2077 <_sk_store_f16_hsw+0x61>
2259
; _sk_load_u16_be_hsw
; Loads 8-byte pixels (four 16-bit values each) from base + %rdi*8, where
; base is the pointer stored at the address fetched by the first lods.
; Every 16-bit value has its two bytes swapped (shift left 8 | shift right 8),
; is zero-extended to 32 bits, converted to float and multiplied by the
; constant 0x37800080 (approximately 1/65535).
; Result: %ymmN (N = 0..3) holds the N-th value of each of the eight pixels.
; %rcx == 0 takes the full 64-byte load; any other value takes the tail path
; at +0xd7, which loads at most seven pixels and zeroes the registers it
; does not load.
; NOTE(review): %rcx is presumably the tail pixel count and %rdi the pixel
; index supplied by the pipeline driver -- confirm against the caller.
; Writes: %rax, %rsi (advanced by lods), %xmm8-%xmm11, %ymm0-%ymm3, flags.
2260PUBLIC _sk_load_u16_be_hsw
2261_sk_load_u16_be_hsw LABEL PROC
2262  DB  72,173                              ; lods          %ds:(%rsi),%rax
2263  DB  72,139,0                            ; mov           (%rax),%rax
2264  DB  72,133,201                          ; test          %rcx,%rcx
2265  DB  15,133,201,0,0,0                    ; jne           2199 <_sk_load_u16_be_hsw+0xd7>
; -- Full path: four 16-byte loads = eight pixels.
2266  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
2267  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
2268  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
2269  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
; -- +0x25: common entry point; the tail path jumps back here once
; -- %xmm8, %xmm2, %xmm3 and %xmm9 hold pixel pairs 0-1, 2-3, 4-5 and 6-7.
; -- Two rounds of word interleaving de-interleave the four values per pixel.
2270  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
2271  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
2272  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
2273  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
2274  DB  197,121,97,194                      ; vpunpcklwd    %xmm2,%xmm0,%xmm8
2275  DB  197,121,105,202                     ; vpunpckhwd    %xmm2,%xmm0,%xmm9
2276  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
2277  DB  197,113,105,219                     ; vpunpckhwd    %xmm3,%xmm1,%xmm11
; -- %ymm10 = broadcast of the float with bit pattern 0x37800080 (~1/65535).
2278  DB  184,128,0,128,55                    ; mov           $0x37800080,%eax
2279  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2280  DB  196,98,125,88,208                   ; vpbroadcastd  %xmm0,%ymm10
; -- Value 0 of every pixel: byte-swap each word, widen, convert, scale.
2281  DB  197,185,108,194                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm0
2282  DB  197,241,113,240,8                   ; vpsllw        $0x8,%xmm0,%xmm1
2283  DB  197,249,113,208,8                   ; vpsrlw        $0x8,%xmm0,%xmm0
2284  DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
2285  DB  196,226,125,51,192                  ; vpmovzxwd     %xmm0,%ymm0
2286  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
2287  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
; -- Value 1 of every pixel, same treatment.
2288  DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
2289  DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
2290  DB  197,241,113,209,8                   ; vpsrlw        $0x8,%xmm1,%xmm1
2291  DB  197,233,235,201                     ; vpor          %xmm1,%xmm2,%xmm1
2292  DB  196,226,125,51,201                  ; vpmovzxwd     %xmm1,%ymm1
2293  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
2294  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
; -- Value 2 of every pixel.
2295  DB  196,193,49,108,211                  ; vpunpcklqdq   %xmm11,%xmm9,%xmm2
2296  DB  197,225,113,242,8                   ; vpsllw        $0x8,%xmm2,%xmm3
2297  DB  197,233,113,210,8                   ; vpsrlw        $0x8,%xmm2,%xmm2
2298  DB  197,225,235,210                     ; vpor          %xmm2,%xmm3,%xmm2
2299  DB  196,226,125,51,210                  ; vpmovzxwd     %xmm2,%ymm2
2300  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2301  DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
; -- Value 3 of every pixel.
2302  DB  196,193,49,109,219                  ; vpunpckhqdq   %xmm11,%xmm9,%xmm3
2303  DB  197,185,113,243,8                   ; vpsllw        $0x8,%xmm3,%xmm8
2304  DB  197,225,113,211,8                   ; vpsrlw        $0x8,%xmm3,%xmm3
2305  DB  197,185,235,219                     ; vpor          %xmm3,%xmm8,%xmm3
2306  DB  196,226,125,51,219                  ; vpmovzxwd     %xmm3,%ymm3
2307  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
2308  DB  197,172,89,219                      ; vmulps        %ymm3,%ymm10,%ymm3
; -- Fetch the next entry from (%rsi) and jump to it.
2309  DB  72,173                              ; lods          %ds:(%rsi),%rax
2310  DB  255,224                             ; jmpq          *%rax
; -- +0xd7: tail path (%rcx != 0). Loads one 8-byte pixel at a time,
; -- stopping once %rcx pixels have been read; %xmm9 is pre-zeroed.
2311  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
2312  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
2313  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2314  DB  116,79                              ; je            21f8 <_sk_load_u16_be_hsw+0x136>
2315  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
2316  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2317  DB  114,67                              ; jb            21f8 <_sk_load_u16_be_hsw+0x136>
2318  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
2319  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2320  DB  116,68                              ; je            2205 <_sk_load_u16_be_hsw+0x143>
2321  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
2322  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2323  DB  114,56                              ; jb            2205 <_sk_load_u16_be_hsw+0x143>
2324  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
2325  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2326  DB  15,132,10,255,255,255               ; je            20e7 <_sk_load_u16_be_hsw+0x25>
2327  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
2328  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2329  DB  15,130,250,254,255,255              ; jb            20e7 <_sk_load_u16_be_hsw+0x25>
2330  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
2331  DB  233,239,254,255,255                 ; jmpq          20e7 <_sk_load_u16_be_hsw+0x25>
; -- +0x136: only %xmm8 was loaded (1 or 2 pixels); zero %xmm3 and %xmm2.
2332  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2333  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
2334  DB  233,226,254,255,255                 ; jmpq          20e7 <_sk_load_u16_be_hsw+0x25>
; -- +0x143: %xmm8 and %xmm2 were loaded (3 or 4 pixels); zero %xmm3.
2335  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2336  DB  233,217,254,255,255                 ; jmpq          20e7 <_sk_load_u16_be_hsw+0x25>
2337
; _sk_store_u16_be_hsw
; Converts the floats in %ymm0..%ymm3 to byte-swapped 16-bit values and
; writes them interleaved as 8-byte pixels at base + %rdi*8, where base is
; the pointer stored at the address fetched by the first lods (kept in %r8).
; Per register: multiply by 0x477fff00 (65535.0f), convert to 32-bit
; integers, pack to 16 bits with unsigned saturation, then swap the two
; bytes of every word.
; %rcx == 0 stores all eight pixels (64 bytes); otherwise the tail path at
; +0xf3 stores one pixel at a time, at most seven.
; NOTE(review): %rcx is presumably the tail pixel count and %rdi the pixel
; index supplied by the pipeline driver -- confirm against the caller.
; Writes: %rax, %r8, %rsi (advanced by lods), %ymm8-%ymm13, flags.
; %ymm0..%ymm3 are left unchanged.
2338PUBLIC _sk_store_u16_be_hsw
2339_sk_store_u16_be_hsw LABEL PROC
2340  DB  72,173                              ; lods          %ds:(%rsi),%rax
2341  DB  76,139,0                            ; mov           (%rax),%r8
; -- %ymm8 = broadcast 65535.0f.
2342  DB  184,0,255,127,71                    ; mov           $0x477fff00,%eax
2343  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2344  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
; -- %ymm0 -> eight byte-swapped words in %xmm9.
2345  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
2346  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
2347  DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
2348  DB  196,66,49,43,202                    ; vpackusdw     %xmm10,%xmm9,%xmm9
2349  DB  196,193,41,113,241,8                ; vpsllw        $0x8,%xmm9,%xmm10
2350  DB  196,193,49,113,209,8                ; vpsrlw        $0x8,%xmm9,%xmm9
2351  DB  196,65,41,235,201                   ; vpor          %xmm9,%xmm10,%xmm9
; -- %ymm1 -> %xmm10.
2352  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
2353  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2354  DB  196,67,125,25,211,1                 ; vextractf128  $0x1,%ymm10,%xmm11
2355  DB  196,66,41,43,211                    ; vpackusdw     %xmm11,%xmm10,%xmm10
2356  DB  196,193,33,113,242,8                ; vpsllw        $0x8,%xmm10,%xmm11
2357  DB  196,193,41,113,210,8                ; vpsrlw        $0x8,%xmm10,%xmm10
2358  DB  196,65,33,235,210                   ; vpor          %xmm10,%xmm11,%xmm10
; -- %ymm2 -> %xmm11.
2359  DB  197,60,89,218                       ; vmulps        %ymm2,%ymm8,%ymm11
2360  DB  196,65,125,91,219                   ; vcvtps2dq     %ymm11,%ymm11
2361  DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
2362  DB  196,66,33,43,220                    ; vpackusdw     %xmm12,%xmm11,%xmm11
2363  DB  196,193,25,113,243,8                ; vpsllw        $0x8,%xmm11,%xmm12
2364  DB  196,193,33,113,211,8                ; vpsrlw        $0x8,%xmm11,%xmm11
2365  DB  196,65,25,235,219                   ; vpor          %xmm11,%xmm12,%xmm11
; -- %ymm3 -> %xmm8 (the scale constant is no longer needed).
2366  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
2367  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
2368  DB  196,67,125,25,196,1                 ; vextractf128  $0x1,%ymm8,%xmm12
2369  DB  196,66,57,43,196                    ; vpackusdw     %xmm12,%xmm8,%xmm8
2370  DB  196,193,25,113,240,8                ; vpsllw        $0x8,%xmm8,%xmm12
2371  DB  196,193,57,113,208,8                ; vpsrlw        $0x8,%xmm8,%xmm8
2372  DB  196,65,25,235,192                   ; vpor          %xmm8,%xmm12,%xmm8
; -- Interleave the four planes into pixels: afterwards %xmm11, %xmm10,
; -- %xmm9, %xmm8 hold pixel pairs 0-1, 2-3, 4-5, 6-7 respectively.
2373  DB  196,65,49,97,226                    ; vpunpcklwd    %xmm10,%xmm9,%xmm12
2374  DB  196,65,49,105,234                   ; vpunpckhwd    %xmm10,%xmm9,%xmm13
2375  DB  196,65,33,97,200                    ; vpunpcklwd    %xmm8,%xmm11,%xmm9
2376  DB  196,65,33,105,192                   ; vpunpckhwd    %xmm8,%xmm11,%xmm8
2377  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
2378  DB  196,65,25,106,209                   ; vpunpckhdq    %xmm9,%xmm12,%xmm10
2379  DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
2380  DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
2381  DB  72,133,201                          ; test          %rcx,%rcx
2382  DB  117,31                              ; jne           2301 <_sk_store_u16_be_hsw+0xf3>
; -- Full path: store all eight pixels.
2383  DB  196,65,120,17,28,248                ; vmovups       %xmm11,(%r8,%rdi,8)
2384  DB  196,65,120,17,84,248,16             ; vmovups       %xmm10,0x10(%r8,%rdi,8)
2385  DB  196,65,120,17,76,248,32             ; vmovups       %xmm9,0x20(%r8,%rdi,8)
2386  DB  196,65,122,127,68,248,48            ; vmovdqu       %xmm8,0x30(%r8,%rdi,8)
; -- +0xef: common exit; fetch the next entry from (%rsi) and jump to it.
2387  DB  72,173                              ; lods          %ds:(%rsi),%rax
2388  DB  255,224                             ; jmpq          *%rax
; -- +0xf3: tail path. The je branches that directly follow a store reuse
; -- the flags of the preceding cmp (the vector stores leave flags alone).
2389  DB  196,65,121,214,28,248               ; vmovq         %xmm11,(%r8,%rdi,8)
2390  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2391  DB  116,240                             ; je            22fd <_sk_store_u16_be_hsw+0xef>
2392  DB  196,65,121,23,92,248,8              ; vmovhpd       %xmm11,0x8(%r8,%rdi,8)
2393  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2394  DB  114,227                             ; jb            22fd <_sk_store_u16_be_hsw+0xef>
2395  DB  196,65,121,214,84,248,16            ; vmovq         %xmm10,0x10(%r8,%rdi,8)
2396  DB  116,218                             ; je            22fd <_sk_store_u16_be_hsw+0xef>
2397  DB  196,65,121,23,84,248,24             ; vmovhpd       %xmm10,0x18(%r8,%rdi,8)
2398  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2399  DB  114,205                             ; jb            22fd <_sk_store_u16_be_hsw+0xef>
2400  DB  196,65,121,214,76,248,32            ; vmovq         %xmm9,0x20(%r8,%rdi,8)
2401  DB  116,196                             ; je            22fd <_sk_store_u16_be_hsw+0xef>
2402  DB  196,65,121,23,76,248,40             ; vmovhpd       %xmm9,0x28(%r8,%rdi,8)
2403  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2404  DB  114,183                             ; jb            22fd <_sk_store_u16_be_hsw+0xef>
2405  DB  196,65,121,214,68,248,48            ; vmovq         %xmm8,0x30(%r8,%rdi,8)
2406  DB  235,174                             ; jmp           22fd <_sk_store_u16_be_hsw+0xef>
2407
; _sk_load_f32_hsw
; Loads 16-byte pixels (four 32-bit values each) from base + %rdi*16, where
; base is the pointer stored at the address fetched by lods (kept in %r8;
; %r9 = %rdi*4 and is scaled by 4 again in every address). The loaded rows
; are transposed so that %ymmN (N = 0..3) holds the N-th value of each pixel.
; %rcx in 0..7 indexes the jump table that follows the code: 0 enters at the
; top and loads all eight pixels, k = 1..7 enters lower down so that only
; the first k pixels are loaded. Lanes belonging to pixels that were not
; loaded keep whatever the registers held before.
; NOTE(review): when %rcx > 7 the ja skips every load and the transpose runs
; on stale register contents; presumably callers never pass %rcx > 7 --
; confirm against the pipeline driver.
; Writes: %rax, %r8, %r9, %r10, %rsi (advanced by lods), %ymm0-%ymm3,
; %ymm8-%ymm10, flags.
2408PUBLIC _sk_load_f32_hsw
2409_sk_load_f32_hsw LABEL PROC
2410  DB  72,173                              ; lods          %ds:(%rsi),%rax
2411  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2412  DB  119,110                             ; ja            23c5 <_sk_load_f32_hsw+0x76>
2413  DB  76,139,0                            ; mov           (%rax),%r8
2414  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
; -- %r10 = address of the jump table; entries are signed 32-bit offsets
; -- relative to the table, so the target is %r10 + table[%rcx].
2415  DB  76,141,21,135,0,0,0                 ; lea           0x87(%rip),%r10        # 23f0 <_sk_load_f32_hsw+0xa1>
2416  DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
2417  DB  76,1,208                            ; add           %r10,%rax
2418  DB  255,224                             ; jmpq          *%rax
; -- +0x23: entry for %rcx == 0. Pixels 7..4 go into upper 128-bit halves.
2419  DB  196,3,125,24,68,136,112,1           ; vinsertf128   $0x1,0x70(%r8,%r9,4),%ymm0,%ymm8
; -- +0x2b: entry for %rcx == 7.
2420  DB  196,131,125,24,92,136,96,1          ; vinsertf128   $0x1,0x60(%r8,%r9,4),%ymm0,%ymm3
; -- +0x33: entry for %rcx == 6.
2421  DB  196,131,125,24,76,136,80,1          ; vinsertf128   $0x1,0x50(%r8,%r9,4),%ymm0,%ymm1
; -- +0x3b: entry for %rcx == 5.
2422  DB  196,131,125,24,84,136,64,1          ; vinsertf128   $0x1,0x40(%r8,%r9,4),%ymm0,%ymm2
; -- +0x43: entry for %rcx == 4. Pixels 3..0 go into lower halves and are
; -- blended ($0xc keeps the upper half of the first source operand).
2423  DB  196,129,121,16,68,136,48            ; vmovupd       0x30(%r8,%r9,4),%xmm0
2424  DB  196,195,125,13,192,12               ; vblendpd      $0xc,%ymm8,%ymm0,%ymm0
; -- +0x50: entry for %rcx == 3.
2425  DB  196,1,121,16,68,136,32              ; vmovupd       0x20(%r8,%r9,4),%xmm8
2426  DB  196,99,61,13,203,12                 ; vblendpd      $0xc,%ymm3,%ymm8,%ymm9
; -- +0x5d: entry for %rcx == 2.
2427  DB  196,129,121,16,92,136,16            ; vmovupd       0x10(%r8,%r9,4),%xmm3
2428  DB  196,99,101,13,209,12                ; vblendpd      $0xc,%ymm1,%ymm3,%ymm10
; -- +0x6a: entry for %rcx == 1.
2429  DB  196,129,121,16,12,136               ; vmovupd       (%r8,%r9,4),%xmm1
2430  DB  196,227,117,13,202,12               ; vblendpd      $0xc,%ymm2,%ymm1,%ymm1
; -- +0x76: transpose. Here %ymm1, %ymm10, %ymm9, %ymm0 hold pixel pairs
; -- 0|4, 1|5, 2|6, 3|7 (low|high half).
2431  DB  196,193,116,20,210                  ; vunpcklps     %ymm10,%ymm1,%ymm2
2432  DB  196,193,116,21,218                  ; vunpckhps     %ymm10,%ymm1,%ymm3
2433  DB  197,180,20,200                      ; vunpcklps     %ymm0,%ymm9,%ymm1
2434  DB  197,52,21,192                       ; vunpckhps     %ymm0,%ymm9,%ymm8
2435  DB  197,237,20,193                      ; vunpcklpd     %ymm1,%ymm2,%ymm0
2436  DB  197,237,21,201                      ; vunpckhpd     %ymm1,%ymm2,%ymm1
2437  DB  196,193,101,20,208                  ; vunpcklpd     %ymm8,%ymm3,%ymm2
2438  DB  196,193,101,21,216                  ; vunpckhpd     %ymm8,%ymm3,%ymm3
; -- Fetch the next entry from (%rsi) and jump to it.
2439  DB  72,173                              ; lods          %ds:(%rsi),%rax
2440  DB  255,224                             ; jmpq          *%rax
; -- Three bytes of padding ahead of the table; never reached, because the
; -- jmpq above is unconditional.
2441  DB  15,31,0                             ; nopl          (%rax)
; -- +0xa1: jump table, NOT code. Eight little-endian signed 32-bit offsets
; -- relative to the table, indexed by %rcx. The instruction mnemonics the
; -- disassembler printed for these bytes were meaningless and are replaced.
; --   [0] 0xffffff82 -> +0x23     [4] 0xffffffa2 -> +0x43
; --   [1] 0xffffffc9 -> +0x6a     [5] 0xffffff9a -> +0x3b
; --   [2] 0xffffffbc -> +0x5d     [6] 0xffffff92 -> +0x33
; --   [3] 0xffffffaf -> +0x50     [7] 0xffffff8a -> +0x2b
2442  DB  130                                 ; table[0] byte 0
2443  DB  255                                 ; table[0] byte 1
2444  DB  255                                 ; table[0] byte 2
2445  DB  255,201                             ; table[0] byte 3, table[1] byte 0
2446  DB  255                                 ; table[1] byte 1
2447  DB  255                                 ; table[1] byte 2
2448  DB  255                                 ; table[1] byte 3
2449  DB  188,255,255,255,175                 ; table[2] bytes 0-3, table[3] byte 0
2450  DB  255                                 ; table[3] byte 1
2451  DB  255                                 ; table[3] byte 2
2452  DB  255,162,255,255,255,154             ; table[3] byte 3, table[4] bytes 0-3, table[5] byte 0
2453  DB  255                                 ; table[5] byte 1
2454  DB  255                                 ; table[5] byte 2
2455  DB  255,146,255,255,255,138             ; table[5] byte 3, table[6] bytes 0-3, table[7] byte 0
2456  DB  255                                 ; table[7] byte 1
2457  DB  255                                 ; table[7] byte 2
2458  DB  255                                 ; table[7] byte 3
2459
; _sk_store_f32_hsw
; Interleaves the floats in %ymm0..%ymm3 into 16-byte pixels (lane i of each
; register forms pixel i) and writes them at base + %rdi*16, where base is
; the pointer stored at the address fetched by lods (kept in %r8; %rax is
; reused as %rdi*4 and scaled by 4 again in every address).
; %rcx == 0 stores all eight pixels (128 bytes); otherwise the tail path at
; +0x6d stores one pixel at a time, at most seven.
; NOTE(review): %rcx is presumably the tail pixel count and %rdi the pixel
; index supplied by the pipeline driver -- confirm against the caller.
; Writes: %rax, %r8, %rsi (advanced by lods), %ymm8-%ymm13, flags.
; %ymm0..%ymm3 are left unchanged.
2460PUBLIC _sk_store_f32_hsw
2461_sk_store_f32_hsw LABEL PROC
2462  DB  72,173                              ; lods          %ds:(%rsi),%rax
2463  DB  76,139,0                            ; mov           (%rax),%r8
2464  DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
; -- Transpose: afterwards %ymm10, %ymm9, %ymm8, %ymm11 hold pixel pairs
; -- 0|4, 1|5, 2|6, 3|7 (low|high 128-bit half).
2465  DB  197,124,20,193                      ; vunpcklps     %ymm1,%ymm0,%ymm8
2466  DB  197,124,21,217                      ; vunpckhps     %ymm1,%ymm0,%ymm11
2467  DB  197,108,20,203                      ; vunpcklps     %ymm3,%ymm2,%ymm9
2468  DB  197,108,21,227                      ; vunpckhps     %ymm3,%ymm2,%ymm12
2469  DB  196,65,61,20,209                    ; vunpcklpd     %ymm9,%ymm8,%ymm10
2470  DB  196,65,61,21,201                    ; vunpckhpd     %ymm9,%ymm8,%ymm9
2471  DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
2472  DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
2473  DB  72,133,201                          ; test          %rcx,%rcx
2474  DB  117,55                              ; jne           247d <_sk_store_f32_hsw+0x6d>
; -- Full path: regroup halves into pixel pairs 0|1, 2|3, 4|5, 6|7 and
; -- store them with four 32-byte writes.
2475  DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
2476  DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
2477  DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
2478  DB  196,67,61,6,195,49                  ; vperm2f128    $0x31,%ymm11,%ymm8,%ymm8
2479  DB  196,65,125,17,36,128                ; vmovupd       %ymm12,(%r8,%rax,4)
2480  DB  196,65,125,17,108,128,32            ; vmovupd       %ymm13,0x20(%r8,%rax,4)
2481  DB  196,65,125,17,76,128,64             ; vmovupd       %ymm9,0x40(%r8,%rax,4)
2482  DB  196,65,125,17,68,128,96             ; vmovupd       %ymm8,0x60(%r8,%rax,4)
; -- +0x69: common exit; fetch the next entry from (%rsi) and jump to it.
2483  DB  72,173                              ; lods          %ds:(%rsi),%rax
2484  DB  255,224                             ; jmpq          *%rax
; -- +0x6d: tail path. Pixels 0..3 come from the low halves, pixels 4..6
; -- from the high halves. The je branches that directly follow a store
; -- reuse the flags of the preceding cmp.
2485  DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
2486  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2487  DB  116,240                             ; je            2479 <_sk_store_f32_hsw+0x69>
2488  DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
2489  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2490  DB  114,227                             ; jb            2479 <_sk_store_f32_hsw+0x69>
2491  DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
2492  DB  116,218                             ; je            2479 <_sk_store_f32_hsw+0x69>
2493  DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
2494  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2495  DB  114,205                             ; jb            2479 <_sk_store_f32_hsw+0x69>
2496  DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
2497  DB  116,195                             ; je            2479 <_sk_store_f32_hsw+0x69>
2498  DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
2499  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2500  DB  114,181                             ; jb            2479 <_sk_store_f32_hsw+0x69>
2501  DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
2502  DB  235,171                             ; jmp           2479 <_sk_store_f32_hsw+0x69>
2503
; _sk_clamp_x_hsw
; Clamps every lane of %ymm0 to [0, L'], where L is the 32-bit value at the
; address fetched by lods and L' is L with its bit pattern decremented by
; one (an integer add of -1; for a positive finite float that is the next
; smaller representable float).
; Writes: %rax, %rsi (advanced by lods), %ymm0, %ymm8, %ymm9.
2504PUBLIC _sk_clamp_x_hsw
2505_sk_clamp_x_hsw LABEL PROC
2506  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- %ymm0 = max(0, %ymm0)
2507  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2508  DB  197,188,95,192                      ; vmaxps        %ymm0,%ymm8,%ymm0
; -- %ymm8 = broadcast L; %ymm9 = all ones (-1); %ymm8 = L' = L + (-1)
2509  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
2510  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
2511  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
; -- %ymm0 = min(%ymm0, L')
2512  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
; -- Fetch the next entry from (%rsi) and jump to it.
2513  DB  72,173                              ; lods          %ds:(%rsi),%rax
2514  DB  255,224                             ; jmpq          *%rax
2515
; _sk_clamp_y_hsw
; Clamps every lane of %ymm1 to [0, L'], where L is the 32-bit value at the
; address fetched by lods and L' is L with its bit pattern decremented by
; one (an integer add of -1; for a positive finite float that is the next
; smaller representable float).
; Writes: %rax, %rsi (advanced by lods), %ymm1, %ymm8, %ymm9.
2516PUBLIC _sk_clamp_y_hsw
2517_sk_clamp_y_hsw LABEL PROC
2518  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- %ymm1 = max(0, %ymm1)
2519  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2520  DB  197,188,95,201                      ; vmaxps        %ymm1,%ymm8,%ymm1
; -- %ymm8 = broadcast L; %ymm9 = all ones (-1); %ymm8 = L' = L + (-1)
2521  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
2522  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
2523  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
; -- %ymm1 = min(%ymm1, L')
2524  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
; -- Fetch the next entry from (%rsi) and jump to it.
2525  DB  72,173                              ; lods          %ds:(%rsi),%rax
2526  DB  255,224                             ; jmpq          *%rax
2527
; _sk_repeat_x_hsw
; Wraps every lane of %ymm0 into one period of length L, where L is the
; float at the address fetched by lods:
;   %ymm0 = min(x - floor(x / L) * L, L')
; L' is L with its bit pattern decremented by one (integer add of -1).
; Writes: %rax, %rsi (advanced by lods), %ymm0, %ymm8, %ymm9.
2528PUBLIC _sk_repeat_x_hsw
2529_sk_repeat_x_hsw LABEL PROC
2530  DB  72,173                              ; lods          %ds:(%rsi),%rax
2531  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; -- %ymm9 = floor(x / L); rounding immediate 1 selects round toward -inf.
2532  DB  196,65,124,94,200                   ; vdivps        %ymm8,%ymm0,%ymm9
2533  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
; -- %ymm9 = x - L * %ymm9
2534  DB  196,98,61,172,200                   ; vfnmadd213ps  %ymm0,%ymm8,%ymm9
; -- %ymm0 = all ones (-1), then L' = L + (-1) as an integer add.
2535  DB  197,253,118,192                     ; vpcmpeqd      %ymm0,%ymm0,%ymm0
2536  DB  197,189,254,192                     ; vpaddd        %ymm0,%ymm8,%ymm0
2537  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
; -- Fetch the next entry from (%rsi) and jump to it.
2538  DB  72,173                              ; lods          %ds:(%rsi),%rax
2539  DB  255,224                             ; jmpq          *%rax
2540
; _sk_repeat_y_hsw
; Wraps every lane of %ymm1 into one period of length L, where L is the
; float at the address fetched by lods:
;   %ymm1 = min(y - floor(y / L) * L, L')
; L' is L with its bit pattern decremented by one (integer add of -1).
; Writes: %rax, %rsi (advanced by lods), %ymm1, %ymm8, %ymm9.
2541PUBLIC _sk_repeat_y_hsw
2542_sk_repeat_y_hsw LABEL PROC
2543  DB  72,173                              ; lods          %ds:(%rsi),%rax
2544  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; -- %ymm9 = floor(y / L); rounding immediate 1 selects round toward -inf.
2545  DB  196,65,116,94,200                   ; vdivps        %ymm8,%ymm1,%ymm9
2546  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
; -- %ymm9 = y - L * %ymm9
2547  DB  196,98,61,172,201                   ; vfnmadd213ps  %ymm1,%ymm8,%ymm9
; -- %ymm1 = all ones (-1), then L' = L + (-1) as an integer add.
2548  DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
2549  DB  197,189,254,201                     ; vpaddd        %ymm1,%ymm8,%ymm1
2550  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
; -- Fetch the next entry from (%rsi) and jump to it.
2551  DB  72,173                              ; lods          %ds:(%rsi),%rax
2552  DB  255,224                             ; jmpq          *%rax
2553
; _sk_mirror_x_hsw
; Reflects every lane of %ymm0 with period 2*L, where L is the float at the
; address fetched by lods:
;   t     = x - L
;   u     = t - floor(t / (2*L)) * (2*L)
;   %ymm0 = min(|u - L|, L')
; L' is L with its bit pattern decremented by one (integer add of -1).
; Writes: %rax, %rsi (advanced by lods), %ymm0, %ymm8, %ymm9, %ymm10.
2554PUBLIC _sk_mirror_x_hsw
2555_sk_mirror_x_hsw LABEL PROC
2556  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- %xmm8 = L (scalar), %ymm9 = broadcast L, %ymm10 = t = x - L
2557  DB  197,122,16,0                        ; vmovss        (%rax),%xmm8
2558  DB  196,66,125,24,200                   ; vbroadcastss  %xmm8,%ymm9
2559  DB  196,65,124,92,209                   ; vsubps        %ymm9,%ymm0,%ymm10
; -- %ymm0 = broadcast 2*L
2560  DB  196,193,58,88,192                   ; vaddss        %xmm8,%xmm8,%xmm0
2561  DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
; -- %ymm8 = u = t - floor(t / 2L) * 2L (rounding immediate 1 = toward -inf)
2562  DB  197,44,94,192                       ; vdivps        %ymm0,%ymm10,%ymm8
2563  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
2564  DB  196,66,125,172,194                  ; vfnmadd213ps  %ymm10,%ymm0,%ymm8
; -- %ymm0 = v = u - L, then |v| computed as v AND (0 - v), which clears
; -- the sign bit.
2565  DB  196,193,60,92,193                   ; vsubps        %ymm9,%ymm8,%ymm0
2566  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2567  DB  197,60,92,192                       ; vsubps        %ymm0,%ymm8,%ymm8
2568  DB  197,188,84,192                      ; vandps        %ymm0,%ymm8,%ymm0
; -- %ymm8 = L' = L + (-1) as an integer add; clamp from above.
2569  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
2570  DB  196,65,53,254,192                   ; vpaddd        %ymm8,%ymm9,%ymm8
2571  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
; -- Fetch the next entry from (%rsi) and jump to it.
2572  DB  72,173                              ; lods          %ds:(%rsi),%rax
2573  DB  255,224                             ; jmpq          *%rax
2574
; _sk_mirror_y_hsw
; Reflects every lane of %ymm1 with period 2*L, where L is the float at the
; address fetched by lods:
;   t     = y - L
;   u     = t - floor(t / (2*L)) * (2*L)
;   %ymm1 = min(|u - L|, L')
; L' is L with its bit pattern decremented by one (integer add of -1).
; Writes: %rax, %rsi (advanced by lods), %ymm1, %ymm8, %ymm9, %ymm10.
2575PUBLIC _sk_mirror_y_hsw
2576_sk_mirror_y_hsw LABEL PROC
2577  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- %xmm8 = L (scalar), %ymm9 = broadcast L, %ymm10 = t = y - L
2578  DB  197,122,16,0                        ; vmovss        (%rax),%xmm8
2579  DB  196,66,125,24,200                   ; vbroadcastss  %xmm8,%ymm9
2580  DB  196,65,116,92,209                   ; vsubps        %ymm9,%ymm1,%ymm10
; -- %ymm1 = broadcast 2*L
2581  DB  196,193,58,88,200                   ; vaddss        %xmm8,%xmm8,%xmm1
2582  DB  196,226,125,24,201                  ; vbroadcastss  %xmm1,%ymm1
; -- %ymm8 = u = t - floor(t / 2L) * 2L (rounding immediate 1 = toward -inf)
2583  DB  197,44,94,193                       ; vdivps        %ymm1,%ymm10,%ymm8
2584  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
2585  DB  196,66,117,172,194                  ; vfnmadd213ps  %ymm10,%ymm1,%ymm8
; -- %ymm1 = v = u - L, then |v| computed as v AND (0 - v), which clears
; -- the sign bit.
2586  DB  196,193,60,92,201                   ; vsubps        %ymm9,%ymm8,%ymm1
2587  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2588  DB  197,60,92,193                       ; vsubps        %ymm1,%ymm8,%ymm8
2589  DB  197,188,84,201                      ; vandps        %ymm1,%ymm8,%ymm1
; -- %ymm8 = L' = L + (-1) as an integer add; clamp from above.
2590  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
2591  DB  196,65,53,254,192                   ; vpaddd        %ymm8,%ymm9,%ymm8
2592  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
; -- Fetch the next entry from (%rsi) and jump to it.
2593  DB  72,173                              ; lods          %ds:(%rsi),%rax
2594  DB  255,224                             ; jmpq          *%rax
2595
; _sk_luminance_to_alpha_hsw
; Computes a weighted sum of %ymm0, %ymm1, %ymm2 into %ymm3 and then zeroes
; %ymm0, %ymm1, %ymm2:
;   %ymm3 = 0x3e59b3d0 * %ymm0 + 0x3f371759 * %ymm1 + 0x3d93dd98 * %ymm2
; The three constants are the floats ~0.2126, ~0.7152 and ~0.0722.
; The incoming %ymm3 is discarded. This routine takes no context value:
; its only lods fetches the address of the next routine.
; NOTE(review): %ymm0..%ymm3 are presumably r, g, b, a -- confirm.
; Writes: %rax, %rsi (advanced by lods), %ymm0-%ymm3, %ymm8.
2596PUBLIC _sk_luminance_to_alpha_hsw
2597_sk_luminance_to_alpha_hsw LABEL PROC
; -- %ymm8 = broadcast ~0.2126
2598  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
2599  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2600  DB  196,98,125,88,195                   ; vpbroadcastd  %xmm3,%ymm8
; -- %ymm3 = broadcast ~0.7152; %ymm1 = %ymm1 * 0.7152
2601  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
2602  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2603  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
2604  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
; -- %ymm8 = %ymm0 * %ymm8 + %ymm1
2605  DB  196,98,125,168,193                  ; vfmadd213ps   %ymm1,%ymm0,%ymm8
; -- %ymm3 = broadcast ~0.0722 (via %xmm0, whose old value is already used)
2606  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
2607  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2608  DB  196,226,125,88,216                  ; vpbroadcastd  %xmm0,%ymm3
; -- %ymm3 = %ymm2 * %ymm3 + %ymm8
2609  DB  196,194,109,168,216                 ; vfmadd213ps   %ymm8,%ymm2,%ymm3
; -- Fetch the next entry, clear the three inputs, and jump to it.
2610  DB  72,173                              ; lods          %ds:(%rsi),%rax
2611  DB  197,253,239,192                     ; vpxor         %ymm0,%ymm0,%ymm0
2612  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2613  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2614  DB  255,224                             ; jmpq          *%rax
2615
; _sk_matrix_2x3_hsw
; Applies a 2x3 affine transform to (%ymm0, %ymm1). With m = six
; consecutive floats at the address fetched by the first lods:
;   new %ymm0 = m[0]*%ymm0 + m[2]*%ymm1 + m[4]
;   new %ymm1 = m[1]*%ymm0 + m[3]*%ymm1 + m[5]
; Both results are computed from the original inputs before either is
; overwritten.
; Writes: %rax, %rsi (advanced by lods), %ymm0, %ymm1, %ymm8-%ymm11.
2616PUBLIC _sk_matrix_2x3_hsw
2617_sk_matrix_2x3_hsw LABEL PROC
2618  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- First output accumulates in %ymm8, starting from m[4].
2619  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
2620  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
2621  DB  196,98,125,24,64,16                 ; vbroadcastss  0x10(%rax),%ymm8
2622  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
2623  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; -- Second output accumulates in %ymm9, starting from m[5].
2624  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
2625  DB  196,98,125,24,88,12                 ; vbroadcastss  0xc(%rax),%ymm11
2626  DB  196,98,125,24,72,20                 ; vbroadcastss  0x14(%rax),%ymm9
2627  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
2628  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; -- Fetch the next entry, move the results into place, and jump to it.
2629  DB  72,173                              ; lods          %ds:(%rsi),%rax
2630  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2631  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
2632  DB  255,224                             ; jmpq          *%rax
2633
; _sk_matrix_3x4_hsw
; Applies a 3x4 affine transform to (%ymm0, %ymm1, %ymm2). With m = twelve
; consecutive floats at the address fetched by the first lods:
;   new %ymm0 = m[0]*%ymm0 + m[3]*%ymm1 + m[6]*%ymm2 + m[9]
;   new %ymm1 = m[1]*%ymm0 + m[4]*%ymm1 + m[7]*%ymm2 + m[10]
;   new %ymm2 = m[2]*%ymm0 + m[5]*%ymm1 + m[8]*%ymm2 + m[11]
; All three results are computed from the original inputs before any is
; overwritten. %ymm3 is not touched.
; Writes: %rax, %rsi (advanced by lods), %ymm0-%ymm2, %ymm8-%ymm13.
2634PUBLIC _sk_matrix_3x4_hsw
2635_sk_matrix_3x4_hsw LABEL PROC
2636  DB  72,173                              ; lods          %ds:(%rsi),%rax
; -- First output accumulates in %ymm8, starting from m[9].
2637  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
2638  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
2639  DB  196,98,125,24,88,24                 ; vbroadcastss  0x18(%rax),%ymm11
2640  DB  196,98,125,24,64,36                 ; vbroadcastss  0x24(%rax),%ymm8
2641  DB  196,66,109,184,195                  ; vfmadd231ps   %ymm11,%ymm2,%ymm8
2642  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
2643  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; -- Second output accumulates in %ymm9, starting from m[10].
2644  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
2645  DB  196,98,125,24,88,16                 ; vbroadcastss  0x10(%rax),%ymm11
2646  DB  196,98,125,24,96,28                 ; vbroadcastss  0x1c(%rax),%ymm12
2647  DB  196,98,125,24,72,40                 ; vbroadcastss  0x28(%rax),%ymm9
2648  DB  196,66,109,184,204                  ; vfmadd231ps   %ymm12,%ymm2,%ymm9
2649  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
2650  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; -- Third output accumulates in %ymm10, starting from m[11].
2651  DB  196,98,125,24,88,8                  ; vbroadcastss  0x8(%rax),%ymm11
2652  DB  196,98,125,24,96,20                 ; vbroadcastss  0x14(%rax),%ymm12
2653  DB  196,98,125,24,104,32                ; vbroadcastss  0x20(%rax),%ymm13
2654  DB  196,98,125,24,80,44                 ; vbroadcastss  0x2c(%rax),%ymm10
2655  DB  196,66,109,184,213                  ; vfmadd231ps   %ymm13,%ymm2,%ymm10
2656  DB  196,66,117,184,212                  ; vfmadd231ps   %ymm12,%ymm1,%ymm10
2657  DB  196,66,125,184,211                  ; vfmadd231ps   %ymm11,%ymm0,%ymm10
; -- Fetch the next entry, move the results into place, and jump to it.
2658  DB  72,173                              ; lods          %ds:(%rsi),%rax
2659  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2660  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
2661  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
2662  DB  255,224                             ; jmpq          *%rax
2663
; _sk_matrix_4x5_hsw
; Pipeline stage: applies a 4x5 affine transform to the four vectors ymm0..ymm3.
; The first lods reads a pointer from the stream at rsi (rsi advances by 8 with
; the direction flag clear); the code reads 20 consecutive floats m[0..19] from it
; and computes, per lane, with fused multiply-adds:
;   ymm0' = m[0]*ymm0 + m[4]*ymm1 + m[8]*ymm2  + m[12]*ymm3 + m[16]
;   ymm1' = m[1]*ymm0 + m[5]*ymm1 + m[9]*ymm2  + m[13]*ymm3 + m[17]
;   ymm2' = m[2]*ymm0 + m[6]*ymm1 + m[10]*ymm2 + m[14]*ymm3 + m[18]
;   ymm3' = m[3]*ymm0 + m[7]*ymm1 + m[11]*ymm2 + m[15]*ymm3 + m[19]
; Results are accumulated in ymm8..ymm11 so the inputs stay intact until all four
; rows are done. Clobbers rax and ymm8-ymm15. Ends by loading the next stream
; entry into rax and tail-jumping to it.
; NOTE(review): ymm0-ymm3 presumably hold r,g,b,a -- confirm against the stage source.
2664PUBLIC _sk_matrix_4x5_hsw
2665_sk_matrix_4x5_hsw LABEL PROC
2666  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Row 0 -> ymm8 (starts from the constant term m[16]).
2667  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
2668  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
2669  DB  196,98,125,24,88,32                 ; vbroadcastss  0x20(%rax),%ymm11
2670  DB  196,98,125,24,96,48                 ; vbroadcastss  0x30(%rax),%ymm12
2671  DB  196,98,125,24,64,64                 ; vbroadcastss  0x40(%rax),%ymm8
2672  DB  196,66,101,184,196                  ; vfmadd231ps   %ymm12,%ymm3,%ymm8
2673  DB  196,66,109,184,195                  ; vfmadd231ps   %ymm11,%ymm2,%ymm8
2674  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
2675  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; Row 1 -> ymm9 (starts from m[17]).
2676  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
2677  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
2678  DB  196,98,125,24,96,36                 ; vbroadcastss  0x24(%rax),%ymm12
2679  DB  196,98,125,24,104,52                ; vbroadcastss  0x34(%rax),%ymm13
2680  DB  196,98,125,24,72,68                 ; vbroadcastss  0x44(%rax),%ymm9
2681  DB  196,66,101,184,205                  ; vfmadd231ps   %ymm13,%ymm3,%ymm9
2682  DB  196,66,109,184,204                  ; vfmadd231ps   %ymm12,%ymm2,%ymm9
2683  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
2684  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; Row 2 -> ymm10 (starts from m[18]).
2685  DB  196,98,125,24,88,8                  ; vbroadcastss  0x8(%rax),%ymm11
2686  DB  196,98,125,24,96,24                 ; vbroadcastss  0x18(%rax),%ymm12
2687  DB  196,98,125,24,104,40                ; vbroadcastss  0x28(%rax),%ymm13
2688  DB  196,98,125,24,112,56                ; vbroadcastss  0x38(%rax),%ymm14
2689  DB  196,98,125,24,80,72                 ; vbroadcastss  0x48(%rax),%ymm10
2690  DB  196,66,101,184,214                  ; vfmadd231ps   %ymm14,%ymm3,%ymm10
2691  DB  196,66,109,184,213                  ; vfmadd231ps   %ymm13,%ymm2,%ymm10
2692  DB  196,66,117,184,212                  ; vfmadd231ps   %ymm12,%ymm1,%ymm10
2693  DB  196,66,125,184,211                  ; vfmadd231ps   %ymm11,%ymm0,%ymm10
; Row 3 -> ymm11 (starts from m[19]).
2694  DB  196,98,125,24,96,12                 ; vbroadcastss  0xc(%rax),%ymm12
2695  DB  196,98,125,24,104,28                ; vbroadcastss  0x1c(%rax),%ymm13
2696  DB  196,98,125,24,112,44                ; vbroadcastss  0x2c(%rax),%ymm14
2697  DB  196,98,125,24,120,60                ; vbroadcastss  0x3c(%rax),%ymm15
2698  DB  196,98,125,24,88,76                 ; vbroadcastss  0x4c(%rax),%ymm11
2699  DB  196,66,101,184,223                  ; vfmadd231ps   %ymm15,%ymm3,%ymm11
2700  DB  196,66,109,184,222                  ; vfmadd231ps   %ymm14,%ymm2,%ymm11
2701  DB  196,66,117,184,221                  ; vfmadd231ps   %ymm13,%ymm1,%ymm11
2702  DB  196,66,125,184,220                  ; vfmadd231ps   %ymm12,%ymm0,%ymm11
; Fetch the next stage, publish the four results, and tail-jump.
2703  DB  72,173                              ; lods          %ds:(%rsi),%rax
2704  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2705  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
2706  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
2707  DB  197,124,41,219                      ; vmovaps       %ymm11,%ymm3
2708  DB  255,224                             ; jmpq          *%rax
2709
; _sk_matrix_perspective_hsw
; Pipeline stage: projective (3x3) transform of the pair (ymm0, ymm1).
; The first lods reads a pointer from the stream at rsi; nine consecutive floats
; m[0..8] are read from it and, per lane:
;   X = m[0]*ymm0 + m[1]*ymm1 + m[2]      (ymm10)
;   Y = m[3]*ymm0 + m[4]*ymm1 + m[5]      (ymm11)
;   Z = m[6]*ymm0 + m[7]*ymm1 + m[8]      (ymm12)
;   ymm0' = X * rcp(Z),  ymm1' = Y * rcp(Z)
; rcp is vrcpps, i.e. the hardware's approximate reciprocal, not an exact divide.
; Clobbers rax and ymm8-ymm12; ymm2/ymm3 are untouched. Ends by loading the next
; stream entry into rax and tail-jumping to it.
; NOTE(review): ymm0/ymm1 presumably carry x/y coordinates here -- confirm.
2710PUBLIC _sk_matrix_perspective_hsw
2711_sk_matrix_perspective_hsw LABEL PROC
2712  DB  72,173                              ; lods          %ds:(%rsi),%rax
; X -> ymm10
2713  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
2714  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
2715  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
2716  DB  196,66,117,184,209                  ; vfmadd231ps   %ymm9,%ymm1,%ymm10
2717  DB  196,66,125,184,208                  ; vfmadd231ps   %ymm8,%ymm0,%ymm10
; Y -> ymm11
2718  DB  196,98,125,24,64,12                 ; vbroadcastss  0xc(%rax),%ymm8
2719  DB  196,98,125,24,72,16                 ; vbroadcastss  0x10(%rax),%ymm9
2720  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
2721  DB  196,66,117,184,217                  ; vfmadd231ps   %ymm9,%ymm1,%ymm11
2722  DB  196,66,125,184,216                  ; vfmadd231ps   %ymm8,%ymm0,%ymm11
; Z -> ymm12
2723  DB  196,98,125,24,64,24                 ; vbroadcastss  0x18(%rax),%ymm8
2724  DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
2725  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
2726  DB  196,66,117,184,225                  ; vfmadd231ps   %ymm9,%ymm1,%ymm12
2727  DB  196,66,125,184,224                  ; vfmadd231ps   %ymm8,%ymm0,%ymm12
; Perspective divide via approximate reciprocal; ymm1 briefly holds 1/Z.
2728  DB  196,193,124,83,204                  ; vrcpps        %ymm12,%ymm1
2729  DB  197,172,89,193                      ; vmulps        %ymm1,%ymm10,%ymm0
2730  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
2731  DB  72,173                              ; lods          %ds:(%rsi),%rax
2732  DB  255,224                             ; jmpq          *%rax
2733
; _sk_linear_gradient_2stops_hsw
; Pipeline stage: evaluates four linear functions of the per-lane value t = ymm0.
; The first lods reads a pointer from the stream at rsi; eight consecutive floats
; c[0..7] are read from it and, per lane:
;   ymm0' = c[0] + t*c[4]
;   ymm1' = c[1] + t*c[5]
;   ymm2' = c[2] + t*c[6]
;   ymm3' = c[3] + t*c[7]
; The first result is built in ymm8 because ymm0 (t) is still needed by the other
; three; it is copied into ymm0 just before the jump. Previous contents of
; ymm1..ymm3 are overwritten without being read. Clobbers rax, ymm8, ymm9.
; NOTE(review): c[0..3] look like a start colour and c[4..7] a per-channel delta --
; confirm against the code that fills this context.
2734PUBLIC _sk_linear_gradient_2stops_hsw
2735_sk_linear_gradient_2stops_hsw LABEL PROC
2736  DB  72,173                              ; lods          %ds:(%rsi),%rax
2737  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
2738  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
2739  DB  196,98,125,184,193                  ; vfmadd231ps   %ymm1,%ymm0,%ymm8
2740  DB  196,226,125,24,80,20                ; vbroadcastss  0x14(%rax),%ymm2
2741  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
2742  DB  196,226,125,184,202                 ; vfmadd231ps   %ymm2,%ymm0,%ymm1
2743  DB  196,226,125,24,88,24                ; vbroadcastss  0x18(%rax),%ymm3
2744  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
2745  DB  196,226,125,184,211                 ; vfmadd231ps   %ymm3,%ymm0,%ymm2
2746  DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
2747  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
2748  DB  196,194,125,184,217                 ; vfmadd231ps   %ymm9,%ymm0,%ymm3
2749  DB  72,173                              ; lods          %ds:(%rsi),%rax
2750  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2751  DB  255,224                             ; jmpq          *%rax
2752
; _sk_start_pipeline_avx
; Driver loop that repeatedly calls the first entry of a stage stream.
;
; Incoming registers (copied on entry):
;   rcx -> rbx   current position
;   rdx -> rsi   address of the stage stream
;   r8  -> r14   value handed to every call in rdx
;   r9  -> r13   limit
; NOTE(review): this matches the first four integer-argument registers of the
; Microsoft x64 calling convention -- confirm the C prototype in the caller.
;
; Prologue/epilogue save and restore r15, r14, r13, r12, rsi, rdi, rbx and
; xmm6-xmm15 (the registers that convention treats as callee-saved), using a
; 0xa0-byte stack area for the xmm registers.
;
; Behaviour:
;   r15 = first 8-byte entry of the stream, r12 = address of the entry after it.
;   while (rbx + 8 <= r13)            ; unsigned compare
;       call r15 with rdi = rbx, rsi = r12, rdx = r14, ecx = 0, ymm0-ymm7 = 0
;       rbx += 8
;   rcx = r13 - rbx
;   if (rcx != 0)
;       call r15 with rdi = rbx, rsi = r12, rdx = r14, rcx = remainder, ymm0-ymm7 = 0
;   return rax = r13                  ; vzeroupper is issued before ret
;
; The jbe/jmp/je displacements are hard-coded byte offsets, so no instruction in
; this body may change length.
2753PUBLIC _sk_start_pipeline_avx
2754_sk_start_pipeline_avx LABEL PROC
; --- Prologue: save callee-saved GPRs and xmm6-xmm15.
2755  DB  65,87                               ; push          %r15
2756  DB  65,86                               ; push          %r14
2757  DB  65,85                               ; push          %r13
2758  DB  65,84                               ; push          %r12
2759  DB  86                                  ; push          %rsi
2760  DB  87                                  ; push          %rdi
2761  DB  83                                  ; push          %rbx
2762  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
2763  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
2764  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
2765  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
2766  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
2767  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
2768  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
2769  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
2770  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
2771  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
2772  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
; --- Stash the incoming arguments in registers that are not rewritten per call.
2773  DB  77,137,205                          ; mov           %r9,%r13
2774  DB  77,137,198                          ; mov           %r8,%r14
2775  DB  72,137,203                          ; mov           %rcx,%rbx
2776  DB  72,137,214                          ; mov           %rdx,%rsi
; r15 = first stream entry (the call target); r12 = stream position after it.
2777  DB  72,173                              ; lods          %ds:(%rsi),%rax
2778  DB  73,137,199                          ; mov           %rax,%r15
2779  DB  73,137,244                          ; mov           %rsi,%r12
; Is there room for at least one full step of 8?  If not, go straight to the tail.
2780  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
2781  DB  76,57,232                           ; cmp           %r13,%rax
2782  DB  118,5                               ; jbe           75 <_sk_start_pipeline_avx+0x75>
2783  DB  72,137,223                          ; mov           %rbx,%rdi
2784  DB  235,65                              ; jmp           b6 <_sk_start_pipeline_avx+0xb6>
; --- Full-step loop body (offset 0x75): ecx = 0 and ymm0-ymm7 cleared before each call.
2785  DB  185,0,0,0,0                         ; mov           $0x0,%ecx
2786  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
2787  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2788  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2789  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
2790  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
2791  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
2792  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
2793  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
2794  DB  72,137,223                          ; mov           %rbx,%rdi
2795  DB  76,137,230                          ; mov           %r12,%rsi
2796  DB  76,137,242                          ; mov           %r14,%rdx
2797  DB  65,255,215                          ; callq         *%r15
; Advance by 8 and loop while another full step fits.  The cmp tests old rbx+16
; against the limit; the mov that follows leaves the flags intact for jbe.
2798  DB  72,141,123,8                        ; lea           0x8(%rbx),%rdi
2799  DB  72,131,195,16                       ; add           $0x10,%rbx
2800  DB  76,57,235                           ; cmp           %r13,%rbx
2801  DB  72,137,251                          ; mov           %rdi,%rbx
2802  DB  118,191                             ; jbe           75 <_sk_start_pipeline_avx+0x75>
; --- Tail (offset 0xb6): rcx = limit - position; skip the call when it is zero.
2803  DB  76,137,233                          ; mov           %r13,%rcx
2804  DB  72,41,249                           ; sub           %rdi,%rcx
2805  DB  116,41                              ; je            e7 <_sk_start_pipeline_avx+0xe7>
2806  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
2807  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2808  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2809  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
2810  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
2811  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
2812  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
2813  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
2814  DB  76,137,230                          ; mov           %r12,%rsi
2815  DB  76,137,242                          ; mov           %r14,%rdx
2816  DB  65,255,215                          ; callq         *%r15
; --- Epilogue (offset 0xe7): return the limit in rax and restore saved state.
2817  DB  76,137,232                          ; mov           %r13,%rax
2818  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
2819  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
2820  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
2821  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
2822  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
2823  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
2824  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
2825  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
2826  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
2827  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
2828  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
2829  DB  91                                  ; pop           %rbx
2830  DB  95                                  ; pop           %rdi
2831  DB  94                                  ; pop           %rsi
2832  DB  65,92                               ; pop           %r12
2833  DB  65,93                               ; pop           %r13
2834  DB  65,94                               ; pop           %r14
2835  DB  65,95                               ; pop           %r15
2836  DB  197,248,119                         ; vzeroupper
2837  DB  195                                 ; retq
2838
; _sk_just_return_avx
; A single ret: returns to whoever issued the most recent call, without touching
; any register.
; NOTE(review): presumably placed as the last entry of a stage stream so the chain
; of tail-jumps unwinds back into the start_pipeline driver -- confirm with the
; code that builds the stream.
2839PUBLIC _sk_just_return_avx
2840_sk_just_return_avx LABEL PROC
2841  DB  195                                 ; retq
2842
; _sk_seed_shader_avx
; Pipeline stage: initialises all eight working vectors.
; The first lods reads a pointer from the stream at rsi into rax. On exit:
;   ymm0 = float(edi) + 0.5 + the 8 floats loaded from (rdx)   (per lane)
;   ymm1 = float(int32 at (rax)) + 0.5                         (same in every lane)
;   ymm2 = 1.0 in every lane
;   ymm3..ymm7 = 0
; 0x3f000000 and 0x3f800000 are the IEEE-754 single-precision bit patterns of
; 0.5 and 1.0. Each constant is splatted with vmovd + vpermilps/vpshufd +
; vinsertf128 rather than a register-source broadcast.
; Clobbers rax and r8d. Ends by loading the next stream entry and tail-jumping.
; NOTE(review): edi looks like the x coordinate, (rax) the y coordinate, and (rdx)
; a table of per-lane offsets (e.g. 0..7) -- confirm against the stage source.
2843PUBLIC _sk_seed_shader_avx
2844_sk_seed_shader_avx LABEL PROC
2845  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0 = float(edi) in all 8 lanes.
2846  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
2847  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
2848  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2849  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; ymm1 = 0.5 in all 8 lanes.
2850  DB  65,184,0,0,0,63                     ; mov           $0x3f000000,%r8d
2851  DB  196,193,121,110,200                 ; vmovd         %r8d,%xmm1
2852  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2853  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
2854  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
2855  DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
; ymm1 = float(int32 at (rax)) + 0.5.
2856  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
2857  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2858  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
; ymm2 = 1.0 in all 8 lanes (eax is free: the context pointer is no longer needed).
2859  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2860  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
2861  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
2862  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
2863  DB  72,173                              ; lods          %ds:(%rsi),%rax
2864  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
2865  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
2866  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
2867  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
2868  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
2869  DB  255,224                             ; jmpq          *%rax
2870
; _sk_constant_color_avx
; Pipeline stage: the first lods reads a pointer from the stream at rsi; the four
; consecutive floats at that pointer are broadcast to every lane of ymm0, ymm1,
; ymm2 and ymm3 respectively (offsets 0, 4, 8, 12). Previous contents of
; ymm0..ymm3 are discarded. Clobbers rax. Ends by loading the next stream entry
; and tail-jumping to it.
; NOTE(review): the four floats are presumably r,g,b,a -- confirm.
2871PUBLIC _sk_constant_color_avx
2872_sk_constant_color_avx LABEL PROC
2873  DB  72,173                              ; lods          %ds:(%rsi),%rax
2874  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
2875  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
2876  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
2877  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
2878  DB  72,173                              ; lods          %ds:(%rsi),%rax
2879  DB  255,224                             ; jmpq          *%rax
2880
; _sk_clear_avx
; Pipeline stage: sets ymm0..ymm3 to zero in every lane. It reads only one entry
; from the stream at rsi (the next stage address, loaded up front) and tail-jumps
; to it; no context pointer is consumed. ymm4..ymm7 are untouched. Clobbers rax.
2881PUBLIC _sk_clear_avx
2882_sk_clear_avx LABEL PROC
2883  DB  72,173                              ; lods          %ds:(%rsi),%rax
2884  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
2885  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2886  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2887  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
2888  DB  255,224                             ; jmpq          *%rax
2889
; _sk_srcatop_avx
; Pipeline stage. With inv = 1.0 - ymm3 (computed from the incoming ymm3), per lane:
;   ymm0' = ymm0*ymm7 + ymm4*inv
;   ymm1' = ymm1*ymm7 + ymm5*inv
;   ymm2' = ymm2*ymm7 + ymm6*inv
;   ymm3' = ymm3*ymm7 + ymm7*inv
; ymm0 doubles as the scratch register for the 1.0 constant, so its product with
; ymm7 is parked in ymm8 first. 0x3f800000 is the bit pattern of 1.0f.
; Clobbers eax/rax, ymm8, ymm9. Ends by loading the next stream entry from rsi and
; tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "source atop" blend -- confirm against the stage source.
2890PUBLIC _sk_srcatop_avx
2891_sk_srcatop_avx LABEL PROC
2892  DB  197,124,89,199                      ; vmulps        %ymm7,%ymm0,%ymm8
2893  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2894  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2895  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
2896  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2897  DB  197,124,92,203                      ; vsubps        %ymm3,%ymm0,%ymm9
2898  DB  197,180,89,196                      ; vmulps        %ymm4,%ymm9,%ymm0
2899  DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
2900  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
2901  DB  197,52,89,197                       ; vmulps        %ymm5,%ymm9,%ymm8
2902  DB  196,193,116,88,200                  ; vaddps        %ymm8,%ymm1,%ymm1
2903  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
2904  DB  197,52,89,198                       ; vmulps        %ymm6,%ymm9,%ymm8
2905  DB  196,193,108,88,208                  ; vaddps        %ymm8,%ymm2,%ymm2
2906  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
2907  DB  197,52,89,199                       ; vmulps        %ymm7,%ymm9,%ymm8
2908  DB  196,193,100,88,216                  ; vaddps        %ymm8,%ymm3,%ymm3
2909  DB  72,173                              ; lods          %ds:(%rsi),%rax
2910  DB  255,224                             ; jmpq          *%rax
2911
; _sk_dstatop_avx
; Pipeline stage. With inv = 1.0 - ymm7, per lane:
;   ymm0' = ymm4*ymm3 + ymm0*inv
;   ymm1' = ymm5*ymm3 + ymm1*inv
;   ymm2' = ymm6*ymm3 + ymm2*inv
;   ymm3' = ymm7*ymm3 + ymm3*inv
; All uses of ymm3 on the right-hand side are its incoming value; it is
; overwritten only in the last three instructions before the lods.
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax, ymm8, ymm9. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "destination atop" blend -- confirm against the stage source.
2912PUBLIC _sk_dstatop_avx
2913_sk_dstatop_avx LABEL PROC
2914  DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
2915  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2916  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
2917  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
2918  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
2919  DB  197,52,92,207                       ; vsubps        %ymm7,%ymm9,%ymm9
2920  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
2921  DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
2922  DB  197,100,89,197                      ; vmulps        %ymm5,%ymm3,%ymm8
2923  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
2924  DB  197,188,88,201                      ; vaddps        %ymm1,%ymm8,%ymm1
2925  DB  197,100,89,198                      ; vmulps        %ymm6,%ymm3,%ymm8
2926  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
2927  DB  197,188,88,210                      ; vaddps        %ymm2,%ymm8,%ymm2
2928  DB  197,100,89,199                      ; vmulps        %ymm7,%ymm3,%ymm8
2929  DB  197,180,89,219                      ; vmulps        %ymm3,%ymm9,%ymm3
2930  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
2931  DB  72,173                              ; lods          %ds:(%rsi),%rax
2932  DB  255,224                             ; jmpq          *%rax
2933
; _sk_srcin_avx
; Pipeline stage: multiplies each of ymm0, ymm1, ymm2, ymm3 by ymm7, per lane.
; ymm4..ymm7 are not modified. Clobbers rax. Ends by loading the next stream entry
; from rsi and tail-jumping to it.
; NOTE(review): ymm7 is presumably destination alpha ("source in") -- confirm.
2934PUBLIC _sk_srcin_avx
2935_sk_srcin_avx LABEL PROC
2936  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
2937  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
2938  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
2939  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
2940  DB  72,173                              ; lods          %ds:(%rsi),%rax
2941  DB  255,224                             ; jmpq          *%rax
2942
; _sk_dstin_avx
; Pipeline stage: per lane,
;   ymm0' = ymm3*ymm4,  ymm1' = ymm3*ymm5,  ymm2' = ymm3*ymm6,  ymm3' = ymm3*ymm7
; The incoming ymm0..ymm2 are discarded; ymm3 is overwritten last because the
; first three products read it. Clobbers rax. Ends by loading the next stream
; entry from rsi and tail-jumping to it.
; NOTE(review): ymm3 is presumably source alpha and ymm4-ymm7 the destination
; colour ("destination in") -- confirm.
2943PUBLIC _sk_dstin_avx
2944_sk_dstin_avx LABEL PROC
2945  DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
2946  DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
2947  DB  197,228,89,214                      ; vmulps        %ymm6,%ymm3,%ymm2
2948  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
2949  DB  72,173                              ; lods          %ds:(%rsi),%rax
2950  DB  255,224                             ; jmpq          *%rax
2951
; _sk_srcout_avx
; Pipeline stage: with inv = 1.0 - ymm7 (held in ymm8), multiplies each of ymm0,
; ymm1, ymm2, ymm3 by inv, per lane. 0x3f800000 is the bit pattern of 1.0f,
; splatted to all eight lanes via vmovd + vpermilps + vinsertf128.
; Clobbers eax/rax and ymm8. Ends by loading the next stream entry from rsi and
; tail-jumping to it.
; NOTE(review): ymm7 is presumably destination alpha ("source out") -- confirm.
2952PUBLIC _sk_srcout_avx
2953_sk_srcout_avx LABEL PROC
2954  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2955  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2956  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
2957  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
2958  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
2959  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
2960  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
2961  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
2962  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
2963  DB  72,173                              ; lods          %ds:(%rsi),%rax
2964  DB  255,224                             ; jmpq          *%rax
2965
; _sk_dstout_avx
; Pipeline stage: with inv = 1.0 - ymm3 (incoming ymm3), per lane:
;   ymm0' = inv*ymm4,  ymm1' = inv*ymm5,  ymm2' = inv*ymm6,  ymm3' = inv*ymm7
; The incoming ymm0..ymm2 are discarded: ymm0 is reused to hold the 1.0 constant
; and ymm3 is overwritten in place with inv, so no extra scratch vector is used.
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax. Ends by loading the
; next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm3 is presumably source alpha and ymm4-ymm7 the destination
; colour ("destination out") -- confirm.
2966PUBLIC _sk_dstout_avx
2967_sk_dstout_avx LABEL PROC
2968  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2969  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2970  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
2971  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2972  DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
2973  DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
2974  DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
2975  DB  197,228,89,214                      ; vmulps        %ymm6,%ymm3,%ymm2
2976  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
2977  DB  72,173                              ; lods          %ds:(%rsi),%rax
2978  DB  255,224                             ; jmpq          *%rax
2979
; _sk_srcover_avx
; Pipeline stage: with inv = 1.0 - ymm3 (incoming ymm3, held in ymm8), per lane:
;   ymm0' = ymm0 + ymm4*inv
;   ymm1' = ymm1 + ymm5*inv
;   ymm2' = ymm2 + ymm6*inv
;   ymm3' = ymm3 + ymm7*inv
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax, ymm8, ymm9. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "source over" blend -- confirm against the stage source.
2980PUBLIC _sk_srcover_avx
2981_sk_srcover_avx LABEL PROC
2982  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2983  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2984  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
2985  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
2986  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
2987  DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
2988  DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
2989  DB  197,60,89,205                       ; vmulps        %ymm5,%ymm8,%ymm9
2990  DB  197,180,88,201                      ; vaddps        %ymm1,%ymm9,%ymm1
2991  DB  197,60,89,206                       ; vmulps        %ymm6,%ymm8,%ymm9
2992  DB  197,180,88,210                      ; vaddps        %ymm2,%ymm9,%ymm2
2993  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
2994  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
2995  DB  72,173                              ; lods          %ds:(%rsi),%rax
2996  DB  255,224                             ; jmpq          *%rax
2997
; _sk_dstover_avx
; Pipeline stage: with inv = 1.0 - ymm7 (held in ymm8), per lane:
;   ymm0' = ymm0*inv + ymm4
;   ymm1' = ymm1*inv + ymm5
;   ymm2' = ymm2*inv + ymm6
;   ymm3' = ymm3*inv + ymm7
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax and ymm8. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "destination over" blend -- confirm against the stage source.
2998PUBLIC _sk_dstover_avx
2999_sk_dstover_avx LABEL PROC
3000  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3001  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3002  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3003  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3004  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
3005  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
3006  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
3007  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
3008  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
3009  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
3010  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
3011  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
3012  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
3013  DB  72,173                              ; lods          %ds:(%rsi),%rax
3014  DB  255,224                             ; jmpq          *%rax
3015
; _sk_modulate_avx
; Pipeline stage: lane-wise products of the two register groups,
;   ymm0 *= ymm4,  ymm1 *= ymm5,  ymm2 *= ymm6,  ymm3 *= ymm7
; ymm4..ymm7 are not modified. Clobbers rax. Ends by loading the next stream entry
; from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a
; -- confirm against the stage source.
3016PUBLIC _sk_modulate_avx
3017_sk_modulate_avx LABEL PROC
3018  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
3019  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
3020  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
3021  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
3022  DB  72,173                              ; lods          %ds:(%rsi),%rax
3023  DB  255,224                             ; jmpq          *%rax
3024
; _sk_multiply_avx
; Pipeline stage. With invd = 1.0 - ymm7 (ymm9) and invs = 1.0 - ymm3 (ymm8, from
; the incoming ymm3), per lane:
;   ymm0' = ymm0*invd + ymm4*invs + ymm0*ymm4
;   ymm1' = ymm1*invd + ymm5*invs + ymm1*ymm5
;   ymm2' = ymm2*invd + ymm6*invs + ymm2*ymm6
;   ymm3' = ymm3*invd + ymm7*invs + ymm3*ymm7
; In the last group ymm8/ymm9 themselves are consumed as scratch, which is safe
; because no later instruction needs invs/invd.
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax and ymm8-ymm11. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "multiply" blend -- confirm against the stage source.
3025PUBLIC _sk_multiply_avx
3026_sk_multiply_avx LABEL PROC
3027  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3028  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3029  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3030  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3031  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
3032  DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
3033  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3034  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
3035  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
3036  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
3037  DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
3038  DB  197,52,89,209                       ; vmulps        %ymm1,%ymm9,%ymm10
3039  DB  197,60,89,221                       ; vmulps        %ymm5,%ymm8,%ymm11
3040  DB  196,65,36,88,210                    ; vaddps        %ymm10,%ymm11,%ymm10
3041  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
3042  DB  196,193,116,88,202                  ; vaddps        %ymm10,%ymm1,%ymm1
3043  DB  197,52,89,210                       ; vmulps        %ymm2,%ymm9,%ymm10
3044  DB  197,60,89,222                       ; vmulps        %ymm6,%ymm8,%ymm11
3045  DB  196,65,36,88,210                    ; vaddps        %ymm10,%ymm11,%ymm10
3046  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
3047  DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
3048  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
3049  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3050  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
3051  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
3052  DB  196,193,100,88,216                  ; vaddps        %ymm8,%ymm3,%ymm3
3053  DB  72,173                              ; lods          %ds:(%rsi),%rax
3054  DB  255,224                             ; jmpq          *%rax
3055
; _sk_plus__avx
; Pipeline stage: lane-wise sums of the two register groups,
;   ymm0 += ymm4,  ymm1 += ymm5,  ymm2 += ymm6,  ymm3 += ymm7
; No clamping is applied here. ymm4..ymm7 are not modified. Clobbers rax. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a
; -- confirm against the stage source.
3056PUBLIC _sk_plus__avx
3057_sk_plus__avx LABEL PROC
3058  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
3059  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
3060  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
3061  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
3062  DB  72,173                              ; lods          %ds:(%rsi),%rax
3063  DB  255,224                             ; jmpq          *%rax
3064
; _sk_screen_avx
; Pipeline stage: for each pair (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6), (ymm3,ymm7)
; computes, per lane,
;   s' = (s + d) - s*d
; where s is the ymm0..ymm3 register and d the matching ymm4..ymm7 register.
; The sum is parked in ymm8 while s is overwritten with the product.
; Clobbers rax and ymm8. Ends by loading the next stream entry from rsi and
; tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "screen" blend -- confirm against the stage source.
3065PUBLIC _sk_screen_avx
3066_sk_screen_avx LABEL PROC
3067  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
3068  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
3069  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
3070  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
3071  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
3072  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
3073  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
3074  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
3075  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
3076  DB  197,100,88,199                      ; vaddps        %ymm7,%ymm3,%ymm8
3077  DB  197,228,89,223                      ; vmulps        %ymm7,%ymm3,%ymm3
3078  DB  197,188,92,219                      ; vsubps        %ymm3,%ymm8,%ymm3
3079  DB  72,173                              ; lods          %ds:(%rsi),%rax
3080  DB  255,224                             ; jmpq          *%rax
3081
; _sk_xor__avx
; Pipeline stage. With invd = 1.0 - ymm7 (ymm9) and invs = 1.0 - ymm3 (ymm8, from
; the incoming ymm3), per lane:
;   ymm0' = ymm0*invd + ymm4*invs
;   ymm1' = ymm1*invd + ymm5*invs
;   ymm2' = ymm2*invd + ymm6*invs
;   ymm3' = ymm3*invd + ymm7*invs
; invs is computed before ymm3 is overwritten. 0x3f800000 is the bit pattern of
; 1.0f. Clobbers eax/rax and ymm8-ymm10. Ends by loading the next stream entry
; from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the Porter-Duff "xor" blend -- confirm against the stage source.
3082PUBLIC _sk_xor__avx
3083_sk_xor__avx LABEL PROC
3084  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3085  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3086  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3087  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3088  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
3089  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
3090  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3091  DB  197,60,89,212                       ; vmulps        %ymm4,%ymm8,%ymm10
3092  DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
3093  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
3094  DB  197,60,89,213                       ; vmulps        %ymm5,%ymm8,%ymm10
3095  DB  197,172,88,201                      ; vaddps        %ymm1,%ymm10,%ymm1
3096  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
3097  DB  197,60,89,214                       ; vmulps        %ymm6,%ymm8,%ymm10
3098  DB  197,172,88,210                      ; vaddps        %ymm2,%ymm10,%ymm2
3099  DB  197,180,89,219                      ; vmulps        %ymm3,%ymm9,%ymm3
3100  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3101  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
3102  DB  72,173                              ; lods          %ds:(%rsi),%rax
3103  DB  255,224                             ; jmpq          *%rax
3104
; _sk_darken_avx
; Pipeline stage. For the first three pairs (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6),
; with s the ymm0..ymm2 register and d the matching ymm4..ymm6 register, per lane:
;   s' = (s + d) - max(s*ymm7, d*ymm3)
; ymm3 is handled differently, after the three above have read it:
;   ymm3' = ymm3 + ymm7*(1.0 - ymm3)
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax, ymm8, ymm9. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "darken" blend -- confirm against the stage source.
3105PUBLIC _sk_darken_avx
3106_sk_darken_avx LABEL PROC
3107  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
3108  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
3109  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
3110  DB  196,193,124,95,193                  ; vmaxps        %ymm9,%ymm0,%ymm0
3111  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
3112  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
3113  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
3114  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
3115  DB  196,193,116,95,201                  ; vmaxps        %ymm9,%ymm1,%ymm1
3116  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
3117  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
3118  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
3119  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
3120  DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
3121  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
3122  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3123  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3124  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3125  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3126  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3127  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3128  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
3129  DB  72,173                              ; lods          %ds:(%rsi),%rax
3130  DB  255,224                             ; jmpq          *%rax
3131
; _sk_lighten_avx
; Pipeline stage. For the first three pairs (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6),
; with s the ymm0..ymm2 register and d the matching ymm4..ymm6 register, per lane:
;   s' = (s + d) - min(s*ymm7, d*ymm3)
; ymm3 is handled differently, after the three above have read it:
;   ymm3' = ymm3 + ymm7*(1.0 - ymm3)
; The instruction sequence is the same as in _sk_darken_avx except that vminps
; takes the place of vmaxps.
; 0x3f800000 is the bit pattern of 1.0f. Clobbers eax/rax, ymm8, ymm9. Ends by
; loading the next stream entry from rsi and tail-jumping to it.
; NOTE(review): ymm0-ymm3 / ymm4-ymm7 presumably hold source / destination r,g,b,a,
; making this the "lighten" blend -- confirm against the stage source.
3132PUBLIC _sk_lighten_avx
3133_sk_lighten_avx LABEL PROC
3134  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
3135  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
3136  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
3137  DB  196,193,124,93,193                  ; vminps        %ymm9,%ymm0,%ymm0
3138  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
3139  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
3140  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
3141  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
3142  DB  196,193,116,93,201                  ; vminps        %ymm9,%ymm1,%ymm1
3143  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
3144  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
3145  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
3146  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
3147  DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
3148  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
3149  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3150  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3151  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3152  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3153  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3154  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3155  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
3156  DB  72,173                              ; lods          %ds:(%rsi),%rax
3157  DB  255,224                             ; jmpq          *%rax
3158
; _sk_difference_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6):
;     c = (c + d) - 2*min(c*ymm7, ymm3*d)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; The doubling is done by adding the min result to itself.
; Scratch: eax, ymm8, ymm9.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3159PUBLIC _sk_difference_avx
3160_sk_difference_avx LABEL PROC
; pair (ymm0, ymm4)
3161  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
3162  DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
3163  DB  197,100,89,204                      ; vmulps        %ymm4,%ymm3,%ymm9
3164  DB  196,193,124,93,193                  ; vminps        %ymm9,%ymm0,%ymm0
3165  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
3166  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; pair (ymm1, ymm5)
3167  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
3168  DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
3169  DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
3170  DB  196,193,116,93,201                  ; vminps        %ymm9,%ymm1,%ymm1
3171  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
3172  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; pair (ymm2, ymm6)
3173  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
3174  DB  197,236,89,215                      ; vmulps        %ymm7,%ymm2,%ymm2
3175  DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
3176  DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
3177  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
3178  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8,
; then ymm3 = ymm3 + ymm7*(1 - ymm3).
3179  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3180  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3181  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3182  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3183  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3184  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3185  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3186  DB  72,173                              ; lods          %ds:(%rsi),%rax
3187  DB  255,224                             ; jmpq          *%rax
3188
; _sk_exclusion_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6):
;     c = (c + d) - 2*(c*d)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; Scratch: eax, ymm8.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3189PUBLIC _sk_exclusion_avx
3190_sk_exclusion_avx LABEL PROC
; pair (ymm0, ymm4)
3191  DB  197,124,88,196                      ; vaddps        %ymm4,%ymm0,%ymm8
3192  DB  197,252,89,196                      ; vmulps        %ymm4,%ymm0,%ymm0
3193  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
3194  DB  197,188,92,192                      ; vsubps        %ymm0,%ymm8,%ymm0
; pair (ymm1, ymm5)
3195  DB  197,116,88,197                      ; vaddps        %ymm5,%ymm1,%ymm8
3196  DB  197,244,89,205                      ; vmulps        %ymm5,%ymm1,%ymm1
3197  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
3198  DB  197,188,92,201                      ; vsubps        %ymm1,%ymm8,%ymm1
; pair (ymm2, ymm6)
3199  DB  197,108,88,198                      ; vaddps        %ymm6,%ymm2,%ymm8
3200  DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
3201  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
3202  DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8,
; then ymm3 = ymm3 + ymm7*(1 - ymm3).
3203  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3204  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3205  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3206  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3207  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3208  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3209  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3210  DB  72,173                              ; lods          %ds:(%rsi),%rax
3211  DB  255,224                             ; jmpq          *%rax
3212
; _sk_colorburn_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6),
; selected per lane with vcmpeqps masks and vblendvps:
;     if d == ymm7 :  c = d + c*(1 - ymm7)
;     else if c == 0: c = d*(1 - ymm3) + c
;     else:           c = ymm3*(ymm7 - min(ymm7, (ymm7 - d)*ymm3/c))
;                         + c*(1 - ymm7) + d*(1 - ymm3)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; All three candidate values are computed for every lane; the division by c is
; executed even in lanes where c == 0, and that quotient is discarded by the
; blend for those lanes.
; Long-lived temporaries: ymm8 = 1 - ymm3, ymm9 = 1 - ymm7, ymm10 = 0.
; Scratch: eax, ymm8-ymm13.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3213PUBLIC _sk_colorburn_avx
3214_sk_colorburn_avx LABEL PROC
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8.
3215  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3216  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3217  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3218  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; pair (ymm0, ymm4); also sets up ymm9 = 1 - ymm7, ymm10 = 0, ymm8 = 1 - ymm3
3219  DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
3220  DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
3221  DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
3222  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3223  DB  197,60,89,228                       ; vmulps        %ymm4,%ymm8,%ymm12
3224  DB  197,68,92,236                       ; vsubps        %ymm4,%ymm7,%ymm13
3225  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
3226  DB  197,20,94,232                       ; vdivps        %ymm0,%ymm13,%ymm13
3227  DB  196,65,68,93,237                    ; vminps        %ymm13,%ymm7,%ymm13
3228  DB  196,65,68,92,237                    ; vsubps        %ymm13,%ymm7,%ymm13
3229  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
3230  DB  196,65,20,88,235                    ; vaddps        %ymm11,%ymm13,%ymm13
3231  DB  196,65,28,88,237                    ; vaddps        %ymm13,%ymm12,%ymm13
3232  DB  197,28,88,224                       ; vaddps        %ymm0,%ymm12,%ymm12
; ymm0 becomes the (c == 0) mask; pick ymm12 where set, else ymm13.
3233  DB  196,193,124,194,194,0               ; vcmpeqps      %ymm10,%ymm0,%ymm0
3234  DB  196,195,21,74,196,0                 ; vblendvps     %ymm0,%ymm12,%ymm13,%ymm0
; ymm12 = (d == ymm7) mask; where set, override with d + c*(1 - ymm7).
3235  DB  197,92,194,231,0                    ; vcmpeqps      %ymm7,%ymm4,%ymm12
3236  DB  197,36,88,220                       ; vaddps        %ymm4,%ymm11,%ymm11
3237  DB  196,195,125,74,195,192              ; vblendvps     %ymm12,%ymm11,%ymm0,%ymm0
; pair (ymm1, ymm5)
3238  DB  197,52,89,217                       ; vmulps        %ymm1,%ymm9,%ymm11
3239  DB  197,60,89,229                       ; vmulps        %ymm5,%ymm8,%ymm12
3240  DB  197,68,92,237                       ; vsubps        %ymm5,%ymm7,%ymm13
3241  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
3242  DB  197,20,94,233                       ; vdivps        %ymm1,%ymm13,%ymm13
3243  DB  196,65,68,93,237                    ; vminps        %ymm13,%ymm7,%ymm13
3244  DB  196,65,68,92,237                    ; vsubps        %ymm13,%ymm7,%ymm13
3245  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
3246  DB  196,65,36,88,237                    ; vaddps        %ymm13,%ymm11,%ymm13
3247  DB  196,65,28,88,237                    ; vaddps        %ymm13,%ymm12,%ymm13
3248  DB  197,28,88,225                       ; vaddps        %ymm1,%ymm12,%ymm12
3249  DB  196,193,116,194,202,0               ; vcmpeqps      %ymm10,%ymm1,%ymm1
3250  DB  196,195,21,74,204,16                ; vblendvps     %ymm1,%ymm12,%ymm13,%ymm1
3251  DB  197,84,194,231,0                    ; vcmpeqps      %ymm7,%ymm5,%ymm12
3252  DB  197,36,88,221                       ; vaddps        %ymm5,%ymm11,%ymm11
3253  DB  196,195,117,74,203,192              ; vblendvps     %ymm12,%ymm11,%ymm1,%ymm1
; pair (ymm2, ymm6): same computation with a different instruction order;
; ymm9 and ymm10 are consumed/overwritten here since no later pair needs them.
3254  DB  197,52,89,202                       ; vmulps        %ymm2,%ymm9,%ymm9
3255  DB  196,65,108,194,210,0                ; vcmpeqps      %ymm10,%ymm2,%ymm10
3256  DB  197,60,89,222                       ; vmulps        %ymm6,%ymm8,%ymm11
3257  DB  197,68,92,230                       ; vsubps        %ymm6,%ymm7,%ymm12
3258  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
3259  DB  197,28,94,226                       ; vdivps        %ymm2,%ymm12,%ymm12
3260  DB  197,164,88,210                      ; vaddps        %ymm2,%ymm11,%ymm2
3261  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
3262  DB  196,65,68,92,228                    ; vsubps        %ymm12,%ymm7,%ymm12
3263  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
3264  DB  196,65,52,88,228                    ; vaddps        %ymm12,%ymm9,%ymm12
3265  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
3266  DB  196,227,37,74,210,160               ; vblendvps     %ymm10,%ymm2,%ymm11,%ymm2
3267  DB  197,76,194,215,0                    ; vcmpeqps      %ymm7,%ymm6,%ymm10
3268  DB  197,52,88,206                       ; vaddps        %ymm6,%ymm9,%ymm9
3269  DB  196,195,109,74,209,160              ; vblendvps     %ymm10,%ymm9,%ymm2,%ymm2
; ymm3 = ymm3 + ymm7*(1 - ymm3)   (ymm8 still holds 1 - ymm3)
3270  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3271  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3272  DB  72,173                              ; lods          %ds:(%rsi),%rax
3273  DB  255,224                             ; jmpq          *%rax
3274
; _sk_colordodge_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6),
; selected per lane with vcmpeqps masks and vblendvps:
;     if d == 0 :        c = d + c*(1 - ymm7)
;     else if c == ymm3: c = c + d*(1 - ymm3)
;     else:              c = ymm3*min(ymm7, (ymm3*d)/(ymm3 - c))
;                            + c*(1 - ymm7) + d*(1 - ymm3)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; All three candidate values are computed for every lane; the division by
; (ymm3 - c) is executed even in lanes where c == ymm3, and that quotient is
; discarded by the blend for those lanes.
; Long-lived temporaries: ymm8 = 0, ymm9 = 1 - ymm3, ymm10 = 1 - ymm7.
; Scratch: eax, ymm8-ymm13.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3275PUBLIC _sk_colordodge_avx
3276_sk_colordodge_avx LABEL PROC
; ymm8 = 0; broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm9.
3277  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
3278  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3279  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
3280  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
3281  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
; pair (ymm0, ymm4); also sets up ymm10 = 1 - ymm7 and ymm9 = 1 - ymm3
3282  DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
3283  DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
3284  DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
3285  DB  197,100,89,228                      ; vmulps        %ymm4,%ymm3,%ymm12
3286  DB  197,100,92,232                      ; vsubps        %ymm0,%ymm3,%ymm13
3287  DB  196,65,28,94,229                    ; vdivps        %ymm13,%ymm12,%ymm12
3288  DB  197,52,89,236                       ; vmulps        %ymm4,%ymm9,%ymm13
3289  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
3290  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
3291  DB  196,65,28,88,227                    ; vaddps        %ymm11,%ymm12,%ymm12
3292  DB  196,65,20,88,228                    ; vaddps        %ymm12,%ymm13,%ymm12
3293  DB  197,20,88,232                       ; vaddps        %ymm0,%ymm13,%ymm13
; ymm0 becomes the (c == ymm3) mask; pick ymm13 where set, else ymm12.
3294  DB  197,252,194,195,0                   ; vcmpeqps      %ymm3,%ymm0,%ymm0
3295  DB  196,195,29,74,197,0                 ; vblendvps     %ymm0,%ymm13,%ymm12,%ymm0
; ymm12 = (d == 0) mask; where set, override with d + c*(1 - ymm7).
3296  DB  196,65,92,194,224,0                 ; vcmpeqps      %ymm8,%ymm4,%ymm12
3297  DB  197,36,88,220                       ; vaddps        %ymm4,%ymm11,%ymm11
3298  DB  196,195,125,74,195,192              ; vblendvps     %ymm12,%ymm11,%ymm0,%ymm0
; pair (ymm1, ymm5)
3299  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
3300  DB  197,100,89,229                      ; vmulps        %ymm5,%ymm3,%ymm12
3301  DB  197,100,92,233                      ; vsubps        %ymm1,%ymm3,%ymm13
3302  DB  196,65,28,94,229                    ; vdivps        %ymm13,%ymm12,%ymm12
3303  DB  197,52,89,237                       ; vmulps        %ymm5,%ymm9,%ymm13
3304  DB  196,65,68,93,228                    ; vminps        %ymm12,%ymm7,%ymm12
3305  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
3306  DB  196,65,28,88,227                    ; vaddps        %ymm11,%ymm12,%ymm12
3307  DB  196,65,20,88,228                    ; vaddps        %ymm12,%ymm13,%ymm12
3308  DB  197,20,88,233                       ; vaddps        %ymm1,%ymm13,%ymm13
3309  DB  197,244,194,203,0                   ; vcmpeqps      %ymm3,%ymm1,%ymm1
3310  DB  196,195,29,74,205,16                ; vblendvps     %ymm1,%ymm13,%ymm12,%ymm1
3311  DB  196,65,84,194,224,0                 ; vcmpeqps      %ymm8,%ymm5,%ymm12
3312  DB  197,36,88,221                       ; vaddps        %ymm5,%ymm11,%ymm11
3313  DB  196,195,117,74,203,192              ; vblendvps     %ymm12,%ymm11,%ymm1,%ymm1
; pair (ymm2, ymm6); ymm10 and ymm8 are consumed/overwritten here since no
; later pair needs them.
3314  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
3315  DB  197,100,89,222                      ; vmulps        %ymm6,%ymm3,%ymm11
3316  DB  197,100,92,226                      ; vsubps        %ymm2,%ymm3,%ymm12
3317  DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
3318  DB  197,52,89,230                       ; vmulps        %ymm6,%ymm9,%ymm12
3319  DB  196,65,68,93,219                    ; vminps        %ymm11,%ymm7,%ymm11
3320  DB  197,36,89,219                       ; vmulps        %ymm3,%ymm11,%ymm11
3321  DB  196,65,44,88,219                    ; vaddps        %ymm11,%ymm10,%ymm11
3322  DB  196,65,28,88,219                    ; vaddps        %ymm11,%ymm12,%ymm11
3323  DB  197,28,88,226                       ; vaddps        %ymm2,%ymm12,%ymm12
3324  DB  197,236,194,211,0                   ; vcmpeqps      %ymm3,%ymm2,%ymm2
3325  DB  196,195,37,74,212,32                ; vblendvps     %ymm2,%ymm12,%ymm11,%ymm2
3326  DB  196,65,76,194,192,0                 ; vcmpeqps      %ymm8,%ymm6,%ymm8
3327  DB  197,44,88,214                       ; vaddps        %ymm6,%ymm10,%ymm10
3328  DB  196,195,109,74,210,128              ; vblendvps     %ymm8,%ymm10,%ymm2,%ymm2
; ymm3 = ymm3 + ymm7*(1 - ymm3)   (ymm9 still holds 1 - ymm3)
3329  DB  197,52,89,199                       ; vmulps        %ymm7,%ymm9,%ymm8
3330  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3331  DB  72,173                              ; lods          %ds:(%rsi),%rax
3332  DB  255,224                             ; jmpq          *%rax
3333
; _sk_hardlight_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6):
;     base = c*(1 - ymm7) + d*(1 - ymm3)
;     if 2*c is less than or equal to ymm3:
;         c = base + 2*c*d
;     else:
;         c = base + ymm3*ymm7 - 2*(ymm7 - d)*(ymm3 - c)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; The branch is a per-lane vcmpleps mask fed to vblendvps; both candidates are
; computed for every lane.
; Long-lived temporaries: ymm8 = 1 - ymm3, ymm10 = 1 - ymm7, ymm9 = ymm3*ymm7.
; Scratch: eax, ymm8-ymm14.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3334PUBLIC _sk_hardlight_avx
3335_sk_hardlight_avx LABEL PROC
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8.
3336  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3337  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3338  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3339  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; pair (ymm0, ymm4); also sets up ymm10 = 1 - ymm7, ymm8 = 1 - ymm3 and
; ymm9 = ymm3*ymm7, which the later pairs reuse.
3340  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
3341  DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
3342  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3343  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
3344  DB  196,65,52,88,219                    ; vaddps        %ymm11,%ymm9,%ymm11
3345  DB  197,124,88,200                      ; vaddps        %ymm0,%ymm0,%ymm9
3346  DB  197,52,194,227,2                    ; vcmpleps      %ymm3,%ymm9,%ymm12
3347  DB  197,124,89,204                      ; vmulps        %ymm4,%ymm0,%ymm9
3348  DB  196,65,52,88,233                    ; vaddps        %ymm9,%ymm9,%ymm13
3349  DB  197,100,89,207                      ; vmulps        %ymm7,%ymm3,%ymm9
3350  DB  197,68,92,244                       ; vsubps        %ymm4,%ymm7,%ymm14
3351  DB  197,228,92,192                      ; vsubps        %ymm0,%ymm3,%ymm0
3352  DB  196,193,124,89,198                  ; vmulps        %ymm14,%ymm0,%ymm0
3353  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
3354  DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
3355  DB  196,195,125,74,197,192              ; vblendvps     %ymm12,%ymm13,%ymm0,%ymm0
3356  DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
; pair (ymm1, ymm5)
3357  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
3358  DB  197,60,89,229                       ; vmulps        %ymm5,%ymm8,%ymm12
3359  DB  196,65,28,88,219                    ; vaddps        %ymm11,%ymm12,%ymm11
3360  DB  197,116,88,225                      ; vaddps        %ymm1,%ymm1,%ymm12
3361  DB  197,28,194,227,2                    ; vcmpleps      %ymm3,%ymm12,%ymm12
3362  DB  197,116,89,237                      ; vmulps        %ymm5,%ymm1,%ymm13
3363  DB  196,65,20,88,237                    ; vaddps        %ymm13,%ymm13,%ymm13
3364  DB  197,68,92,245                       ; vsubps        %ymm5,%ymm7,%ymm14
3365  DB  197,228,92,201                      ; vsubps        %ymm1,%ymm3,%ymm1
3366  DB  196,193,116,89,206                  ; vmulps        %ymm14,%ymm1,%ymm1
3367  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
3368  DB  197,180,92,201                      ; vsubps        %ymm1,%ymm9,%ymm1
3369  DB  196,195,117,74,205,192              ; vblendvps     %ymm12,%ymm13,%ymm1,%ymm1
3370  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
; pair (ymm2, ymm6); ymm10 is overwritten with this pair's base term.
3371  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
3372  DB  197,60,89,222                       ; vmulps        %ymm6,%ymm8,%ymm11
3373  DB  196,65,36,88,210                    ; vaddps        %ymm10,%ymm11,%ymm10
3374  DB  197,108,88,218                      ; vaddps        %ymm2,%ymm2,%ymm11
3375  DB  197,36,194,219,2                    ; vcmpleps      %ymm3,%ymm11,%ymm11
3376  DB  197,108,89,230                      ; vmulps        %ymm6,%ymm2,%ymm12
3377  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
3378  DB  197,68,92,238                       ; vsubps        %ymm6,%ymm7,%ymm13
3379  DB  197,228,92,210                      ; vsubps        %ymm2,%ymm3,%ymm2
3380  DB  196,193,108,89,213                  ; vmulps        %ymm13,%ymm2,%ymm2
3381  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
3382  DB  197,180,92,210                      ; vsubps        %ymm2,%ymm9,%ymm2
3383  DB  196,195,109,74,212,176              ; vblendvps     %ymm11,%ymm12,%ymm2,%ymm2
3384  DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
; ymm3 = ymm3 + ymm7*(1 - ymm3)   (ymm8 still holds 1 - ymm3)
3385  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3386  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3387  DB  72,173                              ; lods          %ds:(%rsi),%rax
3388  DB  255,224                             ; jmpq          *%rax
3389
; _sk_overlay_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6):
;     base = c*(1 - ymm7) + d*(1 - ymm3)
;     if 2*d is less than or equal to ymm7:
;         c = base + 2*c*d
;     else:
;         c = base + ymm3*ymm7 - 2*(ymm7 - d)*(ymm3 - c)
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; The branch is a per-lane vcmpleps mask fed to vblendvps; both candidates are
; computed for every lane.  The instruction stream differs from
; _sk_hardlight_avx only in the operands of the doubling add and the compare
; (2*d against ymm7 here, 2*c against ymm3 there).
; Long-lived temporaries: ymm8 = 1 - ymm3, ymm10 = 1 - ymm7, ymm9 = ymm3*ymm7.
; Scratch: eax, ymm8-ymm14.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3390PUBLIC _sk_overlay_avx
3391_sk_overlay_avx LABEL PROC
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8.
3392  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3393  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3394  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3395  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; pair (ymm0, ymm4); also sets up ymm10 = 1 - ymm7, ymm8 = 1 - ymm3 and
; ymm9 = ymm3*ymm7, which the later pairs reuse.
3396  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
3397  DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
3398  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3399  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
3400  DB  196,65,52,88,219                    ; vaddps        %ymm11,%ymm9,%ymm11
3401  DB  197,92,88,204                       ; vaddps        %ymm4,%ymm4,%ymm9
3402  DB  197,52,194,231,2                    ; vcmpleps      %ymm7,%ymm9,%ymm12
3403  DB  197,124,89,204                      ; vmulps        %ymm4,%ymm0,%ymm9
3404  DB  196,65,52,88,233                    ; vaddps        %ymm9,%ymm9,%ymm13
3405  DB  197,100,89,207                      ; vmulps        %ymm7,%ymm3,%ymm9
3406  DB  197,68,92,244                       ; vsubps        %ymm4,%ymm7,%ymm14
3407  DB  197,228,92,192                      ; vsubps        %ymm0,%ymm3,%ymm0
3408  DB  196,193,124,89,198                  ; vmulps        %ymm14,%ymm0,%ymm0
3409  DB  197,252,88,192                      ; vaddps        %ymm0,%ymm0,%ymm0
3410  DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
3411  DB  196,195,125,74,197,192              ; vblendvps     %ymm12,%ymm13,%ymm0,%ymm0
3412  DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
; pair (ymm1, ymm5)
3413  DB  197,44,89,217                       ; vmulps        %ymm1,%ymm10,%ymm11
3414  DB  197,60,89,229                       ; vmulps        %ymm5,%ymm8,%ymm12
3415  DB  196,65,28,88,219                    ; vaddps        %ymm11,%ymm12,%ymm11
3416  DB  197,84,88,229                       ; vaddps        %ymm5,%ymm5,%ymm12
3417  DB  197,28,194,231,2                    ; vcmpleps      %ymm7,%ymm12,%ymm12
3418  DB  197,116,89,237                      ; vmulps        %ymm5,%ymm1,%ymm13
3419  DB  196,65,20,88,237                    ; vaddps        %ymm13,%ymm13,%ymm13
3420  DB  197,68,92,245                       ; vsubps        %ymm5,%ymm7,%ymm14
3421  DB  197,228,92,201                      ; vsubps        %ymm1,%ymm3,%ymm1
3422  DB  196,193,116,89,206                  ; vmulps        %ymm14,%ymm1,%ymm1
3423  DB  197,244,88,201                      ; vaddps        %ymm1,%ymm1,%ymm1
3424  DB  197,180,92,201                      ; vsubps        %ymm1,%ymm9,%ymm1
3425  DB  196,195,117,74,205,192              ; vblendvps     %ymm12,%ymm13,%ymm1,%ymm1
3426  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
; pair (ymm2, ymm6); ymm10 is overwritten with this pair's base term.
3427  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
3428  DB  197,60,89,222                       ; vmulps        %ymm6,%ymm8,%ymm11
3429  DB  196,65,36,88,210                    ; vaddps        %ymm10,%ymm11,%ymm10
3430  DB  197,76,88,222                       ; vaddps        %ymm6,%ymm6,%ymm11
3431  DB  197,36,194,223,2                    ; vcmpleps      %ymm7,%ymm11,%ymm11
3432  DB  197,108,89,230                      ; vmulps        %ymm6,%ymm2,%ymm12
3433  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
3434  DB  197,68,92,238                       ; vsubps        %ymm6,%ymm7,%ymm13
3435  DB  197,228,92,210                      ; vsubps        %ymm2,%ymm3,%ymm2
3436  DB  196,193,108,89,213                  ; vmulps        %ymm13,%ymm2,%ymm2
3437  DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
3438  DB  197,180,92,210                      ; vsubps        %ymm2,%ymm9,%ymm2
3439  DB  196,195,109,74,212,176              ; vblendvps     %ymm11,%ymm12,%ymm2,%ymm2
3440  DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
; ymm3 = ymm3 + ymm7*(1 - ymm3)   (ymm8 still holds 1 - ymm3)
3441  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3442  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi) and jump to
; it -- presumably the address of the next pipeline stage.
3443  DB  72,173                              ; lods          %ds:(%rsi),%rax
3444  DB  255,224                             ; jmpq          *%rax
3445
; _sk_softlight_avx
; For each register pair (c,d) in (ymm0,ymm4), (ymm1,ymm5), (ymm2,ymm6):
;     m    = d/ymm7 where ymm7 is greater than 0, else 0
;     dark = (m - 1)*(4m + (4m)^2) + 7*m
;     lite = rcp(rsqrt(m)) - m            (hardware-approximate sqrt(m) - m)
;     t    = dark where 4*d is less than or equal to ymm7, else lite
;     mix  = d*(ymm3 + (2c - ymm3)*(1 - m))   where 2*c is at most ymm3,
;            ymm3*d + ymm7*(2c - ymm3)*t      otherwise
;     c    = c*(1 - ymm7) + d*(1 - ymm3) + mix
; and finally
;     ymm3 = ymm3 + ymm7*(1 - ymm3)
; Selections are per-lane compare masks fed to vblendvps; every candidate is
; computed for every lane.  vrsqrtps and vrcpps are approximate instructions,
; so "lite" is not an exactly rounded square root.
;
; Register pressure: the incoming ymm2 is spilled to the stack and the incoming
; ymm1 is parked in ymm2, so ymm1 can hold the finished "mix" for the first
; pair.  56 bytes of stack are reserved for the 32-byte spill; it is stored and
; reloaded with unaligned moves (vmovups), so no stack alignment is required.
; %rsp is restored before the final jump.
; Long-lived temporaries: ymm8 = 1.0, ymm9 = 0, ymm10 = (ymm7 greater than 0)
; mask, ymm12 = 7.0 (bit pattern 0x40e00000).
; Scratch: eax, ymm8-ymm15, 56 bytes of stack.  ymm4-ymm7 are only read.
; NOTE(review): presumably ymm0-ymm3 hold source r,g,b,a and ymm4-ymm7 hold
; destination r,g,b,a (8 float lanes each) -- not established in this chunk,
; confirm against the pipeline entry code.
3446PUBLIC _sk_softlight_avx
3447_sk_softlight_avx LABEL PROC
; Spill incoming ymm2 to the stack; park incoming ymm1 in ymm2.
3448  DB  72,131,236,56                       ; sub           $0x38,%rsp
3449  DB  197,252,17,20,36                    ; vmovups       %ymm2,(%rsp)
3450  DB  197,252,40,209                      ; vmovaps       %ymm1,%ymm2
; ---- pair (ymm0, ymm4): mix ends up in ymm1 ----
; ymm11 = m; ymm12 = 4m
3451  DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
3452  DB  197,52,194,215,1                    ; vcmpltps      %ymm7,%ymm9,%ymm10
3453  DB  197,92,94,199                       ; vdivps        %ymm7,%ymm4,%ymm8
3454  DB  196,67,53,74,216,160                ; vblendvps     %ymm10,%ymm8,%ymm9,%ymm11
3455  DB  196,65,36,88,195                    ; vaddps        %ymm11,%ymm11,%ymm8
3456  DB  196,65,60,88,224                    ; vaddps        %ymm8,%ymm8,%ymm12
; Broadcast 1.0f (bit pattern 0x3f800000) to all 8 lanes of ymm8.
3457  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3458  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3459  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3460  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; ymm13 = (m - 1)*(4m + (4m)^2)
3461  DB  196,65,28,89,236                    ; vmulps        %ymm12,%ymm12,%ymm13
3462  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
3463  DB  196,65,36,92,232                    ; vsubps        %ymm8,%ymm11,%ymm13
3464  DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
; Broadcast 7.0f (bit pattern 0x40e00000) to all 8 lanes of ymm12; dark += 7*m.
3465  DB  184,0,0,224,64                      ; mov           $0x40e00000,%eax
3466  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
3467  DB  196,67,121,4,228,0                  ; vpermilps     $0x0,%xmm12,%xmm12
3468  DB  196,67,29,24,228,1                  ; vinsertf128   $0x1,%xmm12,%ymm12,%ymm12
3469  DB  196,65,36,89,244                    ; vmulps        %ymm12,%ymm11,%ymm14
3470  DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
; ymm14 = lite = rcp(rsqrt(m)) - m
3471  DB  196,65,124,82,243                   ; vrsqrtps      %ymm11,%ymm14
3472  DB  196,65,124,83,246                   ; vrcpps        %ymm14,%ymm14
3473  DB  196,65,12,92,243                    ; vsubps        %ymm11,%ymm14,%ymm14
; ymm13 = t: dark where 4*d does not exceed ymm7, else lite
3474  DB  197,92,88,252                       ; vaddps        %ymm4,%ymm4,%ymm15
3475  DB  196,65,4,88,255                     ; vaddps        %ymm15,%ymm15,%ymm15
3476  DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
3477  DB  196,67,13,74,237,240                ; vblendvps     %ymm15,%ymm13,%ymm14,%ymm13
; ymm14 = 2c; ymm15 = 2c - ymm3; build both mix candidates and select.
3478  DB  197,124,88,240                      ; vaddps        %ymm0,%ymm0,%ymm14
3479  DB  197,12,92,251                       ; vsubps        %ymm3,%ymm14,%ymm15
3480  DB  196,65,60,92,219                    ; vsubps        %ymm11,%ymm8,%ymm11
3481  DB  196,65,4,89,219                     ; vmulps        %ymm11,%ymm15,%ymm11
3482  DB  197,36,88,219                       ; vaddps        %ymm3,%ymm11,%ymm11
3483  DB  197,36,89,220                       ; vmulps        %ymm4,%ymm11,%ymm11
3484  DB  197,4,89,255                        ; vmulps        %ymm7,%ymm15,%ymm15
3485  DB  196,65,4,89,237                     ; vmulps        %ymm13,%ymm15,%ymm13
3486  DB  197,100,89,252                      ; vmulps        %ymm4,%ymm3,%ymm15
3487  DB  196,65,4,88,237                     ; vaddps        %ymm13,%ymm15,%ymm13
3488  DB  197,12,194,243,2                    ; vcmpleps      %ymm3,%ymm14,%ymm14
3489  DB  196,195,21,74,203,224               ; vblendvps     %ymm14,%ymm11,%ymm13,%ymm1
; ---- pair (incoming ymm1, now in ymm2; ymm5): mix ends up in ymm13 ----
3490  DB  197,84,94,239                       ; vdivps        %ymm7,%ymm5,%ymm13
3491  DB  196,67,53,74,237,160                ; vblendvps     %ymm10,%ymm13,%ymm9,%ymm13
3492  DB  196,65,20,88,245                    ; vaddps        %ymm13,%ymm13,%ymm14
3493  DB  196,65,12,88,246                    ; vaddps        %ymm14,%ymm14,%ymm14
3494  DB  196,65,12,89,254                    ; vmulps        %ymm14,%ymm14,%ymm15
3495  DB  196,65,12,88,247                    ; vaddps        %ymm15,%ymm14,%ymm14
3496  DB  196,65,20,92,248                    ; vsubps        %ymm8,%ymm13,%ymm15
3497  DB  196,65,4,89,246                     ; vmulps        %ymm14,%ymm15,%ymm14
3498  DB  196,65,28,89,253                    ; vmulps        %ymm13,%ymm12,%ymm15
3499  DB  196,65,4,88,246                     ; vaddps        %ymm14,%ymm15,%ymm14
3500  DB  196,65,124,82,253                   ; vrsqrtps      %ymm13,%ymm15
3501  DB  196,65,124,83,255                   ; vrcpps        %ymm15,%ymm15
3502  DB  196,65,4,92,253                     ; vsubps        %ymm13,%ymm15,%ymm15
3503  DB  197,84,88,221                       ; vaddps        %ymm5,%ymm5,%ymm11
3504  DB  196,65,36,88,219                    ; vaddps        %ymm11,%ymm11,%ymm11
3505  DB  197,36,194,223,2                    ; vcmpleps      %ymm7,%ymm11,%ymm11
3506  DB  196,67,5,74,222,176                 ; vblendvps     %ymm11,%ymm14,%ymm15,%ymm11
3507  DB  197,108,88,242                      ; vaddps        %ymm2,%ymm2,%ymm14
3508  DB  196,65,60,92,237                    ; vsubps        %ymm13,%ymm8,%ymm13
3509  DB  197,12,92,251                       ; vsubps        %ymm3,%ymm14,%ymm15
3510  DB  196,65,4,89,237                     ; vmulps        %ymm13,%ymm15,%ymm13
3511  DB  197,4,89,255                        ; vmulps        %ymm7,%ymm15,%ymm15
3512  DB  196,65,4,89,219                     ; vmulps        %ymm11,%ymm15,%ymm11
3513  DB  197,100,89,253                      ; vmulps        %ymm5,%ymm3,%ymm15
3514  DB  196,65,4,88,219                     ; vaddps        %ymm11,%ymm15,%ymm11
3515  DB  197,20,88,235                       ; vaddps        %ymm3,%ymm13,%ymm13
3516  DB  197,20,89,237                       ; vmulps        %ymm5,%ymm13,%ymm13
3517  DB  197,12,194,243,2                    ; vcmpleps      %ymm3,%ymm14,%ymm14
3518  DB  196,67,37,74,237,224                ; vblendvps     %ymm14,%ymm13,%ymm11,%ymm13
; ---- pair (incoming ymm2, reloaded from the stack; ymm6): mix ends up in ymm9 ----
; ymm9 (zero), ymm10 (mask) and ymm12 (7.0) are consumed/overwritten here since
; no later pair needs them.
3519  DB  197,76,94,223                       ; vdivps        %ymm7,%ymm6,%ymm11
3520  DB  196,67,53,74,203,160                ; vblendvps     %ymm10,%ymm11,%ymm9,%ymm9
3521  DB  196,65,52,88,209                    ; vaddps        %ymm9,%ymm9,%ymm10
3522  DB  196,65,44,88,210                    ; vaddps        %ymm10,%ymm10,%ymm10
3523  DB  196,65,44,89,218                    ; vmulps        %ymm10,%ymm10,%ymm11
3524  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
3525  DB  196,65,52,92,216                    ; vsubps        %ymm8,%ymm9,%ymm11
3526  DB  196,65,36,89,210                    ; vmulps        %ymm10,%ymm11,%ymm10
3527  DB  196,65,28,89,217                    ; vmulps        %ymm9,%ymm12,%ymm11
3528  DB  196,65,36,88,210                    ; vaddps        %ymm10,%ymm11,%ymm10
3529  DB  196,65,124,82,217                   ; vrsqrtps      %ymm9,%ymm11
3530  DB  196,65,124,83,219                   ; vrcpps        %ymm11,%ymm11
3531  DB  196,65,36,92,217                    ; vsubps        %ymm9,%ymm11,%ymm11
3532  DB  197,76,88,230                       ; vaddps        %ymm6,%ymm6,%ymm12
3533  DB  196,65,28,88,228                    ; vaddps        %ymm12,%ymm12,%ymm12
3534  DB  197,28,194,231,2                    ; vcmpleps      %ymm7,%ymm12,%ymm12
3535  DB  196,67,37,74,210,192                ; vblendvps     %ymm12,%ymm10,%ymm11,%ymm10
; Reload the spilled incoming ymm2 into ymm14.
3536  DB  197,124,16,52,36                    ; vmovups       (%rsp),%ymm14
3537  DB  196,65,12,88,222                    ; vaddps        %ymm14,%ymm14,%ymm11
3538  DB  197,36,92,227                       ; vsubps        %ymm3,%ymm11,%ymm12
3539  DB  196,65,60,92,201                    ; vsubps        %ymm9,%ymm8,%ymm9
3540  DB  196,65,28,89,201                    ; vmulps        %ymm9,%ymm12,%ymm9
3541  DB  197,28,89,231                       ; vmulps        %ymm7,%ymm12,%ymm12
3542  DB  196,65,28,89,210                    ; vmulps        %ymm10,%ymm12,%ymm10
3543  DB  197,100,89,230                      ; vmulps        %ymm6,%ymm3,%ymm12
3544  DB  196,65,28,88,210                    ; vaddps        %ymm10,%ymm12,%ymm10
3545  DB  197,52,88,203                       ; vaddps        %ymm3,%ymm9,%ymm9
3546  DB  197,52,89,206                       ; vmulps        %ymm6,%ymm9,%ymm9
3547  DB  197,36,194,219,2                    ; vcmpleps      %ymm3,%ymm11,%ymm11
3548  DB  196,67,45,74,201,176                ; vblendvps     %ymm11,%ymm9,%ymm10,%ymm9
; ---- combine: c*(1 - ymm7) + d*(1 - ymm3) + mix for each pair ----
; ymm10 = 1 - ymm7, ymm8 = 1 - ymm3
3549  DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
3550  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
3551  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
3552  DB  197,60,89,220                       ; vmulps        %ymm4,%ymm8,%ymm11
3553  DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
3554  DB  197,244,88,192                      ; vaddps        %ymm0,%ymm1,%ymm0
3555  DB  197,172,89,202                      ; vmulps        %ymm2,%ymm10,%ymm1
3556  DB  197,188,89,213                      ; vmulps        %ymm5,%ymm8,%ymm2
3557  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
3558  DB  196,193,116,88,205                  ; vaddps        %ymm13,%ymm1,%ymm1
3559  DB  196,193,44,89,214                   ; vmulps        %ymm14,%ymm10,%ymm2
3560  DB  197,60,89,214                       ; vmulps        %ymm6,%ymm8,%ymm10
3561  DB  197,172,88,210                      ; vaddps        %ymm2,%ymm10,%ymm2
3562  DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
; ymm3 = ymm3 + ymm7*(1 - ymm3)   (ymm8 still holds 1 - ymm3)
3563  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
3564  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
; Load the 8-byte value at (%rsi) into %rax (lods also steps %rsi), release the
; 56-byte spill area, and jump to the loaded address -- presumably the next
; pipeline stage.
3565  DB  72,173                              ; lods          %ds:(%rsi),%rax
3566  DB  72,131,196,56                       ; add           $0x38,%rsp
3567  DB  255,224                             ; jmpq          *%rax
3568
; _sk_clamp_0_avx
;   Raises every lane of ymm0..ymm3 to at least 0.0 by taking vmaxps against
;   a zeroed ymm8.
;   Exit: lods fetches the next qword from (%rsi) into rax (stepping rsi) and
;   the stage tail-jumps to it.
;   Clobbers: ymm8, rax.
3569PUBLIC _sk_clamp_0_avx
3570_sk_clamp_0_avx LABEL PROC
; ymm8 = 0.0 in all eight lanes.
3571  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
3572  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
3573  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
3574  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
3575  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
; Chain to the next stage.
3576  DB  72,173                              ; lods          %ds:(%rsi),%rax
3577  DB  255,224                             ; jmpq          *%rax
3578
; _sk_clamp_1_avx
;   Limits every lane of ymm0..ymm3 to at most 1.0 (vminps against a
;   broadcast 1.0f).
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8, rax.
3579PUBLIC _sk_clamp_1_avx
3580_sk_clamp_1_avx LABEL PROC
; Build ymm8 = 1.0f (bit pattern 0x3f800000) in all eight lanes:
; GPR -> xmm lane 0 -> splat across xmm -> copy into the upper 128 bits.
3581  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3582  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3583  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3584  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3585  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
3586  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
3587  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
3588  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
; Chain to the next stage.
3589  DB  72,173                              ; lods          %ds:(%rsi),%rax
3590  DB  255,224                             ; jmpq          *%rax
3591
; _sk_clamp_a_avx
;   First limits ymm3 to at most 1.0, then limits ymm0..ymm2 to at most the
;   (already clamped) ymm3.
;   NOTE(review): the stage name suggests ymm3 is alpha and ymm0..ymm2 are the
;   colour channels -- confirm against the generator source.
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8, rax.
3592PUBLIC _sk_clamp_a_avx
3593_sk_clamp_a_avx LABEL PROC
; ymm8 = 1.0f (0x3f800000) broadcast to all eight lanes.
3594  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3595  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3596  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3597  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; ymm3 = min(ymm3, 1.0); must happen before the three mins below use ymm3.
3598  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
3599  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
3600  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
3601  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
; Chain to the next stage.
3602  DB  72,173                              ; lods          %ds:(%rsi),%rax
3603  DB  255,224                             ; jmpq          *%rax
3604
; _sk_set_rgb_avx
;   Reads a pointer from the stream at (%rsi) and broadcasts the three
;   consecutive 32-bit values at offsets 0, 4 and 8 from it into all lanes of
;   ymm0, ymm1 and ymm2 respectively. ymm3 is left unchanged.
;   Exit: a second lods fetches the next qword from (%rsi) into rax and the
;   stage tail-jumps to it.
;   Clobbers: rax.
3605PUBLIC _sk_set_rgb_avx
3606_sk_set_rgb_avx LABEL PROC
; rax = this stage's argument pointer, taken from the stream.
3607  DB  72,173                              ; lods          %ds:(%rsi),%rax
3608  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
3609  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
3610  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
; Chain to the next stage.
3611  DB  72,173                              ; lods          %ds:(%rsi),%rax
3612  DB  255,224                             ; jmpq          *%rax
3613
; _sk_swap_rb_avx
;   Exchanges ymm0 and ymm2, using ymm8 as the temporary. ymm1 and ymm3 are
;   untouched.
;   The lods that fetches the next stage address is interleaved with the
;   register moves; the stage tail-jumps to that address at the end.
;   Clobbers: ymm8, rax.
3614PUBLIC _sk_swap_rb_avx
3615_sk_swap_rb_avx LABEL PROC
; ymm8 = old ymm0.
3616  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
3617  DB  72,173                              ; lods          %ds:(%rsi),%rax
3618  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
3619  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
3620  DB  255,224                             ; jmpq          *%rax
3621
; _sk_swap_avx
;   Exchanges the register group ymm0..ymm3 with ymm4..ymm7 pairwise
;   (ymm0<->ymm4, ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7), staging the old
;   ymm0..ymm3 in ymm11..ymm8.
;   The lods that fetches the next stage address is interleaved with the
;   moves; the stage tail-jumps to that address at the end.
;   Clobbers: ymm8..ymm11, rax.
3622PUBLIC _sk_swap_avx
3623_sk_swap_avx LABEL PROC
; Stash the old ymm3, ymm2, ymm1, ymm0 in ymm8, ymm9, ymm10, ymm11.
3624  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
3625  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
3626  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
3627  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
3628  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0..ymm3 = old ymm4..ymm7.
3629  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
3630  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
3631  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
3632  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
; ymm4..ymm7 = the stashed old ymm0..ymm3.
3633  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
3634  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
3635  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
3636  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
3637  DB  255,224                             ; jmpq          *%rax
3638
; _sk_move_src_dst_avx
;   Copies ymm0..ymm3 into ymm4..ymm7 (ymm0..ymm3 keep their values).
;   NOTE(review): the stage name suggests ymm0..ymm3 are the "src" registers
;   and ymm4..ymm7 the "dst" registers -- confirm against the generator source.
;   The next stage address is fetched by lods up front and jumped to at the
;   end.
;   Clobbers: rax.
3639PUBLIC _sk_move_src_dst_avx
3640_sk_move_src_dst_avx LABEL PROC
3641  DB  72,173                              ; lods          %ds:(%rsi),%rax
3642  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
3643  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
3644  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
3645  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
3646  DB  255,224                             ; jmpq          *%rax
3647
; _sk_move_dst_src_avx
;   Copies ymm4..ymm7 into ymm0..ymm3 (ymm4..ymm7 keep their values); the
;   mirror image of _sk_move_src_dst_avx.
;   The next stage address is fetched by lods up front and jumped to at the
;   end.
;   Clobbers: rax.
3648PUBLIC _sk_move_dst_src_avx
3649_sk_move_dst_src_avx LABEL PROC
3650  DB  72,173                              ; lods          %ds:(%rsi),%rax
3651  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
3652  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
3653  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
3654  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
3655  DB  255,224                             ; jmpq          *%rax
3656
; _sk_premul_avx
;   Multiplies ymm0, ymm1 and ymm2 lane-wise by ymm3. ymm3 itself is unchanged.
;   NOTE(review): by the stage name this is colour *= alpha with alpha in
;   ymm3 -- confirm against the generator source.
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: rax.
3657PUBLIC _sk_premul_avx
3658_sk_premul_avx LABEL PROC
3659  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
3660  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
3661  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; Chain to the next stage.
3662  DB  72,173                              ; lods          %ds:(%rsi),%rax
3663  DB  255,224                             ; jmpq          *%rax
3664
; _sk_unpremul_avx
;   Computes a per-lane scale = (ymm3 == 0.0) ? 0.0 : 1.0 / ymm3 and
;   multiplies ymm0, ymm1 and ymm2 by it. ymm3 is unchanged.
;   The blend substitutes 0.0 for the division result in lanes where ymm3 is
;   zero, so those lanes end up 0.0 rather than the product with 1.0/0.0.
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8, ymm9, ymm10, rax.
3665PUBLIC _sk_unpremul_avx
3666_sk_unpremul_avx LABEL PROC
; ymm8 = 0.0; ymm9 = lane mask where ymm3 == 0.0.
3667  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
3668  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
; ymm10 = 1.0f (0x3f800000) broadcast, then ymm10 = 1.0 / ymm3.
3669  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3670  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
3671  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
3672  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
3673  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
; ymm8 = mask ? 0.0 : 1.0/ymm3.
3674  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
3675  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
3676  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
3677  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
; Chain to the next stage.
3678  DB  72,173                              ; lods          %ds:(%rsi),%rax
3679  DB  255,224                             ; jmpq          *%rax
3680
; _sk_from_srgb_avx
;   Applies the same piecewise polynomial to each lane x of ymm0, ymm1 and
;   ymm2 (ymm3 is not touched):
;       x <  C_cut :  x * C_lin
;       otherwise  :  x*x * (C_a*x + C_b) + C_c
;   with the float constants built from these bit patterns:
;       C_lin = 0x3d9e8391 (~0.0774)    C_a   = 0x3e99999a (~0.3)
;       C_b   = 0x3f328f5c (~0.6975)    C_c   = 0x3b23d70a (~0.0025)
;       C_cut = 0x3d6147ae (~0.055)
;   NOTE(review): by the stage name this is an sRGB-encoded -> linear
;   approximation -- confirm against the generator source.
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8..ymm15, rax.
3681PUBLIC _sk_from_srgb_avx
3682_sk_from_srgb_avx LABEL PROC
; ymm8 = C_lin broadcast.
3683  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
3684  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3685  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3686  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; ---- ymm0 channel: ymm9 = linear branch, ymm10 = x*x ----
3687  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
3688  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
; ymm11 = C_a broadcast.
3689  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
3690  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
3691  DB  196,67,121,4,219,0                  ; vpermilps     $0x0,%xmm11,%xmm11
3692  DB  196,67,37,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm11,%ymm11
; ymm12 = C_b broadcast.
3693  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
3694  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
3695  DB  196,67,121,4,228,0                  ; vpermilps     $0x0,%xmm12,%xmm12
3696  DB  196,67,29,24,228,1                  ; vinsertf128   $0x1,%xmm12,%ymm12,%ymm12
; ymm13 = C_a*x + C_b.
3697  DB  197,36,89,232                       ; vmulps        %ymm0,%ymm11,%ymm13
3698  DB  196,65,20,88,236                    ; vaddps        %ymm12,%ymm13,%ymm13
; ymm14 = C_c broadcast.
3699  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
3700  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
3701  DB  196,67,121,4,246,0                  ; vpermilps     $0x0,%xmm14,%xmm14
3702  DB  196,67,13,24,246,1                  ; vinsertf128   $0x1,%xmm14,%ymm14,%ymm14
; ymm10 = x*x*(C_a*x + C_b) + C_c  (polynomial branch).
3703  DB  196,65,44,89,213                    ; vmulps        %ymm13,%ymm10,%ymm10
3704  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
; ymm13 = C_cut broadcast (ymm13 is reused; its previous value is dead).
3705  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
3706  DB  197,121,110,232                     ; vmovd         %eax,%xmm13
3707  DB  196,67,121,4,237,0                  ; vpermilps     $0x0,%xmm13,%xmm13
3708  DB  196,67,21,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm13,%ymm13
; ymm0 = (x < C_cut) mask, then ymm0 = mask ? linear : polynomial.
3709  DB  196,193,124,194,197,1               ; vcmpltps      %ymm13,%ymm0,%ymm0
3710  DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
; ---- ymm1 channel: same computation, ymm15 holds C_a*x + C_b ----
3711  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
3712  DB  197,116,89,209                      ; vmulps        %ymm1,%ymm1,%ymm10
3713  DB  197,36,89,249                       ; vmulps        %ymm1,%ymm11,%ymm15
3714  DB  196,65,28,88,255                    ; vaddps        %ymm15,%ymm12,%ymm15
3715  DB  196,65,44,89,215                    ; vmulps        %ymm15,%ymm10,%ymm10
3716  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
3717  DB  196,193,116,194,205,1               ; vcmpltps      %ymm13,%ymm1,%ymm1
3718  DB  196,195,45,74,201,16                ; vblendvps     %ymm1,%ymm9,%ymm10,%ymm1
; ---- ymm2 channel: same computation; ymm8 (C_lin) is consumed here ----
3719  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
3720  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
3721  DB  197,36,89,210                       ; vmulps        %ymm2,%ymm11,%ymm10
3722  DB  196,65,28,88,210                    ; vaddps        %ymm10,%ymm12,%ymm10
3723  DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
3724  DB  196,65,12,88,201                    ; vaddps        %ymm9,%ymm14,%ymm9
3725  DB  196,193,108,194,213,1               ; vcmpltps      %ymm13,%ymm2,%ymm2
3726  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
; Chain to the next stage.
3727  DB  72,173                              ; lods          %ds:(%rsi),%rax
3728  DB  255,224                             ; jmpq          *%rax
3729
; _sk_to_srgb_avx
;   Applies the same piecewise function to each lane x of ymm0, ymm1 and ymm2
;   (ymm3 is not touched). With
;       rsqrt = vrsqrtps(x)        (approximate x^-1/2)
;       sqrt  = vrcpps(rsqrt)      (approximate x^1/2)
;       ftrt  = vrsqrtps(rsqrt)    (approximate x^1/4)
;   the result is
;       x <  C_cut :  x * C_lin
;       otherwise  :  min(1.0, C_f*ftrt + (C_s*sqrt + C_o))
;   with the float constants built from these bit patterns:
;       C_lin = 0x41475c29 (~12.46)     C_f = 0x3ed287c2 (~0.4112)
;       C_s   = 0x3f306fce (~0.6892)    C_o = 0x3dca57a8 with the sign bit
;       C_cut = 0x3b8ce704 (~0.0043)          flipped (~ -0.0988)
;   NOTE(review): by the stage name this is a linear -> sRGB-encoded
;   approximation -- confirm against the generator source.
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8..ymm15, rax.
3730PUBLIC _sk_to_srgb_avx
3731_sk_to_srgb_avx LABEL PROC
; ---- ymm0 channel: ymm13 = sqrt, ymm14 = ftrt ----
3732  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
3733  DB  196,65,124,83,232                   ; vrcpps        %ymm8,%ymm13
3734  DB  196,65,124,82,240                   ; vrsqrtps      %ymm8,%ymm14
; ymm8 = C_lin broadcast; ymm12 = linear branch for ymm0.
3735  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
3736  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
3737  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
3738  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
3739  DB  197,60,89,224                       ; vmulps        %ymm0,%ymm8,%ymm12
; ymm9 = 1.0f broadcast (upper clamp).
3740  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3741  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
3742  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
3743  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
; ymm10 = C_f broadcast.
3744  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
3745  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
3746  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
3747  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
; ymm11 = C_s broadcast.
3748  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
3749  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
3750  DB  196,67,121,4,219,0                  ; vpermilps     $0x0,%xmm11,%xmm11
3751  DB  196,67,37,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm11,%ymm11
; ymm15 = C_o broadcast; the xor flips the float sign bit in eax first.
3752  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
3753  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
3754  DB  197,121,110,248                     ; vmovd         %eax,%xmm15
3755  DB  196,67,121,4,255,0                  ; vpermilps     $0x0,%xmm15,%xmm15
3756  DB  196,67,5,24,255,1                   ; vinsertf128   $0x1,%xmm15,%ymm15,%ymm15
; ymm13 = min(1.0, C_f*ftrt + (C_s*sqrt + C_o)).
3757  DB  196,65,20,89,235                    ; vmulps        %ymm11,%ymm13,%ymm13
3758  DB  196,65,20,88,239                    ; vaddps        %ymm15,%ymm13,%ymm13
3759  DB  196,65,12,89,242                    ; vmulps        %ymm10,%ymm14,%ymm14
3760  DB  196,65,12,88,237                    ; vaddps        %ymm13,%ymm14,%ymm13
3761  DB  196,65,52,93,237                    ; vminps        %ymm13,%ymm9,%ymm13
; ymm14 = C_cut broadcast (ymm14 is reused; ftrt is dead by now).
3762  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
3763  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
3764  DB  196,67,121,4,246,0                  ; vpermilps     $0x0,%xmm14,%xmm14
3765  DB  196,67,13,24,246,1                  ; vinsertf128   $0x1,%xmm14,%ymm14,%ymm14
; ymm0 = (x < C_cut) mask, then ymm0 = mask ? linear : curve.
3766  DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
3767  DB  196,195,21,74,196,0                 ; vblendvps     %ymm0,%ymm12,%ymm13,%ymm0
; ---- ymm1 channel: ymm13 = sqrt, ymm12 = ftrt, then same combination ----
3768  DB  197,124,82,225                      ; vrsqrtps      %ymm1,%ymm12
3769  DB  196,65,124,83,236                   ; vrcpps        %ymm12,%ymm13
3770  DB  196,65,124,82,228                   ; vrsqrtps      %ymm12,%ymm12
3771  DB  196,65,36,89,237                    ; vmulps        %ymm13,%ymm11,%ymm13
3772  DB  196,65,4,88,237                     ; vaddps        %ymm13,%ymm15,%ymm13
3773  DB  196,65,44,89,228                    ; vmulps        %ymm12,%ymm10,%ymm12
3774  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
3775  DB  197,60,89,233                       ; vmulps        %ymm1,%ymm8,%ymm13
3776  DB  196,65,52,93,228                    ; vminps        %ymm12,%ymm9,%ymm12
3777  DB  196,193,116,194,206,1               ; vcmpltps      %ymm14,%ymm1,%ymm1
3778  DB  196,195,29,74,205,16                ; vblendvps     %ymm1,%ymm13,%ymm12,%ymm1
; ---- ymm2 channel: the constant registers ymm8..ymm11 are consumed here ----
3779  DB  197,124,82,226                      ; vrsqrtps      %ymm2,%ymm12
3780  DB  196,65,124,83,236                   ; vrcpps        %ymm12,%ymm13
3781  DB  196,65,36,89,221                    ; vmulps        %ymm13,%ymm11,%ymm11
3782  DB  196,65,4,88,219                     ; vaddps        %ymm11,%ymm15,%ymm11
3783  DB  196,65,124,82,228                   ; vrsqrtps      %ymm12,%ymm12
3784  DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
3785  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
3786  DB  196,65,52,93,202                    ; vminps        %ymm10,%ymm9,%ymm9
3787  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
3788  DB  196,193,108,194,214,1               ; vcmpltps      %ymm14,%ymm2,%ymm2
3789  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
; Chain to the next stage.
3790  DB  72,173                              ; lods          %ds:(%rsi),%rax
3791  DB  255,224                             ; jmpq          *%rax
3792
; _sk_from_2dot2_avx
;   For each lane x of ymm0, ymm1 and ymm2 (ymm3 is not touched) computes
;       max(0.0, x^2 * (x^(1/16))^3 * x^(1/64))
;   i.e. approximately x^(2 + 3/16 + 1/64). The fractional powers come from
;   chaining vrsqrtps (an approximate reciprocal square root): each
;   application halves the exponent and flips its sign, so four applications
;   yield ~x^(1/16) and six yield ~x^(1/64).
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8..ymm11, rax.
3793PUBLIC _sk_from_2dot2_avx
3794_sk_from_2dot2_avx LABEL PROC
; ---- ymm0 channel: ymm8 ~ x^(1/16) after four rsqrt steps ----
3795  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
3796  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
3797  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
3798  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
; ymm9 ~ x^(1/64) after two more steps.
3799  DB  196,65,124,82,200                   ; vrsqrtps      %ymm8,%ymm9
3800  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
; ymm0 = x^2 * ymm8^3 * ymm9.
3801  DB  197,252,89,192                      ; vmulps        %ymm0,%ymm0,%ymm0
3802  DB  196,65,60,89,208                    ; vmulps        %ymm8,%ymm8,%ymm10
3803  DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
3804  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
3805  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
; ymm8 = 0.0 (kept for all three channels); clamp the result from below.
3806  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
3807  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
; ---- ymm1 channel: same sequence with ymm9 / ymm10 / ymm11 as scratch ----
3808  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
3809  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3810  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3811  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3812  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
3813  DB  196,65,124,82,210                   ; vrsqrtps      %ymm10,%ymm10
3814  DB  197,244,89,201                      ; vmulps        %ymm1,%ymm1,%ymm1
3815  DB  196,65,52,89,217                    ; vmulps        %ymm9,%ymm9,%ymm11
3816  DB  196,65,52,89,203                    ; vmulps        %ymm11,%ymm9,%ymm9
3817  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
3818  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
3819  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
; ---- ymm2 channel: same sequence ----
3820  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
3821  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3822  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3823  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3824  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
3825  DB  196,65,124,82,210                   ; vrsqrtps      %ymm10,%ymm10
3826  DB  197,236,89,210                      ; vmulps        %ymm2,%ymm2,%ymm2
3827  DB  196,65,52,89,217                    ; vmulps        %ymm9,%ymm9,%ymm11
3828  DB  196,65,52,89,203                    ; vmulps        %ymm11,%ymm9,%ymm9
3829  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
3830  DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
3831  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
; Chain to the next stage.
3832  DB  72,173                              ; lods          %ds:(%rsi),%rax
3833  DB  255,224                             ; jmpq          *%rax
3834
; _sk_to_2dot2_avx
;   For each lane x of ymm0, ymm1 and ymm2 (ymm3 is not touched) computes
;       max(0.0, x^(1/2) * x^(-1/32) * x^(-1/64))
;   i.e. approximately x^(1/2 - 1/32 - 1/64). The powers come from chaining
;   vrsqrtps (approximate reciprocal square root: halves the exponent and
;   flips its sign) and inverting with vrcpps (approximate reciprocal).
;   Exit: lods fetches the next qword from (%rsi) into rax and the stage
;   tail-jumps to it.
;   Clobbers: ymm8, ymm9, ymm10, rax.
3835PUBLIC _sk_to_2dot2_avx
3836_sk_to_2dot2_avx LABEL PROC
; ---- ymm0 channel ----
; ymm0 ~ x^(-1/2); ymm8 steps through x^(1/4), x^(-1/8), x^(1/16), x^(-1/32).
3837  DB  197,252,82,192                      ; vrsqrtps      %ymm0,%ymm0
3838  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
3839  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
3840  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
3841  DB  196,65,124,82,192                   ; vrsqrtps      %ymm8,%ymm8
; ymm9 ~ x^(1/64).
3842  DB  196,65,124,82,200                   ; vrsqrtps      %ymm8,%ymm9
; ymm0 ~ x^(1/2), times x^(-1/32), times 1/x^(1/64).
3843  DB  197,252,83,192                      ; vrcpps        %ymm0,%ymm0
3844  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
3845  DB  196,65,124,83,193                   ; vrcpps        %ymm9,%ymm8
3846  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
; ymm8 = 0.0 (kept for all three channels); clamp the result from below.
3847  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
3848  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
; ---- ymm1 channel: same sequence with ymm9 / ymm10 as scratch ----
3849  DB  197,252,82,201                      ; vrsqrtps      %ymm1,%ymm1
3850  DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
3851  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3852  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3853  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3854  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
3855  DB  197,252,83,201                      ; vrcpps        %ymm1,%ymm1
3856  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
3857  DB  196,65,124,83,202                   ; vrcpps        %ymm10,%ymm9
3858  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
3859  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
; ---- ymm2 channel: same sequence ----
3860  DB  197,252,82,210                      ; vrsqrtps      %ymm2,%ymm2
3861  DB  197,124,82,202                      ; vrsqrtps      %ymm2,%ymm9
3862  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3863  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3864  DB  196,65,124,82,201                   ; vrsqrtps      %ymm9,%ymm9
3865  DB  196,65,124,82,209                   ; vrsqrtps      %ymm9,%ymm10
3866  DB  197,252,83,210                      ; vrcpps        %ymm2,%ymm2
3867  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
3868  DB  196,65,124,83,202                   ; vrcpps        %ymm10,%ymm9
3869  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
3870  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
; Chain to the next stage.
3871  DB  72,173                              ; lods          %ds:(%rsi),%rax
3872  DB  255,224                             ; jmpq          *%rax
3873
; _sk_scale_1_float_avx
;   Reads a pointer from the stream at (%rsi), broadcasts the single 32-bit
;   float it points to into ymm8, and multiplies ymm0..ymm3 by it.
;   Exit: a second lods fetches the next qword from (%rsi) into rax and the
;   stage tail-jumps to it.
;   Clobbers: ymm8, rax.
3874PUBLIC _sk_scale_1_float_avx
3875_sk_scale_1_float_avx LABEL PROC
; rax = pointer to the scale factor; ymm8 = that factor in all lanes.
3876  DB  72,173                              ; lods          %ds:(%rsi),%rax
3877  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
3878  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
3879  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
3880  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
3881  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Chain to the next stage.
3882  DB  72,173                              ; lods          %ds:(%rsi),%rax
3883  DB  255,224                             ; jmpq          *%rax
3884
; _sk_scale_u8_avx
;   Loads up to eight unsigned bytes, converts each to float, scales by
;   0x3b808081 (~1/255), and multiplies ymm0..ymm3 lane-wise by the result.
;   Byte address: the stage argument fetched from (%rsi) points at a qword
;   holding a base pointer; rdi is added to that base.
;   rcx selects the path: 0 -> load all eight bytes with one vmovq;
;   non-zero -> gather exactly rcx bytes one at a time (the loop at the
;   bottom), leaving the remaining byte lanes zero.
;   NOTE(review): presumably rdi is the pixel index and rcx the tail count of
;   a partial final batch -- confirm against the generator source.
;   rcx is parked in r8 on entry and restored before the tail-jump; the
;   gather loop uses cl as its shift count.
;   Clobbers: ymm8, ymm9, rax, r8, flags; r9, r10, r11 on the partial path.
3885PUBLIC _sk_scale_u8_avx
3886_sk_scale_u8_avx LABEL PROC
3887  DB  73,137,200                          ; mov           %rcx,%r8
; rax = stage argument, then rax = *(argument) + rdi = address of first byte.
3888  DB  72,173                              ; lods          %ds:(%rsi),%rax
3889  DB  72,139,0                            ; mov           (%rax),%rax
3890  DB  72,1,248                            ; add           %rdi,%rax
; Non-zero count -> byte-wise gather below.
3891  DB  77,133,192                          ; test          %r8,%r8
3892  DB  117,80                              ; jne           116b <_sk_scale_u8_avx+0x60>
; Full path: xmm8 = eight bytes.
3893  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; (+0x14) The gather loop re-enters here with xmm8 already filled.
; Zero-extend bytes 0..3 into ymm9's low half source and bytes 4..7 (moved
; to dword 0 by the $0xe5 shuffle) into the high half, then convert to float.
3894  DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
3895  DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
3896  DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
3897  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
3898  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
; ymm9 = 0x3b808081 (~1/255) broadcast; ymm8 = byte * that.
3899  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3900  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
3901  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
3902  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
3903  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
3904  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
3905  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
3906  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
3907  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Chain to the next stage, restoring rcx first.
3908  DB  72,173                              ; lods          %ds:(%rsi),%rax
3909  DB  76,137,193                          ; mov           %r8,%rcx
3910  DB  255,224                             ; jmpq          *%rax
; (+0x60) Partial path: rcx = bit offset (0, 8, 16, ...), r10 = bytes left,
; r9 = accumulator. Each iteration ORs one byte into r9 at the next byte
; position, producing the same little-endian layout vmovq would have loaded.
3911  DB  49,201                              ; xor           %ecx,%ecx
3912  DB  77,137,194                          ; mov           %r8,%r10
3913  DB  69,49,201                           ; xor           %r9d,%r9d
3914  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
3915  DB  72,255,192                          ; inc           %rax
3916  DB  73,211,227                          ; shl           %cl,%r11
3917  DB  77,9,217                            ; or            %r11,%r9
3918  DB  72,131,193,8                        ; add           $0x8,%rcx
3919  DB  73,255,202                          ; dec           %r10
3920  DB  117,234                             ; jne           1173 <_sk_scale_u8_avx+0x68>
; xmm8 = gathered bytes; rejoin the full path just after its vmovq load.
3921  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
3922  DB  235,143                             ; jmp           111f <_sk_scale_u8_avx+0x14>
3923
; _sk_lerp_1_float_avx
;   Reads a pointer from the stream at (%rsi), broadcasts the single 32-bit
;   float t it points to into ymm8, and for each register pair computes
;       ymm0 = (ymm0 - ymm4) * t + ymm4
;       ymm1 = (ymm1 - ymm5) * t + ymm5
;       ymm2 = (ymm2 - ymm6) * t + ymm6
;       ymm3 = (ymm3 - ymm7) * t + ymm7
;   ymm4..ymm7 are read only.
;   Exit: a second lods fetches the next qword from (%rsi) into rax and the
;   stage tail-jumps to it.
;   Clobbers: ymm8, rax.
3924PUBLIC _sk_lerp_1_float_avx
3925_sk_lerp_1_float_avx LABEL PROC
; rax = pointer to t; ymm8 = t in all lanes.
3926  DB  72,173                              ; lods          %ds:(%rsi),%rax
3927  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
3928  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
3929  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
3930  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
3931  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
3932  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
3933  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
3934  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
3935  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
3936  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
3937  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
3938  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
3939  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
; Chain to the next stage.
3940  DB  72,173                              ; lods          %ds:(%rsi),%rax
3941  DB  255,224                             ; jmpq          *%rax
3942
; _sk_lerp_u8_avx
;   Loads up to eight unsigned bytes, converts each to a float t = byte *
;   0x3b808081 (~1/255), and for each register pair computes
;       ymmN = (ymmN - ymm(N+4)) * t + ymm(N+4)      for N = 0..3
;   ymm4..ymm7 are read only.
;   Byte address: the stage argument fetched from (%rsi) points at a qword
;   holding a base pointer; rdi is added to that base.
;   rcx selects the path: 0 -> load all eight bytes with one vmovq;
;   non-zero -> gather exactly rcx bytes one at a time (the loop at the
;   bottom), leaving the remaining byte lanes zero.
;   NOTE(review): presumably rdi is the pixel index and rcx the tail count of
;   a partial final batch -- confirm against the generator source.
;   rcx is parked in r8 on entry and restored before the tail-jump; the
;   gather loop uses cl as its shift count.
;   Clobbers: ymm8, ymm9, rax, r8, flags; r9, r10, r11 on the partial path.
3943PUBLIC _sk_lerp_u8_avx
3944_sk_lerp_u8_avx LABEL PROC
3945  DB  73,137,200                          ; mov           %rcx,%r8
; rax = stage argument, then rax = *(argument) + rdi = address of first byte.
3946  DB  72,173                              ; lods          %ds:(%rsi),%rax
3947  DB  72,139,0                            ; mov           (%rax),%rax
3948  DB  72,1,248                            ; add           %rdi,%rax
; Non-zero count -> byte-wise gather below.
3949  DB  77,133,192                          ; test          %r8,%r8
3950  DB  117,116                             ; jne           1253 <_sk_lerp_u8_avx+0x84>
; Full path: xmm8 = eight bytes.
3951  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; (+0x14) The gather loop re-enters here with xmm8 already filled.
; Zero-extend bytes 0..3 and bytes 4..7 (moved to dword 0 by the $0xe5
; shuffle) to dwords, combine into ymm8, convert to float.
3952  DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
3953  DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
3954  DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
3955  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
3956  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
; ymm9 = 0x3b808081 (~1/255) broadcast; ymm8 = t.
3957  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3958  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
3959  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
3960  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
3961  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
; Per-pair interpolation: subtract, scale by t, add back.
3962  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
3963  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
3964  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
3965  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
3966  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
3967  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
3968  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
3969  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
3970  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
3971  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
3972  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
3973  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
; Chain to the next stage, restoring rcx first.
3974  DB  72,173                              ; lods          %ds:(%rsi),%rax
3975  DB  76,137,193                          ; mov           %r8,%rcx
3976  DB  255,224                             ; jmpq          *%rax
; (+0x84) Partial path: rcx = bit offset (0, 8, 16, ...), r10 = bytes left,
; r9 = accumulator. Each iteration ORs one byte into r9 at the next byte
; position, producing the same little-endian layout vmovq would have loaded.
3977  DB  49,201                              ; xor           %ecx,%ecx
3978  DB  77,137,194                          ; mov           %r8,%r10
3979  DB  69,49,201                           ; xor           %r9d,%r9d
3980  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
3981  DB  72,255,192                          ; inc           %rax
3982  DB  73,211,227                          ; shl           %cl,%r11
3983  DB  77,9,217                            ; or            %r11,%r9
3984  DB  72,131,193,8                        ; add           $0x8,%rcx
3985  DB  73,255,202                          ; dec           %r10
3986  DB  117,234                             ; jne           125b <_sk_lerp_u8_avx+0x8c>
; xmm8 = gathered bytes; rejoin the full path just after its vmovq load.
3987  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
3988  DB  233,104,255,255,255                 ; jmpq          11e3 <_sk_lerp_u8_avx+0x14>
3989
; ---------------------------------------------------------------------------
; _sk_lerp_565_avx
;
; AVX pipeline stage operating on 8 lanes. Loads eight 16-bit values from the
; pointer stored at offset 0 of this stage's context, splits each value into
; the bit-fields 0xf800 / 0x07e0 / 0x001f, scales each field to a float
; weight, and interpolates:
;     ymm0 = (ymm0 - ymm4) * w(0xf800) + ymm4
;     ymm1 = (ymm1 - ymm5) * w(0x07e0) + ymm5
;     ymm2 = (ymm2 - ymm6) * w(0x001f) + ymm6
;     ymm3 = 1.0f in every lane
;
; In:   rsi = stream of 8-byte entries read with lods (first this stage's
;             context pointer, then the address of the next stage)
;       rdi = element index, scaled by 2 to address the 16-bit source
;       rcx = 0 for a full 8-element load, otherwise (rcx & 7) elements
;             are loaded and the remaining lanes are zero
; Out:  ymm0..ymm3 as above; control transfers to the next stage via jmp
; Clobbers: rax, r8, r9, r10, ymm8, ymm9, ymm10, flags
; NOTE(review): the meaning of ymm0-2 / ymm4-6 as colour channels is
; presumed from the stage name, not shown in this block -- confirm.
; ---------------------------------------------------------------------------
3990PUBLIC _sk_lerp_565_avx
3991_sk_lerp_565_avx LABEL PROC
; rax = context pointer, r10 = source pointer stored at context+0.
3992  DB  72,173                              ; lods          %ds:(%rsi),%rax
3993  DB  76,139,16                           ; mov           (%rax),%r10
; Non-zero rcx: take the partial-load path at +0x108.
3994  DB  72,133,201                          ; test          %rcx,%rcx
3995  DB  15,133,250,0,0,0                    ; jne           1383 <_sk_lerp_565_avx+0x108>
; Full path: load 8 x 16-bit values into xmm8.
3996  DB  196,65,122,111,4,122                ; vmovdqu       (%r10,%rdi,2),%xmm8
; +0x14: both paths join here with the 16-bit values in xmm8.
; Zero-extend the eight words to eight dwords in ymm8 (AVX1 has no 256-bit
; integer widen, so the high and low halves are widened separately).
3997  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
3998  DB  197,185,105,219                     ; vpunpckhwd    %xmm3,%xmm8,%xmm3
3999  DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
4000  DB  196,99,61,24,195,1                  ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm8
; ymm9 = float(v & 0xf800) * 0x37842108 (~1/63488), i.e. bits 11-15 -> [0,1].
4001  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
4002  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4003  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4004  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4005  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
4006  DB  197,124,91,203                      ; vcvtdq2ps     %ymm3,%ymm9
4007  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
4008  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4009  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4010  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4011  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
; ymm10 = float(v & 0x07e0) * 0x3a020821 (~1/2016), i.e. bits 5-10 -> [0,1].
4012  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
4013  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4014  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4015  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4016  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
4017  DB  197,124,91,211                      ; vcvtdq2ps     %ymm3,%ymm10
4018  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
4019  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4020  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4021  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4022  DB  197,44,89,211                       ; vmulps        %ymm3,%ymm10,%ymm10
; ymm3 = float(v & 0x001f) * 0x3d042108 (~1/31), i.e. bits 0-4 -> [0,1].
4023  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
4024  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4025  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4026  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4027  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
4028  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
4029  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
4030  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4031  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4032  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4033  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Interpolate: ymmN = (ymmN - ymm(N+4)) * weight + ymm(N+4) for N = 0,1,2.
4034  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
4035  DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
4036  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
4037  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
4038  DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
4039  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
4040  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
4041  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
4042  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
; ymm3 = 1.0f (0x3f800000) broadcast to all eight lanes.
4043  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4044  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4045  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4046  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
; Fetch the next stage address from the stream and tail-jump to it.
4047  DB  72,173                              ; lods          %ds:(%rsi),%rax
4048  DB  255,224                             ; jmpq          *%rax
; +0x108: partial load. r8 = (rcx & 7) - 1 selects a jump-table entry; when
; (rcx & 7) == 0 the decrement wraps above 6 and we rejoin at +0x14 with
; xmm8 all zero.
4049  DB  65,137,200                          ; mov           %ecx,%r8d
4050  DB  65,128,224,7                        ; and           $0x7,%r8b
4051  DB  196,65,57,239,192                   ; vpxor         %xmm8,%xmm8,%xmm8
4052  DB  65,254,200                          ; dec           %r8b
4053  DB  65,128,248,6                        ; cmp           $0x6,%r8b
4054  DB  15,135,243,254,255,255              ; ja            128f <_sk_lerp_565_avx+0x14>
; Dispatch: rax = table base (r9) + sign-extended 32-bit entry r8.
4055  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
4056  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 13f0 <_sk_lerp_565_avx+0x175>
4057  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
4058  DB  76,1,200                            ; add           %r9,%rax
4059  DB  255,224                             ; jmpq          *%rax
; Fall-through ladder: entry k starts at the insert for word k and falls
; through the inserts below it, so k+1 words are loaded in total.
4060  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
4061  DB  196,65,97,196,68,122,12,6           ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
4062  DB  196,65,57,196,68,122,10,5           ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
4063  DB  196,65,57,196,68,122,8,4            ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
4064  DB  196,65,57,196,68,122,6,3            ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
4065  DB  196,65,57,196,68,122,4,2            ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
4066  DB  196,65,57,196,68,122,2,1            ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
4067  DB  196,65,57,196,4,122,0               ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
4068  DB  233,159,254,255,255                 ; jmpq          128f <_sk_lerp_565_avx+0x14>
; +0x175: jump table DATA, not code -- the mnemonics below are disassembler
; artifacts. It holds seven little-endian signed 32-bit offsets relative to
; the table base: -12, -20, -28, -36, -44, -52, -64 (entry i is used when
; (rcx & 7) == i + 1).
4069  DB  244                                 ; hlt
4070  DB  255                                 ; (bad)
4071  DB  255                                 ; (bad)
4072  DB  255                                 ; (bad)
4073  DB  236                                 ; in            (%dx),%al
4074  DB  255                                 ; (bad)
4075  DB  255                                 ; (bad)
4076  DB  255,228                             ; jmpq          *%rsp
4077  DB  255                                 ; (bad)
4078  DB  255                                 ; (bad)
4079  DB  255                                 ; (bad)
4080  DB  220,255                             ; fdivr         %st,%st(7)
4081  DB  255                                 ; (bad)
4082  DB  255,212                             ; callq         *%rsp
4083  DB  255                                 ; (bad)
4084  DB  255                                 ; (bad)
4085  DB  255,204                             ; dec           %esp
4086  DB  255                                 ; (bad)
4087  DB  255                                 ; (bad)
4088  DB  255,192                             ; inc           %eax
4089  DB  255                                 ; (bad)
4090  DB  255                                 ; (bad)
4091  DB  255                                 ; .byte         0xff
4092
; ---------------------------------------------------------------------------
; _sk_load_tables_avx
;
; AVX pipeline stage operating on 8 lanes. Loads eight 32-bit values from the
; pointer at context+0x00 and uses three of the four bytes of each value as
; indices into float tables:
;     ymm0[i] = table@ctx+0x08 [  v[i]        & 0xff ]
;     ymm1[i] = table@ctx+0x10 [ (v[i] >>  8) & 0xff ]
;     ymm2[i] = table@ctx+0x18 [ (v[i] >> 16) & 0xff ]
;     ymm3[i] = float(v[i] >> 24) * 0x3b808081 (~1/255)
; AVX1 has no gather instruction, so every index is moved to a general
; purpose register and the table entries are fetched with scalar loads.
;
; In:   rsi = stream of 8-byte entries read with lods (context pointer, then
;             the address of the next stage)
;       rdi = element index, scaled by 4 to address the 32-bit source
;       rcx = 0 for a full 8-element load, otherwise (rcx & 7) elements
;             are loaded and the remaining lanes are zero
; Out:  ymm0..ymm3 as above; control transfers to the next stage via jmp
; Saved/restored: rbp, r15, r14, r13, r12, rbx
; Clobbers: rax, r8, r9, r10, r11, ymm8, ymm9, ymm10, flags
; ---------------------------------------------------------------------------
4093PUBLIC _sk_load_tables_avx
4094_sk_load_tables_avx LABEL PROC
; Save the registers used below as index scratch.
4095  DB  85                                  ; push          %rbp
4096  DB  65,87                               ; push          %r15
4097  DB  65,86                               ; push          %r14
4098  DB  65,85                               ; push          %r13
4099  DB  65,84                               ; push          %r12
4100  DB  83                                  ; push          %rbx
; rax = context pointer, r8 = source pointer stored at context+0.
4101  DB  72,173                              ; lods          %ds:(%rsi),%rax
4102  DB  76,139,0                            ; mov           (%rax),%r8
; Non-zero rcx: take the partial-load path at +0x250.
4103  DB  72,133,201                          ; test          %rcx,%rcx
4104  DB  15,133,56,2,0,0                     ; jne           165c <_sk_load_tables_avx+0x250>
; Full path: load 8 x 32-bit values into ymm8.
4105  DB  196,65,124,16,4,184                 ; vmovups       (%r8,%rdi,4),%ymm8
; +0x1e: both paths join here with the 32-bit values in ymm8.
; ymm9 = 0x000000ff in every lane; ymm0 = low byte of every value.
4106  DB  187,255,0,0,0                       ; mov           $0xff,%ebx
4107  DB  197,249,110,195                     ; vmovd         %ebx,%xmm0
4108  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
4109  DB  196,99,125,24,200,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm9
4110  DB  196,193,52,84,192                   ; vandps        %ymm8,%ymm9,%ymm0
; Move the eight indices to GPRs: each 64-bit extract holds two lanes; the
; 32-bit mov keeps the even lane and the shr $0x20 leaves the odd lane.
4111  DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
4112  DB  69,137,203                          ; mov           %r9d,%r11d
4113  DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
4114  DB  69,137,214                          ; mov           %r10d,%r14d
4115  DB  73,193,234,32                       ; shr           $0x20,%r10
4116  DB  73,193,233,32                       ; shr           $0x20,%r9
4117  DB  196,227,125,25,192,1                ; vextractf128  $0x1,%ymm0,%xmm0
4118  DB  196,193,249,126,196                 ; vmovq         %xmm0,%r12
4119  DB  69,137,231                          ; mov           %r12d,%r15d
4120  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
4121  DB  65,137,221                          ; mov           %ebx,%r13d
4122  DB  72,193,235,32                       ; shr           $0x20,%rbx
4123  DB  73,193,236,32                       ; shr           $0x20,%r12
; rbp = table pointer at context+0x08, r8 = table pointer at context+0x10.
4124  DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
4125  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
; Fetch from the rbp table: lanes 4-7 into xmm0, lanes 0-3 into xmm1, then
; combine into ymm0.
4126  DB  196,161,122,16,68,189,0             ; vmovss        0x0(%rbp,%r15,4),%xmm0
4127  DB  196,163,121,33,68,165,0,16          ; vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
4128  DB  196,161,122,16,76,173,0             ; vmovss        0x0(%rbp,%r13,4),%xmm1
4129  DB  196,227,121,33,193,32               ; vinsertps     $0x20,%xmm1,%xmm0,%xmm0
4130  DB  197,250,16,76,157,0                 ; vmovss        0x0(%rbp,%rbx,4),%xmm1
4131  DB  196,227,121,33,193,48               ; vinsertps     $0x30,%xmm1,%xmm0,%xmm0
4132  DB  196,161,122,16,76,157,0             ; vmovss        0x0(%rbp,%r11,4),%xmm1
4133  DB  196,163,113,33,76,141,0,16          ; vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
4134  DB  196,161,122,16,92,181,0             ; vmovss        0x0(%rbp,%r14,4),%xmm3
4135  DB  196,227,113,33,203,32               ; vinsertps     $0x20,%xmm3,%xmm1,%xmm1
4136  DB  196,161,122,16,92,149,0             ; vmovss        0x0(%rbp,%r10,4),%xmm3
4137  DB  196,227,113,33,203,48               ; vinsertps     $0x30,%xmm3,%xmm1,%xmm1
4138  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
; Second index: (v >> 8) & 0xff. The integer shift is done per 128-bit half
; (xmm10 keeps the high half of ymm8 for reuse below).
4139  DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
4140  DB  196,67,125,25,194,1                 ; vextractf128  $0x1,%ymm8,%xmm10
4141  DB  196,193,105,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm2
4142  DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
4143  DB  197,180,84,201                      ; vandps        %ymm1,%ymm9,%ymm1
; Move the eight indices to GPRs (same even/odd lane split as above).
4144  DB  196,193,249,126,201                 ; vmovq         %xmm1,%r9
4145  DB  69,137,203                          ; mov           %r9d,%r11d
4146  DB  196,195,249,22,202,1                ; vpextrq       $0x1,%xmm1,%r10
4147  DB  69,137,214                          ; mov           %r10d,%r14d
4148  DB  73,193,234,32                       ; shr           $0x20,%r10
4149  DB  73,193,233,32                       ; shr           $0x20,%r9
4150  DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
4151  DB  196,225,249,126,205                 ; vmovq         %xmm1,%rbp
4152  DB  65,137,239                          ; mov           %ebp,%r15d
4153  DB  196,227,249,22,203,1                ; vpextrq       $0x1,%xmm1,%rbx
4154  DB  65,137,220                          ; mov           %ebx,%r12d
4155  DB  72,193,235,32                       ; shr           $0x20,%rbx
4156  DB  72,193,237,32                       ; shr           $0x20,%rbp
; Fetch from the r8 table: lanes 4-7 into xmm1, lanes 0-3 into xmm2, then
; combine into ymm1.
4157  DB  196,129,122,16,12,184               ; vmovss        (%r8,%r15,4),%xmm1
4158  DB  196,195,113,33,12,168,16            ; vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
4159  DB  196,129,122,16,20,160               ; vmovss        (%r8,%r12,4),%xmm2
4160  DB  196,227,113,33,202,32               ; vinsertps     $0x20,%xmm2,%xmm1,%xmm1
4161  DB  196,193,122,16,20,152               ; vmovss        (%r8,%rbx,4),%xmm2
4162  DB  196,227,113,33,202,48               ; vinsertps     $0x30,%xmm2,%xmm1,%xmm1
4163  DB  196,129,122,16,20,152               ; vmovss        (%r8,%r11,4),%xmm2
4164  DB  196,131,105,33,20,136,16            ; vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
4165  DB  196,129,122,16,28,176               ; vmovss        (%r8,%r14,4),%xmm3
4166  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
4167  DB  196,129,122,16,28,144               ; vmovss        (%r8,%r10,4),%xmm3
4168  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
4169  DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
; rax = table pointer at context+0x18 (the context pointer is no longer
; needed after this load).
4170  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
; Third index: (v >> 16) & 0xff.
4171  DB  196,193,105,114,208,16              ; vpsrld        $0x10,%xmm8,%xmm2
4172  DB  196,193,97,114,210,16               ; vpsrld        $0x10,%xmm10,%xmm3
4173  DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
4174  DB  197,180,84,210                      ; vandps        %ymm2,%ymm9,%ymm2
; Move the eight indices to GPRs (same even/odd lane split as above).
4175  DB  196,193,249,126,208                 ; vmovq         %xmm2,%r8
4176  DB  69,137,194                          ; mov           %r8d,%r10d
4177  DB  196,195,249,22,209,1                ; vpextrq       $0x1,%xmm2,%r9
4178  DB  69,137,203                          ; mov           %r9d,%r11d
4179  DB  73,193,233,32                       ; shr           $0x20,%r9
4180  DB  73,193,232,32                       ; shr           $0x20,%r8
4181  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
4182  DB  196,225,249,126,213                 ; vmovq         %xmm2,%rbp
4183  DB  65,137,238                          ; mov           %ebp,%r14d
4184  DB  196,227,249,22,211,1                ; vpextrq       $0x1,%xmm2,%rbx
4185  DB  65,137,223                          ; mov           %ebx,%r15d
4186  DB  72,193,235,32                       ; shr           $0x20,%rbx
4187  DB  72,193,237,32                       ; shr           $0x20,%rbp
; Fetch from the rax table: lanes 4-7 into xmm9 (the 0xff mask is dead by
; now), lanes 0-3 into xmm2, then combine into ymm2.
4188  DB  196,161,122,16,20,176               ; vmovss        (%rax,%r14,4),%xmm2
4189  DB  196,227,105,33,20,168,16            ; vinsertps     $0x10,(%rax,%rbp,4),%xmm2,%xmm2
4190  DB  196,161,122,16,28,184               ; vmovss        (%rax,%r15,4),%xmm3
4191  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
4192  DB  197,250,16,28,152                   ; vmovss        (%rax,%rbx,4),%xmm3
4193  DB  196,99,105,33,203,48                ; vinsertps     $0x30,%xmm3,%xmm2,%xmm9
4194  DB  196,161,122,16,28,144               ; vmovss        (%rax,%r10,4),%xmm3
4195  DB  196,163,97,33,28,128,16             ; vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
4196  DB  196,161,122,16,20,152               ; vmovss        (%rax,%r11,4),%xmm2
4197  DB  196,227,97,33,210,32                ; vinsertps     $0x20,%xmm2,%xmm3,%xmm2
4198  DB  196,161,122,16,28,136               ; vmovss        (%rax,%r9,4),%xmm3
4199  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
4200  DB  196,195,109,24,209,1                ; vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
; ymm3 = float(v >> 24) * 0x3b808081 (~1/255); the top byte is not looked up.
4201  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
4202  DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
4203  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
4204  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
4205  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4206  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4207  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4208  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4209  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Fetch the next stage address, restore the saved registers in reverse push
; order, and tail-jump to the next stage.
4210  DB  72,173                              ; lods          %ds:(%rsi),%rax
4211  DB  91                                  ; pop           %rbx
4212  DB  65,92                               ; pop           %r12
4213  DB  65,93                               ; pop           %r13
4214  DB  65,94                               ; pop           %r14
4215  DB  65,95                               ; pop           %r15
4216  DB  93                                  ; pop           %rbp
4217  DB  255,224                             ; jmpq          *%rax
; +0x250: partial load. bl = (rcx & 7) - 1 selects a jump-table entry; when
; (rcx & 7) == 0 the decrement wraps above 6 and we rejoin at +0x1e with
; ymm8 all zero.
4218  DB  137,203                             ; mov           %ecx,%ebx
4219  DB  128,227,7                           ; and           $0x7,%bl
4220  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
4221  DB  254,203                             ; dec           %bl
4222  DB  128,251,6                           ; cmp           $0x6,%bl
4223  DB  15,135,185,253,255,255              ; ja            142a <_sk_load_tables_avx+0x1e>
; Dispatch: rbx = table base (r9) + sign-extended 32-bit entry rbx.
4224  DB  15,182,219                          ; movzbl        %bl,%ebx
4225  DB  76,141,13,137,0,0,0                 ; lea           0x89(%rip),%r9        # 1704 <_sk_load_tables_avx+0x2f8>
4226  DB  73,99,28,153                        ; movslq        (%r9,%rbx,4),%rbx
4227  DB  76,1,203                            ; add           %r9,%rbx
4228  DB  255,227                             ; jmpq          *%rbx
; Fall-through ladder: entry k starts at the load for element k and falls
; through the loads below it. Elements 6..4 land in the high 128-bit half
; (extract / insert / re-insert), elements 3..0 in the low half via vblendps.
4229  DB  196,193,121,110,68,184,24           ; vmovd         0x18(%r8,%rdi,4),%xmm0
4230  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
4231  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
4232  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
4233  DB  196,99,117,12,192,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm8
4234  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
4235  DB  196,195,121,34,68,184,20,1          ; vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
4236  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
4237  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
4238  DB  196,195,121,34,68,184,16,0          ; vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
4239  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
4240  DB  196,195,57,34,68,184,12,3           ; vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
4241  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
4242  DB  196,195,57,34,68,184,8,2            ; vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
4243  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
4244  DB  196,195,57,34,68,184,4,1            ; vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
4245  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
4246  DB  196,195,57,34,4,184,0               ; vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
4247  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
4248  DB  233,38,253,255,255                  ; jmpq          142a <_sk_load_tables_avx+0x1e>
; +0x2f8: jump table DATA, not code -- the mnemonics below are disassembler
; artifacts. It holds seven little-endian signed 32-bit offsets relative to
; the table base: -18, -32, -46, -60, -80, -100, -128 (entry i is used when
; (rcx & 7) == i + 1).
4249  DB  238                                 ; out           %al,(%dx)
4250  DB  255                                 ; (bad)
4251  DB  255                                 ; (bad)
4252  DB  255,224                             ; jmpq          *%rax
4253  DB  255                                 ; (bad)
4254  DB  255                                 ; (bad)
4255  DB  255,210                             ; callq         *%rdx
4256  DB  255                                 ; (bad)
4257  DB  255                                 ; (bad)
4258  DB  255,196                             ; inc           %esp
4259  DB  255                                 ; (bad)
4260  DB  255                                 ; (bad)
4261  DB  255,176,255,255,255,156             ; pushq         -0x63000001(%rax)
4262  DB  255                                 ; (bad)
4263  DB  255                                 ; (bad)
4264  DB  255                                 ; .byte         0xff
4265  DB  128,255,255                         ; cmp           $0xff,%bh
4266  DB  255                                 ; .byte         0xff
4267
; ---------------------------------------------------------------------------
; _sk_load_a8_avx
;
; AVX pipeline stage operating on 8 lanes. Loads 8-bit values from
; (pointer at context+0) + rdi, converts them to float and scales by
; 0x3b808081 (~1/255) into ymm3; ymm0, ymm1 and ymm2 are set to zero.
;
; In:   rsi = stream of 8-byte entries read with lods (context pointer, then
;             the address of the next stage)
;       rdi = byte offset of the first element
;       rcx = 0 for a full 8-byte load, otherwise the number of bytes to
;             load one at a time
; Out:  ymm0 = ymm1 = ymm2 = 0, ymm3 = scaled values; rcx preserved;
;       control transfers to the next stage via jmp
; Clobbers: rax, r8, r9, r10, r11, flags
; NOTE(review): the tail loop shifts by 8 bits per byte into a 64-bit
; register, so it presumably expects rcx < 8 -- confirm against the caller.
; ---------------------------------------------------------------------------
4268PUBLIC _sk_load_a8_avx
4269_sk_load_a8_avx LABEL PROC
; Keep rcx in r8: the tail loop needs cl as its shift count.
4270  DB  73,137,200                          ; mov           %rcx,%r8
; rax = address of the first source byte = *(context) + rdi.
4271  DB  72,173                              ; lods          %ds:(%rsi),%rax
4272  DB  72,139,0                            ; mov           (%rax),%rax
4273  DB  72,1,248                            ; add           %rdi,%rax
; Non-zero count: take the byte-at-a-time path at +0x5a.
4274  DB  77,133,192                          ; test          %r8,%r8
4275  DB  117,74                              ; jne           177a <_sk_load_a8_avx+0x5a>
; Full path: load 8 bytes into the low half of xmm0.
4276  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
; +0x14: both paths join here with the bytes in the low 64 bits of xmm0.
; Widen bytes 0-3 to dwords in xmm1; the $0xe5 shuffle moves dword 1 (bytes
; 4-7) into dword 0 so the second widen handles bytes 4-7.
4277  DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
4278  DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
4279  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
4280  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
; ymm3 = float(byte) * 0x3b808081 (~1/255).
4281  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4282  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4283  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4284  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4285  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4286  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
; Fetch the next stage address, zero ymm0-ymm2, restore rcx, and tail-jump.
4287  DB  72,173                              ; lods          %ds:(%rsi),%rax
4288  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
4289  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
4290  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
4291  DB  76,137,193                          ; mov           %r8,%rcx
4292  DB  255,224                             ; jmpq          *%rax
; +0x5a: partial load. rcx = bit offset (starts at 0), r10 = bytes left,
; r9 = accumulated little-endian value.
4293  DB  49,201                              ; xor           %ecx,%ecx
4294  DB  77,137,194                          ; mov           %r8,%r10
4295  DB  69,49,201                           ; xor           %r9d,%r9d
; +0x62: loop -- r9 |= (uint64)*rax++ << rcx; rcx += 8; until r10 reaches 0.
4296  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
4297  DB  72,255,192                          ; inc           %rax
4298  DB  73,211,227                          ; shl           %cl,%r11
4299  DB  77,9,217                            ; or            %r11,%r9
4300  DB  72,131,193,8                        ; add           $0x8,%rcx
4301  DB  73,255,202                          ; dec           %r10
4302  DB  117,234                             ; jne           1782 <_sk_load_a8_avx+0x62>
; Move the assembled bytes into xmm0 and rejoin the common path.
4303  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
4304  DB  235,149                             ; jmp           1734 <_sk_load_a8_avx+0x14>
4305
; ---------------------------------------------------------------------------
; _sk_gather_a8_avx
;
; AVX pipeline stage operating on 8 lanes. For each lane computes
;     index = trunc(ymm1) * (dword at context+0x10) + trunc(ymm0)
; reads the byte at (pointer at context+0) + index, converts it to float and
; scales by 0x3b808081 (~1/255) into ymm3; ymm0, ymm1 and ymm2 are set to
; zero. AVX1 has no gather instruction, so every index is moved to a general
; purpose register and the bytes are fetched with scalar loads.
;
; In:   rsi = stream of 8-byte entries read with lods (context pointer, then
;             the address of the next stage)
;       ymm0, ymm1 = float coordinates
; Out:  ymm0 = ymm1 = ymm2 = 0, ymm3 = scaled values; control transfers to
;       the next stage via jmp
; Saved/restored: r15, r14, r12, rbx
; Clobbers: rax, r8, r9, r10, r11, flags
; NOTE(review): ymm0/ymm1 are presumably x/y and the dword at context+0x10
; a row stride -- confirm against the context struct definition.
; ---------------------------------------------------------------------------
4306PUBLIC _sk_gather_a8_avx
4307_sk_gather_a8_avx LABEL PROC
; Save the registers used below as index scratch.
4308  DB  65,87                               ; push          %r15
4309  DB  65,86                               ; push          %r14
4310  DB  65,84                               ; push          %r12
4311  DB  83                                  ; push          %rbx
; rax = context pointer, r8 = source pointer stored at context+0.
4312  DB  72,173                              ; lods          %ds:(%rsi),%rax
4313  DB  76,139,0                            ; mov           (%rax),%r8
; ymm2 = trunc(ymm1); xmm3 = dword at context+0x10 broadcast to 4 lanes.
4314  DB  197,254,91,209                      ; vcvttps2dq    %ymm1,%ymm2
4315  DB  197,249,110,72,16                   ; vmovd         0x10(%rax),%xmm1
4316  DB  197,249,112,217,0                   ; vpshufd       $0x0,%xmm1,%xmm3
; 32-bit multiply per 128-bit half: xmm1 = lanes 0-3, xmm2 = lanes 4-7.
4317  DB  196,226,97,64,202                   ; vpmulld       %xmm2,%xmm3,%xmm1
4318  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
4319  DB  196,226,97,64,210                   ; vpmulld       %xmm2,%xmm3,%xmm2
; Add trunc(ymm0); lanes 4-7 first, moved to rax/r9 (lanes 7/6) and
; r10/r11 (lanes 5/4).
4320  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
4321  DB  196,227,125,25,195,1                ; vextractf128  $0x1,%ymm0,%xmm3
4322  DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
4323  DB  196,227,249,22,208,1                ; vpextrq       $0x1,%xmm2,%rax
4324  DB  65,137,193                          ; mov           %eax,%r9d
4325  DB  72,193,232,32                       ; shr           $0x20,%rax
4326  DB  196,193,249,126,210                 ; vmovq         %xmm2,%r10
4327  DB  69,137,211                          ; mov           %r10d,%r11d
4328  DB  73,193,234,32                       ; shr           $0x20,%r10
; Lanes 0-3, moved to r14/rbx (lanes 0/1) and r12/r15 (lanes 2/3).
4329  DB  197,241,254,192                     ; vpaddd        %xmm0,%xmm1,%xmm0
4330  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
4331  DB  65,137,222                          ; mov           %ebx,%r14d
4332  DB  196,195,249,22,199,1                ; vpextrq       $0x1,%xmm0,%r15
4333  DB  69,137,252                          ; mov           %r15d,%r12d
4334  DB  73,193,239,32                       ; shr           $0x20,%r15
4335  DB  72,193,235,32                       ; shr           $0x20,%rbx
; Fetch bytes for lanes 0-3 into the low four bytes of xmm0 (all four are
; overwritten, so the stale index bits above them do not matter) and widen
; them to dwords.
4336  DB  196,131,121,32,4,48,0               ; vpinsrb       $0x0,(%r8,%r14,1),%xmm0,%xmm0
4337  DB  196,195,121,32,4,24,1               ; vpinsrb       $0x1,(%r8,%rbx,1),%xmm0,%xmm0
4338  DB  67,15,182,28,32                     ; movzbl        (%r8,%r12,1),%ebx
4339  DB  196,227,121,32,195,2                ; vpinsrb       $0x2,%ebx,%xmm0,%xmm0
4340  DB  67,15,182,28,56                     ; movzbl        (%r8,%r15,1),%ebx
4341  DB  196,227,121,32,195,3                ; vpinsrb       $0x3,%ebx,%xmm0,%xmm0
4342  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
; Same for lanes 4-7 into xmm1.
4343  DB  196,131,121,32,12,24,0              ; vpinsrb       $0x0,(%r8,%r11,1),%xmm0,%xmm1
4344  DB  196,131,113,32,12,16,1              ; vpinsrb       $0x1,(%r8,%r10,1),%xmm1,%xmm1
4345  DB  67,15,182,28,8                      ; movzbl        (%r8,%r9,1),%ebx
4346  DB  196,227,113,32,203,2                ; vpinsrb       $0x2,%ebx,%xmm1,%xmm1
4347  DB  65,15,182,4,0                       ; movzbl        (%r8,%rax,1),%eax
4348  DB  196,227,113,32,200,3                ; vpinsrb       $0x3,%eax,%xmm1,%xmm1
4349  DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
; Combine halves; ymm3 = float(byte) * 0x3b808081 (~1/255).
4350  DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
4351  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4352  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4353  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4354  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4355  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4356  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
; Fetch the next stage address, zero ymm0-ymm2, restore the saved registers
; in reverse push order, and tail-jump to the next stage.
4357  DB  72,173                              ; lods          %ds:(%rsi),%rax
4358  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
4359  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
4360  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
4361  DB  91                                  ; pop           %rbx
4362  DB  65,92                               ; pop           %r12
4363  DB  65,94                               ; pop           %r14
4364  DB  65,95                               ; pop           %r15
4365  DB  255,224                             ; jmpq          *%rax
4366
; ---------------------------------------------------------------------------
; _sk_store_a8_avx
;
; AVX pipeline stage operating on 8 lanes. Multiplies ymm3 by 255.0f
; (0x437f0000), converts to 32-bit integers, narrows to bytes with unsigned
; saturation, and stores them at (pointer at context+0) + rdi.
;
; In:   rsi = stream of 8-byte entries read with lods (context pointer, then
;             the address of the next stage)
;       rdi = byte offset of the first element
;       rcx = 0 to store all 8 bytes, otherwise (rcx & 7) bytes are stored
;       ymm3 = values to store
; Out:  memory written; ymm0-ymm7 unchanged; control transfers to the next
;       stage via jmp
; Clobbers: rax, r8, r9, ymm8, ymm9, flags
; ---------------------------------------------------------------------------
4367PUBLIC _sk_store_a8_avx
4368_sk_store_a8_avx LABEL PROC
; rax = context pointer, r9 = destination pointer stored at context+0.
4369  DB  72,173                              ; lods          %ds:(%rsi),%rax
4370  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = int32(ymm3 * 255.0f); vcvtps2dq rounds per the current MXCSR mode.
4371  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
4372  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
4373  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
4374  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
4375  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
4376  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
; Narrow 8 x int32 -> 8 x uint16 -> 8 x uint8 (unsigned saturation at each
; step); the eight result bytes end up in the low 64 bits of xmm8.
4377  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
4378  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
4379  DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
; Non-zero rcx: take the partial-store path at +0x42.
4380  DB  72,133,201                          ; test          %rcx,%rcx
4381  DB  117,10                              ; jne           18db <_sk_store_a8_avx+0x42>
; Full path: store all 8 bytes at once.
4382  DB  196,65,123,17,4,57                  ; vmovsd        %xmm8,(%r9,%rdi,1)
; +0x3e: common exit -- fetch the next stage address and tail-jump to it.
4383  DB  72,173                              ; lods          %ds:(%rsi),%rax
4384  DB  255,224                             ; jmpq          *%rax
; +0x42: partial store. r8 = (rcx & 7) - 1 selects a jump-table entry; when
; (rcx & 7) == 0 the decrement wraps above 6 and nothing is stored.
4385  DB  65,137,200                          ; mov           %ecx,%r8d
4386  DB  65,128,224,7                        ; and           $0x7,%r8b
4387  DB  65,254,200                          ; dec           %r8b
4388  DB  65,128,248,6                        ; cmp           $0x6,%r8b
4389  DB  119,236                             ; ja            18d7 <_sk_store_a8_avx+0x3e>
; Spread the bytes to 16-bit lanes, so element k now sits at byte index 2k
; (hence the even vpextrb selectors below).
4390  DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
; Dispatch: rax = table base (r8) + sign-extended 32-bit entry.
4391  DB  65,15,182,192                       ; movzbl        %r8b,%eax
4392  DB  76,141,5,69,0,0,0                   ; lea           0x45(%rip),%r8        # 1940 <_sk_store_a8_avx+0xa7>
4393  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
4394  DB  76,1,192                            ; add           %r8,%rax
4395  DB  255,224                             ; jmpq          *%rax
; Fall-through ladder: entry k starts at the store for element k and falls
; through the stores below it, so k+1 bytes are written in total.
4396  DB  196,67,121,20,68,57,6,12            ; vpextrb       $0xc,%xmm8,0x6(%r9,%rdi,1)
4397  DB  196,67,121,20,68,57,5,10            ; vpextrb       $0xa,%xmm8,0x5(%r9,%rdi,1)
4398  DB  196,67,121,20,68,57,4,8             ; vpextrb       $0x8,%xmm8,0x4(%r9,%rdi,1)
4399  DB  196,67,121,20,68,57,3,6             ; vpextrb       $0x6,%xmm8,0x3(%r9,%rdi,1)
4400  DB  196,67,121,20,68,57,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r9,%rdi,1)
4401  DB  196,67,121,20,68,57,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r9,%rdi,1)
4402  DB  196,67,121,20,4,57,0                ; vpextrb       $0x0,%xmm8,(%r9,%rdi,1)
4403  DB  235,154                             ; jmp           18d7 <_sk_store_a8_avx+0x3e>
; 3-byte nop: padding between the code and the table that follows.
4404  DB  15,31,0                             ; nopl          (%rax)
; +0xa7: jump table DATA, not code -- the mnemonics below are disassembler
; artifacts. It holds seven little-endian signed 32-bit offsets relative to
; the table base: -12, -20, -28, -36, -44, -52, -60 (entry i is used when
; (rcx & 7) == i + 1).
4405  DB  244                                 ; hlt
4406  DB  255                                 ; (bad)
4407  DB  255                                 ; (bad)
4408  DB  255                                 ; (bad)
4409  DB  236                                 ; in            (%dx),%al
4410  DB  255                                 ; (bad)
4411  DB  255                                 ; (bad)
4412  DB  255,228                             ; jmpq          *%rsp
4413  DB  255                                 ; (bad)
4414  DB  255                                 ; (bad)
4415  DB  255                                 ; (bad)
4416  DB  220,255                             ; fdivr         %st,%st(7)
4417  DB  255                                 ; (bad)
4418  DB  255,212                             ; callq         *%rsp
4419  DB  255                                 ; (bad)
4420  DB  255                                 ; (bad)
4421  DB  255,204                             ; dec           %esp
4422  DB  255                                 ; (bad)
4423  DB  255                                 ; (bad)
4424  DB  255,196                             ; inc           %esp
4425  DB  255                                 ; (bad)
4426  DB  255                                 ; (bad)
4427  DB  255                                 ; .byte         0xff
4428
; ---------------------------------------------------------------------------
; _sk_load_g8_avx
; Reads up to eight 8-bit values, widens each to a 32-bit lane, converts to
; float and multiplies by 0x3b808081 (the single-precision value ~1/255).
;   In:   rsi = cursor into a list of 8-byte entries (each lods consumes one)
;         rdi = byte offset added to the base pointer
;         rcx = 0 for a full 8-byte load; otherwise the number of bytes that
;               the partial path reads one at a time
;   Out:  ymm0 = ymm1 = ymm2 = scaled values, ymm3 = 1.0f in every lane
;   Uses: rax, r8, r9, r10, r11, flags; rcx is restored from r8 before exit
; Finishes by jumping to the address fetched by the second lods.
; NOTE(review): the byte encodings are authoritative; the trailing comments
; are disassembler output.
; ---------------------------------------------------------------------------
4429PUBLIC _sk_load_g8_avx
4430_sk_load_g8_avx LABEL PROC
; Park rcx in r8: cl is reused as a shift count on the partial path.
4431  DB  73,137,200                          ; mov           %rcx,%r8
; rax = next list entry, then rax = (first qword of that entry) + rdi.
4432  DB  72,173                              ; lods          %ds:(%rsi),%rax
4433  DB  72,139,0                            ; mov           (%rax),%rax
4434  DB  72,1,248                            ; add           %rdi,%rax
; Non-zero count -> take the byte-at-a-time path at +0x6b.
4435  DB  77,133,192                          ; test          %r8,%r8
4436  DB  117,91                              ; jne           19c7 <_sk_load_g8_avx+0x6b>
; Full path: load 8 packed bytes into the low half of xmm0.
4437  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
; +0x14 (re-entry point for the partial path): xmm0 holds the packed bytes.
; Bytes 0..3 are widened into xmm1; selector 0xe5 copies dword 1 (bytes
; 4..7) into dword 0 so the second widen handles the upper four bytes.
4438  DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
4439  DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
4440  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
; ymm0 = {xmm1 (values 0..3), xmm0 (values 4..7)}, converted to float.
4441  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
4442  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; Broadcast 0x3b808081 (~1/255) to all eight lanes and scale ymm0 by it.
4443  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4444  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4445  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4446  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4447  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm3 = 0x3f800000 (1.0f) broadcast to all eight lanes.
4448  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4449  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4450  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4451  DB  196,227,117,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm3
; Fetch the jump target, restore rcx, copy ymm0 into ymm1/ymm2 and leave.
4452  DB  72,173                              ; lods          %ds:(%rsi),%rax
4453  DB  76,137,193                          ; mov           %r8,%rcx
4454  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
4455  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
4456  DB  255,224                             ; jmpq          *%rax
; +0x6b: partial path. r10 counts down from r8; each iteration reads one
; byte at rax, shifts it left by cl (0, 8, 16, ...) and ORs it into r9, so
; byte i ends up in bits 8*i..8*i+7 and the unread positions stay zero.
4457  DB  49,201                              ; xor           %ecx,%ecx
4458  DB  77,137,194                          ; mov           %r8,%r10
4459  DB  69,49,201                           ; xor           %r9d,%r9d
4460  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
4461  DB  72,255,192                          ; inc           %rax
4462  DB  73,211,227                          ; shl           %cl,%r11
4463  DB  77,9,217                            ; or            %r11,%r9
4464  DB  72,131,193,8                        ; add           $0x8,%rcx
4465  DB  73,255,202                          ; dec           %r10
4466  DB  117,234                             ; jne           19cf <_sk_load_g8_avx+0x73>
; Hand the assembled bytes to the shared conversion code at +0x14.
4467  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
4468  DB  235,132                             ; jmp           1970 <_sk_load_g8_avx+0x14>
4469
; ---------------------------------------------------------------------------
; _sk_gather_g8_avx
; Gathers eight 8-bit values from computed offsets, converts them to float
; and multiplies by 0x3b808081 (the single-precision value ~1/255).
;   In:   rsi  = cursor into a list of 8-byte entries (each lods consumes one)
;         ymm0, ymm1 = float coordinates; lane i yields the byte offset
;                trunc(ymm1[i]) * m + trunc(ymm0[i]), where m is the 32-bit
;                value at offset 0x10 of the fetched entry
;                (presumably a row stride - confirm against the caller)
;   Out:  ymm0 = ymm1 = ymm2 = scaled values, ymm3 = 1.0f in every lane
;   Uses: rax, r8-r11, flags; rbx, r12, r14, r15 are pushed and popped here
; Finishes by jumping to the address fetched by the second lods.
; ---------------------------------------------------------------------------
4470PUBLIC _sk_gather_g8_avx
4471_sk_gather_g8_avx LABEL PROC
; Save the registers used as scratch for the individual offsets.
4472  DB  65,87                               ; push          %r15
4473  DB  65,86                               ; push          %r14
4474  DB  65,84                               ; push          %r12
4475  DB  83                                  ; push          %rbx
; rax = next list entry; r8 = base pointer stored in its first qword.
4476  DB  72,173                              ; lods          %ds:(%rsi),%rax
4477  DB  76,139,0                            ; mov           (%rax),%r8
; Offsets are computed in two 128-bit halves (32-bit integer multiply/add):
; xmm1 = m * trunc(ymm1) for lanes 0..3, xmm2 = the same for lanes 4..7.
4478  DB  197,254,91,209                      ; vcvttps2dq    %ymm1,%ymm2
4479  DB  197,249,110,72,16                   ; vmovd         0x10(%rax),%xmm1
4480  DB  197,249,112,217,0                   ; vpshufd       $0x0,%xmm1,%xmm3
4481  DB  196,226,97,64,202                   ; vpmulld       %xmm2,%xmm3,%xmm1
4482  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
4483  DB  196,226,97,64,210                   ; vpmulld       %xmm2,%xmm3,%xmm2
; Add trunc(ymm0): xmm2 becomes the offsets for lanes 4..7.
4484  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
4485  DB  196,227,125,25,195,1                ; vextractf128  $0x1,%ymm0,%xmm3
4486  DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
; Split each 64-bit pair of offsets: the 32-bit mov zero-extends the low
; offset into a fresh register, the shr leaves the high offset in place.
;   lanes 6,7 -> r9, rax      lanes 4,5 -> r11, r10
4487  DB  196,227,249,22,208,1                ; vpextrq       $0x1,%xmm2,%rax
4488  DB  65,137,193                          ; mov           %eax,%r9d
4489  DB  72,193,232,32                       ; shr           $0x20,%rax
4490  DB  196,193,249,126,210                 ; vmovq         %xmm2,%r10
4491  DB  69,137,211                          ; mov           %r10d,%r11d
4492  DB  73,193,234,32                       ; shr           $0x20,%r10
; xmm0 = offsets for lanes 0..3, split the same way:
;   lanes 0,1 -> r14, rbx     lanes 2,3 -> r12, r15
4493  DB  197,241,254,192                     ; vpaddd        %xmm0,%xmm1,%xmm0
4494  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
4495  DB  65,137,222                          ; mov           %ebx,%r14d
4496  DB  196,195,249,22,199,1                ; vpextrq       $0x1,%xmm0,%r15
4497  DB  69,137,252                          ; mov           %r15d,%r12d
4498  DB  73,193,239,32                       ; shr           $0x20,%r15
4499  DB  72,193,235,32                       ; shr           $0x20,%rbx
; Load bytes for lanes 0..3 into bytes 0..3 of xmm0, then widen to dwords.
; Only the low four bytes feed vpmovzxbd, so stale upper bytes are harmless.
4500  DB  196,131,121,32,4,48,0               ; vpinsrb       $0x0,(%r8,%r14,1),%xmm0,%xmm0
4501  DB  196,195,121,32,4,24,1               ; vpinsrb       $0x1,(%r8,%rbx,1),%xmm0,%xmm0
4502  DB  67,15,182,28,32                     ; movzbl        (%r8,%r12,1),%ebx
4503  DB  196,227,121,32,195,2                ; vpinsrb       $0x2,%ebx,%xmm0,%xmm0
4504  DB  67,15,182,28,56                     ; movzbl        (%r8,%r15,1),%ebx
4505  DB  196,227,121,32,195,3                ; vpinsrb       $0x3,%ebx,%xmm0,%xmm0
4506  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
; Same for lanes 4..7, built in xmm1.
4507  DB  196,131,121,32,12,24,0              ; vpinsrb       $0x0,(%r8,%r11,1),%xmm0,%xmm1
4508  DB  196,131,113,32,12,16,1              ; vpinsrb       $0x1,(%r8,%r10,1),%xmm1,%xmm1
4509  DB  67,15,182,28,8                      ; movzbl        (%r8,%r9,1),%ebx
4510  DB  196,227,113,32,203,2                ; vpinsrb       $0x2,%ebx,%xmm1,%xmm1
4511  DB  65,15,182,4,0                       ; movzbl        (%r8,%rax,1),%eax
4512  DB  196,227,113,32,200,3                ; vpinsrb       $0x3,%eax,%xmm1,%xmm1
4513  DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
; ymm0 = {lanes 0..3, lanes 4..7} as floats.
4514  DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
4515  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; Broadcast 0x3b808081 (~1/255) to all eight lanes and scale ymm0 by it.
4516  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4517  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4518  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4519  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4520  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm3 = 0x3f800000 (1.0f) broadcast to all eight lanes.
4521  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4522  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4523  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4524  DB  196,227,117,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm3
; Fetch the jump target, copy ymm0 into ymm1/ymm2, restore the saved
; registers in reverse push order and leave.
4525  DB  72,173                              ; lods          %ds:(%rsi),%rax
4526  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
4527  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
4528  DB  91                                  ; pop           %rbx
4529  DB  65,92                               ; pop           %r12
4530  DB  65,94                               ; pop           %r14
4531  DB  65,95                               ; pop           %r15
4532  DB  255,224                             ; jmpq          *%rax
4533
; ---------------------------------------------------------------------------
; _sk_gather_i8_avx
; Two-level gather: eight 8-bit indices are read from computed offsets, each
; index selects a 32-bit word from a second table, and the four bytes of
; every word are unpacked into separate float vectors scaled by 0x3b808081
; (the single-precision value ~1/255).
;   In:   rsi  = cursor into a list of 8-byte entries (each lods consumes one)
;         ymm0, ymm1 = float coordinates; lane i yields the byte offset
;                trunc(ymm1[i]) * m + trunc(ymm0[i]), where m is the 32-bit
;                value at offset 0x10 of the entry held in rax
;                (presumably a row stride - confirm against the caller)
;   Out:  ymm0 = bits 0..7, ymm1 = bits 8..15, ymm2 = bits 16..23,
;         ymm3 = bits 24..31 of each looked-up word, as scaled floats
;   Uses: rax, r8-r11, xmm8-xmm11, flags; rbx, r12-r15 are pushed/popped here
; Finishes by jumping to the address fetched by the final lods.
; ---------------------------------------------------------------------------
4534PUBLIC _sk_gather_i8_avx
4535_sk_gather_i8_avx LABEL PROC
; r8 = first fetched entry. If it is non-null, rax is reloaded from r8 and
; the second lods is skipped; if it is null, a second entry is fetched into
; rax while r8 keeps the (null) first value.
; NOTE(review): r8 is dereferenced below as 0x8(%r8), so the null branch
; would fault there - presumably it is never taken in practice; confirm.
4536  DB  72,173                              ; lods          %ds:(%rsi),%rax
4537  DB  73,137,192                          ; mov           %rax,%r8
4538  DB  77,133,192                          ; test          %r8,%r8
4539  DB  116,5                               ; je            1b06 <_sk_gather_i8_avx+0xf>
4540  DB  76,137,192                          ; mov           %r8,%rax
4541  DB  235,2                               ; jmp           1b08 <_sk_gather_i8_avx+0x11>
4542  DB  72,173                              ; lods          %ds:(%rsi),%rax
; +0x11: save the registers used as scratch for the individual offsets.
4543  DB  65,87                               ; push          %r15
4544  DB  65,86                               ; push          %r14
4545  DB  65,85                               ; push          %r13
4546  DB  65,84                               ; push          %r12
4547  DB  83                                  ; push          %rbx
; r9 = base pointer of the 8-bit index data (first qword of the entry).
4548  DB  76,139,8                            ; mov           (%rax),%r9
; Offsets are computed in two 128-bit halves (32-bit integer multiply/add):
; xmm1 = m * trunc(ymm1) for lanes 0..3, xmm2 = the same for lanes 4..7.
4549  DB  197,254,91,209                      ; vcvttps2dq    %ymm1,%ymm2
4550  DB  197,249,110,72,16                   ; vmovd         0x10(%rax),%xmm1
4551  DB  197,249,112,217,0                   ; vpshufd       $0x0,%xmm1,%xmm3
4552  DB  196,226,97,64,202                   ; vpmulld       %xmm2,%xmm3,%xmm1
4553  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
4554  DB  196,226,97,64,210                   ; vpmulld       %xmm2,%xmm3,%xmm2
; Add trunc(ymm0): xmm2 becomes the offsets for lanes 4..7.
4555  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
4556  DB  196,227,125,25,195,1                ; vextractf128  $0x1,%ymm0,%xmm3
4557  DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
; Split each 64-bit pair of offsets (32-bit mov zero-extends the low one,
; shr keeps the high one):  lanes 6,7 -> r10, rax    lanes 4,5 -> r14, r11
4558  DB  196,227,249,22,208,1                ; vpextrq       $0x1,%xmm2,%rax
4559  DB  65,137,194                          ; mov           %eax,%r10d
4560  DB  72,193,232,32                       ; shr           $0x20,%rax
4561  DB  196,193,249,126,211                 ; vmovq         %xmm2,%r11
4562  DB  69,137,222                          ; mov           %r11d,%r14d
4563  DB  73,193,235,32                       ; shr           $0x20,%r11
; xmm0 = offsets for lanes 0..3:  lanes 0,1 -> r15, rbx   lanes 2,3 -> r13, r12
4564  DB  197,241,254,192                     ; vpaddd        %xmm0,%xmm1,%xmm0
4565  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
4566  DB  65,137,223                          ; mov           %ebx,%r15d
4567  DB  196,195,249,22,196,1                ; vpextrq       $0x1,%xmm0,%r12
4568  DB  69,137,229                          ; mov           %r12d,%r13d
4569  DB  73,193,236,32                       ; shr           $0x20,%r12
4570  DB  72,193,235,32                       ; shr           $0x20,%rbx
; Read the 8-bit indices for lanes 4..7 and widen them to dwords; the two
; resulting 64-bit pairs are parked in r11 (lanes 4,5) and r10 (lanes 6,7).
4571  DB  196,131,121,32,4,49,0               ; vpinsrb       $0x0,(%r9,%r14,1),%xmm0,%xmm0
4572  DB  196,131,121,32,4,25,1               ; vpinsrb       $0x1,(%r9,%r11,1),%xmm0,%xmm0
4573  DB  196,131,121,32,4,17,2               ; vpinsrb       $0x2,(%r9,%r10,1),%xmm0,%xmm0
4574  DB  196,195,121,32,4,1,3                ; vpinsrb       $0x3,(%r9,%rax,1),%xmm0,%xmm0
4575  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
4576  DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
4577  DB  196,193,249,126,195                 ; vmovq         %xmm0,%r11
; Read the 8-bit indices for lanes 0..3 and widen them to dwords.
4578  DB  196,131,121,32,4,57,0               ; vpinsrb       $0x0,(%r9,%r15,1),%xmm0,%xmm0
4579  DB  196,195,121,32,4,25,1               ; vpinsrb       $0x1,(%r9,%rbx,1),%xmm0,%xmm0
4580  DB  196,131,121,32,4,41,2               ; vpinsrb       $0x2,(%r9,%r13,1),%xmm0,%xmm0
4581  DB  196,131,121,32,4,33,3               ; vpinsrb       $0x3,(%r9,%r12,1),%xmm0,%xmm0
4582  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
; rbx = base of the 32-bit word table (second qword of the entry in r8).
4583  DB  73,139,88,8                         ; mov           0x8(%r8),%rbx
; Split each pair of indices. The low index is copied out with a 32-bit mov
; and later scaled by 4 in the addressing mode. The high index is produced
; by shr 30 instead of shr 32: every dword is <= 255 after vpmovzxbd, so
; bits 30..31 of the low dword are zero and the result is already index*4.
;   lanes 0,1 -> r8, r9    lanes 2,3 -> r14, rax
;   lanes 4,5 -> r15, r11  lanes 6,7 -> r12, r10
4584  DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
4585  DB  69,137,200                          ; mov           %r9d,%r8d
4586  DB  73,193,233,30                       ; shr           $0x1e,%r9
4587  DB  196,227,249,22,192,1                ; vpextrq       $0x1,%xmm0,%rax
4588  DB  65,137,198                          ; mov           %eax,%r14d
4589  DB  72,193,232,30                       ; shr           $0x1e,%rax
4590  DB  69,137,223                          ; mov           %r11d,%r15d
4591  DB  73,193,235,30                       ; shr           $0x1e,%r11
4592  DB  69,137,212                          ; mov           %r10d,%r12d
4593  DB  73,193,234,30                       ; shr           $0x1e,%r10
; Look up the 32-bit words: xmm8 = lanes 0..3, xmm3 = lanes 4..7.
4594  DB  196,161,121,110,4,131               ; vmovd         (%rbx,%r8,4),%xmm0
4595  DB  196,163,121,34,4,11,1               ; vpinsrd       $0x1,(%rbx,%r9,1),%xmm0,%xmm0
4596  DB  196,163,121,34,4,179,2              ; vpinsrd       $0x2,(%rbx,%r14,4),%xmm0,%xmm0
4597  DB  196,99,121,34,4,3,3                 ; vpinsrd       $0x3,(%rbx,%rax,1),%xmm0,%xmm8
4598  DB  196,161,121,110,4,187               ; vmovd         (%rbx,%r15,4),%xmm0
4599  DB  196,163,121,34,4,27,1               ; vpinsrd       $0x1,(%rbx,%r11,1),%xmm0,%xmm0
4600  DB  196,163,121,34,4,163,2              ; vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
4601  DB  196,163,121,34,28,19,3              ; vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
; ymm0 = all eight words; ymm11 = 0xff mask broadcast to every lane.
4602  DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
4603  DB  184,255,0,0,0                       ; mov           $0xff,%eax
4604  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4605  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
4606  DB  196,99,117,24,217,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm11
; ymm0 = float(word & 0xff) * ymm9, with ymm9 = 0x3b808081 (~1/255).
4607  DB  197,164,84,192                      ; vandps        %ymm0,%ymm11,%ymm0
4608  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4609  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4610  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4611  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4612  DB  196,99,117,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm9
4613  DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
; ymm1 = float((word >> 8) & 0xff) * ymm9. The shifts are done per 128-bit
; half and the halves are then rejoined with vinsertf128.
4614  DB  196,193,41,114,208,8                ; vpsrld        $0x8,%xmm8,%xmm10
4615  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
4616  DB  196,227,45,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm10,%ymm1
4617  DB  197,164,84,201                      ; vandps        %ymm1,%ymm11,%ymm1
4618  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
4619  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
; ymm2 = float((word >> 16) & 0xff) * ymm9.
4620  DB  196,193,41,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm10
4621  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
4622  DB  196,227,45,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm10,%ymm2
4623  DB  197,164,84,210                      ; vandps        %ymm2,%ymm11,%ymm2
4624  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
4625  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
; ymm3 = float(word >> 24) * ymm9; no mask needed after a logical shift by 24.
4626  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
4627  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
4628  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
4629  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
4630  DB  196,193,100,89,217                  ; vmulps        %ymm9,%ymm3,%ymm3
; Fetch the jump target, restore the saved registers in reverse push order
; and leave.
4631  DB  72,173                              ; lods          %ds:(%rsi),%rax
4632  DB  91                                  ; pop           %rbx
4633  DB  65,92                               ; pop           %r12
4634  DB  65,93                               ; pop           %r13
4635  DB  65,94                               ; pop           %r14
4636  DB  65,95                               ; pop           %r15
4637  DB  255,224                             ; jmpq          *%rax
4638
; ---------------------------------------------------------------------------
; _sk_load_565_avx
; Loads up to eight 16-bit values and splits each into three bit fields,
; converting every field to a float scaled so its maximum maps to ~1.0.
;   In:   rsi = cursor into a list of 8-byte entries (each lods consumes one)
;         rdi = element index; the address used is base + rdi*2
;         rcx = 0 for a full 16-byte load; otherwise (rcx & 7) selects how
;               many 16-bit values the partial path loads (1..7)
;   Out:  ymm0 = bits 11..15 (mask 0xf800, scale 0x37842108 ~ 1/63488)
;         ymm1 = bits  5..10 (mask 0x07e0, scale 0x3a020821 ~ 1/2016)
;         ymm2 = bits  0..4  (mask 0x001f, scale 0x3d042108 ~ 1/31)
;         ymm3 = 1.0f in every lane
;   Uses: rax, r8, r9, r10, flags
; Finishes by jumping to the address fetched by the second lods.
; ---------------------------------------------------------------------------
4639PUBLIC _sk_load_565_avx
4640_sk_load_565_avx LABEL PROC
; rax = next list entry; r10 = base pointer stored in its first qword.
4641  DB  72,173                              ; lods          %ds:(%rsi),%rax
4642  DB  76,139,16                           ; mov           (%rax),%r10
; Non-zero rcx -> partial load at +0xdf.
4643  DB  72,133,201                          ; test          %rcx,%rcx
4644  DB  15,133,209,0,0,0                    ; jne           1da2 <_sk_load_565_avx+0xdf>
; Full path: eight 16-bit values in one unaligned load.
4645  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
; +0x14 (re-entry point for the partial path): zero-extend the eight words
; to dwords. Upper four via unpack-high against zero, lower four via
; vpmovzxwd; ymm2 holds all eight.
4646  DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
4647  DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
4648  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
4649  DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
; ymm0 = float(v & 0xf800) * 0x37842108 (~1/63488, i.e. 1/(31<<11)).
4650  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
4651  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
4652  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
4653  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
4654  DB  197,252,84,194                      ; vandps        %ymm2,%ymm0,%ymm0
4655  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4656  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
4657  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4658  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4659  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4660  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x07e0) * 0x3a020821 (~1/2016, i.e. 1/(63<<5)).
4661  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
4662  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4663  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
4664  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4665  DB  197,244,84,202                      ; vandps        %ymm2,%ymm1,%ymm1
4666  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
4667  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
4668  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4669  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4670  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4671  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
; ymm2 = float(v & 0x001f) * 0x3d042108 (~1/31).
4672  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
4673  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4674  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4675  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4676  DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
4677  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
4678  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
4679  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4680  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4681  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4682  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; ymm3 = 0x3f800000 (1.0f) broadcast to all eight lanes.
4683  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4684  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4685  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4686  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
; Fetch the jump target and leave.
4687  DB  72,173                              ; lods          %ds:(%rsi),%rax
4688  DB  255,224                             ; jmpq          *%rax
; +0xdf: partial path. r8b = (rcx & 7) - 1. If that is above 6 as an
; unsigned byte (i.e. rcx & 7 was 0), nothing is loaded and the zeroed xmm0
; goes straight to the unpack code at +0x14.
4689  DB  65,137,200                          ; mov           %ecx,%r8d
4690  DB  65,128,224,7                        ; and           $0x7,%r8b
4691  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
4692  DB  65,254,200                          ; dec           %r8b
4693  DB  65,128,248,6                        ; cmp           $0x6,%r8b
4694  DB  15,135,29,255,255,255               ; ja            1cd7 <_sk_load_565_avx+0x14>
; Dispatch through the table of 32-bit offsets at +0x14d: each entry is
; sign-extended and added to the table's own address (held in r9).
4695  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
4696  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 1e10 <_sk_load_565_avx+0x14d>
4697  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
4698  DB  76,1,200                            ; add           %r9,%rax
4699  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands so that exactly k+1 words
; are inserted, highest index first, each falling through to the next. Only
; the 7-word entry re-zeroes xmm0 first (it was already cleared above).
4700  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
4701  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
4702  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
4703  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
4704  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
4705  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
4706  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
4707  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
4708  DB  233,201,254,255,255                 ; jmpq          1cd7 <_sk_load_565_avx+0x14>
; Two-byte nop; the jump table that follows starts right after it.
4709  DB  102,144                             ; xchg          %ax,%ax
; +0x14d: jump table - DATA, not code. The 28 bytes below are seven
; little-endian signed 32-bit offsets relative to the table start; the
; mnemonics in the trailing comments are disassembler artifacts and the DB
; lines do not line up with the 4-byte entry boundaries. Decoded entries:
;   [0] -14 (1 word)   [1] -22 (2 words)  [2] -30 (3 words)  [3] -38 (4 words)
;   [4] -46 (5 words)  [5] -54 (6 words)  [6] -66 (7 words, via the vpxor)
4710  DB  242,255                             ; repnz         (bad)
4711  DB  255                                 ; (bad)
4712  DB  255                                 ; (bad)
4713  DB  234                                 ; (bad)
4714  DB  255                                 ; (bad)
4715  DB  255                                 ; (bad)
4716  DB  255,226                             ; jmpq          *%rdx
4717  DB  255                                 ; (bad)
4718  DB  255                                 ; (bad)
4719  DB  255                                 ; (bad)
4720  DB  218,255                             ; (bad)
4721  DB  255                                 ; (bad)
4722  DB  255,210                             ; callq         *%rdx
4723  DB  255                                 ; (bad)
4724  DB  255                                 ; (bad)
4725  DB  255,202                             ; dec           %edx
4726  DB  255                                 ; (bad)
4727  DB  255                                 ; (bad)
4728  DB  255                                 ; (bad)
4729  DB  190                                 ; .byte         0xbe
4730  DB  255                                 ; (bad)
4731  DB  255                                 ; (bad)
4732  DB  255                                 ; .byte         0xff
4733
; ---------------------------------------------------------------------------
; _sk_gather_565_avx
; Gathers eight 16-bit values from computed element indices and splits each
; into three bit fields, converting every field to a float scaled so its
; maximum maps to ~1.0.
;   In:   rsi  = cursor into a list of 8-byte entries (each lods consumes one)
;         ymm0, ymm1 = float coordinates; lane i yields the element index
;                trunc(ymm1[i]) * m + trunc(ymm0[i]), where m is the 32-bit
;                value at offset 0x10 of the fetched entry
;                (presumably a row stride - confirm against the caller);
;                the address read is base + index*2
;   Out:  ymm0 = bits 11..15 (mask 0xf800, scale 0x37842108 ~ 1/63488)
;         ymm1 = bits  5..10 (mask 0x07e0, scale 0x3a020821 ~ 1/2016)
;         ymm2 = bits  0..4  (mask 0x001f, scale 0x3d042108 ~ 1/31)
;         ymm3 = 1.0f in every lane
;   Uses: rax, r8-r11, flags; rbp, rbx, r12, r14, r15 are pushed/popped here
; Finishes by jumping to the address fetched by the second lods.
; ---------------------------------------------------------------------------
4734PUBLIC _sk_gather_565_avx
4735_sk_gather_565_avx LABEL PROC
; Save the registers used as scratch for indices and loaded words.
4736  DB  85                                  ; push          %rbp
4737  DB  65,87                               ; push          %r15
4738  DB  65,86                               ; push          %r14
4739  DB  65,84                               ; push          %r12
4740  DB  83                                  ; push          %rbx
; rax = next list entry; r8 = base pointer stored in its first qword.
4741  DB  72,173                              ; lods          %ds:(%rsi),%rax
4742  DB  76,139,0                            ; mov           (%rax),%r8
; Indices are computed in two 128-bit halves (32-bit integer multiply/add):
; xmm1 = m * trunc(ymm1) for lanes 0..3, xmm2 = the same for lanes 4..7.
4743  DB  197,254,91,209                      ; vcvttps2dq    %ymm1,%ymm2
4744  DB  197,249,110,72,16                   ; vmovd         0x10(%rax),%xmm1
4745  DB  197,249,112,217,0                   ; vpshufd       $0x0,%xmm1,%xmm3
4746  DB  196,226,97,64,202                   ; vpmulld       %xmm2,%xmm3,%xmm1
4747  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
4748  DB  196,226,97,64,210                   ; vpmulld       %xmm2,%xmm3,%xmm2
; Add trunc(ymm0): xmm2 becomes the indices for lanes 4..7.
4749  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
4750  DB  196,227,125,25,195,1                ; vextractf128  $0x1,%ymm0,%xmm3
4751  DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
; Split each 64-bit pair of indices (32-bit mov zero-extends the low one,
; shr keeps the high one):  lanes 6,7 -> r9, rax    lanes 4,5 -> r11, r10
4752  DB  196,227,249,22,208,1                ; vpextrq       $0x1,%xmm2,%rax
4753  DB  65,137,193                          ; mov           %eax,%r9d
4754  DB  72,193,232,32                       ; shr           $0x20,%rax
4755  DB  196,193,249,126,210                 ; vmovq         %xmm2,%r10
4756  DB  69,137,211                          ; mov           %r10d,%r11d
4757  DB  73,193,234,32                       ; shr           $0x20,%r10
; xmm0 = indices for lanes 0..3:  lanes 0,1 -> r14, rbx   lanes 2,3 -> r12, r15
4758  DB  197,241,254,192                     ; vpaddd        %xmm0,%xmm1,%xmm0
4759  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
4760  DB  65,137,222                          ; mov           %ebx,%r14d
4761  DB  196,195,249,22,199,1                ; vpextrq       $0x1,%xmm0,%r15
4762  DB  69,137,252                          ; mov           %r15d,%r12d
4763  DB  73,193,239,32                       ; shr           $0x20,%r15
4764  DB  72,193,235,32                       ; shr           $0x20,%rbx
; Load the eight 16-bit values (address = r8 + index*2) and pack them into
; words 0..7 of xmm0 in lane order; ebx/ebp/eax are reused as temporaries.
4765  DB  65,15,183,28,88                     ; movzwl        (%r8,%rbx,2),%ebx
4766  DB  67,15,183,44,112                    ; movzwl        (%r8,%r14,2),%ebp
4767  DB  197,249,110,197                     ; vmovd         %ebp,%xmm0
4768  DB  197,249,196,195,1                   ; vpinsrw       $0x1,%ebx,%xmm0,%xmm0
4769  DB  67,15,183,28,96                     ; movzwl        (%r8,%r12,2),%ebx
4770  DB  197,249,196,195,2                   ; vpinsrw       $0x2,%ebx,%xmm0,%xmm0
4771  DB  67,15,183,28,120                    ; movzwl        (%r8,%r15,2),%ebx
4772  DB  197,249,196,195,3                   ; vpinsrw       $0x3,%ebx,%xmm0,%xmm0
4773  DB  67,15,183,44,88                     ; movzwl        (%r8,%r11,2),%ebp
4774  DB  197,249,196,197,4                   ; vpinsrw       $0x4,%ebp,%xmm0,%xmm0
4775  DB  67,15,183,44,80                     ; movzwl        (%r8,%r10,2),%ebp
4776  DB  197,249,196,197,5                   ; vpinsrw       $0x5,%ebp,%xmm0,%xmm0
4777  DB  67,15,183,44,72                     ; movzwl        (%r8,%r9,2),%ebp
4778  DB  197,249,196,197,6                   ; vpinsrw       $0x6,%ebp,%xmm0,%xmm0
4779  DB  65,15,183,4,64                      ; movzwl        (%r8,%rax,2),%eax
4780  DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
; Zero-extend the eight words to dwords: upper four via unpack-high against
; zero, lower four via vpmovzxwd; ymm2 holds all eight.
4781  DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
4782  DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
4783  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
4784  DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
; ymm0 = float(v & 0xf800) * 0x37842108 (~1/63488, i.e. 1/(31<<11)).
4785  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
4786  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
4787  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
4788  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
4789  DB  197,252,84,194                      ; vandps        %ymm2,%ymm0,%ymm0
4790  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4791  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
4792  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4793  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4794  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4795  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x07e0) * 0x3a020821 (~1/2016, i.e. 1/(63<<5)).
4796  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
4797  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4798  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
4799  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4800  DB  197,244,84,202                      ; vandps        %ymm2,%ymm1,%ymm1
4801  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
4802  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
4803  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4804  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4805  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4806  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
; ymm2 = float(v & 0x001f) * 0x3d042108 (~1/31).
4807  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
4808  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4809  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4810  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4811  DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
4812  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
4813  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
4814  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4815  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4816  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4817  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; ymm3 = 0x3f800000 (1.0f) broadcast to all eight lanes.
4818  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4819  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4820  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4821  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
; Fetch the jump target, restore the saved registers in reverse push order
; and leave.
4822  DB  72,173                              ; lods          %ds:(%rsi),%rax
4823  DB  91                                  ; pop           %rbx
4824  DB  65,92                               ; pop           %r12
4825  DB  65,94                               ; pop           %r14
4826  DB  65,95                               ; pop           %r15
4827  DB  93                                  ; pop           %rbp
4828  DB  255,224                             ; jmpq          *%rax
4829
; ---------------------------------------------------------------------------
; _sk_store_565_avx
; Scales three float vectors, packs them into eight 16-bit values and stores
; them to memory at a 2-byte stride:
;     word = (int(ymm0 * 31.0) << 11) | (int(ymm1 * 63.0) << 5) | int(ymm2 * 31.0)
; (presumably R, G, B of an RGB565 pixel -- implied by the stage name only).
;
; In:   rsi = cursor into a list of qwords; the first qword points to a
;             context whose first qword is the destination base pointer.
;       rdi = element index into the destination (scaled by 2 bytes).
;       rcx = 0 for a full 8-element store; otherwise the low 3 bits give the
;             number of elements (1..7) to store.  If rcx != 0 but
;             (rcx & 7) == 0, nothing is stored.
;       ymm0, ymm1, ymm2 = float inputs (not modified here).
; Out:  jumps to the address held in the next qword at rsi.
; Scratch: rax, r8, r9, ymm8-ymm11, flags.
; ---------------------------------------------------------------------------
4830PUBLIC _sk_store_565_avx
4831_sk_store_565_avx LABEL PROC
; rax = context pointer (rsi advances by 8); r9 = destination base pointer.
4832  DB  72,173                              ; lods          %ds:(%rsi),%rax
4833  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = broadcast 31.0f (0x41f80000); ymm9 = int(ymm0 * 31) << 11.
; The shift is done per 128-bit half (xmm integer shifts only), then the
; halves are re-joined with vinsertf128.
4834  DB  184,0,0,248,65                      ; mov           $0x41f80000,%eax
4835  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
4836  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
4837  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
4838  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
4839  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
4840  DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
4841  DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
4842  DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
4843  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
; ymm10 = int(ymm1 * 63.0f) << 5   (0x427c0000 is 63.0f).
4844  DB  184,0,0,124,66                      ; mov           $0x427c0000,%eax
4845  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
4846  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
4847  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
4848  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
4849  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
4850  DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
4851  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
4852  DB  196,193,41,114,242,5                ; vpslld        $0x5,%xmm10,%xmm10
4853  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
; Combine the two shifted fields, then OR in int(ymm2 * 31) unshifted.
4854  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
4855  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
4856  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
4857  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
; Narrow eight 32-bit lanes to eight 16-bit words in xmm8 (unsigned saturation).
4858  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
4859  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
; rcx == 0: store all 16 bytes and continue; otherwise take the partial path.
4860  DB  72,133,201                          ; test          %rcx,%rcx
4861  DB  117,10                              ; jne           205b <_sk_store_565_avx+0x9e>
4862  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; Common exit: rax = next qword at rsi, jump to it.
4863  DB  72,173                              ; lods          %ds:(%rsi),%rax
4864  DB  255,224                             ; jmpq          *%rax
; Partial path: index = (rcx & 7) - 1.  An index above 6 (i.e. rcx & 7 == 0)
; goes straight to the exit; otherwise dispatch through the offset table that
; follows the code.  Table entries are relative to the table's own address.
4865  DB  65,137,200                          ; mov           %ecx,%r8d
4866  DB  65,128,224,7                        ; and           $0x7,%r8b
4867  DB  65,254,200                          ; dec           %r8b
4868  DB  65,128,248,6                        ; cmp           $0x6,%r8b
4869  DB  119,236                             ; ja            2057 <_sk_store_565_avx+0x9a>
4870  DB  65,15,182,192                       ; movzbl        %r8b,%eax
4871  DB  76,141,5,66,0,0,0                   ; lea           0x42(%rip),%r8        # 20b8 <_sk_store_565_avx+0xfb>
4872  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
4873  DB  76,1,192                            ; add           %r8,%rax
4874  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands on the store of word k, which
; then falls through the stores of words k-1 .. 0.
4875  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
4876  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
4877  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
4878  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
4879  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
4880  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
4881  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
4882  DB  235,159                             ; jmp           2057 <_sk_store_565_avx+0x9a>
; Jump table (data, not code): seven little-endian signed 32-bit offsets,
; -9, -17, -25, -33, -41, -49, -57, i.e. the stores of words 0..6 above.
; The mnemonics shown on the right are a disassembler artifact.
4883  DB  247,255                             ; idiv          %edi
4884  DB  255                                 ; (bad)
4885  DB  255                                 ; (bad)
4886  DB  239                                 ; out           %eax,(%dx)
4887  DB  255                                 ; (bad)
4888  DB  255                                 ; (bad)
4889  DB  255,231                             ; jmpq          *%rdi
4890  DB  255                                 ; (bad)
4891  DB  255                                 ; (bad)
4892  DB  255                                 ; (bad)
4893  DB  223,255                             ; (bad)
4894  DB  255                                 ; (bad)
4895  DB  255,215                             ; callq         *%rdi
4896  DB  255                                 ; (bad)
4897  DB  255                                 ; (bad)
4898  DB  255,207                             ; dec           %edi
4899  DB  255                                 ; (bad)
4900  DB  255                                 ; (bad)
4901  DB  255,199                             ; inc           %edi
4902  DB  255                                 ; (bad)
4903  DB  255                                 ; (bad)
4904  DB  255                                 ; .byte         0xff
4905
; ---------------------------------------------------------------------------
; _sk_load_4444_avx
; Loads up to eight 16-bit values from memory and splits each one into four
; 4-bit fields, producing four float vectors:
;     ymm0 = float(v & 0xf000) * (1/0xf000)
;     ymm1 = float(v & 0x0f00) * (1/0x0f00)
;     ymm2 = float(v & 0x00f0) * (1/0x00f0)
;     ymm3 = float(v & 0x000f) * (1/0x000f)
; so each output lane lies in [0, 1].  (Presumably R, G, B, A of a 4444 pixel
; -- implied by the stage name only.)
;
; In:   rsi = cursor into a list of qwords; the first qword points to a
;             context whose first qword is the source base pointer.
;       rdi = element index into the source (scaled by 2 bytes).
;       rcx = 0 for a full 8-element load; otherwise the low 3 bits give the
;             number of elements (1..7) to load, remaining lanes read as 0.
; Out:  ymm0-ymm3 as above; jumps to the address in the next qword at rsi.
; Scratch: rax, r8, r9, r10, ymm8, ymm9, flags.
; ---------------------------------------------------------------------------
4906PUBLIC _sk_load_4444_avx
4907_sk_load_4444_avx LABEL PROC
; rax = context pointer (rsi advances by 8); r10 = source base pointer.
4908  DB  72,173                              ; lods          %ds:(%rsi),%rax
4909  DB  76,139,16                           ; mov           (%rax),%r10
; rcx != 0 -> partial load path further down; it re-enters at the vpxor below
; with xmm0 already holding the gathered words.
4910  DB  72,133,201                          ; test          %rcx,%rcx
4911  DB  15,133,245,0,0,0                    ; jne           21d7 <_sk_load_4444_avx+0x103>
4912  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
; Zero-extend the eight words in xmm0 to eight dwords in ymm9.
4913  DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
4914  DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
4915  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
4916  DB  196,99,125,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm9
; ymm0 = float(v & 0xf000) * 0x37888889 (float, approx. 1/61440 = 1/0xf000).
4917  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
4918  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
4919  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
4920  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
4921  DB  196,193,124,84,193                  ; vandps        %ymm9,%ymm0,%ymm0
4922  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
4923  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
4924  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4925  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
4926  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4927  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x0f00) * 0x39888889 (float, approx. 1/3840 = 1/0x0f00).
4928  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
4929  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
4930  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
4931  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
4932  DB  196,193,116,84,201                  ; vandps        %ymm9,%ymm1,%ymm1
4933  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
4934  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
4935  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
4936  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
4937  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
4938  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
; ymm2 = float(v & 0x00f0) * 0x3b888889 (float, approx. 1/240 = 1/0x00f0).
4939  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
4940  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
4941  DB  197,249,112,210,0                   ; vpshufd       $0x0,%xmm2,%xmm2
4942  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
4943  DB  196,193,108,84,209                  ; vandps        %ymm9,%ymm2,%ymm2
4944  DB  197,124,91,194                      ; vcvtdq2ps     %ymm2,%ymm8
4945  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
4946  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
4947  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
4948  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
4949  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
; ymm3 = float(v & 0x000f) * 0x3d888889 (float, approx. 1/15).
4950  DB  184,15,0,0,0                        ; mov           $0xf,%eax
4951  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4952  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
4953  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4954  DB  196,193,100,84,217                  ; vandps        %ymm9,%ymm3,%ymm3
4955  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
4956  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
4957  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
4958  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
4959  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
4960  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Exit: rax = next qword at rsi, jump to it.
4961  DB  72,173                              ; lods          %ds:(%rsi),%rax
4962  DB  255,224                             ; jmpq          *%rax
; Partial path: xmm0 = 0, index = (rcx & 7) - 1.  An index above 6 (i.e.
; rcx & 7 == 0) resumes the main path with all-zero input; otherwise dispatch
; through the offset table that follows the code (entries are relative to the
; table's own address).
4963  DB  65,137,200                          ; mov           %ecx,%r8d
4964  DB  65,128,224,7                        ; and           $0x7,%r8b
4965  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
4966  DB  65,254,200                          ; dec           %r8b
4967  DB  65,128,248,6                        ; cmp           $0x6,%r8b
4968  DB  15,135,249,254,255,255              ; ja            20e8 <_sk_load_4444_avx+0x14>
4969  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
4970  DB  76,141,13,74,0,0,0                  ; lea           0x4a(%rip),%r9        # 2244 <_sk_load_4444_avx+0x170>
4971  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
4972  DB  76,1,200                            ; add           %r9,%rax
4973  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: table entry 6 lands on the vpxor below, entries 5..0
; land on the matching vpinsrw; each then falls through the lower words.
4974  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
4975  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
4976  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
4977  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
4978  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
4979  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
4980  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
4981  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
4982  DB  233,165,254,255,255                 ; jmpq          20e8 <_sk_load_4444_avx+0x14>
; One padding byte, then the jump table (data, not code): seven little-endian
; signed 32-bit offsets, -13, -21, -29, -37, -45, -53, -65.
; The mnemonics shown on the right are a disassembler artifact.
4983  DB  144                                 ; nop
4984  DB  243,255                             ; repz          (bad)
4985  DB  255                                 ; (bad)
4986  DB  255                                 ; (bad)
4987  DB  235,255                             ; jmp           2249 <_sk_load_4444_avx+0x175>
4988  DB  255                                 ; (bad)
4989  DB  255,227                             ; jmpq          *%rbx
4990  DB  255                                 ; (bad)
4991  DB  255                                 ; (bad)
4992  DB  255                                 ; (bad)
4993  DB  219,255                             ; (bad)
4994  DB  255                                 ; (bad)
4995  DB  255,211                             ; callq         *%rbx
4996  DB  255                                 ; (bad)
4997  DB  255                                 ; (bad)
4998  DB  255,203                             ; dec           %ebx
4999  DB  255                                 ; (bad)
5000  DB  255                                 ; (bad)
5001  DB  255                                 ; (bad)
5002  DB  191                                 ; .byte         0xbf
5003  DB  255                                 ; (bad)
5004  DB  255                                 ; (bad)
5005  DB  255                                 ; .byte         0xff
5006
; ---------------------------------------------------------------------------
; _sk_gather_4444_avx
; Gathers eight 16-bit values from arbitrary positions and unpacks them exactly
; like the 4444 load: four 4-bit fields become four float vectors in [0, 1].
; Per lane the element index is
;     trunc(ymm1) * dword[ctx + 0x10] + trunc(ymm0)
; treated as an unsigned 32-bit value and scaled by 2 bytes.
; (Presumably ymm0/ymm1 are x/y coordinates and the dword is a row stride in
; pixels -- TODO confirm against the context struct definition.)
;
; In:   rsi = cursor into a list of qwords; the first qword points to a
;             context: qword[ctx] = source base pointer, dword[ctx+0x10] =
;             multiplier applied to ymm1.
;       ymm0, ymm1 = float coordinates (converted with truncation).
; Out:  ymm0-ymm3 = unpacked fields; jumps to the address in the next qword
;       at rsi.
; Saved/restored: rbp, r15, r14, r12, rbx (pushed on entry, popped on exit).
; Scratch: rax, r8-r11, ymm8, ymm9, flags.
; ---------------------------------------------------------------------------
5007PUBLIC _sk_gather_4444_avx
5008_sk_gather_4444_avx LABEL PROC
5009  DB  85                                  ; push          %rbp
5010  DB  65,87                               ; push          %r15
5011  DB  65,86                               ; push          %r14
5012  DB  65,84                               ; push          %r12
5013  DB  83                                  ; push          %rbx
; rax = context pointer (rsi advances by 8); r8 = source base pointer.
5014  DB  72,173                              ; lods          %ds:(%rsi),%rax
5015  DB  76,139,0                            ; mov           (%rax),%r8
; xmm1 = low four indices, xmm2 = high four indices (before adding ymm0):
; trunc(ymm1) multiplied by the broadcast dword at ctx+0x10.
5016  DB  197,254,91,209                      ; vcvttps2dq    %ymm1,%ymm2
5017  DB  197,249,110,72,16                   ; vmovd         0x10(%rax),%xmm1
5018  DB  197,249,112,217,0                   ; vpshufd       $0x0,%xmm1,%xmm3
5019  DB  196,226,97,64,202                   ; vpmulld       %xmm2,%xmm3,%xmm1
5020  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
5021  DB  196,226,97,64,210                   ; vpmulld       %xmm2,%xmm3,%xmm2
; Add trunc(ymm0) and move each 32-bit index into its own GPR:
;   lanes 4..7 -> r11, r10, r9, rax      lanes 0..3 -> r14, rbx, r12, r15
5022  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
5023  DB  196,227,125,25,195,1                ; vextractf128  $0x1,%ymm0,%xmm3
5024  DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
5025  DB  196,227,249,22,208,1                ; vpextrq       $0x1,%xmm2,%rax
5026  DB  65,137,193                          ; mov           %eax,%r9d
5027  DB  72,193,232,32                       ; shr           $0x20,%rax
5028  DB  196,193,249,126,210                 ; vmovq         %xmm2,%r10
5029  DB  69,137,211                          ; mov           %r10d,%r11d
5030  DB  73,193,234,32                       ; shr           $0x20,%r10
5031  DB  197,241,254,192                     ; vpaddd        %xmm0,%xmm1,%xmm0
5032  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
5033  DB  65,137,222                          ; mov           %ebx,%r14d
5034  DB  196,195,249,22,199,1                ; vpextrq       $0x1,%xmm0,%r15
5035  DB  69,137,252                          ; mov           %r15d,%r12d
5036  DB  73,193,239,32                       ; shr           $0x20,%r15
5037  DB  72,193,235,32                       ; shr           $0x20,%rbx
; Load the eight 16-bit elements one by one and assemble them as words 0..7
; of xmm0 (rbx/rbp are reused as temporaries for the loaded values).
5038  DB  65,15,183,28,88                     ; movzwl        (%r8,%rbx,2),%ebx
5039  DB  67,15,183,44,112                    ; movzwl        (%r8,%r14,2),%ebp
5040  DB  197,249,110,197                     ; vmovd         %ebp,%xmm0
5041  DB  197,249,196,195,1                   ; vpinsrw       $0x1,%ebx,%xmm0,%xmm0
5042  DB  67,15,183,28,96                     ; movzwl        (%r8,%r12,2),%ebx
5043  DB  197,249,196,195,2                   ; vpinsrw       $0x2,%ebx,%xmm0,%xmm0
5044  DB  67,15,183,28,120                    ; movzwl        (%r8,%r15,2),%ebx
5045  DB  197,249,196,195,3                   ; vpinsrw       $0x3,%ebx,%xmm0,%xmm0
5046  DB  67,15,183,44,88                     ; movzwl        (%r8,%r11,2),%ebp
5047  DB  197,249,196,197,4                   ; vpinsrw       $0x4,%ebp,%xmm0,%xmm0
5048  DB  67,15,183,44,80                     ; movzwl        (%r8,%r10,2),%ebp
5049  DB  197,249,196,197,5                   ; vpinsrw       $0x5,%ebp,%xmm0,%xmm0
5050  DB  67,15,183,44,72                     ; movzwl        (%r8,%r9,2),%ebp
5051  DB  197,249,196,197,6                   ; vpinsrw       $0x6,%ebp,%xmm0,%xmm0
5052  DB  65,15,183,4,64                      ; movzwl        (%r8,%rax,2),%eax
5053  DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
; Zero-extend the eight words in xmm0 to eight dwords in ymm9.
5054  DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
5055  DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
5056  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
5057  DB  196,99,125,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm9
; ymm0 = float(v & 0xf000) * 0x37888889 (float, approx. 1/61440 = 1/0xf000).
5058  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
5059  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
5060  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
5061  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
5062  DB  196,193,124,84,193                  ; vandps        %ymm9,%ymm0,%ymm0
5063  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
5064  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
5065  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5066  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
5067  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
5068  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x0f00) * 0x39888889 (float, approx. 1/3840 = 1/0x0f00).
5069  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
5070  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5071  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
5072  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
5073  DB  196,193,116,84,201                  ; vandps        %ymm9,%ymm1,%ymm1
5074  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
5075  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
5076  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
5077  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
5078  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
5079  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
; ymm2 = float(v & 0x00f0) * 0x3b888889 (float, approx. 1/240 = 1/0x00f0).
5080  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
5081  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
5082  DB  197,249,112,210,0                   ; vpshufd       $0x0,%xmm2,%xmm2
5083  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
5084  DB  196,193,108,84,209                  ; vandps        %ymm9,%ymm2,%ymm2
5085  DB  197,124,91,194                      ; vcvtdq2ps     %ymm2,%ymm8
5086  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
5087  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
5088  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
5089  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
5090  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
; ymm3 = float(v & 0x000f) * 0x3d888889 (float, approx. 1/15).
5091  DB  184,15,0,0,0                        ; mov           $0xf,%eax
5092  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
5093  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
5094  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
5095  DB  196,193,100,84,217                  ; vandps        %ymm9,%ymm3,%ymm3
5096  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
5097  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
5098  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
5099  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
5100  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
5101  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Exit: fetch the next qword at rsi, restore the saved registers in reverse
; push order, then jump to the fetched address.
5102  DB  72,173                              ; lods          %ds:(%rsi),%rax
5103  DB  91                                  ; pop           %rbx
5104  DB  65,92                               ; pop           %r12
5105  DB  65,94                               ; pop           %r14
5106  DB  65,95                               ; pop           %r15
5107  DB  93                                  ; pop           %rbp
5108  DB  255,224                             ; jmpq          *%rax
5109
; ---------------------------------------------------------------------------
; _sk_store_4444_avx
; Scales four float vectors by 15.0, packs them into eight 16-bit values and
; stores them to memory at a 2-byte stride:
;     word = (int(ymm0*15) << 12) | (int(ymm1*15) << 8)
;          | (int(ymm2*15) << 4)  |  int(ymm3*15)
; (presumably R, G, B, A of a 4444 pixel -- implied by the stage name only).
;
; In:   rsi = cursor into a list of qwords; the first qword points to a
;             context whose first qword is the destination base pointer.
;       rdi = element index into the destination (scaled by 2 bytes).
;       rcx = 0 for a full 8-element store; otherwise the low 3 bits give the
;             number of elements (1..7) to store.  If rcx != 0 but
;             (rcx & 7) == 0, nothing is stored.
;       ymm0-ymm3 = float inputs (not modified here).
; Out:  jumps to the address held in the next qword at rsi.
; Scratch: rax, r8, r9, ymm8-ymm11, flags.
; ---------------------------------------------------------------------------
5110PUBLIC _sk_store_4444_avx
5111_sk_store_4444_avx LABEL PROC
; rax = context pointer (rsi advances by 8); r9 = destination base pointer.
5112  DB  72,173                              ; lods          %ds:(%rsi),%rax
5113  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = broadcast 15.0f (0x41700000); ymm9 = int(ymm0 * 15) << 12.
; Integer shifts are done per 128-bit half and re-joined with vinsertf128.
5114  DB  184,0,0,112,65                      ; mov           $0x41700000,%eax
5115  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
5116  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
5117  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
5118  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
5119  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
5120  DB  196,193,41,114,241,12               ; vpslld        $0xc,%xmm9,%xmm10
5121  DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
5122  DB  196,193,49,114,241,12               ; vpslld        $0xc,%xmm9,%xmm9
5123  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
; ymm10 = int(ymm1 * 15) << 8, then OR it into ymm9.
5124  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
5125  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
5126  DB  196,193,33,114,242,8                ; vpslld        $0x8,%xmm10,%xmm11
5127  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
5128  DB  196,193,41,114,242,8                ; vpslld        $0x8,%xmm10,%xmm10
5129  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
5130  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
; ymm10 = int(ymm2 * 15) << 4.
5131  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
5132  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
5133  DB  196,193,33,114,242,4                ; vpslld        $0x4,%xmm10,%xmm11
5134  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
5135  DB  196,193,41,114,242,4                ; vpslld        $0x4,%xmm10,%xmm10
5136  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
; ymm8 = int(ymm3 * 15) unshifted; OR all four fields together into ymm8.
5137  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
5138  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
5139  DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
5140  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
; Narrow eight 32-bit lanes to eight 16-bit words in xmm8 (unsigned saturation).
5141  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
5142  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
; rcx == 0: store all 16 bytes and continue; otherwise take the partial path.
5143  DB  72,133,201                          ; test          %rcx,%rcx
5144  DB  117,10                              ; jne           24c4 <_sk_store_4444_avx+0xaf>
5145  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; Common exit: rax = next qword at rsi, jump to it.
5146  DB  72,173                              ; lods          %ds:(%rsi),%rax
5147  DB  255,224                             ; jmpq          *%rax
; Partial path: index = (rcx & 7) - 1.  An index above 6 (i.e. rcx & 7 == 0)
; goes straight to the exit; otherwise dispatch through the offset table that
; follows the code.  Table entries are relative to the table's own address.
5148  DB  65,137,200                          ; mov           %ecx,%r8d
5149  DB  65,128,224,7                        ; and           $0x7,%r8b
5150  DB  65,254,200                          ; dec           %r8b
5151  DB  65,128,248,6                        ; cmp           $0x6,%r8b
5152  DB  119,236                             ; ja            24c0 <_sk_store_4444_avx+0xab>
5153  DB  65,15,182,192                       ; movzbl        %r8b,%eax
5154  DB  76,141,5,69,0,0,0                   ; lea           0x45(%rip),%r8        # 2524 <_sk_store_4444_avx+0x10f>
5155  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
5156  DB  76,1,192                            ; add           %r8,%rax
5157  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands on the store of word k, which
; then falls through the stores of words k-1 .. 0.
5158  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
5159  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
5160  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
5161  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
5162  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
5163  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
5164  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
5165  DB  235,159                             ; jmp           24c0 <_sk_store_4444_avx+0xab>
; Three padding bytes (a multi-byte nop), then the jump table (data, not
; code): seven little-endian signed 32-bit offsets, -12, -20, -28, -36, -44,
; -52, -60, i.e. the stores of words 0..6 above.
; The mnemonics shown on the right are a disassembler artifact.
5166  DB  15,31,0                             ; nopl          (%rax)
5167  DB  244                                 ; hlt
5168  DB  255                                 ; (bad)
5169  DB  255                                 ; (bad)
5170  DB  255                                 ; (bad)
5171  DB  236                                 ; in            (%dx),%al
5172  DB  255                                 ; (bad)
5173  DB  255                                 ; (bad)
5174  DB  255,228                             ; jmpq          *%rsp
5175  DB  255                                 ; (bad)
5176  DB  255                                 ; (bad)
5177  DB  255                                 ; (bad)
5178  DB  220,255                             ; fdivr         %st,%st(7)
5179  DB  255                                 ; (bad)
5180  DB  255,212                             ; callq         *%rsp
5181  DB  255                                 ; (bad)
5182  DB  255                                 ; (bad)
5183  DB  255,204                             ; dec           %esp
5184  DB  255                                 ; (bad)
5185  DB  255                                 ; (bad)
5186  DB  255,196                             ; inc           %esp
5187  DB  255                                 ; (bad)
5188  DB  255                                 ; (bad)
5189  DB  255                                 ; .byte         0xff
5190
; ---------------------------------------------------------------------------
; _sk_load_8888_avx
; Loads up to eight 32-bit values from memory and splits each one into its
; four bytes, producing four float vectors scaled by 0x3b808081 (float,
; approx. 1/255):
;     ymm0 = float( v        & 0xff) * s
;     ymm1 = float((v >>  8) & 0xff) * s
;     ymm2 = float((v >> 16) & 0xff) * s
;     ymm3 = float( v >> 24        ) * s
; (presumably R, G, B, A of an 8888 pixel -- implied by the stage name only).
;
; In:   rsi = cursor into a list of qwords; the first qword points to a
;             context whose first qword is the source base pointer.
;       rdi = element index into the source (scaled by 4 bytes).
;       rcx = 0 for a full 8-element load; otherwise the low 3 bits give the
;             number of elements (1..7) to load, remaining lanes read as 0.
; Out:  ymm0-ymm3 as above; jumps to the address in the next qword at rsi.
; Scratch: rax, r8, r9, r10, ymm8-ymm11, flags.
; ---------------------------------------------------------------------------
5191PUBLIC _sk_load_8888_avx
5192_sk_load_8888_avx LABEL PROC
; rax = context pointer (rsi advances by 8); r10 = source base pointer.
5193  DB  72,173                              ; lods          %ds:(%rsi),%rax
5194  DB  76,139,16                           ; mov           (%rax),%r10
; rcx != 0 -> partial load path further down; it re-enters right after the
; vmovups below with ymm9 already assembled.
5195  DB  72,133,201                          ; test          %rcx,%rcx
5196  DB  15,133,157,0,0,0                    ; jne           25eb <_sk_load_8888_avx+0xab>
5197  DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
; ymm11 = broadcast 0xff mask; ymm8 = broadcast scale s.
; ymm0 = float(v & 0xff) * s.
5198  DB  184,255,0,0,0                       ; mov           $0xff,%eax
5199  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
5200  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
5201  DB  196,99,125,24,216,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm11
5202  DB  196,193,36,84,193                   ; vandps        %ymm9,%ymm11,%ymm0
5203  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
5204  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
5205  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5206  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
5207  DB  196,99,117,24,193,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm8
5208  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
; ymm1 = float((v >> 8) & 0xff) * s.  Shifts are done per 128-bit half;
; xmm3 keeps the upper half of v for the following shifts.
5209  DB  196,193,41,114,209,8                ; vpsrld        $0x8,%xmm9,%xmm10
5210  DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
5211  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
5212  DB  196,227,45,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm10,%ymm1
5213  DB  197,164,84,201                      ; vandps        %ymm1,%ymm11,%ymm1
5214  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
5215  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
; ymm2 = float((v >> 16) & 0xff) * s.
5216  DB  196,193,41,114,209,16               ; vpsrld        $0x10,%xmm9,%xmm10
5217  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
5218  DB  196,227,45,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm10,%ymm2
5219  DB  197,164,84,210                      ; vandps        %ymm2,%ymm11,%ymm2
5220  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
5221  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
; ymm3 = float(v >> 24) * s (no mask needed: the logical shift leaves 8 bits).
5222  DB  196,193,49,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm9
5223  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
5224  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
5225  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
5226  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
; Exit: rax = next qword at rsi, jump to it.
5227  DB  72,173                              ; lods          %ds:(%rsi),%rax
5228  DB  255,224                             ; jmpq          *%rax
; Partial path: ymm9 = 0, index = (rcx & 7) - 1.  An index above 6 (i.e.
; rcx & 7 == 0) resumes the main path with all-zero input; otherwise dispatch
; through the offset table that follows the code (entries are relative to the
; table's own address).
5229  DB  65,137,200                          ; mov           %ecx,%r8d
5230  DB  65,128,224,7                        ; and           $0x7,%r8b
5231  DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
5232  DB  65,254,200                          ; dec           %r8b
5233  DB  65,128,248,6                        ; cmp           $0x6,%r8b
5234  DB  15,135,80,255,255,255               ; ja            2554 <_sk_load_8888_avx+0x14>
5235  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
5236  DB  76,141,13,137,0,0,0                 ; lea           0x89(%rip),%r9        # 2698 <_sk_load_8888_avx+0x158>
5237  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
5238  DB  76,1,200                            ; add           %r9,%rax
5239  DB  255,224                             ; jmpq          *%rax
; Fall-through chain building ymm9 one dword at a time, highest lane first.
; Table entry 6 lands here: load lane 6 (offset 0x18) into an otherwise-zero ymm9.
5240  DB  196,193,121,110,68,186,24           ; vmovd         0x18(%r10,%rdi,4),%xmm0
5241  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
5242  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
5243  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
5244  DB  196,99,117,12,200,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm9
; Table entry 5 lands here: insert lane 5 (offset 0x14) into the upper half.
5245  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
5246  DB  196,195,121,34,68,186,20,1          ; vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
5247  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
; Table entry 4 lands here: insert lane 4 (offset 0x10) into the upper half.
5248  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
5249  DB  196,195,121,34,68,186,16,0          ; vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
5250  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
; Table entries 3..0 land on the matching vpinsrd below; each inserts into the
; low half and blends the low four lanes back into ymm9.
5251  DB  196,195,49,34,68,186,12,3           ; vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
5252  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
5253  DB  196,195,49,34,68,186,8,2            ; vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
5254  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
5255  DB  196,195,49,34,68,186,4,1            ; vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
5256  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
5257  DB  196,195,49,34,4,186,0               ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
5258  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
5259  DB  233,188,254,255,255                 ; jmpq          2554 <_sk_load_8888_avx+0x14>
; Jump table (data, not code): seven little-endian signed 32-bit offsets,
; -18, -32, -46, -60, -80, -100, -128, for lanes 0..6 respectively.
; The mnemonics shown on the right are a disassembler artifact.
5260  DB  238                                 ; out           %al,(%dx)
5261  DB  255                                 ; (bad)
5262  DB  255                                 ; (bad)
5263  DB  255,224                             ; jmpq          *%rax
5264  DB  255                                 ; (bad)
5265  DB  255                                 ; (bad)
5266  DB  255,210                             ; callq         *%rdx
5267  DB  255                                 ; (bad)
5268  DB  255                                 ; (bad)
5269  DB  255,196                             ; inc           %esp
5270  DB  255                                 ; (bad)
5271  DB  255                                 ; (bad)
5272  DB  255,176,255,255,255,156             ; pushq         -0x63000001(%rax)
5273  DB  255                                 ; (bad)
5274  DB  255                                 ; (bad)
5275  DB  255                                 ; .byte         0xff
5276  DB  128,255,255                         ; cmp           $0xff,%bh
5277  DB  255                                 ; .byte         0xff
5278
; _sk_gather_8888_avx
; Gathers eight 32-bit pixels from memory and unpacks their four bytes into
; four vectors of floats.
;   In:   rsi          -> qword stream; the first qword read is a context pointer
;         [ctx+0]      base pointer of the pixel data (held in r8)
;         [ctx+0x10]   32-bit factor multiplied into the truncated ymm1 lanes
;         ymm0, ymm1   floats, truncated to int32 to build the indices
;                      (presumably x and y coordinates, with [ctx+0x10] a row
;                      stride in pixels -- TODO confirm against the C++ stage)
;   Out:  ymm0..ymm3   bytes 0..3 of each gathered pixel, converted to float and
;                      multiplied by 0x3b808081 (approximately 1/255)
;   Uses: rax, r8-r11, ymm8-ymm11, flags. rbx, r12, r14, r15 are pushed on entry
;         and popped before the tail jump.
;   Exit: loads the next qword from the rsi stream and jumps to it.
5279PUBLIC _sk_gather_8888_avx
5280_sk_gather_8888_avx LABEL PROC
5281  DB  65,87                               ; push          %r15
5282  DB  65,86                               ; push          %r14
5283  DB  65,84                               ; push          %r12
5284  DB  83                                  ; push          %rbx
5285  DB  72,173                              ; lods          %ds:(%rsi),%rax
5286  DB  76,139,0                            ; mov           (%rax),%r8
  ; index = trunc(ymm0) + trunc(ymm1) * [ctx+0x10], computed per 32-bit lane.
  ; AVX (without AVX2) has no 256-bit integer multiply/add, so the low and
  ; high 128-bit halves are processed separately.
5287  DB  197,254,91,201                      ; vcvttps2dq    %ymm1,%ymm1
5288  DB  197,249,110,80,16                   ; vmovd         0x10(%rax),%xmm2
5289  DB  197,249,112,210,0                   ; vpshufd       $0x0,%xmm2,%xmm2
5290  DB  196,226,105,64,217                  ; vpmulld       %xmm1,%xmm2,%xmm3
5291  DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
5292  DB  196,226,105,64,201                  ; vpmulld       %xmm1,%xmm2,%xmm1
5293  DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
5294  DB  196,227,125,25,194,1                ; vextractf128  $0x1,%ymm0,%xmm2
5295  DB  197,241,254,202                     ; vpaddd        %xmm2,%xmm1,%xmm1
  ; Indices of lanes 4..7 -> r9, rax, r11, r10 (each zero-extended from 32 bits).
5296  DB  196,225,249,126,200                 ; vmovq         %xmm1,%rax
5297  DB  65,137,193                          ; mov           %eax,%r9d
5298  DB  72,193,232,32                       ; shr           $0x20,%rax
5299  DB  196,195,249,22,202,1                ; vpextrq       $0x1,%xmm1,%r10
5300  DB  69,137,211                          ; mov           %r10d,%r11d
5301  DB  73,193,234,32                       ; shr           $0x20,%r10
  ; Indices of lanes 0..3 -> r14, rbx, r12, r15 (each zero-extended from 32 bits).
5302  DB  197,225,254,192                     ; vpaddd        %xmm0,%xmm3,%xmm0
5303  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
5304  DB  65,137,222                          ; mov           %ebx,%r14d
5305  DB  196,195,249,22,199,1                ; vpextrq       $0x1,%xmm0,%r15
5306  DB  69,137,252                          ; mov           %r15d,%r12d
5307  DB  72,193,235,32                       ; shr           $0x20,%rbx
5308  DB  73,193,239,32                       ; shr           $0x20,%r15
  ; Gather: one 32-bit load per lane from r8 + index*4.
  ; Pixels 0..3 end up in xmm8, pixels 4..7 in xmm3.
5309  DB  196,129,121,110,4,176               ; vmovd         (%r8,%r14,4),%xmm0
5310  DB  196,195,121,34,4,152,1              ; vpinsrd       $0x1,(%r8,%rbx,4),%xmm0,%xmm0
5311  DB  196,131,121,34,4,160,2              ; vpinsrd       $0x2,(%r8,%r12,4),%xmm0,%xmm0
5312  DB  196,3,121,34,4,184,3                ; vpinsrd       $0x3,(%r8,%r15,4),%xmm0,%xmm8
5313  DB  196,129,121,110,4,136               ; vmovd         (%r8,%r9,4),%xmm0
5314  DB  196,195,121,34,4,128,1              ; vpinsrd       $0x1,(%r8,%rax,4),%xmm0,%xmm0
5315  DB  196,131,121,34,4,152,2              ; vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
5316  DB  196,131,121,34,28,144,3             ; vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
  ; ymm0 = float(pixel & 0xff) * 0x3b808081.
  ; ymm11 = 0xff mask in every lane, ymm9 = the scale factor in every lane.
5317  DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
5318  DB  184,255,0,0,0                       ; mov           $0xff,%eax
5319  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5320  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
5321  DB  196,99,117,24,217,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm11
5322  DB  197,164,84,192                      ; vandps        %ymm0,%ymm11,%ymm0
5323  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
5324  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
5325  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5326  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
5327  DB  196,99,117,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm9
5328  DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
  ; ymm1 = float((pixel >> 8) & 0xff) * scale.
5329  DB  196,193,41,114,208,8                ; vpsrld        $0x8,%xmm8,%xmm10
5330  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
5331  DB  196,227,45,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm10,%ymm1
5332  DB  197,164,84,201                      ; vandps        %ymm1,%ymm11,%ymm1
5333  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
5334  DB  196,193,116,89,201                  ; vmulps        %ymm9,%ymm1,%ymm1
  ; ymm2 = float((pixel >> 16) & 0xff) * scale.
5335  DB  196,193,41,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm10
5336  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
5337  DB  196,227,45,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm10,%ymm2
5338  DB  197,164,84,210                      ; vandps        %ymm2,%ymm11,%ymm2
5339  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
5340  DB  196,193,108,89,209                  ; vmulps        %ymm9,%ymm2,%ymm2
  ; ymm3 = float(pixel >> 24) * scale; no mask is needed after a logical shift by 24.
5341  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
5342  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
5343  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
5344  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
5345  DB  196,193,100,89,217                  ; vmulps        %ymm9,%ymm3,%ymm3
  ; Fetch the next qword of the rsi stream, restore saved registers, and jump to it.
5346  DB  72,173                              ; lods          %ds:(%rsi),%rax
5347  DB  91                                  ; pop           %rbx
5348  DB  65,92                               ; pop           %r12
5349  DB  65,94                               ; pop           %r14
5350  DB  65,95                               ; pop           %r15
5351  DB  255,224                             ; jmpq          *%rax
5352
; _sk_store_8888_avx
; Packs four float vectors into eight 32-bit pixels and stores them.
;   In:   rsi          -> qword stream; the first qword read is a context pointer
;         [ctx+0]      destination base pointer (held in r9)
;         rdi          pixel index; the store address is r9 + rdi*4
;         rcx          0 = store all eight pixels; otherwise (rcx & 7) pixels
;                      are stored by the tail path
;         ymm0..ymm3   float channels; each is multiplied by 0x437f0000 (255.0)
;                      and converted to int32 with vcvtps2dq
;   Out:  memory       pixel = c0 | c1 << 8 | c2 << 16 | c3 << 24
;   Uses: rax, r8, ymm8-ymm11, flags. ymm0..ymm3 are left unchanged.
;   Exit: loads the next qword from the rsi stream and jumps to it.
5353PUBLIC _sk_store_8888_avx
5354_sk_store_8888_avx LABEL PROC
5355  DB  72,173                              ; lods          %ds:(%rsi),%rax
5356  DB  76,139,8                            ; mov           (%rax),%r9
  ; ymm8 = 255.0 in every lane.
5357  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
5358  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
5359  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
5360  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
  ; ymm9 = int(ymm0 * 255) | int(ymm1 * 255) << 8.
  ; Integer shifts are done per 128-bit half because AVX lacks 256-bit integer ops.
5361  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
5362  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
5363  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
5364  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
5365  DB  196,193,33,114,242,8                ; vpslld        $0x8,%xmm10,%xmm11
5366  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
5367  DB  196,193,41,114,242,8                ; vpslld        $0x8,%xmm10,%xmm10
5368  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
5369  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
  ; ymm10 = int(ymm2 * 255) << 16.
5370  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
5371  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
5372  DB  196,193,33,114,242,16               ; vpslld        $0x10,%xmm10,%xmm11
5373  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
5374  DB  196,193,41,114,242,16               ; vpslld        $0x10,%xmm10,%xmm10
5375  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
  ; ymm8 = int(ymm3 * 255) << 24, then OR everything together into ymm8.
5376  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
5377  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
5378  DB  196,193,33,114,240,24               ; vpslld        $0x18,%xmm8,%xmm11
5379  DB  196,67,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm8
5380  DB  196,193,57,114,240,24               ; vpslld        $0x18,%xmm8,%xmm8
5381  DB  196,67,37,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
5382  DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
5383  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
  ; rcx == 0: full store of all eight pixels (32 bytes, unaligned).
5384  DB  72,133,201                          ; test          %rcx,%rcx
5385  DB  117,10                              ; jne           2899 <_sk_store_8888_avx+0xa4>
5386  DB  196,65,124,17,4,185                 ; vmovups       %ymm8,(%r9,%rdi,4)
  ; +0xa0: common exit, also reached from the tail path.
5387  DB  72,173                              ; lods          %ds:(%rsi),%rax
5388  DB  255,224                             ; jmpq          *%rax
  ; +0xa4: tail path. r8b = (cl & 7) - 1. A result above 6 (i.e. cl & 7 == 0)
  ; stores nothing and goes straight to the exit.
5389  DB  65,137,200                          ; mov           %ecx,%r8d
5390  DB  65,128,224,7                        ; and           $0x7,%r8b
5391  DB  65,254,200                          ; dec           %r8b
5392  DB  65,128,248,6                        ; cmp           $0x6,%r8b
5393  DB  119,236                             ; ja            2895 <_sk_store_8888_avx+0xa0>
  ; Dispatch through the table at +0x113: each entry is a signed 32-bit offset
  ; relative to the table base, so target = table + table[count - 1].
5394  DB  65,15,182,192                       ; movzbl        %r8b,%eax
5395  DB  76,141,5,84,0,0,0                   ; lea           0x54(%rip),%r8        # 2908 <_sk_store_8888_avx+0x113>
5396  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
5397  DB  76,1,192                            ; add           %r8,%rax
5398  DB  255,224                             ; jmpq          *%rax
  ; Store cases fall through from the highest pixel down to pixel 0:
  ; entry for 7 pixels starts here, each later entry point stores one pixel fewer.
5399  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
5400  DB  196,67,121,22,76,185,24,2           ; vpextrd       $0x2,%xmm9,0x18(%r9,%rdi,4)
5401  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
5402  DB  196,67,121,22,76,185,20,1           ; vpextrd       $0x1,%xmm9,0x14(%r9,%rdi,4)
5403  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
5404  DB  196,65,122,17,76,185,16             ; vmovss        %xmm9,0x10(%r9,%rdi,4)
5405  DB  196,67,121,22,68,185,12,3           ; vpextrd       $0x3,%xmm8,0xc(%r9,%rdi,4)
5406  DB  196,67,121,22,68,185,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r9,%rdi,4)
5407  DB  196,67,121,22,68,185,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r9,%rdi,4)
5408  DB  196,65,121,126,4,185                ; vmovd         %xmm8,(%r9,%rdi,4)
5409  DB  235,143                             ; jmp           2895 <_sk_store_8888_avx+0xa0>
  ; Two-byte nop; it is never executed (the jmp above is unconditional) and
  ; places the table that follows at +0x113.
5410  DB  102,144                             ; xchg          %ax,%ax
  ; +0x113: jump table DATA, not code. The mnemonics in the comments below are
  ; the disassembler's misreading of these bytes and should be ignored.
  ; Seven little-endian signed 32-bit offsets from the table base, indexed by
  ; (pixel count - 1):
  ;   -10 (1 pixel), -18 (2), -26 (3), -34 (4), -47 (5), -61 (6), -75 (7)
5411  DB  246,255                             ; idiv          %bh
5412  DB  255                                 ; (bad)
5413  DB  255                                 ; (bad)
5414  DB  238                                 ; out           %al,(%dx)
5415  DB  255                                 ; (bad)
5416  DB  255                                 ; (bad)
5417  DB  255,230                             ; jmpq          *%rsi
5418  DB  255                                 ; (bad)
5419  DB  255                                 ; (bad)
5420  DB  255                                 ; (bad)
5421  DB  222,255                             ; fdivrp        %st,%st(7)
5422  DB  255                                 ; (bad)
5423  DB  255,209                             ; callq         *%rcx
5424  DB  255                                 ; (bad)
5425  DB  255                                 ; (bad)
5426  DB  255,195                             ; inc           %ebx
5427  DB  255                                 ; (bad)
5428  DB  255                                 ; (bad)
5429  DB  255                                 ; .byte         0xff
5430  DB  181,255                             ; mov           $0xff,%ch
5431  DB  255                                 ; (bad)
5432  DB  255                                 ; .byte         0xff
5433
; _sk_load_f16_avx
; Loads up to eight pixels of four 16-bit channels each (8 bytes per pixel),
; de-interleaves them, and widens each channel to a vector of 32-bit floats.
;   In:   rsi          -> qword stream; the first qword read is a context pointer
;         [ctx+0]      source base pointer (held in rax)
;         rdi          pixel index; the load address is rax + rdi*8
;         rcx          0 = load all eight pixels (64 bytes); otherwise the tail
;                      path loads rcx pixels and zero-fills the rest
;   Out:  ymm0..ymm3   channels 0..3 of the eight pixels as floats
;   Uses: rax, ymm8-ymm13, flags.
;   Exit: loads the next qword from the rsi stream and jumps to it.
; Conversion per channel: 16-bit words that compare (signed) below 0x0400 are
; cleared; the rest are zero-extended to 32 bits, shifted left by 13, and the
; result, reinterpreted as float, is multiplied by 0x77800000 (2^112).
; NOTE(review): the signed compare also clears words with the top bit set, so
; negative half values appear to be flushed to zero -- confirm this is intended.
5434PUBLIC _sk_load_f16_avx
5435_sk_load_f16_avx LABEL PROC
5436  DB  72,173                              ; lods          %ds:(%rsi),%rax
5437  DB  72,139,0                            ; mov           (%rax),%rax
5438  DB  72,133,201                          ; test          %rcx,%rcx
5439  DB  15,133,17,1,0,0                     ; jne           2a43 <_sk_load_f16_avx+0x11f>
  ; Full load: pixels 0-1 -> xmm8, 2-3 -> xmm2, 4-5 -> xmm3, 6-7 -> xmm9.
5440  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
5441  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
5442  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
5443  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
  ; +0x25: the tail path rejoins here with the same four registers filled in.
  ; Two rounds of word unpacks transpose the data so that
  ;   xmm11 = ch0,ch1 of pixels 0-3    xmm2 = ch0,ch1 of pixels 4-7
  ;   xmm8  = ch2,ch3 of pixels 0-3    xmm9 = ch2,ch3 of pixels 4-7
5444  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
5445  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
5446  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
5447  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
5448  DB  197,121,97,218                      ; vpunpcklwd    %xmm2,%xmm0,%xmm11
5449  DB  197,121,105,194                     ; vpunpckhwd    %xmm2,%xmm0,%xmm8
5450  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
5451  DB  197,113,105,203                     ; vpunpckhwd    %xmm3,%xmm1,%xmm9
  ; Channel 0 -> ymm0. xmm13 = 0x0400 in every 16-bit word (threshold),
  ; xmm10 = zero (used to zero-extend the high four words),
  ; ymm12 = 0x77800000 in every lane (multiplier); all three are reused below.
5452  DB  197,161,108,194                     ; vpunpcklqdq   %xmm2,%xmm11,%xmm0
5453  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
5454  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5455  DB  197,121,112,233,0                   ; vpshufd       $0x0,%xmm1,%xmm13
5456  DB  197,145,101,200                     ; vpcmpgtw      %xmm0,%xmm13,%xmm1
5457  DB  197,241,223,192                     ; vpandn        %xmm0,%xmm1,%xmm0
5458  DB  196,226,121,51,200                  ; vpmovzxwd     %xmm0,%xmm1
5459  DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
5460  DB  196,193,121,105,194                 ; vpunpckhwd    %xmm10,%xmm0,%xmm0
5461  DB  197,241,114,241,13                  ; vpslld        $0xd,%xmm1,%xmm1
5462  DB  197,249,114,240,13                  ; vpslld        $0xd,%xmm0,%xmm0
5463  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
5464  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
5465  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5466  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
5467  DB  196,99,117,24,225,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm12
5468  DB  197,156,89,192                      ; vmulps        %ymm0,%ymm12,%ymm0
  ; Channel 1 -> ymm1 (same sequence).
5469  DB  197,161,109,202                     ; vpunpckhqdq   %xmm2,%xmm11,%xmm1
5470  DB  197,145,101,209                     ; vpcmpgtw      %xmm1,%xmm13,%xmm2
5471  DB  197,233,223,201                     ; vpandn        %xmm1,%xmm2,%xmm1
5472  DB  196,226,121,51,209                  ; vpmovzxwd     %xmm1,%xmm2
5473  DB  196,193,113,105,202                 ; vpunpckhwd    %xmm10,%xmm1,%xmm1
5474  DB  197,233,114,242,13                  ; vpslld        $0xd,%xmm2,%xmm2
5475  DB  197,241,114,241,13                  ; vpslld        $0xd,%xmm1,%xmm1
5476  DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
5477  DB  197,156,89,201                      ; vmulps        %ymm1,%ymm12,%ymm1
  ; Channel 2 -> ymm2.
5478  DB  196,193,57,108,209                  ; vpunpcklqdq   %xmm9,%xmm8,%xmm2
5479  DB  197,145,101,218                     ; vpcmpgtw      %xmm2,%xmm13,%xmm3
5480  DB  197,225,223,210                     ; vpandn        %xmm2,%xmm3,%xmm2
5481  DB  196,226,121,51,218                  ; vpmovzxwd     %xmm2,%xmm3
5482  DB  196,193,105,105,210                 ; vpunpckhwd    %xmm10,%xmm2,%xmm2
5483  DB  197,225,114,243,13                  ; vpslld        $0xd,%xmm3,%xmm3
5484  DB  197,233,114,242,13                  ; vpslld        $0xd,%xmm2,%xmm2
5485  DB  196,227,101,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm3,%ymm2
5486  DB  197,156,89,210                      ; vmulps        %ymm2,%ymm12,%ymm2
  ; Channel 3 -> ymm3.
5487  DB  196,65,57,109,193                   ; vpunpckhqdq   %xmm9,%xmm8,%xmm8
5488  DB  196,193,17,101,216                  ; vpcmpgtw      %xmm8,%xmm13,%xmm3
5489  DB  196,193,97,223,216                  ; vpandn        %xmm8,%xmm3,%xmm3
5490  DB  196,98,121,51,195                   ; vpmovzxwd     %xmm3,%xmm8
5491  DB  196,193,97,105,218                  ; vpunpckhwd    %xmm10,%xmm3,%xmm3
5492  DB  196,193,57,114,240,13               ; vpslld        $0xd,%xmm8,%xmm8
5493  DB  197,225,114,243,13                  ; vpslld        $0xd,%xmm3,%xmm3
5494  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
5495  DB  197,156,89,219                      ; vmulps        %ymm3,%ymm12,%ymm3
5496  DB  72,173                              ; lods          %ds:(%rsi),%rax
5497  DB  255,224                             ; jmpq          *%rax
  ; +0x11f: tail path (rcx != 0). Pixels are loaded 8 bytes at a time into the
  ; same registers the full load uses; rcx is checked after each load and any
  ; register not yet written is zeroed before jumping back to +0x25.
5498  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
5499  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
5500  DB  72,131,249,1                        ; cmp           $0x1,%rcx
5501  DB  116,79                              ; je            2aa2 <_sk_load_f16_avx+0x17e>
5502  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
5503  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5504  DB  114,67                              ; jb            2aa2 <_sk_load_f16_avx+0x17e>
5505  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
5506  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5507  DB  116,68                              ; je            2aaf <_sk_load_f16_avx+0x18b>
5508  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
5509  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5510  DB  114,56                              ; jb            2aaf <_sk_load_f16_avx+0x18b>
5511  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
5512  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5513  DB  15,132,194,254,255,255              ; je            2949 <_sk_load_f16_avx+0x25>
5514  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
5515  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5516  DB  15,130,178,254,255,255              ; jb            2949 <_sk_load_f16_avx+0x25>
5517  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
5518  DB  233,167,254,255,255                 ; jmpq          2949 <_sk_load_f16_avx+0x25>
  ; +0x17e: one or two pixels loaded; zero xmm3 and xmm2 (xmm9 is already zero).
5519  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
5520  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
5521  DB  233,154,254,255,255                 ; jmpq          2949 <_sk_load_f16_avx+0x25>
  ; +0x18b: three or four pixels loaded; only xmm3 still needs zeroing.
5522  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
5523  DB  233,145,254,255,255                 ; jmpq          2949 <_sk_load_f16_avx+0x25>
5524
; _sk_store_f16_avx
; Narrows four float vectors to 16-bit values, interleaves them into eight
; 8-byte pixels, and stores them.
;   In:   rsi          -> qword stream; the first qword read is a context pointer
;         [ctx+0]      destination base pointer (held in r8)
;         rdi          pixel index; the store address is r8 + rdi*8
;         rcx          0 = store all eight pixels (64 bytes); otherwise the tail
;                      path stores rcx pixels
;         ymm0..ymm3   float channels 0..3
;   Out:  memory       per pixel: four 16-bit words in channel order 0,1,2,3
;   Uses: rax, r8, ymm8-ymm13, flags. ymm0..ymm3 are left unchanged.
;   Exit: loads the next qword from the rsi stream and jumps to it.
; Conversion per channel: multiply by 0x07800000 (2^-112), shift the 32-bit
; bit pattern right by 13, and pack to 16 bits with unsigned saturation.
5525PUBLIC _sk_store_f16_avx
5526_sk_store_f16_avx LABEL PROC
5527  DB  72,173                              ; lods          %ds:(%rsi),%rax
5528  DB  76,139,0                            ; mov           (%rax),%r8
  ; ymm8 = 0x07800000 in every lane.
5529  DB  184,0,0,128,7                       ; mov           $0x7800000,%eax
5530  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
5531  DB  196,65,121,112,192,0                ; vpshufd       $0x0,%xmm8,%xmm8
5532  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
  ; Channel 0 -> eight 16-bit words in xmm9.
5533  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
5534  DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
5535  DB  196,193,41,114,210,13               ; vpsrld        $0xd,%xmm10,%xmm10
5536  DB  196,193,49,114,209,13               ; vpsrld        $0xd,%xmm9,%xmm9
5537  DB  196,66,49,43,202                    ; vpackusdw     %xmm10,%xmm9,%xmm9
  ; Channel 1 -> xmm10.
5538  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
5539  DB  196,67,125,25,211,1                 ; vextractf128  $0x1,%ymm10,%xmm11
5540  DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
5541  DB  196,193,41,114,210,13               ; vpsrld        $0xd,%xmm10,%xmm10
5542  DB  196,66,41,43,211                    ; vpackusdw     %xmm11,%xmm10,%xmm10
  ; Channel 2 -> xmm11.
5543  DB  197,60,89,218                       ; vmulps        %ymm2,%ymm8,%ymm11
5544  DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
5545  DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
5546  DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
5547  DB  196,66,33,43,220                    ; vpackusdw     %xmm12,%xmm11,%xmm11
  ; Channel 3 -> xmm8 (the multiplier is no longer needed).
5548  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
5549  DB  196,67,125,25,196,1                 ; vextractf128  $0x1,%ymm8,%xmm12
5550  DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
5551  DB  196,193,57,114,208,13               ; vpsrld        $0xd,%xmm8,%xmm8
5552  DB  196,66,57,43,196                    ; vpackusdw     %xmm12,%xmm8,%xmm8
  ; Interleave: words ch0/ch1 and ch2/ch3, then dwords, giving
  ; xmm11 = pixels 0-1, xmm10 = pixels 2-3, xmm9 = pixels 4-5, xmm8 = pixels 6-7.
5553  DB  196,65,49,97,226                    ; vpunpcklwd    %xmm10,%xmm9,%xmm12
5554  DB  196,65,49,105,234                   ; vpunpckhwd    %xmm10,%xmm9,%xmm13
5555  DB  196,65,33,97,200                    ; vpunpcklwd    %xmm8,%xmm11,%xmm9
5556  DB  196,65,33,105,192                   ; vpunpckhwd    %xmm8,%xmm11,%xmm8
5557  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
5558  DB  196,65,25,106,209                   ; vpunpckhdq    %xmm9,%xmm12,%xmm10
5559  DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
5560  DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
  ; rcx == 0: full store of 64 bytes (unaligned).
5561  DB  72,133,201                          ; test          %rcx,%rcx
5562  DB  117,31                              ; jne           2b8a <_sk_store_f16_avx+0xd2>
5563  DB  196,65,120,17,28,248                ; vmovups       %xmm11,(%r8,%rdi,8)
5564  DB  196,65,120,17,84,248,16             ; vmovups       %xmm10,0x10(%r8,%rdi,8)
5565  DB  196,65,120,17,76,248,32             ; vmovups       %xmm9,0x20(%r8,%rdi,8)
5566  DB  196,65,122,127,68,248,48            ; vmovdqu       %xmm8,0x30(%r8,%rdi,8)
  ; +0xce: common exit, also reached from the tail path.
5567  DB  72,173                              ; lods          %ds:(%rsi),%rax
5568  DB  255,224                             ; jmpq          *%rax
  ; +0xd2: tail path (rcx != 0). Stores one 8-byte pixel at a time and leaves
  ; for the exit as soon as rcx pixels have been written.
5569  DB  196,65,121,214,28,248               ; vmovq         %xmm11,(%r8,%rdi,8)
5570  DB  72,131,249,1                        ; cmp           $0x1,%rcx
5571  DB  116,240                             ; je            2b86 <_sk_store_f16_avx+0xce>
5572  DB  196,65,121,23,92,248,8              ; vmovhpd       %xmm11,0x8(%r8,%rdi,8)
5573  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5574  DB  114,227                             ; jb            2b86 <_sk_store_f16_avx+0xce>
5575  DB  196,65,121,214,84,248,16            ; vmovq         %xmm10,0x10(%r8,%rdi,8)
  ; The je below reuses the flags of the "cmp $0x3" above; the vmovq store in
  ; between does not modify flags. The same pattern repeats after "cmp $0x5".
5576  DB  116,218                             ; je            2b86 <_sk_store_f16_avx+0xce>
5577  DB  196,65,121,23,84,248,24             ; vmovhpd       %xmm10,0x18(%r8,%rdi,8)
5578  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5579  DB  114,205                             ; jb            2b86 <_sk_store_f16_avx+0xce>
5580  DB  196,65,121,214,76,248,32            ; vmovq         %xmm9,0x20(%r8,%rdi,8)
5581  DB  116,196                             ; je            2b86 <_sk_store_f16_avx+0xce>
5582  DB  196,65,121,23,76,248,40             ; vmovhpd       %xmm9,0x28(%r8,%rdi,8)
5583  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5584  DB  114,183                             ; jb            2b86 <_sk_store_f16_avx+0xce>
5585  DB  196,65,121,214,68,248,48            ; vmovq         %xmm8,0x30(%r8,%rdi,8)
5586  DB  235,174                             ; jmp           2b86 <_sk_store_f16_avx+0xce>
5587
; _sk_load_u16_be_avx
; Loads up to eight pixels of four 16-bit channels each (8 bytes per pixel),
; de-interleaves them, swaps the two bytes of every 16-bit word, and converts
; each channel to a vector of floats.
;   In:   rsi          -> qword stream; the first qword read is a context pointer
;         [ctx+0]      source base pointer (held in rax)
;         rdi          pixel index; the load address is rax + rdi*8
;         rcx          0 = load all eight pixels (64 bytes); otherwise the tail
;                      path loads rcx pixels and zero-fills the rest
;   Out:  ymm0..ymm3   channels 0..3: float(byte-swapped word) * 0x37800080
;                      (approximately 1/65535)
;   Uses: rax, ymm8-ymm12, flags.
;   Exit: loads the next qword from the rsi stream and jumps to it.
5588PUBLIC _sk_load_u16_be_avx
5589_sk_load_u16_be_avx LABEL PROC
5590  DB  72,173                              ; lods          %ds:(%rsi),%rax
5591  DB  72,139,0                            ; mov           (%rax),%rax
5592  DB  72,133,201                          ; test          %rcx,%rcx
5593  DB  15,133,1,1,0,0                      ; jne           2ce7 <_sk_load_u16_be_avx+0x10f>
  ; Full load: pixels 0-1 -> xmm8, 2-3 -> xmm2, 4-5 -> xmm3, 6-7 -> xmm9.
5594  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
5595  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
5596  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
5597  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
  ; +0x25: the tail path rejoins here with the same four registers filled in.
  ; Two rounds of word unpacks transpose the data so that
  ;   xmm10 = ch0,ch1 of pixels 0-3    xmm2 = ch0,ch1 of pixels 4-7
  ;   xmm8  = ch2,ch3 of pixels 0-3    xmm9 = ch2,ch3 of pixels 4-7
5598  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
5599  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
5600  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
5601  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
5602  DB  197,121,97,210                      ; vpunpcklwd    %xmm2,%xmm0,%xmm10
5603  DB  197,121,105,194                     ; vpunpckhwd    %xmm2,%xmm0,%xmm8
5604  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
5605  DB  197,113,105,203                     ; vpunpckhwd    %xmm3,%xmm1,%xmm9
  ; ymm12 = 0x37800080 in every lane (scale factor for all four channels).
5606  DB  184,128,0,128,55                    ; mov           $0x37800080,%eax
5607  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
5608  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
5609  DB  196,99,125,24,224,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm12
  ; Channel 0 -> ymm0: byte-swap each word ((w << 8) | (w >> 8)), zero-extend
  ; to 32 bits (xmm11 = zero supplies the high halves), convert, and scale.
5610  DB  197,169,108,194                     ; vpunpcklqdq   %xmm2,%xmm10,%xmm0
5611  DB  197,241,113,240,8                   ; vpsllw        $0x8,%xmm0,%xmm1
5612  DB  197,249,113,208,8                   ; vpsrlw        $0x8,%xmm0,%xmm0
5613  DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
5614  DB  196,65,33,239,219                   ; vpxor         %xmm11,%xmm11,%xmm11
5615  DB  196,193,121,105,203                 ; vpunpckhwd    %xmm11,%xmm0,%xmm1
5616  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
5617  DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
5618  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
5619  DB  197,156,89,192                      ; vmulps        %ymm0,%ymm12,%ymm0
  ; Channel 1 -> ymm1 (same sequence).
5620  DB  197,169,109,202                     ; vpunpckhqdq   %xmm2,%xmm10,%xmm1
5621  DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
5622  DB  197,241,113,209,8                   ; vpsrlw        $0x8,%xmm1,%xmm1
5623  DB  197,233,235,201                     ; vpor          %xmm1,%xmm2,%xmm1
5624  DB  196,193,113,105,211                 ; vpunpckhwd    %xmm11,%xmm1,%xmm2
5625  DB  196,226,121,51,201                  ; vpmovzxwd     %xmm1,%xmm1
5626  DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
5627  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
5628  DB  197,156,89,201                      ; vmulps        %ymm1,%ymm12,%ymm1
  ; Channel 2 -> ymm2.
5629  DB  196,193,57,108,209                  ; vpunpcklqdq   %xmm9,%xmm8,%xmm2
5630  DB  197,169,113,242,8                   ; vpsllw        $0x8,%xmm2,%xmm10
5631  DB  197,233,113,210,8                   ; vpsrlw        $0x8,%xmm2,%xmm2
5632  DB  197,169,235,210                     ; vpor          %xmm2,%xmm10,%xmm2
5633  DB  196,65,105,105,211                  ; vpunpckhwd    %xmm11,%xmm2,%xmm10
5634  DB  196,226,121,51,210                  ; vpmovzxwd     %xmm2,%xmm2
5635  DB  196,195,109,24,210,1                ; vinsertf128   $0x1,%xmm10,%ymm2,%ymm2
5636  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
5637  DB  197,156,89,210                      ; vmulps        %ymm2,%ymm12,%ymm2
  ; Channel 3 -> ymm3.
5638  DB  196,193,57,109,217                  ; vpunpckhqdq   %xmm9,%xmm8,%xmm3
5639  DB  197,185,113,243,8                   ; vpsllw        $0x8,%xmm3,%xmm8
5640  DB  197,225,113,211,8                   ; vpsrlw        $0x8,%xmm3,%xmm3
5641  DB  197,185,235,219                     ; vpor          %xmm3,%xmm8,%xmm3
5642  DB  196,65,97,105,195                   ; vpunpckhwd    %xmm11,%xmm3,%xmm8
5643  DB  196,226,121,51,219                  ; vpmovzxwd     %xmm3,%xmm3
5644  DB  196,195,101,24,216,1                ; vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
5645  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
5646  DB  197,156,89,219                      ; vmulps        %ymm3,%ymm12,%ymm3
5647  DB  72,173                              ; lods          %ds:(%rsi),%rax
5648  DB  255,224                             ; jmpq          *%rax
  ; +0x10f: tail path (rcx != 0). Pixels are loaded 8 bytes at a time into the
  ; same registers the full load uses; rcx is checked after each load and any
  ; register not yet written is zeroed before jumping back to +0x25.
5649  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
5650  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
5651  DB  72,131,249,1                        ; cmp           $0x1,%rcx
5652  DB  116,79                              ; je            2d46 <_sk_load_u16_be_avx+0x16e>
5653  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
5654  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5655  DB  114,67                              ; jb            2d46 <_sk_load_u16_be_avx+0x16e>
5656  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
5657  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5658  DB  116,68                              ; je            2d53 <_sk_load_u16_be_avx+0x17b>
5659  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
5660  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5661  DB  114,56                              ; jb            2d53 <_sk_load_u16_be_avx+0x17b>
5662  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
5663  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5664  DB  15,132,210,254,255,255              ; je            2bfd <_sk_load_u16_be_avx+0x25>
5665  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
5666  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5667  DB  15,130,194,254,255,255              ; jb            2bfd <_sk_load_u16_be_avx+0x25>
5668  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
5669  DB  233,183,254,255,255                 ; jmpq          2bfd <_sk_load_u16_be_avx+0x25>
  ; +0x16e: one or two pixels loaded; zero xmm3 and xmm2 (xmm9 is already zero).
5670  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
5671  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
5672  DB  233,170,254,255,255                 ; jmpq          2bfd <_sk_load_u16_be_avx+0x25>
  ; +0x17b: three or four pixels loaded; only xmm3 still needs zeroing.
5673  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
5674  DB  233,161,254,255,255                 ; jmpq          2bfd <_sk_load_u16_be_avx+0x25>
5675
5676PUBLIC _sk_store_u16_be_avx
5677_sk_store_u16_be_avx LABEL PROC
5678  DB  72,173                              ; lods          %ds:(%rsi),%rax
5679  DB  76,139,0                            ; mov           (%rax),%r8
5680  DB  184,0,255,127,71                    ; mov           $0x477fff00,%eax
5681  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
5682  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
5683  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
5684  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
5685  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
5686  DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
5687  DB  196,66,49,43,202                    ; vpackusdw     %xmm10,%xmm9,%xmm9
5688  DB  196,193,41,113,241,8                ; vpsllw        $0x8,%xmm9,%xmm10
5689  DB  196,193,49,113,209,8                ; vpsrlw        $0x8,%xmm9,%xmm9
5690  DB  196,65,41,235,201                   ; vpor          %xmm9,%xmm10,%xmm9
5691  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
5692  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
5693  DB  196,67,125,25,211,1                 ; vextractf128  $0x1,%ymm10,%xmm11
5694  DB  196,66,41,43,211                    ; vpackusdw     %xmm11,%xmm10,%xmm10
5695  DB  196,193,33,113,242,8                ; vpsllw        $0x8,%xmm10,%xmm11
5696  DB  196,193,41,113,210,8                ; vpsrlw        $0x8,%xmm10,%xmm10
5697  DB  196,65,33,235,210                   ; vpor          %xmm10,%xmm11,%xmm10
5698  DB  197,60,89,218                       ; vmulps        %ymm2,%ymm8,%ymm11
5699  DB  196,65,125,91,219                   ; vcvtps2dq     %ymm11,%ymm11
5700  DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
5701  DB  196,66,33,43,220                    ; vpackusdw     %xmm12,%xmm11,%xmm11
5702  DB  196,193,25,113,243,8                ; vpsllw        $0x8,%xmm11,%xmm12
5703  DB  196,193,33,113,211,8                ; vpsrlw        $0x8,%xmm11,%xmm11
5704  DB  196,65,25,235,219                   ; vpor          %xmm11,%xmm12,%xmm11
5705  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
5706  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
5707  DB  196,67,125,25,196,1                 ; vextractf128  $0x1,%ymm8,%xmm12
5708  DB  196,66,57,43,196                    ; vpackusdw     %xmm12,%xmm8,%xmm8
5709  DB  196,193,25,113,240,8                ; vpsllw        $0x8,%xmm8,%xmm12
5710  DB  196,193,57,113,208,8                ; vpsrlw        $0x8,%xmm8,%xmm8
5711  DB  196,65,25,235,192                   ; vpor          %xmm8,%xmm12,%xmm8
5712  DB  196,65,49,97,226                    ; vpunpcklwd    %xmm10,%xmm9,%xmm12
5713  DB  196,65,49,105,234                   ; vpunpckhwd    %xmm10,%xmm9,%xmm13
5714  DB  196,65,33,97,200                    ; vpunpcklwd    %xmm8,%xmm11,%xmm9
5715  DB  196,65,33,105,192                   ; vpunpckhwd    %xmm8,%xmm11,%xmm8
5716  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
5717  DB  196,65,25,106,209                   ; vpunpckhdq    %xmm9,%xmm12,%xmm10
5718  DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
5719  DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
5720  DB  72,133,201                          ; test          %rcx,%rcx
5721  DB  117,31                              ; jne           2e56 <_sk_store_u16_be_avx+0xfa>
5722  DB  196,65,120,17,28,248                ; vmovups       %xmm11,(%r8,%rdi,8)
5723  DB  196,65,120,17,84,248,16             ; vmovups       %xmm10,0x10(%r8,%rdi,8)
5724  DB  196,65,120,17,76,248,32             ; vmovups       %xmm9,0x20(%r8,%rdi,8)
5725  DB  196,65,122,127,68,248,48            ; vmovdqu       %xmm8,0x30(%r8,%rdi,8)
5726  DB  72,173                              ; lods          %ds:(%rsi),%rax
5727  DB  255,224                             ; jmpq          *%rax
5728  DB  196,65,121,214,28,248               ; vmovq         %xmm11,(%r8,%rdi,8)
5729  DB  72,131,249,1                        ; cmp           $0x1,%rcx
5730  DB  116,240                             ; je            2e52 <_sk_store_u16_be_avx+0xf6>
5731  DB  196,65,121,23,92,248,8              ; vmovhpd       %xmm11,0x8(%r8,%rdi,8)
5732  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5733  DB  114,227                             ; jb            2e52 <_sk_store_u16_be_avx+0xf6>
5734  DB  196,65,121,214,84,248,16            ; vmovq         %xmm10,0x10(%r8,%rdi,8)
5735  DB  116,218                             ; je            2e52 <_sk_store_u16_be_avx+0xf6>
5736  DB  196,65,121,23,84,248,24             ; vmovhpd       %xmm10,0x18(%r8,%rdi,8)
5737  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5738  DB  114,205                             ; jb            2e52 <_sk_store_u16_be_avx+0xf6>
5739  DB  196,65,121,214,76,248,32            ; vmovq         %xmm9,0x20(%r8,%rdi,8)
5740  DB  116,196                             ; je            2e52 <_sk_store_u16_be_avx+0xf6>
5741  DB  196,65,121,23,76,248,40             ; vmovhpd       %xmm9,0x28(%r8,%rdi,8)
5742  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5743  DB  114,183                             ; jb            2e52 <_sk_store_u16_be_avx+0xf6>
5744  DB  196,65,121,214,68,248,48            ; vmovq         %xmm8,0x30(%r8,%rdi,8)
5745  DB  235,174                             ; jmp           2e52 <_sk_store_u16_be_avx+0xf6>
5746
; _sk_load_f32_avx
; Reads up to eight 16-byte records (four 32-bit floats each) starting at
; base + rdi*16, where base is the qword stored at the address fetched by the
; first lods, and transposes them so that ymm0..ymm3 each collect one of the
; four float positions across the records (lane i comes from record i).
; rcx selects how much is read via the jump table at the end of the routine:
;   rcx == 0     all eight records are read
;   rcx == 1..7  only the first rcx records are read; lanes belonging to
;                records that were not read hold leftover register contents
;   rcx  > 7     every load is skipped and only the transpose runs
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, r8, r9, r10, ymm8..ymm10.
5747PUBLIC _sk_load_f32_avx
5748_sk_load_f32_avx LABEL PROC
5749  DB  72,173                              ; lods          %ds:(%rsi),%rax
5750  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5751  DB  119,110                             ; ja            2f1a <_sk_load_f32_avx+0x76>
5752  DB  76,139,0                            ; mov           (%rax),%r8
5753  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
; r10 = address of the jump table that follows the routine body; the entry
; for rcx is a signed 32-bit offset that is added back onto the table address.
5754  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 2f44 <_sk_load_f32_avx+0xa0>
5755  DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
5756  DB  76,1,208                            ; add           %r10,%rax
5757  DB  255,224                             ; jmpq          *%rax
; Load ladder, entered at different rungs by the dispatch above. Records 7..4
; go into the upper 128-bit halves, records 3..0 into the lower halves, and
; each vblendpd $0xc merges a lower half with the matching upper half.
5758  DB  196,3,125,24,68,136,112,1           ; vinsertf128   $0x1,0x70(%r8,%r9,4),%ymm0,%ymm8
5759  DB  196,131,125,24,92,136,96,1          ; vinsertf128   $0x1,0x60(%r8,%r9,4),%ymm0,%ymm3
5760  DB  196,131,125,24,76,136,80,1          ; vinsertf128   $0x1,0x50(%r8,%r9,4),%ymm0,%ymm1
5761  DB  196,131,125,24,84,136,64,1          ; vinsertf128   $0x1,0x40(%r8,%r9,4),%ymm0,%ymm2
5762  DB  196,129,121,16,68,136,48            ; vmovupd       0x30(%r8,%r9,4),%xmm0
5763  DB  196,195,125,13,192,12               ; vblendpd      $0xc,%ymm8,%ymm0,%ymm0
5764  DB  196,1,121,16,68,136,32              ; vmovupd       0x20(%r8,%r9,4),%xmm8
5765  DB  196,99,61,13,203,12                 ; vblendpd      $0xc,%ymm3,%ymm8,%ymm9
5766  DB  196,129,121,16,92,136,16            ; vmovupd       0x10(%r8,%r9,4),%xmm3
5767  DB  196,99,101,13,209,12                ; vblendpd      $0xc,%ymm1,%ymm3,%ymm10
5768  DB  196,129,121,16,12,136               ; vmovupd       (%r8,%r9,4),%xmm1
5769  DB  196,227,117,13,202,12               ; vblendpd      $0xc,%ymm2,%ymm1,%ymm1
; 4x4 transpose within each 128-bit half: interleave 32-bit elements, then
; 64-bit pairs, leaving one float position per output register in ymm0..ymm3.
5770  DB  196,193,116,20,210                  ; vunpcklps     %ymm10,%ymm1,%ymm2
5771  DB  196,193,116,21,218                  ; vunpckhps     %ymm10,%ymm1,%ymm3
5772  DB  197,180,20,200                      ; vunpcklps     %ymm0,%ymm9,%ymm1
5773  DB  197,52,21,192                       ; vunpckhps     %ymm0,%ymm9,%ymm8
5774  DB  197,237,20,193                      ; vunpcklpd     %ymm1,%ymm2,%ymm0
5775  DB  197,237,21,201                      ; vunpckhpd     %ymm1,%ymm2,%ymm1
5776  DB  196,193,101,20,208                  ; vunpcklpd     %ymm8,%ymm3,%ymm2
5777  DB  196,193,101,21,216                  ; vunpckhpd     %ymm8,%ymm3,%ymm3
5778  DB  72,173                              ; lods          %ds:(%rsi),%rax
5779  DB  255,224                             ; jmpq          *%rax
; Everything below is data, not code: two bytes of padding (the xchg) followed
; by the 8-entry jump table addressed by the lea above. The mnemonics in the
; trailing comments are an artifact of disassembling the table as instructions.
; Each entry is a little-endian signed 32-bit offset from the table start:
;   entry 0: -125  -> load at 0x70 (reads all eight records)
;   entry 1:  -54  -> load at 0x00 (record 0 only)
;   entry 2:  -67  -> load at 0x10 (records 0..1)
;   entry 3:  -80  -> load at 0x20 (records 0..2)
;   entry 4:  -93  -> load at 0x30 (records 0..3)
;   entry 5: -101  -> load at 0x40 (records 0..4)
;   entry 6: -109  -> load at 0x50 (records 0..5)
;   entry 7: -117  -> load at 0x60 (records 0..6)
5780  DB  102,144                             ; xchg          %ax,%ax
5781  DB  131,255,255                         ; cmp           $0xffffffff,%edi
5782  DB  255,202                             ; dec           %edx
5783  DB  255                                 ; (bad)
5784  DB  255                                 ; (bad)
5785  DB  255                                 ; (bad)
5786  DB  189,255,255,255,176                 ; mov           $0xb0ffffff,%ebp
5787  DB  255                                 ; (bad)
5788  DB  255                                 ; (bad)
5789  DB  255,163,255,255,255,155             ; jmpq          *-0x64000001(%rbx)
5790  DB  255                                 ; (bad)
5791  DB  255                                 ; (bad)
5792  DB  255,147,255,255,255,139             ; callq         *-0x74000001(%rbx)
5793  DB  255                                 ; (bad)
5794  DB  255                                 ; (bad)
5795  DB  255                                 ; .byte         0xff
5796
; _sk_store_f32_avx
; Transposes ymm0..ymm3 into 16-byte records (record i = lane i of ymm0, ymm1,
; ymm2, ymm3 in that order) and writes them to base + rdi*16, where base is
; the qword stored at the address fetched by the first lods.
;   rcx == 0  all eight records are written (128 bytes)
;   rcx != 0  only the first rcx records are written (the ladder handles 1..7)
; ymm0..ymm3 are left unchanged. Ends by fetching the next qword from (rsi)
; and jumping to it. Scratch registers written: rax, r8, ymm8..ymm13.
5797PUBLIC _sk_store_f32_avx
5798_sk_store_f32_avx LABEL PROC
5799  DB  72,173                              ; lods          %ds:(%rsi),%rax
5800  DB  76,139,0                            ; mov           (%rax),%r8
5801  DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
; 4x4 transpose within each 128-bit half. Afterwards:
;   ymm10 = records 0|4, ymm9 = records 1|5, ymm8 = records 2|6, ymm11 = records 3|7
5802  DB  197,124,20,193                      ; vunpcklps     %ymm1,%ymm0,%ymm8
5803  DB  197,124,21,217                      ; vunpckhps     %ymm1,%ymm0,%ymm11
5804  DB  197,108,20,203                      ; vunpcklps     %ymm3,%ymm2,%ymm9
5805  DB  197,108,21,227                      ; vunpckhps     %ymm3,%ymm2,%ymm12
5806  DB  196,65,61,20,209                    ; vunpcklpd     %ymm9,%ymm8,%ymm10
5807  DB  196,65,61,21,201                    ; vunpckhpd     %ymm9,%ymm8,%ymm9
5808  DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
5809  DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
5810  DB  72,133,201                          ; test          %rcx,%rcx
5811  DB  117,55                              ; jne           2fd1 <_sk_store_f32_avx+0x6d>
; Full path: regroup the halves into records 0|1, 2|3, 4|5, 6|7 and write
; four 32-byte stores.
5812  DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
5813  DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
5814  DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
5815  DB  196,67,61,6,195,49                  ; vperm2f128    $0x31,%ymm11,%ymm8,%ymm8
5816  DB  196,65,125,17,36,128                ; vmovupd       %ymm12,(%r8,%rax,4)
5817  DB  196,65,125,17,108,128,32            ; vmovupd       %ymm13,0x20(%r8,%rax,4)
5818  DB  196,65,125,17,76,128,64             ; vmovupd       %ymm9,0x40(%r8,%rax,4)
5819  DB  196,65,125,17,68,128,96             ; vmovupd       %ymm8,0x60(%r8,%rax,4)
; Common exit, also the target of every branch in the partial-store ladder.
5820  DB  72,173                              ; lods          %ds:(%rsi),%rax
5821  DB  255,224                             ; jmpq          *%rax
; Partial path (rcx != 0): write one 16-byte record at a time and leave as
; soon as rcx records are out. Each je that follows a store reuses the flags
; of the preceding cmp; the vector stores in between do not modify flags.
5822  DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
5823  DB  72,131,249,1                        ; cmp           $0x1,%rcx
5824  DB  116,240                             ; je            2fcd <_sk_store_f32_avx+0x69>
5825  DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
5826  DB  72,131,249,3                        ; cmp           $0x3,%rcx
5827  DB  114,227                             ; jb            2fcd <_sk_store_f32_avx+0x69>
5828  DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
5829  DB  116,218                             ; je            2fcd <_sk_store_f32_avx+0x69>
5830  DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
5831  DB  72,131,249,5                        ; cmp           $0x5,%rcx
5832  DB  114,205                             ; jb            2fcd <_sk_store_f32_avx+0x69>
5833  DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
5834  DB  116,195                             ; je            2fcd <_sk_store_f32_avx+0x69>
5835  DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
5836  DB  72,131,249,7                        ; cmp           $0x7,%rcx
5837  DB  114,181                             ; jb            2fcd <_sk_store_f32_avx+0x69>
5838  DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
5839  DB  235,171                             ; jmp           2fcd <_sk_store_f32_avx+0x69>
5840
; _sk_clamp_x_avx
; ymm0 = min(max(0, ymm0), L'), where L is the 32-bit float at the address
; fetched by the first lods and L' is L with 1 subtracted from its 32-bit
; integer bit pattern in every lane (for a positive, normal L that is the
; next representable float below L - NOTE(review): confirm L is always such).
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5841PUBLIC _sk_clamp_x_avx
5842_sk_clamp_x_avx LABEL PROC
5843  DB  72,173                              ; lods          %ds:(%rsi),%rax
5844  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
5845  DB  197,60,95,200                       ; vmaxps        %ymm0,%ymm8,%ymm9
5846  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; Integer "minus one" on the limit: xmm10 is set to all ones (-1 per lane)
; and added to each 128-bit half separately, then the halves are rejoined.
5847  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
5848  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5849  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
5850  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
5851  DB  196,227,61,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm0
5852  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
5853  DB  72,173                              ; lods          %ds:(%rsi),%rax
5854  DB  255,224                             ; jmpq          *%rax
5855
; _sk_clamp_y_avx
; Same operation as the x variant but applied to ymm1:
; ymm1 = min(max(0, ymm1), L'), where L is the 32-bit float at the address
; fetched by the first lods and L' is L with 1 subtracted from its 32-bit
; integer bit pattern in every lane.
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5856PUBLIC _sk_clamp_y_avx
5857_sk_clamp_y_avx LABEL PROC
5858  DB  72,173                              ; lods          %ds:(%rsi),%rax
5859  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
5860  DB  197,60,95,201                       ; vmaxps        %ymm1,%ymm8,%ymm9
5861  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; Integer "minus one" on the limit, performed per 128-bit half with an
; all-ones addend and then rejoined into ymm1.
5862  DB  196,99,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm1
5863  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5864  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
5865  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
5866  DB  196,227,61,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm8,%ymm1
5867  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
5868  DB  72,173                              ; lods          %ds:(%rsi),%rax
5869  DB  255,224                             ; jmpq          *%rax
5870
; _sk_repeat_x_avx
; Wraps ymm0 into a period of length L, where L is the 32-bit float at the
; address fetched by the first lods:
;   ymm0 = min(ymm0 - floor(ymm0 / L) * L, L')
; L' is L with 1 subtracted from its 32-bit integer bit pattern in every lane,
; which keeps the result strictly below L when L is a positive normal float
; (NOTE(review): confirm callers guarantee that).
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5871PUBLIC _sk_repeat_x_avx
5872_sk_repeat_x_avx LABEL PROC
5873  DB  72,173                              ; lods          %ds:(%rsi),%rax
5874  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
5875  DB  196,65,124,94,200                   ; vdivps        %ymm8,%ymm0,%ymm9
; Rounding-control immediate 1 selects round-toward-negative-infinity (floor).
5876  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
5877  DB  196,65,52,89,200                    ; vmulps        %ymm8,%ymm9,%ymm9
5878  DB  196,65,124,92,201                   ; vsubps        %ymm9,%ymm0,%ymm9
; Build L' per 128-bit half (add all-ones = subtract 1 from the bit pattern).
5879  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
5880  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5881  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
5882  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
5883  DB  196,227,61,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm0
5884  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
5885  DB  72,173                              ; lods          %ds:(%rsi),%rax
5886  DB  255,224                             ; jmpq          *%rax
5887
; _sk_repeat_y_avx
; Same operation as the x variant but applied to ymm1:
;   ymm1 = min(ymm1 - floor(ymm1 / L) * L, L')
; L is the 32-bit float at the address fetched by the first lods; L' is L
; with 1 subtracted from its 32-bit integer bit pattern in every lane.
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5888PUBLIC _sk_repeat_y_avx
5889_sk_repeat_y_avx LABEL PROC
5890  DB  72,173                              ; lods          %ds:(%rsi),%rax
5891  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
5892  DB  196,65,116,94,200                   ; vdivps        %ymm8,%ymm1,%ymm9
; Rounding-control immediate 1 selects round-toward-negative-infinity (floor).
5893  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
5894  DB  196,65,52,89,200                    ; vmulps        %ymm8,%ymm9,%ymm9
5895  DB  196,65,116,92,201                   ; vsubps        %ymm9,%ymm1,%ymm9
; Build L' per 128-bit half (add all-ones = subtract 1 from the bit pattern).
5896  DB  196,99,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm1
5897  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5898  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
5899  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
5900  DB  196,227,61,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm8,%ymm1
5901  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
5902  DB  72,173                              ; lods          %ds:(%rsi),%rax
5903  DB  255,224                             ; jmpq          *%rax
5904
; _sk_mirror_x_avx
; Reflects ymm0 back and forth over a span of length L, where L is the 32-bit
; float at the address fetched by the first lods. With t = ymm0 - L:
;   ymm0 = min(| t - floor(t / 2L) * 2L - L |, L')
; L' is L with 1 subtracted from its 32-bit integer bit pattern in every lane.
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5905PUBLIC _sk_mirror_x_avx
5906_sk_mirror_x_avx LABEL PROC
5907  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = scalar L, ymm9 = L in every lane, ymm10 = t = ymm0 - L.
5908  DB  197,121,110,0                       ; vmovd         (%rax),%xmm8
5909  DB  196,65,121,112,200,0                ; vpshufd       $0x0,%xmm8,%xmm9
5910  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
5911  DB  196,65,124,92,209                   ; vsubps        %ymm9,%ymm0,%ymm10
; ymm0 = 2L in every lane (scalar L + L, then broadcast).
5912  DB  196,193,58,88,192                   ; vaddss        %xmm8,%xmm8,%xmm0
5913  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
5914  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
; ymm0 = t - floor(t / 2L) * 2L - L
5915  DB  197,44,94,192                       ; vdivps        %ymm0,%ymm10,%ymm8
5916  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
5917  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
5918  DB  197,172,92,192                      ; vsubps        %ymm0,%ymm10,%ymm0
5919  DB  196,193,124,92,193                  ; vsubps        %ymm9,%ymm0,%ymm0
; Absolute value: v AND (0 - v) differ only in the sign bit, so the AND
; clears it and keeps the magnitude bits.
5920  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
5921  DB  197,60,92,192                       ; vsubps        %ymm0,%ymm8,%ymm8
5922  DB  197,60,84,192                       ; vandps        %ymm0,%ymm8,%ymm8
; Build L' per 128-bit half (add all-ones = subtract 1 from the bit pattern)
; and clamp the result to it.
5923  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
5924  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5925  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
5926  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
5927  DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
5928  DB  197,188,93,192                      ; vminps        %ymm0,%ymm8,%ymm0
5929  DB  72,173                              ; lods          %ds:(%rsi),%rax
5930  DB  255,224                             ; jmpq          *%rax
5931
; _sk_mirror_y_avx
; Same operation as the x variant but applied to ymm1. L is the 32-bit float
; at the address fetched by the first lods. With t = ymm1 - L:
;   ymm1 = min(| t - floor(t / 2L) * 2L - L |, L')
; L' is L with 1 subtracted from its 32-bit integer bit pattern in every lane.
; Ends by fetching the next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm10.
5932PUBLIC _sk_mirror_y_avx
5933_sk_mirror_y_avx LABEL PROC
5934  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = scalar L, ymm9 = L in every lane, ymm10 = t = ymm1 - L.
5935  DB  197,121,110,0                       ; vmovd         (%rax),%xmm8
5936  DB  196,65,121,112,200,0                ; vpshufd       $0x0,%xmm8,%xmm9
5937  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
5938  DB  196,65,116,92,209                   ; vsubps        %ymm9,%ymm1,%ymm10
; ymm1 = 2L in every lane (scalar L + L, then broadcast).
5939  DB  196,193,58,88,200                   ; vaddss        %xmm8,%xmm8,%xmm1
5940  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
5941  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
; ymm1 = t - floor(t / 2L) * 2L - L
5942  DB  197,44,94,193                       ; vdivps        %ymm1,%ymm10,%ymm8
5943  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
5944  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
5945  DB  197,172,92,201                      ; vsubps        %ymm1,%ymm10,%ymm1
5946  DB  196,193,116,92,201                  ; vsubps        %ymm9,%ymm1,%ymm1
; Absolute value: v AND (0 - v) differ only in the sign bit, so the AND
; clears it and keeps the magnitude bits.
5947  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
5948  DB  197,60,92,193                       ; vsubps        %ymm1,%ymm8,%ymm8
5949  DB  197,60,84,193                       ; vandps        %ymm1,%ymm8,%ymm8
; Build L' per 128-bit half (add all-ones = subtract 1 from the bit pattern)
; and clamp the result to it.
5950  DB  196,99,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm1
5951  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
5952  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
5953  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
5954  DB  196,227,53,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm9,%ymm1
5955  DB  197,188,93,201                      ; vminps        %ymm1,%ymm8,%ymm1
5956  DB  72,173                              ; lods          %ds:(%rsi),%rax
5957  DB  255,224                             ; jmpq          *%rax
5958
; _sk_luminance_to_alpha_avx
; Replaces ymm3 with a weighted sum of ymm0, ymm1 and ymm2 and clears the
; three inputs:
;   ymm3 = c0*ymm0 + c1*ymm1 + c2*ymm2
;   ymm0 = ymm1 = ymm2 = 0
; The weights are immediates holding IEEE-754 single-precision bit patterns:
;   c0 = 0x3e59b3d0 (~0.2126), c1 = 0x3f371759 (~0.7152), c2 = 0x3d93dd98 (~0.0722)
; Each is moved through eax into an xmm register and broadcast to all eight
; lanes. The only lods here fetches the jump target for the final jmp.
; Scratch registers written: rax.
5959PUBLIC _sk_luminance_to_alpha_avx
5960_sk_luminance_to_alpha_avx LABEL PROC
; ymm0 = c0 * ymm0
5961  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
5962  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
5963  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
5964  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
5965  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
; ymm0 += c1 * ymm1
5966  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
5967  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
5968  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
5969  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
5970  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
5971  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
; ymm3 = ymm0 + c2 * ymm2
5972  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
5973  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
5974  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
5975  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
5976  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
5977  DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
5978  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Zero the three input registers before continuing.
5979  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
5980  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
5981  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
5982  DB  255,224                             ; jmpq          *%rax
5983
; _sk_matrix_2x3_avx
; Applies a 2x3 affine transform to (ymm0, ymm1). m[] is the array of six
; 32-bit floats at the address fetched by the first lods; coefficients are
; read column by column:
;   new ymm0 = m[0]*ymm0 + m[2]*ymm1 + m[4]
;   new ymm1 = m[1]*ymm0 + m[3]*ymm1 + m[5]
; Both results are computed from the original ymm0/ymm1; the first is held in
; ymm8 until the end. Ends by fetching the next qword from (rsi) and jumping
; to it. Scratch registers written: rax, ymm8..ymm11.
5984PUBLIC _sk_matrix_2x3_avx
5985_sk_matrix_2x3_avx LABEL PROC
5986  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm8 = m[0]*ymm0 + (m[2]*ymm1 + m[4])
5987  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
5988  DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
5989  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
5990  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
5991  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
5992  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
5993  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; ymm1 = m[1]*ymm0 + (m[3]*ymm1 + m[5])
5994  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
5995  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
5996  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
5997  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
5998  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
5999  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
6000  DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
6001  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Move the deferred first result into place.
6002  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
6003  DB  255,224                             ; jmpq          *%rax
6004
; _sk_matrix_3x4_avx
; Applies a 3x4 affine transform to (ymm0, ymm1, ymm2). m[] is the array of
; twelve 32-bit floats at the address fetched by the first lods; coefficients
; are read column by column:
;   new ymm0 = m[0]*ymm0 + m[3]*ymm1 + m[6]*ymm2 + m[9]
;   new ymm1 = m[1]*ymm0 + m[4]*ymm1 + m[7]*ymm2 + m[10]
;   new ymm2 = m[2]*ymm0 + m[5]*ymm1 + m[8]*ymm2 + m[11]
; All three results are computed from the original inputs; the first two are
; held in ymm8/ymm9 until the end. ymm3 is not touched. Ends by fetching the
; next qword from (rsi) and jumping to it.
; Scratch registers written: rax, ymm8..ymm13.
6005PUBLIC _sk_matrix_3x4_avx
6006_sk_matrix_3x4_avx LABEL PROC
6007  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm8 = m[0]*ymm0 + m[3]*ymm1 + m[6]*ymm2 + m[9]
6008  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
6009  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
6010  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
6011  DB  196,98,125,24,88,36                 ; vbroadcastss  0x24(%rax),%ymm11
6012  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
6013  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
6014  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
6015  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
6016  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
6017  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; ymm9 = m[1]*ymm0 + m[4]*ymm1 + m[7]*ymm2 + m[10]
6018  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
6019  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
6020  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
6021  DB  196,98,125,24,96,40                 ; vbroadcastss  0x28(%rax),%ymm12
6022  DB  197,36,89,218                       ; vmulps        %ymm2,%ymm11,%ymm11
6023  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
6024  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
6025  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
6026  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
6027  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; ymm2 = m[2]*ymm0 + m[5]*ymm1 + m[8]*ymm2 + m[11]
6028  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
6029  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
6030  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
6031  DB  196,98,125,24,104,44                ; vbroadcastss  0x2c(%rax),%ymm13
6032  DB  197,156,89,210                      ; vmulps        %ymm2,%ymm12,%ymm2
6033  DB  196,193,108,88,213                  ; vaddps        %ymm13,%ymm2,%ymm2
6034  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
6035  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
6036  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
6037  DB  197,252,88,209                      ; vaddps        %ymm1,%ymm0,%ymm2
6038  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Move the two deferred results into place.
6039  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
6040  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
6041  DB  255,224                             ; jmpq          *%rax
6042
; _sk_matrix_4x5_avx
; Applies a 4x5 affine transform to (ymm0, ymm1, ymm2, ymm3). m[] is the
; array of twenty 32-bit floats at the address fetched by the first lods;
; coefficients are read column by column:
;   new ymm0 = m[0]*ymm0 + m[4]*ymm1 + m[8]*ymm2  + m[12]*ymm3 + m[16]
;   new ymm1 = m[1]*ymm0 + m[5]*ymm1 + m[9]*ymm2  + m[13]*ymm3 + m[17]
;   new ymm2 = m[2]*ymm0 + m[6]*ymm1 + m[10]*ymm2 + m[14]*ymm3 + m[18]
;   new ymm3 = m[3]*ymm0 + m[7]*ymm1 + m[11]*ymm2 + m[15]*ymm3 + m[19]
; All four results are computed from the original inputs; the first three are
; held in ymm8..ymm10 until the end. Ends by fetching the next qword from
; (rsi) and jumping to it. Scratch registers written: rax, ymm8..ymm15.
6043PUBLIC _sk_matrix_4x5_avx
6044_sk_matrix_4x5_avx LABEL PROC
6045  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm8 = m[0]*ymm0 + m[4]*ymm1 + m[8]*ymm2 + m[12]*ymm3 + m[16]
6046  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
6047  DB  196,98,125,24,72,16                 ; vbroadcastss  0x10(%rax),%ymm9
6048  DB  196,98,125,24,80,32                 ; vbroadcastss  0x20(%rax),%ymm10
6049  DB  196,98,125,24,88,48                 ; vbroadcastss  0x30(%rax),%ymm11
6050  DB  196,98,125,24,96,64                 ; vbroadcastss  0x40(%rax),%ymm12
6051  DB  197,36,89,219                       ; vmulps        %ymm3,%ymm11,%ymm11
6052  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
6053  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
6054  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
6055  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
6056  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
6057  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
6058  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; ymm9 = m[1]*ymm0 + m[5]*ymm1 + m[9]*ymm2 + m[13]*ymm3 + m[17]
6059  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
6060  DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
6061  DB  196,98,125,24,88,36                 ; vbroadcastss  0x24(%rax),%ymm11
6062  DB  196,98,125,24,96,52                 ; vbroadcastss  0x34(%rax),%ymm12
6063  DB  196,98,125,24,104,68                ; vbroadcastss  0x44(%rax),%ymm13
6064  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
6065  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
6066  DB  197,36,89,218                       ; vmulps        %ymm2,%ymm11,%ymm11
6067  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
6068  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
6069  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
6070  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
6071  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; ymm10 = m[2]*ymm0 + m[6]*ymm1 + m[10]*ymm2 + m[14]*ymm3 + m[18]
6072  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
6073  DB  196,98,125,24,88,24                 ; vbroadcastss  0x18(%rax),%ymm11
6074  DB  196,98,125,24,96,40                 ; vbroadcastss  0x28(%rax),%ymm12
6075  DB  196,98,125,24,104,56                ; vbroadcastss  0x38(%rax),%ymm13
6076  DB  196,98,125,24,112,72                ; vbroadcastss  0x48(%rax),%ymm14
6077  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
6078  DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
6079  DB  197,28,89,226                       ; vmulps        %ymm2,%ymm12,%ymm12
6080  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
6081  DB  197,36,89,217                       ; vmulps        %ymm1,%ymm11,%ymm11
6082  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
6083  DB  197,44,89,208                       ; vmulps        %ymm0,%ymm10,%ymm10
6084  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
; ymm3 = m[3]*ymm0 + m[7]*ymm1 + m[11]*ymm2 + m[15]*ymm3 + m[19]
6085  DB  196,98,125,24,88,12                 ; vbroadcastss  0xc(%rax),%ymm11
6086  DB  196,98,125,24,96,28                 ; vbroadcastss  0x1c(%rax),%ymm12
6087  DB  196,98,125,24,104,44                ; vbroadcastss  0x2c(%rax),%ymm13
6088  DB  196,98,125,24,112,60                ; vbroadcastss  0x3c(%rax),%ymm14
6089  DB  196,98,125,24,120,76                ; vbroadcastss  0x4c(%rax),%ymm15
6090  DB  197,140,89,219                      ; vmulps        %ymm3,%ymm14,%ymm3
6091  DB  196,193,100,88,223                  ; vaddps        %ymm15,%ymm3,%ymm3
6092  DB  197,148,89,210                      ; vmulps        %ymm2,%ymm13,%ymm2
6093  DB  197,236,88,211                      ; vaddps        %ymm3,%ymm2,%ymm2
6094  DB  197,156,89,201                      ; vmulps        %ymm1,%ymm12,%ymm1
6095  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
6096  DB  197,164,89,192                      ; vmulps        %ymm0,%ymm11,%ymm0
6097  DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
6098  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Move the three deferred results into place.
6099  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
6100  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
6101  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
6102  DB  255,224                             ; jmpq          *%rax
6103
; _sk_matrix_perspective_avx
; Applies a 3x3 projective transform to (ymm0, ymm1). m[] is the array of
; nine 32-bit floats at the address fetched by the first lods; unlike the
; affine matrix stages, coefficients are read row by row:
;   X = m[0]*ymm0 + m[1]*ymm1 + m[2]
;   Y = m[3]*ymm0 + m[4]*ymm1 + m[5]
;   Z = m[6]*ymm0 + m[7]*ymm1 + m[8]
;   new ymm0 = X * rcp(Z),  new ymm1 = Y * rcp(Z)
; rcp is vrcpps, a reduced-precision reciprocal approximation, so results are
; not bit-identical to a true division. Ends by fetching the next qword from
; (rsi) and jumping to it. Scratch registers written: rax, ymm8..ymm12.
6104PUBLIC _sk_matrix_perspective_avx
6105_sk_matrix_perspective_avx LABEL PROC
6106  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm8 = X
6107  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
6108  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
6109  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
6110  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
6111  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
6112  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
6113  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; ymm9 = Y
6114  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
6115  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
6116  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
6117  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
6118  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
6119  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
6120  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; ymm0 = Z
6121  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
6122  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
6123  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
6124  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
6125  DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
6126  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
6127  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
; ymm1 = approximate 1/Z, then scale X and Y by it.
6128  DB  197,252,83,200                      ; vrcpps        %ymm0,%ymm1
6129  DB  197,188,89,193                      ; vmulps        %ymm1,%ymm8,%ymm0
6130  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
6131  DB  72,173                              ; lods          %ds:(%rsi),%rax
6132  DB  255,224                             ; jmpq          *%rax
6133
; _sk_linear_gradient_2stops_avx
; Expands ymm0 (call it t) into four linear functions of t. c[] is the array
; of eight 32-bit floats at the address fetched by the first lods; c[0..3]
; are the constant terms and c[4..7] the per-t factors:
;   new ymm0 = c[4]*t + c[0]
;   new ymm1 = c[5]*t + c[1]
;   new ymm2 = c[6]*t + c[2]
;   new ymm3 = c[7]*t + c[3]
; The previous contents of ymm1..ymm3 are discarded. The first result is held
; in ymm8 until t is no longer needed. Ends by fetching the next qword from
; (rsi) and jumping to it. Scratch registers written: rax, ymm8, ymm9.
6134PUBLIC _sk_linear_gradient_2stops_avx
6135_sk_linear_gradient_2stops_avx LABEL PROC
6136  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm8 = c[4]*t + c[0]
6137  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
6138  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
6139  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
6140  DB  197,116,88,194                      ; vaddps        %ymm2,%ymm1,%ymm8
; ymm1 = c[5]*t + c[1]
6141  DB  196,226,125,24,72,20                ; vbroadcastss  0x14(%rax),%ymm1
6142  DB  196,226,125,24,80,4                 ; vbroadcastss  0x4(%rax),%ymm2
6143  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
6144  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
; ymm2 = c[6]*t + c[2]
6145  DB  196,226,125,24,80,24                ; vbroadcastss  0x18(%rax),%ymm2
6146  DB  196,226,125,24,88,8                 ; vbroadcastss  0x8(%rax),%ymm3
6147  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
6148  DB  197,236,88,211                      ; vaddps        %ymm3,%ymm2,%ymm2
; ymm3 = c[7]*t + c[3]
6149  DB  196,226,125,24,88,28                ; vbroadcastss  0x1c(%rax),%ymm3
6150  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
6151  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
6152  DB  196,193,124,88,217                  ; vaddps        %ymm9,%ymm0,%ymm3
6153  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Move the deferred first result into place.
6154  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
6155  DB  255,224                             ; jmpq          *%rax
6156
; _sk_start_pipeline_sse41
; Entry point that drives a chain of stages in steps of 4.
; In:   %rcx = starting index, %rdx = pointer to a stream of 8-byte entries,
;       %r8  = value handed to every stage call in %rdx,
;       %r9  = upper limit for the index.
; Out:  %rax = first index that was not processed.
; Saves and restores %r12-%r15, %rsi, %rdi, %rbx and %xmm6-%xmm15 (the
; registers the Microsoft x64 convention treats as callee-saved that this
; routine or the code it calls may modify).
; Register roles inside the loop:
;   %rbx = current index          %r15 = limit
;   %r12 = first stream entry (the address that is called)
;   %r13 = stream pointer just past that first entry
;   %r14 = the caller's %r8
; Stack: 7 pushes + 0xa0 bytes of XMM save area; with the return address this
; keeps %rsp 16-byte aligned for the movaps stores, assuming the caller
; followed the usual alignment rule at its call site.
; NOTE(review): no 32-byte shadow space is reserved before the indirect call,
; and arguments go in %rdi/%rsi/%rdx rather than %rcx/%rdx/%r8.  Presumably
; the called stages use a private register convention - confirm.
6157PUBLIC _sk_start_pipeline_sse41
6158_sk_start_pipeline_sse41 LABEL PROC
; --- prologue: save callee-saved integer and XMM registers ---
6159  DB  65,87                               ; push          %r15
6160  DB  65,86                               ; push          %r14
6161  DB  65,85                               ; push          %r13
6162  DB  65,84                               ; push          %r12
6163  DB  86                                  ; push          %rsi
6164  DB  87                                  ; push          %rdi
6165  DB  83                                  ; push          %rbx
6166  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
6167  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
6168  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
6169  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
6170  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
6171  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
6172  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
6173  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
6174  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
6175  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
6176  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
; --- move the four incoming arguments into registers that survive the calls ---
6177  DB  77,137,207                          ; mov           %r9,%r15
6178  DB  77,137,198                          ; mov           %r8,%r14
6179  DB  72,137,203                          ; mov           %rcx,%rbx
6180  DB  72,137,214                          ; mov           %rdx,%rsi
; Fetch the first stream entry (the stage to call) and remember where the
; rest of the stream begins.
6181  DB  72,173                              ; lods          %ds:(%rsi),%rax
6182  DB  73,137,196                          ; mov           %rax,%r12
6183  DB  73,137,245                          ; mov           %rsi,%r13
; Pre-check: if index+4 > limit (unsigned) there is no full step to run, so
; return the starting index unchanged.
6184  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
6185  DB  76,57,248                           ; cmp           %r15,%rax
6186  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse41+0x73>
6187  DB  72,137,216                          ; mov           %rbx,%rax
6188  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse41+0xa7>
; --- loop body (+0x73): clear xmm0-xmm7, then call the first stage ---
6189  DB  15,87,192                           ; xorps         %xmm0,%xmm0
6190  DB  15,87,201                           ; xorps         %xmm1,%xmm1
6191  DB  15,87,210                           ; xorps         %xmm2,%xmm2
6192  DB  15,87,219                           ; xorps         %xmm3,%xmm3
6193  DB  15,87,228                           ; xorps         %xmm4,%xmm4
6194  DB  15,87,237                           ; xorps         %xmm5,%xmm5
6195  DB  15,87,246                           ; xorps         %xmm6,%xmm6
6196  DB  15,87,255                           ; xorps         %xmm7,%xmm7
; Stage arguments: %rdi = index, %rsi = stream pointer, %rdx = caller's %r8.
6197  DB  72,137,223                          ; mov           %rbx,%rdi
6198  DB  76,137,238                          ; mov           %r13,%rsi
6199  DB  76,137,242                          ; mov           %r14,%rdx
6200  DB  65,255,212                          ; callq         *%r12
; Advance by 4 and continue while another full step of 4 still fits:
; %rax = index+4 (the next index), %rbx temporarily = index+8 for the compare.
6201  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
6202  DB  72,131,195,8                        ; add           $0x8,%rbx
6203  DB  76,57,251                           ; cmp           %r15,%rbx
6204  DB  72,137,195                          ; mov           %rax,%rbx
6205  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse41+0x73>
; --- epilogue (+0xa7): %rax already holds the return value ---
6206  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
6207  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
6208  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
6209  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
6210  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
6211  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
6212  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
6213  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
6214  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
6215  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
6216  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
6217  DB  91                                  ; pop           %rbx
6218  DB  95                                  ; pop           %rdi
6219  DB  94                                  ; pop           %rsi
6220  DB  65,92                               ; pop           %r12
6221  DB  65,93                               ; pop           %r13
6222  DB  65,94                               ; pop           %r14
6223  DB  65,95                               ; pop           %r15
6224  DB  195                                 ; retq
6225
; _sk_just_return_sse41
; A stage consisting of a single ret: it neither loads another entry from
; the (%rsi) stream nor modifies any register.
; NOTE(review): presumably placed last in a stage chain so control returns
; to the callq in the start_pipeline routine - confirm against the caller.
6226PUBLIC _sk_just_return_sse41
6227_sk_just_return_sse41 LABEL PROC
6228  DB  195                                 ; retq
6229
; _sk_seed_shader_sse41
; Initialises xmm0-xmm7 from %edi, the 16 bytes at (%rdx) and an int32
; fetched through the (%rsi) stream:
;   xmm0 = float(%edi broadcast to 4 lanes) + 0.5 + 4 floats at (%rdx)
;   xmm1 = float(int32 at (%rax) broadcast) + 0.5
;   xmm2 = 1.0 in every lane
;   xmm3..xmm7 = 0
; 0x3f000000 and 0x3f800000 are the IEEE-754 single-precision bit patterns
; of 0.5 and 1.0.  Clobbers %ecx and %rax.
; NOTE(review): presumably %edi is the x index, (%rax) holds y, and (%rdx)
; holds per-lane offsets such as {0,1,2,3} - inferred from the stage name;
; confirm.
6230PUBLIC _sk_seed_shader_sse41
6231_sk_seed_shader_sse41 LABEL PROC
6232  DB  72,173                              ; lods          %ds:(%rsi),%rax
6233  DB  102,15,110,199                      ; movd          %edi,%xmm0
6234  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
6235  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
6236  DB  185,0,0,0,63                        ; mov           $0x3f000000,%ecx
6237  DB  102,15,110,209                      ; movd          %ecx,%xmm2
6238  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6239  DB  15,88,202                           ; addps         %xmm2,%xmm1
; movups: the 16 bytes at (%rdx) are read without an alignment requirement.
6240  DB  15,16,2                             ; movups        (%rdx),%xmm0
6241  DB  15,88,193                           ; addps         %xmm1,%xmm0
6242  DB  102,15,110,8                        ; movd          (%rax),%xmm1
6243  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
6244  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
6245  DB  15,88,202                           ; addps         %xmm2,%xmm1
; %rax is no longer needed as a pointer here, so %eax is reused to build 1.0.
6246  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6247  DB  102,15,110,208                      ; movd          %eax,%xmm2
6248  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6249  DB  72,173                              ; lods          %ds:(%rsi),%rax
6250  DB  15,87,219                           ; xorps         %xmm3,%xmm3
6251  DB  15,87,228                           ; xorps         %xmm4,%xmm4
6252  DB  15,87,237                           ; xorps         %xmm5,%xmm5
6253  DB  15,87,246                           ; xorps         %xmm6,%xmm6
6254  DB  15,87,255                           ; xorps         %xmm7,%xmm7
6255  DB  255,224                             ; jmpq          *%rax
6256
; _sk_constant_color_sse41
; Loads a pointer from the (%rsi) stream, then broadcasts the four
; consecutive floats at offsets 0, 4, 8 and 12 from it into every lane of
; xmm0, xmm1, xmm2 and xmm3 respectively.  The previous contents of
; xmm0-xmm3 are discarded.  Ends by loading the next pointer with lods and
; jumping to it.
; NOTE(review): presumably the four floats are r,g,b,a of a colour -
; inferred from the stage name; confirm.
6257PUBLIC _sk_constant_color_sse41
6258_sk_constant_color_sse41 LABEL PROC
6259  DB  72,173                              ; lods          %ds:(%rsi),%rax
6260  DB  243,15,16,0                         ; movss         (%rax),%xmm0
6261  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
6262  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
6263  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
6264  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
6265  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6266  DB  243,15,16,88,12                     ; movss         0xc(%rax),%xmm3
6267  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
6268  DB  72,173                              ; lods          %ds:(%rsi),%rax
6269  DB  255,224                             ; jmpq          *%rax
6270
; _sk_clear_sse41
; Zeroes xmm0-xmm3 and jumps to the next entry of the (%rsi) stream.  The
; next pointer is fetched first, so %rax is live across the xorps
; instructions.  xmm4-xmm7 are left untouched.
6271PUBLIC _sk_clear_sse41
6272_sk_clear_sse41 LABEL PROC
6273  DB  72,173                              ; lods          %ds:(%rsi),%rax
6274  DB  15,87,192                           ; xorps         %xmm0,%xmm0
6275  DB  15,87,201                           ; xorps         %xmm1,%xmm1
6276  DB  15,87,210                           ; xorps         %xmm2,%xmm2
6277  DB  15,87,219                           ; xorps         %xmm3,%xmm3
6278  DB  255,224                             ; jmpq          *%rax
6279
; _sk_srcatop_sse41
; Per lane, with inv = 1.0 - xmm3 (computed from the incoming xmm3):
;   xmm0 = xmm0*xmm7 + xmm4*inv
;   xmm1 = xmm1*xmm7 + xmm5*inv
;   xmm2 = xmm2*xmm7 + xmm6*inv
;   xmm3 = xmm3*xmm7 + xmm7*inv
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a, making this the Porter-Duff "source atop" blend - confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6280PUBLIC _sk_srcatop_sse41
6281_sk_srcatop_sse41 LABEL PROC
6282  DB  15,89,199                           ; mulps         %xmm7,%xmm0
6283  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6284  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6285  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6286  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
6287  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6288  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
6289  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
6290  DB  15,89,207                           ; mulps         %xmm7,%xmm1
6291  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6292  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6293  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
6294  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6295  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6296  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6297  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
6298  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6299  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6300  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
6301  DB  72,173                              ; lods          %ds:(%rsi),%rax
6302  DB  255,224                             ; jmpq          *%rax
6303
; _sk_dstatop_sse41
; Per lane, with inv = 1.0 - xmm7:
;   xmm0 = xmm0*inv + xmm3*xmm4
;   xmm1 = xmm1*inv + xmm3*xmm5
;   xmm2 = xmm2*inv + xmm3*xmm6
;   xmm3 = xmm3*xmm7 + xmm3*inv      (both terms use the incoming xmm3)
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a, making this the Porter-Duff "destination atop" blend - confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6304PUBLIC _sk_dstatop_sse41
6305_sk_dstatop_sse41 LABEL PROC
6306  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
6307  DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
6308  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6309  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
6310  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
6311  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
6312  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
6313  DB  65,15,88,192                        ; addps         %xmm8,%xmm0
6314  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
6315  DB  68,15,89,197                        ; mulps         %xmm5,%xmm8
6316  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
6317  DB  65,15,88,200                        ; addps         %xmm8,%xmm1
6318  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
6319  DB  68,15,89,198                        ; mulps         %xmm6,%xmm8
6320  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
6321  DB  65,15,88,208                        ; addps         %xmm8,%xmm2
; xmm9 must be scaled by xmm3 before xmm3 itself is overwritten below.
6322  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
6323  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6324  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
6325  DB  72,173                              ; lods          %ds:(%rsi),%rax
6326  DB  255,224                             ; jmpq          *%rax
6327
; _sk_srcin_sse41
; Multiplies each of xmm0..xmm3 by xmm7, lane by lane, then jumps to the
; next entry of the (%rsi) stream.  No scratch vector registers are used.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm7 is destination
; alpha (Porter-Duff "source in") - inferred from the name; confirm.
6328PUBLIC _sk_srcin_sse41
6329_sk_srcin_sse41 LABEL PROC
6330  DB  15,89,199                           ; mulps         %xmm7,%xmm0
6331  DB  15,89,207                           ; mulps         %xmm7,%xmm1
6332  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6333  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6334  DB  72,173                              ; lods          %ds:(%rsi),%rax
6335  DB  255,224                             ; jmpq          *%rax
6336
; _sk_dstin_sse41
; Per lane, using the incoming xmm3 as the common factor:
;   xmm0 = xmm3*xmm4,  xmm1 = xmm3*xmm5,  xmm2 = xmm3*xmm6,  xmm3 = xmm3*xmm7
; The incoming xmm0..xmm2 are discarded; xmm3 is updated last because it is
; the multiplier for the other three.  Then jumps to the next (%rsi) entry.
; NOTE(review): presumably xmm3 is source alpha and xmm4-7 destination
; r,g,b,a (Porter-Duff "destination in") - inferred from the name; confirm.
6337PUBLIC _sk_dstin_sse41
6338_sk_dstin_sse41 LABEL PROC
6339  DB  15,40,195                           ; movaps        %xmm3,%xmm0
6340  DB  15,89,196                           ; mulps         %xmm4,%xmm0
6341  DB  15,40,203                           ; movaps        %xmm3,%xmm1
6342  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6343  DB  15,40,211                           ; movaps        %xmm3,%xmm2
6344  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6345  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6346  DB  72,173                              ; lods          %ds:(%rsi),%rax
6347  DB  255,224                             ; jmpq          *%rax
6348
; _sk_srcout_sse41
; Builds inv = 1.0 - xmm7 in xmm8 and multiplies each of xmm0..xmm3 by it,
; lane by lane.  0x3f800000 is the IEEE-754 bit pattern of 1.0f.
; Scratch: %eax, xmm8.  Then jumps to the next entry of the (%rsi) stream.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm7 destination
; alpha (Porter-Duff "source out") - inferred from the name; confirm.
6349PUBLIC _sk_srcout_sse41
6350_sk_srcout_sse41 LABEL PROC
6351  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6352  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6353  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6354  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
6355  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
6356  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
6357  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
6358  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
6359  DB  72,173                              ; lods          %ds:(%rsi),%rax
6360  DB  255,224                             ; jmpq          *%rax
6361
; _sk_dstout_sse41
; Per lane, with inv = 1.0 - xmm3 (incoming xmm3), held in xmm8:
;   xmm0 = inv*xmm4,  xmm1 = inv*xmm5,  xmm2 = inv*xmm6,  xmm3 = inv*xmm7
; The incoming xmm0..xmm2 are discarded.  0x3f800000 is the IEEE-754 bit
; pattern of 1.0f.  Scratch: %eax, xmm8.
; NOTE(review): presumably xmm3 is source alpha and xmm4-7 destination
; r,g,b,a (Porter-Duff "destination out") - inferred from the name; confirm.
6362PUBLIC _sk_dstout_sse41
6363_sk_dstout_sse41 LABEL PROC
6364  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6365  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6366  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6367  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
6368  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6369  DB  15,89,196                           ; mulps         %xmm4,%xmm0
6370  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
6371  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6372  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
6373  DB  15,89,214                           ; mulps         %xmm6,%xmm2
; The last product is formed in place in xmm8 and copied to xmm3 after the
; next pointer has been fetched.
6374  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6375  DB  72,173                              ; lods          %ds:(%rsi),%rax
6376  DB  65,15,40,216                        ; movaps        %xmm8,%xmm3
6377  DB  255,224                             ; jmpq          *%rax
6378
; _sk_srcover_sse41
; Per lane, with inv = 1.0 - xmm3 (incoming xmm3), held in xmm8:
;   xmm0 += xmm4*inv,  xmm1 += xmm5*inv,  xmm2 += xmm6*inv,  xmm3 += xmm7*inv
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a (Porter-Duff "source over") - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6379PUBLIC _sk_srcover_sse41
6380_sk_srcover_sse41 LABEL PROC
6381  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6382  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6383  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6384  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
6385  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6386  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
6387  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
6388  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6389  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6390  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
6391  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6392  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6393  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
6394  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6395  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
6396  DB  72,173                              ; lods          %ds:(%rsi),%rax
6397  DB  255,224                             ; jmpq          *%rax
6398
; _sk_dstover_sse41
; Per lane, with inv = 1.0 - xmm7, held in xmm8:
;   xmm0 = xmm0*inv + xmm4,  xmm1 = xmm1*inv + xmm5,
;   xmm2 = xmm2*inv + xmm6,  xmm3 = xmm3*inv + xmm7
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a (Porter-Duff "destination over") - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6399PUBLIC _sk_dstover_sse41
6400_sk_dstover_sse41 LABEL PROC
6401  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6402  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6403  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6404  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
6405  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
6406  DB  15,88,196                           ; addps         %xmm4,%xmm0
6407  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
6408  DB  15,88,205                           ; addps         %xmm5,%xmm1
6409  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
6410  DB  15,88,214                           ; addps         %xmm6,%xmm2
6411  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
6412  DB  15,88,223                           ; addps         %xmm7,%xmm3
6413  DB  72,173                              ; lods          %ds:(%rsi),%rax
6414  DB  255,224                             ; jmpq          *%rax
6415
; _sk_modulate_sse41
; Lane-wise products: xmm0 *= xmm4, xmm1 *= xmm5, xmm2 *= xmm6, xmm3 *= xmm7.
; No scratch vector registers; xmm4-xmm7 are read only.  Then jumps to the
; next entry of the (%rsi) stream.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a - inferred from the sibling blend stages; confirm.
6416PUBLIC _sk_modulate_sse41
6417_sk_modulate_sse41 LABEL PROC
6418  DB  15,89,196                           ; mulps         %xmm4,%xmm0
6419  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6420  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6421  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6422  DB  72,173                              ; lods          %ds:(%rsi),%rax
6423  DB  255,224                             ; jmpq          *%rax
6424
; _sk_multiply_sse41
; Per lane, with invD = 1.0 - xmm7 (xmm9) and invS = 1.0 - xmm3 (xmm8),
; both taken from the incoming values:
;   xmm0 = xmm0*invD + xmm4*invS + xmm0*xmm4
;   xmm1 = xmm1*invD + xmm5*invS + xmm1*xmm5
;   xmm2 = xmm2*invD + xmm6*invS + xmm2*xmm6
;   xmm3 = xmm3*invD + xmm7*invS + xmm3*xmm7
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.
; Scratch: %eax, xmm8-xmm11.  xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a ("multiply" blend mode) - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6425PUBLIC _sk_multiply_sse41
6426_sk_multiply_sse41 LABEL PROC
6427  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6428  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
6429  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
6430  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
6431  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
6432  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
6433  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
6434  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
6435  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
6436  DB  68,15,89,220                        ; mulps         %xmm4,%xmm11
6437  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
6438  DB  15,89,196                           ; mulps         %xmm4,%xmm0
6439  DB  65,15,88,195                        ; addps         %xmm11,%xmm0
6440  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
6441  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
6442  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
6443  DB  68,15,89,221                        ; mulps         %xmm5,%xmm11
6444  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
6445  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6446  DB  65,15,88,203                        ; addps         %xmm11,%xmm1
6447  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
6448  DB  68,15,89,210                        ; mulps         %xmm2,%xmm10
6449  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
6450  DB  68,15,89,222                        ; mulps         %xmm6,%xmm11
6451  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
6452  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6453  DB  65,15,88,211                        ; addps         %xmm11,%xmm2
; Fourth channel: xmm8/xmm9 are consumed in place since they are not needed
; afterwards.
6454  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
6455  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6456  DB  69,15,88,193                        ; addps         %xmm9,%xmm8
6457  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6458  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
6459  DB  72,173                              ; lods          %ds:(%rsi),%rax
6460  DB  255,224                             ; jmpq          *%rax
6461
; _sk_plus__sse41
; Lane-wise sums: xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7.
; The results are not clamped in this stage.  No scratch vector registers;
; xmm4-xmm7 are read only.  Then jumps to the next (%rsi) stream entry.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a - inferred from the sibling blend stages; confirm.
6462PUBLIC _sk_plus__sse41
6463_sk_plus__sse41 LABEL PROC
6464  DB  15,88,196                           ; addps         %xmm4,%xmm0
6465  DB  15,88,205                           ; addps         %xmm5,%xmm1
6466  DB  15,88,214                           ; addps         %xmm6,%xmm2
6467  DB  15,88,223                           ; addps         %xmm7,%xmm3
6468  DB  72,173                              ; lods          %ds:(%rsi),%rax
6469  DB  255,224                             ; jmpq          *%rax
6470
; _sk_screen_sse41
; Per lane, for each pair (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6), (xmm3,xmm7):
;   result = (s + d) - s*d
; where s is the first register of the pair and d the second.  The sums are
; built in xmm8..xmm11, the products in place in xmm0..xmm3, and the
; differences are copied back to xmm0..xmm3 at the end.
; Scratch: xmm8-xmm11.  xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a ("screen" blend mode) - inferred from the name; confirm.
6471PUBLIC _sk_screen_sse41
6472_sk_screen_sse41 LABEL PROC
6473  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
6474  DB  68,15,88,196                        ; addps         %xmm4,%xmm8
6475  DB  15,89,196                           ; mulps         %xmm4,%xmm0
6476  DB  68,15,92,192                        ; subps         %xmm0,%xmm8
6477  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
6478  DB  68,15,88,205                        ; addps         %xmm5,%xmm9
6479  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6480  DB  68,15,92,201                        ; subps         %xmm1,%xmm9
6481  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
6482  DB  68,15,88,214                        ; addps         %xmm6,%xmm10
6483  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6484  DB  68,15,92,210                        ; subps         %xmm2,%xmm10
6485  DB  68,15,40,219                        ; movaps        %xmm3,%xmm11
6486  DB  68,15,88,223                        ; addps         %xmm7,%xmm11
6487  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6488  DB  68,15,92,219                        ; subps         %xmm3,%xmm11
6489  DB  72,173                              ; lods          %ds:(%rsi),%rax
6490  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6491  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
6492  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
6493  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
6494  DB  255,224                             ; jmpq          *%rax
6495
; _sk_xor__sse41
; Per lane, with a = incoming xmm3 (saved in xmm8), invD = 1.0 - xmm7 (xmm9)
; and invS = 1.0 - a (built in xmm3 itself):
;   xmm0 = xmm0*invD + xmm4*invS
;   xmm1 = xmm1*invD + xmm5*invS
;   xmm2 = xmm2*invD + xmm6*invS
;   xmm3 = a*invD    + xmm7*invS
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.
; Scratch: %eax, xmm8-xmm10.  xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a (Porter-Duff "xor") - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6496PUBLIC _sk_xor__sse41
6497_sk_xor__sse41 LABEL PROC
; xmm3 is reused as the 1.0 constant, so its incoming value is saved first.
6498  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
6499  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6500  DB  102,15,110,216                      ; movd          %eax,%xmm3
6501  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
6502  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6503  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
6504  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
6505  DB  65,15,92,216                        ; subps         %xmm8,%xmm3
6506  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
6507  DB  68,15,89,212                        ; mulps         %xmm4,%xmm10
6508  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
6509  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
6510  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
6511  DB  68,15,89,213                        ; mulps         %xmm5,%xmm10
6512  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
6513  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
6514  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
6515  DB  68,15,89,214                        ; mulps         %xmm6,%xmm10
6516  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
6517  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
6518  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6519  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
6520  DB  72,173                              ; lods          %ds:(%rsi),%rax
6521  DB  255,224                             ; jmpq          *%rax
6522
; _sk_darken_sse41
; Per lane, using the incoming register values:
;   xmm0 = (xmm0 + xmm4) - max(xmm0*xmm7, xmm3*xmm4)
;   xmm1 = (xmm1 + xmm5) - max(xmm1*xmm7, xmm3*xmm5)
;   xmm2 = (xmm2 + xmm6) - max(xmm2*xmm7, xmm3*xmm6)
;   xmm3 =  xmm3 + (1.0 - xmm3)*xmm7
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9;
; xmm1 and xmm2 are also used as temporaries before receiving their results.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a ("darken" blend mode) - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6523PUBLIC _sk_darken_sse41
6524_sk_darken_sse41 LABEL PROC
; Incoming xmm1 is parked in xmm8 so xmm1 can serve as a temporary.
6525  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
6526  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
6527  DB  15,88,196                           ; addps         %xmm4,%xmm0
6528  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
6529  DB  15,40,203                           ; movaps        %xmm3,%xmm1
6530  DB  15,89,204                           ; mulps         %xmm4,%xmm1
6531  DB  68,15,95,201                        ; maxps         %xmm1,%xmm9
6532  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
6533  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
6534  DB  15,88,205                           ; addps         %xmm5,%xmm1
6535  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6536  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6537  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6538  DB  69,15,95,193                        ; maxps         %xmm9,%xmm8
6539  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; Third channel result is accumulated in xmm8 and moved to xmm2 at the end.
6540  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
6541  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
6542  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6543  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6544  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6545  DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
6546  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
6547  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6548  DB  102,15,110,208                      ; movd          %eax,%xmm2
6549  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6550  DB  15,92,211                           ; subps         %xmm3,%xmm2
6551  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6552  DB  15,88,218                           ; addps         %xmm2,%xmm3
6553  DB  72,173                              ; lods          %ds:(%rsi),%rax
6554  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
6555  DB  255,224                             ; jmpq          *%rax
6556
; _sk_lighten_sse41
; Per lane, using the incoming register values:
;   xmm0 = (xmm0 + xmm4) - min(xmm0*xmm7, xmm3*xmm4)
;   xmm1 = (xmm1 + xmm5) - min(xmm1*xmm7, xmm3*xmm5)
;   xmm2 = (xmm2 + xmm6) - min(xmm2*xmm7, xmm3*xmm6)
;   xmm3 =  xmm3 + (1.0 - xmm3)*xmm7
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9;
; xmm1 and xmm2 are also used as temporaries before receiving their results.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a ("lighten" blend mode) - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6557PUBLIC _sk_lighten_sse41
6558_sk_lighten_sse41 LABEL PROC
; Incoming xmm1 is parked in xmm8 so xmm1 can serve as a temporary.
6559  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
6560  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
6561  DB  15,88,196                           ; addps         %xmm4,%xmm0
6562  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
6563  DB  15,40,203                           ; movaps        %xmm3,%xmm1
6564  DB  15,89,204                           ; mulps         %xmm4,%xmm1
6565  DB  68,15,93,201                        ; minps         %xmm1,%xmm9
6566  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
6567  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
6568  DB  15,88,205                           ; addps         %xmm5,%xmm1
6569  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6570  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6571  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6572  DB  69,15,93,193                        ; minps         %xmm9,%xmm8
6573  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; Third channel result is accumulated in xmm8 and moved to xmm2 at the end.
6574  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
6575  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
6576  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6577  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6578  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6579  DB  65,15,93,209                        ; minps         %xmm9,%xmm2
6580  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
6581  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6582  DB  102,15,110,208                      ; movd          %eax,%xmm2
6583  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6584  DB  15,92,211                           ; subps         %xmm3,%xmm2
6585  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6586  DB  15,88,218                           ; addps         %xmm2,%xmm3
6587  DB  72,173                              ; lods          %ds:(%rsi),%rax
6588  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
6589  DB  255,224                             ; jmpq          *%rax
6590
; _sk_difference_sse41
; Per lane, using the incoming register values:
;   xmm0 = (xmm0 + xmm4) - 2*min(xmm0*xmm7, xmm3*xmm4)
;   xmm1 = (xmm1 + xmm5) - 2*min(xmm1*xmm7, xmm3*xmm5)
;   xmm2 = (xmm2 + xmm6) - 2*min(xmm2*xmm7, xmm3*xmm6)
;   xmm3 =  xmm3 + (1.0 - xmm3)*xmm7
; The doubling is done by adding the min result to itself.
; 0x3f800000 is the IEEE-754 bit pattern of 1.0f.  Scratch: %eax, xmm8, xmm9;
; xmm1 and xmm2 are also used as temporaries before receiving their results.
; xmm4-xmm7 are read only.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a ("difference" blend mode) - inferred from the name; confirm.
; Ends by loading the next pointer from (%rsi) and jumping to it.
6591PUBLIC _sk_difference_sse41
6592_sk_difference_sse41 LABEL PROC
; Incoming xmm1 is parked in xmm8 so xmm1 can serve as a temporary.
6593  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
6594  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
6595  DB  15,88,196                           ; addps         %xmm4,%xmm0
6596  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
6597  DB  15,40,203                           ; movaps        %xmm3,%xmm1
6598  DB  15,89,204                           ; mulps         %xmm4,%xmm1
6599  DB  68,15,93,201                        ; minps         %xmm1,%xmm9
6600  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
6601  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
6602  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
6603  DB  15,88,205                           ; addps         %xmm5,%xmm1
6604  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
6605  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6606  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6607  DB  69,15,93,193                        ; minps         %xmm9,%xmm8
6608  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
6609  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; Third channel result is accumulated in xmm8 and moved to xmm2 at the end.
6610  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
6611  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
6612  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6613  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6614  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6615  DB  65,15,93,209                        ; minps         %xmm9,%xmm2
6616  DB  15,88,210                           ; addps         %xmm2,%xmm2
6617  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
6618  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6619  DB  102,15,110,208                      ; movd          %eax,%xmm2
6620  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6621  DB  15,92,211                           ; subps         %xmm3,%xmm2
6622  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6623  DB  15,88,218                           ; addps         %xmm2,%xmm3
6624  DB  72,173                              ; lods          %ds:(%rsi),%rax
6625  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
6626  DB  255,224                             ; jmpq          *%rax
6627
; ---------------------------------------------------------------------------
; _sk_exclusion_sse41 -- four-lane SSE pipeline stage (raw machine code).
; With x0..x7 denoting the values of xmm0..xmm7 on entry, per lane:
;   xmm0 = x0 + x4 - 2*x0*x4
;   xmm1 = x1 + x5 - 2*x1*x5
;   xmm2 = x2 + x6 - 2*x2*x6
;   xmm3 = x3 + (1 - x3)*x7
; xmm4..xmm7 are only read.  Scratch: xmm8, rax.
; Exit: loads the qword at (%rsi) into rax, steps rsi past it (direction
; flag assumed clear) and jumps to the loaded address.
; NOTE(review): presumably xmm0-3 are source r,g,b,a and xmm4-7 destination
; r,g,b,a as in the other SkJumper stages -- confirm against the stage source.
; ---------------------------------------------------------------------------
6628PUBLIC _sk_exclusion_sse41
6629_sk_exclusion_sse41 LABEL PROC
; xmm1 doubles as the temporary for lane group 0, so x1 is parked in xmm8.
6630  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
; Lane group 0: xmm0 = (x0 + x4) - 2*(x0*x4)
6631  DB  15,40,200                           ; movaps        %xmm0,%xmm1
6632  DB  15,88,196                           ; addps         %xmm4,%xmm0
6633  DB  15,89,204                           ; mulps         %xmm4,%xmm1
6634  DB  15,88,201                           ; addps         %xmm1,%xmm1
6635  DB  15,92,193                           ; subps         %xmm1,%xmm0
; Lane group 1: xmm1 = (x1 + x5) - 2*(x1*x5)
6636  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
6637  DB  15,88,205                           ; addps         %xmm5,%xmm1
6638  DB  68,15,89,197                        ; mulps         %xmm5,%xmm8
6639  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
6640  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; Lane group 2: xmm8 = (x2 + x6) - 2*(x2*x6); moved into xmm2 before the jump.
6641  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
6642  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
6643  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6644  DB  15,88,210                           ; addps         %xmm2,%xmm2
6645  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
; xmm3 = x3 + (1 - x3)*x7, using a broadcast of 1.0f (0x3f800000) in xmm2.
6646  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6647  DB  102,15,110,208                      ; movd          %eax,%xmm2
6648  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
6649  DB  15,92,211                           ; subps         %xmm3,%xmm2
6650  DB  15,89,215                           ; mulps         %xmm7,%xmm2
6651  DB  15,88,218                           ; addps         %xmm2,%xmm3
; Fetch the next jump target from the stream at rsi, publish xmm2, tail-jump.
6652  DB  72,173                              ; lods          %ds:(%rsi),%rax
6653  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
6654  DB  255,224                             ; jmpq          *%rax
6655
; ---------------------------------------------------------------------------
; _sk_colorburn_sse41 -- four-lane SSE4.1 pipeline stage (raw machine code).
; Reviewer shorthand for entry values: sa = xmm3, da = xmm7, and for each of
; the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6).
; Each pair produces, per lane, written back to the s register:
;   if d == da : d + s*(1 - da)
;   elif s == 0: s + d*(1 - sa)
;   else       : sa*(da - min(da, (da - d)*sa/s)) + s*(1 - da) + d*(1 - sa)
; and finally xmm3 = sa + (1 - sa)*da.
; The divide by s is executed unconditionally; lanes with s == 0 are replaced
; by the blendvps that follows.  blendvps uses xmm0 implicitly as the mask
; (sign bit of each lane selects the source operand).
; xmm4..xmm7 are only read.  Scratch: xmm8..xmm14, rax.
; Exit: loads the qword at (%rsi) into rax, steps rsi past it (direction
; flag assumed clear) and jumps to the loaded address.
; NOTE(review): presumably s/d are source/destination colour channels and
; sa/da their alphas -- confirm against the stage source.
; ---------------------------------------------------------------------------
6656PUBLIC _sk_colorburn_sse41
6657_sk_colorburn_sse41 LABEL PROC
; xmm0 is the implicit blend mask, so its entry value lives in xmm9 from here.
6658  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
; Constants: xmm10 = 1.0f broadcast, xmm11 = 1 - da, xmm8 = 0.
6659  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6660  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
6661  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
6662  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
6663  DB  68,15,92,223                        ; subps         %xmm7,%xmm11
; --- pair 0: s = entry xmm0 (now xmm9), d = xmm4 ---
; xmm12 = s*(1 - da)
6664  DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
6665  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
6666  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
; xmm14 = da - min(da, (da - d)*sa/s)
6667  DB  15,40,199                           ; movaps        %xmm7,%xmm0
6668  DB  15,92,196                           ; subps         %xmm4,%xmm0
6669  DB  15,89,195                           ; mulps         %xmm3,%xmm0
6670  DB  65,15,94,193                        ; divps         %xmm9,%xmm0
6671  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6672  DB  68,15,93,232                        ; minps         %xmm0,%xmm13
6673  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
6674  DB  69,15,92,245                        ; subps         %xmm13,%xmm14
; mask = (s == 0); xmm10 becomes 1 - sa (kept for all three pairs and alpha)
6675  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
6676  DB  65,15,194,192,0                     ; cmpeqps       %xmm8,%xmm0
6677  DB  68,15,92,211                        ; subps         %xmm3,%xmm10
; xmm14 = sa*xmm14 + s*(1 - da), or s where s == 0
6678  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
6679  DB  69,15,88,244                        ; addps         %xmm12,%xmm14
6680  DB  102,69,15,56,20,241                 ; blendvps      %xmm0,%xmm9,%xmm14
; xmm9 = d*(1 - sa) + xmm14, or d + s*(1 - da) where d == da
6681  DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
6682  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
6683  DB  68,15,88,228                        ; addps         %xmm4,%xmm12
6684  DB  15,40,196                           ; movaps        %xmm4,%xmm0
6685  DB  15,194,199,0                        ; cmpeqps       %xmm7,%xmm0
6686  DB  69,15,88,206                        ; addps         %xmm14,%xmm9
6687  DB  102,69,15,56,20,204                 ; blendvps      %xmm0,%xmm12,%xmm9
; --- pair 1: s = xmm1, d = xmm5; same sequence, result left in xmm1 ---
6688  DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
6689  DB  68,15,89,225                        ; mulps         %xmm1,%xmm12
6690  DB  15,40,199                           ; movaps        %xmm7,%xmm0
6691  DB  15,92,197                           ; subps         %xmm5,%xmm0
6692  DB  15,89,195                           ; mulps         %xmm3,%xmm0
6693  DB  15,94,193                           ; divps         %xmm1,%xmm0
6694  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6695  DB  68,15,93,232                        ; minps         %xmm0,%xmm13
6696  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
6697  DB  69,15,92,245                        ; subps         %xmm13,%xmm14
6698  DB  15,40,193                           ; movaps        %xmm1,%xmm0
6699  DB  65,15,194,192,0                     ; cmpeqps       %xmm8,%xmm0
6700  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
6701  DB  69,15,88,244                        ; addps         %xmm12,%xmm14
6702  DB  102,68,15,56,20,241                 ; blendvps      %xmm0,%xmm1,%xmm14
6703  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
6704  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6705  DB  68,15,88,229                        ; addps         %xmm5,%xmm12
6706  DB  65,15,88,206                        ; addps         %xmm14,%xmm1
6707  DB  15,40,197                           ; movaps        %xmm5,%xmm0
6708  DB  15,194,199,0                        ; cmpeqps       %xmm7,%xmm0
6709  DB  102,65,15,56,20,204                 ; blendvps      %xmm0,%xmm12,%xmm1
; --- pair 2: s = xmm2, d = xmm6; result left in xmm2 ---
; Last use of xmm11 (1 - da) and xmm8 (zero), so both are consumed in place:
; xmm11 becomes s*(1 - da) and xmm8 becomes the (s == 0) mask.
6710  DB  15,40,199                           ; movaps        %xmm7,%xmm0
6711  DB  15,92,198                           ; subps         %xmm6,%xmm0
6712  DB  15,89,195                           ; mulps         %xmm3,%xmm0
6713  DB  15,94,194                           ; divps         %xmm2,%xmm0
6714  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
6715  DB  68,15,93,224                        ; minps         %xmm0,%xmm12
6716  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6717  DB  69,15,92,236                        ; subps         %xmm12,%xmm13
6718  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
6719  DB  68,15,194,194,0                     ; cmpeqps       %xmm2,%xmm8
6720  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
6721  DB  69,15,88,235                        ; addps         %xmm11,%xmm13
6722  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6723  DB  102,68,15,56,20,234                 ; blendvps      %xmm0,%xmm2,%xmm13
6724  DB  68,15,88,222                        ; addps         %xmm6,%xmm11
6725  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
6726  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6727  DB  65,15,88,213                        ; addps         %xmm13,%xmm2
6728  DB  15,40,198                           ; movaps        %xmm6,%xmm0
6729  DB  15,194,199,0                        ; cmpeqps       %xmm7,%xmm0
6730  DB  102,65,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm2
; xmm3 = sa + (1 - sa)*da
6731  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
6732  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
; Fetch the next jump target, publish the pair-0 result in xmm0, tail-jump.
6733  DB  72,173                              ; lods          %ds:(%rsi),%rax
6734  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
6735  DB  255,224                             ; jmpq          *%rax
6736
; ---------------------------------------------------------------------------
; _sk_colordodge_sse41 -- four-lane SSE4.1 pipeline stage (raw machine code).
; Reviewer shorthand for entry values: sa = xmm3, da = xmm7, and for each of
; the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6).
; Each pair produces, per lane, written back to the s register:
;   if d == 0  : d + s*(1 - da)
;   elif s == sa: s + d*(1 - sa)
;   else       : sa*min(da, sa*d/(sa - s)) + s*(1 - da) + d*(1 - sa)
; and finally xmm3 = sa + (1 - sa)*da.
; The divide by (sa - s) is executed unconditionally; lanes with s == sa are
; replaced by the blendvps that follows.  blendvps uses xmm0 implicitly as
; the mask (sign bit of each lane selects the source operand).
; xmm4..xmm7 are only read.  Scratch: xmm8..xmm14, rax.
; Exit: loads the qword at (%rsi) into rax, steps rsi past it (direction
; flag assumed clear) and jumps to the loaded address.
; NOTE(review): presumably s/d are source/destination colour channels and
; sa/da their alphas -- confirm against the stage source.
; ---------------------------------------------------------------------------
6737PUBLIC _sk_colordodge_sse41
6738_sk_colordodge_sse41 LABEL PROC
; xmm0 is the implicit blend mask, so its entry value lives in xmm8 from here.
6739  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
; Constants: xmm10 = 1.0f broadcast, xmm11 = 1 - da.
6740  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6741  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
6742  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
6743  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
6744  DB  68,15,92,223                        ; subps         %xmm7,%xmm11
; --- pair 0: s = entry xmm0 (now xmm8), d = xmm4 ---
; xmm12 = s*(1 - da); xmm9 = sa*d/(sa - s)
6745  DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
6746  DB  69,15,89,224                        ; mulps         %xmm8,%xmm12
6747  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
6748  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
6749  DB  15,40,195                           ; movaps        %xmm3,%xmm0
6750  DB  65,15,92,192                        ; subps         %xmm8,%xmm0
6751  DB  68,15,94,200                        ; divps         %xmm0,%xmm9
; xmm13 = da is set up here but first consumed by pair 1's minps below.
6752  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
; xmm14 = sa*min(da, xmm9) + s*(1 - da), or s where s == sa
6753  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
6754  DB  69,15,93,241                        ; minps         %xmm9,%xmm14
6755  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6756  DB  15,194,195,0                        ; cmpeqps       %xmm3,%xmm0
6757  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
6758  DB  69,15,88,244                        ; addps         %xmm12,%xmm14
6759  DB  102,69,15,56,20,240                 ; blendvps      %xmm0,%xmm8,%xmm14
; xmm9 = 0 (for the d == 0 tests); xmm10 becomes 1 - sa for the rest of the stage
6760  DB  69,15,87,201                        ; xorps         %xmm9,%xmm9
6761  DB  68,15,92,211                        ; subps         %xmm3,%xmm10
; xmm8 = d*(1 - sa) + xmm14, or d + s*(1 - da) where d == 0
6762  DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
6763  DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
6764  DB  68,15,88,228                        ; addps         %xmm4,%xmm12
6765  DB  69,15,88,198                        ; addps         %xmm14,%xmm8
6766  DB  15,40,196                           ; movaps        %xmm4,%xmm0
6767  DB  65,15,194,193,0                     ; cmpeqps       %xmm9,%xmm0
6768  DB  102,69,15,56,20,196                 ; blendvps      %xmm0,%xmm12,%xmm8
; --- pair 1: s = xmm1, d = xmm5; result left in xmm1 ---
6769  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
6770  DB  68,15,89,229                        ; mulps         %xmm5,%xmm12
6771  DB  15,40,195                           ; movaps        %xmm3,%xmm0
6772  DB  15,92,193                           ; subps         %xmm1,%xmm0
6773  DB  68,15,94,224                        ; divps         %xmm0,%xmm12
6774  DB  69,15,40,243                        ; movaps        %xmm11,%xmm14
6775  DB  68,15,89,241                        ; mulps         %xmm1,%xmm14
6776  DB  69,15,93,236                        ; minps         %xmm12,%xmm13
6777  DB  15,40,193                           ; movaps        %xmm1,%xmm0
6778  DB  15,194,195,0                        ; cmpeqps       %xmm3,%xmm0
6779  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
6780  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
6781  DB  102,68,15,56,20,233                 ; blendvps      %xmm0,%xmm1,%xmm13
6782  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
6783  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6784  DB  68,15,88,245                        ; addps         %xmm5,%xmm14
6785  DB  65,15,88,205                        ; addps         %xmm13,%xmm1
6786  DB  15,40,197                           ; movaps        %xmm5,%xmm0
6787  DB  65,15,194,193,0                     ; cmpeqps       %xmm9,%xmm0
6788  DB  102,65,15,56,20,206                 ; blendvps      %xmm0,%xmm14,%xmm1
; --- pair 2: s = xmm2, d = xmm6; result left in xmm2 ---
; Last use of xmm11 (1 - da) and xmm9 (zero), so both are consumed in place.
6789  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
6790  DB  68,15,89,230                        ; mulps         %xmm6,%xmm12
6791  DB  15,40,195                           ; movaps        %xmm3,%xmm0
6792  DB  15,92,194                           ; subps         %xmm2,%xmm0
6793  DB  68,15,94,224                        ; divps         %xmm0,%xmm12
6794  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6795  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
6796  DB  69,15,93,236                        ; minps         %xmm12,%xmm13
6797  DB  15,40,194                           ; movaps        %xmm2,%xmm0
6798  DB  15,194,195,0                        ; cmpeqps       %xmm3,%xmm0
6799  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
6800  DB  69,15,88,235                        ; addps         %xmm11,%xmm13
6801  DB  102,68,15,56,20,234                 ; blendvps      %xmm0,%xmm2,%xmm13
6802  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
6803  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6804  DB  65,15,88,213                        ; addps         %xmm13,%xmm2
6805  DB  68,15,194,206,0                     ; cmpeqps       %xmm6,%xmm9
6806  DB  68,15,88,222                        ; addps         %xmm6,%xmm11
6807  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
6808  DB  102,65,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm2
; xmm3 = sa + (1 - sa)*da
6809  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
6810  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
; Fetch the next jump target, publish the pair-0 result in xmm0, tail-jump.
6811  DB  72,173                              ; lods          %ds:(%rsi),%rax
6812  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6813  DB  255,224                             ; jmpq          *%rax
6814
; ---------------------------------------------------------------------------
; _sk_hardlight_sse41 -- four-lane SSE4.1 pipeline stage (raw machine code).
; Reviewer shorthand for entry values: sa = xmm3, da = xmm7, and for each of
; the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6).
; Each pair produces, per lane, written back to the s register:
;   s*(1 - da) + d*(1 - sa)
;     + ( 2*s <= sa ? 2*s*d : sa*da - 2*(da - d)*(sa - s) )
; and finally xmm3 = sa + (1 - sa)*da.
; Unlike the neighbouring stages this one runs out of scratch registers: it
; reserves 0x18 bytes of stack, spills xmm6 there, and temporarily reuses
; xmm4/xmm5/xmm6; all three are restored to their entry values before exit.
; The movaps spill requires (%rsp) to be 16-byte aligned after the sub.
; xmm7 is only read.  Scratch: xmm8..xmm15, rax.
; Exit: loads the qword at (%rsi) into rax, steps rsi past it (direction
; flag assumed clear), releases the stack and jumps to the loaded address.
; NOTE(review): presumably s/d are source/destination colour channels and
; sa/da their alphas -- confirm against the stage source.
; ---------------------------------------------------------------------------
6815PUBLIC _sk_hardlight_sse41
6816_sk_hardlight_sse41 LABEL PROC
; Spill entry xmm6 and shuffle inputs:
;   xmm12 = entry xmm5, xmm6 = entry xmm4, xmm4 = sa, xmm5 = entry xmm2,
;   xmm9 = entry xmm0.  xmm3 still holds sa until it is scaled below.
6817  DB  72,131,236,24                       ; sub           $0x18,%rsp
6818  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
6819  DB  68,15,40,229                        ; movaps        %xmm5,%xmm12
6820  DB  15,40,244                           ; movaps        %xmm4,%xmm6
6821  DB  15,40,227                           ; movaps        %xmm3,%xmm4
6822  DB  15,40,234                           ; movaps        %xmm2,%xmm5
6823  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
; xmm10 = 1.0f broadcast; xmm2 = 1 - da
6824  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6825  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
6826  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
6827  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
6828  DB  15,92,215                           ; subps         %xmm7,%xmm2
; --- pair 0 (s in xmm9, d in xmm6) ---
; xmm8 = s*(1 - da) + d*(1 - sa); xmm10 becomes 1 - sa
6829  DB  15,40,194                           ; movaps        %xmm2,%xmm0
6830  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
6831  DB  68,15,92,212                        ; subps         %xmm4,%xmm10
6832  DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
6833  DB  68,15,89,198                        ; mulps         %xmm6,%xmm8
6834  DB  68,15,88,192                        ; addps         %xmm0,%xmm8
; xmm15 = sa - s; xmm3 = sa*da (shared by all three pairs);
; xmm13 and xmm14 = copies of da for pairs 1 and 2
6835  DB  68,15,40,252                        ; movaps        %xmm4,%xmm15
6836  DB  69,15,92,249                        ; subps         %xmm9,%xmm15
6837  DB  15,89,223                           ; mulps         %xmm7,%xmm3
6838  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6839  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
; xmm15 = sa*da - 2*(da - d)*(sa - s)
6840  DB  15,40,199                           ; movaps        %xmm7,%xmm0
6841  DB  15,92,198                           ; subps         %xmm6,%xmm0
6842  DB  65,15,89,199                        ; mulps         %xmm15,%xmm0
6843  DB  15,88,192                           ; addps         %xmm0,%xmm0
6844  DB  68,15,40,251                        ; movaps        %xmm3,%xmm15
6845  DB  68,15,92,248                        ; subps         %xmm0,%xmm15
; mask = (2*s <= sa); where set, xmm15 = 2*s*d instead
6846  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
6847  DB  15,88,192                           ; addps         %xmm0,%xmm0
6848  DB  15,194,196,2                        ; cmpleps       %xmm4,%xmm0
6849  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6850  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
6851  DB  102,69,15,56,20,249                 ; blendvps      %xmm0,%xmm9,%xmm15
; --- pair 1 (s in xmm1, d = entry xmm5, moved from xmm12 to xmm9) ---
6852  DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
6853  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
6854  DB  15,40,196                           ; movaps        %xmm4,%xmm0
6855  DB  15,92,193                           ; subps         %xmm1,%xmm0
6856  DB  69,15,40,204                        ; movaps        %xmm12,%xmm9
6857  DB  69,15,92,233                        ; subps         %xmm9,%xmm13
6858  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
6859  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
6860  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
6861  DB  69,15,92,229                        ; subps         %xmm13,%xmm12
6862  DB  15,40,193                           ; movaps        %xmm1,%xmm0
6863  DB  15,88,192                           ; addps         %xmm0,%xmm0
6864  DB  15,194,196,2                        ; cmpleps       %xmm4,%xmm0
6865  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
; xmm13 now keeps entry xmm5 so it can be restored at exit
6866  DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
6867  DB  15,88,201                           ; addps         %xmm1,%xmm1
6868  DB  102,68,15,56,20,225                 ; blendvps      %xmm0,%xmm1,%xmm12
; xmm1 and xmm9 = 1 - sa; xmm10 = (1 - sa)*da for the alpha result
6869  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
6870  DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
6871  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
; finish pair 0 (xmm8) and pair 1 (xmm1)
6872  DB  69,15,88,199                        ; addps         %xmm15,%xmm8
6873  DB  65,15,89,205                        ; mulps         %xmm13,%xmm1
6874  DB  65,15,88,203                        ; addps         %xmm11,%xmm1
6875  DB  65,15,88,204                        ; addps         %xmm12,%xmm1
; --- pair 2 (s in xmm5, d = entry xmm6 reloaded from the stack into xmm11) ---
6876  DB  15,89,213                           ; mulps         %xmm5,%xmm2
6877  DB  68,15,40,28,36                      ; movaps        (%rsp),%xmm11
6878  DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
6879  DB  68,15,88,202                        ; addps         %xmm2,%xmm9
6880  DB  15,40,197                           ; movaps        %xmm5,%xmm0
6881  DB  15,88,192                           ; addps         %xmm0,%xmm0
6882  DB  15,194,196,2                        ; cmpleps       %xmm4,%xmm0
6883  DB  15,40,212                           ; movaps        %xmm4,%xmm2
6884  DB  15,92,213                           ; subps         %xmm5,%xmm2
6885  DB  65,15,89,235                        ; mulps         %xmm11,%xmm5
6886  DB  15,88,237                           ; addps         %xmm5,%xmm5
6887  DB  69,15,92,243                        ; subps         %xmm11,%xmm14
6888  DB  68,15,89,242                        ; mulps         %xmm2,%xmm14
6889  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
6890  DB  65,15,92,222                        ; subps         %xmm14,%xmm3
6891  DB  102,15,56,20,221                    ; blendvps      %xmm0,%xmm5,%xmm3
6892  DB  68,15,88,203                        ; addps         %xmm3,%xmm9
; alpha: xmm4 = sa + (1 - sa)*da
6893  DB  65,15,88,226                        ; addps         %xmm10,%xmm4
; Fetch the next jump target, move results into xmm0/xmm2/xmm3, restore
; xmm4..xmm6 to their entry values, release the stack and tail-jump.
6894  DB  72,173                              ; lods          %ds:(%rsi),%rax
6895  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6896  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
6897  DB  15,40,220                           ; movaps        %xmm4,%xmm3
6898  DB  15,40,230                           ; movaps        %xmm6,%xmm4
6899  DB  65,15,40,237                        ; movaps        %xmm13,%xmm5
6900  DB  65,15,40,243                        ; movaps        %xmm11,%xmm6
6901  DB  72,131,196,24                       ; add           $0x18,%rsp
6902  DB  255,224                             ; jmpq          *%rax
6903
; ---------------------------------------------------------------------------
; _sk_overlay_sse41 -- four-lane SSE4.1 pipeline stage (raw machine code).
; Reviewer shorthand for entry values: sa = xmm3, da = xmm7, and for each of
; the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6).
; Each pair produces, per lane, written back to the s register:
;   s*(1 - da) + d*(1 - sa)
;     + ( 2*d <= da ? 2*s*d : sa*da - 2*(da - d)*(sa - s) )
; and finally xmm3 = sa + (1 - sa)*da.
; This is the same arithmetic as _sk_hardlight_sse41 except that the select
; condition tests 2*d <= da rather than 2*s <= sa.
; xmm4..xmm7 are only read.  Scratch: xmm8..xmm14, rax.
; Exit: loads the qword at (%rsi) into rax, steps rsi past it (direction
; flag assumed clear) and jumps to the loaded address.
; NOTE(review): presumably s/d are source/destination colour channels and
; sa/da their alphas -- confirm against the stage source.
; ---------------------------------------------------------------------------
6904PUBLIC _sk_overlay_sse41
6905_sk_overlay_sse41 LABEL PROC
; xmm0 (blend mask) and xmm1 (temporary) get reused, so park their entry
; values in xmm14 and xmm9.
6906  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
6907  DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
; xmm10 = 1.0f broadcast; xmm11 = 1 - da
6908  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6909  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
6910  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
6911  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
6912  DB  68,15,92,223                        ; subps         %xmm7,%xmm11
; --- pair 0 (s in xmm14, d in xmm4) ---
; xmm8 = s*(1 - da) + d*(1 - sa); xmm10 becomes 1 - sa
6913  DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
6914  DB  65,15,89,198                        ; mulps         %xmm14,%xmm0
6915  DB  68,15,92,211                        ; subps         %xmm3,%xmm10
6916  DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
6917  DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
6918  DB  68,15,88,192                        ; addps         %xmm0,%xmm8
; xmm13 = sa - s; xmm14 = s*d; xmm1 = da - d
6919  DB  68,15,40,235                        ; movaps        %xmm3,%xmm13
6920  DB  69,15,92,238                        ; subps         %xmm14,%xmm13
6921  DB  68,15,89,244                        ; mulps         %xmm4,%xmm14
6922  DB  15,40,207                           ; movaps        %xmm7,%xmm1
6923  DB  15,92,204                           ; subps         %xmm4,%xmm1
; mask = (2*d <= da)
6924  DB  15,40,196                           ; movaps        %xmm4,%xmm0
6925  DB  15,88,192                           ; addps         %xmm0,%xmm0
6926  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
6927  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
; xmm12 = sa*da (shared by all three pairs)
6928  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
6929  DB  68,15,89,231                        ; mulps         %xmm7,%xmm12
; xmm13 = sa*da - 2*(da - d)*(sa - s), or 2*s*d where the mask is set
6930  DB  65,15,89,205                        ; mulps         %xmm13,%xmm1
6931  DB  15,88,201                           ; addps         %xmm1,%xmm1
6932  DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
6933  DB  68,15,92,233                        ; subps         %xmm1,%xmm13
6934  DB  102,69,15,56,20,238                 ; blendvps      %xmm0,%xmm14,%xmm13
6935  DB  69,15,88,197                        ; addps         %xmm13,%xmm8
; --- pair 1 (s in xmm9, d in xmm5); result left in xmm1 ---
6936  DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
6937  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
6938  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
6939  DB  15,89,205                           ; mulps         %xmm5,%xmm1
6940  DB  15,88,200                           ; addps         %xmm0,%xmm1
6941  DB  68,15,40,235                        ; movaps        %xmm3,%xmm13
6942  DB  69,15,92,233                        ; subps         %xmm9,%xmm13
6943  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
6944  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
6945  DB  68,15,92,245                        ; subps         %xmm5,%xmm14
6946  DB  15,40,197                           ; movaps        %xmm5,%xmm0
6947  DB  15,88,192                           ; addps         %xmm0,%xmm0
6948  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
6949  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
6950  DB  69,15,89,245                        ; mulps         %xmm13,%xmm14
6951  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
6952  DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
6953  DB  69,15,92,238                        ; subps         %xmm14,%xmm13
6954  DB  102,69,15,56,20,233                 ; blendvps      %xmm0,%xmm9,%xmm13
6955  DB  65,15,88,205                        ; addps         %xmm13,%xmm1
; --- pair 2 (s in xmm2, d in xmm6); result accumulated in xmm9 ---
; Last use of xmm11 (1 - da) and xmm12 (sa*da), so both are consumed in place.
6956  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
6957  DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
6958  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
6959  DB  69,15,88,203                        ; addps         %xmm11,%xmm9
6960  DB  68,15,40,219                        ; movaps        %xmm3,%xmm11
6961  DB  68,15,92,218                        ; subps         %xmm2,%xmm11
6962  DB  15,89,214                           ; mulps         %xmm6,%xmm2
6963  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
6964  DB  68,15,92,238                        ; subps         %xmm6,%xmm13
6965  DB  15,40,198                           ; movaps        %xmm6,%xmm0
6966  DB  15,88,192                           ; addps         %xmm0,%xmm0
6967  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
6968  DB  15,88,210                           ; addps         %xmm2,%xmm2
6969  DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
6970  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
6971  DB  69,15,92,229                        ; subps         %xmm13,%xmm12
6972  DB  102,68,15,56,20,226                 ; blendvps      %xmm0,%xmm2,%xmm12
6973  DB  69,15,88,204                        ; addps         %xmm12,%xmm9
; xmm3 = sa + (1 - sa)*da
6974  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
6975  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
; Fetch the next jump target, publish pair-0 and pair-2 results, tail-jump.
6976  DB  72,173                              ; lods          %ds:(%rsi),%rax
6977  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
6978  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
6979  DB  255,224                             ; jmpq          *%rax
6980
6981PUBLIC _sk_softlight_sse41
6982_sk_softlight_sse41 LABEL PROC
6983  DB  72,131,236,56                       ; sub           $0x38,%rsp
6984  DB  15,41,116,36,16                     ; movaps        %xmm6,0x10(%rsp)
6985  DB  15,40,244                           ; movaps        %xmm4,%xmm6
6986  DB  15,41,84,36,32                      ; movaps        %xmm2,0x20(%rsp)
6987  DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
6988  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
6989  DB  15,87,228                           ; xorps         %xmm4,%xmm4
6990  DB  15,194,231,1                        ; cmpltps       %xmm7,%xmm4
6991  DB  15,40,198                           ; movaps        %xmm6,%xmm0
6992  DB  15,94,199                           ; divps         %xmm7,%xmm0
6993  DB  15,84,196                           ; andps         %xmm4,%xmm0
6994  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
6995  DB  102,15,110,200                      ; movd          %eax,%xmm1
6996  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
6997  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
6998  DB  68,15,92,208                        ; subps         %xmm0,%xmm10
6999  DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
7000  DB  68,15,40,248                        ; movaps        %xmm0,%xmm15
7001  DB  15,82,208                           ; rsqrtps       %xmm0,%xmm2
7002  DB  68,15,83,218                        ; rcpps         %xmm2,%xmm11
7003  DB  68,15,92,216                        ; subps         %xmm0,%xmm11
7004  DB  15,88,192                           ; addps         %xmm0,%xmm0
7005  DB  15,88,192                           ; addps         %xmm0,%xmm0
7006  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7007  DB  15,89,210                           ; mulps         %xmm2,%xmm2
7008  DB  15,88,208                           ; addps         %xmm0,%xmm2
7009  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
7010  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
7011  DB  68,15,92,241                        ; subps         %xmm1,%xmm14
7012  DB  68,15,89,242                        ; mulps         %xmm2,%xmm14
7013  DB  184,0,0,224,64                      ; mov           $0x40e00000,%eax
7014  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
7015  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
7016  DB  69,15,89,253                        ; mulps         %xmm13,%xmm15
7017  DB  69,15,88,254                        ; addps         %xmm14,%xmm15
7018  DB  15,40,198                           ; movaps        %xmm6,%xmm0
7019  DB  15,88,192                           ; addps         %xmm0,%xmm0
7020  DB  15,88,192                           ; addps         %xmm0,%xmm0
7021  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
7022  DB  102,69,15,56,20,223                 ; blendvps      %xmm0,%xmm15,%xmm11
7023  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
7024  DB  15,92,195                           ; subps         %xmm3,%xmm0
7025  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
7026  DB  68,15,88,211                        ; addps         %xmm3,%xmm10
7027  DB  68,15,89,214                        ; mulps         %xmm6,%xmm10
7028  DB  15,40,211                           ; movaps        %xmm3,%xmm2
7029  DB  15,89,214                           ; mulps         %xmm6,%xmm2
7030  DB  15,89,199                           ; mulps         %xmm7,%xmm0
7031  DB  68,15,89,216                        ; mulps         %xmm0,%xmm11
7032  DB  68,15,88,218                        ; addps         %xmm2,%xmm11
7033  DB  68,15,194,203,2                     ; cmpleps       %xmm3,%xmm9
7034  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
7035  DB  102,69,15,56,20,218                 ; blendvps      %xmm0,%xmm10,%xmm11
7036  DB  68,15,40,213                        ; movaps        %xmm5,%xmm10
7037  DB  68,15,94,215                        ; divps         %xmm7,%xmm10
7038  DB  68,15,84,212                        ; andps         %xmm4,%xmm10
7039  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
7040  DB  69,15,92,202                        ; subps         %xmm10,%xmm9
7041  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
7042  DB  15,88,210                           ; addps         %xmm2,%xmm2
7043  DB  15,88,210                           ; addps         %xmm2,%xmm2
7044  DB  15,40,194                           ; movaps        %xmm2,%xmm0
7045  DB  15,89,192                           ; mulps         %xmm0,%xmm0
7046  DB  15,88,194                           ; addps         %xmm2,%xmm0
7047  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
7048  DB  15,92,209                           ; subps         %xmm1,%xmm2
7049  DB  15,89,208                           ; mulps         %xmm0,%xmm2
7050  DB  65,15,82,194                        ; rsqrtps       %xmm10,%xmm0
7051  DB  68,15,83,240                        ; rcpps         %xmm0,%xmm14
7052  DB  69,15,92,242                        ; subps         %xmm10,%xmm14
7053  DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
7054  DB  68,15,88,210                        ; addps         %xmm2,%xmm10
7055  DB  15,40,197                           ; movaps        %xmm5,%xmm0
7056  DB  15,88,192                           ; addps         %xmm0,%xmm0
7057  DB  15,88,192                           ; addps         %xmm0,%xmm0
7058  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
7059  DB  102,69,15,56,20,242                 ; blendvps      %xmm0,%xmm10,%xmm14
7060  DB  65,15,40,196                        ; movaps        %xmm12,%xmm0
7061  DB  15,88,192                           ; addps         %xmm0,%xmm0
7062  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7063  DB  15,92,211                           ; subps         %xmm3,%xmm2
7064  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
7065  DB  68,15,88,203                        ; addps         %xmm3,%xmm9
7066  DB  15,41,44,36                         ; movaps        %xmm5,(%rsp)
7067  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
7068  DB  15,89,215                           ; mulps         %xmm7,%xmm2
7069  DB  68,15,89,242                        ; mulps         %xmm2,%xmm14
7070  DB  15,40,211                           ; movaps        %xmm3,%xmm2
7071  DB  15,89,213                           ; mulps         %xmm5,%xmm2
7072  DB  68,15,88,242                        ; addps         %xmm2,%xmm14
7073  DB  68,15,40,249                        ; movaps        %xmm1,%xmm15
7074  DB  15,194,195,2                        ; cmpleps       %xmm3,%xmm0
7075  DB  102,69,15,56,20,241                 ; blendvps      %xmm0,%xmm9,%xmm14
7076  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
7077  DB  15,40,108,36,16                     ; movaps        0x10(%rsp),%xmm5
7078  DB  15,40,197                           ; movaps        %xmm5,%xmm0
7079  DB  15,94,199                           ; divps         %xmm7,%xmm0
7080  DB  15,84,196                           ; andps         %xmm4,%xmm0
7081  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7082  DB  15,92,209                           ; subps         %xmm1,%xmm2
7083  DB  15,92,200                           ; subps         %xmm0,%xmm1
7084  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
7085  DB  15,82,224                           ; rsqrtps       %xmm0,%xmm4
7086  DB  68,15,83,204                        ; rcpps         %xmm4,%xmm9
7087  DB  68,15,92,200                        ; subps         %xmm0,%xmm9
7088  DB  15,88,192                           ; addps         %xmm0,%xmm0
7089  DB  15,88,192                           ; addps         %xmm0,%xmm0
7090  DB  15,40,224                           ; movaps        %xmm0,%xmm4
7091  DB  15,89,228                           ; mulps         %xmm4,%xmm4
7092  DB  15,88,224                           ; addps         %xmm0,%xmm4
7093  DB  15,89,226                           ; mulps         %xmm2,%xmm4
7094  DB  68,15,88,236                        ; addps         %xmm4,%xmm13
7095  DB  15,40,197                           ; movaps        %xmm5,%xmm0
7096  DB  15,88,192                           ; addps         %xmm0,%xmm0
7097  DB  15,88,192                           ; addps         %xmm0,%xmm0
7098  DB  15,194,199,2                        ; cmpleps       %xmm7,%xmm0
7099  DB  102,69,15,56,20,205                 ; blendvps      %xmm0,%xmm13,%xmm9
7100  DB  68,15,40,108,36,32                  ; movaps        0x20(%rsp),%xmm13
7101  DB  65,15,40,197                        ; movaps        %xmm13,%xmm0
7102  DB  15,88,192                           ; addps         %xmm0,%xmm0
7103  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7104  DB  15,92,211                           ; subps         %xmm3,%xmm2
7105  DB  15,89,202                           ; mulps         %xmm2,%xmm1
7106  DB  15,89,215                           ; mulps         %xmm7,%xmm2
7107  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
7108  DB  15,40,211                           ; movaps        %xmm3,%xmm2
7109  DB  15,89,213                           ; mulps         %xmm5,%xmm2
7110  DB  68,15,88,202                        ; addps         %xmm2,%xmm9
7111  DB  15,88,203                           ; addps         %xmm3,%xmm1
7112  DB  15,89,205                           ; mulps         %xmm5,%xmm1
7113  DB  15,40,213                           ; movaps        %xmm5,%xmm2
7114  DB  15,194,195,2                        ; cmpleps       %xmm3,%xmm0
7115  DB  102,68,15,56,20,201                 ; blendvps      %xmm0,%xmm1,%xmm9
7116  DB  68,15,92,255                        ; subps         %xmm7,%xmm15
7117  DB  69,15,89,199                        ; mulps         %xmm15,%xmm8
7118  DB  69,15,89,231                        ; mulps         %xmm15,%xmm12
7119  DB  69,15,89,253                        ; mulps         %xmm13,%xmm15
7120  DB  68,15,92,211                        ; subps         %xmm3,%xmm10
7121  DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
7122  DB  15,89,198                           ; mulps         %xmm6,%xmm0
7123  DB  68,15,88,192                        ; addps         %xmm0,%xmm8
7124  DB  69,15,88,195                        ; addps         %xmm11,%xmm8
7125  DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
7126  DB  15,40,44,36                         ; movaps        (%rsp),%xmm5
7127  DB  15,89,197                           ; mulps         %xmm5,%xmm0
7128  DB  68,15,88,224                        ; addps         %xmm0,%xmm12
7129  DB  69,15,88,230                        ; addps         %xmm14,%xmm12
7130  DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
7131  DB  15,89,194                           ; mulps         %xmm2,%xmm0
7132  DB  65,15,88,199                        ; addps         %xmm15,%xmm0
7133  DB  68,15,88,200                        ; addps         %xmm0,%xmm9
7134  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
7135  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
7136  DB  72,173                              ; lods          %ds:(%rsi),%rax
7137  DB  15,40,230                           ; movaps        %xmm6,%xmm4
7138  DB  15,40,242                           ; movaps        %xmm2,%xmm6
7139  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
7140  DB  65,15,40,204                        ; movaps        %xmm12,%xmm1
7141  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
7142  DB  72,131,196,56                       ; add           $0x38,%rsp
7143  DB  255,224                             ; jmpq          *%rax
7144
; _sk_clamp_0_sse41
;   Clamps every lane of xmm0..xmm3 from below at 0.0f: xmmN = max(xmmN, 0).
;   Scratch: xmm8 (zeroed), rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax (lods also steps rsi)
;   and jumps to it.
7145PUBLIC _sk_clamp_0_sse41
7146_sk_clamp_0_sse41 LABEL PROC
7147  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
7148  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
7149  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
7150  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
7151  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
7152  DB  72,173                              ; lods          %ds:(%rsi),%rax
7153  DB  255,224                             ; jmpq          *%rax
7154
; _sk_clamp_1_sse41
;   Clamps every lane of xmm0..xmm3 from above at 1.0f: xmmN = min(xmmN, 1).
;   0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0; it is moved
;   through eax into xmm8 and broadcast to all four lanes with shufps $0.
;   Scratch: xmm8, rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax and jumps to it.
7155PUBLIC _sk_clamp_1_sse41
7156_sk_clamp_1_sse41 LABEL PROC
7157  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7158  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
7159  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7160  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
7161  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
7162  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
7163  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
7164  DB  72,173                              ; lods          %ds:(%rsi),%rax
7165  DB  255,224                             ; jmpq          *%rax
7166
; _sk_clamp_a_sse41
;   First clamps xmm3 to at most 1.0f, then clamps xmm0, xmm1 and xmm2 to at
;   most the (already clamped) xmm3:
;       xmm3 = min(xmm3, 1.0);  xmm0..2 = min(xmm0..2, xmm3)
;   NOTE(review): the stage name suggests xmm3 is alpha and xmm0..2 are colour
;   channels -- confirm against the stage definitions.
;   Scratch: xmm8 (broadcast 1.0f), rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax and jumps to it.
7167PUBLIC _sk_clamp_a_sse41
7168_sk_clamp_a_sse41 LABEL PROC
7169  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7170  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
7171  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7172  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
7173  DB  15,93,195                           ; minps         %xmm3,%xmm0
7174  DB  15,93,203                           ; minps         %xmm3,%xmm1
7175  DB  15,93,211                           ; minps         %xmm3,%xmm2
7176  DB  72,173                              ; lods          %ds:(%rsi),%rax
7177  DB  255,224                             ; jmpq          *%rax
7178
; _sk_set_rgb_sse41
;   Reads one 8-byte entry from (%rsi) and uses it as a pointer to three
;   consecutive 32-bit floats. The floats at offsets 0, 4 and 8 are broadcast
;   to all four lanes of xmm0, xmm1 and xmm2 respectively. xmm3 is not written.
;   Scratch: rax.
;   Exit: a second lods fetches the next 8-byte entry into rax, which is then
;   jumped to.
7179PUBLIC _sk_set_rgb_sse41
7180_sk_set_rgb_sse41 LABEL PROC
7181  DB  72,173                              ; lods          %ds:(%rsi),%rax
7182  DB  243,15,16,0                         ; movss         (%rax),%xmm0
7183  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
7184  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7185  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
7186  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
7187  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
7188  DB  72,173                              ; lods          %ds:(%rsi),%rax
7189  DB  255,224                             ; jmpq          *%rax
7190
; _sk_swap_rb_sse41
;   Exchanges the contents of xmm0 and xmm2, using xmm8 as the temporary.
;   xmm1 and xmm3 are not touched.
;   Scratch: xmm8, rax.
;   Exit: the next 8-byte entry at (%rsi) is loaded into rax part-way through
;   the swap and jumped to at the end.
7191PUBLIC _sk_swap_rb_sse41
7192_sk_swap_rb_sse41 LABEL PROC
7193  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
7194  DB  72,173                              ; lods          %ds:(%rsi),%rax
7195  DB  15,40,194                           ; movaps        %xmm2,%xmm0
7196  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
7197  DB  255,224                             ; jmpq          *%rax
7198
; _sk_swap_sse41
;   Exchanges the register group xmm0..xmm3 with the group xmm4..xmm7
;   (xmm0<->xmm4, xmm1<->xmm5, xmm2<->xmm6, xmm3<->xmm7).
;   The old xmm0..xmm3 are parked in xmm11, xmm10, xmm9 and xmm8 while
;   xmm4..xmm7 are copied down, then written back into xmm4..xmm7.
;   Scratch: xmm8..xmm11, rax.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7199PUBLIC _sk_swap_sse41
7200_sk_swap_sse41 LABEL PROC
7201  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
7202  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
7203  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
7204  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
7205  DB  72,173                              ; lods          %ds:(%rsi),%rax
7206  DB  15,40,196                           ; movaps        %xmm4,%xmm0
7207  DB  15,40,205                           ; movaps        %xmm5,%xmm1
7208  DB  15,40,214                           ; movaps        %xmm6,%xmm2
7209  DB  15,40,223                           ; movaps        %xmm7,%xmm3
7210  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
7211  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
7212  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
7213  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
7214  DB  255,224                             ; jmpq          *%rax
7215
; _sk_move_src_dst_sse41
;   Copies xmm0..xmm3 into xmm4..xmm7 (xmm4=xmm0, xmm5=xmm1, xmm6=xmm2,
;   xmm7=xmm3). xmm0..xmm3 are left unchanged.
;   NOTE(review): the stage name suggests xmm0..3 are "src" and xmm4..7 are
;   "dst" -- confirm against the stage definitions.
;   Scratch: rax.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7216PUBLIC _sk_move_src_dst_sse41
7217_sk_move_src_dst_sse41 LABEL PROC
7218  DB  72,173                              ; lods          %ds:(%rsi),%rax
7219  DB  15,40,224                           ; movaps        %xmm0,%xmm4
7220  DB  15,40,233                           ; movaps        %xmm1,%xmm5
7221  DB  15,40,242                           ; movaps        %xmm2,%xmm6
7222  DB  15,40,251                           ; movaps        %xmm3,%xmm7
7223  DB  255,224                             ; jmpq          *%rax
7224
; _sk_move_dst_src_sse41
;   Copies xmm4..xmm7 into xmm0..xmm3 (xmm0=xmm4, xmm1=xmm5, xmm2=xmm6,
;   xmm3=xmm7). xmm4..xmm7 are left unchanged.
;   NOTE(review): the stage name suggests xmm4..7 are "dst" and xmm0..3 are
;   "src" -- confirm against the stage definitions.
;   Scratch: rax.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7225PUBLIC _sk_move_dst_src_sse41
7226_sk_move_dst_src_sse41 LABEL PROC
7227  DB  72,173                              ; lods          %ds:(%rsi),%rax
7228  DB  15,40,196                           ; movaps        %xmm4,%xmm0
7229  DB  15,40,205                           ; movaps        %xmm5,%xmm1
7230  DB  15,40,214                           ; movaps        %xmm6,%xmm2
7231  DB  15,40,223                           ; movaps        %xmm7,%xmm3
7232  DB  255,224                             ; jmpq          *%rax
7233
; _sk_premul_sse41
;   Multiplies xmm0, xmm1 and xmm2 lane-wise by xmm3; xmm3 itself is unchanged.
;   NOTE(review): presumably xmm3 is alpha (premultiplication) -- inferred
;   from the stage name, confirm against the stage definitions.
;   Scratch: rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax and jumps to it.
7234PUBLIC _sk_premul_sse41
7235_sk_premul_sse41 LABEL PROC
7236  DB  15,89,195                           ; mulps         %xmm3,%xmm0
7237  DB  15,89,203                           ; mulps         %xmm3,%xmm1
7238  DB  15,89,211                           ; mulps         %xmm3,%xmm2
7239  DB  72,173                              ; lods          %ds:(%rsi),%rax
7240  DB  255,224                             ; jmpq          *%rax
7241
; _sk_unpremul_sse41
;   Multiplies xmm0, xmm1 and xmm2 by a per-lane scale derived from xmm3:
;       scale = (xmm3 != 0) ? 1.0f / xmm3 : 0
;   The divide is performed unconditionally; lanes where xmm3 == 0 are then
;   zeroed by AND-ing the quotient with the cmpneqps mask, so those lanes
;   multiply by 0 rather than by the result of dividing by zero.
;   xmm3 is left unchanged.
;   Scratch: xmm8 (mask / scale), xmm9 (1.0f, then 1/xmm3), rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax and jumps to it.
7242PUBLIC _sk_unpremul_sse41
7243_sk_unpremul_sse41 LABEL PROC
7244  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
7245  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7246  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
7247  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
7248  DB  68,15,94,203                        ; divps         %xmm3,%xmm9
7249  DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
7250  DB  69,15,84,193                        ; andps         %xmm9,%xmm8
7251  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
7252  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
7253  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
7254  DB  72,173                              ; lods          %ds:(%rsi),%rax
7255  DB  255,224                             ; jmpq          *%rax
7256
; _sk_from_srgb_sse41
;   Applies the same piecewise function independently to xmm0, xmm1 and xmm2
;   (xmm3 is not touched). For each lane value x:
;       lo = x * K_LIN                       ; K_LIN = 0x3d9e8391 (~0.0774)
;       hi = (0.3*x + K_B) * x*x + K_C       ; 0.3   = 0x3e99999a
;                                            ; K_B   = 0x3f328f5c (~0.6975)
;                                            ; K_C   = 0x3b23d70a (~0.0025)
;       result = (x < K_T) ? lo : hi         ; K_T   = 0x3d6147ae (~0.055)
;   Each constant is loaded as an immediate into eax, moved to an xmm register
;   and broadcast with shufps $0.
;   blendvps uses xmm0 as its implicit mask operand, which is why every
;   comparison result is placed in xmm0 before the blend and why the first
;   channel's result is built in xmm9 and only copied to xmm0 at the end.
;   NOTE(review): presumably this is an sRGB-to-linear approximation -- inferred
;   from the stage name; confirm the intended curve against the generator.
;   Scratch: xmm8..xmm15, rax.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7257PUBLIC _sk_from_srgb_sse41
7258_sk_from_srgb_sse41 LABEL PROC
; --- channel in xmm0: linear branch in xmm10, polynomial branch in xmm9 ---
7259  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
7260  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
7261  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
7262  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
7263  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
7264  DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
7265  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
7266  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
7267  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
7268  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7269  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
7270  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
7271  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
7272  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
7273  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
7274  DB  69,15,88,204                        ; addps         %xmm12,%xmm9
7275  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
7276  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
7277  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
7278  DB  69,15,89,206                        ; mulps         %xmm14,%xmm9
7279  DB  69,15,88,205                        ; addps         %xmm13,%xmm9
; xmm14 held x*x above; from here on it holds the broadcast threshold K_T.
7280  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
7281  DB  102,68,15,110,240                   ; movd          %eax,%xmm14
7282  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
7283  DB  65,15,194,198,1                     ; cmpltps       %xmm14,%xmm0
7284  DB  102,69,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm9
; --- channel in xmm1: linear branch in xmm15, polynomial branch in xmm10 ---
7285  DB  69,15,40,251                        ; movaps        %xmm11,%xmm15
7286  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
7287  DB  15,40,193                           ; movaps        %xmm1,%xmm0
7288  DB  15,89,192                           ; mulps         %xmm0,%xmm0
7289  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
7290  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
7291  DB  69,15,88,212                        ; addps         %xmm12,%xmm10
7292  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
7293  DB  69,15,88,213                        ; addps         %xmm13,%xmm10
7294  DB  65,15,194,206,1                     ; cmpltps       %xmm14,%xmm1
7295  DB  15,40,193                           ; movaps        %xmm1,%xmm0
7296  DB  102,69,15,56,20,215                 ; blendvps      %xmm0,%xmm15,%xmm10
; --- channel in xmm2: linear branch in xmm11, polynomial branch in xmm8 ---
; (the broadcast constants in xmm11 and xmm8 are consumed in place here)
7297  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
7298  DB  15,40,194                           ; movaps        %xmm2,%xmm0
7299  DB  15,89,192                           ; mulps         %xmm0,%xmm0
7300  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
7301  DB  69,15,88,196                        ; addps         %xmm12,%xmm8
7302  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
7303  DB  69,15,88,197                        ; addps         %xmm13,%xmm8
7304  DB  65,15,194,214,1                     ; cmpltps       %xmm14,%xmm2
7305  DB  15,40,194                           ; movaps        %xmm2,%xmm0
7306  DB  102,69,15,56,20,195                 ; blendvps      %xmm0,%xmm11,%xmm8
; --- move the three results back into xmm0..xmm2 and continue ---
7307  DB  72,173                              ; lods          %ds:(%rsi),%rax
7308  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
7309  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
7310  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
7311  DB  255,224                             ; jmpq          *%rax
7312
; _sk_to_srgb_sse41
;   Applies the same piecewise function independently to the values that enter
;   in xmm0, xmm1 and xmm2, and leaves xmm3..xmm7 with their entry values.
;   For each lane value x:
;       rs   = rsqrtps(x)                    ; ~ x^(-1/2)
;       sqrt = rcpps(rs)                     ; ~ x^(1/2)
;       ftrt = rsqrtps(rs)                   ; ~ x^(1/4)
;       hi   = min(1.0, K_F*ftrt + K_S*sqrt + K_N)
;                                            ; K_F = 0x3ed287c2 (~0.4112)
;                                            ; K_S = 0x3f306fce (~0.6892)
;                                            ; K_N = 0x3dca57a8 with the sign
;                                            ;       bit flipped (~ -0.0988)
;       lo   = K_L * x                       ; K_L = 0x41475c29 (~12.46)
;       result = (x < K_T) ? lo : hi         ; K_T = 0x3b8ce704 (~0.0043)
;   rsqrtps/rcpps are approximate instructions, so sqrt and ftrt are estimates.
;
;   Register handling: 0x18 bytes of stack are reserved and xmm7 is spilled to
;   (%rsp); xmm1..xmm6 are then each moved up one register (xmm1->xmm2, ...,
;   xmm6->xmm7). During the computation the entry xmm1 therefore lives in xmm2
;   and the entry xmm2 lives in xmm3. At the end xmm3..xmm7 are moved back down
;   and xmm7 is reloaded from the stack before rsp is restored.
;   blendvps takes its mask implicitly from xmm0, so each comparison result is
;   placed in xmm0 before the blend.
;   NOTE(review): presumably a linear-to-sRGB approximation -- inferred from
;   the stage name; confirm the intended curve against the generator.
;   Scratch: xmm8..xmm15, rax, 0x18 bytes of stack.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7313PUBLIC _sk_to_srgb_sse41
7314_sk_to_srgb_sse41 LABEL PROC
; --- spill xmm7 and shift xmm1..xmm6 up by one register ---
7315  DB  72,131,236,24                       ; sub           $0x18,%rsp
7316  DB  15,41,60,36                         ; movaps        %xmm7,(%rsp)
7317  DB  15,40,254                           ; movaps        %xmm6,%xmm7
7318  DB  15,40,245                           ; movaps        %xmm5,%xmm6
7319  DB  15,40,236                           ; movaps        %xmm4,%xmm5
7320  DB  15,40,227                           ; movaps        %xmm3,%xmm4
7321  DB  15,40,218                           ; movaps        %xmm2,%xmm3
7322  DB  15,40,209                           ; movaps        %xmm1,%xmm2
; --- first channel (xmm0): result accumulates in xmm9 ---
7323  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
7324  DB  69,15,83,200                        ; rcpps         %xmm8,%xmm9
7325  DB  69,15,82,248                        ; rsqrtps       %xmm8,%xmm15
7326  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
7327  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
7328  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
7329  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
7330  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
7331  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7332  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
7333  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7334  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
7335  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
7336  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
7337  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
7338  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
7339  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
; The xor flips the float's sign bit, so xmm14 receives the negated constant.
7340  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
7341  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
7342  DB  102,68,15,110,240                   ; movd          %eax,%xmm14
7343  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
7344  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
7345  DB  69,15,88,206                        ; addps         %xmm14,%xmm9
7346  DB  69,15,89,252                        ; mulps         %xmm12,%xmm15
7347  DB  69,15,88,249                        ; addps         %xmm9,%xmm15
7348  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
7349  DB  69,15,93,207                        ; minps         %xmm15,%xmm9
; xmm15 is reused from here on as the broadcast threshold K_T.
7350  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
7351  DB  102,68,15,110,248                   ; movd          %eax,%xmm15
7352  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
7353  DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
7354  DB  102,69,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm9
; --- second channel (entry xmm1, now in xmm2): result built directly in xmm1 ---
7355  DB  68,15,82,210                        ; rsqrtps       %xmm2,%xmm10
7356  DB  65,15,83,194                        ; rcpps         %xmm10,%xmm0
7357  DB  69,15,82,210                        ; rsqrtps       %xmm10,%xmm10
7358  DB  65,15,89,197                        ; mulps         %xmm13,%xmm0
7359  DB  65,15,88,198                        ; addps         %xmm14,%xmm0
7360  DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
7361  DB  68,15,88,208                        ; addps         %xmm0,%xmm10
7362  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
7363  DB  65,15,93,202                        ; minps         %xmm10,%xmm1
7364  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
7365  DB  68,15,89,210                        ; mulps         %xmm2,%xmm10
7366  DB  65,15,194,215,1                     ; cmpltps       %xmm15,%xmm2
7367  DB  15,40,194                           ; movaps        %xmm2,%xmm0
7368  DB  102,65,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm1
; --- third channel (entry xmm2, now in xmm3): result accumulates in xmm8 ---
; (the broadcast 1.0 in xmm8 and K_L in xmm11 are consumed in place here)
7369  DB  15,82,195                           ; rsqrtps       %xmm3,%xmm0
7370  DB  15,83,208                           ; rcpps         %xmm0,%xmm2
7371  DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
7372  DB  65,15,88,214                        ; addps         %xmm14,%xmm2
7373  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7374  DB  65,15,89,196                        ; mulps         %xmm12,%xmm0
7375  DB  15,88,194                           ; addps         %xmm2,%xmm0
7376  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
7377  DB  68,15,89,219                        ; mulps         %xmm3,%xmm11
7378  DB  65,15,194,223,1                     ; cmpltps       %xmm15,%xmm3
7379  DB  15,40,195                           ; movaps        %xmm3,%xmm0
7380  DB  102,69,15,56,20,195                 ; blendvps      %xmm0,%xmm11,%xmm8
; --- publish results, undo the register shift, release the stack slot ---
7381  DB  72,173                              ; lods          %ds:(%rsi),%rax
7382  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
7383  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
7384  DB  15,40,220                           ; movaps        %xmm4,%xmm3
7385  DB  15,40,229                           ; movaps        %xmm5,%xmm4
7386  DB  15,40,238                           ; movaps        %xmm6,%xmm5
7387  DB  15,40,247                           ; movaps        %xmm7,%xmm6
7388  DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
7389  DB  72,131,196,24                       ; add           $0x18,%rsp
7390  DB  255,224                             ; jmpq          *%rax
7391
; _sk_from_2dot2_sse41
;   Raises each lane of xmm0, xmm1 and xmm2 to approximately the power 2.2
;   using only rsqrtps and multiplies, then clamps the result at >= 0.
;   xmm3 is not touched. For each lane value x, chaining rsqrtps gives
;       x^(-1/2), x^(1/4), x^(-1/8), x^(1/16), x^(-1/32), x^(1/64)
;   and the product formed is
;       x^2 * (x^(1/16))^3 * x^(1/64) = x^(2 + 13/64) ~= x^2.203
;   rsqrtps is an approximate instruction, so every factor is an estimate.
;   The final maxps against a zeroed xmm10 forces the result to be >= 0.
;   Scratch: xmm8..xmm11, rax.
;   Exit: jumps to the 8-byte entry loaded from (%rsi).
7392PUBLIC _sk_from_2dot2_sse41
7393_sk_from_2dot2_sse41 LABEL PROC
; --- channel in xmm0 (copied to xmm8 so xmm0 can receive the result) ---
7394  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
7395  DB  65,15,82,192                        ; rsqrtps       %xmm8,%xmm0
7396  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7397  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7398  DB  68,15,82,200                        ; rsqrtps       %xmm0,%xmm9
7399  DB  65,15,82,193                        ; rsqrtps       %xmm9,%xmm0
7400  DB  68,15,82,208                        ; rsqrtps       %xmm0,%xmm10
7401  DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
7402  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
7403  DB  15,89,192                           ; mulps         %xmm0,%xmm0
7404  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
7405  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
7406  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
; xmm10 becomes the shared zero vector used by all three maxps clamps.
7407  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
7408  DB  65,15,95,194                        ; maxps         %xmm10,%xmm0
; --- channel in xmm1: result accumulates in xmm8 ---
7409  DB  68,15,82,193                        ; rsqrtps       %xmm1,%xmm8
7410  DB  69,15,82,192                        ; rsqrtps       %xmm8,%xmm8
7411  DB  69,15,82,192                        ; rsqrtps       %xmm8,%xmm8
7412  DB  69,15,82,200                        ; rsqrtps       %xmm8,%xmm9
7413  DB  69,15,82,193                        ; rsqrtps       %xmm9,%xmm8
7414  DB  69,15,82,216                        ; rsqrtps       %xmm8,%xmm11
7415  DB  15,89,201                           ; mulps         %xmm1,%xmm1
7416  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
7417  DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
7418  DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
7419  DB  68,15,89,193                        ; mulps         %xmm1,%xmm8
7420  DB  69,15,89,195                        ; mulps         %xmm11,%xmm8
7421  DB  69,15,95,194                        ; maxps         %xmm10,%xmm8
; --- channel in xmm2: result accumulates in xmm9 ---
7422  DB  15,82,202                           ; rsqrtps       %xmm2,%xmm1
7423  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7424  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7425  DB  68,15,82,217                        ; rsqrtps       %xmm1,%xmm11
7426  DB  65,15,82,203                        ; rsqrtps       %xmm11,%xmm1
7427  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7428  DB  15,89,210                           ; mulps         %xmm2,%xmm2
7429  DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
7430  DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
7431  DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
7432  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
7433  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
7434  DB  69,15,95,202                        ; maxps         %xmm10,%xmm9
; --- move the xmm1/xmm2 results into place and continue ---
7435  DB  72,173                              ; lods          %ds:(%rsi),%rax
7436  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
7437  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
7438  DB  255,224                             ; jmpq          *%rax
7439
; _sk_to_2dot2_sse41
;   Raises each lane of xmm0, xmm1 and xmm2 to approximately the power 1/2.2
;   using rsqrtps/rcpps and multiplies, then clamps the result at >= 0.
;   xmm3 is not touched. For each lane value x, chaining rsqrtps gives
;       x^(-1/2), x^(1/4), x^(-1/8), x^(1/16), x^(-1/32), x^(1/64)
;   and the product formed is
;       rcp(x^(-1/2)) * x^(-1/32) * rcp(x^(1/64))
;         = x^(1/2 - 1/32 - 1/64) = x^(29/64) ~= x^0.453
;   rsqrtps and rcpps are approximate instructions, so every factor is an
;   estimate. The final maxps against a zeroed xmm8 forces the result >= 0.
;   Scratch: xmm8..xmm10, rax.
;   Exit: loads the next 8-byte entry at (%rsi) into rax and jumps to it.
7440PUBLIC _sk_to_2dot2_sse41
7441_sk_to_2dot2_sse41 LABEL PROC
; --- channel in xmm0 ---
7442  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
7443  DB  65,15,82,192                        ; rsqrtps       %xmm8,%xmm0
7444  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7445  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7446  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
7447  DB  68,15,82,200                        ; rsqrtps       %xmm0,%xmm9
7448  DB  69,15,83,192                        ; rcpps         %xmm8,%xmm8
7449  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
7450  DB  65,15,83,193                        ; rcpps         %xmm9,%xmm0
7451  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
; xmm8 becomes the shared zero vector used by all three maxps clamps.
7452  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
7453  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
; --- channel in xmm1 ---
7454  DB  68,15,82,201                        ; rsqrtps       %xmm1,%xmm9
7455  DB  65,15,82,201                        ; rsqrtps       %xmm9,%xmm1
7456  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7457  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7458  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
7459  DB  68,15,82,209                        ; rsqrtps       %xmm1,%xmm10
7460  DB  69,15,83,201                        ; rcpps         %xmm9,%xmm9
7461  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
7462  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
7463  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
7464  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
; --- channel in xmm2 ---
7465  DB  68,15,82,202                        ; rsqrtps       %xmm2,%xmm9
7466  DB  65,15,82,209                        ; rsqrtps       %xmm9,%xmm2
7467  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
7468  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
7469  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
7470  DB  68,15,82,210                        ; rsqrtps       %xmm2,%xmm10
7471  DB  69,15,83,201                        ; rcpps         %xmm9,%xmm9
7472  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
7473  DB  65,15,83,210                        ; rcpps         %xmm10,%xmm2
7474  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
7475  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
7476  DB  72,173                              ; lods          %ds:(%rsi),%rax
7477  DB  255,224                             ; jmpq          *%rax
7478
; _sk_scale_1_float_sse41
;   Reads one 8-byte entry from (%rsi) and uses it as a pointer to a single
;   32-bit float. That float is broadcast to all lanes of xmm8 and multiplied
;   into xmm0, xmm1, xmm2 and xmm3.
;   Scratch: xmm8, rax.
;   Exit: a second lods fetches the next 8-byte entry into rax, which is then
;   jumped to.
7479PUBLIC _sk_scale_1_float_sse41
7480_sk_scale_1_float_sse41 LABEL PROC
7481  DB  72,173                              ; lods          %ds:(%rsi),%rax
7482  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
7483  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7484  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
7485  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
7486  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
7487  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
7488  DB  72,173                              ; lods          %ds:(%rsi),%rax
7489  DB  255,224                             ; jmpq          *%rax
7490
; _sk_scale_u8_sse41
;   Reads one 8-byte entry from (%rsi), dereferences it once more to obtain a
;   base address, and loads four bytes from base + rdi. The bytes are
;   zero-extended to 32-bit integers, converted to float and multiplied by
;   0x3b808081 (~1/255), giving a per-lane factor in xmm9 that is then
;   multiplied into xmm0, xmm1, xmm2 and xmm3.
;   NOTE(review): rdi is used as a byte offset into the buffer; presumably it
;   is the current pixel index -- confirm against the pipeline driver.
;   Scratch: xmm8, xmm9, rax.
;   Exit: a second lods fetches the next 8-byte entry into rax, which is then
;   jumped to.
7491PUBLIC _sk_scale_u8_sse41
7492_sk_scale_u8_sse41 LABEL PROC
7493  DB  72,173                              ; lods          %ds:(%rsi),%rax
7494  DB  72,139,0                            ; mov           (%rax),%rax
7495  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
7496  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
7497  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7498  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
7499  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
7500  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
7501  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
7502  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
7503  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
7504  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
7505  DB  72,173                              ; lods          %ds:(%rsi),%rax
7506  DB  255,224                             ; jmpq          *%rax
7507
; _sk_lerp_1_float_sse41
;   Reads one 8-byte entry from (%rsi) and uses it as a pointer to a single
;   32-bit float t, which is broadcast to all lanes of xmm8. Each of
;   xmm0..xmm3 is then interpolated towards itself from its counterpart in
;   xmm4..xmm7:
;       xmmN = (xmmN - xmm(N+4)) * t + xmm(N+4)        for N = 0..3
;   so t = 0 yields xmm(N+4) and t = 1 yields the original xmmN (up to
;   floating-point rounding). xmm4..xmm7 are not modified.
;   Scratch: xmm8, rax.
;   Exit: a second lods fetches the next 8-byte entry into rax, which is then
;   jumped to.
7508PUBLIC _sk_lerp_1_float_sse41
7509_sk_lerp_1_float_sse41 LABEL PROC
7510  DB  72,173                              ; lods          %ds:(%rsi),%rax
7511  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
7512  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7513  DB  15,92,196                           ; subps         %xmm4,%xmm0
7514  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
7515  DB  15,88,196                           ; addps         %xmm4,%xmm0
7516  DB  15,92,205                           ; subps         %xmm5,%xmm1
7517  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
7518  DB  15,88,205                           ; addps         %xmm5,%xmm1
7519  DB  15,92,214                           ; subps         %xmm6,%xmm2
7520  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
7521  DB  15,88,214                           ; addps         %xmm6,%xmm2
7522  DB  15,92,223                           ; subps         %xmm7,%xmm3
7523  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
7524  DB  15,88,223                           ; addps         %xmm7,%xmm3
7525  DB  72,173                              ; lods          %ds:(%rsi),%rax
7526  DB  255,224                             ; jmpq          *%rax
7527
; _sk_lerp_u8_sse41: linear interpolation with a per-lane 8-bit weight.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a byte buffer.
;   Four bytes at base + rdi are zero-extended, converted to float and scaled
;   by 0x3b808081 (~1/255) to form the weight t in xmm9.
;   For each pair (xmm0,xmm4), (xmm1,xmm5), (xmm2,xmm6), (xmm3,xmm7):
;       x = (x - d) * t + d
;   xmm4..xmm7 are only read (presumably the dst channels - confirm).
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, xmm8, xmm9.
7528PUBLIC _sk_lerp_u8_sse41
7529_sk_lerp_u8_sse41 LABEL PROC
7530  DB  72,173                              ; lods          %ds:(%rsi),%rax
7531  DB  72,139,0                            ; mov           (%rax),%rax
7532  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
7533  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
; 0x3b808081 is the float bit pattern for ~1/255.
7534  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7535  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
7536  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
7537  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
7538  DB  15,92,196                           ; subps         %xmm4,%xmm0
7539  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
7540  DB  15,88,196                           ; addps         %xmm4,%xmm0
7541  DB  15,92,205                           ; subps         %xmm5,%xmm1
7542  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
7543  DB  15,88,205                           ; addps         %xmm5,%xmm1
7544  DB  15,92,214                           ; subps         %xmm6,%xmm2
7545  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
7546  DB  15,88,214                           ; addps         %xmm6,%xmm2
7547  DB  15,92,223                           ; subps         %xmm7,%xmm3
7548  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
7549  DB  15,88,223                           ; addps         %xmm7,%xmm3
7550  DB  72,173                              ; lods          %ds:(%rsi),%rax
7551  DB  255,224                             ; jmpq          *%rax
7552
; _sk_lerp_565_sse41: linear interpolation with per-channel weights unpacked
; from 16-bit 5-6-5 values.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a 16-bit buffer.
;   Four words at base + rdi*2 are zero-extended into xmm8, then split:
;       xmm10 = float(v & 0xf800) * 0x37842108   (~1/63488, i.e. 1/(31<<11))
;       xmm11 = float(v & 0x07e0) * 0x3a020821   (~1/2016,  i.e. 1/(63<<5))
;       xmm3  = float(v & 0x001f) * 0x3d042108   (~1/31)
;   Then x = (x - d) * t + d for (xmm0,xmm4,xmm10), (xmm1,xmm5,xmm11) and
;   (xmm2,xmm6,xmm3).
;   xmm3 is used as scratch throughout, so its incoming value is discarded;
;   it leaves holding 1.0 (0x3f800000) in every lane. xmm7 is not touched.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, xmm8..xmm11.
7553PUBLIC _sk_lerp_565_sse41
7554_sk_lerp_565_sse41 LABEL PROC
7555  DB  72,173                              ; lods          %ds:(%rsi),%rax
7556  DB  72,139,0                            ; mov           (%rax),%rax
7557  DB  102,68,15,56,51,4,120               ; pmovzxwd      (%rax,%rdi,2),%xmm8
; Top 5 bits (mask 0xf800) -> weight in xmm10.
7558  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
7559  DB  102,15,110,216                      ; movd          %eax,%xmm3
7560  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
7561  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
7562  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
7563  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
7564  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
7565  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
7566  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
; Middle 6 bits (mask 0x7e0) -> weight in xmm11.
7567  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
7568  DB  102,15,110,216                      ; movd          %eax,%xmm3
7569  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
7570  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
7571  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
7572  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
7573  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
7574  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
7575  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
; Low 5 bits (mask 0x1f) -> weight in xmm3.
7576  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
7577  DB  102,15,110,216                      ; movd          %eax,%xmm3
7578  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
7579  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
7580  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
7581  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
7582  DB  102,15,110,216                      ; movd          %eax,%xmm3
7583  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7584  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Interpolate each channel with its own weight.
7585  DB  15,92,196                           ; subps         %xmm4,%xmm0
7586  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
7587  DB  15,88,196                           ; addps         %xmm4,%xmm0
7588  DB  15,92,205                           ; subps         %xmm5,%xmm1
7589  DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
7590  DB  15,88,205                           ; addps         %xmm5,%xmm1
7591  DB  15,92,214                           ; subps         %xmm6,%xmm2
7592  DB  15,89,211                           ; mulps         %xmm3,%xmm2
7593  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = 1.0 in every lane.
7594  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7595  DB  102,15,110,216                      ; movd          %eax,%xmm3
7596  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7597  DB  72,173                              ; lods          %ds:(%rsi),%rax
7598  DB  255,224                             ; jmpq          *%rax
7599
; _sk_load_tables_sse41: load four packed 32-bit pixels and map three of
; their bytes through per-channel float lookup tables.
;   The first lods fetches the next 8-byte word from [rsi] into rax, which is
;   then read as a structure of four pointers:
;       [rax+0x00] pixel buffer     [rax+0x08] table for byte 0
;       [rax+0x10] table for byte 1 [rax+0x18] table for byte 2
;   Four dwords at pixels + rdi*4 are loaded (unaligned) into xmm8.
;       xmm0[i] = table0[ px[i]        & 0xff ]
;       xmm1[i] = table1[(px[i] >> 8)  & 0xff ]
;       xmm2[i] = table2[(px[i] >> 16) & 0xff ]
;       xmm3[i] = float(px[i] >> 24) * 0x3b808081   (~1/255)
;   Table entries are read as 4-byte floats (index scaled by 4).
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, r8..r11, xmm8 (xmm3 doubles as a temporary).
7600PUBLIC _sk_load_tables_sse41
7601_sk_load_tables_sse41 LABEL PROC
7602  DB  72,173                              ; lods          %ds:(%rsi),%rax
7603  DB  72,139,8                            ; mov           (%rax),%rcx
7604  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
7605  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
; Build the 0xff lane mask and split the pixel into three byte-index vectors.
7606  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
7607  DB  102,15,110,193                      ; movd          %ecx,%xmm0
7608  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
7609  DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
7610  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
7611  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
7612  DB  102,65,15,111,208                   ; movdqa        %xmm8,%xmm2
7613  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
7614  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
7615  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; Byte-0 gather: lanes 2,3 come out via pextrq, lanes 0,1 via movq; each
; 64-bit value is split into its low dword and (after shr 32) high dword.
7616  DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
7617  DB  65,137,201                          ; mov           %ecx,%r9d
7618  DB  72,193,233,32                       ; shr           $0x20,%rcx
7619  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
7620  DB  69,137,211                          ; mov           %r10d,%r11d
7621  DB  73,193,234,32                       ; shr           $0x20,%r10
7622  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
7623  DB  102,67,15,58,33,4,144,16            ; insertps      $0x10,(%r8,%r10,4),%xmm0
7624  DB  102,67,15,58,33,4,136,32            ; insertps      $0x20,(%r8,%r9,4),%xmm0
7625  DB  102,65,15,58,33,4,136,48            ; insertps      $0x30,(%r8,%rcx,4),%xmm0
; Byte-1 gather. The lanes were masked to 0..255, so "shr $0x1e" on the
; 64-bit pair leaves the upper lane's index already multiplied by 4 (a byte
; offset), which is why that load uses scale 1.
7626  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
7627  DB  102,73,15,58,22,202,1               ; pextrq        $0x1,%xmm1,%r10
7628  DB  77,137,209                          ; mov           %r10,%r9
7629  DB  73,193,233,32                       ; shr           $0x20,%r9
7630  DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
7631  DB  65,137,203                          ; mov           %ecx,%r11d
7632  DB  65,129,227,255,255,255,0            ; and           $0xffffff,%r11d
7633  DB  72,193,233,30                       ; shr           $0x1e,%rcx
7634  DB  65,129,226,255,255,255,0            ; and           $0xffffff,%r10d
7635  DB  243,67,15,16,12,152                 ; movss         (%r8,%r11,4),%xmm1
7636  DB  102,65,15,58,33,12,8,16             ; insertps      $0x10,(%r8,%rcx,1),%xmm1
7637  DB  243,67,15,16,28,144                 ; movss         (%r8,%r10,4),%xmm3
7638  DB  102,15,58,33,203,32                 ; insertps      $0x20,%xmm3,%xmm1
7639  DB  243,67,15,16,28,136                 ; movss         (%r8,%r9,4),%xmm3
7640  DB  102,15,58,33,203,48                 ; insertps      $0x30,%xmm3,%xmm1
; Byte-2 gather. rax (the structure pointer) is overwritten below, after its
; last field has been read into r9.
7641  DB  76,139,72,24                        ; mov           0x18(%rax),%r9
7642  DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
7643  DB  68,15,183,193                       ; movzwl        %cx,%r8d
7644  DB  72,193,233,32                       ; shr           $0x20,%rcx
7645  DB  102,72,15,126,208                   ; movq          %xmm2,%rax
7646  DB  68,15,183,208                       ; movzwl        %ax,%r10d
7647  DB  72,193,232,30                       ; shr           $0x1e,%rax
7648  DB  243,67,15,16,20,145                 ; movss         (%r9,%r10,4),%xmm2
7649  DB  102,65,15,58,33,20,1,16             ; insertps      $0x10,(%r9,%rax,1),%xmm2
7650  DB  243,67,15,16,28,129                 ; movss         (%r9,%r8,4),%xmm3
7651  DB  102,15,58,33,211,32                 ; insertps      $0x20,%xmm3,%xmm2
7652  DB  243,65,15,16,28,137                 ; movss         (%r9,%rcx,4),%xmm3
7653  DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
; Top byte is not table-mapped: convert and scale by ~1/255 into xmm3.
7654  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
7655  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
7656  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7657  DB  102,15,110,216                      ; movd          %eax,%xmm3
7658  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7659  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
7660  DB  72,173                              ; lods          %ds:(%rsi),%rax
7661  DB  255,224                             ; jmpq          *%rax
7662
; _sk_load_a8_sse41: load four 8-bit values into xmm3 as floats in [0,1] and
; zero xmm0..xmm2.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a byte buffer.
;   Four bytes at base + rdi are zero-extended, converted to float and scaled
;   by 0x3b808081 (~1/255) into xmm3.
;   xmm0, xmm1 and xmm2 are cleared with self-xor.
;   The second lods fetches the next program word, which jmpq *%rax
;   tail-jumps to after the clears.
;   Scratch: rax.
7663PUBLIC _sk_load_a8_sse41
7664_sk_load_a8_sse41 LABEL PROC
7665  DB  72,173                              ; lods          %ds:(%rsi),%rax
7666  DB  72,139,0                            ; mov           (%rax),%rax
7667  DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
7668  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
7669  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7670  DB  102,15,110,216                      ; movd          %eax,%xmm3
7671  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7672  DB  15,89,216                           ; mulps         %xmm0,%xmm3
7673  DB  72,173                              ; lods          %ds:(%rsi),%rax
7674  DB  15,87,192                           ; xorps         %xmm0,%xmm0
7675  DB  15,87,201                           ; xorps         %xmm1,%xmm1
7676  DB  15,87,210                           ; xorps         %xmm2,%xmm2
7677  DB  255,224                             ; jmpq          *%rax
7678
; _sk_gather_a8_sse41: gather four 8-bit values at computed indices into xmm3
; as floats in [0,1], and zero xmm0..xmm2.
;   The first lods fetches the next 8-byte word from [rsi] into rax, read as
;   a structure: [rax+0x00] = byte buffer base, [rax+0x10] = 32-bit
;   multiplier (presumably the row stride in elements - confirm).
;   Per lane: index = trunc(xmm0) + trunc(xmm1) * multiplier
;   (xmm0/xmm1 presumably hold x/y coordinates - confirm).
;   The byte at base + index is fetched for each lane, zero-extended,
;   converted to float and scaled by 0x3b808081 (~1/255) into xmm3.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, r8, r9, r10.
7679PUBLIC _sk_gather_a8_sse41
7680_sk_gather_a8_sse41 LABEL PROC
7681  DB  72,173                              ; lods          %ds:(%rsi),%rax
7682  DB  76,139,8                            ; mov           (%rax),%r9
; index = trunc(xmm0) + trunc(xmm1) * [rax+0x10]
7683  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
7684  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
7685  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
7686  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
7687  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
7688  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Move the four lane indices to r10 (lane 0), rcx (1), r8 (2), rax (3).
7689  DB  102,72,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%rax
7690  DB  65,137,192                          ; mov           %eax,%r8d
7691  DB  72,193,232,32                       ; shr           $0x20,%rax
7692  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
7693  DB  65,137,202                          ; mov           %ecx,%r10d
7694  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Insert the gathered bytes into the low four bytes of xmm0; the stale index
; bytes above them are ignored by the pmovzxbd that follows.
7695  DB  102,67,15,58,32,4,17,0              ; pinsrb        $0x0,(%r9,%r10,1),%xmm0
7696  DB  102,65,15,58,32,4,9,1               ; pinsrb        $0x1,(%r9,%rcx,1),%xmm0
7697  DB  67,15,182,12,1                      ; movzbl        (%r9,%r8,1),%ecx
7698  DB  102,15,58,32,193,2                  ; pinsrb        $0x2,%ecx,%xmm0
7699  DB  65,15,182,4,1                       ; movzbl        (%r9,%rax,1),%eax
7700  DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
7701  DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
7702  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
7703  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7704  DB  102,15,110,216                      ; movd          %eax,%xmm3
7705  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7706  DB  15,89,216                           ; mulps         %xmm0,%xmm3
7707  DB  72,173                              ; lods          %ds:(%rsi),%rax
7708  DB  15,87,192                           ; xorps         %xmm0,%xmm0
7709  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
7710  DB  102,15,239,210                      ; pxor          %xmm2,%xmm2
7711  DB  255,224                             ; jmpq          *%rax
7712
; _sk_store_a8_sse41: convert xmm3 to four 8-bit values and store them.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a byte buffer.
;   xmm3 is multiplied by 0x437f0000 (255.0) and converted to int32 with
;   cvtps2dq (rounding per the current MXCSR mode).
;   It is then narrowed dword -> word -> byte with unsigned saturation, and
;   the low four bytes are written to base + rdi.
;   xmm0..xmm3 are left unchanged.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, xmm8.
7713PUBLIC _sk_store_a8_sse41
7714_sk_store_a8_sse41 LABEL PROC
7715  DB  72,173                              ; lods          %ds:(%rsi),%rax
7716  DB  72,139,0                            ; mov           (%rax),%rax
7717  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
7718  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
7719  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7720  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
7721  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
7722  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
7723  DB  102,69,15,103,192                   ; packuswb      %xmm8,%xmm8
7724  DB  102,68,15,126,4,56                  ; movd          %xmm8,(%rax,%rdi,1)
7725  DB  72,173                              ; lods          %ds:(%rsi),%rax
7726  DB  255,224                             ; jmpq          *%rax
7727
; _sk_load_g8_sse41: load four 8-bit values, replicate them as floats in
; [0,1] into xmm0, xmm1 and xmm2, and set xmm3 to 1.0.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a byte buffer.
;   Four bytes at base + rdi are zero-extended, converted to float and scaled
;   by 0x3b808081 (~1/255) into xmm0.
;   xmm3 is set to 0x3f800000 (1.0) in every lane, and xmm0 is copied to
;   xmm1 and xmm2.
;   The second lods fetches the next program word, which jmpq *%rax
;   tail-jumps to after the copies.
;   Scratch: rax.
7728PUBLIC _sk_load_g8_sse41
7729_sk_load_g8_sse41 LABEL PROC
7730  DB  72,173                              ; lods          %ds:(%rsi),%rax
7731  DB  72,139,0                            ; mov           (%rax),%rax
7732  DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
7733  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
7734  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7735  DB  102,15,110,192                      ; movd          %eax,%xmm0
7736  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7737  DB  15,89,193                           ; mulps         %xmm1,%xmm0
7738  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7739  DB  102,15,110,216                      ; movd          %eax,%xmm3
7740  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7741  DB  72,173                              ; lods          %ds:(%rsi),%rax
7742  DB  15,40,200                           ; movaps        %xmm0,%xmm1
7743  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7744  DB  255,224                             ; jmpq          *%rax
7745
; _sk_gather_g8_sse41: gather four 8-bit values at computed indices,
; replicate them as floats in [0,1] into xmm0..xmm2, and set xmm3 to 1.0.
;   The first lods fetches the next 8-byte word from [rsi] into rax, read as
;   a structure: [rax+0x00] = byte buffer base, [rax+0x10] = 32-bit
;   multiplier (presumably the row stride in elements - confirm).
;   Per lane: index = trunc(xmm0) + trunc(xmm1) * multiplier
;   (xmm0/xmm1 presumably hold x/y coordinates - confirm).
;   The byte at base + index is fetched for each lane, converted to float
;   and scaled by 0x3b808081 (~1/255) into xmm0, then copied to xmm1, xmm2.
;   xmm3 is set to 0x3f800000 (1.0) in every lane.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, r8, r9, r10.
7746PUBLIC _sk_gather_g8_sse41
7747_sk_gather_g8_sse41 LABEL PROC
7748  DB  72,173                              ; lods          %ds:(%rsi),%rax
7749  DB  76,139,8                            ; mov           (%rax),%r9
; index = trunc(xmm0) + trunc(xmm1) * [rax+0x10]
7750  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
7751  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
7752  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
7753  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
7754  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
7755  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Move the four lane indices to r10 (lane 0), rcx (1), r8 (2), rax (3).
7756  DB  102,72,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%rax
7757  DB  65,137,192                          ; mov           %eax,%r8d
7758  DB  72,193,232,32                       ; shr           $0x20,%rax
7759  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
7760  DB  65,137,202                          ; mov           %ecx,%r10d
7761  DB  72,193,233,32                       ; shr           $0x20,%rcx
7762  DB  102,67,15,58,32,4,17,0              ; pinsrb        $0x0,(%r9,%r10,1),%xmm0
7763  DB  102,65,15,58,32,4,9,1               ; pinsrb        $0x1,(%r9,%rcx,1),%xmm0
7764  DB  67,15,182,12,1                      ; movzbl        (%r9,%r8,1),%ecx
7765  DB  102,15,58,32,193,2                  ; pinsrb        $0x2,%ecx,%xmm0
7766  DB  65,15,182,4,1                       ; movzbl        (%r9,%rax,1),%eax
7767  DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
7768  DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
7769  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
7770  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7771  DB  102,15,110,192                      ; movd          %eax,%xmm0
7772  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7773  DB  15,89,193                           ; mulps         %xmm1,%xmm0
7774  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7775  DB  102,15,110,216                      ; movd          %eax,%xmm3
7776  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7777  DB  72,173                              ; lods          %ds:(%rsi),%rax
7778  DB  15,40,200                           ; movaps        %xmm0,%xmm1
7779  DB  15,40,208                           ; movaps        %xmm0,%xmm2
7780  DB  255,224                             ; jmpq          *%rax
7781
; _sk_gather_i8_sse41: gather four 8-bit indices at computed positions, look
; each up in a table of packed 32-bit pixels, and unpack the four bytes of
; each pixel to floats in [0,1].
;   The first lods fetches the next 8-byte word from [rsi] into rax; a copy
;   is kept in r8. If it is non-zero it is used as the context pointer;
;   if it is zero, a further word is fetched from the program and used
;   instead.
;   NOTE(review): on the zero path r8 stays zero, yet 0x8(%r8) is read
;   further down - presumably the first word is never null in practice;
;   confirm against the generator source.
;   Context fields read: [ctx+0x00] = byte buffer base, [ctx+0x10] = 32-bit
;   multiplier (presumably row stride - confirm), [r8+0x08] = table base.
;   Per lane: index = trunc(xmm0) + trunc(xmm1) * multiplier
;             px    = table32[ bytes[index] ]
;             xmm0 = (px & 0xff) / 255         xmm1 = ((px >> 8)  & 0xff) / 255
;             xmm2 = ((px >> 16) & 0xff) / 255 xmm3 = (px >> 24) / 255
;   (division realised as multiply by 0x3b808081, ~1/255).
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, r8..r11, xmm8.
7782PUBLIC _sk_gather_i8_sse41
7783_sk_gather_i8_sse41 LABEL PROC
7784  DB  72,173                              ; lods          %ds:(%rsi),%rax
7785  DB  73,137,192                          ; mov           %rax,%r8
7786  DB  77,133,192                          ; test          %r8,%r8
; je lands on the second lods (+0xf); the jmp below skips it (+0x11).
7787  DB  116,5                               ; je            1616 <_sk_gather_i8_sse41+0xf>
7788  DB  76,137,192                          ; mov           %r8,%rax
7789  DB  235,2                               ; jmp           1618 <_sk_gather_i8_sse41+0x11>
7790  DB  72,173                              ; lods          %ds:(%rsi),%rax
7791  DB  76,139,16                           ; mov           (%rax),%r10
; index = trunc(xmm0) + trunc(xmm1) * [rax+0x10]
7792  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
7793  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
7794  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
7795  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
7796  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
7797  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Move the four lane indices to r11 (lane 0), rcx (1), r9 (2), rax (3).
7798  DB  102,72,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%rax
7799  DB  65,137,193                          ; mov           %eax,%r9d
7800  DB  72,193,232,32                       ; shr           $0x20,%rax
7801  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
7802  DB  65,137,203                          ; mov           %ecx,%r11d
7803  DB  72,193,233,32                       ; shr           $0x20,%rcx
; First-level gather: one byte per lane.
7804  DB  102,67,15,58,32,4,26,0              ; pinsrb        $0x0,(%r10,%r11,1),%xmm0
7805  DB  102,65,15,58,32,4,10,1              ; pinsrb        $0x1,(%r10,%rcx,1),%xmm0
7806  DB  102,67,15,58,32,4,10,2              ; pinsrb        $0x2,(%r10,%r9,1),%xmm0
7807  DB  102,65,15,58,32,4,2,3               ; pinsrb        $0x3,(%r10,%rax,1),%xmm0
7808  DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
; Second-level gather: each byte (0..255) indexes a table of dwords. After
; "shr $0x1e" the upper lane of each 64-bit pair is already a byte offset
; (index*4), so those loads use scale 1.
7809  DB  102,73,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%r9
7810  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
7811  DB  73,139,64,8                         ; mov           0x8(%r8),%rax
7812  DB  65,137,200                          ; mov           %ecx,%r8d
7813  DB  72,193,233,30                       ; shr           $0x1e,%rcx
7814  DB  69,137,202                          ; mov           %r9d,%r10d
7815  DB  73,193,233,30                       ; shr           $0x1e,%r9
7816  DB  102,66,15,110,28,128                ; movd          (%rax,%r8,4),%xmm3
7817  DB  102,15,58,34,28,8,1                 ; pinsrd        $0x1,(%rax,%rcx,1),%xmm3
7818  DB  102,66,15,58,34,28,144,2            ; pinsrd        $0x2,(%rax,%r10,4),%xmm3
7819  DB  102,66,15,58,34,28,8,3              ; pinsrd        $0x3,(%rax,%r9,1),%xmm3
; Unpack the four bytes of each pixel and scale by ~1/255.
7820  DB  184,255,0,0,0                       ; mov           $0xff,%eax
7821  DB  102,15,110,192                      ; movd          %eax,%xmm0
7822  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
7823  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
7824  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
7825  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
7826  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
7827  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
7828  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
7829  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
7830  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
7831  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
7832  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
7833  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7834  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
7835  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
7836  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
7837  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
7838  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
7839  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
7840  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
7841  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
7842  DB  72,173                              ; lods          %ds:(%rsi),%rax
7843  DB  255,224                             ; jmpq          *%rax
7844
; _sk_load_565_sse41: load four 16-bit 5-6-5 values and unpack them to floats.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a 16-bit buffer.
;   Four words at base + rdi*2 are zero-extended into xmm2, then:
;       xmm0 = float(v & 0xf800) * 0x37842108   (~1/63488, i.e. 1/(31<<11))
;       xmm1 = float(v & 0x07e0) * 0x3a020821   (~1/2016,  i.e. 1/(63<<5))
;       xmm2 = float(v & 0x001f) * 0x3d042108   (~1/31)
;       xmm3 = 0x3f800000 (1.0) in every lane
;   Scaling by the reciprocal of the in-place field maximum avoids shifting
;   each field down first.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax.
7845PUBLIC _sk_load_565_sse41
7846_sk_load_565_sse41 LABEL PROC
7847  DB  72,173                              ; lods          %ds:(%rsi),%rax
7848  DB  72,139,0                            ; mov           (%rax),%rax
7849  DB  102,15,56,51,20,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm2
; Top 5 bits -> xmm0.
7850  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
7851  DB  102,15,110,192                      ; movd          %eax,%xmm0
7852  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
7853  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
7854  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
7855  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
7856  DB  102,15,110,192                      ; movd          %eax,%xmm0
7857  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7858  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; Middle 6 bits -> xmm1.
7859  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
7860  DB  102,15,110,200                      ; movd          %eax,%xmm1
7861  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
7862  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
7863  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
7864  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
7865  DB  102,15,110,200                      ; movd          %eax,%xmm1
7866  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
7867  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; Low 5 bits -> xmm2.
7868  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
7869  DB  102,15,110,216                      ; movd          %eax,%xmm3
7870  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
7871  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
7872  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
7873  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
7874  DB  102,15,110,208                      ; movd          %eax,%xmm2
7875  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
7876  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = 1.0 in every lane.
7877  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7878  DB  102,15,110,216                      ; movd          %eax,%xmm3
7879  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7880  DB  72,173                              ; lods          %ds:(%rsi),%rax
7881  DB  255,224                             ; jmpq          *%rax
7882
; _sk_gather_565_sse41: gather four 16-bit 5-6-5 values at computed indices
; and unpack them to floats.
;   The first lods fetches the next 8-byte word from [rsi] into rax, read as
;   a structure: [rax+0x00] = 16-bit buffer base, [rax+0x10] = 32-bit
;   multiplier (presumably the row stride in elements - confirm).
;   Per lane: index = trunc(xmm0) + trunc(xmm1) * multiplier
;   (xmm0/xmm1 presumably hold x/y coordinates - confirm).
;   The word at base + index*2 is fetched for each lane, then:
;       xmm0 = float(v & 0xf800) * 0x37842108   (~1/63488)
;       xmm1 = float(v & 0x07e0) * 0x3a020821   (~1/2016)
;       xmm2 = float(v & 0x001f) * 0x3d042108   (~1/31)
;       xmm3 = 0x3f800000 (1.0) in every lane
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, r8, r9, r10.
7883PUBLIC _sk_gather_565_sse41
7884_sk_gather_565_sse41 LABEL PROC
7885  DB  72,173                              ; lods          %ds:(%rsi),%rax
7886  DB  76,139,8                            ; mov           (%rax),%r9
; index = trunc(xmm0) + trunc(xmm1) * [rax+0x10]
7887  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
7888  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
7889  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
7890  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
7891  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
7892  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Move the four lane indices to r10 (lane 0), rcx (1), r8 (2), rax (3).
7893  DB  102,72,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%rax
7894  DB  65,137,192                          ; mov           %eax,%r8d
7895  DB  72,193,232,32                       ; shr           $0x20,%rax
7896  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
7897  DB  65,137,202                          ; mov           %ecx,%r10d
7898  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Gather one 16-bit word per lane into the low four words of xmm0.
7899  DB  102,67,15,196,4,81,0                ; pinsrw        $0x0,(%r9,%r10,2),%xmm0
7900  DB  102,65,15,196,4,73,1                ; pinsrw        $0x1,(%r9,%rcx,2),%xmm0
7901  DB  67,15,183,12,65                     ; movzwl        (%r9,%r8,2),%ecx
7902  DB  102,15,196,193,2                    ; pinsrw        $0x2,%ecx,%xmm0
7903  DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
7904  DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
7905  DB  102,15,56,51,208                    ; pmovzxwd      %xmm0,%xmm2
; Top 5 bits -> xmm0.
7906  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
7907  DB  102,15,110,192                      ; movd          %eax,%xmm0
7908  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
7909  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
7910  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
7911  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
7912  DB  102,15,110,192                      ; movd          %eax,%xmm0
7913  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7914  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; Middle 6 bits -> xmm1.
7915  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
7916  DB  102,15,110,200                      ; movd          %eax,%xmm1
7917  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
7918  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
7919  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
7920  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
7921  DB  102,15,110,200                      ; movd          %eax,%xmm1
7922  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
7923  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; Low 5 bits -> xmm2.
7924  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
7925  DB  102,15,110,216                      ; movd          %eax,%xmm3
7926  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
7927  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
7928  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
7929  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
7930  DB  102,15,110,208                      ; movd          %eax,%xmm2
7931  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
7932  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = 1.0 in every lane.
7933  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
7934  DB  102,15,110,216                      ; movd          %eax,%xmm3
7935  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
7936  DB  72,173                              ; lods          %ds:(%rsi),%rax
7937  DB  255,224                             ; jmpq          *%rax
7938
; _sk_store_565_sse41: pack xmm0..xmm2 into four 16-bit 5-6-5 values and
; store them.
;   The first lods fetches the next 8-byte word from [rsi] into rax; the
;   qword it points at is loaded as the base pointer of a 16-bit buffer.
;   Per lane:
;       v = (int(xmm0 * 31.0) << 11) | (int(xmm1 * 63.0) << 5) | int(xmm2 * 31.0)
;   The conversions use cvtps2dq (rounding per the current MXCSR mode).
;   The dwords are narrowed to words with unsigned saturation (packusdw) and
;   the low four words (8 bytes) are written to base + rdi*2.
;   xmm0..xmm3 are left unchanged; xmm3 is not read.
;   The closing lods + jmpq *%rax tail-jumps to the next word in the program.
;   Scratch: rax, rcx, xmm8, xmm9, xmm10.
7939PUBLIC _sk_store_565_sse41
7940_sk_store_565_sse41 LABEL PROC
7941  DB  72,173                              ; lods          %ds:(%rsi),%rax
7942  DB  72,139,0                            ; mov           (%rax),%rax
; 0x41f80000 = 31.0; kept in xmm8 so it can be reused for the low field.
7943  DB  185,0,0,248,65                      ; mov           $0x41f80000,%ecx
7944  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
7945  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
7946  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
7947  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
7948  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
7949  DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
; 0x427c0000 = 63.0 for the 6-bit middle field.
7950  DB  185,0,0,124,66                      ; mov           $0x427c0000,%ecx
7951  DB  102,68,15,110,209                   ; movd          %ecx,%xmm10
7952  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
7953  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
7954  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
7955  DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
7956  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
7957  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
7958  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
; orpd is a plain bitwise OR; the result is identical to por on this data.
7959  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
7960  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
7961  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
7962  DB  72,173                              ; lods          %ds:(%rsi),%rax
7963  DB  255,224                             ; jmpq          *%rax
7964
; _sk_load_4444_sse41: load four 16-bit values from (%rax,%rdi,2) and split
; each into four 4-bit fields, converted to floats in xmm0..xmm3.
;   xmm0 <- bits 12-15, xmm1 <- bits 8-11, xmm2 <- bits 4-7, xmm3 <- bits 0-3.
;   Each field is masked in place (not shifted) and multiplied by a constant
;   that is approximately 1/(15 << shift), so a full field maps to ~1.0.
;   Clobbers: %rax, xmm8, xmm9.  Ends by jumping to the next stage.
7965PUBLIC _sk_load_4444_sse41
7966_sk_load_4444_sse41 LABEL PROC
; Fetch the argument, dereference it for the source base, and zero-extend
; four 16-bit values into the dword lanes of xmm9.
7967  DB  72,173                              ; lods          %ds:(%rsi),%rax
7968  DB  72,139,0                            ; mov           (%rax),%rax
7969  DB  102,68,15,56,51,12,120              ; pmovzxwd      (%rax,%rdi,2),%xmm9
; xmm0 = float(v & 0xf000) * 0x37888889 (~1/61440).
7970  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
7971  DB  102,15,110,192                      ; movd          %eax,%xmm0
7972  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
7973  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
7974  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
7975  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
7976  DB  102,15,110,192                      ; movd          %eax,%xmm0
7977  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
7978  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; xmm1 = float(v & 0x0f00) * 0x39888889 (~1/3840).
7979  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
7980  DB  102,15,110,200                      ; movd          %eax,%xmm1
7981  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
7982  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
7983  DB  15,91,209                           ; cvtdq2ps      %xmm1,%xmm2
7984  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
7985  DB  102,15,110,200                      ; movd          %eax,%xmm1
7986  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
7987  DB  15,89,202                           ; mulps         %xmm2,%xmm1
; xmm2 = float(v & 0x00f0) * 0x3b888889 (~1/240).
7988  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
7989  DB  102,15,110,208                      ; movd          %eax,%xmm2
7990  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
7991  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
7992  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
7993  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
7994  DB  102,15,110,208                      ; movd          %eax,%xmm2
7995  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
7996  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; xmm3 = float(v & 0x000f) * 0x3d888889 (~1/15).
7997  DB  184,15,0,0,0                        ; mov           $0xf,%eax
7998  DB  102,15,110,216                      ; movd          %eax,%xmm3
7999  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
8000  DB  102,65,15,219,217                   ; pand          %xmm9,%xmm3
8001  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
8002  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
8003  DB  102,15,110,216                      ; movd          %eax,%xmm3
8004  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8005  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8006  DB  72,173                              ; lods          %ds:(%rsi),%rax
8007  DB  255,224                             ; jmpq          *%rax
8008
; _sk_gather_4444_sse41: gather four 16-bit values from computed indices and
; unpack each into four 4-bit fields as floats in xmm0..xmm3.
;   Index per lane = trunc(xmm0) + trunc(xmm1) * (dword at 0x10 of the stage
;   argument); presumably x + y*stride - confirm against the context struct.
;   Base pointer is the first qword of the stage argument (kept in %r9).
;   Field extraction and scaling match the 4444 layout: bits 12-15 -> xmm0,
;   8-11 -> xmm1, 4-7 -> xmm2, 0-3 -> xmm3.
;   Clobbers: %rax, %rcx, %r8-%r10, xmm8, xmm9.
8009PUBLIC _sk_gather_4444_sse41
8010_sk_gather_4444_sse41 LABEL PROC
; %rax = stage argument, %r9 = base pointer stored at its start.
8011  DB  72,173                              ; lods          %ds:(%rsi),%rax
8012  DB  76,139,8                            ; mov           (%rax),%r9
; xmm0 = trunc(xmm0) + trunc(xmm1) * broadcast(dword at 0x10(%rax)).
8013  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
8014  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
8015  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
8016  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
8017  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
8018  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Split the four 32-bit indices into scalar registers (zero-extended):
; %r8 = lane 2, %rax = lane 3, %r10 = lane 0, %rcx = lane 1.
8019  DB  102,72,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%rax
8020  DB  65,137,192                          ; mov           %eax,%r8d
8021  DB  72,193,232,32                       ; shr           $0x20,%rax
8022  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
8023  DB  65,137,202                          ; mov           %ecx,%r10d
8024  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Load one 16-bit value per index into words 0..3 of xmm0, then widen the
; four words to dwords in xmm9.
8025  DB  102,67,15,196,4,81,0                ; pinsrw        $0x0,(%r9,%r10,2),%xmm0
8026  DB  102,65,15,196,4,73,1                ; pinsrw        $0x1,(%r9,%rcx,2),%xmm0
8027  DB  67,15,183,12,65                     ; movzwl        (%r9,%r8,2),%ecx
8028  DB  102,15,196,193,2                    ; pinsrw        $0x2,%ecx,%xmm0
8029  DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
8030  DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
8031  DB  102,68,15,56,51,200                 ; pmovzxwd      %xmm0,%xmm9
; xmm0 = float(v & 0xf000) * 0x37888889 (~1/61440).
8032  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
8033  DB  102,15,110,192                      ; movd          %eax,%xmm0
8034  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
8035  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
8036  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
8037  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
8038  DB  102,15,110,192                      ; movd          %eax,%xmm0
8039  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8040  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; xmm1 = float(v & 0x0f00) * 0x39888889 (~1/3840).
8041  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
8042  DB  102,15,110,200                      ; movd          %eax,%xmm1
8043  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
8044  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
8045  DB  15,91,209                           ; cvtdq2ps      %xmm1,%xmm2
8046  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
8047  DB  102,15,110,200                      ; movd          %eax,%xmm1
8048  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8049  DB  15,89,202                           ; mulps         %xmm2,%xmm1
; xmm2 = float(v & 0x00f0) * 0x3b888889 (~1/240).
8050  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
8051  DB  102,15,110,208                      ; movd          %eax,%xmm2
8052  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
8053  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
8054  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
8055  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
8056  DB  102,15,110,208                      ; movd          %eax,%xmm2
8057  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8058  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; xmm3 = float(v & 0x000f) * 0x3d888889 (~1/15).
8059  DB  184,15,0,0,0                        ; mov           $0xf,%eax
8060  DB  102,15,110,216                      ; movd          %eax,%xmm3
8061  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
8062  DB  102,65,15,219,217                   ; pand          %xmm9,%xmm3
8063  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
8064  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
8065  DB  102,15,110,216                      ; movd          %eax,%xmm3
8066  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8067  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8068  DB  72,173                              ; lods          %ds:(%rsi),%rax
8069  DB  255,224                             ; jmpq          *%rax
8070
; _sk_store_4444_sse41: scale xmm0..xmm3 by 15.0f, round to integers, pack
; the four results into one 16-bit value per lane and store four values at
; (%rax,%rdi,2).
;   Bit layout written: xmm0 -> bits 12-15, xmm1 -> 8-11, xmm2 -> 4-7,
;   xmm3 -> 0-3.  Inputs are not clamped here (presumably already in [0,1]
;   - confirm with the preceding stages).
;   Clobbers: %rax, %rcx, xmm8-xmm10.
8071PUBLIC _sk_store_4444_sse41
8072_sk_store_4444_sse41 LABEL PROC
; Fetch the argument and dereference it for the destination base.
8073  DB  72,173                              ; lods          %ds:(%rsi),%rax
8074  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast 0x41700000 (15.0f), reused as the scale for all channels.
8075  DB  185,0,0,112,65                      ; mov           $0x41700000,%ecx
8076  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
8077  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = round(xmm0 * 15) << 12.
8078  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8079  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
8080  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
8081  DB  102,65,15,114,241,12                ; pslld         $0xc,%xmm9
; xmm10 = (round(xmm1 * 15) << 8) | xmm9.
8082  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
8083  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
8084  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
8085  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
8086  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = round(xmm2 * 15) << 4.
8087  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8088  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
8089  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
8090  DB  102,65,15,114,241,4                 ; pslld         $0x4,%xmm9
; xmm8 = round(xmm3 * 15), then OR in the other three fields
; (orpd acts as a plain 128-bit bitwise OR).
8091  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
8092  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
8093  DB  102,69,15,86,193                    ; orpd          %xmm9,%xmm8
8094  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
; Narrow dwords to words (unsigned saturation) and store 8 bytes.
8095  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
8096  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
; Tail-jump to the next stage address read from (%rsi).
8097  DB  72,173                              ; lods          %ds:(%rsi),%rax
8098  DB  255,224                             ; jmpq          *%rax
8099
; _sk_load_8888_sse41: load four 32-bit values from (%rax,%rdi,4) and split
; each into four bytes, converted to floats scaled by ~1/255.
;   xmm0 <- bits 0-7, xmm1 <- bits 8-15, xmm2 <- bits 16-23,
;   xmm3 <- bits 24-31.
;   Clobbers: %rax, xmm8.  Ends by jumping to the next stage.
8100PUBLIC _sk_load_8888_sse41
8101_sk_load_8888_sse41 LABEL PROC
; Fetch the argument, dereference it for the source base, and do an
; unaligned 16-byte load of four packed values into xmm3.
8102  DB  72,173                              ; lods          %ds:(%rsi),%rax
8103  DB  72,139,0                            ; mov           (%rax),%rax
8104  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
; xmm0 = broadcast byte mask 0xff.
8105  DB  184,255,0,0,0                       ; mov           $0xff,%eax
8106  DB  102,15,110,192                      ; movd          %eax,%xmm0
8107  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm1 = (v >> 8) & 0xff, xmm2 = (v >> 16) & 0xff, xmm0 = v & 0xff.
8108  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
8109  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
8110  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
8111  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
8112  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
8113  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
8114  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each byte field to float and scale by 0x3b808081 (~1/255).
8115  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
8116  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
8117  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
8118  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8119  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8120  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
8121  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8122  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
8123  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Top byte needs no mask: the logical right shift by 24 leaves only it.
8124  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
8125  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
8126  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8127  DB  72,173                              ; lods          %ds:(%rsi),%rax
8128  DB  255,224                             ; jmpq          *%rax
8129
; _sk_gather_8888_sse41: gather four 32-bit values from computed indices and
; split each into four bytes as floats (scaled by ~1/255) in xmm0..xmm3.
;   Index per lane = trunc(xmm0) + trunc(xmm1) * (dword at 0x10 of the stage
;   argument); presumably x + y*stride - confirm against the context struct.
;   Base pointer is the first qword of the stage argument (kept in %r9).
;   xmm0 <- bits 0-7, xmm1 <- bits 8-15, xmm2 <- bits 16-23,
;   xmm3 <- bits 24-31.
;   Clobbers: %rax, %rcx, %r8-%r10, xmm8.
8130PUBLIC _sk_gather_8888_sse41
8131_sk_gather_8888_sse41 LABEL PROC
; %rax = stage argument, %r9 = base pointer stored at its start.
8132  DB  72,173                              ; lods          %ds:(%rsi),%rax
8133  DB  76,139,8                            ; mov           (%rax),%r9
; xmm0 = trunc(xmm0) + trunc(xmm1) * broadcast(dword at 0x10(%rax)).
8134  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
8135  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
8136  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
8137  DB  102,15,56,64,209                    ; pmulld        %xmm1,%xmm2
8138  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
8139  DB  102,15,254,194                      ; paddd         %xmm2,%xmm0
; Split the four 32-bit indices into scalar registers (zero-extended):
; %r8 = lane 0, %rax = lane 1, %r10 = lane 2, %rcx = lane 3.
8140  DB  102,72,15,126,192                   ; movq          %xmm0,%rax
8141  DB  65,137,192                          ; mov           %eax,%r8d
8142  DB  72,193,232,32                       ; shr           $0x20,%rax
8143  DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
8144  DB  65,137,202                          ; mov           %ecx,%r10d
8145  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Load one dword per index into lanes 0..3 of xmm3.
8146  DB  102,67,15,110,28,129                ; movd          (%r9,%r8,4),%xmm3
8147  DB  102,65,15,58,34,28,129,1            ; pinsrd        $0x1,(%r9,%rax,4),%xmm3
8148  DB  102,67,15,58,34,28,145,2            ; pinsrd        $0x2,(%r9,%r10,4),%xmm3
8149  DB  102,65,15,58,34,28,137,3            ; pinsrd        $0x3,(%r9,%rcx,4),%xmm3
; xmm0 = broadcast byte mask 0xff.
8150  DB  184,255,0,0,0                       ; mov           $0xff,%eax
8151  DB  102,15,110,192                      ; movd          %eax,%xmm0
8152  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm1 = (v >> 8) & 0xff, xmm2 = (v >> 16) & 0xff, xmm0 = v & 0xff.
8153  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
8154  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
8155  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
8156  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
8157  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
8158  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
8159  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each byte field to float and scale by 0x3b808081 (~1/255).
8160  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
8161  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
8162  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
8163  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8164  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8165  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
8166  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8167  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
8168  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Top byte needs no mask: the logical right shift by 24 leaves only it.
8169  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
8170  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
8171  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8172  DB  72,173                              ; lods          %ds:(%rsi),%rax
8173  DB  255,224                             ; jmpq          *%rax
8174
; _sk_store_8888_sse41: scale xmm0..xmm3 by 255.0f, round to integers, pack
; the four results into one 32-bit value per lane and store 16 bytes at
; (%rax,%rdi,4).
;   Bit layout written: xmm0 -> bits 0-7, xmm1 -> 8-15, xmm2 -> 16-23,
;   xmm3 -> 24-31.  Inputs are not clamped or masked here (presumably
;   already in [0,1] - confirm with the preceding stages).
;   Clobbers: %rax, %rcx, xmm8-xmm10.
8175PUBLIC _sk_store_8888_sse41
8176_sk_store_8888_sse41 LABEL PROC
; Fetch the argument and dereference it for the destination base.
8177  DB  72,173                              ; lods          %ds:(%rsi),%rax
8178  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast 0x437f0000 (255.0f), reused as the scale for all channels.
8179  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
8180  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
8181  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = round(xmm0 * 255) (lowest byte, no shift).
8182  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8183  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
8184  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
; xmm10 = (round(xmm1 * 255) << 8) | xmm9.
8185  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
8186  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
8187  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
8188  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
8189  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = round(xmm2 * 255) << 16.
8190  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8191  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
8192  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
8193  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
; xmm8 = round(xmm3 * 255) << 24, then OR in the other three bytes.
8194  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
8195  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
8196  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
8197  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
8198  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
; Unaligned 16-byte store of the four packed values.
8199  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
; Tail-jump to the next stage address read from (%rsi).
8200  DB  72,173                              ; lods          %ds:(%rsi),%rax
8201  DB  255,224                             ; jmpq          *%rax
8202
; _sk_load_f16_sse41: load 32 bytes (four elements of four 16-bit values)
; from (%rax,%rdi,8), de-interleave them by channel, and expand each 16-bit
; value to a 32-bit float in xmm0..xmm3 (channel 0..3 of each element).
;   Expansion: words that compare below 0x0400 as signed 16-bit are forced to
;   zero, then the word is zero-extended, shifted left 13 and multiplied by
;   0x77800000 (2^112).  Presumably a half->float conversion that flushes
;   denormals; note the signed compare also zeroes every word with the top
;   bit set - NOTE(review): confirm negative inputs are meant to become 0.
;   Clobbers: %rax, xmm8, xmm9.
8203PUBLIC _sk_load_f16_sse41
8204_sk_load_f16_sse41 LABEL PROC
; Fetch the argument, dereference it for the source base, load two 16-byte
; halves (elements 0-1 and 2-3).
8205  DB  72,173                              ; lods          %ds:(%rsi),%rax
8206  DB  72,139,0                            ; mov           (%rax),%rax
8207  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
8208  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of word interleaving transpose the data so that
; xmm1 = [channel 0 x4 | channel 1 x4], xmm8 = [channel 2 x4 | channel 3 x4].
8209  DB  102,68,15,111,192                   ; movdqa        %xmm0,%xmm8
8210  DB  102,68,15,97,193                    ; punpcklwd     %xmm1,%xmm8
8211  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
8212  DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
8213  DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
8214  DB  102,68,15,105,192                   ; punpckhwd     %xmm0,%xmm8
; xmm3 = 0x0400 in every word (threshold for the zeroing compare).
8215  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
8216  DB  102,15,110,192                      ; movd          %eax,%xmm0
8217  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
; Channel 0: zero words below the threshold, widen, shift, scale -> xmm0.
8218  DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
8219  DB  102,15,101,193                      ; pcmpgtw       %xmm1,%xmm0
8220  DB  102,15,223,193                      ; pandn         %xmm1,%xmm0
8221  DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
8222  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
; xmm9 = broadcast 0x77800000 (2^112), the scale shared by all channels.
8223  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
8224  DB  102,15,110,208                      ; movd          %eax,%xmm2
8225  DB  102,68,15,112,202,0                 ; pshufd        $0x0,%xmm2,%xmm9
8226  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
; Channel 1: swap 64-bit halves of xmm1 to bring it low, same steps -> xmm1.
8227  DB  102,15,112,201,78                   ; pshufd        $0x4e,%xmm1,%xmm1
8228  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
8229  DB  102,15,101,209                      ; pcmpgtw       %xmm1,%xmm2
8230  DB  102,15,223,209                      ; pandn         %xmm1,%xmm2
8231  DB  102,15,56,51,202                    ; pmovzxwd      %xmm2,%xmm1
8232  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
8233  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
; Channel 2: low half of xmm8 -> xmm2.
8234  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
8235  DB  102,65,15,101,208                   ; pcmpgtw       %xmm8,%xmm2
8236  DB  102,65,15,223,208                   ; pandn         %xmm8,%xmm2
8237  DB  102,15,56,51,210                    ; pmovzxwd      %xmm2,%xmm2
8238  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
8239  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
; Channel 3: swap halves of xmm8; the threshold register xmm3 is consumed
; as the destination since it is no longer needed afterwards.
8240  DB  102,69,15,112,192,78                ; pshufd        $0x4e,%xmm8,%xmm8
8241  DB  102,65,15,101,216                   ; pcmpgtw       %xmm8,%xmm3
8242  DB  102,65,15,223,216                   ; pandn         %xmm8,%xmm3
8243  DB  102,15,56,51,219                    ; pmovzxwd      %xmm3,%xmm3
8244  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
8245  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8246  DB  72,173                              ; lods          %ds:(%rsi),%rax
8247  DB  255,224                             ; jmpq          *%rax
8248
; _sk_store_f16_sse41: narrow the floats in xmm0..xmm3 to 16-bit values,
; interleave them per element and store 32 bytes at (%rax,%rdi,8).
;   Narrowing: multiply by 0x07800000 (2^-112), logical shift right 13 (the
;   dropped low bits are truncated, not rounded), then packusdw to words.
;   Presumably a float->half conversion for non-negative, in-range inputs -
;   NOTE(review): no sign or overflow handling is visible here; confirm.
;   Output order per element: xmm0, xmm1, xmm2, xmm3 lanes.
;   Clobbers: %rax, %rcx, xmm8-xmm11.
8249PUBLIC _sk_store_f16_sse41
8250_sk_store_f16_sse41 LABEL PROC
; Fetch the argument and dereference it for the destination base.
8251  DB  72,173                              ; lods          %ds:(%rsi),%rax
8252  DB  72,139,0                            ; mov           (%rax),%rax
; xmm9 = broadcast 0x07800000 (2^-112), the scale shared by all channels.
8253  DB  185,0,0,128,7                       ; mov           $0x7800000,%ecx
8254  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
8255  DB  102,69,15,112,200,0                 ; pshufd        $0x0,%xmm8,%xmm9
; xmm8 = words of narrowed xmm0.
8256  DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
8257  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
8258  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
8259  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
; xmm10 = words of narrowed xmm1.
8260  DB  102,69,15,111,209                   ; movdqa        %xmm9,%xmm10
8261  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
8262  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
8263  DB  102,69,15,56,43,210                 ; packusdw      %xmm10,%xmm10
; xmm11 = words of narrowed xmm2.
8264  DB  102,69,15,111,217                   ; movdqa        %xmm9,%xmm11
8265  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8266  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
8267  DB  102,69,15,56,43,219                 ; packusdw      %xmm11,%xmm11
; xmm9 = words of narrowed xmm3 (the scale register is consumed last).
8268  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
8269  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
8270  DB  102,69,15,56,43,201                 ; packusdw      %xmm9,%xmm9
; Interleave: xmm8 = channel 0/1 pairs, xmm11 = channel 2/3 pairs, then
; combine pairs into whole elements; elements 0-1 go to the first 16 bytes,
; elements 2-3 to the next 16.
8271  DB  102,69,15,97,194                    ; punpcklwd     %xmm10,%xmm8
8272  DB  102,69,15,97,217                    ; punpcklwd     %xmm9,%xmm11
8273  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
8274  DB  102,69,15,98,203                    ; punpckldq     %xmm11,%xmm9
8275  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
8276  DB  102,69,15,106,195                   ; punpckhdq     %xmm11,%xmm8
8277  DB  243,68,15,127,68,248,16             ; movdqu        %xmm8,0x10(%rax,%rdi,8)
; Tail-jump to the next stage address read from (%rsi).
8278  DB  72,173                              ; lods          %ds:(%rsi),%rax
8279  DB  255,224                             ; jmpq          *%rax
8280
; _sk_load_u16_be_sse41: load 32 bytes (four elements of four 16-bit values)
; from (%rax,%rdi,8), de-interleave by channel, swap the two bytes of every
; word, and convert to floats scaled by 0x37800080 (~1/65535).
;   Results: xmm0..xmm3 = channel 0..3 of the four elements.
;   Clobbers: %rax, xmm8, xmm9.
8281PUBLIC _sk_load_u16_be_sse41
8282_sk_load_u16_be_sse41 LABEL PROC
; Fetch the argument, dereference it for the source base, load two 16-byte
; halves (elements 0-1 and 2-3).
8283  DB  72,173                              ; lods          %ds:(%rsi),%rax
8284  DB  72,139,0                            ; mov           (%rax),%rax
8285  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
8286  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of word interleaving transpose the data so that
; xmm1 = [channel 0 x4 | channel 1 x4], xmm2 = [channel 2 x4 | channel 3 x4].
8287  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
8288  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
8289  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
8290  DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
8291  DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
8292  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
; xmm8 = broadcast scale 0x37800080 (~1/65535).
8293  DB  184,128,0,128,55                    ; mov           $0x37800080,%eax
8294  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
8295  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; Channel 0: byte-swap words as (w << 8) | (w >> 8), widen, convert, scale.
; xmm3 first captures xmm1 with its halves swapped (channel 1 in the low
; half) before xmm1 is modified.
8296  DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
8297  DB  102,15,113,240,8                    ; psllw         $0x8,%xmm0
8298  DB  102,15,112,217,78                   ; pshufd        $0x4e,%xmm1,%xmm3
8299  DB  102,15,113,209,8                    ; psrlw         $0x8,%xmm1
8300  DB  102,15,235,200                      ; por           %xmm0,%xmm1
8301  DB  102,15,56,51,193                    ; pmovzxwd      %xmm1,%xmm0
8302  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
8303  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
; Channel 1 (from xmm3) -> xmm1.
8304  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
8305  DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
8306  DB  102,15,113,211,8                    ; psrlw         $0x8,%xmm3
8307  DB  102,15,235,217                      ; por           %xmm1,%xmm3
8308  DB  102,15,56,51,203                    ; pmovzxwd      %xmm3,%xmm1
8309  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
8310  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
; Channel 2 (low half of xmm2) -> xmm2; xmm3 captures the swapped halves
; (channel 3 low) before xmm2 is modified.
8311  DB  102,68,15,111,202                   ; movdqa        %xmm2,%xmm9
8312  DB  102,65,15,113,241,8                 ; psllw         $0x8,%xmm9
8313  DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
8314  DB  102,15,113,210,8                    ; psrlw         $0x8,%xmm2
8315  DB  102,65,15,235,209                   ; por           %xmm9,%xmm2
8316  DB  102,15,56,51,210                    ; pmovzxwd      %xmm2,%xmm2
8317  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
8318  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Channel 3 (from xmm3) -> xmm3.
8319  DB  102,68,15,111,203                   ; movdqa        %xmm3,%xmm9
8320  DB  102,65,15,113,241,8                 ; psllw         $0x8,%xmm9
8321  DB  102,15,113,211,8                    ; psrlw         $0x8,%xmm3
8322  DB  102,65,15,235,217                   ; por           %xmm9,%xmm3
8323  DB  102,15,56,51,219                    ; pmovzxwd      %xmm3,%xmm3
8324  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
8325  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8326  DB  72,173                              ; lods          %ds:(%rsi),%rax
8327  DB  255,224                             ; jmpq          *%rax
8328
; _sk_store_u16_be_sse41: scale xmm0..xmm3 by 65535.0f, round to integers,
; narrow to 16-bit words with unsigned saturation, swap the two bytes of
; every word, interleave per element and store 32 bytes at (%rax,%rdi,8).
;   Output order per element: xmm0, xmm1, xmm2, xmm3 lanes.
;   Clobbers: %rax, %rcx, xmm8-xmm12.
8329PUBLIC _sk_store_u16_be_sse41
8330_sk_store_u16_be_sse41 LABEL PROC
; Fetch the argument and dereference it for the destination base.
8331  DB  72,173                              ; lods          %ds:(%rsi),%rax
8332  DB  72,139,0                            ; mov           (%rax),%rax
; xmm9 = broadcast 0x477fff00 (65535.0f), the scale shared by all channels.
8333  DB  185,0,255,127,71                    ; mov           $0x477fff00,%ecx
8334  DB  102,68,15,110,201                   ; movd          %ecx,%xmm9
8335  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm8 = byte-swapped words of round(xmm0 * 65535).
8336  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
8337  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
8338  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
8339  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
8340  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
8341  DB  102,65,15,113,242,8                 ; psllw         $0x8,%xmm10
8342  DB  102,65,15,113,208,8                 ; psrlw         $0x8,%xmm8
8343  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
; xmm10 = byte-swapped words of round(xmm1 * 65535).
8344  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
8345  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
8346  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
8347  DB  102,69,15,56,43,210                 ; packusdw      %xmm10,%xmm10
8348  DB  102,69,15,111,218                   ; movdqa        %xmm10,%xmm11
8349  DB  102,65,15,113,243,8                 ; psllw         $0x8,%xmm11
8350  DB  102,65,15,113,210,8                 ; psrlw         $0x8,%xmm10
8351  DB  102,69,15,235,211                   ; por           %xmm11,%xmm10
; xmm11 = byte-swapped words of round(xmm2 * 65535).
8352  DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
8353  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8354  DB  102,69,15,91,219                    ; cvtps2dq      %xmm11,%xmm11
8355  DB  102,69,15,56,43,219                 ; packusdw      %xmm11,%xmm11
8356  DB  102,69,15,111,227                   ; movdqa        %xmm11,%xmm12
8357  DB  102,65,15,113,244,8                 ; psllw         $0x8,%xmm12
8358  DB  102,65,15,113,211,8                 ; psrlw         $0x8,%xmm11
8359  DB  102,69,15,235,220                   ; por           %xmm12,%xmm11
; xmm9 = byte-swapped words of round(xmm3 * 65535) (scale consumed last).
8360  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
8361  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
8362  DB  102,69,15,56,43,201                 ; packusdw      %xmm9,%xmm9
8363  DB  102,69,15,111,225                   ; movdqa        %xmm9,%xmm12
8364  DB  102,65,15,113,244,8                 ; psllw         $0x8,%xmm12
8365  DB  102,65,15,113,209,8                 ; psrlw         $0x8,%xmm9
8366  DB  102,69,15,235,204                   ; por           %xmm12,%xmm9
; Interleave: xmm8 = channel 0/1 pairs, xmm11 = channel 2/3 pairs, then
; combine pairs into whole elements; elements 0-1 go to the first 16 bytes,
; elements 2-3 to the next 16.
8367  DB  102,69,15,97,194                    ; punpcklwd     %xmm10,%xmm8
8368  DB  102,69,15,97,217                    ; punpcklwd     %xmm9,%xmm11
8369  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
8370  DB  102,69,15,98,203                    ; punpckldq     %xmm11,%xmm9
8371  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
8372  DB  102,69,15,106,195                   ; punpckhdq     %xmm11,%xmm8
8373  DB  243,68,15,127,68,248,16             ; movdqu        %xmm8,0x10(%rax,%rdi,8)
; Tail-jump to the next stage address read from (%rsi).
8374  DB  72,173                              ; lods          %ds:(%rsi),%rax
8375  DB  255,224                             ; jmpq          *%rax
8376
; _sk_load_f32_sse41: load four consecutive 16-byte elements (four floats
; each) starting at base + %rdi*16 and transpose them, so that xmm0..xmm3
; hold float 0..3 of every element respectively.
;   Clobbers: %rax, %rcx, xmm8, xmm9.  Ends by jumping to the next stage.
8377PUBLIC _sk_load_f32_sse41
8378_sk_load_f32_sse41 LABEL PROC
; Fetch the argument, dereference it for the source base; %rcx = %rdi * 16
; (the scale is too large for an addressing-mode multiplier).
8379  DB  72,173                              ; lods          %ds:(%rsi),%rax
8380  DB  72,139,0                            ; mov           (%rax),%rax
8381  DB  72,137,249                          ; mov           %rdi,%rcx
8382  DB  72,193,225,4                        ; shl           $0x4,%rcx
; Unaligned loads of elements 0..3 into xmm8, xmm0, xmm3, xmm9.
8383  DB  68,15,16,4,8                        ; movups        (%rax,%rcx,1),%xmm8
8384  DB  15,16,68,8,16                       ; movups        0x10(%rax,%rcx,1),%xmm0
8385  DB  15,16,92,8,32                       ; movups        0x20(%rax,%rcx,1),%xmm3
8386  DB  68,15,16,76,8,48                    ; movups        0x30(%rax,%rcx,1),%xmm9
; 4x4 transpose, step 1: interleave 32-bit floats of element pairs
; (0,1) and (2,3), low halves into xmm2/xmm1, high halves into xmm8/xmm3.
8387  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
8388  DB  15,20,208                           ; unpcklps      %xmm0,%xmm2
8389  DB  15,40,203                           ; movaps        %xmm3,%xmm1
8390  DB  65,15,20,201                        ; unpcklps      %xmm9,%xmm1
8391  DB  68,15,21,192                        ; unpckhps      %xmm0,%xmm8
8392  DB  65,15,21,217                        ; unpckhps      %xmm9,%xmm3
; Step 2: combine 64-bit halves to form the four output columns.
8393  DB  15,40,194                           ; movaps        %xmm2,%xmm0
8394  DB  102,15,20,193                       ; unpcklpd      %xmm1,%xmm0
8395  DB  15,18,202                           ; movhlps       %xmm2,%xmm1
8396  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
8397  DB  102,15,20,211                       ; unpcklpd      %xmm3,%xmm2
8398  DB  65,15,18,216                        ; movhlps       %xmm8,%xmm3
; Tail-jump to the next stage address read from (%rsi).
8399  DB  72,173                              ; lods          %ds:(%rsi),%rax
8400  DB  255,224                             ; jmpq          *%rax
8401
; _sk_store_f32_sse41: transpose xmm0..xmm3 so each lane becomes one 16-byte
; element (xmm0, xmm1, xmm2, xmm3 values in that order) and store the four
; elements at base + %rdi*16.  xmm0..xmm3 themselves are left unmodified.
;   Clobbers: %rax, %rcx, xmm8-xmm12.  Ends by jumping to the next stage.
8402PUBLIC _sk_store_f32_sse41
8403_sk_store_f32_sse41 LABEL PROC
; Fetch the argument, dereference it for the destination base;
; %rcx = %rdi * 16.
8404  DB  72,173                              ; lods          %ds:(%rsi),%rax
8405  DB  72,139,0                            ; mov           (%rax),%rax
8406  DB  72,137,249                          ; mov           %rdi,%rcx
8407  DB  72,193,225,4                        ; shl           $0x4,%rcx
; 4x4 transpose, step 1: interleave (xmm0,xmm1) and (xmm2,xmm3);
; low halves land in xmm9/xmm11, high halves in xmm8/xmm10.
8408  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8409  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
8410  DB  68,15,20,201                        ; unpcklps      %xmm1,%xmm9
8411  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
8412  DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
8413  DB  68,15,20,219                        ; unpcklps      %xmm3,%xmm11
8414  DB  68,15,21,193                        ; unpckhps      %xmm1,%xmm8
8415  DB  68,15,21,211                        ; unpckhps      %xmm3,%xmm10
; Step 2: combine 64-bit halves; elements 0..3 end up in
; xmm12, xmm11, xmm9, xmm10.
8416  DB  69,15,40,225                        ; movaps        %xmm9,%xmm12
8417  DB  102,69,15,20,227                    ; unpcklpd      %xmm11,%xmm12
8418  DB  69,15,18,217                        ; movhlps       %xmm9,%xmm11
8419  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8420  DB  102,69,15,20,202                    ; unpcklpd      %xmm10,%xmm9
8421  DB  69,15,18,208                        ; movhlps       %xmm8,%xmm10
; Unaligned 16-byte stores of the four elements (movupd/movups both store
; the full 128 bits; the mix is an artifact of the generated code).
8422  DB  102,68,15,17,36,8                   ; movupd        %xmm12,(%rax,%rcx,1)
8423  DB  68,15,17,92,8,16                    ; movups        %xmm11,0x10(%rax,%rcx,1)
8424  DB  102,68,15,17,76,8,32                ; movupd        %xmm9,0x20(%rax,%rcx,1)
8425  DB  68,15,17,84,8,48                    ; movups        %xmm10,0x30(%rax,%rcx,1)
; Tail-jump to the next stage address read from (%rsi).
8426  DB  72,173                              ; lods          %ds:(%rsi),%rax
8427  DB  255,224                             ; jmpq          *%rax
8428
; _sk_clamp_x_sse41
; xmm0 = min(max(0, xmm0), limit') per lane. limit is the single float at the
; address read from the program stream at (%rsi), broadcast to all four lanes;
; limit' is limit with its 32-bit pattern decremented by one (pcmpeqd produces
; all-ones, i.e. -1 per lane, and paddd adds it). For a positive non-zero limit
; that is the next representable float below limit.
; Overwrites %rax, xmm8 and xmm9. Ends by jumping to the next program word.
8429PUBLIC _sk_clamp_x_sse41
8430_sk_clamp_x_sse41 LABEL PROC
8431  DB  72,173                              ; lods          %ds:(%rsi),%rax
8432  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
8433  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
8434  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
8435  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm0 is free here (its clamped-at-zero copy is in xmm8); reuse it for limit'.
8436  DB  102,15,118,192                      ; pcmpeqd       %xmm0,%xmm0
8437  DB  102,65,15,254,193                   ; paddd         %xmm9,%xmm0
8438  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
8439  DB  72,173                              ; lods          %ds:(%rsi),%rax
8440  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
8441  DB  255,224                             ; jmpq          *%rax
8442
; _sk_clamp_y_sse41
; Same computation as the x clamp, applied to xmm1 instead of xmm0:
; xmm1 = min(max(0, xmm1), limit') per lane, where limit is the float at the
; address read from the program stream (broadcast to four lanes) and limit' is
; limit with its 32-bit pattern decremented by one (all-ones from pcmpeqd added
; with paddd).
; Overwrites %rax, xmm8 and xmm9. Ends by jumping to the next program word.
8443PUBLIC _sk_clamp_y_sse41
8444_sk_clamp_y_sse41 LABEL PROC
8445  DB  72,173                              ; lods          %ds:(%rsi),%rax
8446  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
8447  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
8448  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
8449  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm1 is free here (its clamped-at-zero copy is in xmm8); reuse it for limit'.
8450  DB  102,15,118,201                      ; pcmpeqd       %xmm1,%xmm1
8451  DB  102,65,15,254,201                   ; paddd         %xmm9,%xmm1
8452  DB  68,15,93,193                        ; minps         %xmm1,%xmm8
8453  DB  72,173                              ; lods          %ds:(%rsi),%rax
8454  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
8455  DB  255,224                             ; jmpq          *%rax
8456
; _sk_repeat_x_sse41
; Per lane: xmm0 = min(xmm0 - floor(xmm0 / limit) * limit, limit'), where limit
; is the float at the address read from the program stream (broadcast to four
; lanes) and limit' is limit with its 32-bit pattern decremented by one.
; roundps with immediate 1 rounds toward negative infinity (floor).
; Overwrites %rax, xmm8 and xmm9. Ends by jumping to the next program word.
8457PUBLIC _sk_repeat_x_sse41
8458_sk_repeat_x_sse41 LABEL PROC
8459  DB  72,173                              ; lods          %ds:(%rsi),%rax
8460  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
8461  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8462  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
8463  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
8464  DB  102,69,15,58,8,201,1                ; roundps       $0x1,%xmm9,%xmm9
8465  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
8466  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm9 = limit' (all-ones added to the bit pattern of limit), then cap xmm0.
8467  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
8468  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
8469  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
8470  DB  72,173                              ; lods          %ds:(%rsi),%rax
8471  DB  255,224                             ; jmpq          *%rax
8472
; _sk_repeat_y_sse41
; Same computation as the x repeat, applied to xmm1 instead of xmm0. Per lane:
; xmm1 = min(xmm1 - floor(xmm1 / limit) * limit, limit'), where limit is the
; float at the address read from the program stream (broadcast to four lanes)
; and limit' is limit with its 32-bit pattern decremented by one.
; roundps with immediate 1 rounds toward negative infinity (floor).
; Overwrites %rax, xmm8 and xmm9. Ends by jumping to the next program word.
8473PUBLIC _sk_repeat_y_sse41
8474_sk_repeat_y_sse41 LABEL PROC
8475  DB  72,173                              ; lods          %ds:(%rsi),%rax
8476  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
8477  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8478  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
8479  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
8480  DB  102,69,15,58,8,201,1                ; roundps       $0x1,%xmm9,%xmm9
8481  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
8482  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; xmm9 = limit' (all-ones added to the bit pattern of limit), then cap xmm1.
8483  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
8484  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
8485  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
8486  DB  72,173                              ; lods          %ds:(%rsi),%rax
8487  DB  255,224                             ; jmpq          *%rax
8488
; _sk_mirror_x_sse41
; With L = the float at the address read from the program stream (broadcast to
; four lanes), computes per lane:
;   t    = xmm0 - L
;   t    = t - floor(t / (2*L)) * (2*L)
;   t    = t - L
;   xmm0 = min(t AND (0 - t), L')
; t AND (0 - t) keeps every bit common to t and its negation, which clears the
; sign bit (absolute value). L' is L with its 32-bit pattern decremented by one.
; Overwrites %rax, xmm8, xmm9 and xmm10. Ends by jumping to the next program word.
8489PUBLIC _sk_mirror_x_sse41
8490_sk_mirror_x_sse41 LABEL PROC
8491  DB  72,173                              ; lods          %ds:(%rsi),%rax
8492  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
8493  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8494  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8495  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm8 = 2*L: doubled as a scalar first, then broadcast.
8496  DB  243,69,15,88,192                    ; addss         %xmm8,%xmm8
8497  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8498  DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
8499  DB  69,15,94,208                        ; divps         %xmm8,%xmm10
8500  DB  102,69,15,58,8,210,1                ; roundps       $0x1,%xmm10,%xmm10
8501  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
8502  DB  65,15,92,194                        ; subps         %xmm10,%xmm0
8503  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; Absolute value via t AND (0 - t).
8504  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
8505  DB  68,15,92,192                        ; subps         %xmm0,%xmm8
8506  DB  65,15,84,192                        ; andps         %xmm8,%xmm0
; xmm8 = L' (all-ones added to the bit pattern of L), then cap xmm0.
8507  DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
8508  DB  102,69,15,254,193                   ; paddd         %xmm9,%xmm8
8509  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
8510  DB  72,173                              ; lods          %ds:(%rsi),%rax
8511  DB  255,224                             ; jmpq          *%rax
8512
; _sk_mirror_y_sse41
; Same computation as the x mirror, applied to xmm1 instead of xmm0. With L =
; the float at the address read from the program stream (broadcast to four
; lanes), computes per lane:
;   t    = xmm1 - L
;   t    = t - floor(t / (2*L)) * (2*L)
;   t    = t - L
;   xmm1 = min(t AND (0 - t), L')
; t AND (0 - t) clears the sign bit (absolute value). L' is L with its 32-bit
; pattern decremented by one.
; Overwrites %rax, xmm8, xmm9 and xmm10. Ends by jumping to the next program word.
8513PUBLIC _sk_mirror_y_sse41
8514_sk_mirror_y_sse41 LABEL PROC
8515  DB  72,173                              ; lods          %ds:(%rsi),%rax
8516  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
8517  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8518  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8519  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; xmm8 = 2*L: doubled as a scalar first, then broadcast.
8520  DB  243,69,15,88,192                    ; addss         %xmm8,%xmm8
8521  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8522  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
8523  DB  69,15,94,208                        ; divps         %xmm8,%xmm10
8524  DB  102,69,15,58,8,210,1                ; roundps       $0x1,%xmm10,%xmm10
8525  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
8526  DB  65,15,92,202                        ; subps         %xmm10,%xmm1
8527  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; Absolute value via t AND (0 - t).
8528  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
8529  DB  68,15,92,193                        ; subps         %xmm1,%xmm8
8530  DB  65,15,84,200                        ; andps         %xmm8,%xmm1
; xmm8 = L' (all-ones added to the bit pattern of L), then cap xmm1.
8531  DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
8532  DB  102,69,15,254,193                   ; paddd         %xmm9,%xmm8
8533  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
8534  DB  72,173                              ; lods          %ds:(%rsi),%rax
8535  DB  255,224                             ; jmpq          *%rax
8536
; _sk_luminance_to_alpha_sse41
; Per lane: xmm3 = c2*xmm2 + (c1*xmm1 + c0*xmm0), then xmm0 = xmm1 = xmm2 = 0.
; The three weights are immediate float bit patterns:
;   c0 = 0x3e59b3d0 (~0.2126), c1 = 0x3f371759 (~0.7152), c2 = 0x3d93dd98 (~0.0722)
; The previous contents of xmm3 are discarded.
; This stage reads no context pointer: its only lods fetches the next program
; word, which is then jumped to. %rax is also used as scratch for the constants.
8537PUBLIC _sk_luminance_to_alpha_sse41
8538_sk_luminance_to_alpha_sse41 LABEL PROC
8539  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
8540  DB  102,15,110,216                      ; movd          %eax,%xmm3
8541  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8542  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; xmm0 has been consumed above, so it is reused as the accumulator.
8543  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
8544  DB  102,15,110,192                      ; movd          %eax,%xmm0
8545  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8546  DB  15,89,193                           ; mulps         %xmm1,%xmm0
8547  DB  15,88,195                           ; addps         %xmm3,%xmm0
8548  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
8549  DB  102,15,110,216                      ; movd          %eax,%xmm3
8550  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8551  DB  15,89,218                           ; mulps         %xmm2,%xmm3
8552  DB  15,88,216                           ; addps         %xmm0,%xmm3
8553  DB  72,173                              ; lods          %ds:(%rsi),%rax
8554  DB  15,87,192                           ; xorps         %xmm0,%xmm0
8555  DB  15,87,201                           ; xorps         %xmm1,%xmm1
8556  DB  15,87,210                           ; xorps         %xmm2,%xmm2
8557  DB  255,224                             ; jmpq          *%rax
8558
; _sk_matrix_2x3_sse41
; Reads a pointer m from the program stream and treats it as six consecutive
; floats m[0..5] (byte offsets 0x0..0x14). With x = incoming xmm0 and
; y = incoming xmm1, computes per lane:
;   xmm0 = m[0]*x + (m[2]*y + m[4])
;   xmm1 = m[1]*x + (m[3]*y + m[5])
; Each coefficient is loaded with movss and broadcast with shufps $0.
; Overwrites %rax and xmm8-xmm11. Ends by jumping to the next program word.
8559PUBLIC _sk_matrix_2x3_sse41
8560_sk_matrix_2x3_sse41 LABEL PROC
; Save the inputs (xmm8 = x, xmm9 = y) because xmm0/xmm1 receive the results.
8561  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
8562  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8563  DB  72,173                              ; lods          %ds:(%rsi),%rax
8564  DB  243,15,16,0                         ; movss         (%rax),%xmm0
8565  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
8566  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8567  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
8568  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8569  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
8570  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8571  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8572  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8573  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8574  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output, using m[1], m[3], m[5].
8575  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8576  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
8577  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8578  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
8579  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8580  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8581  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8582  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8583  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
8584  DB  72,173                              ; lods          %ds:(%rsi),%rax
8585  DB  255,224                             ; jmpq          *%rax
8586
; _sk_matrix_3x4_sse41
; Reads a pointer m from the program stream and treats it as twelve consecutive
; floats m[0..11] (byte offsets 0x0..0x2c). With a = incoming xmm0,
; b = incoming xmm1, c = incoming xmm2, computes per lane:
;   xmm0 = m[0]*a + (m[3]*b + (m[6]*c + m[9]))
;   xmm1 = m[1]*a + (m[4]*b + (m[7]*c + m[10]))
;   xmm2 = m[2]*a + (m[5]*b + (m[8]*c + m[11]))
; xmm3 is not touched. Each coefficient is loaded with movss and broadcast with
; shufps $0.
; Overwrites %rax and xmm8-xmm13. Ends by jumping to the next program word.
8587PUBLIC _sk_matrix_3x4_sse41
8588_sk_matrix_3x4_sse41 LABEL PROC
; Save a and b (xmm8, xmm9); c stays in xmm2 until the final move.
8589  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
8590  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8591  DB  72,173                              ; lods          %ds:(%rsi),%rax
8592  DB  243,15,16,0                         ; movss         (%rax),%xmm0
8593  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
8594  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8595  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
8596  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8597  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
8598  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8599  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
8600  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8601  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8602  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8603  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8604  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8605  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8606  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output, using m[1], m[4], m[7], m[10].
8607  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8608  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
8609  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8610  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
8611  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8612  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
8613  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8614  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8615  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8616  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8617  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8618  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8619  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output, accumulated in xmm10 because xmm2 is still needed as an input.
8620  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
8621  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8622  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
8623  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8624  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
8625  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8626  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
8627  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
8628  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
8629  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
8630  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
8631  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8632  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
8633  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8634  DB  72,173                              ; lods          %ds:(%rsi),%rax
8635  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
8636  DB  255,224                             ; jmpq          *%rax
8637
; _sk_matrix_4x5_sse41
; Reads a pointer m from the program stream and treats it as twenty consecutive
; floats m[0..19] (byte offsets 0x0..0x4c). With a, b, c, d = incoming xmm0,
; xmm1, xmm2, xmm3, computes per lane:
;   xmm0 = m[0]*a + (m[4]*b + (m[8]*c  + (m[12]*d + m[16])))
;   xmm1 = m[1]*a + (m[5]*b + (m[9]*c  + (m[13]*d + m[17])))
;   xmm2 = m[2]*a + (m[6]*b + (m[10]*c + (m[14]*d + m[18])))
;   xmm3 = m[3]*a + (m[7]*b + (m[11]*c + (m[15]*d + m[19])))
; Each coefficient is loaded with movss and broadcast with shufps $0.
; Overwrites %rax and xmm8-xmm15. Ends by jumping to the next program word.
8638PUBLIC _sk_matrix_4x5_sse41
8639_sk_matrix_4x5_sse41 LABEL PROC
; Save a and b (xmm8, xmm9); c and d stay in xmm2/xmm3 until the final moves.
8640  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
8641  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8642  DB  72,173                              ; lods          %ds:(%rsi),%rax
8643  DB  243,15,16,0                         ; movss         (%rax),%xmm0
8644  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
8645  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8646  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
8647  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8648  DB  243,68,15,16,88,32                  ; movss         0x20(%rax),%xmm11
8649  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8650  DB  243,68,15,16,96,48                  ; movss         0x30(%rax),%xmm12
8651  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8652  DB  243,68,15,16,104,64                 ; movss         0x40(%rax),%xmm13
8653  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
8654  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
8655  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
8656  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8657  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8658  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8659  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8660  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8661  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output, using m[1], m[5], m[9], m[13], m[17].
8662  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8663  DB  243,68,15,16,80,20                  ; movss         0x14(%rax),%xmm10
8664  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8665  DB  243,68,15,16,88,36                  ; movss         0x24(%rax),%xmm11
8666  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8667  DB  243,68,15,16,96,52                  ; movss         0x34(%rax),%xmm12
8668  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8669  DB  243,68,15,16,104,68                 ; movss         0x44(%rax),%xmm13
8670  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
8671  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
8672  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
8673  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
8674  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8675  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
8676  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8677  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8678  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output, accumulated in xmm10 because xmm2 is still needed as an input.
8679  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
8680  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8681  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
8682  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8683  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
8684  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8685  DB  243,68,15,16,104,56                 ; movss         0x38(%rax),%xmm13
8686  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
8687  DB  243,68,15,16,112,72                 ; movss         0x48(%rax),%xmm14
8688  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
8689  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
8690  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
8691  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
8692  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
8693  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
8694  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8695  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
8696  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Fourth output, accumulated in xmm11 because xmm3 is still needed as an input.
8697  DB  243,68,15,16,88,12                  ; movss         0xc(%rax),%xmm11
8698  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8699  DB  243,68,15,16,96,28                  ; movss         0x1c(%rax),%xmm12
8700  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8701  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
8702  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
8703  DB  243,68,15,16,112,60                 ; movss         0x3c(%rax),%xmm14
8704  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
8705  DB  243,68,15,16,120,76                 ; movss         0x4c(%rax),%xmm15
8706  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
8707  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
8708  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
8709  DB  68,15,89,234                        ; mulps         %xmm2,%xmm13
8710  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
8711  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
8712  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
8713  DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
8714  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8715  DB  72,173                              ; lods          %ds:(%rsi),%rax
8716  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
8717  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
8718  DB  255,224                             ; jmpq          *%rax
8719
; _sk_matrix_perspective_sse41
; Reads a pointer m from the program stream and treats it as nine consecutive
; floats m[0..8] (byte offsets 0x0..0x20). With x = incoming xmm0 and
; y = incoming xmm1, computes per lane:
;   X = m[0]*x + (m[1]*y + m[2])
;   Y = m[3]*x + (m[4]*y + m[5])
;   W = m[6]*x + (m[7]*y + m[8])
;   xmm0 = X * rcp(W),  xmm1 = Y * rcp(W)
; rcp is the rcpps instruction, which returns an approximate reciprocal rather
; than an exact division.
; Overwrites %rax and xmm8-xmm12. Ends by jumping to the next program word.
8720PUBLIC _sk_matrix_perspective_sse41
8721_sk_matrix_perspective_sse41 LABEL PROC
; Save x in xmm8; y stays in xmm1 until rcpps overwrites it.
8722  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8723  DB  72,173                              ; lods          %ds:(%rsi),%rax
8724  DB  243,15,16,0                         ; movss         (%rax),%xmm0
8725  DB  243,68,15,16,72,4                   ; movss         0x4(%rax),%xmm9
8726  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8727  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8728  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
8729  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8730  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
8731  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
8732  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8733  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
; Y accumulates in xmm9.
8734  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9
8735  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8736  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
8737  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8738  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
8739  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8740  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
8741  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
8742  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
8743  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
; W accumulates in xmm10.
8744  DB  243,68,15,16,80,24                  ; movss         0x18(%rax),%xmm10
8745  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
8746  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
8747  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
8748  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
8749  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
8750  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
8751  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
8752  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
8753  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Scale X and Y by the approximate reciprocal of W.
8754  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
8755  DB  15,89,193                           ; mulps         %xmm1,%xmm0
8756  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
8757  DB  72,173                              ; lods          %ds:(%rsi),%rax
8758  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
8759  DB  255,224                             ; jmpq          *%rax
8760
; _sk_linear_gradient_2stops_sse41
; Reads a pointer c from the program stream and treats it as eight consecutive
; floats c[0..7] (byte offsets 0x0..0x1c). With t = incoming xmm0, computes per
; lane:
;   xmm0 = t*c[4] + c[0]
;   xmm1 = t*c[5] + c[1]
;   xmm2 = t*c[6] + c[2]
;   xmm3 = t*c[7] + c[3]
; The incoming xmm1-xmm3 are discarded.
; Presumably c[0..3] is a start colour and c[4..7] the per-channel delta to the
; end colour -- inferred from the stage name; confirm against the code that
; builds this context.
; Overwrites %rax, xmm8 and xmm9. Ends by jumping to the next program word.
8761PUBLIC _sk_linear_gradient_2stops_sse41
8762_sk_linear_gradient_2stops_sse41 LABEL PROC
; Save t in xmm8 because xmm0 receives the first result.
8763  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
8764  DB  72,173                              ; lods          %ds:(%rsi),%rax
8765  DB  243,15,16,64,16                     ; movss         0x10(%rax),%xmm0
8766  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8767  DB  243,15,16,8                         ; movss         (%rax),%xmm1
8768  DB  243,15,16,80,4                      ; movss         0x4(%rax),%xmm2
8769  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8770  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8771  DB  15,88,193                           ; addps         %xmm1,%xmm0
8772  DB  243,15,16,72,20                     ; movss         0x14(%rax),%xmm1
8773  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8774  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8775  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8776  DB  15,88,202                           ; addps         %xmm2,%xmm1
8777  DB  243,15,16,80,24                     ; movss         0x18(%rax),%xmm2
8778  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8779  DB  243,15,16,88,8                      ; movss         0x8(%rax),%xmm3
8780  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8781  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
8782  DB  15,88,211                           ; addps         %xmm3,%xmm2
8783  DB  243,15,16,88,28                     ; movss         0x1c(%rax),%xmm3
8784  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
8785  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9
8786  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8787  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
8788  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
8789  DB  72,173                              ; lods          %ds:(%rsi),%rax
8790  DB  255,224                             ; jmpq          *%rax
8791
; _sk_start_pipeline_sse2
; Entry point that drives a chain of stages four lanes at a time.
; Incoming registers (the Microsoft x64 argument registers):
;   %rcx -> kept in %rbx, the running index (call it x)
;   %rdx -> %rsi, pointer to the program stream; its first 8-byte word is the
;           address of the first stage (kept in %r12), and the address of the
;           following word is kept in %r13
;   %r8  -> kept in %r14, passed unchanged to every stage call in %rdx
;   %r9  -> kept in %r15, the upper bound (call it limit)
; Behaviour:
;   while (x + 4 <= limit)           ; unsigned compare (jbe)
;       zero xmm0-xmm7; call first stage with %rdi = x, %rsi = %r13, %rdx = %r14
;       x += 4
;   return x in %rax
; Saves and restores %rbx, %rsi, %rdi, %r12-%r15 and xmm6-xmm15, which are
; callee-saved under the Microsoft x64 convention. The seven pushes plus the
; 0xa0-byte frame leave %rsp 16-byte aligned, as the movaps spills require.
8792PUBLIC _sk_start_pipeline_sse2
8793_sk_start_pipeline_sse2 LABEL PROC
8794  DB  65,87                               ; push          %r15
8795  DB  65,86                               ; push          %r14
8796  DB  65,85                               ; push          %r13
8797  DB  65,84                               ; push          %r12
8798  DB  86                                  ; push          %rsi
8799  DB  87                                  ; push          %rdi
8800  DB  83                                  ; push          %rbx
8801  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
8802  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
8803  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
8804  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
8805  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
8806  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
8807  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
8808  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
8809  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
8810  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
8811  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
; Move the arguments into registers that survive the stage calls.
8812  DB  77,137,207                          ; mov           %r9,%r15
8813  DB  77,137,198                          ; mov           %r8,%r14
8814  DB  72,137,203                          ; mov           %rcx,%rbx
8815  DB  72,137,214                          ; mov           %rdx,%rsi
8816  DB  72,173                              ; lods          %ds:(%rsi),%rax
8817  DB  73,137,196                          ; mov           %rax,%r12
8818  DB  73,137,245                          ; mov           %rsi,%r13
; Skip the loop entirely when x + 4 > limit; the return value is then x.
8819  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
8820  DB  76,57,248                           ; cmp           %r15,%rax
8821  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse2+0x73>
8822  DB  72,137,216                          ; mov           %rbx,%rax
8823  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse2+0xa7>
; Loop body (+0x73): every call starts with xmm0-xmm7 zeroed.
8824  DB  15,87,192                           ; xorps         %xmm0,%xmm0
8825  DB  15,87,201                           ; xorps         %xmm1,%xmm1
8826  DB  15,87,210                           ; xorps         %xmm2,%xmm2
8827  DB  15,87,219                           ; xorps         %xmm3,%xmm3
8828  DB  15,87,228                           ; xorps         %xmm4,%xmm4
8829  DB  15,87,237                           ; xorps         %xmm5,%xmm5
8830  DB  15,87,246                           ; xorps         %xmm6,%xmm6
8831  DB  15,87,255                           ; xorps         %xmm7,%xmm7
8832  DB  72,137,223                          ; mov           %rbx,%rdi
8833  DB  76,137,238                          ; mov           %r13,%rsi
8834  DB  76,137,242                          ; mov           %r14,%rdx
8835  DB  65,255,212                          ; callq         *%r12
; %rax = x + 4 (the next x, and the return value on exit); the compare uses
; x + 8, i.e. "next x + 4 <= limit", before %rbx is set to the next x.
8836  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
8837  DB  72,131,195,8                        ; add           $0x8,%rbx
8838  DB  76,57,251                           ; cmp           %r15,%rbx
8839  DB  72,137,195                          ; mov           %rax,%rbx
8840  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse2+0x73>
; Epilogue (+0xa7): restore the saved registers in reverse order.
8841  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
8842  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
8843  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
8844  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
8845  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
8846  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
8847  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
8848  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
8849  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
8850  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
8851  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
8852  DB  91                                  ; pop           %rbx
8853  DB  95                                  ; pop           %rdi
8854  DB  94                                  ; pop           %rsi
8855  DB  65,92                               ; pop           %r12
8856  DB  65,93                               ; pop           %r13
8857  DB  65,94                               ; pop           %r14
8858  DB  65,95                               ; pop           %r15
8859  DB  195                                 ; retq
8860
; _sk_just_return_sse2
;   Consists of a single near return (opcode 0xC3) and nothing else.
;   It reads and writes no general-purpose or xmm register and no memory
;   other than the return address popped by retq.
8861PUBLIC _sk_just_return_sse2
8862_sk_just_return_sse2 LABEL PROC
8863  DB  195                                 ; retq
8864
; _sk_seed_shader_sse2
;   Reads:   %edi  - 32-bit integer, converted to float and broadcast
;            %rdx  - pointer to 16 bytes (4 floats), loaded unaligned
;            %rsi  - pointer list; two 8-byte entries are consumed:
;                    entry 0 = pointer to a 32-bit integer,
;                    entry 1 = address jumped to at the end.
;   Writes (each xmm register is 4 packed floats):
;            xmm0 = [rdx] + (float(edi) + 0.5)
;            xmm1 = float(int32 at entry 0) + 0.5, same value in all lanes
;            xmm2 = 1.0 in all lanes
;            xmm3..xmm7 = 0
;   Clobbers: rax, rcx; %rsi is advanced by 16 (assuming DF is clear).
;   NOTE(review): presumably %edi is the x coordinate, the int32 is y, and
;   [rdx] holds per-lane x offsets - confirm against the stage's C++ source.
8865PUBLIC _sk_seed_shader_sse2
8866_sk_seed_shader_sse2 LABEL PROC
8867  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm1 = float(edi) broadcast to all four lanes.
8868  DB  102,15,110,199                      ; movd          %edi,%xmm0
8869  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
8870  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
; 0x3f000000 is the IEEE-754 single-precision bit pattern of 0.5.
8871  DB  185,0,0,0,63                        ; mov           $0x3f000000,%ecx
8872  DB  102,15,110,209                      ; movd          %ecx,%xmm2
8873  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8874  DB  15,88,202                           ; addps         %xmm2,%xmm1
8875  DB  15,16,2                             ; movups        (%rdx),%xmm0
8876  DB  15,88,193                           ; addps         %xmm1,%xmm0
; %rax still holds entry 0 here; it is dereferenced as a 32-bit integer.
8877  DB  102,15,110,8                        ; movd          (%rax),%xmm1
8878  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
8879  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
8880  DB  15,88,202                           ; addps         %xmm2,%xmm1
; 0x3f800000 is the bit pattern of 1.0f. Writing %eax overwrites the entry-0
; pointer, which is no longer needed; the lods below reloads %rax.
8881  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
8882  DB  102,15,110,208                      ; movd          %eax,%xmm2
8883  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8884  DB  72,173                              ; lods          %ds:(%rsi),%rax
8885  DB  15,87,219                           ; xorps         %xmm3,%xmm3
8886  DB  15,87,228                           ; xorps         %xmm4,%xmm4
8887  DB  15,87,237                           ; xorps         %xmm5,%xmm5
8888  DB  15,87,246                           ; xorps         %xmm6,%xmm6
8889  DB  15,87,255                           ; xorps         %xmm7,%xmm7
; Tail-jump to entry 1; control does not come back to this label.
8890  DB  255,224                             ; jmpq          *%rax
8891
; _sk_constant_color_sse2
;   Consumes two 8-byte entries from the pointer list at %rsi:
;     entry 0 = pointer to four consecutive floats (offsets 0, 4, 8, 12),
;     entry 1 = address jumped to at the end.
;   Writes: xmm0, xmm1, xmm2, xmm3 = the float at offset 0, 4, 8, 12
;           respectively, each replicated into all four lanes.
;   Clobbers: rax; %rsi is advanced by 16 (assuming DF is clear).
;   xmm4..xmm7 are not touched.
;   NOTE(review): presumably the four floats are r,g,b,a of the colour -
;   confirm against the stage's C++ source.
8892PUBLIC _sk_constant_color_sse2
8893_sk_constant_color_sse2 LABEL PROC
8894  DB  72,173                              ; lods          %ds:(%rsi),%rax
; movss loads one float into lane 0; shufps $0 then copies lane 0 to all lanes.
8895  DB  243,15,16,0                         ; movss         (%rax),%xmm0
8896  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
8897  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
8898  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
8899  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
8900  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
8901  DB  243,15,16,88,12                     ; movss         0xc(%rax),%xmm3
8902  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Load entry 1 and tail-jump to it.
8903  DB  72,173                              ; lods          %ds:(%rsi),%rax
8904  DB  255,224                             ; jmpq          *%rax
8905
; _sk_clear_sse2
;   Zeroes xmm0, xmm1, xmm2 and xmm3 (all lanes), then tail-jumps to the
;   8-byte address loaded from (%rsi).
;   Clobbers: rax; %rsi is advanced by 8 (assuming DF is clear).
;   xmm4..xmm7 are not touched.
8906PUBLIC _sk_clear_sse2
8907_sk_clear_sse2 LABEL PROC
; The jump target is fetched first; the xorps instructions do not use %rax.
8908  DB  72,173                              ; lods          %ds:(%rsi),%rax
8909  DB  15,87,192                           ; xorps         %xmm0,%xmm0
8910  DB  15,87,201                           ; xorps         %xmm1,%xmm1
8911  DB  15,87,210                           ; xorps         %xmm2,%xmm2
8912  DB  15,87,219                           ; xorps         %xmm3,%xmm3
8913  DB  255,224                             ; jmpq          *%rax
8914
; _sk_srcatop_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0*xmm7 + xmm4*(1 - old xmm3)
;     xmm1 = xmm1*xmm7 + xmm5*(1 - old xmm3)
;     xmm2 = xmm2*xmm7 + xmm6*(1 - old xmm3)
;     xmm3 = xmm3*xmm7 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
8915PUBLIC _sk_srcatop_sse2
8916_sk_srcatop_sse2 LABEL PROC
8917  DB  15,89,199                           ; mulps         %xmm7,%xmm0
; xmm8 = 1.0 - xmm3 (0x3f800000 is the bit pattern of 1.0f).
8918  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
8919  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
8920  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8921  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
8922  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8923  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
8924  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
8925  DB  15,89,207                           ; mulps         %xmm7,%xmm1
8926  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8927  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
8928  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
8929  DB  15,89,215                           ; mulps         %xmm7,%xmm2
8930  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
8931  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
8932  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
; xmm8 was computed from the old xmm3 above, so xmm3 may be overwritten now.
8933  DB  15,89,223                           ; mulps         %xmm7,%xmm3
8934  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
8935  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
8936  DB  72,173                              ; lods          %ds:(%rsi),%rax
8937  DB  255,224                             ; jmpq          *%rax
8938
; _sk_dstatop_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0*(1 - xmm7) + xmm4*xmm3
;     xmm1 = xmm1*(1 - xmm7) + xmm5*xmm3
;     xmm2 = xmm2*(1 - xmm7) + xmm6*xmm3
;     xmm3 = xmm3*xmm7 + old xmm3*(1 - xmm7)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
8939PUBLIC _sk_dstatop_sse2
8940_sk_dstatop_sse2 LABEL PROC
8941  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
8942  DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
; xmm9 = 1.0 - xmm7 (0x3f800000 is the bit pattern of 1.0f).
8943  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
8944  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
8945  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
8946  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
8947  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
8948  DB  65,15,88,192                        ; addps         %xmm8,%xmm0
8949  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
8950  DB  68,15,89,197                        ; mulps         %xmm5,%xmm8
8951  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
8952  DB  65,15,88,200                        ; addps         %xmm8,%xmm1
8953  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
8954  DB  68,15,89,198                        ; mulps         %xmm6,%xmm8
8955  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
8956  DB  65,15,88,208                        ; addps         %xmm8,%xmm2
; xmm9 picks up the old xmm3 before xmm3 itself is scaled by xmm7.
8957  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
8958  DB  15,89,223                           ; mulps         %xmm7,%xmm3
8959  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
8960  DB  72,173                              ; lods          %ds:(%rsi),%rax
8961  DB  255,224                             ; jmpq          *%rax
8962
; _sk_srcin_sse2
;   Multiplies each of xmm0, xmm1, xmm2, xmm3 by xmm7, lane by lane.
;   xmm7 is read only; xmm4..xmm6 are not touched. Clobbers: rax.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm7 is
;   destination alpha - confirm against the stage's C++ source.
8963PUBLIC _sk_srcin_sse2
8964_sk_srcin_sse2 LABEL PROC
8965  DB  15,89,199                           ; mulps         %xmm7,%xmm0
8966  DB  15,89,207                           ; mulps         %xmm7,%xmm1
8967  DB  15,89,215                           ; mulps         %xmm7,%xmm2
8968  DB  15,89,223                           ; mulps         %xmm7,%xmm3
8969  DB  72,173                              ; lods          %ds:(%rsi),%rax
8970  DB  255,224                             ; jmpq          *%rax
8971
; _sk_dstin_sse2
;   Per-lane result (the incoming xmm0..xmm2 are discarded):
;     xmm0 = xmm3*xmm4
;     xmm1 = xmm3*xmm5
;     xmm2 = xmm3*xmm6
;     xmm3 = xmm3*xmm7
;   xmm4..xmm7 are read only. Clobbers: rax.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm3 is source alpha and xmm4..xmm7 are
;   destination r,g,b,a - confirm against the stage's C++ source.
8972PUBLIC _sk_dstin_sse2
8973_sk_dstin_sse2 LABEL PROC
8974  DB  15,40,195                           ; movaps        %xmm3,%xmm0
8975  DB  15,89,196                           ; mulps         %xmm4,%xmm0
8976  DB  15,40,203                           ; movaps        %xmm3,%xmm1
8977  DB  15,89,205                           ; mulps         %xmm5,%xmm1
8978  DB  15,40,211                           ; movaps        %xmm3,%xmm2
8979  DB  15,89,214                           ; mulps         %xmm6,%xmm2
; xmm3 is updated last because the three products above read its old value.
8980  DB  15,89,223                           ; mulps         %xmm7,%xmm3
8981  DB  72,173                              ; lods          %ds:(%rsi),%rax
8982  DB  255,224                             ; jmpq          *%rax
8983
; _sk_srcout_sse2
;   Multiplies each of xmm0, xmm1, xmm2, xmm3 by (1 - xmm7), lane by lane.
;   xmm7 is read only; xmm4..xmm6 are not touched. Clobbers: rax, xmm8.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm7 is
;   destination alpha - confirm against the stage's C++ source.
8984PUBLIC _sk_srcout_sse2
8985_sk_srcout_sse2 LABEL PROC
; xmm8 = 1.0 - xmm7 (0x3f800000 is the bit pattern of 1.0f).
8986  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
8987  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
8988  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
8989  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
8990  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
8991  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
8992  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
8993  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
8994  DB  72,173                              ; lods          %ds:(%rsi),%rax
8995  DB  255,224                             ; jmpq          *%rax
8996
; _sk_dstout_sse2
;   Per-lane result (old = value on entry; incoming xmm0..xmm2 are discarded):
;     xmm0 = xmm4*(1 - old xmm3)
;     xmm1 = xmm5*(1 - old xmm3)
;     xmm2 = xmm6*(1 - old xmm3)
;     xmm3 = xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm3 is source alpha and xmm4..xmm7 are
;   destination r,g,b,a - confirm against the stage's C++ source.
8997PUBLIC _sk_dstout_sse2
8998_sk_dstout_sse2 LABEL PROC
; xmm8 = 1.0 - xmm3 (0x3f800000 is the bit pattern of 1.0f).
8999  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9000  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9001  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9002  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
9003  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
9004  DB  15,89,196                           ; mulps         %xmm4,%xmm0
9005  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9006  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9007  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9008  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9009  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
; The lods and the final register copy are independent of each other.
9010  DB  72,173                              ; lods          %ds:(%rsi),%rax
9011  DB  65,15,40,216                        ; movaps        %xmm8,%xmm3
9012  DB  255,224                             ; jmpq          *%rax
9013
; _sk_srcover_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0 + xmm4*(1 - old xmm3)
;     xmm1 = xmm1 + xmm5*(1 - old xmm3)
;     xmm2 = xmm2 + xmm6*(1 - old xmm3)
;     xmm3 = xmm3 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9014PUBLIC _sk_srcover_sse2
9015_sk_srcover_sse2 LABEL PROC
; xmm8 = 1.0 - xmm3 (0x3f800000 is the bit pattern of 1.0f).
9016  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9017  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9018  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9019  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
9020  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
9021  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
9022  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
9023  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
9024  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9025  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
9026  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
9027  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
9028  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
; The last channel reuses xmm8 in place; no further copy is needed.
9029  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9030  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
9031  DB  72,173                              ; lods          %ds:(%rsi),%rax
9032  DB  255,224                             ; jmpq          *%rax
9033
; _sk_dstover_sse2
;   Per-lane result:
;     xmm0 = xmm0*(1 - xmm7) + xmm4
;     xmm1 = xmm1*(1 - xmm7) + xmm5
;     xmm2 = xmm2*(1 - xmm7) + xmm6
;     xmm3 = xmm3*(1 - xmm7) + xmm7
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9034PUBLIC _sk_dstover_sse2
9035_sk_dstover_sse2 LABEL PROC
; xmm8 = 1.0 - xmm7 (0x3f800000 is the bit pattern of 1.0f).
9036  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9037  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9038  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9039  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
9040  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
9041  DB  15,88,196                           ; addps         %xmm4,%xmm0
9042  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
9043  DB  15,88,205                           ; addps         %xmm5,%xmm1
9044  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
9045  DB  15,88,214                           ; addps         %xmm6,%xmm2
9046  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
9047  DB  15,88,223                           ; addps         %xmm7,%xmm3
9048  DB  72,173                              ; lods          %ds:(%rsi),%rax
9049  DB  255,224                             ; jmpq          *%rax
9050
; _sk_modulate_sse2
;   Per-lane products: xmm0 *= xmm4, xmm1 *= xmm5, xmm2 *= xmm6, xmm3 *= xmm7.
;   xmm4..xmm7 are read only. Clobbers: rax.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9051PUBLIC _sk_modulate_sse2
9052_sk_modulate_sse2 LABEL PROC
9053  DB  15,89,196                           ; mulps         %xmm4,%xmm0
9054  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9055  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9056  DB  15,89,223                           ; mulps         %xmm7,%xmm3
9057  DB  72,173                              ; lods          %ds:(%rsi),%rax
9058  DB  255,224                             ; jmpq          *%rax
9059
; _sk_multiply_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0*xmm4 + xmm4*(1 - old xmm3) + xmm0*(1 - xmm7)
;     xmm1 = xmm1*xmm5 + xmm5*(1 - old xmm3) + xmm1*(1 - xmm7)
;     xmm2 = xmm2*xmm6 + xmm6*(1 - old xmm3) + xmm2*(1 - xmm7)
;     xmm3 = xmm3*xmm7 + xmm7*(1 - old xmm3) + old xmm3*(1 - xmm7)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9, xmm10, xmm11.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9060PUBLIC _sk_multiply_sse2
9061_sk_multiply_sse2 LABEL PROC
; xmm8 = 1.0 in all lanes (0x3f800000 is the bit pattern of 1.0f);
; xmm9 becomes 1 - xmm7 and, a few lines down, xmm8 becomes 1 - xmm3.
9062  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9063  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9064  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9065  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
9066  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
9067  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
9068  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
9069  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
9070  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
9071  DB  68,15,89,220                        ; mulps         %xmm4,%xmm11
9072  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
9073  DB  15,89,196                           ; mulps         %xmm4,%xmm0
9074  DB  65,15,88,195                        ; addps         %xmm11,%xmm0
; Same three-term sum for the xmm1/xmm5 pair.
9075  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
9076  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
9077  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
9078  DB  68,15,89,221                        ; mulps         %xmm5,%xmm11
9079  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
9080  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9081  DB  65,15,88,203                        ; addps         %xmm11,%xmm1
; Same three-term sum for the xmm2/xmm6 pair.
9082  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
9083  DB  68,15,89,210                        ; mulps         %xmm2,%xmm10
9084  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
9085  DB  68,15,89,222                        ; mulps         %xmm6,%xmm11
9086  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
9087  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9088  DB  65,15,88,211                        ; addps         %xmm11,%xmm2
; xmm3/xmm7 pair: xmm8 and xmm9 are consumed in place since nothing later
; needs them.
9089  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
9090  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9091  DB  69,15,88,193                        ; addps         %xmm9,%xmm8
9092  DB  15,89,223                           ; mulps         %xmm7,%xmm3
9093  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
9094  DB  72,173                              ; lods          %ds:(%rsi),%rax
9095  DB  255,224                             ; jmpq          *%rax
9096
; _sk_plus__sse2
;   Per-lane sums: xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7.
;   No clamping is applied in this block.
;   xmm4..xmm7 are read only. Clobbers: rax.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9097PUBLIC _sk_plus__sse2
9098_sk_plus__sse2 LABEL PROC
9099  DB  15,88,196                           ; addps         %xmm4,%xmm0
9100  DB  15,88,205                           ; addps         %xmm5,%xmm1
9101  DB  15,88,214                           ; addps         %xmm6,%xmm2
9102  DB  15,88,223                           ; addps         %xmm7,%xmm3
9103  DB  72,173                              ; lods          %ds:(%rsi),%rax
9104  DB  255,224                             ; jmpq          *%rax
9105
; _sk_screen_sse2
;   Per-lane result, the same formula for all four register pairs:
;     xmm0 = xmm0 + xmm4 - xmm0*xmm4
;     xmm1 = xmm1 + xmm5 - xmm1*xmm5
;     xmm2 = xmm2 + xmm6 - xmm2*xmm6
;     xmm3 = xmm3 + xmm7 - xmm3*xmm7
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9, xmm10, xmm11.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9106PUBLIC _sk_screen_sse2
9107_sk_screen_sse2 LABEL PROC
; Each group builds the sum in a scratch register, turns the original
; register into the product, then subtracts the product from the sum.
9108  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
9109  DB  68,15,88,196                        ; addps         %xmm4,%xmm8
9110  DB  15,89,196                           ; mulps         %xmm4,%xmm0
9111  DB  68,15,92,192                        ; subps         %xmm0,%xmm8
9112  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
9113  DB  68,15,88,205                        ; addps         %xmm5,%xmm9
9114  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9115  DB  68,15,92,201                        ; subps         %xmm1,%xmm9
9116  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
9117  DB  68,15,88,214                        ; addps         %xmm6,%xmm10
9118  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9119  DB  68,15,92,210                        ; subps         %xmm2,%xmm10
9120  DB  68,15,40,219                        ; movaps        %xmm3,%xmm11
9121  DB  68,15,88,223                        ; addps         %xmm7,%xmm11
9122  DB  15,89,223                           ; mulps         %xmm7,%xmm3
9123  DB  68,15,92,219                        ; subps         %xmm3,%xmm11
9124  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Copy the results from the scratch registers back to xmm0..xmm3.
9125  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
9126  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
9127  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
9128  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
9129  DB  255,224                             ; jmpq          *%rax
9130
; _sk_xor__sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0*(1 - xmm7) + xmm4*(1 - old xmm3)
;     xmm1 = xmm1*(1 - xmm7) + xmm5*(1 - old xmm3)
;     xmm2 = xmm2*(1 - xmm7) + xmm6*(1 - old xmm3)
;     xmm3 = old xmm3*(1 - xmm7) + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9, xmm10.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9131PUBLIC _sk_xor__sse2
9132_sk_xor__sse2 LABEL PROC
; The incoming xmm3 is parked in xmm8 so xmm3 can hold the constant 1.0
; (0x3f800000) and then 1 - old xmm3.
9133  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
9134  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9135  DB  102,15,110,216                      ; movd          %eax,%xmm3
9136  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
9137  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9138  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
9139  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
9140  DB  65,15,92,216                        ; subps         %xmm8,%xmm3
9141  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
9142  DB  68,15,89,212                        ; mulps         %xmm4,%xmm10
9143  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
9144  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
9145  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
9146  DB  68,15,89,213                        ; mulps         %xmm5,%xmm10
9147  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
9148  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
9149  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
9150  DB  68,15,89,214                        ; mulps         %xmm6,%xmm10
9151  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
; Last channel: xmm9 = (1 - xmm7)*old xmm3, xmm3 = (1 - old xmm3)*xmm7.
9152  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
9153  DB  15,89,223                           ; mulps         %xmm7,%xmm3
9154  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
9155  DB  72,173                              ; lods          %ds:(%rsi),%rax
9156  DB  255,224                             ; jmpq          *%rax
9157
; _sk_darken_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0 + xmm4 - max(xmm0*xmm7, xmm3*xmm4)
;     xmm1 = xmm1 + xmm5 - max(xmm1*xmm7, xmm3*xmm5)
;     xmm2 = xmm2 + xmm6 - max(xmm2*xmm7, xmm3*xmm6)
;     xmm3 = xmm3 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9158PUBLIC _sk_darken_sse2
9159_sk_darken_sse2 LABEL PROC
; The incoming xmm1 is saved in xmm8 because xmm1 is used as scratch while
; computing the xmm0 result.
9160  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
9161  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
9162  DB  15,88,196                           ; addps         %xmm4,%xmm0
9163  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
9164  DB  15,40,203                           ; movaps        %xmm3,%xmm1
9165  DB  15,89,204                           ; mulps         %xmm4,%xmm1
9166  DB  68,15,95,201                        ; maxps         %xmm1,%xmm9
9167  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm1 result, rebuilt from the saved copy in xmm8.
9168  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9169  DB  15,88,205                           ; addps         %xmm5,%xmm1
9170  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9171  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9172  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9173  DB  69,15,95,193                        ; maxps         %xmm9,%xmm8
9174  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; The xmm2 result is accumulated in xmm8; xmm2 itself is then reused as
; scratch for the xmm3 update and restored from xmm8 just before the jump.
9175  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
9176  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
9177  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9178  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9179  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
9180  DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
9181  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
; xmm2 = (1.0 - xmm3)*xmm7 (0x3f800000 is the bit pattern of 1.0f).
9182  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9183  DB  102,15,110,208                      ; movd          %eax,%xmm2
9184  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
9185  DB  15,92,211                           ; subps         %xmm3,%xmm2
9186  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9187  DB  15,88,218                           ; addps         %xmm2,%xmm3
9188  DB  72,173                              ; lods          %ds:(%rsi),%rax
9189  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9190  DB  255,224                             ; jmpq          *%rax
9191
; _sk_lighten_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0 + xmm4 - min(xmm0*xmm7, xmm3*xmm4)
;     xmm1 = xmm1 + xmm5 - min(xmm1*xmm7, xmm3*xmm5)
;     xmm2 = xmm2 + xmm6 - min(xmm2*xmm7, xmm3*xmm6)
;     xmm3 = xmm3 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9192PUBLIC _sk_lighten_sse2
9193_sk_lighten_sse2 LABEL PROC
; The incoming xmm1 is saved in xmm8 because xmm1 is used as scratch while
; computing the xmm0 result.
9194  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
9195  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
9196  DB  15,88,196                           ; addps         %xmm4,%xmm0
9197  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
9198  DB  15,40,203                           ; movaps        %xmm3,%xmm1
9199  DB  15,89,204                           ; mulps         %xmm4,%xmm1
9200  DB  68,15,93,201                        ; minps         %xmm1,%xmm9
9201  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm1 result, rebuilt from the saved copy in xmm8.
9202  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9203  DB  15,88,205                           ; addps         %xmm5,%xmm1
9204  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9205  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9206  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9207  DB  69,15,93,193                        ; minps         %xmm9,%xmm8
9208  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; The xmm2 result is accumulated in xmm8; xmm2 itself is then reused as
; scratch for the xmm3 update and restored from xmm8 just before the jump.
9209  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
9210  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
9211  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9212  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9213  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
9214  DB  65,15,93,209                        ; minps         %xmm9,%xmm2
9215  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
; xmm2 = (1.0 - xmm3)*xmm7 (0x3f800000 is the bit pattern of 1.0f).
9216  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9217  DB  102,15,110,208                      ; movd          %eax,%xmm2
9218  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
9219  DB  15,92,211                           ; subps         %xmm3,%xmm2
9220  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9221  DB  15,88,218                           ; addps         %xmm2,%xmm3
9222  DB  72,173                              ; lods          %ds:(%rsi),%rax
9223  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9224  DB  255,224                             ; jmpq          *%rax
9225
; _sk_difference_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0 + xmm4 - 2*min(xmm0*xmm7, xmm3*xmm4)
;     xmm1 = xmm1 + xmm5 - 2*min(xmm1*xmm7, xmm3*xmm5)
;     xmm2 = xmm2 + xmm6 - 2*min(xmm2*xmm7, xmm3*xmm6)
;     xmm3 = xmm3 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8, xmm9.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9226PUBLIC _sk_difference_sse2
9227_sk_difference_sse2 LABEL PROC
; The incoming xmm1 is saved in xmm8 because xmm1 is used as scratch while
; computing the xmm0 result.
9228  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
9229  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
9230  DB  15,88,196                           ; addps         %xmm4,%xmm0
9231  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
9232  DB  15,40,203                           ; movaps        %xmm3,%xmm1
9233  DB  15,89,204                           ; mulps         %xmm4,%xmm1
9234  DB  68,15,93,201                        ; minps         %xmm1,%xmm9
; Adding a register to itself doubles the min term.
9235  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
9236  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm1 result, rebuilt from the saved copy in xmm8.
9237  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9238  DB  15,88,205                           ; addps         %xmm5,%xmm1
9239  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9240  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9241  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9242  DB  69,15,93,193                        ; minps         %xmm9,%xmm8
9243  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
9244  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; The xmm2 result is accumulated in xmm8; xmm2 itself is then reused as
; scratch for the xmm3 update and restored from xmm8 just before the jump.
9245  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
9246  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
9247  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9248  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9249  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
9250  DB  65,15,93,209                        ; minps         %xmm9,%xmm2
9251  DB  15,88,210                           ; addps         %xmm2,%xmm2
9252  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
; xmm2 = (1.0 - xmm3)*xmm7 (0x3f800000 is the bit pattern of 1.0f).
9253  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9254  DB  102,15,110,208                      ; movd          %eax,%xmm2
9255  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
9256  DB  15,92,211                           ; subps         %xmm3,%xmm2
9257  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9258  DB  15,88,218                           ; addps         %xmm2,%xmm3
9259  DB  72,173                              ; lods          %ds:(%rsi),%rax
9260  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9261  DB  255,224                             ; jmpq          *%rax
9262
; _sk_exclusion_sse2
;   Per-lane result (old = value on entry):
;     xmm0 = xmm0 + xmm4 - 2*xmm0*xmm4
;     xmm1 = xmm1 + xmm5 - 2*xmm1*xmm5
;     xmm2 = xmm2 + xmm6 - 2*xmm2*xmm6
;     xmm3 = xmm3 + xmm7*(1 - old xmm3)
;   xmm4..xmm7 are read only. Clobbers: rax, xmm8.
;   Ends by loading the next 8-byte entry at (%rsi) into %rax (advancing
;   %rsi by 8, assuming DF is clear) and jumping to it.
;   NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7
;   destination r,g,b,a - confirm against the stage's C++ source.
9263PUBLIC _sk_exclusion_sse2
9264_sk_exclusion_sse2 LABEL PROC
; The incoming xmm1 is saved in xmm8 because xmm1 is used as scratch for the
; xmm0*xmm4 product.
9265  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
9266  DB  15,40,200                           ; movaps        %xmm0,%xmm1
9267  DB  15,88,196                           ; addps         %xmm4,%xmm0
9268  DB  15,89,204                           ; mulps         %xmm4,%xmm1
; Adding a register to itself doubles the product.
9269  DB  15,88,201                           ; addps         %xmm1,%xmm1
9270  DB  15,92,193                           ; subps         %xmm1,%xmm0
; xmm1 result, rebuilt from the saved copy in xmm8.
9271  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9272  DB  15,88,205                           ; addps         %xmm5,%xmm1
9273  DB  68,15,89,197                        ; mulps         %xmm5,%xmm8
9274  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
9275  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; The xmm2 result is accumulated in xmm8; xmm2 itself is then reused as
; scratch for the xmm3 update and restored from xmm8 just before the jump.
9276  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
9277  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
9278  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9279  DB  15,88,210                           ; addps         %xmm2,%xmm2
9280  DB  68,15,92,194                        ; subps         %xmm2,%xmm8
; xmm2 = (1.0 - xmm3)*xmm7 (0x3f800000 is the bit pattern of 1.0f).
9281  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9282  DB  102,15,110,208                      ; movd          %eax,%xmm2
9283  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
9284  DB  15,92,211                           ; subps         %xmm3,%xmm2
9285  DB  15,89,215                           ; mulps         %xmm7,%xmm2
9286  DB  15,88,218                           ; addps         %xmm2,%xmm3
9287  DB  72,173                              ; lods          %ds:(%rsi),%rax
9288  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9289  DB  255,224                             ; jmpq          *%rax
9290
9291PUBLIC _sk_colorburn_sse2
9292_sk_colorburn_sse2 LABEL PROC
9293  DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
9294  DB  68,15,40,224                        ; movaps        %xmm0,%xmm12
9295  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9296  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
9297  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
9298  DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
9299  DB  68,15,92,223                        ; subps         %xmm7,%xmm11
9300  DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
9301  DB  65,15,89,196                        ; mulps         %xmm12,%xmm0
9302  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
9303  DB  15,40,207                           ; movaps        %xmm7,%xmm1
9304  DB  15,92,204                           ; subps         %xmm4,%xmm1
9305  DB  15,89,203                           ; mulps         %xmm3,%xmm1
9306  DB  65,15,94,204                        ; divps         %xmm12,%xmm1
9307  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9308  DB  68,15,93,233                        ; minps         %xmm1,%xmm13
9309  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
9310  DB  69,15,92,245                        ; subps         %xmm13,%xmm14
9311  DB  65,15,40,204                        ; movaps        %xmm12,%xmm1
9312  DB  65,15,194,202,0                     ; cmpeqps       %xmm10,%xmm1
9313  DB  68,15,92,203                        ; subps         %xmm3,%xmm9
9314  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
9315  DB  68,15,88,240                        ; addps         %xmm0,%xmm14
9316  DB  68,15,84,225                        ; andps         %xmm1,%xmm12
9317  DB  65,15,85,206                        ; andnps        %xmm14,%xmm1
9318  DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
9319  DB  68,15,89,236                        ; mulps         %xmm4,%xmm13
9320  DB  15,88,196                           ; addps         %xmm4,%xmm0
9321  DB  65,15,86,204                        ; orps          %xmm12,%xmm1
9322  DB  68,15,40,228                        ; movaps        %xmm4,%xmm12
9323  DB  68,15,194,231,0                     ; cmpeqps       %xmm7,%xmm12
9324  DB  65,15,88,205                        ; addps         %xmm13,%xmm1
9325  DB  65,15,84,196                        ; andps         %xmm12,%xmm0
9326  DB  68,15,85,225                        ; andnps        %xmm1,%xmm12
9327  DB  65,15,86,196                        ; orps          %xmm12,%xmm0
9328  DB  65,15,40,203                        ; movaps        %xmm11,%xmm1
9329  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
9330  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
9331  DB  68,15,92,229                        ; subps         %xmm5,%xmm12
9332  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
9333  DB  69,15,94,224                        ; divps         %xmm8,%xmm12
9334  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9335  DB  69,15,93,236                        ; minps         %xmm12,%xmm13
9336  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
9337  DB  69,15,92,229                        ; subps         %xmm13,%xmm12
9338  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
9339  DB  69,15,194,234,0                     ; cmpeqps       %xmm10,%xmm13
9340  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
9341  DB  68,15,88,225                        ; addps         %xmm1,%xmm12
9342  DB  69,15,84,197                        ; andps         %xmm13,%xmm8
9343  DB  69,15,85,236                        ; andnps        %xmm12,%xmm13
9344  DB  69,15,86,232                        ; orps          %xmm8,%xmm13
9345  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
9346  DB  68,15,89,197                        ; mulps         %xmm5,%xmm8
9347  DB  15,88,205                           ; addps         %xmm5,%xmm1
9348  DB  69,15,88,232                        ; addps         %xmm8,%xmm13
9349  DB  68,15,40,197                        ; movaps        %xmm5,%xmm8
9350  DB  68,15,194,199,0                     ; cmpeqps       %xmm7,%xmm8
9351  DB  65,15,84,200                        ; andps         %xmm8,%xmm1
9352  DB  69,15,85,197                        ; andnps        %xmm13,%xmm8
9353  DB  65,15,86,200                        ; orps          %xmm8,%xmm1
9354  DB  68,15,40,199                        ; movaps        %xmm7,%xmm8
9355  DB  68,15,92,198                        ; subps         %xmm6,%xmm8
9356  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
9357  DB  68,15,94,194                        ; divps         %xmm2,%xmm8
9358  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
9359  DB  69,15,93,224                        ; minps         %xmm8,%xmm12
9360  DB  68,15,40,199                        ; movaps        %xmm7,%xmm8
9361  DB  69,15,92,196                        ; subps         %xmm12,%xmm8
9362  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
9363  DB  68,15,194,210,0                     ; cmpeqps       %xmm2,%xmm10
9364  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
9365  DB  69,15,88,195                        ; addps         %xmm11,%xmm8
9366  DB  65,15,84,210                        ; andps         %xmm10,%xmm2
9367  DB  69,15,85,208                        ; andnps        %xmm8,%xmm10
9368  DB  69,15,40,195                        ; movaps        %xmm11,%xmm8
9369  DB  68,15,88,198                        ; addps         %xmm6,%xmm8
9370  DB  68,15,86,210                        ; orps          %xmm2,%xmm10
9371  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
9372  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9373  DB  68,15,88,210                        ; addps         %xmm2,%xmm10
9374  DB  15,40,214                           ; movaps        %xmm6,%xmm2
9375  DB  15,194,215,0                        ; cmpeqps       %xmm7,%xmm2
9376  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
9377  DB  65,15,85,210                        ; andnps        %xmm10,%xmm2
9378  DB  68,15,86,194                        ; orps          %xmm2,%xmm8
9379  DB  68,15,89,207                        ; mulps         %xmm7,%xmm9
9380  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
9381  DB  72,173                              ; lods          %ds:(%rsi),%rax
9382  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9383  DB  255,224                             ; jmpq          *%rax
9384
; ---------------------------------------------------------------------------
; _sk_colordodge_sse2 -- SSE2 pipeline stage, emitted as raw machine-code
; bytes. The trailing comment on each DB line is its disassembly in AT&T
; syntax (operand order "src,dst").
;
; For each of the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5),
; (xmm2,xmm6), with sa = xmm3 and da = xmm7, every 32-bit float lane gets:
;     s'  = (d == 0)  ? d + s*(1-da)
;         : (s == sa) ? s + d*(1-sa)
;         :             sa*min(da, (sa*d)/(sa-s)) + s*(1-da) + d*(1-sa)
;     sa' = sa + da*(1-sa)
; NOTE(review): the names s/d/sa/da assume the usual Skia convention
; (xmm0-3 = source r,g,b,a; xmm4-7 = destination r,g,b,a). That mapping is
; not visible in this part of the file -- confirm.
;
; Writes:    xmm0-xmm3 (results), xmm8-xmm14 (scratch), rax, rsi.
; Preserves: xmm4-xmm7 (only read). No stack use.
; Exit:      lods loads the 8-byte value at (%rsi) into %rax and advances
;            %rsi (by 8 when the direction flag is clear); the stage then
;            jumps to %rax (presumably the next stage -- confirm).
; ---------------------------------------------------------------------------
9385PUBLIC _sk_colordodge_sse2
9386_sk_colordodge_sse2 LABEL PROC
; Setup: xmm9 = s (pair 0), xmm10 = 1.0f in every lane (bit pattern
; 0x3f800000 broadcast by shufps $0), xmm11 = 1-da.
9387  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
9388  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9389  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
9390  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
9391  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
9392  DB  68,15,92,223                        ; subps         %xmm7,%xmm11
; Pair 0: xmm0 = s*(1-da); xmm8 = (sa*d)/(sa-s). The division is done
; unconditionally; lanes where s == sa (zero divisor) are thrown away by the
; mask select further down.
9393  DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
9394  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
9395  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
9396  DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
9397  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
9398  DB  69,15,92,225                        ; subps         %xmm9,%xmm12
9399  DB  69,15,94,196                        ; divps         %xmm12,%xmm8
; xmm12 = da (kept for pair 1's min); xmm13 = sa*min(da, xmm8) + s*(1-da);
; xmm14 = all-ones mask where s == sa, then
; xmm14 = (s == sa) ? s : xmm13 via and/andn/or.
9400  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
9401  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9402  DB  69,15,93,232                        ; minps         %xmm8,%xmm13
9403  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
9404  DB  68,15,194,243,0                     ; cmpeqps       %xmm3,%xmm14
9405  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
9406  DB  68,15,88,232                        ; addps         %xmm0,%xmm13
9407  DB  69,15,84,206                        ; andps         %xmm14,%xmm9
9408  DB  69,15,85,245                        ; andnps        %xmm13,%xmm14
; Shared constants for the rest of the stage: xmm8 = 0.0 (for the d == 0
; tests), xmm10 = 1-sa.
9409  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
9410  DB  68,15,92,211                        ; subps         %xmm3,%xmm10
9411  DB  69,15,86,241                        ; orps          %xmm9,%xmm14
; xmm14 += d*(1-sa); xmm0 = d + s*(1-da); final select on d == 0 leaves the
; pair-0 result in xmm0.
9412  DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
9413  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
9414  DB  15,88,196                           ; addps         %xmm4,%xmm0
9415  DB  69,15,88,241                        ; addps         %xmm9,%xmm14
9416  DB  68,15,40,204                        ; movaps        %xmm4,%xmm9
9417  DB  69,15,194,200,0                     ; cmpeqps       %xmm8,%xmm9
9418  DB  65,15,84,193                        ; andps         %xmm9,%xmm0
9419  DB  69,15,85,206                        ; andnps        %xmm14,%xmm9
9420  DB  65,15,86,193                        ; orps          %xmm9,%xmm0
; Pair 1 (s = xmm1, d = xmm5): same computation; the result is built in xmm9
; and copied to xmm1 just before the exit jump.
9421  DB  68,15,40,235                        ; movaps        %xmm3,%xmm13
9422  DB  68,15,89,237                        ; mulps         %xmm5,%xmm13
9423  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9424  DB  68,15,92,201                        ; subps         %xmm1,%xmm9
9425  DB  69,15,94,233                        ; divps         %xmm9,%xmm13
9426  DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
9427  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
9428  DB  69,15,93,229                        ; minps         %xmm13,%xmm12
9429  DB  68,15,40,233                        ; movaps        %xmm1,%xmm13
9430  DB  68,15,194,235,0                     ; cmpeqps       %xmm3,%xmm13
9431  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
9432  DB  69,15,88,225                        ; addps         %xmm9,%xmm12
9433  DB  65,15,84,205                        ; andps         %xmm13,%xmm1
9434  DB  69,15,85,236                        ; andnps        %xmm12,%xmm13
9435  DB  68,15,86,233                        ; orps          %xmm1,%xmm13
9436  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
9437  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9438  DB  68,15,88,205                        ; addps         %xmm5,%xmm9
9439  DB  68,15,88,233                        ; addps         %xmm1,%xmm13
9440  DB  15,40,205                           ; movaps        %xmm5,%xmm1
9441  DB  65,15,194,200,0                     ; cmpeqps       %xmm8,%xmm1
9442  DB  68,15,84,201                        ; andps         %xmm1,%xmm9
9443  DB  65,15,85,205                        ; andnps        %xmm13,%xmm1
9444  DB  68,15,86,201                        ; orps          %xmm1,%xmm9
; Pair 2 (s = xmm2, d = xmm6): same computation. xmm11 (1-da) is consumed
; here, and the zero constant in xmm8 is overwritten by its own d == 0 mask,
; so the pair-2 result ends up in xmm8.
9445  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
9446  DB  68,15,89,230                        ; mulps         %xmm6,%xmm12
9447  DB  15,40,203                           ; movaps        %xmm3,%xmm1
9448  DB  15,92,202                           ; subps         %xmm2,%xmm1
9449  DB  68,15,94,225                        ; divps         %xmm1,%xmm12
9450  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9451  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
9452  DB  69,15,93,236                        ; minps         %xmm12,%xmm13
9453  DB  15,40,202                           ; movaps        %xmm2,%xmm1
9454  DB  15,194,203,0                        ; cmpeqps       %xmm3,%xmm1
9455  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
9456  DB  69,15,88,235                        ; addps         %xmm11,%xmm13
9457  DB  15,84,209                           ; andps         %xmm1,%xmm2
9458  DB  65,15,85,205                        ; andnps        %xmm13,%xmm1
9459  DB  15,86,202                           ; orps          %xmm2,%xmm1
9460  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
9461  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9462  DB  15,88,202                           ; addps         %xmm2,%xmm1
9463  DB  68,15,194,198,0                     ; cmpeqps       %xmm6,%xmm8
9464  DB  68,15,88,222                        ; addps         %xmm6,%xmm11
9465  DB  69,15,84,216                        ; andps         %xmm8,%xmm11
9466  DB  68,15,85,193                        ; andnps        %xmm1,%xmm8
9467  DB  69,15,86,195                        ; orps          %xmm11,%xmm8
; xmm3 = sa + da*(1-sa).
9468  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
9469  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
; Exit: fetch the next 8-byte entry from (%rsi), move the pair-1 and pair-2
; results into xmm1/xmm2, and jump to the fetched address.
9470  DB  72,173                              ; lods          %ds:(%rsi),%rax
9471  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
9472  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9473  DB  255,224                             ; jmpq          *%rax
9474
; ---------------------------------------------------------------------------
; _sk_hardlight_sse2 -- SSE2 pipeline stage, emitted as raw machine-code
; bytes. The trailing comment on each DB line is its disassembly in AT&T
; syntax (operand order "src,dst").
;
; For each of the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5),
; (xmm2,xmm6), with sa = xmm3 and da = xmm7, every 32-bit float lane gets:
;     s'  = s*(1-da) + d*(1-sa)
;           + ( (2*s <= sa) ? 2*s*d : sa*da - 2*(da-d)*(sa-s) )
;     sa' = sa + da*(1-sa)
; NOTE(review): the names s/d/sa/da assume the usual Skia convention
; (xmm0-3 = source r,g,b,a; xmm4-7 = destination r,g,b,a). That mapping is
; not visible in this part of the file -- confirm.
;
; Writes:    xmm0-xmm3 (results), xmm8-xmm15 (scratch), rax, rsi.
; Preserves: xmm7 (only read). xmm4-xmm6 are used as temporaries but hold
;            their entry values again at exit.
; Stack:     reserves 0x18 bytes and stores the entry xmm6 at (%rsp) with
;            movaps, which needs a 16-byte-aligned address. This presumably
;            relies on %rsp being 8 mod 16 on entry -- confirm against the
;            pipeline entry code. %rsp is restored before the exit jump.
; Exit:      lods loads the 8-byte value at (%rsi) into %rax and advances
;            %rsi (by 8 when the direction flag is clear); the stage then
;            jumps to %rax (presumably the next stage -- confirm).
; ---------------------------------------------------------------------------
9475PUBLIC _sk_hardlight_sse2
9476_sk_hardlight_sse2 LABEL PROC
; Free up xmm4 as a scratch register: spill the entry xmm6 to the stack and
; shift the d values up by one register. From here until the exit sequence:
;     xmm5 = entry xmm4, xmm6 = entry xmm5, (%rsp) = entry xmm6.
9477  DB  72,131,236,24                       ; sub           $0x18,%rsp
9478  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
9479  DB  15,40,245                           ; movaps        %xmm5,%xmm6
9480  DB  15,40,236                           ; movaps        %xmm4,%xmm5
; xmm11 = 1.0f in every lane (bit pattern 0x3f800000 broadcast by
; shufps $0); xmm10 = 1-da.
9481  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9482  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
9483  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
9484  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
9485  DB  68,15,92,215                        ; subps         %xmm7,%xmm10
; Pair 0 (s = xmm0, d = xmm5): xmm11 becomes 1-sa;
; xmm9 = d*(1-sa) + s*(1-da).
9486  DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
9487  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
9488  DB  68,15,92,219                        ; subps         %xmm3,%xmm11
9489  DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
9490  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9491  DB  69,15,88,200                        ; addps         %xmm8,%xmm9
; xmm4 = sa*da (shared by all three pairs); xmm13/xmm14/xmm15 = copies of da
; for the three (da-d) terms; xmm12 = sa*da - 2*(da-d)*(sa-s).
9492  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
9493  DB  68,15,92,192                        ; subps         %xmm0,%xmm8
9494  DB  15,40,227                           ; movaps        %xmm3,%xmm4
9495  DB  15,89,231                           ; mulps         %xmm7,%xmm4
9496  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9497  DB  68,15,40,247                        ; movaps        %xmm7,%xmm14
9498  DB  68,15,40,255                        ; movaps        %xmm7,%xmm15
9499  DB  68,15,92,253                        ; subps         %xmm5,%xmm15
9500  DB  69,15,89,248                        ; mulps         %xmm8,%xmm15
9501  DB  69,15,88,255                        ; addps         %xmm15,%xmm15
9502  DB  68,15,40,228                        ; movaps        %xmm4,%xmm12
9503  DB  69,15,92,231                        ; subps         %xmm15,%xmm12
; xmm8 = all-ones mask where 2*s <= sa (cmpps predicate 2 = LE);
; xmm0 = 2*s*d; select with and/andn/or into xmm8.
9504  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
9505  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
9506  DB  68,15,194,195,2                     ; cmpleps       %xmm3,%xmm8
9507  DB  15,89,197                           ; mulps         %xmm5,%xmm0
9508  DB  15,88,192                           ; addps         %xmm0,%xmm0
9509  DB  65,15,84,192                        ; andps         %xmm8,%xmm0
9510  DB  69,15,85,196                        ; andnps        %xmm12,%xmm8
9511  DB  68,15,86,192                        ; orps          %xmm0,%xmm8
; Copy 1-sa into xmm15 (pair 1) and xmm12 (pair 2), then turn xmm11 into
; da*(1-sa) for the alpha update. xmm8 += xmm9 completes the pair-0 result.
9512  DB  69,15,40,251                        ; movaps        %xmm11,%xmm15
9513  DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
9514  DB  68,15,89,223                        ; mulps         %xmm7,%xmm11
9515  DB  69,15,88,193                        ; addps         %xmm9,%xmm8
; Pair 1 (s = xmm1, d = xmm6): same computation; result in xmm9.
9516  DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
9517  DB  15,89,193                           ; mulps         %xmm1,%xmm0
9518  DB  68,15,89,254                        ; mulps         %xmm6,%xmm15
9519  DB  68,15,88,248                        ; addps         %xmm0,%xmm15
9520  DB  15,40,195                           ; movaps        %xmm3,%xmm0
9521  DB  15,92,193                           ; subps         %xmm1,%xmm0
9522  DB  68,15,92,238                        ; subps         %xmm6,%xmm13
9523  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
9524  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
9525  DB  15,40,196                           ; movaps        %xmm4,%xmm0
9526  DB  65,15,92,197                        ; subps         %xmm13,%xmm0
9527  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
9528  DB  69,15,88,201                        ; addps         %xmm9,%xmm9
9529  DB  68,15,194,203,2                     ; cmpleps       %xmm3,%xmm9
9530  DB  15,89,206                           ; mulps         %xmm6,%xmm1
9531  DB  15,88,201                           ; addps         %xmm1,%xmm1
9532  DB  65,15,84,201                        ; andps         %xmm9,%xmm1
9533  DB  68,15,85,200                        ; andnps        %xmm0,%xmm9
9534  DB  68,15,86,201                        ; orps          %xmm1,%xmm9
9535  DB  69,15,88,207                        ; addps         %xmm15,%xmm9
; Pair 2 (s = xmm2, d = entry xmm6 reloaded from the stack into xmm13):
; same computation; result in xmm10. xmm4 (sa*da) is consumed here.
9536  DB  68,15,89,210                        ; mulps         %xmm2,%xmm10
9537  DB  68,15,40,44,36                      ; movaps        (%rsp),%xmm13
9538  DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
9539  DB  69,15,88,226                        ; addps         %xmm10,%xmm12
9540  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
9541  DB  69,15,88,210                        ; addps         %xmm10,%xmm10
9542  DB  68,15,194,211,2                     ; cmpleps       %xmm3,%xmm10
9543  DB  15,40,195                           ; movaps        %xmm3,%xmm0
9544  DB  15,92,194                           ; subps         %xmm2,%xmm0
9545  DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
9546  DB  15,88,210                           ; addps         %xmm2,%xmm2
9547  DB  69,15,92,245                        ; subps         %xmm13,%xmm14
9548  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
9549  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
9550  DB  65,15,92,230                        ; subps         %xmm14,%xmm4
9551  DB  65,15,84,210                        ; andps         %xmm10,%xmm2
9552  DB  68,15,85,212                        ; andnps        %xmm4,%xmm10
9553  DB  68,15,86,210                        ; orps          %xmm2,%xmm10
9554  DB  69,15,88,212                        ; addps         %xmm12,%xmm10
; xmm3 = sa + da*(1-sa).
9555  DB  65,15,88,219                        ; addps         %xmm11,%xmm3
; Exit: fetch the next 8-byte entry from (%rsi), move the three results into
; xmm0-xmm2, put the entry values back into xmm4-xmm6 (xmm13 still holds the
; value reloaded from the stack), release the stack space, and jump.
9556  DB  72,173                              ; lods          %ds:(%rsi),%rax
9557  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
9558  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
9559  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
9560  DB  15,40,229                           ; movaps        %xmm5,%xmm4
9561  DB  15,40,238                           ; movaps        %xmm6,%xmm5
9562  DB  65,15,40,245                        ; movaps        %xmm13,%xmm6
9563  DB  72,131,196,24                       ; add           $0x18,%rsp
9564  DB  255,224                             ; jmpq          *%rax
9565
; ---------------------------------------------------------------------------
; _sk_overlay_sse2 -- SSE2 pipeline stage, emitted as raw machine-code bytes.
; The trailing comment on each DB line is its disassembly in AT&T syntax
; (operand order "src,dst").
;
; For each of the three register pairs (s,d) = (xmm0,xmm4), (xmm1,xmm5),
; (xmm2,xmm6), with sa = xmm3 and da = xmm7, every 32-bit float lane gets:
;     s'  = s*(1-da) + d*(1-sa)
;           + ( (2*d <= da) ? 2*s*d : sa*da - 2*(da-d)*(sa-s) )
;     sa' = sa + da*(1-sa)
; This is the same arithmetic as _sk_hardlight_sse2 except that the select
; condition tests 2*d <= da instead of 2*s <= sa.
; NOTE(review): the names s/d/sa/da assume the usual Skia convention
; (xmm0-3 = source r,g,b,a; xmm4-7 = destination r,g,b,a). That mapping is
; not visible in this part of the file -- confirm.
;
; Writes:    xmm0-xmm3 (results, computed in place), xmm8-xmm14 (scratch),
;            rax, rsi.
; Preserves: xmm4-xmm7 (only read). No stack use.
; Exit:      lods loads the 8-byte value at (%rsi) into %rax and advances
;            %rsi (by 8 when the direction flag is clear); the stage then
;            jumps to %rax (presumably the next stage -- confirm).
; ---------------------------------------------------------------------------
9566PUBLIC _sk_overlay_sse2
9567_sk_overlay_sse2 LABEL PROC
; xmm8 = 1.0f in every lane (bit pattern 0x3f800000 broadcast by shufps $0);
; xmm9 = 1-da.
9568  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9569  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9570  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9571  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
9572  DB  68,15,92,207                        ; subps         %xmm7,%xmm9
; Pair 0 (s = xmm0, d = xmm4): xmm8 becomes 1-sa;
; xmm11 = d*(1-sa) + s*(1-da).
9573  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
9574  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
9575  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
9576  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
9577  DB  68,15,89,220                        ; mulps         %xmm4,%xmm11
9578  DB  69,15,88,218                        ; addps         %xmm10,%xmm11
; xmm12 = sa-s (taken before xmm0 is overwritten); xmm0 = 2*s*d;
; xmm14 = all-ones mask where 2*d <= da (cmpps predicate 2 = LE);
; xmm10 = sa*da (shared by all three pairs);
; xmm12 = sa*da - 2*(da-d)*(sa-s).
9579  DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
9580  DB  68,15,92,224                        ; subps         %xmm0,%xmm12
9581  DB  15,89,196                           ; mulps         %xmm4,%xmm0
9582  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9583  DB  68,15,92,236                        ; subps         %xmm4,%xmm13
9584  DB  68,15,40,244                        ; movaps        %xmm4,%xmm14
9585  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
9586  DB  68,15,194,247,2                     ; cmpleps       %xmm7,%xmm14
9587  DB  15,88,192                           ; addps         %xmm0,%xmm0
9588  DB  68,15,40,211                        ; movaps        %xmm3,%xmm10
9589  DB  68,15,89,215                        ; mulps         %xmm7,%xmm10
9590  DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
9591  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
9592  DB  69,15,40,226                        ; movaps        %xmm10,%xmm12
9593  DB  69,15,92,229                        ; subps         %xmm13,%xmm12
; Mask select via and/andn/or, then add xmm11: pair-0 result is in xmm0.
9594  DB  65,15,84,198                        ; andps         %xmm14,%xmm0
9595  DB  69,15,85,244                        ; andnps        %xmm12,%xmm14
9596  DB  65,15,86,198                        ; orps          %xmm14,%xmm0
9597  DB  65,15,88,195                        ; addps         %xmm11,%xmm0
; Pair 1 (s = xmm1, d = xmm5): same computation; result left in xmm1.
9598  DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
9599  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
9600  DB  69,15,40,224                        ; movaps        %xmm8,%xmm12
9601  DB  68,15,89,229                        ; mulps         %xmm5,%xmm12
9602  DB  69,15,88,227                        ; addps         %xmm11,%xmm12
9603  DB  68,15,40,219                        ; movaps        %xmm3,%xmm11
9604  DB  68,15,92,217                        ; subps         %xmm1,%xmm11
9605  DB  15,89,205                           ; mulps         %xmm5,%xmm1
9606  DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
9607  DB  68,15,92,237                        ; subps         %xmm5,%xmm13
9608  DB  68,15,40,245                        ; movaps        %xmm5,%xmm14
9609  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
9610  DB  68,15,194,247,2                     ; cmpleps       %xmm7,%xmm14
9611  DB  15,88,201                           ; addps         %xmm1,%xmm1
9612  DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
9613  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
9614  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
9615  DB  69,15,92,221                        ; subps         %xmm13,%xmm11
9616  DB  65,15,84,206                        ; andps         %xmm14,%xmm1
9617  DB  69,15,85,243                        ; andnps        %xmm11,%xmm14
9618  DB  65,15,86,206                        ; orps          %xmm14,%xmm1
9619  DB  65,15,88,204                        ; addps         %xmm12,%xmm1
; Pair 2 (s = xmm2, d = xmm6): same computation; result left in xmm2.
; xmm9 (1-da) and xmm10 (sa*da) are consumed in place here.
9620  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
9621  DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
9622  DB  68,15,89,222                        ; mulps         %xmm6,%xmm11
9623  DB  69,15,88,217                        ; addps         %xmm9,%xmm11
9624  DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
9625  DB  68,15,92,202                        ; subps         %xmm2,%xmm9
9626  DB  15,89,214                           ; mulps         %xmm6,%xmm2
9627  DB  68,15,40,231                        ; movaps        %xmm7,%xmm12
9628  DB  68,15,92,230                        ; subps         %xmm6,%xmm12
9629  DB  68,15,40,238                        ; movaps        %xmm6,%xmm13
9630  DB  69,15,88,237                        ; addps         %xmm13,%xmm13
9631  DB  68,15,194,239,2                     ; cmpleps       %xmm7,%xmm13
9632  DB  15,88,210                           ; addps         %xmm2,%xmm2
9633  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
9634  DB  69,15,88,228                        ; addps         %xmm12,%xmm12
9635  DB  69,15,92,212                        ; subps         %xmm12,%xmm10
9636  DB  65,15,84,213                        ; andps         %xmm13,%xmm2
9637  DB  69,15,85,234                        ; andnps        %xmm10,%xmm13
9638  DB  65,15,86,213                        ; orps          %xmm13,%xmm2
9639  DB  65,15,88,211                        ; addps         %xmm11,%xmm2
; xmm3 = sa + da*(1-sa).
9640  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
9641  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
; Exit: fetch the next 8-byte entry from (%rsi) and jump to it.
9642  DB  72,173                              ; lods          %ds:(%rsi),%rax
9643  DB  255,224                             ; jmpq          *%rax
9644
9645PUBLIC _sk_softlight_sse2
9646_sk_softlight_sse2 LABEL PROC
9647  DB  72,131,236,56                       ; sub           $0x38,%rsp
9648  DB  15,41,84,36,32                      ; movaps        %xmm2,0x20(%rsp)
9649  DB  15,40,209                           ; movaps        %xmm1,%xmm2
9650  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
9651  DB  69,15,87,228                        ; xorps         %xmm12,%xmm12
9652  DB  68,15,194,231,1                     ; cmpltps       %xmm7,%xmm12
9653  DB  68,15,40,212                        ; movaps        %xmm4,%xmm10
9654  DB  68,15,94,215                        ; divps         %xmm7,%xmm10
9655  DB  69,15,84,212                        ; andps         %xmm12,%xmm10
9656  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9657  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
9658  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
9659  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
9660  DB  69,15,92,242                        ; subps         %xmm10,%xmm14
9661  DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
9662  DB  69,15,40,234                        ; movaps        %xmm10,%xmm13
9663  DB  65,15,82,194                        ; rsqrtps       %xmm10,%xmm0
9664  DB  68,15,83,248                        ; rcpps         %xmm0,%xmm15
9665  DB  69,15,92,250                        ; subps         %xmm10,%xmm15
9666  DB  69,15,88,210                        ; addps         %xmm10,%xmm10
9667  DB  69,15,88,210                        ; addps         %xmm10,%xmm10
9668  DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
9669  DB  15,89,192                           ; mulps         %xmm0,%xmm0
9670  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
9671  DB  69,15,92,217                        ; subps         %xmm9,%xmm11
9672  DB  68,15,89,216                        ; mulps         %xmm0,%xmm11
9673  DB  184,0,0,224,64                      ; mov           $0x40e00000,%eax
9674  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
9675  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
9676  DB  69,15,89,234                        ; mulps         %xmm10,%xmm13
9677  DB  69,15,88,235                        ; addps         %xmm11,%xmm13
9678  DB  68,15,40,219                        ; movaps        %xmm3,%xmm11
9679  DB  15,40,204                           ; movaps        %xmm4,%xmm1
9680  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
9681  DB  15,88,228                           ; addps         %xmm4,%xmm4
9682  DB  15,88,228                           ; addps         %xmm4,%xmm4
9683  DB  15,194,231,2                        ; cmpleps       %xmm7,%xmm4
9684  DB  68,15,84,236                        ; andps         %xmm4,%xmm13
9685  DB  65,15,85,231                        ; andnps        %xmm15,%xmm4
9686  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
9687  DB  15,88,192                           ; addps         %xmm0,%xmm0
9688  DB  65,15,86,229                        ; orps          %xmm13,%xmm4
9689  DB  68,15,40,232                        ; movaps        %xmm0,%xmm13
9690  DB  68,15,92,235                        ; subps         %xmm3,%xmm13
9691  DB  69,15,89,245                        ; mulps         %xmm13,%xmm14
9692  DB  68,15,89,239                        ; mulps         %xmm7,%xmm13
9693  DB  65,15,89,229                        ; mulps         %xmm13,%xmm4
9694  DB  65,15,88,227                        ; addps         %xmm11,%xmm4
9695  DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
9696  DB  68,15,92,219                        ; subps         %xmm3,%xmm11
9697  DB  69,15,40,251                        ; movaps        %xmm11,%xmm15
9698  DB  15,41,76,36,16                      ; movaps        %xmm1,0x10(%rsp)
9699  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
9700  DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
9701  DB  68,15,92,239                        ; subps         %xmm7,%xmm13
9702  DB  69,15,89,197                        ; mulps         %xmm13,%xmm8
9703  DB  69,15,88,199                        ; addps         %xmm15,%xmm8
9704  DB  68,15,88,243                        ; addps         %xmm3,%xmm14
9705  DB  68,15,89,241                        ; mulps         %xmm1,%xmm14
9706  DB  15,194,195,2                        ; cmpleps       %xmm3,%xmm0
9707  DB  68,15,84,240                        ; andps         %xmm0,%xmm14
9708  DB  15,85,196                           ; andnps        %xmm4,%xmm0
9709  DB  65,15,86,198                        ; orps          %xmm14,%xmm0
9710  DB  65,15,88,192                        ; addps         %xmm8,%xmm0
9711  DB  15,41,44,36                         ; movaps        %xmm5,(%rsp)
9712  DB  68,15,40,197                        ; movaps        %xmm5,%xmm8
9713  DB  68,15,94,199                        ; divps         %xmm7,%xmm8
9714  DB  69,15,84,196                        ; andps         %xmm12,%xmm8
9715  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
9716  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
9717  DB  69,15,88,246                        ; addps         %xmm14,%xmm14
9718  DB  65,15,40,230                        ; movaps        %xmm14,%xmm4
9719  DB  15,89,228                           ; mulps         %xmm4,%xmm4
9720  DB  65,15,88,230                        ; addps         %xmm14,%xmm4
9721  DB  69,15,40,248                        ; movaps        %xmm8,%xmm15
9722  DB  69,15,92,249                        ; subps         %xmm9,%xmm15
9723  DB  68,15,89,252                        ; mulps         %xmm4,%xmm15
9724  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
9725  DB  69,15,92,240                        ; subps         %xmm8,%xmm14
9726  DB  65,15,82,224                        ; rsqrtps       %xmm8,%xmm4
9727  DB  15,83,228                           ; rcpps         %xmm4,%xmm4
9728  DB  65,15,92,224                        ; subps         %xmm8,%xmm4
9729  DB  69,15,89,194                        ; mulps         %xmm10,%xmm8
9730  DB  69,15,88,199                        ; addps         %xmm15,%xmm8
9731  DB  68,15,40,253                        ; movaps        %xmm5,%xmm15
9732  DB  69,15,88,255                        ; addps         %xmm15,%xmm15
9733  DB  69,15,88,255                        ; addps         %xmm15,%xmm15
9734  DB  68,15,194,255,2                     ; cmpleps       %xmm7,%xmm15
9735  DB  69,15,84,199                        ; andps         %xmm15,%xmm8
9736  DB  68,15,85,252                        ; andnps        %xmm4,%xmm15
9737  DB  69,15,86,248                        ; orps          %xmm8,%xmm15
9738  DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
9739  DB  69,15,88,192                        ; addps         %xmm8,%xmm8
9740  DB  65,15,40,224                        ; movaps        %xmm8,%xmm4
9741  DB  15,92,227                           ; subps         %xmm3,%xmm4
9742  DB  68,15,89,244                        ; mulps         %xmm4,%xmm14
9743  DB  15,89,231                           ; mulps         %xmm7,%xmm4
9744  DB  68,15,89,252                        ; mulps         %xmm4,%xmm15
9745  DB  15,40,227                           ; movaps        %xmm3,%xmm4
9746  DB  15,89,229                           ; mulps         %xmm5,%xmm4
9747  DB  68,15,88,252                        ; addps         %xmm4,%xmm15
9748  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
9749  DB  15,89,229                           ; mulps         %xmm5,%xmm4
9750  DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
9751  DB  15,88,212                           ; addps         %xmm4,%xmm2
9752  DB  68,15,88,243                        ; addps         %xmm3,%xmm14
9753  DB  68,15,89,245                        ; mulps         %xmm5,%xmm14
9754  DB  68,15,194,195,2                     ; cmpleps       %xmm3,%xmm8
9755  DB  69,15,84,240                        ; andps         %xmm8,%xmm14
9756  DB  69,15,85,199                        ; andnps        %xmm15,%xmm8
9757  DB  69,15,86,198                        ; orps          %xmm14,%xmm8
9758  DB  68,15,88,194                        ; addps         %xmm2,%xmm8
9759  DB  68,15,40,246                        ; movaps        %xmm6,%xmm14
9760  DB  65,15,40,206                        ; movaps        %xmm14,%xmm1
9761  DB  15,94,207                           ; divps         %xmm7,%xmm1
9762  DB  65,15,84,204                        ; andps         %xmm12,%xmm1
9763  DB  15,40,225                           ; movaps        %xmm1,%xmm4
9764  DB  65,15,92,225                        ; subps         %xmm9,%xmm4
9765  DB  68,15,92,201                        ; subps         %xmm1,%xmm9
9766  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
9767  DB  15,82,241                           ; rsqrtps       %xmm1,%xmm6
9768  DB  15,83,246                           ; rcpps         %xmm6,%xmm6
9769  DB  15,92,241                           ; subps         %xmm1,%xmm6
9770  DB  15,88,201                           ; addps         %xmm1,%xmm1
9771  DB  15,88,201                           ; addps         %xmm1,%xmm1
9772  DB  15,40,233                           ; movaps        %xmm1,%xmm5
9773  DB  15,89,237                           ; mulps         %xmm5,%xmm5
9774  DB  15,88,233                           ; addps         %xmm1,%xmm5
9775  DB  15,89,236                           ; mulps         %xmm4,%xmm5
9776  DB  68,15,88,213                        ; addps         %xmm5,%xmm10
9777  DB  65,15,40,238                        ; movaps        %xmm14,%xmm5
9778  DB  15,40,205                           ; movaps        %xmm5,%xmm1
9779  DB  15,88,201                           ; addps         %xmm1,%xmm1
9780  DB  15,88,201                           ; addps         %xmm1,%xmm1
9781  DB  15,194,207,2                        ; cmpleps       %xmm7,%xmm1
9782  DB  68,15,84,209                        ; andps         %xmm1,%xmm10
9783  DB  15,85,206                           ; andnps        %xmm6,%xmm1
9784  DB  15,40,84,36,32                      ; movaps        0x20(%rsp),%xmm2
9785  DB  68,15,89,234                        ; mulps         %xmm2,%xmm13
9786  DB  15,88,210                           ; addps         %xmm2,%xmm2
9787  DB  65,15,86,202                        ; orps          %xmm10,%xmm1
9788  DB  15,40,226                           ; movaps        %xmm2,%xmm4
9789  DB  15,92,227                           ; subps         %xmm3,%xmm4
9790  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
9791  DB  15,89,231                           ; mulps         %xmm7,%xmm4
9792  DB  15,89,204                           ; mulps         %xmm4,%xmm1
9793  DB  15,40,227                           ; movaps        %xmm3,%xmm4
9794  DB  15,89,229                           ; mulps         %xmm5,%xmm4
9795  DB  15,88,204                           ; addps         %xmm4,%xmm1
9796  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
9797  DB  15,89,229                           ; mulps         %xmm5,%xmm4
9798  DB  65,15,88,229                        ; addps         %xmm13,%xmm4
9799  DB  68,15,88,203                        ; addps         %xmm3,%xmm9
9800  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
9801  DB  15,40,245                           ; movaps        %xmm5,%xmm6
9802  DB  15,194,211,2                        ; cmpleps       %xmm3,%xmm2
9803  DB  68,15,84,202                        ; andps         %xmm2,%xmm9
9804  DB  15,85,209                           ; andnps        %xmm1,%xmm2
9805  DB  65,15,86,209                        ; orps          %xmm9,%xmm2
9806  DB  15,88,212                           ; addps         %xmm4,%xmm2
9807  DB  68,15,89,223                        ; mulps         %xmm7,%xmm11
9808  DB  65,15,88,219                        ; addps         %xmm11,%xmm3
9809  DB  72,173                              ; lods          %ds:(%rsi),%rax
9810  DB  15,40,100,36,16                     ; movaps        0x10(%rsp),%xmm4
9811  DB  15,40,44,36                         ; movaps        (%rsp),%xmm5
9812  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
9813  DB  72,131,196,56                       ; add           $0x38,%rsp
9814  DB  255,224                             ; jmpq          *%rax
9815
; _sk_clamp_0_sse2
;   Lower-bounds every lane of xmm0, xmm1, xmm2 and xmm3 at 0.0f.
;   Scratch: xmm8 (zeroed), rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9816PUBLIC _sk_clamp_0_sse2
9817_sk_clamp_0_sse2 LABEL PROC
  ; xmm8 = 0.0f in all four lanes
9818  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
  ; xmmN = max(xmmN, 0) for N = 0..3
9819  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
9820  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
9821  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
9822  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
  ; fetch next address from (%rsi) and jump to it
9823  DB  72,173                              ; lods          %ds:(%rsi),%rax
9824  DB  255,224                             ; jmpq          *%rax
9825
; _sk_clamp_1_sse2
;   Upper-bounds every lane of xmm0, xmm1, xmm2 and xmm3 at 1.0f.
;   Scratch: xmm8 (holds the broadcast 1.0f), eax/rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9826PUBLIC _sk_clamp_1_sse2
9827_sk_clamp_1_sse2 LABEL PROC
  ; 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0f; broadcast it to all lanes of xmm8
9828  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9829  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9830  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmmN = min(xmmN, 1.0f) for N = 0..3
9831  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
9832  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
9833  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
9834  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
  ; fetch next address from (%rsi) and jump to it
9835  DB  72,173                              ; lods          %ds:(%rsi),%rax
9836  DB  255,224                             ; jmpq          *%rax
9837
; _sk_clamp_a_sse2
;   First upper-bounds xmm3 at 1.0f, then upper-bounds xmm0, xmm1 and xmm2 at the
;   (already clamped) xmm3, lane by lane.
;   NOTE(review): by the stage name xmm3 is presumably the alpha channel and xmm0..xmm2
;   the color channels -- confirm against the stage definitions.
;   Scratch: xmm8 (broadcast 1.0f), eax/rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9838PUBLIC _sk_clamp_a_sse2
9839_sk_clamp_a_sse2 LABEL PROC
  ; xmm8 = 1.0f in all lanes (0x3f800000 is the bit pattern of 1.0f)
9840  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9841  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9842  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm3 = min(xmm3, 1.0f); this must happen before xmm3 is used as the bound below
9843  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
  ; xmmN = min(xmmN, xmm3) for N = 0..2
9844  DB  15,93,195                           ; minps         %xmm3,%xmm0
9845  DB  15,93,203                           ; minps         %xmm3,%xmm1
9846  DB  15,93,211                           ; minps         %xmm3,%xmm2
  ; fetch next address from (%rsi) and jump to it
9847  DB  72,173                              ; lods          %ds:(%rsi),%rax
9848  DB  255,224                             ; jmpq          *%rax
9849
; _sk_set_rgb_sse2
;   Consumes one 8-byte entry from the stream at (%rsi) and treats it as a pointer to
;   three consecutive 32-bit floats. Each float is broadcast to all four lanes:
;     xmm0 = float at +0, xmm1 = float at +4, xmm2 = float at +8.
;   xmm3 is left untouched.
;   Scratch: rax.
;   Exit: a second lods fetches the next 8-byte value at (%rsi) into rax (rsi steps past
;         it, assuming the direction flag is clear); control jumps to that address.
9850PUBLIC _sk_set_rgb_sse2
9851_sk_set_rgb_sse2 LABEL PROC
  ; rax = pointer to the three floats
9852  DB  72,173                              ; lods          %ds:(%rsi),%rax
9853  DB  243,15,16,0                         ; movss         (%rax),%xmm0
9854  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
  ; shufps $0 replicates lane 0 into every lane
9855  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
9856  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
9857  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
9858  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
  ; fetch next address from (%rsi) and jump to it
9859  DB  72,173                              ; lods          %ds:(%rsi),%rax
9860  DB  255,224                             ; jmpq          *%rax
9861
; _sk_swap_rb_sse2
;   Exchanges the contents of xmm0 and xmm2; xmm1 and xmm3 are untouched.
;   NOTE(review): by the stage name xmm0/xmm2 are presumably the red/blue channels -- confirm.
;   Scratch: xmm8 (temporary copy of the old xmm0), rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9862PUBLIC _sk_swap_rb_sse2
9863_sk_swap_rb_sse2 LABEL PROC
  ; xmm8 = old xmm0
9864  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
  ; next address is fetched early; it is only used by the final jmp
9865  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm0 = old xmm2, xmm2 = old xmm0
9866  DB  15,40,194                           ; movaps        %xmm2,%xmm0
9867  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
9868  DB  255,224                             ; jmpq          *%rax
9869
; _sk_swap_sse2
;   Exchanges the register group xmm0..xmm3 with the group xmm4..xmm7, pairwise:
;     xmm0 <-> xmm4, xmm1 <-> xmm5, xmm2 <-> xmm6, xmm3 <-> xmm7.
;   NOTE(review): the two groups are presumably the "src" and "dst" pixel values
;   (cf. the move_src_dst / move_dst_src stage names) -- confirm.
;   Scratch: xmm8..xmm11 (hold the old xmm3..xmm0 respectively), rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9870PUBLIC _sk_swap_sse2
9871_sk_swap_sse2 LABEL PROC
  ; stash the old xmm0..xmm3 in xmm11..xmm8
9872  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
9873  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
9874  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
9875  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
  ; next address is fetched early; it is only used by the final jmp
9876  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm0..xmm3 = old xmm4..xmm7
9877  DB  15,40,196                           ; movaps        %xmm4,%xmm0
9878  DB  15,40,205                           ; movaps        %xmm5,%xmm1
9879  DB  15,40,214                           ; movaps        %xmm6,%xmm2
9880  DB  15,40,223                           ; movaps        %xmm7,%xmm3
  ; xmm4..xmm7 = old xmm0..xmm3 (from the stash)
9881  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
9882  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
9883  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
9884  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
9885  DB  255,224                             ; jmpq          *%rax
9886
; _sk_move_src_dst_sse2
;   Copies xmm0..xmm3 into xmm4..xmm7 (xmm4 = xmm0, xmm5 = xmm1, xmm6 = xmm2, xmm7 = xmm3).
;   xmm0..xmm3 keep their values.
;   NOTE(review): by the stage name xmm0..xmm3 are presumably "src" and xmm4..xmm7 "dst" -- confirm.
;   Scratch: rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9887PUBLIC _sk_move_src_dst_sse2
9888_sk_move_src_dst_sse2 LABEL PROC
  ; next address is fetched first; it is only used by the final jmp
9889  DB  72,173                              ; lods          %ds:(%rsi),%rax
9890  DB  15,40,224                           ; movaps        %xmm0,%xmm4
9891  DB  15,40,233                           ; movaps        %xmm1,%xmm5
9892  DB  15,40,242                           ; movaps        %xmm2,%xmm6
9893  DB  15,40,251                           ; movaps        %xmm3,%xmm7
9894  DB  255,224                             ; jmpq          *%rax
9895
; _sk_move_dst_src_sse2
;   Copies xmm4..xmm7 into xmm0..xmm3 (xmm0 = xmm4, xmm1 = xmm5, xmm2 = xmm6, xmm3 = xmm7).
;   xmm4..xmm7 keep their values.
;   NOTE(review): by the stage name xmm4..xmm7 are presumably "dst" and xmm0..xmm3 "src" -- confirm.
;   Scratch: rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9896PUBLIC _sk_move_dst_src_sse2
9897_sk_move_dst_src_sse2 LABEL PROC
  ; next address is fetched first; it is only used by the final jmp
9898  DB  72,173                              ; lods          %ds:(%rsi),%rax
9899  DB  15,40,196                           ; movaps        %xmm4,%xmm0
9900  DB  15,40,205                           ; movaps        %xmm5,%xmm1
9901  DB  15,40,214                           ; movaps        %xmm6,%xmm2
9902  DB  15,40,223                           ; movaps        %xmm7,%xmm3
9903  DB  255,224                             ; jmpq          *%rax
9904
; _sk_premul_sse2
;   Multiplies xmm0, xmm1 and xmm2 lane-wise by xmm3; xmm3 itself is unchanged.
;   NOTE(review): by the stage name this is presumably color *= alpha with alpha in xmm3 -- confirm.
;   Scratch: rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9905PUBLIC _sk_premul_sse2
9906_sk_premul_sse2 LABEL PROC
9907  DB  15,89,195                           ; mulps         %xmm3,%xmm0
9908  DB  15,89,203                           ; mulps         %xmm3,%xmm1
9909  DB  15,89,211                           ; mulps         %xmm3,%xmm2
  ; fetch next address from (%rsi) and jump to it
9910  DB  72,173                              ; lods          %ds:(%rsi),%rax
9911  DB  255,224                             ; jmpq          *%rax
9912
; _sk_unpremul_sse2
;   Computes a per-lane scale
;     scale = (0.0f != xmm3) ? 1.0f / xmm3 : 0.0f
;   and multiplies xmm0, xmm1 and xmm2 by it. xmm3 is unchanged.
;   The select is done with a compare mask AND-ed onto the quotient, so lanes where
;   xmm3 == 0 get a scale of exactly 0 instead of the result of dividing by zero.
;   NOTE(review): by the stage name xmm3 is presumably alpha -- confirm.
;   Scratch: xmm8 (mask, then scale), xmm9 (1.0f / xmm3), eax/rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9913PUBLIC _sk_unpremul_sse2
9914_sk_unpremul_sse2 LABEL PROC
  ; xmm8 = 0.0f
9915  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
  ; xmm9 = 1.0f broadcast (0x3f800000 is the bit pattern of 1.0f)
9916  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9917  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
9918  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm9 = 1.0f / xmm3
9919  DB  68,15,94,203                        ; divps         %xmm3,%xmm9
  ; xmm8 = all-ones in lanes where 0.0f != xmm3, all-zeros elsewhere
9920  DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
  ; xmm8 = mask & (1.0f / xmm3)  -> the scale
9921  DB  69,15,84,193                        ; andps         %xmm9,%xmm8
9922  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
9923  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
9924  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; fetch next address from (%rsi) and jump to it
9925  DB  72,173                              ; lods          %ds:(%rsi),%rax
9926  DB  255,224                             ; jmpq          *%rax
9927
; _sk_from_srgb_sse2
;   Applies the same piecewise polynomial independently to xmm0, xmm1 and xmm2.
;   With x the input lane value:
;     lo     = x * C_lo                        C_lo = 0x3d9e8391 (~0.0774)
;     hi     = (x*x) * (x*0.3 + 0.6975) + 0.0025
;     result = (x < 0.055) ? lo : hi
;   The select is branch-free: cmpltps builds a mask, then andps/andnps/orps blend lo and hi.
;   xmm3 is not touched.
;   NOTE(review): by the stage name this is presumably an approximation of the
;   sRGB -> linear transfer function (C_lo is close to 1/12.92) -- confirm.
;   Constants (IEEE-754 single bit patterns, each broadcast to all lanes):
;     xmm8  = 0x3d9e8391 (~0.0774)    xmm9  = 0x3e99999a (0.3)
;     xmm10 = 0x3f328f5c (0.6975)     xmm11 = 0x3b23d70a (0.0025)
;     xmm12 = 0x3d6147ae (0.055), loaded only after xmm12 has served as a temp for x*x
;   Scratch: xmm8..xmm15, eax/rax. xmm8 and xmm9 are consumed (overwritten) by the
;   last channel, so the constants do not survive the stage.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9928PUBLIC _sk_from_srgb_sse2
9929_sk_from_srgb_sse2 LABEL PROC
  ; ---- channel in xmm0 ----
  ; xmm8 = C_lo; xmm13 = lo = C_lo * x
9930  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
9931  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9932  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9933  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
9934  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
  ; xmm12 = x*x (temporary use of xmm12)
9935  DB  68,15,40,224                        ; movaps        %xmm0,%xmm12
9936  DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
  ; xmm9 = 0.3, xmm10 = 0.6975
9937  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
9938  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
9939  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
9940  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
9941  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
9942  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
  ; xmm14 = 0.3*x + 0.6975
9943  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
9944  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
9945  DB  69,15,88,242                        ; addps         %xmm10,%xmm14
  ; xmm11 = 0.0025; xmm14 = hi = (x*x)*(0.3*x + 0.6975) + 0.0025
9946  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
9947  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
9948  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
9949  DB  69,15,89,244                        ; mulps         %xmm12,%xmm14
9950  DB  69,15,88,243                        ; addps         %xmm11,%xmm14
  ; xmm12 = 0.055 (threshold), replacing the x*x temporary
9951  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
9952  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
9953  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
  ; xmm0 = mask(x < 0.055); xmm0 = (mask & lo) | (~mask & hi)
9954  DB  65,15,194,196,1                     ; cmpltps       %xmm12,%xmm0
9955  DB  68,15,84,232                        ; andps         %xmm0,%xmm13
9956  DB  65,15,85,198                        ; andnps        %xmm14,%xmm0
9957  DB  65,15,86,197                        ; orps          %xmm13,%xmm0
  ; ---- channel in xmm1: lo in xmm13, x*x in xmm14, hi in xmm15 ----
9958  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
9959  DB  68,15,89,233                        ; mulps         %xmm1,%xmm13
9960  DB  68,15,40,241                        ; movaps        %xmm1,%xmm14
9961  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
9962  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
9963  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
9964  DB  69,15,88,250                        ; addps         %xmm10,%xmm15
9965  DB  69,15,89,254                        ; mulps         %xmm14,%xmm15
9966  DB  69,15,88,251                        ; addps         %xmm11,%xmm15
9967  DB  65,15,194,204,1                     ; cmpltps       %xmm12,%xmm1
9968  DB  68,15,84,233                        ; andps         %xmm1,%xmm13
9969  DB  65,15,85,207                        ; andnps        %xmm15,%xmm1
9970  DB  65,15,86,205                        ; orps          %xmm13,%xmm1
  ; ---- channel in xmm2: last use of the constants, so xmm8 (lo) and xmm9 (hi) are computed in place ----
9971  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
9972  DB  68,15,40,234                        ; movaps        %xmm2,%xmm13
9973  DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
9974  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
9975  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
9976  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
9977  DB  69,15,88,203                        ; addps         %xmm11,%xmm9
9978  DB  65,15,194,212,1                     ; cmpltps       %xmm12,%xmm2
9979  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
9980  DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
9981  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
  ; fetch next address from (%rsi) and jump to it
9982  DB  72,173                              ; lods          %ds:(%rsi),%rax
9983  DB  255,224                             ; jmpq          *%rax
9984
; _sk_to_srgb_sse2
;   Applies the same piecewise approximation independently to xmm0, xmm1 and xmm2.
;   With x the input lane value and using the hardware approximations
;     rs   = rsqrtps(x)         (~ x^-1/2)
;     sq   = rcpps(rs)          (~ x^ 1/2)
;     ft   = rsqrtps(rs)        (~ x^ 1/4)
;   the stage computes
;     lo     = x * 12.46
;     hi     = min(1.0, 0.411192*ft + (0.689206*sq - 0.0988))
;     result = (x < 0.0043) ? lo : hi
;   The select is branch-free: cmpltps builds a mask, then andps/andnps/orps blend lo and hi.
;   xmm3 is not touched.
;   NOTE(review): by the stage name this is presumably an approximation of the
;   linear -> sRGB transfer function -- confirm.
;   Constants (IEEE-754 single bit patterns, each broadcast to all lanes):
;     xmm8  = 0x41475c29 (12.46)       xmm9  = 0x3f800000 (1.0)
;     xmm10 = 0x3ed287c2 (~0.411192)   xmm11 = 0x3f306fce (~0.689206)
;     xmm12 = 0x3dca57a8 with the sign bit flipped by xor 0x80000000 (~ -0.0988)
;     xmm13 = 0x3b8ce704 (~0.0043), loaded after xmm13 has served as a temp for ft
;   Scratch: xmm8..xmm15, eax/rax. xmm8, xmm9 and xmm11 are consumed by the last channel.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
9985PUBLIC _sk_to_srgb_sse2
9986_sk_to_srgb_sse2 LABEL PROC
  ; ---- channel in xmm0 ----
  ; xmm8 = rs, xmm15 = sq, xmm13 = ft
9987  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
9988  DB  69,15,83,248                        ; rcpps         %xmm8,%xmm15
9989  DB  69,15,82,232                        ; rsqrtps       %xmm8,%xmm13
  ; xmm8 = 12.46 (rs is no longer needed); xmm14 = lo = 12.46 * x
9990  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
9991  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
9992  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
9993  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
9994  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
  ; xmm9 = 1.0, xmm10 = 0.411192, xmm11 = 0.689206
9995  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
9996  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
9997  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
9998  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
9999  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
10000  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
10001  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
10002  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
10003  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
  ; xmm12 = -0.0988: the positive bit pattern is loaded, then xor flips the sign bit
10004  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
10005  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
10006  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
10007  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
  ; xmm15 = 0.689206*sq - 0.0988; xmm13 = 0.411192*ft + xmm15
10008  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
10009  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
10010  DB  69,15,89,234                        ; mulps         %xmm10,%xmm13
10011  DB  69,15,88,239                        ; addps         %xmm15,%xmm13
  ; xmm15 = hi = min(1.0, xmm13)
10012  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
10013  DB  69,15,93,253                        ; minps         %xmm13,%xmm15
  ; xmm13 = 0.0043 (threshold), replacing the temporary
10014  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
10015  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
10016  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
  ; xmm0 = mask(x < 0.0043); xmm0 = (mask & lo) | (~mask & hi)
10017  DB  65,15,194,197,1                     ; cmpltps       %xmm13,%xmm0
10018  DB  68,15,84,240                        ; andps         %xmm0,%xmm14
10019  DB  65,15,85,199                        ; andnps        %xmm15,%xmm0
10020  DB  65,15,86,198                        ; orps          %xmm14,%xmm0
  ; ---- channel in xmm1: rs/ft in xmm14, sq then hi in xmm15, lo in xmm14 ----
10021  DB  68,15,82,241                        ; rsqrtps       %xmm1,%xmm14
10022  DB  69,15,83,254                        ; rcpps         %xmm14,%xmm15
10023  DB  69,15,82,246                        ; rsqrtps       %xmm14,%xmm14
10024  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
10025  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
10026  DB  69,15,89,242                        ; mulps         %xmm10,%xmm14
10027  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
10028  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
10029  DB  69,15,93,254                        ; minps         %xmm14,%xmm15
10030  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
10031  DB  68,15,89,241                        ; mulps         %xmm1,%xmm14
10032  DB  65,15,194,205,1                     ; cmpltps       %xmm13,%xmm1
10033  DB  68,15,84,241                        ; andps         %xmm1,%xmm14
10034  DB  65,15,85,207                        ; andnps        %xmm15,%xmm1
10035  DB  65,15,86,206                        ; orps          %xmm14,%xmm1
  ; ---- channel in xmm2: last use of the constants, so xmm11 (ft), xmm9 (hi) and xmm8 (lo) are reused in place ----
10036  DB  68,15,82,242                        ; rsqrtps       %xmm2,%xmm14
10037  DB  69,15,83,254                        ; rcpps         %xmm14,%xmm15
10038  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
10039  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
10040  DB  69,15,82,222                        ; rsqrtps       %xmm14,%xmm11
10041  DB  69,15,89,218                        ; mulps         %xmm10,%xmm11
10042  DB  69,15,88,223                        ; addps         %xmm15,%xmm11
10043  DB  69,15,93,203                        ; minps         %xmm11,%xmm9
10044  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
10045  DB  65,15,194,213,1                     ; cmpltps       %xmm13,%xmm2
10046  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
10047  DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
10048  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
  ; fetch next address from (%rsi) and jump to it
10049  DB  72,173                              ; lods          %ds:(%rsi),%rax
10050  DB  255,224                             ; jmpq          *%rax
10051
; _sk_from_2dot2_sse2
;   Raises each lane of xmm0, xmm1 and xmm2 to (approximately) the power 141/64 ~= 2.203,
;   then lower-bounds the result at 0.0f. xmm3 is not touched.
;   The power is assembled from repeated rsqrtps (each application halves and negates
;   the exponent, up to the instruction's approximation error):
;     rsqrt^4(x) ~ x^(1/16)     rsqrt^6(x) ~ x^(1/64)
;     result = (x*x) * (x^(1/16))^3 * x^(1/64) = x^(2 + 3/16 + 1/64) = x^(141/64)
;   NOTE(review): by the stage name this presumably approximates a gamma-2.2 decode -- confirm.
;   Scratch: xmm8..xmm11, rax. The xmm1 and xmm2 results are built in xmm8 and xmm9
;   and moved into place just before the jump.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
10052PUBLIC _sk_from_2dot2_sse2
10053_sk_from_2dot2_sse2 LABEL PROC
  ; ---- channel in xmm0: xmm8 = x, xmm9 ~ x^(1/16), xmm10 ~ x^(1/64) ----
10054  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
10055  DB  65,15,82,192                        ; rsqrtps       %xmm8,%xmm0
10056  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
10057  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
10058  DB  68,15,82,200                        ; rsqrtps       %xmm0,%xmm9
10059  DB  65,15,82,193                        ; rsqrtps       %xmm9,%xmm0
10060  DB  68,15,82,208                        ; rsqrtps       %xmm0,%xmm10
  ; xmm8 = x*x; xmm0 = (x^(1/16))^3 * x*x * x^(1/64)
10061  DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
10062  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
10063  DB  15,89,192                           ; mulps         %xmm0,%xmm0
10064  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
10065  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10066  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
  ; xmm10 = 0.0f, kept for all three max operations
10067  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
10068  DB  65,15,95,194                        ; maxps         %xmm10,%xmm0
  ; ---- channel in xmm1: xmm9 ~ x^(1/16), xmm11 ~ x^(1/64), result accumulated in xmm8 ----
10069  DB  68,15,82,193                        ; rsqrtps       %xmm1,%xmm8
10070  DB  69,15,82,192                        ; rsqrtps       %xmm8,%xmm8
10071  DB  69,15,82,192                        ; rsqrtps       %xmm8,%xmm8
10072  DB  69,15,82,200                        ; rsqrtps       %xmm8,%xmm9
10073  DB  69,15,82,193                        ; rsqrtps       %xmm9,%xmm8
10074  DB  69,15,82,216                        ; rsqrtps       %xmm8,%xmm11
10075  DB  15,89,201                           ; mulps         %xmm1,%xmm1
10076  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
10077  DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
10078  DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
10079  DB  68,15,89,193                        ; mulps         %xmm1,%xmm8
10080  DB  69,15,89,195                        ; mulps         %xmm11,%xmm8
10081  DB  69,15,95,194                        ; maxps         %xmm10,%xmm8
  ; ---- channel in xmm2: xmm11 ~ x^(1/16), xmm1 ~ x^(1/64) (xmm1 is free: its result lives in xmm8), result in xmm9 ----
10082  DB  15,82,202                           ; rsqrtps       %xmm2,%xmm1
10083  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10084  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10085  DB  68,15,82,217                        ; rsqrtps       %xmm1,%xmm11
10086  DB  65,15,82,203                        ; rsqrtps       %xmm11,%xmm1
10087  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10088  DB  15,89,210                           ; mulps         %xmm2,%xmm2
10089  DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
10090  DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
10091  DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
10092  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
10093  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
10094  DB  69,15,95,202                        ; maxps         %xmm10,%xmm9
  ; fetch next address, then move the xmm1/xmm2 results into place and jump
10095  DB  72,173                              ; lods          %ds:(%rsi),%rax
10096  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
10097  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
10098  DB  255,224                             ; jmpq          *%rax
10099
; _sk_to_2dot2_sse2
;   Raises each lane of xmm0, xmm1 and xmm2 to (approximately) the power 29/64 ~= 0.453,
;   then lower-bounds the result at 0.0f. xmm3 is not touched.
;   The power is assembled from rsqrtps/rcpps (each rsqrtps halves and negates the
;   exponent, rcpps negates it, up to the instructions' approximation error):
;     rs  = rsqrt(x)       ~ x^(-1/2)      rcp(rs)  ~ x^(1/2)
;     p   = rsqrt^5(x)     ~ x^(-1/32)
;     q   = rsqrt^6(x)     ~ x^( 1/64)     rcp(q)   ~ x^(-1/64)
;     result = rcp(rs) * p * rcp(q) = x^(1/2 - 1/32 - 1/64) = x^(29/64)
;   NOTE(review): by the stage name this presumably approximates a gamma-2.2 encode
;   (1/2.2 ~= 0.4545) -- confirm.
;   Scratch: xmm8 (temp, then 0.0f for the max operations), xmm9, xmm10, rax.
;   Exit: lods fetches the 8-byte value at (%rsi) into rax and steps rsi past it
;         (forward, assuming the direction flag is clear); control jumps to that address.
10100PUBLIC _sk_to_2dot2_sse2
10101_sk_to_2dot2_sse2 LABEL PROC
  ; ---- channel in xmm0: xmm8 = rs, xmm0 = p, xmm9 = q ----
10102  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
10103  DB  65,15,82,192                        ; rsqrtps       %xmm8,%xmm0
10104  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
10105  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
10106  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
10107  DB  68,15,82,200                        ; rsqrtps       %xmm0,%xmm9
  ; xmm8 = rcp(rs) * p; xmm0 = rcp(q) * xmm8
10108  DB  69,15,83,192                        ; rcpps         %xmm8,%xmm8
10109  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
10110  DB  65,15,83,193                        ; rcpps         %xmm9,%xmm0
10111  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
  ; xmm8 = 0.0f, kept for all three max operations
10112  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
10113  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
  ; ---- channel in xmm1: xmm9 = rs, xmm1 = p, xmm10 = q ----
10114  DB  68,15,82,201                        ; rsqrtps       %xmm1,%xmm9
10115  DB  65,15,82,201                        ; rsqrtps       %xmm9,%xmm1
10116  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10117  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10118  DB  15,82,201                           ; rsqrtps       %xmm1,%xmm1
10119  DB  68,15,82,209                        ; rsqrtps       %xmm1,%xmm10
10120  DB  69,15,83,201                        ; rcpps         %xmm9,%xmm9
10121  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
10122  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
10123  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
10124  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
  ; ---- channel in xmm2: xmm9 = rs, xmm2 = p, xmm10 = q ----
10125  DB  68,15,82,202                        ; rsqrtps       %xmm2,%xmm9
10126  DB  65,15,82,209                        ; rsqrtps       %xmm9,%xmm2
10127  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
10128  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
10129  DB  15,82,210                           ; rsqrtps       %xmm2,%xmm2
10130  DB  68,15,82,210                        ; rsqrtps       %xmm2,%xmm10
10131  DB  69,15,83,201                        ; rcpps         %xmm9,%xmm9
10132  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
10133  DB  65,15,83,210                        ; rcpps         %xmm10,%xmm2
10134  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
10135  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
  ; fetch next address from (%rsi) and jump to it
10136  DB  72,173                              ; lods          %ds:(%rsi),%rax
10137  DB  255,224                             ; jmpq          *%rax
10138
; _sk_scale_1_float_sse2
;   Consumes one 8-byte entry from the stream at (%rsi), treats it as a pointer to a
;   single 32-bit float, broadcasts that float to all lanes, and multiplies
;   xmm0, xmm1, xmm2 and xmm3 by it.
;   Scratch: xmm8 (the broadcast factor), rax.
;   Exit: a second lods fetches the next 8-byte value at (%rsi) into rax (rsi steps past
;         it, assuming the direction flag is clear); control jumps to that address.
10139PUBLIC _sk_scale_1_float_sse2
10140_sk_scale_1_float_sse2 LABEL PROC
  ; rax = pointer to the scale factor
10141  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = factor in every lane
10142  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
10143  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
10144  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10145  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
10146  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
10147  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; fetch next address from (%rsi) and jump to it
10148  DB  72,173                              ; lods          %ds:(%rsi),%rax
10149  DB  255,224                             ; jmpq          *%rax
10150
; _sk_scale_u8_sse2
;   Consumes one 8-byte entry from the stream at (%rsi), dereferences it once more to get
;   a byte-buffer base pointer, and loads 4 bytes at base + rdi. Each byte is zero-extended
;   to 32 bits, converted to float and multiplied by 0x3b808081 (~1/255), giving one
;   factor per lane. xmm0, xmm1, xmm2 and xmm3 are then multiplied by those factors.
;   In:  rdi = byte offset into the buffer.
;        NOTE(review): rdi is presumably the current pixel x index -- confirm against the caller.
;   Scratch: xmm8, xmm9 (the per-lane factors), eax/rax.
;   Exit: a second lods fetches the next 8-byte value at (%rsi) into rax (rsi steps past
;         it, assuming the direction flag is clear); control jumps to that address.
10151PUBLIC _sk_scale_u8_sse2
10152_sk_scale_u8_sse2 LABEL PROC
  ; rax = stream entry; rax = *(rax) = buffer base
10153  DB  72,173                              ; lods          %ds:(%rsi),%rax
10154  DB  72,139,0                            ; mov           (%rax),%rax
  ; low dword of xmm8 = 4 bytes at base + rdi
10155  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
  ; interleave with zero twice: u8 -> u16 -> u32 in each lane
10156  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
10157  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
10158  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
10159  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
  ; xmm9 = (~1/255) * byte value
10160  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10161  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
10162  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
10163  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
10164  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
10165  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
10166  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
10167  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
  ; fetch next address from (%rsi) and jump to it
10168  DB  72,173                              ; lods          %ds:(%rsi),%rax
10169  DB  255,224                             ; jmpq          *%rax
10170
; _sk_lerp_1_float_sse2
;   Consumes one 8-byte entry from the stream at (%rsi), treats it as a pointer to a
;   single 32-bit float t, broadcasts t to all lanes, and computes for N = 0..3:
;     xmmN = (xmmN - xmm(N+4)) * t + xmm(N+4)
;   i.e. a linear interpolation from xmm4..xmm7 (t = 0) to xmm0..xmm3 (t = 1).
;   xmm4..xmm7 are read only.
;   Scratch: xmm8 (broadcast t), rax.
;   Exit: a second lods fetches the next 8-byte value at (%rsi) into rax (rsi steps past
;         it, assuming the direction flag is clear); control jumps to that address.
10171PUBLIC _sk_lerp_1_float_sse2
10172_sk_lerp_1_float_sse2 LABEL PROC
  ; rax = pointer to t; xmm8 = t in every lane
10173  DB  72,173                              ; lods          %ds:(%rsi),%rax
10174  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
10175  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm0 = (xmm0 - xmm4) * t + xmm4
10176  DB  15,92,196                           ; subps         %xmm4,%xmm0
10177  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10178  DB  15,88,196                           ; addps         %xmm4,%xmm0
  ; xmm1 = (xmm1 - xmm5) * t + xmm5
10179  DB  15,92,205                           ; subps         %xmm5,%xmm1
10180  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
10181  DB  15,88,205                           ; addps         %xmm5,%xmm1
  ; xmm2 = (xmm2 - xmm6) * t + xmm6
10182  DB  15,92,214                           ; subps         %xmm6,%xmm2
10183  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
10184  DB  15,88,214                           ; addps         %xmm6,%xmm2
  ; xmm3 = (xmm3 - xmm7) * t + xmm7
10185  DB  15,92,223                           ; subps         %xmm7,%xmm3
10186  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
10187  DB  15,88,223                           ; addps         %xmm7,%xmm3
  ; fetch next address from (%rsi) and jump to it
10188  DB  72,173                              ; lods          %ds:(%rsi),%rax
10189  DB  255,224                             ; jmpq          *%rax
10190
; _sk_lerp_u8_sse2
;   Consumes one 8-byte entry from the stream at (%rsi), dereferences it once more to get
;   a byte-buffer base pointer, and loads 4 bytes at base + rdi. Each byte is zero-extended
;   to 32 bits, converted to float and multiplied by 0x3b808081 (~1/255), giving a
;   per-lane coverage value c. Then for N = 0..3:
;     xmmN = (xmmN - xmm(N+4)) * c + xmm(N+4)
;   i.e. a linear interpolation from xmm4..xmm7 (c = 0) to xmm0..xmm3 (c = 1).
;   xmm4..xmm7 are read only.
;   In:  rdi = byte offset into the buffer.
;        NOTE(review): rdi is presumably the current pixel x index -- confirm against the caller.
;   Scratch: xmm8, xmm9 (c), eax/rax.
;   Exit: a second lods fetches the next 8-byte value at (%rsi) into rax (rsi steps past
;         it, assuming the direction flag is clear); control jumps to that address.
10191PUBLIC _sk_lerp_u8_sse2
10192_sk_lerp_u8_sse2 LABEL PROC
  ; rax = stream entry; rax = *(rax) = buffer base
10193  DB  72,173                              ; lods          %ds:(%rsi),%rax
10194  DB  72,139,0                            ; mov           (%rax),%rax
  ; low dword of xmm8 = 4 bytes at base + rdi
10195  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
  ; interleave with zero twice: u8 -> u16 -> u32 in each lane
10196  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
10197  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
10198  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
10199  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
  ; xmm9 = c = (~1/255) * byte value
10200  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10201  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
10202  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
10203  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
  ; xmm0 = (xmm0 - xmm4) * c + xmm4
10204  DB  15,92,196                           ; subps         %xmm4,%xmm0
10205  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
10206  DB  15,88,196                           ; addps         %xmm4,%xmm0
  ; xmm1 = (xmm1 - xmm5) * c + xmm5
10207  DB  15,92,205                           ; subps         %xmm5,%xmm1
10208  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
10209  DB  15,88,205                           ; addps         %xmm5,%xmm1
  ; xmm2 = (xmm2 - xmm6) * c + xmm6
10210  DB  15,92,214                           ; subps         %xmm6,%xmm2
10211  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
10212  DB  15,88,214                           ; addps         %xmm6,%xmm2
  ; xmm3 = (xmm3 - xmm7) * c + xmm7
10213  DB  15,92,223                           ; subps         %xmm7,%xmm3
10214  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
10215  DB  15,88,223                           ; addps         %xmm7,%xmm3
  ; fetch next address from (%rsi) and jump to it
10216  DB  72,173                              ; lods          %ds:(%rsi),%rax
10217  DB  255,224                             ; jmpq          *%rax
10218
; _sk_lerp_565_sse2
; Blends xmm0..xmm2 toward xmm4..xmm6 using per-channel weights unpacked from
; four 16-bit 5-6-5 values, then forces xmm3 to 1.0.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index; the four 16-bit values are read at base + 2*rdi
;          xmm0..xmm2, xmm4..xmm6 = float lanes
;          (presumably src r,g,b and dst r,g,b -- confirm against the C source)
;   Out:   xmm0 = (xmm0 - xmm4) * w0 + xmm4, w0 = (v & 0xf800) * (1/63488)
;          xmm1 = (xmm1 - xmm5) * w1 + xmm5, w1 = (v & 0x07e0) * (1/2016)
;          xmm2 = (xmm2 - xmm6) * w2 + xmm6, w2 = (v & 0x001f) * (1/31)
;          xmm3 = 1.0 in every lane (its incoming value is discarded)
;   Clobb: rax, xmm8..xmm11
;   Exit:  loads the next list entry into rax and jumps to it.
10219PUBLIC _sk_lerp_565_sse2
10220_sk_lerp_565_sse2 LABEL PROC
10221  DB  72,173                              ; lods          %ds:(%rsi),%rax
10222  DB  72,139,0                            ; mov           (%rax),%rax
; Load four 16-bit values and zero-extend them to four 32-bit lanes in xmm8.
10223  DB  243,68,15,126,4,120                 ; movq          (%rax,%rdi,2),%xmm8
10224  DB  102,15,239,219                      ; pxor          %xmm3,%xmm3
10225  DB  102,68,15,97,195                    ; punpcklwd     %xmm3,%xmm8
; w0 (xmm10): bits 15..11, scaled by 0x37842108 (~1/63488) so 0xf800 maps to 1.0.
10226  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
10227  DB  102,15,110,216                      ; movd          %eax,%xmm3
10228  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10229  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
10230  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
10231  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
10232  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
10233  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
10234  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
; w1 (xmm11): bits 10..5, scaled by 0x3a020821 (~1/2016) so 0x7e0 maps to 1.0.
10235  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
10236  DB  102,15,110,216                      ; movd          %eax,%xmm3
10237  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10238  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
10239  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
10240  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
10241  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
10242  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
10243  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
; w2 (xmm3): bits 4..0, scaled by 0x3d042108 (~1/31) so 0x1f maps to 1.0.
10244  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
10245  DB  102,15,110,216                      ; movd          %eax,%xmm3
10246  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10247  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
10248  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
10249  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
10250  DB  102,15,110,216                      ; movd          %eax,%xmm3
10251  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10252  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Per channel: x = (x - d) * w + d, with d taken from xmm4/xmm5/xmm6.
10253  DB  15,92,196                           ; subps         %xmm4,%xmm0
10254  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
10255  DB  15,88,196                           ; addps         %xmm4,%xmm0
10256  DB  15,92,205                           ; subps         %xmm5,%xmm1
10257  DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
10258  DB  15,88,205                           ; addps         %xmm5,%xmm1
10259  DB  15,92,214                           ; subps         %xmm6,%xmm2
10260  DB  15,89,211                           ; mulps         %xmm3,%xmm2
10261  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 was only scratch above; it leaves this stage as 1.0 (0x3f800000) in all lanes.
10262  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
10263  DB  102,15,110,216                      ; movd          %eax,%xmm3
10264  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next list entry and jump to it.
10265  DB  72,173                              ; lods          %ds:(%rsi),%rax
10266  DB  255,224                             ; jmpq          *%rax
10267
; _sk_load_tables_sse2
; Loads four 32-bit values, splits each into four bytes, and maps the low three
; bytes through three float lookup tables; the top byte is scaled to a float.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index; the 16 source bytes are read at base + 4*rdi
;          The entry fetched first points at a record of four pointers:
;            +0x00 source base, +0x08 / +0x10 / +0x18 tables of 4-byte floats
;   Out:   xmm0 = table@+0x08[ v        & 0xff]
;          xmm1 = table@+0x10[(v >> 8)  & 0xff]
;          xmm2 = table@+0x18[(v >> 16) & 0xff]
;          xmm3 = float(v >> 24) * 0x3b808081 (~1/255)
;   Clobb: rax, rcx, r8, r9, r10, r11, xmm8, xmm9, xmm10
;   Exit:  loads the next list entry into rax and jumps to it.
10268PUBLIC _sk_load_tables_sse2
10269_sk_load_tables_sse2 LABEL PROC
10270  DB  72,173                              ; lods          %ds:(%rsi),%rax
10271  DB  72,139,8                            ; mov           (%rax),%rcx
10272  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
10273  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
; Split into byte fields: xmm0 = bits 7..0, xmm9 = bits 15..8, xmm10 = bits 23..16.
10274  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
10275  DB  102,15,110,193                      ; movd          %ecx,%xmm0
10276  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10277  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
10278  DB  102,65,15,114,209,8                 ; psrld         $0x8,%xmm9
10279  DB  102,68,15,219,200                   ; pand          %xmm0,%xmm9
10280  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
10281  DB  102,65,15,114,210,16                ; psrld         $0x10,%xmm10
10282  DB  102,68,15,219,208                   ; pand          %xmm0,%xmm10
10283  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; First table (r8 = 0x8(ctx)): pull the four lane indices out through general
; registers, load one float per lane, and interleave them back into lane order.
10284  DB  102,15,112,216,78                   ; pshufd        $0x4e,%xmm0,%xmm3
10285  DB  102,72,15,126,217                   ; movq          %xmm3,%rcx
10286  DB  65,137,201                          ; mov           %ecx,%r9d
10287  DB  72,193,233,32                       ; shr           $0x20,%rcx
10288  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
10289  DB  69,137,211                          ; mov           %r10d,%r11d
10290  DB  73,193,234,32                       ; shr           $0x20,%r10
10291  DB  243,67,15,16,28,144                 ; movss         (%r8,%r10,4),%xmm3
10292  DB  243,65,15,16,4,136                  ; movss         (%r8,%rcx,4),%xmm0
10293  DB  15,20,216                           ; unpcklps      %xmm0,%xmm3
10294  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
10295  DB  243,67,15,16,12,136                 ; movss         (%r8,%r9,4),%xmm1
10296  DB  15,20,193                           ; unpcklps      %xmm1,%xmm0
10297  DB  15,20,195                           ; unpcklps      %xmm3,%xmm0
; Second table (r8 = 0x10(ctx)), indexed by xmm9. "shr $0x1e" leaves lane1 * 4 in
; rcx (lane 0 is below 256, so it contributes no bits), hence the scale-1 address.
; The 0xffffff masks do not alter indices already limited to 0..255.
10298  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
10299  DB  102,65,15,112,201,78                ; pshufd        $0x4e,%xmm9,%xmm1
10300  DB  102,73,15,126,202                   ; movq          %xmm1,%r10
10301  DB  77,137,209                          ; mov           %r10,%r9
10302  DB  73,193,233,32                       ; shr           $0x20,%r9
10303  DB  102,76,15,126,201                   ; movq          %xmm9,%rcx
10304  DB  65,137,203                          ; mov           %ecx,%r11d
10305  DB  65,129,227,255,255,255,0            ; and           $0xffffff,%r11d
10306  DB  72,193,233,30                       ; shr           $0x1e,%rcx
10307  DB  65,129,226,255,255,255,0            ; and           $0xffffff,%r10d
10308  DB  243,65,15,16,28,8                   ; movss         (%r8,%rcx,1),%xmm3
10309  DB  243,67,15,16,12,136                 ; movss         (%r8,%r9,4),%xmm1
10310  DB  15,20,217                           ; unpcklps      %xmm1,%xmm3
10311  DB  243,67,15,16,12,152                 ; movss         (%r8,%r11,4),%xmm1
10312  DB  243,67,15,16,20,144                 ; movss         (%r8,%r10,4),%xmm2
10313  DB  15,20,202                           ; unpcklps      %xmm2,%xmm1
10314  DB  15,20,203                           ; unpcklps      %xmm3,%xmm1
; Third table (r9 = 0x18(ctx)), indexed by xmm10. rax is reused as scratch from
; here on; the record pointer it held is not needed again.
10315  DB  76,139,72,24                        ; mov           0x18(%rax),%r9
10316  DB  102,65,15,112,210,78                ; pshufd        $0x4e,%xmm10,%xmm2
10317  DB  102,72,15,126,209                   ; movq          %xmm2,%rcx
10318  DB  68,15,183,193                       ; movzwl        %cx,%r8d
10319  DB  72,193,233,32                       ; shr           $0x20,%rcx
10320  DB  102,76,15,126,208                   ; movq          %xmm10,%rax
10321  DB  68,15,183,208                       ; movzwl        %ax,%r10d
10322  DB  72,193,232,30                       ; shr           $0x1e,%rax
10323  DB  243,69,15,16,12,1                   ; movss         (%r9,%rax,1),%xmm9
10324  DB  243,65,15,16,20,137                 ; movss         (%r9,%rcx,4),%xmm2
10325  DB  68,15,20,202                        ; unpcklps      %xmm2,%xmm9
10326  DB  243,67,15,16,20,145                 ; movss         (%r9,%r10,4),%xmm2
10327  DB  243,67,15,16,28,129                 ; movss         (%r9,%r8,4),%xmm3
10328  DB  15,20,211                           ; unpcklps      %xmm3,%xmm2
10329  DB  65,15,20,209                        ; unpcklps      %xmm9,%xmm2
; Top byte is not table-mapped: xmm3 = float(v >> 24) * (~1/255).
10330  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
10331  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
10332  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10333  DB  102,15,110,216                      ; movd          %eax,%xmm3
10334  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10335  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next list entry and jump to it.
10336  DB  72,173                              ; lods          %ds:(%rsi),%rax
10337  DB  255,224                             ; jmpq          *%rax
10338
; _sk_load_a8_sse2
; Loads four consecutive bytes, converts them to floats scaled by ~1/255 into
; xmm3, and zeroes xmm0..xmm2.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index; the four bytes are read at base + rdi
;   Out:   xmm3 = float(byte) * 0x3b808081 (~1/255); xmm0 = xmm1 = xmm2 = 0
;   Clobb: rax
;   Exit:  loads the next list entry into rax and jumps to it.
10339PUBLIC _sk_load_a8_sse2
10340_sk_load_a8_sse2 LABEL PROC
10341  DB  72,173                              ; lods          %ds:(%rsi),%rax
10342  DB  72,139,0                            ; mov           (%rax),%rax
10343  DB  102,15,110,4,56                     ; movd          (%rax,%rdi,1),%xmm0
; Zero-extend bytes -> words -> dwords by interleaving with zero (xmm1).
10344  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10345  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
10346  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
10347  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
10348  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10349  DB  102,15,110,216                      ; movd          %eax,%xmm3
10350  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10351  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; Next entry is fetched before xmm0..xmm2 are cleared; the order is the generator's.
10352  DB  72,173                              ; lods          %ds:(%rsi),%rax
10353  DB  15,87,192                           ; xorps         %xmm0,%xmm0
10354  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10355  DB  15,87,210                           ; xorps         %xmm2,%xmm2
10356  DB  255,224                             ; jmpq          *%rax
10357
; _sk_gather_a8_sse2
; Gathers one byte per lane from computed offsets, converts to floats scaled by
; ~1/255 into xmm3, and zeroes xmm0..xmm2.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          xmm0, xmm1 = float lanes, truncated to integers
;          (presumably x and y coordinates -- confirm against the C source)
;          The fetched entry points at a record: +0x00 byte base pointer,
;          +0x10 32-bit multiplier (presumably a row stride -- confirm)
;   Index: trunc(xmm0) + trunc(xmm1) * multiplier, per lane (32-bit arithmetic)
;   Out:   xmm3 = float(base[index]) * 0x3b808081 (~1/255); xmm0 = xmm1 = xmm2 = 0
;   Clobb: rax, rcx, r8, r9, r10
;   Exit:  loads the next list entry into rax and jumps to it.
10358PUBLIC _sk_gather_a8_sse2
10359_sk_gather_a8_sse2 LABEL PROC
10360  DB  72,173                              ; lods          %ds:(%rsi),%rax
10361  DB  76,139,8                            ; mov           (%rax),%r9
10362  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10363  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10364  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
; Per-lane 32-bit multiply built from two pmuludq (odd lanes, then even lanes),
; keeping the low 32 bits of each product and re-interleaving into lane order.
10365  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10366  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10367  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10368  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10369  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10370  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
10371  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10372  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
; Move the four indices to general registers: r8 = lane 0, rax = lane 1,
; r10 = lane 2, rcx = lane 3 (each zero-extended to 64 bits).
10373  DB  102,72,15,126,192                   ; movq          %xmm0,%rax
10374  DB  65,137,192                          ; mov           %eax,%r8d
10375  DB  72,193,232,32                       ; shr           $0x20,%rax
10376  DB  102,15,112,192,78                   ; pshufd        $0x4e,%xmm0,%xmm0
10377  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10378  DB  65,137,202                          ; mov           %ecx,%r10d
10379  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Load the four bytes and pack them pairwise into two 16-bit words.
10380  DB  71,15,182,20,17                     ; movzbl        (%r9,%r10,1),%r10d
10381  DB  65,15,182,12,9                      ; movzbl        (%r9,%rcx,1),%ecx
10382  DB  193,225,8                           ; shl           $0x8,%ecx
10383  DB  68,9,209                            ; or            %r10d,%ecx
10384  DB  71,15,182,4,1                       ; movzbl        (%r9,%r8,1),%r8d
10385  DB  65,15,182,4,1                       ; movzbl        (%r9,%rax,1),%eax
10386  DB  193,224,8                           ; shl           $0x8,%eax
10387  DB  68,9,192                            ; or            %r8d,%eax
10388  DB  102,15,196,192,0                    ; pinsrw        $0x0,%eax,%xmm0
10389  DB  102,15,196,193,1                    ; pinsrw        $0x1,%ecx,%xmm0
; Zero-extend the four bytes to dwords, convert to float, scale by ~1/255.
10390  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10391  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
10392  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
10393  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
10394  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10395  DB  102,15,110,216                      ; movd          %eax,%xmm3
10396  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10397  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; Fetch the next entry, clear xmm0..xmm2, and jump.
10398  DB  72,173                              ; lods          %ds:(%rsi),%rax
10399  DB  15,87,192                           ; xorps         %xmm0,%xmm0
10400  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10401  DB  102,15,239,210                      ; pxor          %xmm2,%xmm2
10402  DB  255,224                             ; jmpq          *%rax
10403
; _sk_store_a8_sse2
; Scales the four floats in xmm3 by 255, converts them to bytes, and stores the
; four bytes at base + rdi.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index (byte offset from the base pointer)
;          xmm3 = four floats (presumably alpha in [0,1] -- confirm with caller)
;   Out:   4 bytes written to memory; xmm0..xmm7 are left untouched
;   Clobb: rax, rcx, xmm8
;   Exit:  loads the next list entry into rax and jumps to it.
10404PUBLIC _sk_store_a8_sse2
10405_sk_store_a8_sse2 LABEL PROC
10406  DB  72,173                              ; lods          %ds:(%rsi),%rax
10407  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = 255.0 (0x437f0000) * xmm3, converted to integers with cvtps2dq
; (which rounds according to the current MXCSR rounding mode).
10408  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
10409  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
10410  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
10411  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
10412  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
; Shift left then arithmetic-right by 16 sign-extends the low 16 bits of each
; lane; packssdw narrows to words and packuswb narrows to unsigned-saturated bytes.
10413  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
10414  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
10415  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
10416  DB  102,69,15,103,192                   ; packuswb      %xmm8,%xmm8
10417  DB  102,68,15,126,4,56                  ; movd          %xmm8,(%rax,%rdi,1)
; Fetch the next list entry and jump to it.
10418  DB  72,173                              ; lods          %ds:(%rsi),%rax
10419  DB  255,224                             ; jmpq          *%rax
10420
; _sk_load_g8_sse2
; Loads four consecutive bytes, converts them to floats scaled by ~1/255, and
; replicates the result into xmm0, xmm1 and xmm2; xmm3 is set to 1.0.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index; the four bytes are read at base + rdi
;   Out:   xmm0 = xmm1 = xmm2 = float(byte) * 0x3b808081 (~1/255)
;          xmm3 = 1.0 (0x3f800000) in every lane
;   Clobb: rax
;   Exit:  loads the next list entry into rax and jumps to it.
10421PUBLIC _sk_load_g8_sse2
10422_sk_load_g8_sse2 LABEL PROC
10423  DB  72,173                              ; lods          %ds:(%rsi),%rax
10424  DB  72,139,0                            ; mov           (%rax),%rax
10425  DB  102,15,110,4,56                     ; movd          (%rax,%rdi,1),%xmm0
; Zero-extend bytes -> words -> dwords by interleaving with zero (xmm1).
10426  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10427  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
10428  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
10429  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10430  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10431  DB  102,15,110,192                      ; movd          %eax,%xmm0
10432  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10433  DB  15,89,193                           ; mulps         %xmm1,%xmm0
10434  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
10435  DB  102,15,110,216                      ; movd          %eax,%xmm3
10436  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next entry, copy the scaled value into xmm1 and xmm2, and jump.
10437  DB  72,173                              ; lods          %ds:(%rsi),%rax
10438  DB  15,40,200                           ; movaps        %xmm0,%xmm1
10439  DB  15,40,208                           ; movaps        %xmm0,%xmm2
10440  DB  255,224                             ; jmpq          *%rax
10441
; _sk_gather_g8_sse2
; Gathers one byte per lane from computed offsets, converts to floats scaled by
; ~1/255, and replicates the result into xmm0, xmm1 and xmm2; xmm3 is set to 1.0.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          xmm0, xmm1 = float lanes, truncated to integers
;          (presumably x and y coordinates -- confirm against the C source)
;          The fetched entry points at a record: +0x00 byte base pointer,
;          +0x10 32-bit multiplier (presumably a row stride -- confirm)
;   Index: trunc(xmm0) + trunc(xmm1) * multiplier, per lane (32-bit arithmetic)
;   Out:   xmm0 = xmm1 = xmm2 = float(base[index]) * 0x3b808081 (~1/255)
;          xmm3 = 1.0 (0x3f800000) in every lane
;   Clobb: rax, rcx, r8, r9, r10
;   Exit:  loads the next list entry into rax and jumps to it.
10442PUBLIC _sk_gather_g8_sse2
10443_sk_gather_g8_sse2 LABEL PROC
10444  DB  72,173                              ; lods          %ds:(%rsi),%rax
10445  DB  76,139,8                            ; mov           (%rax),%r9
10446  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10447  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10448  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
; Per-lane 32-bit multiply built from two pmuludq (odd lanes, then even lanes),
; keeping the low 32 bits of each product and re-interleaving into lane order.
10449  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10450  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10451  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10452  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10453  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10454  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
10455  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10456  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
; Move the four indices to general registers: r8 = lane 0, rax = lane 1,
; r10 = lane 2, rcx = lane 3 (each zero-extended to 64 bits).
10457  DB  102,72,15,126,192                   ; movq          %xmm0,%rax
10458  DB  65,137,192                          ; mov           %eax,%r8d
10459  DB  72,193,232,32                       ; shr           $0x20,%rax
10460  DB  102,15,112,192,78                   ; pshufd        $0x4e,%xmm0,%xmm0
10461  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10462  DB  65,137,202                          ; mov           %ecx,%r10d
10463  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Load the four bytes and pack them pairwise into two 16-bit words.
10464  DB  71,15,182,20,17                     ; movzbl        (%r9,%r10,1),%r10d
10465  DB  65,15,182,12,9                      ; movzbl        (%r9,%rcx,1),%ecx
10466  DB  193,225,8                           ; shl           $0x8,%ecx
10467  DB  68,9,209                            ; or            %r10d,%ecx
10468  DB  71,15,182,4,1                       ; movzbl        (%r9,%r8,1),%r8d
10469  DB  65,15,182,4,1                       ; movzbl        (%r9,%rax,1),%eax
10470  DB  193,224,8                           ; shl           $0x8,%eax
10471  DB  68,9,192                            ; or            %r8d,%eax
10472  DB  102,15,196,192,0                    ; pinsrw        $0x0,%eax,%xmm0
10473  DB  102,15,196,193,1                    ; pinsrw        $0x1,%ecx,%xmm0
; Zero-extend the four bytes to dwords, convert to float, scale by ~1/255.
10474  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10475  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
10476  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
10477  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10478  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10479  DB  102,15,110,192                      ; movd          %eax,%xmm0
10480  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10481  DB  15,89,193                           ; mulps         %xmm1,%xmm0
10482  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
10483  DB  102,15,110,216                      ; movd          %eax,%xmm3
10484  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next entry, copy the scaled value into xmm1 and xmm2, and jump.
10485  DB  72,173                              ; lods          %ds:(%rsi),%rax
10486  DB  15,40,200                           ; movaps        %xmm0,%xmm1
10487  DB  15,40,208                           ; movaps        %xmm0,%xmm2
10488  DB  255,224                             ; jmpq          *%rax
10489
; _sk_gather_i8_sse2
; Two-level gather: fetches one byte per lane from computed offsets, uses each
; byte as an index into a table of 32-bit entries, then unpacks each entry's
; four bytes into floats scaled by ~1/255.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          xmm0, xmm1 = float lanes, truncated to integers
;          (presumably x and y coordinates -- confirm against the C source)
;          The fetched entry points at a record: +0x00 byte base pointer,
;          +0x08 pointer to the 32-bit table, +0x10 32-bit multiplier
;   Index: trunc(xmm0) + trunc(xmm1) * multiplier, per lane (32-bit arithmetic)
;   Out:   with e = table[base[index]]:
;          xmm0 = ( e        & 0xff) * k,  xmm1 = ((e >> 8)  & 0xff) * k
;          xmm2 = ((e >> 16) & 0xff) * k,  xmm3 = ( e >> 24)         * k
;          where k = 0x3b808081 (~1/255)
;   Clobb: rax, rcx, r8, r9, r10, r11, xmm8
;   Exit:  loads the next list entry into rax and jumps to it.
; NOTE(review): if the first fetched entry is zero, a second entry is fetched
; into rax, but r8 keeps the zero and is still dereferenced at 0x8(%r8) further
; down. Presumably the entry is never zero in practice -- confirm with the
; generator's C source.
10490PUBLIC _sk_gather_i8_sse2
10491_sk_gather_i8_sse2 LABEL PROC
10492  DB  72,173                              ; lods          %ds:(%rsi),%rax
10493  DB  73,137,192                          ; mov           %rax,%r8
; If the fetched entry is zero, "je" lands on the second lods below; otherwise
; rax is restored from r8 and "jmp" skips that lods.
10494  DB  77,133,192                          ; test          %r8,%r8
10495  DB  116,5                               ; je            1729 <_sk_gather_i8_sse2+0xf>
10496  DB  76,137,192                          ; mov           %r8,%rax
10497  DB  235,2                               ; jmp           172b <_sk_gather_i8_sse2+0x11>
10498  DB  72,173                              ; lods          %ds:(%rsi),%rax
10499  DB  76,139,16                           ; mov           (%rax),%r10
10500  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10501  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10502  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
; Per-lane 32-bit multiply built from two pmuludq (odd lanes, then even lanes),
; keeping the low 32 bits of each product and re-interleaving into lane order.
10503  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10504  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10505  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10506  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10507  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10508  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
10509  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10510  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
; First level: move the four offsets to general registers (r9 = lane 0,
; rax = lane 1, r11 = lane 2, rcx = lane 3) and load one byte for each.
10511  DB  102,72,15,126,192                   ; movq          %xmm0,%rax
10512  DB  65,137,193                          ; mov           %eax,%r9d
10513  DB  72,193,232,32                       ; shr           $0x20,%rax
10514  DB  102,15,112,192,78                   ; pshufd        $0x4e,%xmm0,%xmm0
10515  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10516  DB  65,137,203                          ; mov           %ecx,%r11d
10517  DB  72,193,233,32                       ; shr           $0x20,%rcx
10518  DB  71,15,182,28,26                     ; movzbl        (%r10,%r11,1),%r11d
10519  DB  65,15,182,12,10                     ; movzbl        (%r10,%rcx,1),%ecx
10520  DB  193,225,8                           ; shl           $0x8,%ecx
10521  DB  68,9,217                            ; or            %r11d,%ecx
10522  DB  71,15,182,12,10                     ; movzbl        (%r10,%r9,1),%r9d
10523  DB  65,15,182,4,2                       ; movzbl        (%r10,%rax,1),%eax
10524  DB  193,224,8                           ; shl           $0x8,%eax
10525  DB  68,9,200                            ; or            %r9d,%eax
10526  DB  102,15,196,192,0                    ; pinsrw        $0x0,%eax,%xmm0
10527  DB  102,15,196,193,1                    ; pinsrw        $0x1,%ecx,%xmm0
; Zero-extend the four bytes to one 32-bit table index per lane.
10528  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
10529  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
10530  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
; Second level: load a 32-bit entry per lane from the table at 0x8(%r8).
; "shr $0x1e" leaves lane1 * 4 in rcx (lane 0 is below 256, so it contributes
; no bits), hence the scale-1 address for that lane.
10531  DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
10532  DB  102,72,15,126,200                   ; movq          %xmm1,%rax
10533  DB  68,15,182,200                       ; movzbl        %al,%r9d
10534  DB  72,193,232,32                       ; shr           $0x20,%rax
10535  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10536  DB  77,139,64,8                         ; mov           0x8(%r8),%r8
10537  DB  68,15,182,209                       ; movzbl        %cl,%r10d
10538  DB  72,193,233,30                       ; shr           $0x1e,%rcx
10539  DB  102,65,15,110,4,8                   ; movd          (%r8,%rcx,1),%xmm0
10540  DB  102,65,15,110,12,128                ; movd          (%r8,%rax,4),%xmm1
10541  DB  102,15,98,193                       ; punpckldq     %xmm1,%xmm0
10542  DB  102,67,15,110,28,144                ; movd          (%r8,%r10,4),%xmm3
10543  DB  102,67,15,110,12,136                ; movd          (%r8,%r9,4),%xmm1
10544  DB  102,15,98,217                       ; punpckldq     %xmm1,%xmm3
10545  DB  102,15,98,216                       ; punpckldq     %xmm0,%xmm3
; Split each 32-bit entry (xmm3) into its four bytes.
10546  DB  184,255,0,0,0                       ; mov           $0xff,%eax
10547  DB  102,15,110,192                      ; movd          %eax,%xmm0
10548  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10549  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
10550  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
10551  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
10552  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
10553  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
10554  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
10555  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each byte field to float and scale by ~1/255 (xmm8).
10556  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
10557  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10558  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
10559  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
10560  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10561  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
10562  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
10563  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
10564  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
10565  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
10566  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
10567  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next list entry and jump to it.
10568  DB  72,173                              ; lods          %ds:(%rsi),%rax
10569  DB  255,224                             ; jmpq          *%rax
10570
; _sk_load_565_sse2
; Loads four consecutive 16-bit 5-6-5 values and unpacks each field into a
; float scaled so the field's maximum maps to 1.0; xmm3 is set to 1.0.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          rdi = element index; the four 16-bit values are read at base + 2*rdi
;   Out:   xmm0 = (v & 0xf800) * 0x37842108 (~1/63488)
;          xmm1 = (v & 0x07e0) * 0x3a020821 (~1/2016)
;          xmm2 = (v & 0x001f) * 0x3d042108 (~1/31)
;          xmm3 = 1.0 (0x3f800000) in every lane
;   Clobb: rax
;   Exit:  loads the next list entry into rax and jumps to it.
10571PUBLIC _sk_load_565_sse2
10572_sk_load_565_sse2 LABEL PROC
10573  DB  72,173                              ; lods          %ds:(%rsi),%rax
10574  DB  72,139,0                            ; mov           (%rax),%rax
; Load four 16-bit values and zero-extend them to four 32-bit lanes in xmm2.
10575  DB  243,15,126,20,120                   ; movq          (%rax,%rdi,2),%xmm2
10576  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
10577  DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
; Bits 15..11 -> xmm0. The mask is applied in place (no shift); the scale
; constant accounts for the field's bit position.
10578  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
10579  DB  102,15,110,192                      ; movd          %eax,%xmm0
10580  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10581  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
10582  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10583  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
10584  DB  102,15,110,192                      ; movd          %eax,%xmm0
10585  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10586  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; Bits 10..5 -> xmm1.
10587  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
10588  DB  102,15,110,200                      ; movd          %eax,%xmm1
10589  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
10590  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
10591  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
10592  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
10593  DB  102,15,110,200                      ; movd          %eax,%xmm1
10594  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
10595  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; Bits 4..0 -> xmm2 (the packed source in xmm2 is consumed here).
10596  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
10597  DB  102,15,110,216                      ; movd          %eax,%xmm3
10598  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10599  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
10600  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
10601  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
10602  DB  102,15,110,208                      ; movd          %eax,%xmm2
10603  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
10604  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = 1.0 in every lane.
10605  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
10606  DB  102,15,110,216                      ; movd          %eax,%xmm3
10607  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next list entry and jump to it.
10608  DB  72,173                              ; lods          %ds:(%rsi),%rax
10609  DB  255,224                             ; jmpq          *%rax
10610
; _sk_gather_565_sse2
; Gathers one 16-bit 5-6-5 value per lane from computed offsets and unpacks each
; field into a float scaled so the field's maximum maps to 1.0; xmm3 is set to 1.0.
;   In:    rsi = cursor into a list of 8-byte entries (read with lods)
;          xmm0, xmm1 = float lanes, truncated to integers
;          (presumably x and y coordinates -- confirm against the C source)
;          The fetched entry points at a record: +0x00 base pointer to 16-bit
;          elements, +0x10 32-bit multiplier (presumably a row stride -- confirm)
;   Index: trunc(xmm0) + trunc(xmm1) * multiplier, per lane (32-bit arithmetic);
;          each value is read at base + 2*index
;   Out:   xmm0 = (v & 0xf800) * 0x37842108 (~1/63488)
;          xmm1 = (v & 0x07e0) * 0x3a020821 (~1/2016)
;          xmm2 = (v & 0x001f) * 0x3d042108 (~1/31)
;          xmm3 = 1.0 (0x3f800000) in every lane
;   Clobb: rax, rcx, r8, r9, r10
;   Exit:  loads the next list entry into rax and jumps to it.
10611PUBLIC _sk_gather_565_sse2
10612_sk_gather_565_sse2 LABEL PROC
10613  DB  72,173                              ; lods          %ds:(%rsi),%rax
10614  DB  76,139,8                            ; mov           (%rax),%r9
10615  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10616  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10617  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
; Per-lane 32-bit multiply built from two pmuludq (odd lanes, then even lanes),
; keeping the low 32 bits of each product and re-interleaving into lane order.
10618  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10619  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10620  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10621  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10622  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10623  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
10624  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10625  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
; Move the four indices to general registers: r10 = lane 0, rcx = lane 1,
; r8 = lane 2, rax = lane 3 (each zero-extended to 64 bits).
10626  DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
10627  DB  102,72,15,126,200                   ; movq          %xmm1,%rax
10628  DB  65,137,192                          ; mov           %eax,%r8d
10629  DB  72,193,232,32                       ; shr           $0x20,%rax
10630  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10631  DB  65,137,202                          ; mov           %ecx,%r10d
10632  DB  72,193,233,32                       ; shr           $0x20,%rcx
; Insert the four 16-bit values into words 0..3 of xmm2. Words 4..7 keep stale
; data, which the punpcklwd below never reads.
10633  DB  102,67,15,196,20,81,0               ; pinsrw        $0x0,(%r9,%r10,2),%xmm2
10634  DB  102,65,15,196,20,73,1               ; pinsrw        $0x1,(%r9,%rcx,2),%xmm2
10635  DB  67,15,183,12,65                     ; movzwl        (%r9,%r8,2),%ecx
10636  DB  102,15,196,209,2                    ; pinsrw        $0x2,%ecx,%xmm2
10637  DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
10638  DB  102,15,196,208,3                    ; pinsrw        $0x3,%eax,%xmm2
10639  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
10640  DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
; Bits 15..11 -> xmm0. The mask is applied in place (no shift); the scale
; constant accounts for the field's bit position.
10641  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
10642  DB  102,15,110,192                      ; movd          %eax,%xmm0
10643  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10644  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
10645  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10646  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
10647  DB  102,15,110,192                      ; movd          %eax,%xmm0
10648  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10649  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; Bits 10..5 -> xmm1.
10650  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
10651  DB  102,15,110,200                      ; movd          %eax,%xmm1
10652  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
10653  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
10654  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
10655  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
10656  DB  102,15,110,200                      ; movd          %eax,%xmm1
10657  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
10658  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; Bits 4..0 -> xmm2 (the packed source in xmm2 is consumed here).
10659  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
10660  DB  102,15,110,216                      ; movd          %eax,%xmm3
10661  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10662  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
10663  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
10664  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
10665  DB  102,15,110,208                      ; movd          %eax,%xmm2
10666  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
10667  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = 1.0 in every lane.
10668  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
10669  DB  102,15,110,216                      ; movd          %eax,%xmm3
10670  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next list entry and jump to it.
10671  DB  72,173                              ; lods          %ds:(%rsi),%rax
10672  DB  255,224                             ; jmpq          *%rax
10673
10674PUBLIC _sk_store_565_sse2
; Packs the four float lanes of xmm0, xmm1 and xmm2 into four 16-bit words laid
; out as 5 bits (from xmm0), 6 bits (from xmm1), 5 bits (from xmm2), and writes
; the 8 resulting bytes at base + 2*rdi.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the destination base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index, xmm0/xmm1/xmm2 = channels (presumably r, g, b).
;   Writes: rax, rcx, rsi (advanced by two entries), xmm8-xmm10, 8 bytes of memory.
;   xmm0-xmm3 are not modified.
10675_sk_store_565_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = destination base pointer.
10676  DB  72,173                              ; lods          %ds:(%rsi),%rax
10677  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm8 = 31.0f (0x41f80000) in all four lanes.
10678  DB  185,0,0,248,65                      ; mov           $0x41f80000,%ecx
10679  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
10680  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm9 = int(xmm0 * 31.0f) << 11. cvtps2dq rounds using the current MXCSR mode.
10681  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
10682  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
10683  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
10684  DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
  ; xmm10 = (int(xmm1 * 63.0f) << 5) | xmm9. 0x427c0000 is 63.0f.
10685  DB  185,0,0,124,66                      ; mov           $0x427c0000,%ecx
10686  DB  102,68,15,110,209                   ; movd          %ecx,%xmm10
10687  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
10688  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
10689  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
10690  DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
10691  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
  ; xmm8 = int(xmm2 * 31.0f) | xmm10: the complete 16-bit value in each 32-bit lane.
10692  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
10693  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
10694  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
  ; Sign-extend the low 16 bits of each lane so that packssdw's signed saturation
  ; passes them through unchanged, then narrow to four 16-bit words.
10695  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
10696  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
10697  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
  ; Store the low 8 bytes (four words) at rax + rdi*2.
10698  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
  ; Fetch the next stream entry and tail-jump to it.
10699  DB  72,173                              ; lods          %ds:(%rsi),%rax
10700  DB  255,224                             ; jmpq          *%rax
10701
10702PUBLIC _sk_load_4444_sse2
; Reads four 16-bit words from base + 2*rdi and splits each into four 4-bit
; fields, converting every field to a float scaled so that a full field (0xf)
; becomes approximately 1.0.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the source base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index.
;   Out:    xmm0 = bits 15..12, xmm1 = bits 11..8, xmm2 = bits 7..4,
;           xmm3 = bits 3..0 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rsi (advanced by two entries), xmm0-xmm3, xmm8, xmm9.
10703_sk_load_4444_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = source base pointer.
10704  DB  72,173                              ; lods          %ds:(%rsi),%rax
10705  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm9 = four 16-bit words from rax + rdi*2, zero-extended to 32-bit lanes.
  ; rax is free after this load and is reused below for immediates.
10706  DB  243,68,15,126,12,120                ; movq          (%rax,%rdi,2),%xmm9
10707  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
10708  DB  102,68,15,97,200                    ; punpcklwd     %xmm0,%xmm9
  ; xmm0 = float(px & 0xf000) * 0x37888889 (~1/61440 = 1/(15 << 12)).
10709  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
10710  DB  102,15,110,192                      ; movd          %eax,%xmm0
10711  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10712  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
10713  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10714  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
10715  DB  102,15,110,192                      ; movd          %eax,%xmm0
10716  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10717  DB  15,89,193                           ; mulps         %xmm1,%xmm0
  ; xmm1 = float(px & 0x0f00) * 0x39888889 (~1/3840 = 1/(15 << 8)).
10718  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
10719  DB  102,15,110,200                      ; movd          %eax,%xmm1
10720  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
10721  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
10722  DB  15,91,209                           ; cvtdq2ps      %xmm1,%xmm2
10723  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
10724  DB  102,15,110,200                      ; movd          %eax,%xmm1
10725  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
10726  DB  15,89,202                           ; mulps         %xmm2,%xmm1
  ; xmm2 = float(px & 0x00f0) * 0x3b888889 (~1/240 = 1/(15 << 4)).
10727  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
10728  DB  102,15,110,208                      ; movd          %eax,%xmm2
10729  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
10730  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
10731  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
10732  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
10733  DB  102,15,110,208                      ; movd          %eax,%xmm2
10734  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
10735  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; xmm3 = float(px & 0x000f) * 0x3d888889 (~1/15).
10736  DB  184,15,0,0,0                        ; mov           $0xf,%eax
10737  DB  102,15,110,216                      ; movd          %eax,%xmm3
10738  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10739  DB  102,65,15,219,217                   ; pand          %xmm9,%xmm3
10740  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
10741  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
10742  DB  102,15,110,216                      ; movd          %eax,%xmm3
10743  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10744  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
10745  DB  72,173                              ; lods          %ds:(%rsi),%rax
10746  DB  255,224                             ; jmpq          *%rax
10747
10748PUBLIC _sk_gather_4444_sse2
; Computes one element index per lane as trunc(xmm0) + trunc(xmm1) * m, where m
; is the 32-bit value at offset 0x10 of the context, loads the 16-bit word at
; base + 2*index for each lane, and unpacks it into four 4-bit fields converted
; to floats scaled so that a full field (0xf) becomes approximately 1.0.
;   In:     rsi -> stream of 8-byte entries. The first entry read here is the
;                  context pointer (qword base pointer at +0, dword m at +0x10;
;                  m is presumably a row stride in pixels - confirm against the
;                  caller); the second is the address jumped to on exit.
;           xmm0, xmm1 = float coordinates (presumably x and y).
;   Out:    xmm0 = bits 15..12, xmm1 = bits 11..8, xmm2 = bits 7..4,
;           xmm3 = bits 3..0 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rcx, r8, r9, r10, rsi (advanced by two entries), xmm0-xmm3,
;           xmm8, xmm9.
;   Indices are used as zero-extended 32-bit values; no bounds check is performed.
10749_sk_gather_4444_sse2 LABEL PROC
  ; rax = context pointer (rsi += 8; assumes the direction flag is clear),
  ; r9 = base pointer.
10750  DB  72,173                              ; lods          %ds:(%rsi),%rax
10751  DB  76,139,8                            ; mov           (%rax),%r9
  ; xmm1 = trunc(xmm1) * m, as a 4-lane 32-bit multiply. SSE2 only provides
  ; pmuludq (even lanes, 64-bit products), so the odd lanes are multiplied
  ; separately and the low 32 bits of all four products are re-interleaved.
10752  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10753  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10754  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
10755  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10756  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10757  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10758  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10759  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10760  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
  ; xmm0 = trunc(xmm0) + xmm1: the four element indices.
10761  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10762  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
  ; Move the indices to general registers, zero-extended to 64 bits:
  ; r10 = lane 0, rcx = lane 1, r8 = lane 2, rax = lane 3.
10763  DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
10764  DB  102,72,15,126,200                   ; movq          %xmm1,%rax
10765  DB  65,137,192                          ; mov           %eax,%r8d
10766  DB  72,193,232,32                       ; shr           $0x20,%rax
10767  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10768  DB  65,137,202                          ; mov           %ecx,%r10d
10769  DB  72,193,233,32                       ; shr           $0x20,%rcx
  ; Gather the four 16-bit words into words 0..3 of xmm9. The upper words of
  ; xmm9 are not initialised here; they are discarded by the punpcklwd below.
10770  DB  102,71,15,196,12,81,0               ; pinsrw        $0x0,(%r9,%r10,2),%xmm9
10771  DB  102,69,15,196,12,73,1               ; pinsrw        $0x1,(%r9,%rcx,2),%xmm9
10772  DB  67,15,183,12,65                     ; movzwl        (%r9,%r8,2),%ecx
10773  DB  102,68,15,196,201,2                 ; pinsrw        $0x2,%ecx,%xmm9
10774  DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
10775  DB  102,68,15,196,200,3                 ; pinsrw        $0x3,%eax,%xmm9
  ; Zero-extend the four words to 32-bit lanes.
10776  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
10777  DB  102,68,15,97,200                    ; punpcklwd     %xmm0,%xmm9
  ; xmm0 = float(px & 0xf000) * 0x37888889 (~1/61440 = 1/(15 << 12)).
10778  DB  184,0,240,0,0                       ; mov           $0xf000,%eax
10779  DB  102,15,110,192                      ; movd          %eax,%xmm0
10780  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
10781  DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
10782  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
10783  DB  184,137,136,136,55                  ; mov           $0x37888889,%eax
10784  DB  102,15,110,192                      ; movd          %eax,%xmm0
10785  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
10786  DB  15,89,193                           ; mulps         %xmm1,%xmm0
  ; xmm1 = float(px & 0x0f00) * 0x39888889 (~1/3840 = 1/(15 << 8)).
10787  DB  184,0,15,0,0                        ; mov           $0xf00,%eax
10788  DB  102,15,110,200                      ; movd          %eax,%xmm1
10789  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
10790  DB  102,65,15,219,201                   ; pand          %xmm9,%xmm1
10791  DB  15,91,209                           ; cvtdq2ps      %xmm1,%xmm2
10792  DB  184,137,136,136,57                  ; mov           $0x39888889,%eax
10793  DB  102,15,110,200                      ; movd          %eax,%xmm1
10794  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
10795  DB  15,89,202                           ; mulps         %xmm2,%xmm1
  ; xmm2 = float(px & 0x00f0) * 0x3b888889 (~1/240 = 1/(15 << 4)).
10796  DB  184,240,0,0,0                       ; mov           $0xf0,%eax
10797  DB  102,15,110,208                      ; movd          %eax,%xmm2
10798  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
10799  DB  102,65,15,219,209                   ; pand          %xmm9,%xmm2
10800  DB  68,15,91,194                        ; cvtdq2ps      %xmm2,%xmm8
10801  DB  184,137,136,136,59                  ; mov           $0x3b888889,%eax
10802  DB  102,15,110,208                      ; movd          %eax,%xmm2
10803  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
10804  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; xmm3 = float(px & 0x000f) * 0x3d888889 (~1/15).
10805  DB  184,15,0,0,0                        ; mov           $0xf,%eax
10806  DB  102,15,110,216                      ; movd          %eax,%xmm3
10807  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
10808  DB  102,65,15,219,217                   ; pand          %xmm9,%xmm3
10809  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
10810  DB  184,137,136,136,61                  ; mov           $0x3d888889,%eax
10811  DB  102,15,110,216                      ; movd          %eax,%xmm3
10812  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
10813  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
10814  DB  72,173                              ; lods          %ds:(%rsi),%rax
10815  DB  255,224                             ; jmpq          *%rax
10816
10817PUBLIC _sk_store_4444_sse2
; Packs the four float lanes of xmm0-xmm3 into four 16-bit words made of four
; 4-bit fields (xmm0 in bits 15..12, xmm1 in 11..8, xmm2 in 7..4, xmm3 in 3..0)
; and writes the 8 resulting bytes at base + 2*rdi.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the destination base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index, xmm0-xmm3 = channels (presumably r, g, b, a).
;   Writes: rax, rcx, rsi (advanced by two entries), xmm8-xmm10, 8 bytes of memory.
;   xmm0-xmm3 are not modified.
10818_sk_store_4444_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = destination base pointer.
10819  DB  72,173                              ; lods          %ds:(%rsi),%rax
10820  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm8 = 15.0f (0x41700000) in all four lanes.
10821  DB  185,0,0,112,65                      ; mov           $0x41700000,%ecx
10822  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
10823  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm9 = int(xmm0 * 15.0f) << 12. cvtps2dq rounds using the current MXCSR mode.
10824  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
10825  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
10826  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
10827  DB  102,65,15,114,241,12                ; pslld         $0xc,%xmm9
  ; xmm10 = (int(xmm1 * 15.0f) << 8) | xmm9.
10828  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
10829  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
10830  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
10831  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
10832  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
  ; xmm9 = int(xmm2 * 15.0f) << 4.
10833  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
10834  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
10835  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
10836  DB  102,65,15,114,241,4                 ; pslld         $0x4,%xmm9
  ; xmm8 = int(xmm3 * 15.0f) | xmm9 | xmm10: the complete 16-bit value per lane.
10837  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
10838  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
10839  DB  102,69,15,86,193                    ; orpd          %xmm9,%xmm8
10840  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
  ; Sign-extend the low 16 bits of each lane so that packssdw's signed saturation
  ; passes them through unchanged, then narrow to four 16-bit words.
10841  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
10842  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
10843  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
  ; Store the low 8 bytes (four words) at rax + rdi*2.
10844  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
  ; Fetch the next stream entry and tail-jump to it.
10845  DB  72,173                              ; lods          %ds:(%rsi),%rax
10846  DB  255,224                             ; jmpq          *%rax
10847
10848PUBLIC _sk_load_8888_sse2
; Reads four 32-bit pixels (16 bytes, unaligned load) from base + 4*rdi and
; splits each into its four bytes, converting every byte to a float scaled by
; ~1/255 so that 0xff becomes approximately 1.0.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the source base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index.
;   Out:    xmm0 = bits 7..0, xmm1 = bits 15..8, xmm2 = bits 23..16,
;           xmm3 = bits 31..24 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rsi (advanced by two entries), xmm0-xmm3, xmm8.
10849_sk_load_8888_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = source base pointer.
10850  DB  72,173                              ; lods          %ds:(%rsi),%rax
10851  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm3 = four packed pixels. rax is reused for immediates after this load.
10852  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
  ; xmm0 = 0x000000ff mask in all four lanes.
10853  DB  184,255,0,0,0                       ; mov           $0xff,%eax
10854  DB  102,15,110,192                      ; movd          %eax,%xmm0
10855  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
  ; xmm1 = (px >> 8) & 0xff
10856  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
10857  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
10858  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
  ; xmm2 = (px >> 16) & 0xff
10859  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
10860  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
10861  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
  ; xmm0 = float(px & 0xff)
10862  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
10863  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
  ; xmm8 = 0x3b808081 (~1/255) in all four lanes; scale each channel by it.
10864  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10865  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
10866  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
10867  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10868  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
10869  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
10870  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
10871  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; xmm3 = float(px >> 24) * (1/255). The logical shift already isolates the byte.
10872  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
10873  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
10874  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
10875  DB  72,173                              ; lods          %ds:(%rsi),%rax
10876  DB  255,224                             ; jmpq          *%rax
10877
10878PUBLIC _sk_gather_8888_sse2
; Computes one element index per lane as trunc(xmm0) + trunc(xmm1) * m, where m
; is the 32-bit value at offset 0x10 of the context, loads the 32-bit pixel at
; base + 4*index for each lane, and splits it into four bytes converted to
; floats scaled by ~1/255.
;   In:     rsi -> stream of 8-byte entries. The first entry read here is the
;                  context pointer (qword base pointer at +0, dword m at +0x10;
;                  m is presumably a row stride in pixels - confirm against the
;                  caller); the second is the address jumped to on exit.
;           xmm0, xmm1 = float coordinates (presumably x and y).
;   Out:    xmm0 = bits 7..0, xmm1 = bits 15..8, xmm2 = bits 23..16,
;           xmm3 = bits 31..24 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rcx, r8, r9, r10, rsi (advanced by two entries), xmm0-xmm3, xmm8.
;   Indices are used as zero-extended 32-bit values; no bounds check is performed.
10879_sk_gather_8888_sse2 LABEL PROC
  ; rax = context pointer (rsi += 8; assumes the direction flag is clear),
  ; r9 = base pointer.
10880  DB  72,173                              ; lods          %ds:(%rsi),%rax
10881  DB  76,139,8                            ; mov           (%rax),%r9
  ; xmm1 = trunc(xmm1) * m, as a 4-lane 32-bit multiply. SSE2 only provides
  ; pmuludq (even lanes, 64-bit products), so the odd lanes are multiplied
  ; separately and the low 32 bits of all four products are re-interleaved.
10882  DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
10883  DB  102,15,110,80,16                    ; movd          0x10(%rax),%xmm2
10884  DB  102,15,112,210,0                    ; pshufd        $0x0,%xmm2,%xmm2
10885  DB  102,15,112,217,245                  ; pshufd        $0xf5,%xmm1,%xmm3
10886  DB  102,15,244,218                      ; pmuludq       %xmm2,%xmm3
10887  DB  102,15,112,219,232                  ; pshufd        $0xe8,%xmm3,%xmm3
10888  DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
10889  DB  102,15,112,202,232                  ; pshufd        $0xe8,%xmm2,%xmm1
10890  DB  102,15,98,203                       ; punpckldq     %xmm3,%xmm1
  ; xmm0 = trunc(xmm0) + xmm1: the four element indices.
10891  DB  243,15,91,192                       ; cvttps2dq     %xmm0,%xmm0
10892  DB  102,15,254,193                      ; paddd         %xmm1,%xmm0
  ; Move the indices to general registers, zero-extended to 64 bits:
  ; r10 = lane 0, rcx = lane 1, r8 = lane 2, rax = lane 3.
10893  DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
10894  DB  102,72,15,126,200                   ; movq          %xmm1,%rax
10895  DB  65,137,192                          ; mov           %eax,%r8d
10896  DB  72,193,232,32                       ; shr           $0x20,%rax
10897  DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
10898  DB  65,137,202                          ; mov           %ecx,%r10d
10899  DB  72,193,233,32                       ; shr           $0x20,%rcx
  ; Gather four pixels: xmm0 = [px(lane1), px(lane3)], xmm3 = [px(lane0), px(lane2)],
  ; then interleave so that xmm3 = [px(lane0), px(lane1), px(lane2), px(lane3)].
10900  DB  102,65,15,110,4,137                 ; movd          (%r9,%rcx,4),%xmm0
10901  DB  102,65,15,110,12,129                ; movd          (%r9,%rax,4),%xmm1
10902  DB  102,15,98,193                       ; punpckldq     %xmm1,%xmm0
10903  DB  102,67,15,110,28,145                ; movd          (%r9,%r10,4),%xmm3
10904  DB  102,67,15,110,12,129                ; movd          (%r9,%r8,4),%xmm1
10905  DB  102,15,98,217                       ; punpckldq     %xmm1,%xmm3
10906  DB  102,15,98,216                       ; punpckldq     %xmm0,%xmm3
  ; xmm0 = 0x000000ff mask in all four lanes.
10907  DB  184,255,0,0,0                       ; mov           $0xff,%eax
10908  DB  102,15,110,192                      ; movd          %eax,%xmm0
10909  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
  ; xmm1 = (px >> 8) & 0xff
10910  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
10911  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
10912  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
  ; xmm2 = (px >> 16) & 0xff
10913  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
10914  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
10915  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
  ; xmm0 = float(px & 0xff)
10916  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
10917  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
  ; xmm8 = 0x3b808081 (~1/255) in all four lanes; scale each channel by it.
10918  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
10919  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
10920  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
10921  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
10922  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
10923  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
10924  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
10925  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; xmm3 = float(px >> 24) * (1/255). The logical shift already isolates the byte.
10926  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
10927  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
10928  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
10929  DB  72,173                              ; lods          %ds:(%rsi),%rax
10930  DB  255,224                             ; jmpq          *%rax
10931
10932PUBLIC _sk_store_8888_sse2
; Scales the four float lanes of xmm0-xmm3 by 255, converts them to integers and
; packs them into four 32-bit pixels (xmm0 in bits 7..0, xmm1 in 15..8, xmm2 in
; 23..16, xmm3 in 31..24), written as 16 bytes (unaligned store) at base + 4*rdi.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the destination base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index, xmm0-xmm3 = channels (presumably r, g, b, a).
;   Writes: rax, rcx, rsi (advanced by two entries), xmm8-xmm10, 16 bytes of memory.
;   xmm0-xmm3 are not modified. The converted values are not masked or clamped
;   here, so inputs are presumably expected to already lie in [0, 1].
10933_sk_store_8888_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = destination base pointer.
10934  DB  72,173                              ; lods          %ds:(%rsi),%rax
10935  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm8 = 255.0f (0x437f0000) in all four lanes.
10936  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
10937  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
10938  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; xmm9 = int(xmm0 * 255.0f). cvtps2dq rounds using the current MXCSR mode.
10939  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
10940  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
10941  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
  ; xmm10 = (int(xmm1 * 255.0f) << 8) | xmm9.
10942  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
10943  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
10944  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
10945  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
10946  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
  ; xmm9 = int(xmm2 * 255.0f) << 16.
10947  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
10948  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
10949  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
10950  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
  ; xmm8 = (int(xmm3 * 255.0f) << 24) | xmm9 | xmm10: the packed pixels.
10951  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
10952  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
10953  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
10954  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
10955  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
  ; Store four pixels at rax + rdi*4.
10956  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
  ; Fetch the next stream entry and tail-jump to it.
10957  DB  72,173                              ; lods          %ds:(%rsi),%rax
10958  DB  255,224                             ; jmpq          *%rax
10959
10960PUBLIC _sk_load_f16_sse2
; Reads 32 bytes (four pixels of four 16-bit values each) from base + 8*rdi,
; transposes them so each output register holds one component for all four
; pixels, and expands each 16-bit value to a 32-bit float by shifting its bits
; left by 13 and multiplying by 2^112 (a half-to-float conversion, going by the
; stage name and the exponent rebias).
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the source base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index.
;   Out:    xmm0-xmm3 = components 0..3 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rsi (advanced by two entries), xmm0-xmm3, xmm8-xmm10.
;   Any 16-bit input for which 0x0400 > value under a SIGNED compare is replaced
;   by zero before conversion. That covers values below 0x0400 and, because the
;   compare is signed, every value with bit 15 set.
10961_sk_load_f16_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = source base pointer.
10962  DB  72,173                              ; lods          %ds:(%rsi),%rax
10963  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm0 = pixels 0,1 and xmm1 = pixels 2,3 (four 16-bit components per pixel).
10964  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
10965  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
  ; Two rounds of word interleaving transpose the 4x4 block:
  ;   xmm1 = [c0 of px0..3 | c1 of px0..3],  xmm8 = [c2 of px0..3 | c3 of px0..3].
10966  DB  102,68,15,111,192                   ; movdqa        %xmm0,%xmm8
10967  DB  102,68,15,97,193                    ; punpcklwd     %xmm1,%xmm8
10968  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
10969  DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
10970  DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
10971  DB  102,68,15,105,192                   ; punpckhwd     %xmm0,%xmm8
  ; xmm3 = 0x0400 in every 16-bit word: the threshold used for zeroing inputs.
10972  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
10973  DB  102,15,110,192                      ; movd          %eax,%xmm0
10974  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
  ; Component 0: zero words where 0x0400 > word, widen the low four words to
  ; 32 bits (xmm9 = 0), shift left by 13, multiply by 2^112 (0x77800000 in xmm10).
10975  DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
10976  DB  102,15,101,193                      ; pcmpgtw       %xmm1,%xmm0
10977  DB  102,15,223,193                      ; pandn         %xmm1,%xmm0
10978  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
10979  DB  102,65,15,97,193                    ; punpcklwd     %xmm9,%xmm0
10980  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
10981  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
10982  DB  102,15,110,208                      ; movd          %eax,%xmm2
10983  DB  102,68,15,112,210,0                 ; pshufd        $0x0,%xmm2,%xmm10
10984  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
  ; Component 1: pshufd $0x4e swaps the 64-bit halves to bring it into the low
  ; four words, then the same mask / widen / shift / scale sequence.
10985  DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
10986  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
10987  DB  102,15,101,202                      ; pcmpgtw       %xmm2,%xmm1
10988  DB  102,15,223,202                      ; pandn         %xmm2,%xmm1
10989  DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
10990  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
10991  DB  65,15,89,202                        ; mulps         %xmm10,%xmm1
  ; Component 2: from the low half of xmm8.
10992  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
10993  DB  102,65,15,101,208                   ; pcmpgtw       %xmm8,%xmm2
10994  DB  102,65,15,223,208                   ; pandn         %xmm8,%xmm2
10995  DB  102,65,15,97,209                    ; punpcklwd     %xmm9,%xmm2
10996  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
10997  DB  65,15,89,210                        ; mulps         %xmm10,%xmm2
  ; Component 3: from the high half of xmm8. The threshold register xmm3 is
  ; consumed in place since it is not needed afterwards.
10998  DB  102,69,15,112,192,78                ; pshufd        $0x4e,%xmm8,%xmm8
10999  DB  102,65,15,101,216                   ; pcmpgtw       %xmm8,%xmm3
11000  DB  102,65,15,223,216                   ; pandn         %xmm8,%xmm3
11001  DB  102,65,15,97,217                    ; punpcklwd     %xmm9,%xmm3
11002  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
11003  DB  65,15,89,218                        ; mulps         %xmm10,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
11004  DB  72,173                              ; lods          %ds:(%rsi),%rax
11005  DB  255,224                             ; jmpq          *%rax
11006
11007PUBLIC _sk_store_f16_sse2
; Converts the four float lanes of xmm0-xmm3 to 16-bit values by multiplying by
; 2^-112 and keeping bits 28..13 of the result (a float-to-half conversion,
; going by the stage name and the exponent rebias), interleaves the four
; components per pixel, and writes 32 bytes at base + 8*rdi.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the destination base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index, xmm0-xmm3 = components 0..3 (presumably r, g, b, a).
;   Writes: rax, rcx, rsi (advanced by two entries), xmm8-xmm11, 32 bytes of memory.
;   xmm0-xmm3 are not modified.
;   The right shift discards the low 13 mantissa bits without rounding, and the
;   float sign bit (bit 31) is not carried into the 16-bit result.
11008_sk_store_f16_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = destination base pointer.
11009  DB  72,173                              ; lods          %ds:(%rsi),%rax
11010  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm9 = 0x07800000 (2^-112) in all four lanes.
11011  DB  185,0,0,128,7                       ; mov           $0x7800000,%ecx
11012  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
11013  DB  102,69,15,112,200,0                 ; pshufd        $0x0,%xmm8,%xmm9
  ; Component 0 -> xmm8: scale, shift right by 13, then sign-extend the low
  ; 16 bits (pslld/psrad) so packssdw's signed saturation leaves them unchanged.
11014  DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
11015  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
11016  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
11017  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
11018  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
11019  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
  ; Component 1 -> xmm10, same sequence.
11020  DB  102,69,15,111,209                   ; movdqa        %xmm9,%xmm10
11021  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
11022  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
11023  DB  102,65,15,114,242,16                ; pslld         $0x10,%xmm10
11024  DB  102,65,15,114,226,16                ; psrad         $0x10,%xmm10
11025  DB  102,69,15,107,210                   ; packssdw      %xmm10,%xmm10
  ; Component 2 -> xmm11, same sequence.
11026  DB  102,69,15,111,217                   ; movdqa        %xmm9,%xmm11
11027  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11028  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
11029  DB  102,65,15,114,243,16                ; pslld         $0x10,%xmm11
11030  DB  102,65,15,114,227,16                ; psrad         $0x10,%xmm11
11031  DB  102,69,15,107,219                   ; packssdw      %xmm11,%xmm11
  ; Component 3 -> xmm9 (the scale register is consumed in place).
11032  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
11033  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
11034  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
11035  DB  102,65,15,114,225,16                ; psrad         $0x10,%xmm9
11036  DB  102,69,15,107,201                   ; packssdw      %xmm9,%xmm9
  ; Interleave: xmm8 = [c0,c1] pairs and xmm11 = [c2,c3] pairs for pixels 0..3,
  ; then combine pairs into whole pixels.
11037  DB  102,69,15,97,194                    ; punpcklwd     %xmm10,%xmm8
11038  DB  102,69,15,97,217                    ; punpcklwd     %xmm9,%xmm11
  ; Pixels 0,1 -> rax + rdi*8.
11039  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
11040  DB  102,69,15,98,203                    ; punpckldq     %xmm11,%xmm9
11041  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
  ; Pixels 2,3 -> rax + rdi*8 + 16.
11042  DB  102,69,15,106,195                   ; punpckhdq     %xmm11,%xmm8
11043  DB  243,68,15,127,68,248,16             ; movdqu        %xmm8,0x10(%rax,%rdi,8)
  ; Fetch the next stream entry and tail-jump to it.
11044  DB  72,173                              ; lods          %ds:(%rsi),%rax
11045  DB  255,224                             ; jmpq          *%rax
11046
11047PUBLIC _sk_load_u16_be_sse2
; Reads 32 bytes (four pixels of four 16-bit values each) from base + 8*rdi,
; transposes them so each output register holds one component for all four
; pixels, swaps the two bytes of every 16-bit value, and converts the result to
; a float scaled by ~1/65535 so that 0xffff becomes approximately 1.0.
;   In:     rsi -> stream of 8-byte entries. The first entry read here points at
;                  a slot holding the source base pointer; the second is the
;                  address jumped to on exit (presumably the next pipeline stage).
;           rdi  = element index.
;   Out:    xmm0-xmm3 = components 0..3 (presumably r, g, b, a), one pixel per lane.
;   Writes: rax, rsi (advanced by two entries), xmm0-xmm3, xmm8-xmm10.
11048_sk_load_u16_be_sse2 LABEL PROC
  ; rax = [rsi], rsi += 8 (lods; assumes the direction flag is clear), then
  ; rax = [rax] = source base pointer.
11049  DB  72,173                              ; lods          %ds:(%rsi),%rax
11050  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm0 = pixels 0,1 and xmm1 = pixels 2,3 (four 16-bit components per pixel).
11051  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
11052  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
  ; Two rounds of word interleaving transpose the 4x4 block:
  ;   xmm1 = [c0 of px0..3 | c1 of px0..3],  xmm2 = [c2 of px0..3 | c3 of px0..3].
11053  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
11054  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
11055  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
11056  DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
11057  DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
11058  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
  ; xmm8 = 0x37800080 (~1/65535) in all four lanes.
11059  DB  184,128,0,128,55                    ; mov           $0x37800080,%eax
11060  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
11061  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
  ; Component 0 -> xmm0: byte-swap each word ((w << 8) | (w >> 8)), zero-extend
  ; the low four words to 32 bits (xmm9 = 0), convert to float and scale.
  ; xmm3 keeps a half-swapped copy of xmm1 (component 1 in its low four words),
  ; taken before xmm1 is modified.
11062  DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
11063  DB  102,15,113,240,8                    ; psllw         $0x8,%xmm0
11064  DB  102,15,112,217,78                   ; pshufd        $0x4e,%xmm1,%xmm3
11065  DB  102,15,113,209,8                    ; psrlw         $0x8,%xmm1
11066  DB  102,15,235,200                      ; por           %xmm0,%xmm1
11067  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
11068  DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
11069  DB  15,91,193                           ; cvtdq2ps      %xmm1,%xmm0
11070  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
  ; Component 1 -> xmm1, same byte-swap / widen / convert / scale sequence.
11071  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
11072  DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
11073  DB  102,15,113,211,8                    ; psrlw         $0x8,%xmm3
11074  DB  102,15,235,217                      ; por           %xmm1,%xmm3
11075  DB  102,65,15,97,217                    ; punpcklwd     %xmm9,%xmm3
11076  DB  15,91,203                           ; cvtdq2ps      %xmm3,%xmm1
11077  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
  ; Component 2 -> xmm2. xmm3 again keeps a half-swapped copy (component 3 in
  ; its low four words), taken before xmm2 is modified.
11078  DB  102,68,15,111,210                   ; movdqa        %xmm2,%xmm10
11079  DB  102,65,15,113,242,8                 ; psllw         $0x8,%xmm10
11080  DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
11081  DB  102,15,113,210,8                    ; psrlw         $0x8,%xmm2
11082  DB  102,65,15,235,210                   ; por           %xmm10,%xmm2
11083  DB  102,65,15,97,209                    ; punpcklwd     %xmm9,%xmm2
11084  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
11085  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
  ; Component 3 -> xmm3.
11086  DB  102,68,15,111,211                   ; movdqa        %xmm3,%xmm10
11087  DB  102,65,15,113,242,8                 ; psllw         $0x8,%xmm10
11088  DB  102,15,113,211,8                    ; psrlw         $0x8,%xmm3
11089  DB  102,65,15,235,218                   ; por           %xmm10,%xmm3
11090  DB  102,65,15,97,217                    ; punpcklwd     %xmm9,%xmm3
11091  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
11092  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
  ; Fetch the next stream entry and tail-jump to it.
11093  DB  72,173                              ; lods          %ds:(%rsi),%rax
11094  DB  255,224                             ; jmpq          *%rax
11095
; _sk_store_u16_be_sse2
; Scales the four float lanes of each of xmm0..xmm3 by 65535, converts them to
; 16-bit integers, swaps the two bytes of every 16-bit value, interleaves the
; four registers and writes the resulting 32 bytes to memory.
;   In:       rsi        -> stream of 8-byte words consumed with lods: the first
;                           word points at the destination base pointer, the
;                           second is the address jumped to on exit
;             rdi        =  index; bytes are written at base + rdi*8
;                           (presumably the pixel index -- confirm with caller)
;             xmm0..xmm3 =  source float vectors (presumably r,g,b,a -- confirm)
;   Out:      xmm0..xmm3 are left unmodified
;   Clobbers: rax, rcx, xmm8..xmm12; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11096PUBLIC _sk_store_u16_be_sse2
11097_sk_store_u16_be_sse2 LABEL PROC
11098  DB  72,173                              ; lods          %ds:(%rsi),%rax
11099  DB  72,139,0                            ; mov           (%rax),%rax
  ; xmm9 = 65535.0f in all four lanes (0x477fff00 is 65535.0 as an IEEE-754 single).
11100  DB  185,0,255,127,71                    ; mov           $0x477fff00,%ecx
11101  DB  102,68,15,110,201                   ; movd          %ecx,%xmm9
11102  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm8 = xmm0 * 65535 as int32; pslld/psrad sign-extend the low 16 bits so the
  ; signed-saturating packssdw keeps them bit-for-bit; psllw/psrlw/por then swap
  ; the two bytes inside each 16-bit word.
11103  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
11104  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
11105  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
11106  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
11107  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
11108  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
11109  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
11110  DB  102,65,15,113,242,8                 ; psllw         $0x8,%xmm10
11111  DB  102,65,15,113,208,8                 ; psrlw         $0x8,%xmm8
11112  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
  ; Same sequence for xmm1 -> xmm10 (xmm11 is the byte-swap temporary).
11113  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
11114  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
11115  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
11116  DB  102,65,15,114,242,16                ; pslld         $0x10,%xmm10
11117  DB  102,65,15,114,226,16                ; psrad         $0x10,%xmm10
11118  DB  102,69,15,107,210                   ; packssdw      %xmm10,%xmm10
11119  DB  102,69,15,111,218                   ; movdqa        %xmm10,%xmm11
11120  DB  102,65,15,113,243,8                 ; psllw         $0x8,%xmm11
11121  DB  102,65,15,113,210,8                 ; psrlw         $0x8,%xmm10
11122  DB  102,69,15,235,211                   ; por           %xmm11,%xmm10
  ; Same sequence for xmm2 -> xmm11 (xmm12 is the byte-swap temporary).
11123  DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
11124  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11125  DB  102,69,15,91,219                    ; cvtps2dq      %xmm11,%xmm11
11126  DB  102,65,15,114,243,16                ; pslld         $0x10,%xmm11
11127  DB  102,65,15,114,227,16                ; psrad         $0x10,%xmm11
11128  DB  102,69,15,107,219                   ; packssdw      %xmm11,%xmm11
11129  DB  102,69,15,111,227                   ; movdqa        %xmm11,%xmm12
11130  DB  102,65,15,113,244,8                 ; psllw         $0x8,%xmm12
11131  DB  102,65,15,113,211,8                 ; psrlw         $0x8,%xmm11
11132  DB  102,69,15,235,220                   ; por           %xmm12,%xmm11
  ; Same sequence for xmm3, done in place in xmm9 (the scale is no longer needed).
11133  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
11134  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
11135  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
11136  DB  102,65,15,114,225,16                ; psrad         $0x10,%xmm9
11137  DB  102,69,15,107,201                   ; packssdw      %xmm9,%xmm9
11138  DB  102,69,15,111,225                   ; movdqa        %xmm9,%xmm12
11139  DB  102,65,15,113,244,8                 ; psllw         $0x8,%xmm12
11140  DB  102,65,15,113,209,8                 ; psrlw         $0x8,%xmm9
11141  DB  102,69,15,235,204                   ; por           %xmm12,%xmm9
  ; Interleave 16-bit words: xmm8 = (from xmm0, from xmm1) pairs and
  ; xmm11 = (from xmm2, from xmm3) pairs. The dword interleaves then build
  ; 8-byte groups holding one word from each of the four inputs: lanes 0-1 are
  ; stored at offset 0 and lanes 2-3 at offset 16 (unaligned stores).
11142  DB  102,69,15,97,194                    ; punpcklwd     %xmm10,%xmm8
11143  DB  102,69,15,97,217                    ; punpcklwd     %xmm9,%xmm11
11144  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
11145  DB  102,69,15,98,203                    ; punpckldq     %xmm11,%xmm9
11146  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
11147  DB  102,69,15,106,195                   ; punpckhdq     %xmm11,%xmm8
11148  DB  243,68,15,127,68,248,16             ; movdqu        %xmm8,0x10(%rax,%rdi,8)
  ; Load the next word of the stream and tail-jump to it.
11149  DB  72,173                              ; lods          %ds:(%rsi),%rax
11150  DB  255,224                             ; jmpq          *%rax
11151
; _sk_load_f32_sse2
; Reads four consecutive 16-byte records of four floats each and transposes
; them, so that xmm0 receives element 0 of every record, xmm1 element 1,
; xmm2 element 2 and xmm3 element 3.
;   In:       rsi -> stream of 8-byte words consumed with lods: the first word
;                    points at the source base pointer, the second is the
;                    address jumped to on exit
;             rdi =  index; records are read from base + rdi*16
;                    (presumably the pixel index -- confirm with caller)
;   Out:      xmm0..xmm3 = transposed data (presumably r,g,b,a -- confirm)
;   Clobbers: rax, rcx, xmm8, xmm9; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11152PUBLIC _sk_load_f32_sse2
11153_sk_load_f32_sse2 LABEL PROC
11154  DB  72,173                              ; lods          %ds:(%rsi),%rax
11155  DB  72,139,0                            ; mov           (%rax),%rax
  ; rcx = rdi * 16: byte offset of the first record.
11156  DB  72,137,249                          ; mov           %rdi,%rcx
11157  DB  72,193,225,4                        ; shl           $0x4,%rcx
  ; Unaligned loads of records 0..3 into xmm8, xmm0, xmm3, xmm9.
11158  DB  68,15,16,4,8                        ; movups        (%rax,%rcx,1),%xmm8
11159  DB  15,16,68,8,16                       ; movups        0x10(%rax,%rcx,1),%xmm0
11160  DB  15,16,92,8,32                       ; movups        0x20(%rax,%rcx,1),%xmm3
11161  DB  68,15,16,76,8,48                    ; movups        0x30(%rax,%rcx,1),%xmm9
  ; 4x4 transpose, step 1: interleave 32-bit elements of record pairs (0,1) and (2,3).
11162  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
11163  DB  15,20,208                           ; unpcklps      %xmm0,%xmm2
11164  DB  15,40,203                           ; movaps        %xmm3,%xmm1
11165  DB  65,15,20,201                        ; unpcklps      %xmm9,%xmm1
11166  DB  68,15,21,192                        ; unpckhps      %xmm0,%xmm8
11167  DB  65,15,21,217                        ; unpckhps      %xmm9,%xmm3
  ; Step 2: combine 64-bit halves to finish the transpose into xmm0..xmm3.
11168  DB  15,40,194                           ; movaps        %xmm2,%xmm0
11169  DB  102,15,20,193                       ; unpcklpd      %xmm1,%xmm0
11170  DB  15,18,202                           ; movhlps       %xmm2,%xmm1
11171  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
11172  DB  102,15,20,211                       ; unpcklpd      %xmm3,%xmm2
11173  DB  65,15,18,216                        ; movhlps       %xmm8,%xmm3
  ; Load the next word of the stream and tail-jump to it.
11174  DB  72,173                              ; lods          %ds:(%rsi),%rax
11175  DB  255,224                             ; jmpq          *%rax
11176
; _sk_store_f32_sse2
; Transposes xmm0..xmm3 (four floats each) into four 16-byte records, each
; holding one lane from xmm0, xmm1, xmm2, xmm3 in that order, and writes the
; 64 bytes to memory with unaligned stores.
;   In:       rsi        -> stream of 8-byte words consumed with lods: the first
;                           word points at the destination base pointer, the
;                           second is the address jumped to on exit
;             rdi        =  index; records are written at base + rdi*16
;                           (presumably the pixel index -- confirm with caller)
;             xmm0..xmm3 =  source float vectors (presumably r,g,b,a -- confirm)
;   Out:      xmm0..xmm3 are left unmodified
;   Clobbers: rax, rcx, xmm8..xmm12; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11177PUBLIC _sk_store_f32_sse2
11178_sk_store_f32_sse2 LABEL PROC
11179  DB  72,173                              ; lods          %ds:(%rsi),%rax
11180  DB  72,139,0                            ; mov           (%rax),%rax
  ; rcx = rdi * 16: byte offset of the first record.
11181  DB  72,137,249                          ; mov           %rdi,%rcx
11182  DB  72,193,225,4                        ; shl           $0x4,%rcx
  ; 4x4 transpose, step 1: interleave 32-bit lanes of (xmm0,xmm1) and (xmm2,xmm3).
11183  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
11184  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
11185  DB  68,15,20,201                        ; unpcklps      %xmm1,%xmm9
11186  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
11187  DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
11188  DB  68,15,20,219                        ; unpcklps      %xmm3,%xmm11
11189  DB  68,15,21,193                        ; unpckhps      %xmm1,%xmm8
11190  DB  68,15,21,211                        ; unpckhps      %xmm3,%xmm10
  ; Step 2: combine 64-bit halves; records 0..3 end up in xmm12, xmm11, xmm9, xmm10.
11191  DB  69,15,40,225                        ; movaps        %xmm9,%xmm12
11192  DB  102,69,15,20,227                    ; unpcklpd      %xmm11,%xmm12
11193  DB  69,15,18,217                        ; movhlps       %xmm9,%xmm11
11194  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
11195  DB  102,69,15,20,202                    ; unpcklpd      %xmm10,%xmm9
11196  DB  69,15,18,208                        ; movhlps       %xmm8,%xmm10
  ; Unaligned 16-byte stores (movupd and movups write the same 128 bits).
11197  DB  102,68,15,17,36,8                   ; movupd        %xmm12,(%rax,%rcx,1)
11198  DB  68,15,17,92,8,16                    ; movups        %xmm11,0x10(%rax,%rcx,1)
11199  DB  102,68,15,17,76,8,32                ; movupd        %xmm9,0x20(%rax,%rcx,1)
11200  DB  68,15,17,84,8,48                    ; movups        %xmm10,0x30(%rax,%rcx,1)
  ; Load the next word of the stream and tail-jump to it.
11201  DB  72,173                              ; lods          %ds:(%rsi),%rax
11202  DB  255,224                             ; jmpq          *%rax
11203
; _sk_clamp_x_sse2
; Clamps each lane of xmm0: xmm0 = min(max(0, xmm0), L'), where L is the float
; read through the first stream word and L' is L with 1 subtracted from its
; bit pattern (for a positive, finite L that is the largest float below L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm0 =  values to clamp (presumably x coordinates -- confirm)
;   Out:      xmm0 =  clamped values
;   Clobbers: rax, xmm8, xmm9; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11204PUBLIC _sk_clamp_x_sse2
11205_sk_clamp_x_sse2 LABEL PROC
11206  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = max(0, xmm0)
11207  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
11208  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
  ; xmm9 = L broadcast to all lanes
11209  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
11210  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm0 = all-ones (-1 per 32-bit lane) + bits(L), i.e. bits(L) - 1
11211  DB  102,15,118,192                      ; pcmpeqd       %xmm0,%xmm0
11212  DB  102,65,15,254,193                   ; paddd         %xmm9,%xmm0
11213  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
  ; Fetch the next stream word, move the result into xmm0 and tail-jump.
11214  DB  72,173                              ; lods          %ds:(%rsi),%rax
11215  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
11216  DB  255,224                             ; jmpq          *%rax
11217
; _sk_clamp_y_sse2
; Clamps each lane of xmm1: xmm1 = min(max(0, xmm1), L'), where L is the float
; read through the first stream word and L' is L with 1 subtracted from its
; bit pattern (for a positive, finite L that is the largest float below L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm1 =  values to clamp (presumably y coordinates -- confirm)
;   Out:      xmm1 =  clamped values
;   Clobbers: rax, xmm8, xmm9; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11218PUBLIC _sk_clamp_y_sse2
11219_sk_clamp_y_sse2 LABEL PROC
11220  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = max(0, xmm1)
11221  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
11222  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
  ; xmm9 = L broadcast to all lanes
11223  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
11224  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm1 = all-ones (-1 per 32-bit lane) + bits(L), i.e. bits(L) - 1
11225  DB  102,15,118,201                      ; pcmpeqd       %xmm1,%xmm1
11226  DB  102,65,15,254,201                   ; paddd         %xmm9,%xmm1
11227  DB  68,15,93,193                        ; minps         %xmm1,%xmm8
  ; Fetch the next stream word, move the result into xmm1 and tail-jump.
11228  DB  72,173                              ; lods          %ds:(%rsi),%rax
11229  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
11230  DB  255,224                             ; jmpq          *%rax
11231
; _sk_repeat_x_sse2
; Wraps each lane of xmm0 into a period of length L:
;   xmm0 = min(xmm0 - floor(xmm0 / L) * L, L')
; where L is the float read through the first stream word and L' is L with 1
; subtracted from its bit pattern (the largest float below a positive, finite L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm0 =  values to wrap (presumably x coordinates -- confirm)
;   Out:      xmm0 =  wrapped values
;   Clobbers: rax, xmm8..xmm11; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11232PUBLIC _sk_repeat_x_sse2
11233_sk_repeat_x_sse2 LABEL PROC
11234  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = L broadcast; xmm9 = xmm0 / L
11235  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
11236  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
11237  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
11238  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
  ; floor(): truncate toward zero, then subtract 1.0 in the lanes where the
  ; quotient is smaller than its truncation (negative non-integers).
11239  DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
11240  DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
11241  DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
  ; 0x3f800000 is 1.0f; rax is free to reuse because L has already been loaded.
11242  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
11243  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
11244  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11245  DB  69,15,84,217                        ; andps         %xmm9,%xmm11
11246  DB  69,15,92,211                        ; subps         %xmm11,%xmm10
  ; xmm0 -= floor(xmm0 / L) * L
11247  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
11248  DB  65,15,92,194                        ; subps         %xmm10,%xmm0
  ; xmm9 = bits(L) - 1 (all-ones added as an integer); cap the result with it.
11249  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
11250  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
11251  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
  ; Load the next word of the stream and tail-jump to it.
11252  DB  72,173                              ; lods          %ds:(%rsi),%rax
11253  DB  255,224                             ; jmpq          *%rax
11254
; _sk_repeat_y_sse2
; Wraps each lane of xmm1 into a period of length L:
;   xmm1 = min(xmm1 - floor(xmm1 / L) * L, L')
; where L is the float read through the first stream word and L' is L with 1
; subtracted from its bit pattern (the largest float below a positive, finite L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm1 =  values to wrap (presumably y coordinates -- confirm)
;   Out:      xmm1 =  wrapped values
;   Clobbers: rax, xmm8..xmm11; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11255PUBLIC _sk_repeat_y_sse2
11256_sk_repeat_y_sse2 LABEL PROC
11257  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = L broadcast; xmm9 = xmm1 / L
11258  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
11259  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
11260  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
11261  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
  ; floor(): truncate toward zero, then subtract 1.0 in the lanes where the
  ; quotient is smaller than its truncation (negative non-integers).
11262  DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
11263  DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
11264  DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
  ; 0x3f800000 is 1.0f; rax is free to reuse because L has already been loaded.
11265  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
11266  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
11267  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11268  DB  69,15,84,217                        ; andps         %xmm9,%xmm11
11269  DB  69,15,92,211                        ; subps         %xmm11,%xmm10
  ; xmm1 -= floor(xmm1 / L) * L
11270  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
11271  DB  65,15,92,202                        ; subps         %xmm10,%xmm1
  ; xmm9 = bits(L) - 1 (all-ones added as an integer); cap the result with it.
11272  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
11273  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
11274  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
  ; Load the next word of the stream and tail-jump to it.
11275  DB  72,173                              ; lods          %ds:(%rsi),%rax
11276  DB  255,224                             ; jmpq          *%rax
11277
; _sk_mirror_x_sse2
; Reflects each lane of xmm0 with period 2*L. With t = xmm0 - L:
;   xmm0 = min(| t - floor(t / 2L) * 2L - L |, L')
; where L is the float read through the first stream word and L' is L with 1
; subtracted from its bit pattern (the largest float below a positive, finite L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm0 =  values to mirror (presumably x coordinates -- confirm)
;   Out:      xmm0 =  mirrored values
;   Clobbers: rax, xmm8..xmm12; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11278PUBLIC _sk_mirror_x_sse2
11279_sk_mirror_x_sse2 LABEL PROC
11280  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = L broadcast; t = xmm0 - L; xmm9 = 2L broadcast
11281  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
11282  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
11283  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
11284  DB  65,15,92,192                        ; subps         %xmm8,%xmm0
11285  DB  243,69,15,88,201                    ; addss         %xmm9,%xmm9
11286  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm11 = floor(t / 2L): truncate toward zero, then subtract 1.0 in the lanes
  ; where the quotient is smaller than its truncation.
11287  DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
11288  DB  69,15,94,209                        ; divps         %xmm9,%xmm10
11289  DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
11290  DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
11291  DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
  ; 0x3f800000 is 1.0f; rax is free to reuse because L has already been loaded.
11292  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
11293  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
11294  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11295  DB  69,15,84,226                        ; andps         %xmm10,%xmm12
  ; xmm10 is zeroed here for the negation below.
11296  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
11297  DB  69,15,92,220                        ; subps         %xmm12,%xmm11
  ; xmm0 = t - floor(t / 2L) * 2L - L
11298  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
11299  DB  65,15,92,195                        ; subps         %xmm11,%xmm0
11300  DB  65,15,92,192                        ; subps         %xmm8,%xmm0
  ; Absolute value: v AND (0 - v) clears the sign bit, the only bit in which
  ; the two operands differ.
11301  DB  68,15,92,208                        ; subps         %xmm0,%xmm10
11302  DB  65,15,84,194                        ; andps         %xmm10,%xmm0
  ; xmm9 = bits(L) - 1 (all-ones added as an integer); cap the result with it.
11303  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
11304  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
11305  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
  ; Load the next word of the stream and tail-jump to it.
11306  DB  72,173                              ; lods          %ds:(%rsi),%rax
11307  DB  255,224                             ; jmpq          *%rax
11308
; _sk_mirror_y_sse2
; Reflects each lane of xmm1 with period 2*L. With t = xmm1 - L:
;   xmm1 = min(| t - floor(t / 2L) * 2L - L |, L')
; where L is the float read through the first stream word and L' is L with 1
; subtracted from its bit pattern (the largest float below a positive, finite L).
;   In:       rsi  -> stream of 8-byte words consumed with lods: the first word
;                     points at L, the second is the address jumped to on exit
;             xmm1 =  values to mirror (presumably y coordinates -- confirm)
;   Out:      xmm1 =  mirrored values
;   Clobbers: rax, xmm8..xmm12; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11309PUBLIC _sk_mirror_y_sse2
11310_sk_mirror_y_sse2 LABEL PROC
11311  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm8 = L broadcast; t = xmm1 - L; xmm9 = 2L broadcast
11312  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
11313  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
11314  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
11315  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
11316  DB  243,69,15,88,201                    ; addss         %xmm9,%xmm9
11317  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
  ; xmm11 = floor(t / 2L): truncate toward zero, then subtract 1.0 in the lanes
  ; where the quotient is smaller than its truncation.
11318  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
11319  DB  69,15,94,209                        ; divps         %xmm9,%xmm10
11320  DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
11321  DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
11322  DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
  ; 0x3f800000 is 1.0f; rax is free to reuse because L has already been loaded.
11323  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
11324  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
11325  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11326  DB  69,15,84,226                        ; andps         %xmm10,%xmm12
  ; xmm10 is zeroed here for the negation below.
11327  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
11328  DB  69,15,92,220                        ; subps         %xmm12,%xmm11
  ; xmm1 = t - floor(t / 2L) * 2L - L
11329  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
11330  DB  65,15,92,203                        ; subps         %xmm11,%xmm1
11331  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
  ; Absolute value: v AND (0 - v) clears the sign bit, the only bit in which
  ; the two operands differ.
11332  DB  68,15,92,209                        ; subps         %xmm1,%xmm10
11333  DB  65,15,84,202                        ; andps         %xmm10,%xmm1
  ; xmm9 = bits(L) - 1 (all-ones added as an integer); cap the result with it.
11334  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
11335  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
11336  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
  ; Load the next word of the stream and tail-jump to it.
11337  DB  72,173                              ; lods          %ds:(%rsi),%rax
11338  DB  255,224                             ; jmpq          *%rax
11339
; _sk_luminance_to_alpha_sse2
; Computes a weighted sum of xmm0, xmm1 and xmm2 into xmm3 and zeroes the
; three inputs:
;   xmm3 = 0.2126*xmm0 + 0.7152*xmm1 + 0.0722*xmm2
;   xmm0 = xmm1 = xmm2 = 0
; (the immediates 0x3e59b3d0, 0x3f371759 and 0x3d93dd98 are these weights as
; IEEE-754 singles; going by the stage name, xmm0..xmm3 are presumably r,g,b,a).
;   In:       rsi -> stream of 8-byte words; exactly one word is consumed, the
;                    address jumped to on exit
;             xmm0..xmm2 = inputs; the incoming xmm3 is overwritten unread
;   Clobbers: rax; rsi is stepped past one word
;             (lods, direction flag assumed clear)
11340PUBLIC _sk_luminance_to_alpha_sse2
11341_sk_luminance_to_alpha_sse2 LABEL PROC
  ; xmm3 = 0.2126 * xmm0
11342  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
11343  DB  102,15,110,216                      ; movd          %eax,%xmm3
11344  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
11345  DB  15,89,216                           ; mulps         %xmm0,%xmm3
  ; xmm0 = 0.7152 * xmm1 + xmm3 (xmm0's original value is dead by now)
11346  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
11347  DB  102,15,110,192                      ; movd          %eax,%xmm0
11348  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
11349  DB  15,89,193                           ; mulps         %xmm1,%xmm0
11350  DB  15,88,195                           ; addps         %xmm3,%xmm0
  ; xmm3 = 0.0722 * xmm2 + xmm0
11351  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
11352  DB  102,15,110,216                      ; movd          %eax,%xmm3
11353  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
11354  DB  15,89,218                           ; mulps         %xmm2,%xmm3
11355  DB  15,88,216                           ; addps         %xmm0,%xmm3
  ; Fetch the next stream word, zero xmm0..xmm2 and tail-jump.
11356  DB  72,173                              ; lods          %ds:(%rsi),%rax
11357  DB  15,87,192                           ; xorps         %xmm0,%xmm0
11358  DB  15,87,201                           ; xorps         %xmm1,%xmm1
11359  DB  15,87,210                           ; xorps         %xmm2,%xmm2
11360  DB  255,224                             ; jmpq          *%rax
11361
; _sk_matrix_2x3_sse2
; Applies a 2x3 affine transform to (xmm0, xmm1). With m[] the six floats read
; through the first stream word and (x, y) the incoming (xmm0, xmm1):
;   xmm0 = m[0]*x + (m[2]*y + m[4])
;   xmm1 = m[1]*x + (m[3]*y + m[5])
; i.e. the coefficients are laid out column by column.
;   In:       rsi -> stream of 8-byte words consumed with lods: the first word
;                    points at m[0], the second is the address jumped to on exit
;             xmm0, xmm1 = inputs
;   Out:      xmm0, xmm1 = transformed values
;   Clobbers: rax, xmm8..xmm11; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11362PUBLIC _sk_matrix_2x3_sse2
11363_sk_matrix_2x3_sse2 LABEL PROC
  ; Save the inputs: xmm9 = y, xmm8 = x.
11364  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
11365  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
11366  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First output: m[0]*x + (m[2]*y + m[4]); m[1] is loaded early for the second.
11367  DB  243,15,16,0                         ; movss         (%rax),%xmm0
11368  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
11369  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
11370  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
11371  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11372  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
11373  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11374  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11375  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11376  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
11377  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
  ; Second output: m[1]*x + (m[3]*y + m[5]).
11378  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
11379  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
11380  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11381  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
11382  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11383  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11384  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11385  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
11386  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
  ; Load the next word of the stream and tail-jump to it.
11387  DB  72,173                              ; lods          %ds:(%rsi),%rax
11388  DB  255,224                             ; jmpq          *%rax
11389
; _sk_matrix_3x4_sse2
; Applies a 3x4 affine transform to (xmm0, xmm1, xmm2). With m[] the twelve
; floats read through the first stream word and (a, b, c) the incoming
; (xmm0, xmm1, xmm2):
;   xmm0 = m[0]*a + (m[3]*b + (m[6]*c + m[9]))
;   xmm1 = m[1]*a + (m[4]*b + (m[7]*c + m[10]))
;   xmm2 = m[2]*a + (m[5]*b + (m[8]*c + m[11]))
; i.e. the coefficients are laid out column by column.
;   In:       rsi -> stream of 8-byte words consumed with lods: the first word
;                    points at m[0], the second is the address jumped to on exit
;             xmm0..xmm2 = inputs (presumably r,g,b -- confirm)
;   Out:      xmm0..xmm2 = transformed values; xmm3 is not touched
;   Clobbers: rax, xmm8..xmm13; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11390PUBLIC _sk_matrix_3x4_sse2
11391_sk_matrix_3x4_sse2 LABEL PROC
  ; Save the inputs: xmm9 = b, xmm8 = a (c stays in xmm2 until the very end).
11392  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
11393  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
11394  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First output -> xmm0, from m[0], m[3], m[6], m[9]; m[1] is loaded early.
11395  DB  243,15,16,0                         ; movss         (%rax),%xmm0
11396  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
11397  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
11398  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
11399  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11400  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
11401  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11402  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
11403  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11404  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11405  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11406  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11407  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11408  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
11409  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
  ; Second output -> xmm1, from m[1], m[4], m[7], m[10].
11410  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
11411  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
11412  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11413  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
11414  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11415  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
11416  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11417  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11418  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11419  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11420  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11421  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
11422  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
  ; Third output -> xmm10, from m[2], m[5], m[8], m[11]; it is built in a
  ; scratch register because xmm2 (c) is still an input to this sum.
11423  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
11424  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11425  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
11426  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11427  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
11428  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11429  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
11430  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
11431  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
11432  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
11433  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
11434  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11435  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
11436  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
  ; Fetch the next stream word, commit the third output to xmm2 and tail-jump.
11437  DB  72,173                              ; lods          %ds:(%rsi),%rax
11438  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
11439  DB  255,224                             ; jmpq          *%rax
11440
; _sk_matrix_4x5_sse2
; Applies a 4x5 affine transform to (xmm0, xmm1, xmm2, xmm3). With m[] the
; twenty floats read through the first stream word and (a, b, c, d) the
; incoming (xmm0, xmm1, xmm2, xmm3):
;   xmm0 = m[0]*a + (m[4]*b + (m[8]*c  + (m[12]*d + m[16])))
;   xmm1 = m[1]*a + (m[5]*b + (m[9]*c  + (m[13]*d + m[17])))
;   xmm2 = m[2]*a + (m[6]*b + (m[10]*c + (m[14]*d + m[18])))
;   xmm3 = m[3]*a + (m[7]*b + (m[11]*c + (m[15]*d + m[19])))
; i.e. the coefficients are laid out column by column.
;   In:       rsi -> stream of 8-byte words consumed with lods: the first word
;                    points at m[0], the second is the address jumped to on exit
;             xmm0..xmm3 = inputs (presumably r,g,b,a -- confirm)
;   Out:      xmm0..xmm3 = transformed values
;   Clobbers: rax, xmm8..xmm15; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11441PUBLIC _sk_matrix_4x5_sse2
11442_sk_matrix_4x5_sse2 LABEL PROC
  ; Save the inputs: xmm9 = b, xmm8 = a (c and d stay in xmm2/xmm3 until the end).
11443  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
11444  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
11445  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First output -> xmm0, from m[0], m[4], m[8], m[12], m[16]; m[1] is loaded early.
11446  DB  243,15,16,0                         ; movss         (%rax),%xmm0
11447  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
11448  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
11449  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
11450  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11451  DB  243,68,15,16,88,32                  ; movss         0x20(%rax),%xmm11
11452  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11453  DB  243,68,15,16,96,48                  ; movss         0x30(%rax),%xmm12
11454  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11455  DB  243,68,15,16,104,64                 ; movss         0x40(%rax),%xmm13
11456  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
11457  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
11458  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
11459  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11460  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11461  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11462  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11463  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
11464  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
  ; Second output -> xmm1, from m[1], m[5], m[9], m[13], m[17].
11465  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
11466  DB  243,68,15,16,80,20                  ; movss         0x14(%rax),%xmm10
11467  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11468  DB  243,68,15,16,88,36                  ; movss         0x24(%rax),%xmm11
11469  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11470  DB  243,68,15,16,96,52                  ; movss         0x34(%rax),%xmm12
11471  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11472  DB  243,68,15,16,104,68                 ; movss         0x44(%rax),%xmm13
11473  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
11474  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
11475  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
11476  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
11477  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11478  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
11479  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11480  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
11481  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
  ; Third output -> xmm10, from m[2], m[6], m[10], m[14], m[18]; it is held in
  ; a scratch register because xmm2 and xmm3 are still needed as inputs.
11482  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
11483  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11484  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
11485  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11486  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
11487  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11488  DB  243,68,15,16,104,56                 ; movss         0x38(%rax),%xmm13
11489  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
11490  DB  243,68,15,16,112,72                 ; movss         0x48(%rax),%xmm14
11491  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
11492  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
11493  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
11494  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
11495  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
11496  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
11497  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11498  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
11499  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
  ; Fourth output -> xmm11, from m[3], m[7], m[11], m[15], m[19].
11500  DB  243,68,15,16,88,12                  ; movss         0xc(%rax),%xmm11
11501  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11502  DB  243,68,15,16,96,28                  ; movss         0x1c(%rax),%xmm12
11503  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11504  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
11505  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
11506  DB  243,68,15,16,112,60                 ; movss         0x3c(%rax),%xmm14
11507  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
11508  DB  243,68,15,16,120,76                 ; movss         0x4c(%rax),%xmm15
11509  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
11510  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
11511  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
11512  DB  68,15,89,234                        ; mulps         %xmm2,%xmm13
11513  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
11514  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
11515  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
11516  DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
11517  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
  ; Fetch the next stream word, commit the third and fourth outputs and tail-jump.
11518  DB  72,173                              ; lods          %ds:(%rsi),%rax
11519  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
11520  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
11521  DB  255,224                             ; jmpq          *%rax
11522
; _sk_matrix_perspective_sse2
; Applies a 3x3 projective transform to (xmm0, xmm1). With m[] the nine floats
; read through the first stream word and (x, y) the incoming (xmm0, xmm1):
;   X = m[0]*x + (m[1]*y + m[2])
;   Y = m[3]*x + (m[4]*y + m[5])
;   W = m[6]*x + (m[7]*y + m[8])
;   xmm0 = X * rcp(W),  xmm1 = Y * rcp(W)
; i.e. the coefficients are laid out row by row. rcp is the rcpps instruction,
; which yields an approximate reciprocal rather than an exact division.
;   In:       rsi -> stream of 8-byte words consumed with lods: the first word
;                    points at m[0], the second is the address jumped to on exit
;             xmm0, xmm1 = inputs
;   Out:      xmm0, xmm1 = projected values
;   Clobbers: rax, xmm8..xmm12; rsi is stepped past two words
;             (lods, direction flag assumed clear)
11523PUBLIC _sk_matrix_perspective_sse2
11524_sk_matrix_perspective_sse2 LABEL PROC
  ; Save x in xmm8; y stays in xmm1 until the reciprocal overwrites it.
11525  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
11526  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; X -> xmm0, from m[0], m[1], m[2].
11527  DB  243,15,16,0                         ; movss         (%rax),%xmm0
11528  DB  243,68,15,16,72,4                   ; movss         0x4(%rax),%xmm9
11529  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
11530  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
11531  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
11532  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11533  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
11534  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
11535  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
11536  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
  ; Y -> xmm9, from m[3], m[4], m[5].
11537  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9
11538  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
11539  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
11540  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11541  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
11542  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11543  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
11544  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
11545  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
11546  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
  ; W -> xmm10, from m[6], m[7], m[8].
11547  DB  243,68,15,16,80,24                  ; movss         0x18(%rax),%xmm10
11548  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
11549  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
11550  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
11551  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
11552  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
11553  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
11554  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
11555  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
11556  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
  ; xmm1 = rcp(W); scale X and Y by it.
11557  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
11558  DB  15,89,193                           ; mulps         %xmm1,%xmm0
11559  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
  ; Fetch the next stream word, commit the second output to xmm1 and tail-jump.
11560  DB  72,173                              ; lods          %ds:(%rsi),%rax
11561  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
11562  DB  255,224                             ; jmpq          *%rax
11563
; ---------------------------------------------------------------------------
; _sk_linear_gradient_2stops_sse2
;
; Computes four per-lane linear functions of the vector that arrives in xmm0.
; Let p be the qword fetched from (%rsi), used as a pointer to floats:
;     xmm0 = in * p[4] + p[0]        (offsets 0x10 and 0x00)
;     xmm1 = in * p[5] + p[1]        (offsets 0x14 and 0x04)
;     xmm2 = in * p[6] + p[2]        (offsets 0x18 and 0x08)
;     xmm3 = in * p[7] + p[3]        (offsets 0x1c and 0x0c)
; Every scalar is loaded with movss and then replicated to all four lanes
; with shufps $0x0 before it is used.
;
; In:    xmm0 = input vector; rsi = address of two consecutive qwords
;        (p, then the address this code jumps to on exit).
; Out:   xmm0..xmm3 as listed above. The entry values of xmm1..xmm3 are
;        overwritten without being read.
; Clobb: rax, rsi (advanced by the two lods), xmm8 (copy of the input),
;        xmm9 (broadcast of p[3]).
; Exit:  indirect jmp through rax; there is no ret and the stack is not used.
;
; NOTE(review): p is presumably this stage's context (start colour in p[0..3],
; per-channel delta in p[4..7]) and the jump target the next pipeline stage --
; confirm against the generator's stage source. Also assumes DF=0 so that lods
; moves rsi forward.
; ---------------------------------------------------------------------------
11564PUBLIC _sk_linear_gradient_2stops_sse2
11565_sk_linear_gradient_2stops_sse2 LABEL PROC
11566  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8          ; keep the input; xmm0 is reused for the first result
11567  DB  72,173                              ; lods          %ds:(%rsi),%rax      ; rax = p, rsi steps to the next qword
11568  DB  243,15,16,64,16                     ; movss         0x10(%rax),%xmm0     ; p[4]
11569  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0     ; broadcast p[4]
11570  DB  243,15,16,8                         ; movss         (%rax),%xmm1         ; p[0]
11571  DB  243,15,16,80,4                      ; movss         0x4(%rax),%xmm2      ; p[1], broadcast further down
11572  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1     ; broadcast p[0]
11573  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0          ; in * p[4]
11574  DB  15,88,193                           ; addps         %xmm1,%xmm0          ; xmm0 = in * p[4] + p[0]
11575  DB  243,15,16,72,20                     ; movss         0x14(%rax),%xmm1     ; p[5]
11576  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1     ; broadcast p[5]
11577  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2     ; broadcast p[1]
11578  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1          ; in * p[5]
11579  DB  15,88,202                           ; addps         %xmm2,%xmm1          ; xmm1 = in * p[5] + p[1]
11580  DB  243,15,16,80,24                     ; movss         0x18(%rax),%xmm2     ; p[6]
11581  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2     ; broadcast p[6]
11582  DB  243,15,16,88,8                      ; movss         0x8(%rax),%xmm3      ; p[2]
11583  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3     ; broadcast p[2]
11584  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2          ; in * p[6]
11585  DB  15,88,211                           ; addps         %xmm3,%xmm2          ; xmm2 = in * p[6] + p[2]
11586  DB  243,15,16,88,28                     ; movss         0x1c(%rax),%xmm3     ; p[7]
11587  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3     ; broadcast p[7]
11588  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9      ; p[3]; xmm9 is the extra scratch register
11589  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9     ; broadcast p[3]
11590  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3          ; in * p[7]
11591  DB  65,15,88,217                        ; addps         %xmm9,%xmm3          ; xmm3 = in * p[7] + p[3]
11592  DB  72,173                              ; lods          %ds:(%rsi),%rax      ; rax = next qword of the list at rsi
11593  DB  255,224                             ; jmpq          *%rax                ; leave via indirect jump, no ret
11594ENDIF
11595END
11596