SkJumper_generated_win.S revision 5f80485a53e56e92d0914131b03ae33e49c163a8
1; Copyright 2017 Google Inc.
2;
3; Use of this source code is governed by a BSD-style license that can be
4; found in the LICENSE file.
5
6; This file is generated semi-automatically with this command:
7;   $ src/jumper/build_stages.py
8
9IFDEF RAX
10_text SEGMENT
11
; _sk_start_pipeline_hsw -- driver that repeatedly calls the first entry of a
; list of 8-byte code pointers.  The body is emitted as raw machine-code bytes;
; the trailing comment on each DB line is its AT&T-syntax disassembly.
;
; Inputs, as read by the code:
;   %rcx  starting position (kept in %rbx, handed to the callee in %rdi)
;   %rdx  pointer to the list; its first entry is loaded into %r15 and called,
;         the pointer just past it is handed to the callee in %rsi
;   %r8   opaque value handed to the callee in %rdx
;   %r9   upper bound for the position (kept in %r13)
; Output:
;   %rax  the upper bound (%r13)
;
; NOTE(review): the argument registers and the saved set (rbx, rsi, rdi,
; r12-r15, xmm6-xmm15) look like the Microsoft x64 convention -- confirm
; against the C prototype of this symbol.
12PUBLIC _sk_start_pipeline_hsw
13_sk_start_pipeline_hsw LABEL PROC
; Save every integer register that is overwritten below.
14  DB  65,87                               ; push          %r15
15  DB  65,86                               ; push          %r14
16  DB  65,85                               ; push          %r13
17  DB  65,84                               ; push          %r12
18  DB  86                                  ; push          %rsi
19  DB  87                                  ; push          %rdi
20  DB  83                                  ; push          %rbx
; Reserve 0xa0 bytes and spill xmm6-xmm15 (10 registers x 16 bytes).
21  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
22  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
23  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
24  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
25  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
26  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
27  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
28  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
29  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
30  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
31  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
; Move the four inputs into registers that are restored in the epilogue:
; r13 = bound, r14 = opaque value, rbx = position, rsi = list pointer.
32  DB  77,137,205                          ; mov           %r9,%r13
33  DB  77,137,198                          ; mov           %r8,%r14
34  DB  72,137,203                          ; mov           %rcx,%rbx
35  DB  72,137,214                          ; mov           %rdx,%rsi
; r15 = first list entry (the call target); r12 = list pointer past that entry.
36  DB  72,173                              ; lods          %ds:(%rsi),%rax
37  DB  73,137,199                          ; mov           %rax,%r15
38  DB  73,137,244                          ; mov           %rsi,%r12
; If position + 8 <= bound (unsigned) enter the full-width loop at 0x75,
; otherwise go straight to the remainder handling at 0xb6 with rdi = position.
39  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
40  DB  76,57,232                           ; cmp           %r13,%rax
41  DB  118,5                               ; jbe           75 <_sk_start_pipeline_hsw+0x75>
42  DB  72,137,223                          ; mov           %rbx,%rdi
43  DB  235,65                              ; jmp           b6 <_sk_start_pipeline_hsw+0xb6>
; Offset 0x75, full-width iteration: rcx = 0 and ymm0-ymm7 = 0 before the call.
44  DB  185,0,0,0,0                         ; mov           $0x0,%ecx
45  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
46  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
47  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
48  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
49  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
50  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
51  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
52  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
; Callee receives rdi = position, rsi = list pointer, rdx = opaque value.
53  DB  72,137,223                          ; mov           %rbx,%rdi
54  DB  76,137,230                          ; mov           %r12,%rsi
55  DB  76,137,242                          ; mov           %r14,%rdx
56  DB  65,255,215                          ; callq         *%r15
; Advance the position by 8 (rdi and rbx both end up at old rbx + 8) and loop
; again while the new position + 8 is still <= bound.
57  DB  72,141,123,8                        ; lea           0x8(%rbx),%rdi
58  DB  72,131,195,16                       ; add           $0x10,%rbx
59  DB  76,57,235                           ; cmp           %r13,%rbx
60  DB  72,137,251                          ; mov           %rdi,%rbx
61  DB  118,191                             ; jbe           75 <_sk_start_pipeline_hsw+0x75>
; Offset 0xb6, remainder: rcx = bound - position; skip the call when it is 0.
62  DB  76,137,233                          ; mov           %r13,%rcx
63  DB  72,41,249                           ; sub           %rdi,%rcx
64  DB  116,41                              ; je            e7 <_sk_start_pipeline_hsw+0xe7>
; One more call with the nonzero remainder in rcx and ymm0-ymm7 zeroed again.
65  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
66  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
67  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
68  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
69  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
70  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
71  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
72  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
73  DB  76,137,230                          ; mov           %r12,%rsi
74  DB  76,137,242                          ; mov           %r14,%rdx
75  DB  65,255,215                          ; callq         *%r15
; Offset 0xe7, epilogue: return the bound, then undo the prologue in reverse.
76  DB  76,137,232                          ; mov           %r13,%rax
77  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
78  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
79  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
80  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
81  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
82  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
83  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
84  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
85  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
86  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
87  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
88  DB  91                                  ; pop           %rbx
89  DB  95                                  ; pop           %rdi
90  DB  94                                  ; pop           %rsi
91  DB  65,92                               ; pop           %r12
92  DB  65,93                               ; pop           %r13
93  DB  65,94                               ; pop           %r14
94  DB  65,95                               ; pop           %r15
; Clear the upper halves of the ymm registers before returning to the caller.
95  DB  197,248,119                         ; vzeroupper
96  DB  195                                 ; retq
97
; _sk_just_return_hsw -- a single `ret`.  Unlike the other stages in this file
; it does not load a further entry from %rsi; it simply returns.
98PUBLIC _sk_just_return_hsw
99_sk_just_return_hsw LABEL PROC
100  DB  195                                 ; retq
101
; _sk_seed_shader_hsw -- initialises ymm0-ymm7 from %edi, the 8 floats at
; (%rdx) and a 32-bit integer read through the first list entry.
;   ymm0 = float(%edi) + 0.5 + the 8 floats loaded from (%rdx)
;   ymm1 = float(int32 at (entry)) + 0.5, the same value in all 8 lanes
;   ymm2 = 1.0 in all lanes
;   ymm3..ymm7 = 0
; It then loads the next 8-byte entry from (%rsi) and jumps to it.
; NOTE(review): presumably %edi is the x position and the entry points at the
; y coordinate -- inferred from the stage name only; confirm.
102PUBLIC _sk_seed_shader_hsw
103_sk_seed_shader_hsw LABEL PROC
; rax = next list entry (a pointer dereferenced below); rsi advances by 8.
104  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0 = float(edi) broadcast to all lanes.
105  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
106  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
107  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; ymm1 = 0.5 (0x3f000000) in all lanes.
108  DB  65,184,0,0,0,63                     ; mov           $0x3f000000,%r8d
109  DB  196,193,121,110,200                 ; vmovd         %r8d,%xmm1
110  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
111  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
; Add the 8 per-lane floats stored at (rdx).
112  DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
; ymm1 = float(int32 at (rax)) + 0.5; the dword is converted as an integer.
113  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
114  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
115  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
; ymm2 = 1.0 (0x3f800000) in all lanes.
116  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
117  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
118  DB  196,226,125,88,210                  ; vpbroadcastd  %xmm2,%ymm2
; Fetch the next entry, zero ymm3-ymm7, and jump to that entry.
119  DB  72,173                              ; lods          %ds:(%rsi),%rax
120  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
121  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
122  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
123  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
124  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
125  DB  255,224                             ; jmpq          *%rax
126
; _sk_constant_color_hsw -- loads a pointer from the list at (%rsi) and
; broadcasts the four consecutive floats it points at (offsets 0, 4, 8, 12)
; into ymm0, ymm1, ymm2 and ymm3 respectively, then jumps to the next entry.
127PUBLIC _sk_constant_color_hsw
128_sk_constant_color_hsw LABEL PROC
; rax = pointer to the four floats; rsi advances by 8.
129  DB  72,173                              ; lods          %ds:(%rsi),%rax
130  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
131  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
132  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
133  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
; Load the next list entry and tail-jump to it.
134  DB  72,173                              ; lods          %ds:(%rsi),%rax
135  DB  255,224                             ; jmpq          *%rax
136
; _sk_clear_hsw -- sets ymm0-ymm3 to zero, then jumps to the next list entry.
; The entry is loaded first, so the zeroing sits between the load and the jump.
137PUBLIC _sk_clear_hsw
138_sk_clear_hsw LABEL PROC
139  DB  72,173                              ; lods          %ds:(%rsi),%rax
140  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
141  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
142  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
143  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
144  DB  255,224                             ; jmpq          *%rax
145
; _sk_plus__hsw -- lane-wise addition: ymm0 += ymm4, ymm1 += ymm5,
; ymm2 += ymm6, ymm3 += ymm7.  Then loads the next list entry from (%rsi)
; and jumps to it.
146PUBLIC _sk_plus__hsw
147_sk_plus__hsw LABEL PROC
148  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
149  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
150  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
151  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
152  DB  72,173                              ; lods          %ds:(%rsi),%rax
153  DB  255,224                             ; jmpq          *%rax
154
; _sk_srcover_hsw -- computes, per lane, with t = 1.0 - ymm3 taken before any
; register is updated:
;   ymm0 += ymm4 * t,  ymm1 += ymm5 * t,  ymm2 += ymm6 * t,  ymm3 += ymm7 * t
; Uses ymm8 as scratch, then jumps to the next list entry.
155PUBLIC _sk_srcover_hsw
156_sk_srcover_hsw LABEL PROC
; ymm8 = 1.0 (0x3f800000) in all lanes, then ymm8 = 1.0 - ymm3.
157  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
158  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
159  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
160  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
; Fused multiply-add, 231 form: destination = destination + (src * ymm8).
161  DB  196,194,93,184,192                  ; vfmadd231ps   %ymm8,%ymm4,%ymm0
162  DB  196,194,85,184,200                  ; vfmadd231ps   %ymm8,%ymm5,%ymm1
163  DB  196,194,77,184,208                  ; vfmadd231ps   %ymm8,%ymm6,%ymm2
164  DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
165  DB  72,173                              ; lods          %ds:(%rsi),%rax
166  DB  255,224                             ; jmpq          *%rax
167
; _sk_dstover_hsw -- computes, per lane, with t = 1.0 - ymm7:
;   ymm0 = ymm0 * t + ymm4,  ymm1 = ymm1 * t + ymm5,
;   ymm2 = ymm2 * t + ymm6,  ymm3 = ymm3 * t + ymm7
; Uses ymm8 as scratch, then jumps to the next list entry.
168PUBLIC _sk_dstover_hsw
169_sk_dstover_hsw LABEL PROC
; ymm8 = 1.0 (0x3f800000) in all lanes, then ymm8 = 1.0 - ymm7.
170  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
171  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
172  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
173  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
; Fused multiply-add, 213 form: destination = (ymm8 * destination) + src.
174  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
175  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
176  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
177  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
178  DB  72,173                              ; lods          %ds:(%rsi),%rax
179  DB  255,224                             ; jmpq          *%rax
180
; _sk_clamp_0_hsw -- replaces each lane of ymm0-ymm3 with max(lane, 0.0),
; using ymm8 as the zero vector, then jumps to the next list entry.
181PUBLIC _sk_clamp_0_hsw
182_sk_clamp_0_hsw LABEL PROC
; ymm8 = 0.0 in all lanes.
183  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
184  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
185  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
186  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
187  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
188  DB  72,173                              ; lods          %ds:(%rsi),%rax
189  DB  255,224                             ; jmpq          *%rax
190
; _sk_clamp_1_hsw -- replaces each lane of ymm0-ymm3 with min(lane, 1.0),
; using ymm8 to hold the constant, then jumps to the next list entry.
191PUBLIC _sk_clamp_1_hsw
192_sk_clamp_1_hsw LABEL PROC
; ymm8 = 1.0 (0x3f800000) in all lanes.
193  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
194  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
195  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
196  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
197  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
198  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
199  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
200  DB  72,173                              ; lods          %ds:(%rsi),%rax
201  DB  255,224                             ; jmpq          *%rax
202
; _sk_clamp_a_hsw -- first limits ymm3 to at most 1.0, then limits each lane
; of ymm0, ymm1 and ymm2 to at most the (already limited) ymm3.  Uses ymm8 as
; scratch and finally jumps to the next list entry.
203PUBLIC _sk_clamp_a_hsw
204_sk_clamp_a_hsw LABEL PROC
; ymm8 = 1.0 (0x3f800000) in all lanes.
205  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
206  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
207  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
; ymm3 = min(ymm3, 1.0); must happen before the three clamps that read ymm3.
208  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
209  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
210  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
211  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
212  DB  72,173                              ; lods          %ds:(%rsi),%rax
213  DB  255,224                             ; jmpq          *%rax
214
; _sk_set_rgb_hsw -- loads a pointer from the list at (%rsi) and broadcasts the
; three consecutive floats it points at (offsets 0, 4, 8) into ymm0, ymm1 and
; ymm2.  ymm3 is left untouched.  Then jumps to the next list entry.
215PUBLIC _sk_set_rgb_hsw
216_sk_set_rgb_hsw LABEL PROC
; rax = pointer to the three floats; rsi advances by 8.
217  DB  72,173                              ; lods          %ds:(%rsi),%rax
218  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
219  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
220  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
221  DB  72,173                              ; lods          %ds:(%rsi),%rax
222  DB  255,224                             ; jmpq          *%rax
223
; _sk_swap_rb_hsw -- exchanges the contents of ymm0 and ymm2, using ymm8 as
; the temporary, then jumps to the next list entry.
224PUBLIC _sk_swap_rb_hsw
225_sk_swap_rb_hsw LABEL PROC
; ymm8 = old ymm0.
226  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
227  DB  72,173                              ; lods          %ds:(%rsi),%rax
228  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
229  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
230  DB  255,224                             ; jmpq          *%rax
231
; _sk_swap_hsw -- exchanges ymm0-ymm3 with ymm4-ymm7 (ymm0<->ymm4,
; ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7), using ymm8-ymm11 as temporaries,
; then jumps to the next list entry.
232PUBLIC _sk_swap_hsw
233_sk_swap_hsw LABEL PROC
; Park the old ymm0-ymm3 in ymm11, ymm10, ymm9, ymm8.
234  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
235  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
236  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
237  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
238  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0-ymm3 = old ymm4-ymm7.
239  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
240  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
241  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
242  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
; ymm4-ymm7 = the parked old ymm0-ymm3.
243  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
244  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
245  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
246  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
247  DB  255,224                             ; jmpq          *%rax
248
; _sk_move_src_dst_hsw -- copies ymm0-ymm3 into ymm4-ymm7 (ymm0-ymm3 are left
; unchanged), then jumps to the next list entry.
249PUBLIC _sk_move_src_dst_hsw
250_sk_move_src_dst_hsw LABEL PROC
251  DB  72,173                              ; lods          %ds:(%rsi),%rax
252  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
253  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
254  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
255  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
256  DB  255,224                             ; jmpq          *%rax
257
; _sk_move_dst_src_hsw -- copies ymm4-ymm7 into ymm0-ymm3 (ymm4-ymm7 are left
; unchanged), then jumps to the next list entry.
258PUBLIC _sk_move_dst_src_hsw
259_sk_move_dst_src_hsw LABEL PROC
260  DB  72,173                              ; lods          %ds:(%rsi),%rax
261  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
262  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
263  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
264  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
265  DB  255,224                             ; jmpq          *%rax
266
; _sk_premul_hsw -- multiplies ymm0, ymm1 and ymm2 lane-wise by ymm3; ymm3
; itself is unchanged.  Then jumps to the next list entry.
267PUBLIC _sk_premul_hsw
268_sk_premul_hsw LABEL PROC
269  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
270  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
271  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
272  DB  72,173                              ; lods          %ds:(%rsi),%rax
273  DB  255,224                             ; jmpq          *%rax
274
; _sk_unpremul_hsw -- multiplies ymm0, ymm1 and ymm2 lane-wise by a scale s,
; where s = 0.0 in lanes whose ymm3 equals 0.0 and s = 1.0 / ymm3 otherwise.
; ymm3 is unchanged; ymm8-ymm10 are scratch.  Then jumps to the next entry.
275PUBLIC _sk_unpremul_hsw
276_sk_unpremul_hsw LABEL PROC
; ymm8 = 0.0; ymm9 = all-ones mask in lanes where ymm3 == 0.0.
277  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
278  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
; ymm10 = 1.0 / ymm3 (full-precision divide).
279  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
280  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
281  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
282  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
; ymm8 = mask ? 0.0 : 1.0 / ymm3, so the result of dividing by zero is dropped.
283  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
284  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
285  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
286  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
287  DB  72,173                              ; lods          %ds:(%rsi),%rax
288  DB  255,224                             ; jmpq          *%rax
289
; _sk_from_srgb_hsw -- applies the same piecewise function to every lane of
; ymm0, ymm1 and ymm2 (ymm3 is not touched).  For an input x:
;   x <  ~0.055 :  x * ~0.0774
;   otherwise   :  x*x * (~0.3 * x + ~0.6975) + ~0.0025
; The approximate decimals are the float values of the hex immediates below.
; ymm8-ymm15 are scratch.  Finally jumps to the next list entry.
; NOTE(review): presumably an sRGB-to-linear approximation (stage name);
; confirm the coefficients against the generator's C++ source.
290PUBLIC _sk_from_srgb_hsw
291_sk_from_srgb_hsw LABEL PROC
; ymm8 = ~0.0774 (0x3d9e8391); ymm9 = linear branch for ymm0; ymm10 = ymm0^2.
292  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
293  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
294  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
295  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
296  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
; ymm11 = ~0.3 (0x3e99999a); ymm12 = ~0.6975 (0x3f328f5c).
297  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
298  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
299  DB  196,66,125,88,219                   ; vpbroadcastd  %xmm11,%ymm11
300  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
301  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
302  DB  196,66,125,88,228                   ; vpbroadcastd  %xmm12,%ymm12
; ymm13 = ymm0 * 0.3 + 0.6975 (working on a copy so ymm11 stays intact).
303  DB  196,65,125,111,235                  ; vmovdqa       %ymm11,%ymm13
304  DB  196,66,125,168,236                  ; vfmadd213ps   %ymm12,%ymm0,%ymm13
; ymm14 = ~0.0025 (0x3b23d70a); ymm13 = ymm0^2 * ymm13 + 0.0025.
305  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
306  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
307  DB  196,66,125,88,246                   ; vpbroadcastd  %xmm14,%ymm14
308  DB  196,66,45,168,238                   ; vfmadd213ps   %ymm14,%ymm10,%ymm13
; ymm10 is reused as the threshold ~0.055 (0x3d6147ae).
309  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
310  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
311  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
; ymm0 = (ymm0 < threshold) ? linear branch (ymm9) : polynomial branch (ymm13).
312  DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
313  DB  196,195,21,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
; Same computation for ymm1, with ymm15 holding the polynomial branch.
314  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
315  DB  197,116,89,233                      ; vmulps        %ymm1,%ymm1,%ymm13
316  DB  196,65,125,111,251                  ; vmovdqa       %ymm11,%ymm15
317  DB  196,66,117,168,252                  ; vfmadd213ps   %ymm12,%ymm1,%ymm15
318  DB  196,66,21,168,254                   ; vfmadd213ps   %ymm14,%ymm13,%ymm15
319  DB  196,193,116,194,202,1               ; vcmpltps      %ymm10,%ymm1,%ymm1
320  DB  196,195,5,74,201,16                 ; vblendvps     %ymm1,%ymm9,%ymm15,%ymm1
; Same computation for ymm2; ymm8 and ymm11 are consumed in place here because
; they are not needed afterwards.
321  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
322  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
323  DB  196,66,109,168,220                  ; vfmadd213ps   %ymm12,%ymm2,%ymm11
324  DB  196,66,53,168,222                   ; vfmadd213ps   %ymm14,%ymm9,%ymm11
325  DB  196,193,108,194,210,1               ; vcmpltps      %ymm10,%ymm2,%ymm2
326  DB  196,195,37,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm11,%ymm2
327  DB  72,173                              ; lods          %ds:(%rsi),%rax
328  DB  255,224                             ; jmpq          *%rax
329
; _sk_to_srgb_hsw -- applies the same piecewise function to every lane of
; ymm0, ymm1 and ymm2 (ymm3 is not touched).  With r = rsqrt(x) computed by
; the approximate vrsqrtps, for an input x:
;   x <  ~0.0043 :  x * ~12.46
;   otherwise    :  min(1.0, ~0.6892 * rcp(r) + ~0.4112 * rsqrt(r) - ~0.0988)
; rcp(r) approximates sqrt(x) and rsqrt(r) approximates x^(1/4).  The decimals
; are the float values of the hex immediates below.  ymm8-ymm15 are scratch.
; Finally jumps to the next list entry.
; NOTE(review): presumably a linear-to-sRGB approximation (stage name);
; confirm the coefficients against the generator's C++ source.
330PUBLIC _sk_to_srgb_hsw
331_sk_to_srgb_hsw LABEL PROC
; For ymm0: ymm8 = rsqrt(x), ymm11 = rcp(ymm8), ymm12 = rsqrt(ymm8).
332  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
333  DB  196,65,124,83,216                   ; vrcpps        %ymm8,%ymm11
334  DB  196,65,124,82,224                   ; vrsqrtps      %ymm8,%ymm12
; ymm8 = ~12.46 (0x41475c29); ymm13 = linear branch for ymm0.
335  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
336  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
337  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
338  DB  197,60,89,232                       ; vmulps        %ymm0,%ymm8,%ymm13
; ymm9 = 1.0 (0x3f800000), the upper clamp.
339  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
340  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
341  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
; ymm10 = ~0.4112 (0x3ed287c2), coefficient of the rsqrt(r) term.
342  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
343  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
344  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
; ymm14 = ~0.6892 (0x3f306fce), coefficient of the rcp(r) term.
345  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
346  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
347  DB  196,66,125,88,246                   ; vpbroadcastd  %xmm14,%ymm14
; ymm15 = -(~0.0988): 0x3dca57a8 with the sign bit flipped by the xor.
348  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
349  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
350  DB  197,121,110,248                     ; vmovd         %eax,%xmm15
351  DB  196,66,125,88,255                   ; vpbroadcastd  %xmm15,%ymm15
; ymm11 = 0.6892 * ymm11 - 0.0988, then += 0.4112 * ymm12, then min with 1.0.
352  DB  196,66,13,168,223                   ; vfmadd213ps   %ymm15,%ymm14,%ymm11
353  DB  196,66,45,184,220                   ; vfmadd231ps   %ymm12,%ymm10,%ymm11
354  DB  196,65,52,93,219                    ; vminps        %ymm11,%ymm9,%ymm11
; ymm12 is reused as the threshold ~0.0043 (0x3b8ce704).
355  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
356  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
357  DB  196,66,125,88,228                   ; vpbroadcastd  %xmm12,%ymm12
; ymm0 = (ymm0 < threshold) ? linear branch (ymm13) : curved branch (ymm11).
358  DB  196,193,124,194,196,1               ; vcmpltps      %ymm12,%ymm0,%ymm0
359  DB  196,195,37,74,197,0                 ; vblendvps     %ymm0,%ymm13,%ymm11,%ymm0
; Same computation for ymm1: ymm13 = curved branch, ymm11 = linear branch.
360  DB  197,124,82,217                      ; vrsqrtps      %ymm1,%ymm11
361  DB  196,65,124,83,235                   ; vrcpps        %ymm11,%ymm13
362  DB  196,65,124,82,219                   ; vrsqrtps      %ymm11,%ymm11
363  DB  196,66,13,168,239                   ; vfmadd213ps   %ymm15,%ymm14,%ymm13
364  DB  196,66,45,184,235                   ; vfmadd231ps   %ymm11,%ymm10,%ymm13
365  DB  197,60,89,217                       ; vmulps        %ymm1,%ymm8,%ymm11
366  DB  196,65,52,93,237                    ; vminps        %ymm13,%ymm9,%ymm13
367  DB  196,193,116,194,204,1               ; vcmpltps      %ymm12,%ymm1,%ymm1
368  DB  196,195,21,74,203,16                ; vblendvps     %ymm1,%ymm11,%ymm13,%ymm1
; Same computation for ymm2; ymm9 and ymm8 are consumed in place here because
; they are not needed afterwards.
369  DB  197,124,82,218                      ; vrsqrtps      %ymm2,%ymm11
370  DB  196,65,124,83,235                   ; vrcpps        %ymm11,%ymm13
371  DB  196,66,13,168,239                   ; vfmadd213ps   %ymm15,%ymm14,%ymm13
372  DB  196,65,124,82,219                   ; vrsqrtps      %ymm11,%ymm11
373  DB  196,66,45,184,235                   ; vfmadd231ps   %ymm11,%ymm10,%ymm13
374  DB  196,65,52,93,205                    ; vminps        %ymm13,%ymm9,%ymm9
375  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
376  DB  196,193,108,194,212,1               ; vcmpltps      %ymm12,%ymm2,%ymm2
377  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
378  DB  72,173                              ; lods          %ds:(%rsi),%rax
379  DB  255,224                             ; jmpq          *%rax
380
; _sk_scale_1_float_hsw -- loads a pointer from the list at (%rsi), broadcasts
; the single float it points at into ymm8, and multiplies ymm0-ymm3 by it.
; Then jumps to the next list entry.
381PUBLIC _sk_scale_1_float_hsw
382_sk_scale_1_float_hsw LABEL PROC
; rax = pointer to the scale factor; rsi advances by 8.
383  DB  72,173                              ; lods          %ds:(%rsi),%rax
384  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
385  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
386  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
387  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
388  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
389  DB  72,173                              ; lods          %ds:(%rsi),%rax
390  DB  255,224                             ; jmpq          *%rax
391
; _sk_scale_u8_hsw -- multiplies ymm0-ymm3 lane-wise by bytes read from memory,
; each byte converted to float and scaled by ~1/255 (0x3b808081).
; The byte address is (pointer stored at the list entry's target) + %rdi.
;   %rcx == 0 : 8 bytes are loaded with one 64-bit load.
;   %rcx != 0 : exactly %rcx bytes are read one at a time and packed into the
;               low bytes of a 64-bit value; the remaining lanes are zero.
; %rcx is saved in %r8 on entry and restored before the jump to the next list
; entry.  Scratch: r8-r11, ymm8, ymm9.
; NOTE(review): the byte loop has no upper bound on %rcx; presumably callers
; pass 1..7 there -- confirm against the driver.
392PUBLIC _sk_scale_u8_hsw
393_sk_scale_u8_hsw LABEL PROC
; Keep the count in r8 because cl is needed as the shift count further down.
394  DB  73,137,200                          ; mov           %rcx,%r8
; rax = list entry; rax = pointer stored at that entry; rax += rdi.
395  DB  72,173                              ; lods          %ds:(%rsi),%rax
396  DB  72,139,0                            ; mov           (%rax),%rax
397  DB  72,1,248                            ; add           %rdi,%rax
; Nonzero count: take the byte-by-byte path at offset 0x48.
398  DB  77,133,192                          ; test          %r8,%r8
399  DB  117,56                              ; jne           556 <_sk_scale_u8_hsw+0x48>
400  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; Offset 0x14 (re-entry point for the byte path): widen 8 bytes to 8 dwords,
; convert to float and scale by ~1/255.
401  DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
402  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
403  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
404  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
405  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
406  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
407  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
408  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
409  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
410  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Load the next entry, restore rcx from r8, and jump.
411  DB  72,173                              ; lods          %ds:(%rsi),%rax
412  DB  76,137,193                          ; mov           %r8,%rcx
413  DB  255,224                             ; jmpq          *%rax
; Offset 0x48, byte path: rcx = shift amount (0, 8, 16, ...), r10 = bytes
; left to read, r9 = accumulated 64-bit value.
414  DB  49,201                              ; xor           %ecx,%ecx
415  DB  77,137,194                          ; mov           %r8,%r10
416  DB  69,49,201                           ; xor           %r9d,%r9d
; Offset 0x50, loop: r9 |= (byte at rax) << rcx; rax += 1; rcx += 8.
417  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
418  DB  72,255,192                          ; inc           %rax
419  DB  73,211,227                          ; shl           %cl,%r11
420  DB  77,9,217                            ; or            %r11,%r9
421  DB  72,131,193,8                        ; add           $0x8,%rcx
422  DB  73,255,202                          ; dec           %r10
423  DB  117,234                             ; jne           55e <_sk_scale_u8_hsw+0x50>
; Move the packed bytes into xmm8 and rejoin the common path at offset 0x14.
424  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
425  DB  235,167                             ; jmp           522 <_sk_scale_u8_hsw+0x14>
426
; _sk_lerp_1_float_hsw -- loads a pointer from the list at (%rsi), broadcasts
; the single float t it points at into ymm8, and computes per lane:
;   ymm0 = (ymm0 - ymm4) * t + ymm4      ymm1 = (ymm1 - ymm5) * t + ymm5
;   ymm2 = (ymm2 - ymm6) * t + ymm6      ymm3 = (ymm3 - ymm7) * t + ymm7
; Then jumps to the next list entry.
427PUBLIC _sk_lerp_1_float_hsw
428_sk_lerp_1_float_hsw LABEL PROC
; rax = pointer to t; rsi advances by 8.
429  DB  72,173                              ; lods          %ds:(%rsi),%rax
430  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; Each pair: subtract, then fused multiply-add (213 form: dst = ymm8*dst + src).
431  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
432  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
433  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
434  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
435  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
436  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
437  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
438  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
439  DB  72,173                              ; lods          %ds:(%rsi),%rax
440  DB  255,224                             ; jmpq          *%rax
441
; _sk_lerp_u8_hsw -- per lane, with t = (byte read from memory) * ~1/255
; (0x3b808081):
;   ymm0 = (ymm0 - ymm4) * t + ymm4      ymm1 = (ymm1 - ymm5) * t + ymm5
;   ymm2 = (ymm2 - ymm6) * t + ymm6      ymm3 = (ymm3 - ymm7) * t + ymm7
; The byte address is (pointer stored at the list entry's target) + %rdi.
;   %rcx == 0 : 8 bytes are loaded with one 64-bit load.
;   %rcx != 0 : exactly %rcx bytes are read one at a time and packed into the
;               low bytes of a 64-bit value; the remaining lanes get t = 0.
; %rcx is saved in %r8 on entry and restored before the jump to the next list
; entry.  Scratch: r8-r11, ymm8, ymm9.
; NOTE(review): the byte loop has no upper bound on %rcx; presumably callers
; pass 1..7 there -- confirm against the driver.
442PUBLIC _sk_lerp_u8_hsw
443_sk_lerp_u8_hsw LABEL PROC
; Keep the count in r8 because cl is needed as the shift count further down.
444  DB  73,137,200                          ; mov           %rcx,%r8
; rax = list entry; rax = pointer stored at that entry; rax += rdi.
445  DB  72,173                              ; lods          %ds:(%rsi),%rax
446  DB  72,139,0                            ; mov           (%rax),%rax
447  DB  72,1,248                            ; add           %rdi,%rax
; Nonzero count: take the byte-by-byte path at offset 0x5c.
448  DB  77,133,192                          ; test          %r8,%r8
449  DB  117,76                              ; jne           606 <_sk_lerp_u8_hsw+0x5c>
450  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; Offset 0x14 (re-entry point for the byte path): widen 8 bytes to 8 dwords,
; convert to float and scale by ~1/255 to form t in ymm8.
451  DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
452  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
453  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
454  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
455  DB  196,66,125,88,201                   ; vpbroadcastd  %xmm9,%ymm9
456  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
; Each pair: subtract, then fused multiply-add (213 form: dst = ymm8*dst + src).
457  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
458  DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
459  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
460  DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
461  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
462  DB  196,226,61,168,214                  ; vfmadd213ps   %ymm6,%ymm8,%ymm2
463  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
464  DB  196,226,61,168,223                  ; vfmadd213ps   %ymm7,%ymm8,%ymm3
; Load the next entry, restore rcx from r8, and jump.
465  DB  72,173                              ; lods          %ds:(%rsi),%rax
466  DB  76,137,193                          ; mov           %r8,%rcx
467  DB  255,224                             ; jmpq          *%rax
; Offset 0x5c, byte path: rcx = shift amount (0, 8, 16, ...), r10 = bytes
; left to read, r9 = accumulated 64-bit value.
468  DB  49,201                              ; xor           %ecx,%ecx
469  DB  77,137,194                          ; mov           %r8,%r10
470  DB  69,49,201                           ; xor           %r9d,%r9d
; Offset 0x64, loop: r9 |= (byte at rax) << rcx; rax += 1; rcx += 8.
471  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
472  DB  72,255,192                          ; inc           %rax
473  DB  73,211,227                          ; shl           %cl,%r11
474  DB  77,9,217                            ; or            %r11,%r9
475  DB  72,131,193,8                        ; add           $0x8,%rcx
476  DB  73,255,202                          ; dec           %r10
477  DB  117,234                             ; jne           60e <_sk_lerp_u8_hsw+0x64>
; Move the packed bytes into xmm8 and rejoin the common path at offset 0x14.
478  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
479  DB  235,147                             ; jmp           5be <_sk_lerp_u8_hsw+0x14>
480
; _sk_lerp_565_hsw
;
; Reads packed 16-bit values and uses their three bit fields as per-channel
; blend factors between ymm4..ymm6 and ymm0..ymm2:
;   ymm0 = ymm4 + (ymm0 - ymm4) * f(bits 15..11)
;   ymm1 = ymm5 + (ymm1 - ymm5) * f(bits 10..5)
;   ymm2 = ymm6 + (ymm2 - ymm6) * f(bits  4..0)
;   ymm3 = 0x3f800000 (1.0f) in every lane
; f() masks the field in place, converts it to float and multiplies by a
; broadcast constant (about 1/0xf800, 1/0x7e0 and 1/0x1f respectively).
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 2 when addressing)
;          rcx = 0 for a full 8-element load, otherwise the tail path is taken
; Scratch: rax, r8, r9, r10, ymm8-ymm10
; Exit:    jumps to the address held in the next list entry.
481PUBLIC _sk_lerp_565_hsw
482_sk_lerp_565_hsw LABEL PROC
; rax = this entry's context pointer; r10 = base pointer stored at its start.
483  DB  72,173                              ; lods          %ds:(%rsi),%rax
484  DB  76,139,16                           ; mov           (%rax),%r10
485  DB  72,133,201                          ; test          %rcx,%rcx
486  DB  15,133,179,0,0,0                    ; jne           6ec <_sk_lerp_565_hsw+0xc1>
; Full path: load 8 words, widen to 8 dwords in ymm8.
487  DB  196,193,122,111,28,122              ; vmovdqu       (%r10,%rdi,2),%xmm3
; The tail path rejoins here with the partial data already in xmm3.
488  DB  196,98,125,51,195                   ; vpmovzxwd     %xmm3,%ymm8
; ymm9 = float(v & 0xf800) * scale
489  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
490  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
491  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
492  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
493  DB  197,124,91,203                      ; vcvtdq2ps     %ymm3,%ymm9
494  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
495  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
496  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
497  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
; ymm10 = float(v & 0x7e0) * scale
498  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
499  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
500  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
501  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
502  DB  197,124,91,211                      ; vcvtdq2ps     %ymm3,%ymm10
503  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
504  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
505  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
506  DB  197,44,89,211                       ; vmulps        %ymm3,%ymm10,%ymm10
; ymm3 = float(v & 0x1f) * scale
507  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
508  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
509  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
510  DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
511  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
512  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
513  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
514  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
515  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Per channel: x = (x - d) * factor + d, using subtract followed by FMA.
516  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
517  DB  196,226,53,168,196                  ; vfmadd213ps   %ymm4,%ymm9,%ymm0
518  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
519  DB  196,226,45,168,205                  ; vfmadd213ps   %ymm5,%ymm10,%ymm1
520  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
521  DB  196,226,101,168,214                 ; vfmadd213ps   %ymm6,%ymm3,%ymm2
; ymm3 = 1.0f in all lanes.
522  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
523  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
524  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
; Fetch the next list entry and jump to it.
525  DB  72,173                              ; lods          %ds:(%rsi),%rax
526  DB  255,224                             ; jmpq          *%rax
; Tail path (rcx != 0): r8b = (rcx & 7) - 1 selects a jump-table entry.
; If that value is above 6 (rcx & 7 == 0) the code rejoins the main path
; with xmm3 left zeroed.
527  DB  65,137,200                          ; mov           %ecx,%r8d
528  DB  65,128,224,7                        ; and           $0x7,%r8b
529  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
530  DB  65,254,200                          ; dec           %r8b
531  DB  65,128,248,6                        ; cmp           $0x6,%r8b
532  DB  15,135,59,255,255,255               ; ja            63f <_sk_lerp_565_hsw+0x14>
533  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
; r9 = jump-table base; target = r9 + sign-extended 32-bit table[r8].
534  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 758 <_sk_lerp_565_hsw+0x12d>
535  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
536  DB  76,1,200                            ; add           %r9,%rax
537  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands so that words k..0 are
; inserted into xmm3, highest lane first.
538  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
539  DB  196,193,97,196,92,122,12,6          ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm3
540  DB  196,193,97,196,92,122,10,5          ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm3,%xmm3
541  DB  196,193,97,196,92,122,8,4           ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm3,%xmm3
542  DB  196,193,97,196,92,122,6,3           ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm3,%xmm3
543  DB  196,193,97,196,92,122,4,2           ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
544  DB  196,193,97,196,92,122,2,1           ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
545  DB  196,193,97,196,28,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm3,%xmm3
546  DB  233,231,254,255,255                 ; jmpq          63f <_sk_lerp_565_hsw+0x14>
; Jump table: the 28 bytes below are data, not code. They hold seven
; little-endian signed 32-bit offsets relative to the table base
; (-12, -20, -28, -36, -44, -52, -64); the mnemonics shown on the right are
; artifacts of disassembling that data.
547  DB  244                                 ; hlt
548  DB  255                                 ; (bad)
549  DB  255                                 ; (bad)
550  DB  255                                 ; (bad)
551  DB  236                                 ; in            (%dx),%al
552  DB  255                                 ; (bad)
553  DB  255                                 ; (bad)
554  DB  255,228                             ; jmpq          *%rsp
555  DB  255                                 ; (bad)
556  DB  255                                 ; (bad)
557  DB  255                                 ; (bad)
558  DB  220,255                             ; fdivr         %st,%st(7)
559  DB  255                                 ; (bad)
560  DB  255,212                             ; callq         *%rsp
561  DB  255                                 ; (bad)
562  DB  255                                 ; (bad)
563  DB  255,204                             ; dec           %esp
564  DB  255                                 ; (bad)
565  DB  255                                 ; (bad)
566  DB  255,192                             ; inc           %eax
567  DB  255                                 ; (bad)
568  DB  255                                 ; (bad)
569  DB  255                                 ; .byte         0xff
570
; _sk_load_tables_hsw
;
; Loads 8 packed 32-bit values and splits each into four bytes. The three low
; bytes index three float tables via gathers; the top byte is converted
; directly:
;   ymm0 = table at 0x08(ctx) [ v        & 0xff]
;   ymm1 = table at 0x10(ctx) [(v >>  8) & 0xff]
;   ymm2 = table at 0x18(ctx) [(v >> 16) & 0xff]
;   ymm3 = float(v >> 24) * 0x3b808081 (about 1/255)
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 4 when addressing)
;          rcx = 0 for a full 8-element load, otherwise the element count
;                used to build the load mask
; Scratch: rax, r8, r9, r10, ymm8-ymm10; rcx is reused as scratch and
;          restored from r8 before leaving.
; Exit:    jumps to the address held in the next list entry.
571PUBLIC _sk_load_tables_hsw
572_sk_load_tables_hsw LABEL PROC
; Save rcx in r8; r9 = (pointer stored at start of context) + rdi*4.
573  DB  73,137,200                          ; mov           %rcx,%r8
574  DB  72,173                              ; lods          %ds:(%rsi),%rax
575  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
576  DB  76,3,8                              ; add           (%rax),%r9
577  DB  77,133,192                          ; test          %r8,%r8
578  DB  117,121                             ; jne           802 <_sk_load_tables_hsw+0x8e>
579  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
; The tail path rejoins here with ymm3 already loaded. ymm2 = 0xff mask.
580  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
581  DB  197,249,110,193                     ; vmovd         %ecx,%xmm0
582  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
583  DB  197,237,219,203                     ; vpand         %ymm3,%ymm2,%ymm1
; vpcmpeqd reg,reg,reg yields all-ones, used as the gather enable mask.
584  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
585  DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
586  DB  76,139,72,16                        ; mov           0x10(%rax),%r9
587  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
588  DB  196,226,53,146,4,137                ; vgatherdps    %ymm9,(%rcx,%ymm1,4),%ymm0
589  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
590  DB  197,109,219,201                     ; vpand         %ymm1,%ymm2,%ymm9
591  DB  196,65,45,118,210                   ; vpcmpeqd      %ymm10,%ymm10,%ymm10
592  DB  196,130,45,146,12,137               ; vgatherdps    %ymm10,(%r9,%ymm9,4),%ymm1
593  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
594  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
595  DB  196,65,109,219,201                  ; vpand         %ymm9,%ymm2,%ymm9
596  DB  196,162,61,146,20,136               ; vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
; Top byte: convert to float and scale.
597  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
598  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
599  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
600  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
601  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
602  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Fetch the next list entry, restore rcx, and jump.
603  DB  72,173                              ; lods          %ds:(%rsi),%rax
604  DB  76,137,193                          ; mov           %r8,%rcx
605  DB  255,224                             ; jmpq          *%rax
; Tail path: r10 = all-ones shifted right by (8 - r8)*8 bits, i.e. 0xff in
; each of the low r8 bytes. Sign-extending those bytes to dwords gives a lane
; mask so vpmaskmovd loads only the first r8 elements.
606  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
607  DB  68,41,193                           ; sub           %r8d,%ecx
608  DB  192,225,3                           ; shl           $0x3,%cl
609  DB  73,199,194,255,255,255,255          ; mov           $0xffffffffffffffff,%r10
610  DB  73,211,234                          ; shr           %cl,%r10
611  DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
612  DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
613  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
614  DB  233,99,255,255,255                  ; jmpq          78e <_sk_load_tables_hsw+0x1a>
615
; _sk_load_a8_hsw
;
; Loads 8 bytes, widens each to a float and scales it into ymm3, and zeroes
; ymm0..ymm2:
;   ymm3 = float(byte) * 0x3b808081 (about 1/255)
;   ymm0 = ymm1 = ymm2 = 0
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (byte offset into the buffer)
;          rcx = 0 for a full 8-byte load, otherwise the number of bytes to
;                read one at a time
; Scratch: rax, r8, r9, r10, r11; rcx is reused as scratch and restored from
;          r8 before leaving.
; Exit:    jumps to the address held in the next list entry.
616PUBLIC _sk_load_a8_hsw
617_sk_load_a8_hsw LABEL PROC
; Save rcx in r8; rax = (pointer stored at start of context) + rdi.
618  DB  73,137,200                          ; mov           %rcx,%r8
619  DB  72,173                              ; lods          %ds:(%rsi),%rax
620  DB  72,139,0                            ; mov           (%rax),%rax
621  DB  72,1,248                            ; add           %rdi,%rax
622  DB  77,133,192                          ; test          %r8,%r8
623  DB  117,50                              ; jne           86d <_sk_load_a8_hsw+0x42>
624  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
; The tail path rejoins here with the gathered bytes already in xmm0.
625  DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
626  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
627  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
628  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
629  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
630  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
; Fetch the next list entry, clear ymm0..ymm2, restore rcx, and jump.
631  DB  72,173                              ; lods          %ds:(%rsi),%rax
632  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
633  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
634  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
635  DB  76,137,193                          ; mov           %r8,%rcx
636  DB  255,224                             ; jmpq          *%rax
; Tail path: read r8 bytes one at a time and pack them little-endian into r9
; (byte i lands at bit 8*i), so no byte beyond the requested count is read.
; rcx = current shift amount, r10 = bytes remaining.
637  DB  49,201                              ; xor           %ecx,%ecx
638  DB  77,137,194                          ; mov           %r8,%r10
639  DB  69,49,201                           ; xor           %r9d,%r9d
640  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
641  DB  72,255,192                          ; inc           %rax
642  DB  73,211,227                          ; shl           %cl,%r11
643  DB  77,9,217                            ; or            %r11,%r9
644  DB  72,131,193,8                        ; add           $0x8,%rcx
645  DB  73,255,202                          ; dec           %r10
646  DB  117,234                             ; jne           875 <_sk_load_a8_hsw+0x4a>
647  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
648  DB  235,173                             ; jmp           83f <_sk_load_a8_hsw+0x14>
649
; _sk_store_a8_hsw
;
; Scales ymm3 by 0x437f0000 (255.0f), converts to integers, narrows the 8
; lanes to 8 unsigned bytes (with saturation) and stores them.
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (byte offset into the buffer)
;          rcx = 0 for a full 8-byte store, otherwise the tail path is taken
;          ymm3 = source values
; Scratch: rax, r8, r9, ymm8, ymm9
; Exit:    jumps to the address held in the next list entry.
650PUBLIC _sk_store_a8_hsw
651_sk_store_a8_hsw LABEL PROC
; r9 = destination base pointer stored at the start of the context.
652  DB  72,173                              ; lods          %ds:(%rsi),%rax
653  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = int(ymm3 * 255.0f)
654  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
655  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
656  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
657  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
658  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
; Narrow 8 dwords -> 8 words -> 8 bytes in the low half of xmm8.
659  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
660  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
661  DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
662  DB  72,133,201                          ; test          %rcx,%rcx
663  DB  117,10                              ; jne           8cd <_sk_store_a8_hsw+0x3b>
; Full path: store all 8 bytes at once.
664  DB  196,65,123,17,4,57                  ; vmovsd        %xmm8,(%r9,%rdi,1)
; Fetch the next list entry and jump to it (also the tail path's exit).
665  DB  72,173                              ; lods          %ds:(%rsi),%rax
666  DB  255,224                             ; jmpq          *%rax
; Tail path: r8b = (rcx & 7) - 1 selects a jump-table entry; if it is above 6
; (rcx & 7 == 0) nothing is stored.
667  DB  65,137,200                          ; mov           %ecx,%r8d
668  DB  65,128,224,7                        ; and           $0x7,%r8b
669  DB  65,254,200                          ; dec           %r8b
670  DB  65,128,248,6                        ; cmp           $0x6,%r8b
671  DB  119,236                             ; ja            8c9 <_sk_store_a8_hsw+0x37>
; Widen bytes back to words, so element k sits at byte offset 2k of xmm8.
672  DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
673  DB  65,15,182,192                       ; movzbl        %r8b,%eax
; r8 = jump-table base; target = r8 + sign-extended 32-bit table[rax].
674  DB  76,141,5,67,0,0,0                   ; lea           0x43(%rip),%r8        # 930 <_sk_store_a8_hsw+0x9e>
675  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
676  DB  76,1,192                            ; add           %r8,%rax
677  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands so that elements k..0 are
; stored, highest first.
678  DB  196,67,121,20,68,57,6,12            ; vpextrb       $0xc,%xmm8,0x6(%r9,%rdi,1)
679  DB  196,67,121,20,68,57,5,10            ; vpextrb       $0xa,%xmm8,0x5(%r9,%rdi,1)
680  DB  196,67,121,20,68,57,4,8             ; vpextrb       $0x8,%xmm8,0x4(%r9,%rdi,1)
681  DB  196,67,121,20,68,57,3,6             ; vpextrb       $0x6,%xmm8,0x3(%r9,%rdi,1)
682  DB  196,67,121,20,68,57,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r9,%rdi,1)
683  DB  196,67,121,20,68,57,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r9,%rdi,1)
684  DB  196,67,121,20,4,57,0                ; vpextrb       $0x0,%xmm8,(%r9,%rdi,1)
685  DB  235,154                             ; jmp           8c9 <_sk_store_a8_hsw+0x37>
; One byte of padding before the jump table.
686  DB  144                                 ; nop
; Jump table: the 28 bytes below are data, not code. They hold seven
; little-endian signed 32-bit offsets relative to the table base
; (-10, -18, -26, -34, -42, -50, -58); the mnemonics shown on the right are
; artifacts of disassembling that data.
687  DB  246,255                             ; idiv          %bh
688  DB  255                                 ; (bad)
689  DB  255                                 ; (bad)
690  DB  238                                 ; out           %al,(%dx)
691  DB  255                                 ; (bad)
692  DB  255                                 ; (bad)
693  DB  255,230                             ; jmpq          *%rsi
694  DB  255                                 ; (bad)
695  DB  255                                 ; (bad)
696  DB  255                                 ; (bad)
697  DB  222,255                             ; fdivrp        %st,%st(7)
698  DB  255                                 ; (bad)
699  DB  255,214                             ; callq         *%rsi
700  DB  255                                 ; (bad)
701  DB  255                                 ; (bad)
702  DB  255,206                             ; dec           %esi
703  DB  255                                 ; (bad)
704  DB  255                                 ; (bad)
705  DB  255,198                             ; inc           %esi
706  DB  255                                 ; (bad)
707  DB  255                                 ; (bad)
708  DB  255                                 ; .byte         0xff
709
; _sk_load_565_hsw
;
; Loads 8 packed 16-bit values and unpacks their three bit fields to floats:
;   ymm0 = float(v & 0xf800) * 0x37842108 (about 1/0xf800)
;   ymm1 = float(v & 0x07e0) * 0x3a020821 (about 1/0x7e0)
;   ymm2 = float(v & 0x001f) * 0x3d042108 (about 1/0x1f)
;   ymm3 = 0x3f800000 (1.0f) in every lane
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 2 when addressing)
;          rcx = 0 for a full 8-element load, otherwise the tail path is taken
; Scratch: rax, r8, r9, r10
; Exit:    jumps to the address held in the next list entry.
710PUBLIC _sk_load_565_hsw
711_sk_load_565_hsw LABEL PROC
; r10 = source base pointer stored at the start of the context.
712  DB  72,173                              ; lods          %ds:(%rsi),%rax
713  DB  76,139,16                           ; mov           (%rax),%r10
714  DB  72,133,201                          ; test          %rcx,%rcx
715  DB  15,133,149,0,0,0                    ; jne           9ef <_sk_load_565_hsw+0xa3>
; Full path: load 8 words.
716  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
; The tail path rejoins here with the partial data already in xmm0.
717  DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
; ymm0 = float(v & 0xf800) * scale
718  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
719  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
720  DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
721  DB  197,253,219,194                     ; vpand         %ymm2,%ymm0,%ymm0
722  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
723  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
724  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
725  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
726  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
; ymm1 = float(v & 0x7e0) * scale
727  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
728  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
729  DB  196,226,125,88,201                  ; vpbroadcastd  %xmm1,%ymm1
730  DB  197,245,219,202                     ; vpand         %ymm2,%ymm1,%ymm1
731  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
732  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
733  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
734  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
735  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
; ymm2 = float(v & 0x1f) * scale
736  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
737  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
738  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
739  DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
740  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
741  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
742  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
743  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
744  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
; ymm3 = 1.0f in all lanes.
745  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
746  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
747  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
; Fetch the next list entry and jump to it.
748  DB  72,173                              ; lods          %ds:(%rsi),%rax
749  DB  255,224                             ; jmpq          *%rax
; Tail path (rcx != 0): r8b = (rcx & 7) - 1 selects a jump-table entry.
; If that value is above 6 (rcx & 7 == 0) the code rejoins the main path
; with xmm0 left zeroed.
750  DB  65,137,200                          ; mov           %ecx,%r8d
751  DB  65,128,224,7                        ; and           $0x7,%r8b
752  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
753  DB  65,254,200                          ; dec           %r8b
754  DB  65,128,248,6                        ; cmp           $0x6,%r8b
755  DB  15,135,89,255,255,255               ; ja            960 <_sk_load_565_hsw+0x14>
756  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
; r9 = jump-table base; target = r9 + sign-extended 32-bit table[r8].
757  DB  76,141,13,74,0,0,0                  ; lea           0x4a(%rip),%r9        # a5c <_sk_load_565_hsw+0x110>
758  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
759  DB  76,1,200                            ; add           %r9,%rax
760  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands so that words k..0 are
; inserted into xmm0, highest lane first.
761  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
762  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
763  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
764  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
765  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
766  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
767  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
768  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
769  DB  233,5,255,255,255                   ; jmpq          960 <_sk_load_565_hsw+0x14>
; One byte of padding before the jump table.
770  DB  144                                 ; nop
; Jump table: the 28 bytes below are data, not code. They hold seven
; little-endian signed 32-bit offsets relative to the table base
; (-13, -21, -29, -37, -45, -53, -65); the mnemonics shown on the right are
; artifacts of disassembling that data.
771  DB  243,255                             ; repz          (bad)
772  DB  255                                 ; (bad)
773  DB  255                                 ; (bad)
774  DB  235,255                             ; jmp           a61 <_sk_load_565_hsw+0x115>
775  DB  255                                 ; (bad)
776  DB  255,227                             ; jmpq          *%rbx
777  DB  255                                 ; (bad)
778  DB  255                                 ; (bad)
779  DB  255                                 ; (bad)
780  DB  219,255                             ; (bad)
781  DB  255                                 ; (bad)
782  DB  255,211                             ; callq         *%rbx
783  DB  255                                 ; (bad)
784  DB  255                                 ; (bad)
785  DB  255,203                             ; dec           %ebx
786  DB  255                                 ; (bad)
787  DB  255                                 ; (bad)
788  DB  255                                 ; (bad)
789  DB  191                                 ; .byte         0xbf
790  DB  255                                 ; (bad)
791  DB  255                                 ; (bad)
792  DB  255                                 ; .byte         0xff
793
; _sk_store_565_hsw
;
; Packs ymm0..ymm2 into 8 16-bit values and stores them:
;   v = (int(ymm0 * 31.0f) << 11) | (int(ymm1 * 63.0f) << 5) | int(ymm2 * 31.0f)
; The 32-bit results are narrowed to words with unsigned saturation.
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 2 when addressing)
;          rcx = 0 for a full 8-element store, otherwise the tail path is taken
;          ymm0..ymm2 = source values
; Scratch: rax, r8, r9, ymm8-ymm10
; Exit:    jumps to the address held in the next list entry.
794PUBLIC _sk_store_565_hsw
795_sk_store_565_hsw LABEL PROC
; r9 = destination base pointer stored at the start of the context.
796  DB  72,173                              ; lods          %ds:(%rsi),%rax
797  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = 31.0f broadcast; ymm9 = int(ymm0 * 31) << 11
798  DB  184,0,0,248,65                      ; mov           $0x41f80000,%eax
799  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
800  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
801  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
802  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
803  DB  196,193,53,114,241,11               ; vpslld        $0xb,%ymm9,%ymm9
; ymm10 = int(ymm1 * 63.0f) << 5, then merged into ymm9.
804  DB  184,0,0,124,66                      ; mov           $0x427c0000,%eax
805  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
806  DB  196,66,125,88,210                   ; vpbroadcastd  %xmm10,%ymm10
807  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
808  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
809  DB  196,193,45,114,242,5                ; vpslld        $0x5,%ymm10,%ymm10
810  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
; ymm8 = int(ymm2 * 31), OR-ed with the other two fields.
811  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
812  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
813  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
; Narrow 8 dwords to 8 words in xmm8.
814  DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
815  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
816  DB  72,133,201                          ; test          %rcx,%rcx
817  DB  117,10                              ; jne           ae4 <_sk_store_565_hsw+0x6c>
; Full path: store all 8 words at once.
818  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; Fetch the next list entry and jump to it (also the tail path's exit).
819  DB  72,173                              ; lods          %ds:(%rsi),%rax
820  DB  255,224                             ; jmpq          *%rax
; Tail path: r8b = (rcx & 7) - 1 selects a jump-table entry; if it is above 6
; (rcx & 7 == 0) nothing is stored.
821  DB  65,137,200                          ; mov           %ecx,%r8d
822  DB  65,128,224,7                        ; and           $0x7,%r8b
823  DB  65,254,200                          ; dec           %r8b
824  DB  65,128,248,6                        ; cmp           $0x6,%r8b
825  DB  119,236                             ; ja            ae0 <_sk_store_565_hsw+0x68>
826  DB  65,15,182,192                       ; movzbl        %r8b,%eax
; r8 = jump-table base; target = r8 + sign-extended 32-bit table[rax].
827  DB  76,141,5,69,0,0,0                   ; lea           0x45(%rip),%r8        # b44 <_sk_store_565_hsw+0xcc>
828  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
829  DB  76,1,192                            ; add           %r8,%rax
830  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entry k of the table lands so that words k..0 are
; stored, highest first.
831  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
832  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
833  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
834  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
835  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
836  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
837  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
838  DB  235,159                             ; jmp           ae0 <_sk_store_565_hsw+0x68>
; Three bytes of padding before the jump table.
839  DB  15,31,0                             ; nopl          (%rax)
; Jump table: the 28 bytes below are data, not code. They hold seven
; little-endian signed 32-bit offsets relative to the table base
; (-12, -20, -28, -36, -44, -52, -60); the mnemonics shown on the right are
; artifacts of disassembling that data.
840  DB  244                                 ; hlt
841  DB  255                                 ; (bad)
842  DB  255                                 ; (bad)
843  DB  255                                 ; (bad)
844  DB  236                                 ; in            (%dx),%al
845  DB  255                                 ; (bad)
846  DB  255                                 ; (bad)
847  DB  255,228                             ; jmpq          *%rsp
848  DB  255                                 ; (bad)
849  DB  255                                 ; (bad)
850  DB  255                                 ; (bad)
851  DB  220,255                             ; fdivr         %st,%st(7)
852  DB  255                                 ; (bad)
853  DB  255,212                             ; callq         *%rsp
854  DB  255                                 ; (bad)
855  DB  255                                 ; (bad)
856  DB  255,204                             ; dec           %esp
857  DB  255                                 ; (bad)
858  DB  255                                 ; (bad)
859  DB  255,196                             ; inc           %esp
860  DB  255                                 ; (bad)
861  DB  255                                 ; (bad)
862  DB  255                                 ; .byte         0xff
863
; _sk_load_8888_hsw
;
; Loads 8 packed 32-bit values and unpacks their four bytes to floats, each
; multiplied by 0x3b808081 (about 1/255):
;   ymm0 = float( v        & 0xff) * s
;   ymm1 = float((v >>  8) & 0xff) * s
;   ymm2 = float((v >> 16) & 0xff) * s
;   ymm3 = float( v >> 24        ) * s
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 4 when addressing)
;          rcx = 0 for a full 8-element load, otherwise the element count
;                used to build the load mask
; Scratch: rax, r8, r9, ymm8, ymm9; rcx is reused as scratch on the tail path
;          and restored from r8 before leaving.
; Exit:    jumps to the address held in the next list entry.
864PUBLIC _sk_load_8888_hsw
865_sk_load_8888_hsw LABEL PROC
; Save rcx in r8; r9 = (pointer stored at start of context) + rdi*4.
866  DB  73,137,200                          ; mov           %rcx,%r8
867  DB  72,173                              ; lods          %ds:(%rsi),%rax
868  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
869  DB  76,3,8                              ; add           (%rax),%r9
870  DB  77,133,192                          ; test          %r8,%r8
871  DB  117,104                             ; jne           bdd <_sk_load_8888_hsw+0x7d>
872  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
; The tail path rejoins here with ymm3 already loaded. ymm2 = 0xff mask.
873  DB  184,255,0,0,0                       ; mov           $0xff,%eax
874  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
875  DB  196,226,125,88,208                  ; vpbroadcastd  %xmm0,%ymm2
; Byte 0 -> ymm0; ymm8 keeps the broadcast scale for all four channels.
876  DB  197,237,219,195                     ; vpand         %ymm3,%ymm2,%ymm0
877  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
878  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
879  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
880  DB  196,98,125,88,193                   ; vpbroadcastd  %xmm1,%ymm8
881  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
; Byte 1 -> ymm1
882  DB  197,245,114,211,8                   ; vpsrld        $0x8,%ymm3,%ymm1
883  DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
884  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
885  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
; Byte 2 -> ymm2
886  DB  197,181,114,211,16                  ; vpsrld        $0x10,%ymm3,%ymm9
887  DB  196,193,109,219,209                 ; vpand         %ymm9,%ymm2,%ymm2
888  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
889  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
; Byte 3 -> ymm3 (no mask needed after the logical shift by 24).
890  DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
891  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
892  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
; Fetch the next list entry, restore rcx, and jump.
893  DB  72,173                              ; lods          %ds:(%rsi),%rax
894  DB  76,137,193                          ; mov           %r8,%rcx
895  DB  255,224                             ; jmpq          *%rax
; Tail path: rax = all-ones shifted right by (8 - r8)*8 bits, i.e. 0xff in
; each of the low r8 bytes. Sign-extending those bytes to dwords gives a lane
; mask so vpmaskmovd loads only the first r8 elements.
896  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
897  DB  68,41,193                           ; sub           %r8d,%ecx
898  DB  192,225,3                           ; shl           $0x3,%cl
899  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
900  DB  72,211,232                          ; shr           %cl,%rax
901  DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
902  DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
903  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
904  DB  233,116,255,255,255                 ; jmpq          b7a <_sk_load_8888_hsw+0x1a>
905
; _sk_store_8888_hsw
;
; Packs ymm0..ymm3 into 8 32-bit values and stores them:
;   v =  int(ymm0 * 255.0f)
;     | (int(ymm1 * 255.0f) <<  8)
;     | (int(ymm2 * 255.0f) << 16)
;     | (int(ymm3 * 255.0f) << 24)
;
; In:      rsi = cursor into a list of 8-byte entries (consumed with lods)
;          rdi = element index (scaled by 4 when addressing)
;          rcx = 0 for a full 8-element store, otherwise the element count
;                used to build the store mask
;          ymm0..ymm3 = source values
; Scratch: rax, r8, r9, ymm8-ymm10; rcx is reused as scratch on the tail path
;          and restored from r8 before leaving.
; Exit:    jumps to the address held in the next list entry.
906PUBLIC _sk_store_8888_hsw
907_sk_store_8888_hsw LABEL PROC
; Save rcx in r8; r9 = (pointer stored at start of context) + rdi*4.
908  DB  73,137,200                          ; mov           %rcx,%r8
909  DB  72,173                              ; lods          %ds:(%rsi),%rax
910  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
911  DB  76,3,8                              ; add           (%rax),%r9
; ymm8 = 255.0f broadcast.
912  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
913  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
914  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
; ymm9 = int(ymm0*255) | (int(ymm1*255) << 8)
915  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
916  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
917  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
918  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
919  DB  196,193,45,114,242,8                ; vpslld        $0x8,%ymm10,%ymm10
920  DB  196,65,45,235,201                   ; vpor          %ymm9,%ymm10,%ymm9
; ymm8 = (int(ymm2*255) << 16) | (int(ymm3*255) << 24), then OR in ymm9.
921  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
922  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
923  DB  196,193,45,114,242,16               ; vpslld        $0x10,%ymm10,%ymm10
924  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
925  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
926  DB  196,193,61,114,240,24               ; vpslld        $0x18,%ymm8,%ymm8
927  DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
928  DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
929  DB  77,133,192                          ; test          %r8,%r8
930  DB  117,12                              ; jne           c7a <_sk_store_8888_hsw+0x74>
; Full path: store all 8 dwords at once.
931  DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
; Fetch the next list entry, restore rcx, and jump (also the tail's exit).
932  DB  72,173                              ; lods          %ds:(%rsi),%rax
933  DB  76,137,193                          ; mov           %r8,%rcx
934  DB  255,224                             ; jmpq          *%rax
; Tail path: rax = all-ones shifted right by (8 - r8)*8 bits, i.e. 0xff in
; each of the low r8 bytes. Sign-extending those bytes to dwords gives a lane
; mask so vpmaskmovd writes only the first r8 elements.
935  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
936  DB  68,41,193                           ; sub           %r8d,%ecx
937  DB  192,225,3                           ; shl           $0x3,%cl
938  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
939  DB  72,211,232                          ; shr           %cl,%rax
940  DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
941  DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
942  DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
943  DB  235,211                             ; jmp           c73 <_sk_store_8888_hsw+0x6d>
944
; _sk_load_f16_hsw
; Loads up to eight 8-byte elements (four 16-bit half floats each), de-interleaves
; them and widens the halves to single precision.
;   rsi  stream of 8-byte entries consumed with lods: the first entry points at a
;        slot holding the source base pointer, the second is the address jumped to
;        on exit.
;   rdi  element index; each element lives at base + rdi*8.
;   rcx  0 selects the full 8-element path; 1..7 loads only that many elements
;        and leaves the lanes of the missing elements zero.
; Out:   ymm0..ymm3 = lane 0..3 of every element, converted with vcvtph2ps.
; Clobbers rax, xmm8, xmm9 and the flags.
945PUBLIC _sk_load_f16_hsw
946_sk_load_f16_hsw LABEL PROC
947  DB  72,173                              ; lods          %ds:(%rsi),%rax
948  DB  72,139,0                            ; mov           (%rax),%rax
949  DB  72,133,201                          ; test          %rcx,%rcx
950  DB  117,97                              ; jne           d0b <_sk_load_f16_hsw+0x6b>
; Full path: four 16-byte loads cover elements 0..7.
951  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
952  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
953  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
954  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
; +0x21: shared transpose. Two rounds of word unpacks followed by qword unpacks
; gather lane k of all eight elements into one 128-bit register, which is then
; widened to eight floats. The partial path re-enters here.
955  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
956  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
957  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
958  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
959  DB  197,121,97,194                      ; vpunpcklwd    %xmm2,%xmm0,%xmm8
960  DB  197,121,105,202                     ; vpunpckhwd    %xmm2,%xmm0,%xmm9
961  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
962  DB  197,241,105,219                     ; vpunpckhwd    %xmm3,%xmm1,%xmm3
963  DB  197,185,108,194                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm0
964  DB  196,226,125,19,192                  ; vcvtph2ps     %xmm0,%ymm0
965  DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
966  DB  196,226,125,19,201                  ; vcvtph2ps     %xmm1,%ymm1
967  DB  197,177,108,211                     ; vpunpcklqdq   %xmm3,%xmm9,%xmm2
968  DB  196,226,125,19,210                  ; vcvtph2ps     %xmm2,%ymm2
969  DB  197,177,109,219                     ; vpunpckhqdq   %xmm3,%xmm9,%xmm3
970  DB  196,226,125,19,219                  ; vcvtph2ps     %xmm3,%ymm3
; Fetch the next entry from the stream and tail-jump to it.
971  DB  72,173                              ; lods          %ds:(%rsi),%rax
972  DB  255,224                             ; jmpq          *%rax
; +0x6b: partial path. Elements are loaded one 8-byte half-register at a time;
; rcx is re-tested after each one so no bytes beyond element rcx-1 are read.
; xmm9 is zeroed up front; xmm2/xmm3 are zeroed by the stubs at +0xca / +0xd7
; when the load sequence stops before writing them.
973  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
974  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
975  DB  72,131,249,1                        ; cmp           $0x1,%rcx
976  DB  116,79                              ; je            d6a <_sk_load_f16_hsw+0xca>
977  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
978  DB  72,131,249,3                        ; cmp           $0x3,%rcx
979  DB  114,67                              ; jb            d6a <_sk_load_f16_hsw+0xca>
980  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
981  DB  72,131,249,3                        ; cmp           $0x3,%rcx
982  DB  116,68                              ; je            d77 <_sk_load_f16_hsw+0xd7>
983  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
984  DB  72,131,249,5                        ; cmp           $0x5,%rcx
985  DB  114,56                              ; jb            d77 <_sk_load_f16_hsw+0xd7>
986  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
987  DB  72,131,249,5                        ; cmp           $0x5,%rcx
988  DB  15,132,114,255,255,255              ; je            cc1 <_sk_load_f16_hsw+0x21>
989  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
990  DB  72,131,249,7                        ; cmp           $0x7,%rcx
991  DB  15,130,98,255,255,255               ; jb            cc1 <_sk_load_f16_hsw+0x21>
992  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
993  DB  233,87,255,255,255                  ; jmpq          cc1 <_sk_load_f16_hsw+0x21>
; +0xca: only elements 0..1 were loaded - clear xmm3 and xmm2, then transpose.
994  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
995  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
996  DB  233,74,255,255,255                  ; jmpq          cc1 <_sk_load_f16_hsw+0x21>
; +0xd7: only elements 0..3 were loaded - clear xmm3, then transpose.
997  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
998  DB  233,65,255,255,255                  ; jmpq          cc1 <_sk_load_f16_hsw+0x21>
999
; _sk_store_f16_hsw
; Narrows ymm0..ymm3 to half precision, interleaves them into eight 8-byte
; elements (one 16-bit lane from each of the four inputs) and stores them.
;   rsi  stream of 8-byte entries consumed with lods: the first entry points at a
;        slot holding the destination base pointer, the second is the address
;        jumped to on exit.
;   rdi  element index; each element is written at base + rdi*8.
;   rcx  0 stores all eight elements; 1..7 stores only that many.
; ymm0..ymm3 are left untouched. Clobbers rax, xmm8..xmm13 and the flags.
1000PUBLIC _sk_store_f16_hsw
1001_sk_store_f16_hsw LABEL PROC
1002  DB  72,173                              ; lods          %ds:(%rsi),%rax
1003  DB  72,139,0                            ; mov           (%rax),%rax
; Immediate 4 makes vcvtps2ph round according to MXCSR.RC.
1004  DB  196,195,125,29,192,4                ; vcvtps2ph     $0x4,%ymm0,%xmm8
1005  DB  196,195,125,29,201,4                ; vcvtps2ph     $0x4,%ymm1,%xmm9
1006  DB  196,195,125,29,210,4                ; vcvtps2ph     $0x4,%ymm2,%xmm10
1007  DB  196,195,125,29,219,4                ; vcvtps2ph     $0x4,%ymm3,%xmm11
; Interleave: after the word and dword unpacks xmm11 holds elements 0-1,
; xmm10 elements 2-3, xmm9 elements 4-5 and xmm8 elements 6-7.
1008  DB  196,65,57,97,225                    ; vpunpcklwd    %xmm9,%xmm8,%xmm12
1009  DB  196,65,57,105,193                   ; vpunpckhwd    %xmm9,%xmm8,%xmm8
1010  DB  196,65,41,97,203                    ; vpunpcklwd    %xmm11,%xmm10,%xmm9
1011  DB  196,65,41,105,235                   ; vpunpckhwd    %xmm11,%xmm10,%xmm13
1012  DB  196,65,25,98,217                    ; vpunpckldq    %xmm9,%xmm12,%xmm11
1013  DB  196,65,25,106,209                   ; vpunpckhdq    %xmm9,%xmm12,%xmm10
1014  DB  196,65,57,98,205                    ; vpunpckldq    %xmm13,%xmm8,%xmm9
1015  DB  196,65,57,106,197                   ; vpunpckhdq    %xmm13,%xmm8,%xmm8
1016  DB  72,133,201                          ; test          %rcx,%rcx
1017  DB  117,27                              ; jne           de5 <_sk_store_f16_hsw+0x65>
; Full path: four 16-byte stores cover elements 0..7.
1018  DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
1019  DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
1020  DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
1021  DB  197,122,127,68,248,48               ; vmovdqu       %xmm8,0x30(%rax,%rdi,8)
; +0x61: common exit - fetch the next entry from the stream and tail-jump to it.
1022  DB  72,173                              ; lods          %ds:(%rsi),%rax
1023  DB  255,224                             ; jmpq          *%rax
; +0x65: partial path. One 8-byte element is stored per step. Each cmp feeds
; both the jb that follows it and the je after the next store; the vector
; stores in between do not modify the flags.
1024  DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
1025  DB  72,131,249,1                        ; cmp           $0x1,%rcx
1026  DB  116,241                             ; je            de1 <_sk_store_f16_hsw+0x61>
1027  DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
1028  DB  72,131,249,3                        ; cmp           $0x3,%rcx
1029  DB  114,229                             ; jb            de1 <_sk_store_f16_hsw+0x61>
1030  DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
1031  DB  116,221                             ; je            de1 <_sk_store_f16_hsw+0x61>
1032  DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
1033  DB  72,131,249,5                        ; cmp           $0x5,%rcx
1034  DB  114,209                             ; jb            de1 <_sk_store_f16_hsw+0x61>
1035  DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
1036  DB  116,201                             ; je            de1 <_sk_store_f16_hsw+0x61>
1037  DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
1038  DB  72,131,249,7                        ; cmp           $0x7,%rcx
1039  DB  114,189                             ; jb            de1 <_sk_store_f16_hsw+0x61>
1040  DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
1041  DB  235,181                             ; jmp           de1 <_sk_store_f16_hsw+0x61>
1042
; _sk_store_f32_hsw
; Interleaves ymm0..ymm3 into eight 16-byte elements (one float from each of the
; four inputs) and stores them without any conversion.
;   rsi  stream of 8-byte entries consumed with lods: the first entry points at a
;        slot holding the destination base pointer (kept in r8), the second is
;        the address jumped to on exit.
;   rdi  element index; rax = rdi*4 and the stores scale it by 4 again, so each
;        element is written at base + rdi*16.
;   rcx  0 stores all eight elements; 1..7 stores only that many.
; ymm0..ymm3 are left untouched. Clobbers rax, r8, ymm8..ymm13 and the flags.
1043PUBLIC _sk_store_f32_hsw
1044_sk_store_f32_hsw LABEL PROC
1045  DB  72,173                              ; lods          %ds:(%rsi),%rax
1046  DB  76,139,0                            ; mov           (%rax),%r8
1047  DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
; Per-128-bit-lane transpose. Afterwards (low half | high half):
;   ymm10 = element 0 | 4, ymm9 = element 1 | 5,
;   ymm8  = element 2 | 6, ymm11 = element 3 | 7.
1048  DB  197,124,20,193                      ; vunpcklps     %ymm1,%ymm0,%ymm8
1049  DB  197,124,21,217                      ; vunpckhps     %ymm1,%ymm0,%ymm11
1050  DB  197,108,20,203                      ; vunpcklps     %ymm3,%ymm2,%ymm9
1051  DB  197,108,21,227                      ; vunpckhps     %ymm3,%ymm2,%ymm12
1052  DB  196,65,61,20,209                    ; vunpcklpd     %ymm9,%ymm8,%ymm10
1053  DB  196,65,61,21,201                    ; vunpckhpd     %ymm9,%ymm8,%ymm9
1054  DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
1055  DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
1056  DB  72,133,201                          ; test          %rcx,%rcx
1057  DB  117,55                              ; jne           e99 <_sk_store_f32_hsw+0x6d>
; Full path: regroup the 128-bit halves so consecutive elements share a
; register (0-1, 2-3, 4-5, 6-7), then issue four 32-byte stores.
1058  DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
1059  DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
1060  DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
1061  DB  196,67,61,6,195,49                  ; vperm2f128    $0x31,%ymm11,%ymm8,%ymm8
1062  DB  196,65,125,17,36,128                ; vmovupd       %ymm12,(%r8,%rax,4)
1063  DB  196,65,125,17,108,128,32            ; vmovupd       %ymm13,0x20(%r8,%rax,4)
1064  DB  196,65,125,17,76,128,64             ; vmovupd       %ymm9,0x40(%r8,%rax,4)
1065  DB  196,65,125,17,68,128,96             ; vmovupd       %ymm8,0x60(%r8,%rax,4)
; +0x69: common exit - fetch the next entry from the stream and tail-jump to it.
1066  DB  72,173                              ; lods          %ds:(%rsi),%rax
1067  DB  255,224                             ; jmpq          *%rax
; +0x6d: partial path. One 16-byte element is stored per step: the low halves
; serve elements 0..3, vextractf128 $1 writes the high halves for elements 4..6.
; Each cmp feeds both the jb that follows it and the je after the next store;
; the vector stores in between do not modify the flags.
1068  DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
1069  DB  72,131,249,1                        ; cmp           $0x1,%rcx
1070  DB  116,240                             ; je            e95 <_sk_store_f32_hsw+0x69>
1071  DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
1072  DB  72,131,249,3                        ; cmp           $0x3,%rcx
1073  DB  114,227                             ; jb            e95 <_sk_store_f32_hsw+0x69>
1074  DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
1075  DB  116,218                             ; je            e95 <_sk_store_f32_hsw+0x69>
1076  DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
1077  DB  72,131,249,5                        ; cmp           $0x5,%rcx
1078  DB  114,205                             ; jb            e95 <_sk_store_f32_hsw+0x69>
1079  DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
1080  DB  116,195                             ; je            e95 <_sk_store_f32_hsw+0x69>
1081  DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
1082  DB  72,131,249,7                        ; cmp           $0x7,%rcx
1083  DB  114,181                             ; jb            e95 <_sk_store_f32_hsw+0x69>
1084  DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
1085  DB  235,171                             ; jmp           e95 <_sk_store_f32_hsw+0x69>
1086
; _sk_clamp_x_hsw
; Clamps every lane of ymm0: ymm0 = min(max(0, ymm0), limit'), where limit is the
; 32-bit value read through the pointer fetched from the rsi stream and limit'
; is its bit pattern minus one.
; Clobbers rax, ymm8, ymm9; then tail-jumps to the next stream entry.
1087PUBLIC _sk_clamp_x_hsw
1088_sk_clamp_x_hsw LABEL PROC
1089  DB  72,173                              ; lods          %ds:(%rsi),%rax
1090  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1091  DB  197,188,95,192                      ; vmaxps        %ymm0,%ymm8,%ymm0
1092  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
; ymm9 = all ones (-1). The integer add decrements the limit's bit pattern by
; one; for a positive finite float that is the largest value strictly below it
; (assumes the limit is positive - not checked here).
1093  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
1094  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
1095  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
1096  DB  72,173                              ; lods          %ds:(%rsi),%rax
1097  DB  255,224                             ; jmpq          *%rax
1098
; _sk_clamp_y_hsw
; Same operation as the x variant but applied to ymm1:
; ymm1 = min(max(0, ymm1), limit'), where limit is the 32-bit value read through
; the pointer fetched from the rsi stream and limit' is its bit pattern minus one.
; Clobbers rax, ymm8, ymm9; then tail-jumps to the next stream entry.
1099PUBLIC _sk_clamp_y_hsw
1100_sk_clamp_y_hsw LABEL PROC
1101  DB  72,173                              ; lods          %ds:(%rsi),%rax
1102  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1103  DB  197,188,95,201                      ; vmaxps        %ymm1,%ymm8,%ymm1
1104  DB  196,98,125,88,0                     ; vpbroadcastd  (%rax),%ymm8
; ymm9 = all ones (-1); the integer add steps the limit's bit pattern down by
; one (next float below it, assuming a positive finite limit).
1105  DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
1106  DB  196,65,61,254,193                   ; vpaddd        %ymm9,%ymm8,%ymm8
1107  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
1108  DB  72,173                              ; lods          %ds:(%rsi),%rax
1109  DB  255,224                             ; jmpq          *%rax
1110
; _sk_repeat_x_hsw
; Wraps every lane of ymm0 into one period:
;   ymm0 = min(x - floor(x / p) * p, p')
; where p is the float read through the pointer fetched from the rsi stream and
; p' is p with its bit pattern decremented by one.
; Clobbers rax, ymm8, ymm9; then tail-jumps to the next stream entry.
1111PUBLIC _sk_repeat_x_hsw
1112_sk_repeat_x_hsw LABEL PROC
1113  DB  72,173                              ; lods          %ds:(%rsi),%rax
1114  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1115  DB  196,65,124,94,200                   ; vdivps        %ymm8,%ymm0,%ymm9
; Rounding immediate 1 = round toward -infinity (floor).
1116  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
; ymm9 = -(p * floor(x/p)) + x
1117  DB  196,98,61,172,200                   ; vfnmadd213ps  %ymm0,%ymm8,%ymm9
; ymm0 is free now: build all-ones in it and add to p as integers to get p'.
1118  DB  197,253,118,192                     ; vpcmpeqd      %ymm0,%ymm0,%ymm0
1119  DB  197,189,254,192                     ; vpaddd        %ymm0,%ymm8,%ymm0
1120  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
1121  DB  72,173                              ; lods          %ds:(%rsi),%rax
1122  DB  255,224                             ; jmpq          *%rax
1123
; _sk_repeat_y_hsw
; Same operation as the x variant but applied to ymm1:
;   ymm1 = min(y - floor(y / p) * p, p')
; where p is the float read through the pointer fetched from the rsi stream and
; p' is p with its bit pattern decremented by one.
; Clobbers rax, ymm8, ymm9; then tail-jumps to the next stream entry.
1124PUBLIC _sk_repeat_y_hsw
1125_sk_repeat_y_hsw LABEL PROC
1126  DB  72,173                              ; lods          %ds:(%rsi),%rax
1127  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1128  DB  196,65,116,94,200                   ; vdivps        %ymm8,%ymm1,%ymm9
; Rounding immediate 1 = round toward -infinity (floor).
1129  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
; ymm9 = -(p * floor(y/p)) + y
1130  DB  196,98,61,172,201                   ; vfnmadd213ps  %ymm1,%ymm8,%ymm9
; ymm1 is free now: build all-ones in it and add to p as integers to get p'.
1131  DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
1132  DB  197,189,254,201                     ; vpaddd        %ymm1,%ymm8,%ymm1
1133  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
1134  DB  72,173                              ; lods          %ds:(%rsi),%rax
1135  DB  255,224                             ; jmpq          *%rax
1136
; _sk_mirror_x_hsw
; Reflects every lane of ymm0 back and forth over a period of 2*p:
;   t    = (x - p) - floor((x - p) / (2p)) * 2p
;   ymm0 = min(|t - p|, p')
; where p is the float read through the pointer fetched from the rsi stream and
; p' is p with its bit pattern decremented by one.
; Clobbers rax, ymm8, ymm9, ymm10; then tail-jumps to the next stream entry.
1137PUBLIC _sk_mirror_x_hsw
1138_sk_mirror_x_hsw LABEL PROC
1139  DB  72,173                              ; lods          %ds:(%rsi),%rax
1140  DB  197,122,16,0                        ; vmovss        (%rax),%xmm8
1141  DB  196,66,125,24,200                   ; vbroadcastss  %xmm8,%ymm9
1142  DB  196,65,124,92,209                   ; vsubps        %ymm9,%ymm0,%ymm10
; ymm0 is reused to hold 2p (scalar p + p, then broadcast).
1143  DB  196,193,58,88,192                   ; vaddss        %xmm8,%xmm8,%xmm0
1144  DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
1145  DB  197,44,94,192                       ; vdivps        %ymm0,%ymm10,%ymm8
; Rounding immediate 1 = round toward -infinity (floor).
1146  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
; ymm8 = -(2p * floor((x-p)/2p)) + (x-p) = t
1147  DB  196,66,125,172,194                  ; vfnmadd213ps  %ymm10,%ymm0,%ymm8
1148  DB  196,193,60,92,193                   ; vsubps        %ymm9,%ymm8,%ymm0
; Absolute value: v & (0 - v) clears the sign bit.
1149  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1150  DB  197,60,92,192                       ; vsubps        %ymm0,%ymm8,%ymm8
1151  DB  197,188,84,192                      ; vandps        %ymm0,%ymm8,%ymm0
; p' = bit pattern of p plus all-ones (-1), then clamp from above.
1152  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
1153  DB  196,65,53,254,192                   ; vpaddd        %ymm8,%ymm9,%ymm8
1154  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
1155  DB  72,173                              ; lods          %ds:(%rsi),%rax
1156  DB  255,224                             ; jmpq          *%rax
1157
; _sk_mirror_y_hsw
; Same operation as the x variant but applied to ymm1:
;   t    = (y - p) - floor((y - p) / (2p)) * 2p
;   ymm1 = min(|t - p|, p')
; where p is the float read through the pointer fetched from the rsi stream and
; p' is p with its bit pattern decremented by one.
; Clobbers rax, ymm8, ymm9, ymm10; then tail-jumps to the next stream entry.
1158PUBLIC _sk_mirror_y_hsw
1159_sk_mirror_y_hsw LABEL PROC
1160  DB  72,173                              ; lods          %ds:(%rsi),%rax
1161  DB  197,122,16,0                        ; vmovss        (%rax),%xmm8
1162  DB  196,66,125,24,200                   ; vbroadcastss  %xmm8,%ymm9
1163  DB  196,65,116,92,209                   ; vsubps        %ymm9,%ymm1,%ymm10
; ymm1 is reused to hold 2p (scalar p + p, then broadcast).
1164  DB  196,193,58,88,200                   ; vaddss        %xmm8,%xmm8,%xmm1
1165  DB  196,226,125,24,201                  ; vbroadcastss  %xmm1,%ymm1
1166  DB  197,44,94,193                       ; vdivps        %ymm1,%ymm10,%ymm8
; Rounding immediate 1 = round toward -infinity (floor).
1167  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
; ymm8 = -(2p * floor((y-p)/2p)) + (y-p) = t
1168  DB  196,66,117,172,194                  ; vfnmadd213ps  %ymm10,%ymm1,%ymm8
1169  DB  196,193,60,92,201                   ; vsubps        %ymm9,%ymm8,%ymm1
; Absolute value: v & (0 - v) clears the sign bit.
1170  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1171  DB  197,60,92,193                       ; vsubps        %ymm1,%ymm8,%ymm8
1172  DB  197,188,84,201                      ; vandps        %ymm1,%ymm8,%ymm1
; p' = bit pattern of p plus all-ones (-1), then clamp from above.
1173  DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
1174  DB  196,65,53,254,192                   ; vpaddd        %ymm8,%ymm9,%ymm8
1175  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
1176  DB  72,173                              ; lods          %ds:(%rsi),%rax
1177  DB  255,224                             ; jmpq          *%rax
1178
; _sk_luminance_to_alpha_hsw
; Replaces ymm3 with a weighted sum of ymm0, ymm1 and ymm2 and zeroes the three
; inputs:
;   ymm3 = 0x3e59b3d0 (~0.2126) * ymm0
;        + 0x3f371759 (~0.7152) * ymm1
;        + 0x3d93dd98 (~0.0722) * ymm2
;   ymm0 = ymm1 = ymm2 = 0
; NOTE(review): the weights match the Rec. 709 luma coefficients, so ymm0..ymm2
; are presumably r, g, b and ymm3 alpha - confirm against the stage definitions.
; The previous contents of ymm3 are discarded. Clobbers rax, ymm8.
1179PUBLIC _sk_luminance_to_alpha_hsw
1180_sk_luminance_to_alpha_hsw LABEL PROC
1181  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
1182  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1183  DB  196,98,125,88,195                   ; vpbroadcastd  %xmm3,%ymm8
1184  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
1185  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1186  DB  196,226,125,88,219                  ; vpbroadcastd  %xmm3,%ymm3
1187  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
; ymm8 = ymm0 * ymm8 + ymm1
1188  DB  196,98,125,168,193                  ; vfmadd213ps   %ymm1,%ymm0,%ymm8
; ymm0 is no longer needed as an input, so xmm0 carries the third constant.
1189  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
1190  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
1191  DB  196,226,125,88,216                  ; vpbroadcastd  %xmm0,%ymm3
; ymm3 = ymm2 * ymm3 + ymm8
1192  DB  196,194,109,168,216                 ; vfmadd213ps   %ymm8,%ymm2,%ymm3
; rax = next stream entry; the jump happens after the three registers are cleared.
1193  DB  72,173                              ; lods          %ds:(%rsi),%rax
1194  DB  197,253,239,192                     ; vpxor         %ymm0,%ymm0,%ymm0
1195  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1196  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
1197  DB  255,224                             ; jmpq          *%rax
1198
; _sk_matrix_2x3_hsw
; Applies a 2x3 affine matrix to (ymm0, ymm1). With m = the float array addressed
; by the pointer fetched from the rsi stream (column-major order):
;   ymm0' = m[0]*ymm0 + m[2]*ymm1 + m[4]
;   ymm1' = m[1]*ymm0 + m[3]*ymm1 + m[5]
; Both results are built in scratch registers first because each needs the
; original ymm0 and ymm1. Clobbers rax, ymm8..ymm11.
1199PUBLIC _sk_matrix_2x3_hsw
1200_sk_matrix_2x3_hsw LABEL PROC
1201  DB  72,173                              ; lods          %ds:(%rsi),%rax
; First output, accumulated in ymm8 starting from the translation term m[4].
1202  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
1203  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
1204  DB  196,98,125,24,64,16                 ; vbroadcastss  0x10(%rax),%ymm8
1205  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
1206  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; Second output, accumulated in ymm9 starting from the translation term m[5].
1207  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
1208  DB  196,98,125,24,88,12                 ; vbroadcastss  0xc(%rax),%ymm11
1209  DB  196,98,125,24,72,20                 ; vbroadcastss  0x14(%rax),%ymm9
1210  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
1211  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; Fetch the next stream entry, commit the results, and tail-jump.
1212  DB  72,173                              ; lods          %ds:(%rsi),%rax
1213  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1214  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
1215  DB  255,224                             ; jmpq          *%rax
1216
; _sk_matrix_3x4_hsw
; Applies a 3x4 affine matrix to (ymm0, ymm1, ymm2). With m = the float array
; addressed by the pointer fetched from the rsi stream (column-major order):
;   ymm0' = m[0]*ymm0 + m[3]*ymm1 + m[6]*ymm2 + m[9]
;   ymm1' = m[1]*ymm0 + m[4]*ymm1 + m[7]*ymm2 + m[10]
;   ymm2' = m[2]*ymm0 + m[5]*ymm1 + m[8]*ymm2 + m[11]
; Results are built in ymm8..ymm10 because every output needs all three original
; inputs. ymm3 is not touched. Clobbers rax, ymm8..ymm13.
1217PUBLIC _sk_matrix_3x4_hsw
1218_sk_matrix_3x4_hsw LABEL PROC
1219  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Output 0 -> ymm8 (seeded with m[9]).
1220  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
1221  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
1222  DB  196,98,125,24,88,24                 ; vbroadcastss  0x18(%rax),%ymm11
1223  DB  196,98,125,24,64,36                 ; vbroadcastss  0x24(%rax),%ymm8
1224  DB  196,66,109,184,195                  ; vfmadd231ps   %ymm11,%ymm2,%ymm8
1225  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
1226  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; Output 1 -> ymm9 (seeded with m[10]).
1227  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
1228  DB  196,98,125,24,88,16                 ; vbroadcastss  0x10(%rax),%ymm11
1229  DB  196,98,125,24,96,28                 ; vbroadcastss  0x1c(%rax),%ymm12
1230  DB  196,98,125,24,72,40                 ; vbroadcastss  0x28(%rax),%ymm9
1231  DB  196,66,109,184,204                  ; vfmadd231ps   %ymm12,%ymm2,%ymm9
1232  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
1233  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; Output 2 -> ymm10 (seeded with m[11]).
1234  DB  196,98,125,24,88,8                  ; vbroadcastss  0x8(%rax),%ymm11
1235  DB  196,98,125,24,96,20                 ; vbroadcastss  0x14(%rax),%ymm12
1236  DB  196,98,125,24,104,32                ; vbroadcastss  0x20(%rax),%ymm13
1237  DB  196,98,125,24,80,44                 ; vbroadcastss  0x2c(%rax),%ymm10
1238  DB  196,66,109,184,213                  ; vfmadd231ps   %ymm13,%ymm2,%ymm10
1239  DB  196,66,117,184,212                  ; vfmadd231ps   %ymm12,%ymm1,%ymm10
1240  DB  196,66,125,184,211                  ; vfmadd231ps   %ymm11,%ymm0,%ymm10
; Fetch the next stream entry, commit the results, and tail-jump.
1241  DB  72,173                              ; lods          %ds:(%rsi),%rax
1242  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1243  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
1244  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
1245  DB  255,224                             ; jmpq          *%rax
1246
; _sk_matrix_4x5_hsw
; Applies a 4x5 affine matrix to (ymm0, ymm1, ymm2, ymm3). With m = the float
; array addressed by the pointer fetched from the rsi stream (column-major order):
;   ymm0' = m[0]*ymm0 + m[4]*ymm1 + m[8]*ymm2  + m[12]*ymm3 + m[16]
;   ymm1' = m[1]*ymm0 + m[5]*ymm1 + m[9]*ymm2  + m[13]*ymm3 + m[17]
;   ymm2' = m[2]*ymm0 + m[6]*ymm1 + m[10]*ymm2 + m[14]*ymm3 + m[18]
;   ymm3' = m[3]*ymm0 + m[7]*ymm1 + m[11]*ymm2 + m[15]*ymm3 + m[19]
; Results are built in ymm8..ymm11 because every output needs all four original
; inputs. Clobbers rax, ymm8..ymm15.
1247PUBLIC _sk_matrix_4x5_hsw
1248_sk_matrix_4x5_hsw LABEL PROC
1249  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Output 0 -> ymm8 (seeded with m[16]).
1250  DB  196,98,125,24,8                     ; vbroadcastss  (%rax),%ymm9
1251  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
1252  DB  196,98,125,24,88,32                 ; vbroadcastss  0x20(%rax),%ymm11
1253  DB  196,98,125,24,96,48                 ; vbroadcastss  0x30(%rax),%ymm12
1254  DB  196,98,125,24,64,64                 ; vbroadcastss  0x40(%rax),%ymm8
1255  DB  196,66,101,184,196                  ; vfmadd231ps   %ymm12,%ymm3,%ymm8
1256  DB  196,66,109,184,195                  ; vfmadd231ps   %ymm11,%ymm2,%ymm8
1257  DB  196,66,117,184,194                  ; vfmadd231ps   %ymm10,%ymm1,%ymm8
1258  DB  196,66,125,184,193                  ; vfmadd231ps   %ymm9,%ymm0,%ymm8
; Output 1 -> ymm9 (seeded with m[17]).
1259  DB  196,98,125,24,80,4                  ; vbroadcastss  0x4(%rax),%ymm10
1260  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
1261  DB  196,98,125,24,96,36                 ; vbroadcastss  0x24(%rax),%ymm12
1262  DB  196,98,125,24,104,52                ; vbroadcastss  0x34(%rax),%ymm13
1263  DB  196,98,125,24,72,68                 ; vbroadcastss  0x44(%rax),%ymm9
1264  DB  196,66,101,184,205                  ; vfmadd231ps   %ymm13,%ymm3,%ymm9
1265  DB  196,66,109,184,204                  ; vfmadd231ps   %ymm12,%ymm2,%ymm9
1266  DB  196,66,117,184,203                  ; vfmadd231ps   %ymm11,%ymm1,%ymm9
1267  DB  196,66,125,184,202                  ; vfmadd231ps   %ymm10,%ymm0,%ymm9
; Output 2 -> ymm10 (seeded with m[18]).
1268  DB  196,98,125,24,88,8                  ; vbroadcastss  0x8(%rax),%ymm11
1269  DB  196,98,125,24,96,24                 ; vbroadcastss  0x18(%rax),%ymm12
1270  DB  196,98,125,24,104,40                ; vbroadcastss  0x28(%rax),%ymm13
1271  DB  196,98,125,24,112,56                ; vbroadcastss  0x38(%rax),%ymm14
1272  DB  196,98,125,24,80,72                 ; vbroadcastss  0x48(%rax),%ymm10
1273  DB  196,66,101,184,214                  ; vfmadd231ps   %ymm14,%ymm3,%ymm10
1274  DB  196,66,109,184,213                  ; vfmadd231ps   %ymm13,%ymm2,%ymm10
1275  DB  196,66,117,184,212                  ; vfmadd231ps   %ymm12,%ymm1,%ymm10
1276  DB  196,66,125,184,211                  ; vfmadd231ps   %ymm11,%ymm0,%ymm10
; Output 3 -> ymm11 (seeded with m[19]).
1277  DB  196,98,125,24,96,12                 ; vbroadcastss  0xc(%rax),%ymm12
1278  DB  196,98,125,24,104,28                ; vbroadcastss  0x1c(%rax),%ymm13
1279  DB  196,98,125,24,112,44                ; vbroadcastss  0x2c(%rax),%ymm14
1280  DB  196,98,125,24,120,60                ; vbroadcastss  0x3c(%rax),%ymm15
1281  DB  196,98,125,24,88,76                 ; vbroadcastss  0x4c(%rax),%ymm11
1282  DB  196,66,101,184,223                  ; vfmadd231ps   %ymm15,%ymm3,%ymm11
1283  DB  196,66,109,184,222                  ; vfmadd231ps   %ymm14,%ymm2,%ymm11
1284  DB  196,66,117,184,221                  ; vfmadd231ps   %ymm13,%ymm1,%ymm11
1285  DB  196,66,125,184,220                  ; vfmadd231ps   %ymm12,%ymm0,%ymm11
; Fetch the next stream entry, commit the results, and tail-jump.
1286  DB  72,173                              ; lods          %ds:(%rsi),%rax
1287  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1288  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
1289  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
1290  DB  197,124,41,219                      ; vmovaps       %ymm11,%ymm3
1291  DB  255,224                             ; jmpq          *%rax
1292
; _sk_matrix_perspective_hsw
; Applies a 3x3 projective matrix to (ymm0, ymm1). With m = the float array
; addressed by the pointer fetched from the rsi stream (row-major order):
;   X = m[0]*ymm0 + m[1]*ymm1 + m[2]
;   Y = m[3]*ymm0 + m[4]*ymm1 + m[5]
;   W = m[6]*ymm0 + m[7]*ymm1 + m[8]
;   ymm0' = X * rcp(W),  ymm1' = Y * rcp(W)
; rcp is vrcpps, i.e. a reduced-precision reciprocal approximation rather than an
; exact division. Clobbers rax, ymm8..ymm12.
1293PUBLIC _sk_matrix_perspective_hsw
1294_sk_matrix_perspective_hsw LABEL PROC
1295  DB  72,173                              ; lods          %ds:(%rsi),%rax
; X -> ymm10
1296  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1297  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
1298  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
1299  DB  196,66,117,184,209                  ; vfmadd231ps   %ymm9,%ymm1,%ymm10
1300  DB  196,66,125,184,208                  ; vfmadd231ps   %ymm8,%ymm0,%ymm10
; Y -> ymm11
1301  DB  196,98,125,24,64,12                 ; vbroadcastss  0xc(%rax),%ymm8
1302  DB  196,98,125,24,72,16                 ; vbroadcastss  0x10(%rax),%ymm9
1303  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
1304  DB  196,66,117,184,217                  ; vfmadd231ps   %ymm9,%ymm1,%ymm11
1305  DB  196,66,125,184,216                  ; vfmadd231ps   %ymm8,%ymm0,%ymm11
; W -> ymm12
1306  DB  196,98,125,24,64,24                 ; vbroadcastss  0x18(%rax),%ymm8
1307  DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
1308  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
1309  DB  196,66,117,184,225                  ; vfmadd231ps   %ymm9,%ymm1,%ymm12
1310  DB  196,66,125,184,224                  ; vfmadd231ps   %ymm8,%ymm0,%ymm12
; ymm1 temporarily holds rcp(W); ymm0 is written first so ymm1 can be
; overwritten last.
1311  DB  196,193,124,83,204                  ; vrcpps        %ymm12,%ymm1
1312  DB  197,172,89,193                      ; vmulps        %ymm1,%ymm10,%ymm0
1313  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
1314  DB  72,173                              ; lods          %ds:(%rsi),%rax
1315  DB  255,224                             ; jmpq          *%rax
1316
; _sk_linear_gradient_2stops_hsw
; Evaluates four linear functions of t = ymm0. With c = the float array addressed
; by the pointer fetched from the rsi stream (8 floats: 4 biases, then 4 factors):
;   ymm0' = c[0] + t*c[4]
;   ymm1' = c[1] + t*c[5]
;   ymm2' = c[2] + t*c[6]
;   ymm3' = c[3] + t*c[7]
; The incoming ymm1..ymm3 are overwritten without being read. ymm0' is built in
; ymm8 and copied back last because t is needed by all four FMAs.
; Clobbers rax, ymm8, ymm9.
1317PUBLIC _sk_linear_gradient_2stops_hsw
1318_sk_linear_gradient_2stops_hsw LABEL PROC
1319  DB  72,173                              ; lods          %ds:(%rsi),%rax
1320  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
1321  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1322  DB  196,98,125,184,193                  ; vfmadd231ps   %ymm1,%ymm0,%ymm8
; Each output register first serves as scratch for the previous lane's factor.
1323  DB  196,226,125,24,80,20                ; vbroadcastss  0x14(%rax),%ymm2
1324  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
1325  DB  196,226,125,184,202                 ; vfmadd231ps   %ymm2,%ymm0,%ymm1
1326  DB  196,226,125,24,88,24                ; vbroadcastss  0x18(%rax),%ymm3
1327  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
1328  DB  196,226,125,184,211                 ; vfmadd231ps   %ymm3,%ymm0,%ymm2
1329  DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
1330  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
1331  DB  196,194,125,184,217                 ; vfmadd231ps   %ymm9,%ymm0,%ymm3
; Fetch the next stream entry, commit ymm0', and tail-jump.
1332  DB  72,173                              ; lods          %ds:(%rsi),%rax
1333  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
1334  DB  255,224                             ; jmpq          *%rax
1335
1336PUBLIC _sk_start_pipeline_avx
1337_sk_start_pipeline_avx LABEL PROC
1338  DB  65,87                               ; push          %r15
1339  DB  65,86                               ; push          %r14
1340  DB  65,85                               ; push          %r13
1341  DB  65,84                               ; push          %r12
1342  DB  86                                  ; push          %rsi
1343  DB  87                                  ; push          %rdi
1344  DB  83                                  ; push          %rbx
1345  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
1346  DB  197,120,41,188,36,144,0,0,0         ; vmovaps       %xmm15,0x90(%rsp)
1347  DB  197,120,41,180,36,128,0,0,0         ; vmovaps       %xmm14,0x80(%rsp)
1348  DB  197,120,41,108,36,112               ; vmovaps       %xmm13,0x70(%rsp)
1349  DB  197,120,41,100,36,96                ; vmovaps       %xmm12,0x60(%rsp)
1350  DB  197,120,41,92,36,80                 ; vmovaps       %xmm11,0x50(%rsp)
1351  DB  197,120,41,84,36,64                 ; vmovaps       %xmm10,0x40(%rsp)
1352  DB  197,120,41,76,36,48                 ; vmovaps       %xmm9,0x30(%rsp)
1353  DB  197,120,41,68,36,32                 ; vmovaps       %xmm8,0x20(%rsp)
1354  DB  197,248,41,124,36,16                ; vmovaps       %xmm7,0x10(%rsp)
1355  DB  197,248,41,52,36                    ; vmovaps       %xmm6,(%rsp)
1356  DB  77,137,205                          ; mov           %r9,%r13
1357  DB  77,137,198                          ; mov           %r8,%r14
1358  DB  72,137,203                          ; mov           %rcx,%rbx
1359  DB  72,137,214                          ; mov           %rdx,%rsi
1360  DB  72,173                              ; lods          %ds:(%rsi),%rax
1361  DB  73,137,199                          ; mov           %rax,%r15
1362  DB  73,137,244                          ; mov           %rsi,%r12
1363  DB  72,141,67,8                         ; lea           0x8(%rbx),%rax
1364  DB  76,57,232                           ; cmp           %r13,%rax
1365  DB  118,5                               ; jbe           75 <_sk_start_pipeline_avx+0x75>
1366  DB  72,137,223                          ; mov           %rbx,%rdi
1367  DB  235,65                              ; jmp           b6 <_sk_start_pipeline_avx+0xb6>
1368  DB  185,0,0,0,0                         ; mov           $0x0,%ecx
1369  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
1370  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1371  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
1372  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
1373  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
1374  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
1375  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
1376  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
1377  DB  72,137,223                          ; mov           %rbx,%rdi
1378  DB  76,137,230                          ; mov           %r12,%rsi
1379  DB  76,137,242                          ; mov           %r14,%rdx
1380  DB  65,255,215                          ; callq         *%r15
1381  DB  72,141,123,8                        ; lea           0x8(%rbx),%rdi
1382  DB  72,131,195,16                       ; add           $0x10,%rbx
1383  DB  76,57,235                           ; cmp           %r13,%rbx
1384  DB  72,137,251                          ; mov           %rdi,%rbx
1385  DB  118,191                             ; jbe           75 <_sk_start_pipeline_avx+0x75>
1386  DB  76,137,233                          ; mov           %r13,%rcx
1387  DB  72,41,249                           ; sub           %rdi,%rcx
1388  DB  116,41                              ; je            e7 <_sk_start_pipeline_avx+0xe7>
1389  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
1390  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1391  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
1392  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
1393  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
1394  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
1395  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
1396  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
1397  DB  76,137,230                          ; mov           %r12,%rsi
1398  DB  76,137,242                          ; mov           %r14,%rdx
1399  DB  65,255,215                          ; callq         *%r15
1400  DB  76,137,232                          ; mov           %r13,%rax
1401  DB  197,248,40,52,36                    ; vmovaps       (%rsp),%xmm6
1402  DB  197,248,40,124,36,16                ; vmovaps       0x10(%rsp),%xmm7
1403  DB  197,120,40,68,36,32                 ; vmovaps       0x20(%rsp),%xmm8
1404  DB  197,120,40,76,36,48                 ; vmovaps       0x30(%rsp),%xmm9
1405  DB  197,120,40,84,36,64                 ; vmovaps       0x40(%rsp),%xmm10
1406  DB  197,120,40,92,36,80                 ; vmovaps       0x50(%rsp),%xmm11
1407  DB  197,120,40,100,36,96                ; vmovaps       0x60(%rsp),%xmm12
1408  DB  197,120,40,108,36,112               ; vmovaps       0x70(%rsp),%xmm13
1409  DB  197,120,40,180,36,128,0,0,0         ; vmovaps       0x80(%rsp),%xmm14
1410  DB  197,120,40,188,36,144,0,0,0         ; vmovaps       0x90(%rsp),%xmm15
1411  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
1412  DB  91                                  ; pop           %rbx
1413  DB  95                                  ; pop           %rdi
1414  DB  94                                  ; pop           %rsi
1415  DB  65,92                               ; pop           %r12
1416  DB  65,93                               ; pop           %r13
1417  DB  65,94                               ; pop           %r14
1418  DB  65,95                               ; pop           %r15
1419  DB  197,248,119                         ; vzeroupper
1420  DB  195                                 ; retq
1421
; _sk_just_return_avx: a single `retq`. Returns to whoever issued the matching
; call without reading or writing any register or memory.
; NOTE(review): presumably the final entry of a stage list, so that the
; `callq *%r15` in _sk_start_pipeline_avx eventually returns -- confirm against
; the generator (src/jumper/build_stages.py).
1422PUBLIC _sk_just_return_avx
1423_sk_just_return_avx LABEL PROC
1424  DB  195                                 ; retq
1425
; _sk_seed_shader_avx
;   In : rsi = cursor into a list of 8-byte entries (each `lods` reads one entry
;              into rax and steps rsi by 8, direction flag clear)
;        edi = 32-bit integer, converted to float
;        rdx = address of eight floats added into ymm0
;   Entry 1 is used as a pointer to one 32-bit integer; entry 2 is the address
;   this stage tail-jumps to.
;   Out: ymm0 = float(edi) + 0.5 + floats at (rdx)   (edi broadcast to all lanes)
;        ymm1 = float(int at entry 1) + 0.5          (same value in all lanes)
;        ymm2 = 1.0 in every lane
;        ymm3..ymm7 = 0
;   Clobbers: rax, r8.
;   NOTE(review): edi / the entry-1 integer look like pixel x / y, and (rdx) like
;   a per-lane offset table -- inferred from the stage name only; confirm.
1426PUBLIC _sk_seed_shader_avx
1427_sk_seed_shader_avx LABEL PROC
1428  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Broadcast edi to all 8 lanes (AVX1: splat in xmm, then duplicate into the high half).
1429  DB  197,249,110,199                     ; vmovd         %edi,%xmm0
1430  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
1431  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
1432  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; 0x3f000000 is 0.5f; splat it into ymm1.
1433  DB  65,184,0,0,0,63                     ; mov           $0x3f000000,%r8d
1434  DB  196,193,121,110,200                 ; vmovd         %r8d,%xmm1
1435  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
1436  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
1437  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
1438  DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
; The dword at (rax) is broadcast as raw bits, then converted int -> float.
1439  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
1440  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
1441  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
; 0x3f800000 is 1.0f; splat it into ymm2.
1442  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1443  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
1444  DB  196,227,121,4,210,0                 ; vpermilps     $0x0,%xmm2,%xmm2
1445  DB  196,227,109,24,210,1                ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm2
; Fetch the next entry and zero ymm3..ymm7 before jumping to it.
1446  DB  72,173                              ; lods          %ds:(%rsi),%rax
1447  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
1448  DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
1449  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
1450  DB  197,204,87,246                      ; vxorps        %ymm6,%ymm6,%ymm6
1451  DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
1452  DB  255,224                             ; jmpq          *%rax
1453
; _sk_constant_color_avx
;   Reads one 8-byte entry at (rsi) as a pointer to four consecutive floats and
;   broadcasts them: ymm0 = [p+0], ymm1 = [p+4], ymm2 = [p+8], ymm3 = [p+12]
;   (each value replicated to all 8 lanes). Then reads the next entry and
;   tail-jumps to it. rsi advances by 16 in total; rax is clobbered.
;   NOTE(review): the four floats are presumably r, g, b, a -- confirm.
1454PUBLIC _sk_constant_color_avx
1455_sk_constant_color_avx LABEL PROC
1456  DB  72,173                              ; lods          %ds:(%rsi),%rax
1457  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
1458  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
1459  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
1460  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
1461  DB  72,173                              ; lods          %ds:(%rsi),%rax
1462  DB  255,224                             ; jmpq          *%rax
1463
; _sk_clear_avx
;   Sets ymm0..ymm3 to zero (vxorps of a register with itself) and tail-jumps to
;   the address read from the 8-byte entry at (rsi). rsi advances by 8; rax is
;   clobbered. ymm4..ymm7 are not touched.
1464PUBLIC _sk_clear_avx
1465_sk_clear_avx LABEL PROC
1466  DB  72,173                              ; lods          %ds:(%rsi),%rax
1467  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
1468  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
1469  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
1470  DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
1471  DB  255,224                             ; jmpq          *%rax
1472
; _sk_plus__avx
;   Lane-wise float add: ymm0 += ymm4, ymm1 += ymm5, ymm2 += ymm6, ymm3 += ymm7.
;   Then reads the next 8-byte entry at (rsi) into rax and tail-jumps to it.
;   No clamping is performed here.
;   NOTE(review): ymm0..3 / ymm4..7 are presumably source / destination colour
;   channels -- inferred from the stage names; confirm.
1473PUBLIC _sk_plus__avx
1474_sk_plus__avx LABEL PROC
1475  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
1476  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
1477  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
1478  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
1479  DB  72,173                              ; lods          %ds:(%rsi),%rax
1480  DB  255,224                             ; jmpq          *%rax
1481
; _sk_srcover_avx
;   k    = 1.0 - ymm3                 (computed once, from ymm3 on entry)
;   ymm0 = ymm4 * k + ymm0
;   ymm1 = ymm5 * k + ymm1
;   ymm2 = ymm6 * k + ymm2
;   ymm3 = ymm7 * k + ymm3
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (k), ymm9 (product scratch).
;   NOTE(review): matches "src + dst * (1 - src_alpha)" if ymm0..3 are source
;   r,g,b,a and ymm4..7 destination r,g,b,a -- register roles not shown here.
1482PUBLIC _sk_srcover_avx
1483_sk_srcover_avx LABEL PROC
; Splat 1.0f (0x3f800000) into ymm8, then ymm8 = 1.0 - ymm3.
1484  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1485  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1486  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1487  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
1488  DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
1489  DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
1490  DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
1491  DB  197,60,89,205                       ; vmulps        %ymm5,%ymm8,%ymm9
1492  DB  197,180,88,201                      ; vaddps        %ymm1,%ymm9,%ymm1
1493  DB  197,60,89,206                       ; vmulps        %ymm6,%ymm8,%ymm9
1494  DB  197,180,88,210                      ; vaddps        %ymm2,%ymm9,%ymm2
; Last channel reuses ymm8 itself as the product register; k is dead afterwards.
1495  DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
1496  DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
1497  DB  72,173                              ; lods          %ds:(%rsi),%rax
1498  DB  255,224                             ; jmpq          *%rax
1499
; _sk_dstover_avx
;   k    = 1.0 - ymm7
;   ymm0 = ymm0 * k + ymm4
;   ymm1 = ymm1 * k + ymm5
;   ymm2 = ymm2 * k + ymm6
;   ymm3 = ymm3 * k + ymm7
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (k). ymm4..ymm7 are read only.
;   NOTE(review): the mirror image of _sk_srcover_avx with the roles of the
;   ymm0..3 and ymm4..7 groups exchanged; colour semantics not shown here.
1500PUBLIC _sk_dstover_avx
1501_sk_dstover_avx LABEL PROC
; Splat 1.0f (0x3f800000) into ymm8, then ymm8 = 1.0 - ymm7.
1502  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1503  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1504  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1505  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
1506  DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
1507  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1508  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
1509  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1510  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
1511  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1512  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
1513  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
1514  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
1515  DB  72,173                              ; lods          %ds:(%rsi),%rax
1516  DB  255,224                             ; jmpq          *%rax
1517
; _sk_clamp_0_avx
;   ymmN = vmaxps(ymmN, 0) for N = 0..3, i.e. a lower clamp at zero, using a
;   zeroed ymm8 as the bound. Then reads the next 8-byte entry at (rsi) and
;   tail-jumps to it. Clobbers: rax, ymm8.
1518PUBLIC _sk_clamp_0_avx
1519_sk_clamp_0_avx LABEL PROC
1520  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1521  DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
1522  DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
1523  DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
1524  DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
1525  DB  72,173                              ; lods          %ds:(%rsi),%rax
1526  DB  255,224                             ; jmpq          *%rax
1527
; _sk_clamp_1_avx
;   ymmN = vminps(ymmN, 1.0) for N = 0..3, i.e. an upper clamp at 1.0.
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (holds 1.0 in every lane).
1528PUBLIC _sk_clamp_1_avx
1529_sk_clamp_1_avx LABEL PROC
; Splat 1.0f (0x3f800000) into ymm8.
1530  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1531  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1532  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1533  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
1534  DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
1535  DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
1536  DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
1537  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
1538  DB  72,173                              ; lods          %ds:(%rsi),%rax
1539  DB  255,224                             ; jmpq          *%rax
1540
; _sk_clamp_a_avx
;   ymm3 = vminps(ymm3, 1.0)
;   ymmN = vminps(ymmN, ymm3) for N = 0..2   (uses the already-clamped ymm3)
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (holds 1.0 in every lane).
;   NOTE(review): presumably keeps r,g,b <= a <= 1 for premultiplied colour,
;   with ymm3 as alpha -- inferred from the name; confirm.
1541PUBLIC _sk_clamp_a_avx
1542_sk_clamp_a_avx LABEL PROC
; Splat 1.0f (0x3f800000) into ymm8.
1543  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1544  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1545  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1546  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
1547  DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
1548  DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
1549  DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
1550  DB  197,236,93,211                      ; vminps        %ymm3,%ymm2,%ymm2
1551  DB  72,173                              ; lods          %ds:(%rsi),%rax
1552  DB  255,224                             ; jmpq          *%rax
1553
; _sk_set_rgb_avx
;   Reads one 8-byte entry at (rsi) as a pointer to three consecutive floats and
;   broadcasts them: ymm0 = [p+0], ymm1 = [p+4], ymm2 = [p+8]. ymm3 is left
;   unchanged. Then reads the next entry and tail-jumps to it.
;   rsi advances by 16 in total; rax is clobbered.
1554PUBLIC _sk_set_rgb_avx
1555_sk_set_rgb_avx LABEL PROC
1556  DB  72,173                              ; lods          %ds:(%rsi),%rax
1557  DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
1558  DB  196,226,125,24,72,4                 ; vbroadcastss  0x4(%rax),%ymm1
1559  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
1560  DB  72,173                              ; lods          %ds:(%rsi),%rax
1561  DB  255,224                             ; jmpq          *%rax
1562
; _sk_swap_rb_avx
;   Exchanges ymm0 and ymm2 using ymm8 as the temporary; ymm1 and ymm3 are not
;   touched. Reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (left holding the old ymm0).
1563PUBLIC _sk_swap_rb_avx
1564_sk_swap_rb_avx LABEL PROC
1565  DB  197,124,40,192                      ; vmovaps       %ymm0,%ymm8
1566  DB  72,173                              ; lods          %ds:(%rsi),%rax
1567  DB  197,252,40,194                      ; vmovaps       %ymm2,%ymm0
1568  DB  197,124,41,194                      ; vmovaps       %ymm8,%ymm2
1569  DB  255,224                             ; jmpq          *%rax
1570
; _sk_swap_avx
;   Exchanges the register group ymm0..ymm3 with ymm4..ymm7 (ymm0<->ymm4,
;   ymm1<->ymm5, ymm2<->ymm6, ymm3<->ymm7). Reads the next 8-byte entry at (rsi)
;   and tail-jumps to it.
;   Clobbers: rax, ymm8..ymm11 (left holding old ymm3, ymm2, ymm1, ymm0).
1571PUBLIC _sk_swap_avx
1572_sk_swap_avx LABEL PROC
; Stash ymm3..ymm0 in ymm8..ymm11.
1573  DB  197,124,40,195                      ; vmovaps       %ymm3,%ymm8
1574  DB  197,124,40,202                      ; vmovaps       %ymm2,%ymm9
1575  DB  197,124,40,209                      ; vmovaps       %ymm1,%ymm10
1576  DB  197,124,40,216                      ; vmovaps       %ymm0,%ymm11
1577  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm0..3 <- ymm4..7, then ymm4..7 <- the stashed copies.
1578  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
1579  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
1580  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
1581  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
1582  DB  197,124,41,220                      ; vmovaps       %ymm11,%ymm4
1583  DB  197,124,41,213                      ; vmovaps       %ymm10,%ymm5
1584  DB  197,124,41,206                      ; vmovaps       %ymm9,%ymm6
1585  DB  197,124,41,199                      ; vmovaps       %ymm8,%ymm7
1586  DB  255,224                             ; jmpq          *%rax
1587
; _sk_move_src_dst_avx
;   Copies ymm0..ymm3 into ymm4..ymm7 (ymm0..ymm3 keep their values), then
;   tail-jumps to the address read from the 8-byte entry at (rsi).
;   Clobbers: rax.
1588PUBLIC _sk_move_src_dst_avx
1589_sk_move_src_dst_avx LABEL PROC
1590  DB  72,173                              ; lods          %ds:(%rsi),%rax
1591  DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
1592  DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
1593  DB  197,252,40,242                      ; vmovaps       %ymm2,%ymm6
1594  DB  197,252,40,251                      ; vmovaps       %ymm3,%ymm7
1595  DB  255,224                             ; jmpq          *%rax
1596
; _sk_move_dst_src_avx
;   Copies ymm4..ymm7 into ymm0..ymm3 (ymm4..ymm7 keep their values), then
;   tail-jumps to the address read from the 8-byte entry at (rsi).
;   Clobbers: rax.
1597PUBLIC _sk_move_dst_src_avx
1598_sk_move_dst_src_avx LABEL PROC
1599  DB  72,173                              ; lods          %ds:(%rsi),%rax
1600  DB  197,252,40,196                      ; vmovaps       %ymm4,%ymm0
1601  DB  197,252,40,205                      ; vmovaps       %ymm5,%ymm1
1602  DB  197,252,40,214                      ; vmovaps       %ymm6,%ymm2
1603  DB  197,252,40,223                      ; vmovaps       %ymm7,%ymm3
1604  DB  255,224                             ; jmpq          *%rax
1605
; _sk_premul_avx
;   ymm0 *= ymm3, ymm1 *= ymm3, ymm2 *= ymm3 (lane-wise); ymm3 is unchanged.
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax.
;   NOTE(review): presumably multiplies r,g,b by alpha (ymm3) -- confirm.
1606PUBLIC _sk_premul_avx
1607_sk_premul_avx LABEL PROC
1608  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
1609  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
1610  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
1611  DB  72,173                              ; lods          %ds:(%rsi),%rax
1612  DB  255,224                             ; jmpq          *%rax
1613
; _sk_unpremul_avx
;   scale = (ymm3 == 0) ? 0 : 1.0 / ymm3      (per lane)
;   ymm0 *= scale, ymm1 *= scale, ymm2 *= scale; ymm3 is unchanged.
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8 (scale), ymm9 (compare mask), ymm10.
;   The blend on the ==0 mask keeps the infinity produced by the division out
;   of the multiplies for lanes where ymm3 is zero.
1614PUBLIC _sk_unpremul_avx
1615_sk_unpremul_avx LABEL PROC
; ymm8 = 0; ymm9 = all-ones in lanes where ymm3 == 0.
1616  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
1617  DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
; Splat 1.0f (0x3f800000) into ymm10, then ymm10 = 1.0 / ymm3.
1618  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1619  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
1620  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
1621  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
1622  DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
; ymm8 = mask ? ymm8 (0) : ymm10 (1/ymm3).
1623  DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
1624  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1625  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1626  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1627  DB  72,173                              ; lods          %ds:(%rsi),%rax
1628  DB  255,224                             ; jmpq          *%rax
1629
; _sk_from_srgb_avx
;   Applies the same piecewise function to each of ymm0, ymm1, ymm2 (ymm3 is
;   not touched). For an input lane c:
;       lo = c * A                       A = 0x3d9e8391 (~0.0774, ~1/12.92)
;       hi = D + c*c * (B*c + C)         B = 0x3e99999a (~0.3)
;                                        C = 0x3f328f5c (~0.6975)
;                                        D = 0x3b23d70a (~0.0025)
;       out = (c < T) ? lo : hi          T = 0x3d6147ae (~0.055)
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8..ymm15.
;   Constants live in: ymm8 = A, ymm11 = B, ymm12 = C, ymm14 = D, ymm13 = T.
;   NOTE(review): reads as a polynomial approximation of the sRGB -> linear
;   transfer curve -- inferred from the name and constants; confirm.
1630PUBLIC _sk_from_srgb_avx
1631_sk_from_srgb_avx LABEL PROC
; --- ymm0 channel (constants are materialised along the way) ---
1632  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
1633  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1634  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1635  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; ymm9 = lo = A*c ; ymm10 = c*c
1636  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
1637  DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
1638  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
1639  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
1640  DB  196,67,121,4,219,0                  ; vpermilps     $0x0,%xmm11,%xmm11
1641  DB  196,67,37,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm11,%ymm11
1642  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
1643  DB  197,121,110,224                     ; vmovd         %eax,%xmm12
1644  DB  196,67,121,4,228,0                  ; vpermilps     $0x0,%xmm12,%xmm12
1645  DB  196,67,29,24,228,1                  ; vinsertf128   $0x1,%xmm12,%ymm12,%ymm12
; ymm13 = B*c + C
1646  DB  197,36,89,232                       ; vmulps        %ymm0,%ymm11,%ymm13
1647  DB  196,65,20,88,236                    ; vaddps        %ymm12,%ymm13,%ymm13
1648  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
1649  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
1650  DB  196,67,121,4,246,0                  ; vpermilps     $0x0,%xmm14,%xmm14
1651  DB  196,67,13,24,246,1                  ; vinsertf128   $0x1,%xmm14,%ymm14,%ymm14
; ymm10 = hi = D + c*c*(B*c + C)
1652  DB  196,65,44,89,213                    ; vmulps        %ymm13,%ymm10,%ymm10
1653  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
; ymm13 is reused for the threshold T from here on.
1654  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
1655  DB  197,121,110,232                     ; vmovd         %eax,%xmm13
1656  DB  196,67,121,4,237,0                  ; vpermilps     $0x0,%xmm13,%xmm13
1657  DB  196,67,21,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm13,%ymm13
; ymm0 becomes the (c < T) mask, then the blend picks lo where the mask is set.
1658  DB  196,193,124,194,197,1               ; vcmpltps      %ymm13,%ymm0,%ymm0
1659  DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
; --- ymm1 channel (ymm15 is the B*c + C scratch) ---
1660  DB  197,60,89,201                       ; vmulps        %ymm1,%ymm8,%ymm9
1661  DB  197,116,89,209                      ; vmulps        %ymm1,%ymm1,%ymm10
1662  DB  197,36,89,249                       ; vmulps        %ymm1,%ymm11,%ymm15
1663  DB  196,65,28,88,255                    ; vaddps        %ymm15,%ymm12,%ymm15
1664  DB  196,65,44,89,215                    ; vmulps        %ymm15,%ymm10,%ymm10
1665  DB  196,65,12,88,210                    ; vaddps        %ymm10,%ymm14,%ymm10
1666  DB  196,193,116,194,205,1               ; vcmpltps      %ymm13,%ymm1,%ymm1
1667  DB  196,195,45,74,201,16                ; vblendvps     %ymm1,%ymm9,%ymm10,%ymm1
; --- ymm2 channel (ymm8 = lo overwrites A, ymm9 = c*c then hi) ---
1668  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
1669  DB  197,108,89,202                      ; vmulps        %ymm2,%ymm2,%ymm9
1670  DB  197,36,89,210                       ; vmulps        %ymm2,%ymm11,%ymm10
1671  DB  196,65,28,88,210                    ; vaddps        %ymm10,%ymm12,%ymm10
1672  DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
1673  DB  196,65,12,88,201                    ; vaddps        %ymm9,%ymm14,%ymm9
1674  DB  196,193,108,194,213,1               ; vcmpltps      %ymm13,%ymm2,%ymm2
1675  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
1676  DB  72,173                              ; lods          %ds:(%rsi),%rax
1677  DB  255,224                             ; jmpq          *%rax
1678
; _sk_to_srgb_avx
;   Applies the same piecewise function to each of ymm0, ymm1, ymm2 (ymm3 is
;   not touched). For an input lane c:
;       t    = vrsqrtps(c)               (approximate 1/sqrt(c))
;       s    = vrcpps(t)                 (approximate sqrt(c))
;       q    = vrsqrtps(t)               (approximate c^(1/4))
;       lo   = K * c                     K = 0x41475c29 (~12.46)
;       hi   = min(1.0, P*q + (Q*s + R)) P = 0x3ed287c2 (~0.4112)
;                                        Q = 0x3f306fce (~0.6892)
;                                        R = -(0x3dca57a8) (~ -0.0988)
;       out  = (c < T) ? lo : hi         T = 0x3b8ce704 (~0.0043)
;   Then reads the next 8-byte entry at (rsi) and tail-jumps to it.
;   Clobbers: rax, ymm8..ymm15.
;   Constants live in: ymm8 = K, ymm9 = 1.0, ymm10 = P, ymm11 = Q, ymm15 = R,
;   ymm14 = T (after the first channel's q has been consumed).
;   vrsqrtps / vrcpps are the hardware approximation instructions, so results
;   are approximate by construction.
;   NOTE(review): reads as an approximation of the linear -> sRGB transfer
;   curve -- inferred from the name and constants; confirm.
1679PUBLIC _sk_to_srgb_avx
1680_sk_to_srgb_avx LABEL PROC
; --- ymm0 channel: ymm13 = s, ymm14 = q (ymm8 is then reused for K) ---
1681  DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
1682  DB  196,65,124,83,232                   ; vrcpps        %ymm8,%ymm13
1683  DB  196,65,124,82,240                   ; vrsqrtps      %ymm8,%ymm14
1684  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
1685  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
1686  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
1687  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; ymm12 = lo = K*c
1688  DB  197,60,89,224                       ; vmulps        %ymm0,%ymm8,%ymm12
1689  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1690  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
1691  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
1692  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
1693  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
1694  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
1695  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
1696  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
1697  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
1698  DB  197,121,110,216                     ; vmovd         %eax,%xmm11
1699  DB  196,67,121,4,219,0                  ; vpermilps     $0x0,%xmm11,%xmm11
1700  DB  196,67,37,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm11,%ymm11
; The xor flips the IEEE-754 sign bit, so ymm15 receives the negated constant.
1701  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
1702  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
1703  DB  197,121,110,248                     ; vmovd         %eax,%xmm15
1704  DB  196,67,121,4,255,0                  ; vpermilps     $0x0,%xmm15,%xmm15
1705  DB  196,67,5,24,255,1                   ; vinsertf128   $0x1,%xmm15,%ymm15,%ymm15
; ymm13 = hi = min(1.0, P*q + (Q*s + R))
1706  DB  196,65,20,89,235                    ; vmulps        %ymm11,%ymm13,%ymm13
1707  DB  196,65,20,88,239                    ; vaddps        %ymm15,%ymm13,%ymm13
1708  DB  196,65,12,89,242                    ; vmulps        %ymm10,%ymm14,%ymm14
1709  DB  196,65,12,88,237                    ; vaddps        %ymm13,%ymm14,%ymm13
1710  DB  196,65,52,93,237                    ; vminps        %ymm13,%ymm9,%ymm13
; ymm14 is reused for the threshold T from here on.
1711  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
1712  DB  197,121,110,240                     ; vmovd         %eax,%xmm14
1713  DB  196,67,121,4,246,0                  ; vpermilps     $0x0,%xmm14,%xmm14
1714  DB  196,67,13,24,246,1                  ; vinsertf128   $0x1,%xmm14,%ymm14,%ymm14
; ymm0 becomes the (c < T) mask, then the blend picks lo where the mask is set.
1715  DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
1716  DB  196,195,21,74,196,0                 ; vblendvps     %ymm0,%ymm12,%ymm13,%ymm0
; --- ymm1 channel: ymm13 = s, ymm12 = t then q then hi; ymm13 later = lo ---
1717  DB  197,124,82,225                      ; vrsqrtps      %ymm1,%ymm12
1718  DB  196,65,124,83,236                   ; vrcpps        %ymm12,%ymm13
1719  DB  196,65,124,82,228                   ; vrsqrtps      %ymm12,%ymm12
1720  DB  196,65,36,89,237                    ; vmulps        %ymm13,%ymm11,%ymm13
1721  DB  196,65,4,88,237                     ; vaddps        %ymm13,%ymm15,%ymm13
1722  DB  196,65,44,89,228                    ; vmulps        %ymm12,%ymm10,%ymm12
1723  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
1724  DB  197,60,89,233                       ; vmulps        %ymm1,%ymm8,%ymm13
1725  DB  196,65,52,93,228                    ; vminps        %ymm12,%ymm9,%ymm12
1726  DB  196,193,116,194,206,1               ; vcmpltps      %ymm14,%ymm1,%ymm1
1727  DB  196,195,29,74,205,16                ; vblendvps     %ymm1,%ymm13,%ymm12,%ymm1
; --- ymm2 channel: the constant registers ymm8..ymm11 are consumed as scratch ---
1728  DB  197,124,82,226                      ; vrsqrtps      %ymm2,%ymm12
1729  DB  196,65,124,83,236                   ; vrcpps        %ymm12,%ymm13
1730  DB  196,65,36,89,221                    ; vmulps        %ymm13,%ymm11,%ymm11
1731  DB  196,65,4,88,219                     ; vaddps        %ymm11,%ymm15,%ymm11
1732  DB  196,65,124,82,228                   ; vrsqrtps      %ymm12,%ymm12
1733  DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
1734  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
1735  DB  196,65,52,93,202                    ; vminps        %ymm10,%ymm9,%ymm9
1736  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
1737  DB  196,193,108,194,214,1               ; vcmpltps      %ymm14,%ymm2,%ymm2
1738  DB  196,195,53,74,208,32                ; vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
1739  DB  72,173                              ; lods          %ds:(%rsi),%rax
1740  DB  255,224                             ; jmpq          *%rax
1741
; _sk_scale_1_float_avx
;   Reads one 8-byte entry at (rsi) as a pointer to a single float, broadcasts
;   that float to every lane of ymm8, and multiplies ymm0..ymm3 by it.
;   Then reads the next entry and tail-jumps to it.
;   rsi advances by 16 in total. Clobbers: rax, ymm8.
1742PUBLIC _sk_scale_1_float_avx
1743_sk_scale_1_float_avx LABEL PROC
1744  DB  72,173                              ; lods          %ds:(%rsi),%rax
1745  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1746  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1747  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1748  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1749  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
1750  DB  72,173                              ; lods          %ds:(%rsi),%rax
1751  DB  255,224                             ; jmpq          *%rax
1752
; _sk_scale_u8_avx
;   In : rsi = cursor into a list of 8-byte entries
;        rdi = byte offset added to the base pointer
;        rcx = 0 to load 8 bytes at once, otherwise the number of bytes to load
;              one at a time
;   The first entry points to a qword holding a base address; the bytes are
;   read from base + rdi. Each byte is zero-extended, converted to float and
;   multiplied by 0x3b808081 (~1/255) to give a per-lane factor in ymm8, and
;   ymm0..ymm3 are multiplied by that factor. rcx is restored before the
;   tail-jump to the next entry.
;   Clobbers: rax, r8, r9, r10, r11, ymm8, ymm9.
;   NOTE(review): rcx looks like a "tail" pixel count (1..7) and rdi like the x
;   coordinate -- inferred; confirm. In the tail path lanes beyond rcx get a
;   factor of 0 because r9 starts out zeroed.
1753PUBLIC _sk_scale_u8_avx
1754_sk_scale_u8_avx LABEL PROC
; Park rcx in r8 (cl is needed as a shift count in the tail loop).
1755  DB  73,137,200                          ; mov           %rcx,%r8
1756  DB  72,173                              ; lods          %ds:(%rsi),%rax
1757  DB  72,139,0                            ; mov           (%rax),%rax
1758  DB  72,1,248                            ; add           %rdi,%rax
1759  DB  77,133,192                          ; test          %r8,%r8
1760  DB  117,80                              ; jne           639 <_sk_scale_u8_avx+0x60>
; Full path: load 8 bytes at once.
1761  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; +0x14: shared by both paths. Widen bytes 0..3, then shuffle dword 1 (bytes
; 4..7) down to dword 0 and widen those, and join the halves into ymm8.
1762  DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
1763  DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
1764  DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
1765  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
1766  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
; Splat 0x3b808081 (~1/255) into ymm9 and scale the converted bytes by it.
1767  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1768  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
1769  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
1770  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
1771  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
1772  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
1773  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
1774  DB  197,188,89,210                      ; vmulps        %ymm2,%ymm8,%ymm2
1775  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
1776  DB  72,173                              ; lods          %ds:(%rsi),%rax
1777  DB  76,137,193                          ; mov           %r8,%rcx
1778  DB  255,224                             ; jmpq          *%rax
; +0x60: tail path. Assemble r8 bytes into r9, byte i placed at bit 8*i
; (rcx = running shift count, r10 = bytes remaining), so that no memory past
; the last requested byte is read.
1779  DB  49,201                              ; xor           %ecx,%ecx
1780  DB  77,137,194                          ; mov           %r8,%r10
1781  DB  69,49,201                           ; xor           %r9d,%r9d
1782  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1783  DB  72,255,192                          ; inc           %rax
1784  DB  73,211,227                          ; shl           %cl,%r11
1785  DB  77,9,217                            ; or            %r11,%r9
1786  DB  72,131,193,8                        ; add           $0x8,%rcx
1787  DB  73,255,202                          ; dec           %r10
1788  DB  117,234                             ; jne           641 <_sk_scale_u8_avx+0x68>
; Hand the packed bytes to the shared widening code, skipping the 8-byte load.
1789  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
1790  DB  235,143                             ; jmp           5ed <_sk_scale_u8_avx+0x14>
1791
; _sk_lerp_1_float_avx
;   Reads one 8-byte entry at (rsi) as a pointer to a single float t, broadcast
;   to every lane of ymm8, then for N = 0..3:
;       ymmN = (ymmN - ymm(N+4)) * t + ymm(N+4)
;   Then reads the next entry and tail-jumps to it.
;   rsi advances by 16 in total. Clobbers: rax, ymm8. ymm4..ymm7 are read only.
;   NOTE(review): a linear interpolation from the ymm4..7 group (t = 0) to the
;   ymm0..3 group (t = 1); which group is src/dst is not shown here.
1792PUBLIC _sk_lerp_1_float_avx
1793_sk_lerp_1_float_avx LABEL PROC
1794  DB  72,173                              ; lods          %ds:(%rsi),%rax
1795  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
1796  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1797  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
1798  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
1799  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1800  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
1801  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
1802  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1803  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
1804  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
1805  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
1806  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
1807  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
1808  DB  72,173                              ; lods          %ds:(%rsi),%rax
1809  DB  255,224                             ; jmpq          *%rax
1810
; _sk_lerp_u8_avx
;   In : rsi = cursor into a list of 8-byte entries
;        rdi = byte offset added to the base pointer
;        rcx = 0 to load 8 bytes at once, otherwise the number of bytes to load
;              one at a time
;   The first entry points to a qword holding a base address; the bytes are
;   read from base + rdi. Each byte is zero-extended, converted to float and
;   multiplied by 0x3b808081 (~1/255) to give a per-lane weight t in ymm8.
;   Then for N = 0..3:
;       ymmN = (ymmN - ymm(N+4)) * t + ymm(N+4)
;   rcx is restored before the tail-jump to the next entry.
;   Clobbers: rax, r8, r9, r10, r11, ymm8, ymm9. ymm4..ymm7 are read only.
;   NOTE(review): rcx looks like a "tail" pixel count (1..7) and rdi like the x
;   coordinate -- inferred; confirm. In the tail path lanes beyond rcx get
;   t = 0 because r9 starts out zeroed.
1811PUBLIC _sk_lerp_u8_avx
1812_sk_lerp_u8_avx LABEL PROC
; Park rcx in r8 (cl is needed as a shift count in the tail loop).
1813  DB  73,137,200                          ; mov           %rcx,%r8
1814  DB  72,173                              ; lods          %ds:(%rsi),%rax
1815  DB  72,139,0                            ; mov           (%rax),%rax
1816  DB  72,1,248                            ; add           %rdi,%rax
1817  DB  77,133,192                          ; test          %r8,%r8
1818  DB  117,116                             ; jne           721 <_sk_lerp_u8_avx+0x84>
; Full path: load 8 bytes at once.
1819  DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
; +0x14: shared by both paths. Widen bytes 0..3, then shuffle dword 1 (bytes
; 4..7) down to dword 0 and widen those, and join the halves into ymm8.
1820  DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
1821  DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
1822  DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
1823  DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
1824  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
; Splat 0x3b808081 (~1/255) into ymm9 and scale the converted bytes by it.
1825  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
1826  DB  197,121,110,200                     ; vmovd         %eax,%xmm9
1827  DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
1828  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
1829  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
; Per-register interpolation with weight ymm8.
1830  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1831  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
1832  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
1833  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1834  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
1835  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
1836  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1837  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
1838  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
1839  DB  197,228,92,223                      ; vsubps        %ymm7,%ymm3,%ymm3
1840  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
1841  DB  197,228,88,223                      ; vaddps        %ymm7,%ymm3,%ymm3
1842  DB  72,173                              ; lods          %ds:(%rsi),%rax
1843  DB  76,137,193                          ; mov           %r8,%rcx
1844  DB  255,224                             ; jmpq          *%rax
; +0x84: tail path. Assemble r8 bytes into r9, byte i placed at bit 8*i
; (rcx = running shift count, r10 = bytes remaining), so that no memory past
; the last requested byte is read.
1845  DB  49,201                              ; xor           %ecx,%ecx
1846  DB  77,137,194                          ; mov           %r8,%r10
1847  DB  69,49,201                           ; xor           %r9d,%r9d
1848  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
1849  DB  72,255,192                          ; inc           %rax
1850  DB  73,211,227                          ; shl           %cl,%r11
1851  DB  77,9,217                            ; or            %r11,%r9
1852  DB  72,131,193,8                        ; add           $0x8,%rcx
1853  DB  73,255,202                          ; dec           %r10
1854  DB  117,234                             ; jne           729 <_sk_lerp_u8_avx+0x8c>
; Hand the packed bytes to the shared widening code, skipping the 8-byte load.
; The backward distance exceeds the rel8 range, hence the 5-byte jmpq form.
1855  DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
1856  DB  233,104,255,255,255                 ; jmpq          6b1 <_sk_lerp_u8_avx+0x14>
1857
1858PUBLIC _sk_lerp_565_avx
1859_sk_lerp_565_avx LABEL PROC
; Pipeline stage (AVX): per-lane linear interpolation with three separate
; weights unpacked from eight 16-bit values.
;   t_hi  = (v & 0xf800) * (1/0xf800)   -> ymm9
;   t_mid = (v & 0x07e0) * (1/0x07e0)   -> ymm10
;   t_lo  = (v & 0x001f) * (1/0x001f)   -> ymm3 (temporary)
;   ymm0 = (ymm0 - ymm4) * t_hi  + ymm4
;   ymm1 = (ymm1 - ymm5) * t_mid + ymm5
;   ymm2 = (ymm2 - ymm6) * t_lo  + ymm6
;   ymm3 = 1.0f in every lane
; Registers as used by the code below:
;   rsi  -> qword list consumed with lods (context pointer, then next stage)
;   rdi  =  element index; the load address is base + rdi*2
;   rcx  =  0 selects the full 16-byte load; otherwise (rcx & 7) values are
;           inserted individually via the jump table at the end
; Scratch: rax, r8-r10, ymm8-ymm10.
; NOTE(review): the bit layout matches RGB565 and ymm0-2 / ymm4-6 are
; presumably source / destination r,g,b - confirm against the generator source.
1860  DB  72,173                              ; lods          %ds:(%rsi),%rax
1861  DB  76,139,16                           ; mov           (%rax),%r10
1862  DB  72,133,201                          ; test          %rcx,%rcx
1863  DB  15,133,250,0,0,0                    ; jne           851 <_sk_lerp_565_avx+0x108>
; Full path: load eight 16-bit values.
1864  DB  196,65,122,111,4,122                ; vmovdqu       (%r10,%rdi,2),%xmm8
; +0x14: the partial-load path re-enters here with xmm8 already filled.
; Zero-extend the eight words to dwords in ymm8 (high four via unpack with
; zero, low four via vpmovzxwd).
1865  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
1866  DB  197,185,105,219                     ; vpunpckhwd    %xmm3,%xmm8,%xmm3
1867  DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
1868  DB  196,99,61,24,195,1                  ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm8
; ymm9 = float(v & 0xf800) * 0x37842108 (float bit pattern of ~1/0xf800)
1869  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
1870  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1871  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
1872  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1873  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
1874  DB  197,124,91,203                      ; vcvtdq2ps     %ymm3,%ymm9
1875  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
1876  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1877  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
1878  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1879  DB  197,52,89,203                       ; vmulps        %ymm3,%ymm9,%ymm9
; ymm10 = float(v & 0x7e0) * 0x3a020821 (float bit pattern of ~1/0x7e0)
1880  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
1881  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1882  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
1883  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1884  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
1885  DB  197,124,91,211                      ; vcvtdq2ps     %ymm3,%ymm10
1886  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
1887  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1888  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
1889  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1890  DB  197,44,89,211                       ; vmulps        %ymm3,%ymm10,%ymm10
; ymm3 = float(v & 0x1f) * 0x3d042108 (float bit pattern of ~1/0x1f)
1891  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
1892  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1893  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
1894  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1895  DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
1896  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
1897  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
1898  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1899  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
1900  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
1901  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; ymm0 = (ymm0 - ymm4) * ymm9 + ymm4
1902  DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
1903  DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
1904  DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
; ymm1 = (ymm1 - ymm5) * ymm10 + ymm5
1905  DB  197,244,92,205                      ; vsubps        %ymm5,%ymm1,%ymm1
1906  DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
1907  DB  197,244,88,205                      ; vaddps        %ymm5,%ymm1,%ymm1
; ymm2 = (ymm2 - ymm6) * ymm3 + ymm6
1908  DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
1909  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
1910  DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
; ymm3 = broadcast 0x3f800000 (1.0f); the incoming ymm3 is discarded, not lerped.
1911  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
1912  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
1913  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
1914  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
; Fetch the next stage address and tail-jump to it.
1915  DB  72,173                              ; lods          %ds:(%rsi),%rax
1916  DB  255,224                             ; jmpq          *%rax
; +0x108: partial load. r8 = (rcx & 7) - 1 as an unsigned byte; values above 6
; (i.e. rcx & 7 == 0) skip the inserts and continue at +0x14 with xmm8 = 0.
1917  DB  65,137,200                          ; mov           %ecx,%r8d
1918  DB  65,128,224,7                        ; and           $0x7,%r8b
1919  DB  196,65,57,239,192                   ; vpxor         %xmm8,%xmm8,%xmm8
1920  DB  65,254,200                          ; dec           %r8b
1921  DB  65,128,248,6                        ; cmp           $0x6,%r8b
1922  DB  15,135,243,254,255,255              ; ja            75d <_sk_lerp_565_avx+0x14>
; Dispatch through the table at +0x177: each entry is a signed 32-bit offset
; relative to the table itself, selecting an entry point in the chain below.
1923  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
1924  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 8c0 <_sk_lerp_565_avx+0x177>
1925  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
1926  DB  76,1,200                            ; add           %r9,%rax
1927  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entering at word k inserts words k, k-1, ..., 0.
; The 7-value entry starts at the vpxor so that word 6 is inserted into zeros.
1928  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
1929  DB  196,65,97,196,68,122,12,6           ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm3,%xmm8
1930  DB  196,65,57,196,68,122,10,5           ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm8,%xmm8
1931  DB  196,65,57,196,68,122,8,4            ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm8,%xmm8
1932  DB  196,65,57,196,68,122,6,3            ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm8,%xmm8
1933  DB  196,65,57,196,68,122,4,2            ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
1934  DB  196,65,57,196,68,122,2,1            ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
1935  DB  196,65,57,196,4,122,0               ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
1936  DB  233,159,254,255,255                 ; jmpq          75d <_sk_lerp_565_avx+0x14>
; Two-byte nop after the unconditional jmp; never executed.
1937  DB  102,144                             ; xchg          %ax,%ax
; +0x177: jump-table DATA, not code. The remaining 28 bytes are seven
; little-endian 32-bit offsets (-14, -22, -30, -38, -46, -54, -66) indexed by
; (rcx & 7) - 1; the disassembly text on these lines is a decoding artifact.
1938  DB  242,255                             ; repnz         (bad)
1939  DB  255                                 ; (bad)
1940  DB  255                                 ; (bad)
1941  DB  234                                 ; (bad)
1942  DB  255                                 ; (bad)
1943  DB  255                                 ; (bad)
1944  DB  255,226                             ; jmpq          *%rdx
1945  DB  255                                 ; (bad)
1946  DB  255                                 ; (bad)
1947  DB  255                                 ; (bad)
1948  DB  218,255                             ; (bad)
1949  DB  255                                 ; (bad)
1950  DB  255,210                             ; callq         *%rdx
1951  DB  255                                 ; (bad)
1952  DB  255                                 ; (bad)
1953  DB  255,202                             ; dec           %edx
1954  DB  255                                 ; (bad)
1955  DB  255                                 ; (bad)
1956  DB  255                                 ; (bad)
1957  DB  190                                 ; .byte         0xbe
1958  DB  255                                 ; (bad)
1959  DB  255                                 ; (bad)
1960  DB  255                                 ; .byte         0xff
1961
1962PUBLIC _sk_load_tables_avx
1963_sk_load_tables_avx LABEL PROC
; Pipeline stage (AVX): loads eight 32-bit values and turns each of their four
; bytes into a float lane:
;   ymm0[i] = table1[(v[i]      ) & 0xff]   table1 = *(ctx + 0x08)
;   ymm1[i] = table2[(v[i] >>  8) & 0xff]   table2 = *(ctx + 0x10)
;   ymm2[i] = table3[(v[i] >> 16) & 0xff]   table3 = *(ctx + 0x18)
;   ymm3[i] = float(v[i] >> 24) * (1/255)
; Table entries are 4 bytes wide (index*4 addressing, loaded with vmovss).
; AVX1 has no gather instruction, so every index is moved to a general-purpose
; register and each lane is loaded individually.
; Registers as used by the code below:
;   rsi  -> qword list consumed with lods (ctx pointer, then next stage)
;   rdi  =  element index; values are read from *(ctx) + rdi*4
;   rcx  =  0 selects the full 32-byte load; otherwise (rcx & 7) values are
;           inserted individually via the jump table at the end
; rbp, rbx and r12-r15 are needed as index registers, so they are pushed here
; and popped again just before the jump to the next stage.
; NOTE(review): the byte order suggests packed 8888 pixels with per-channel
; lookup tables for r,g,b - confirm against the generator source.
1964  DB  85                                  ; push          %rbp
1965  DB  65,87                               ; push          %r15
1966  DB  65,86                               ; push          %r14
1967  DB  65,85                               ; push          %r13
1968  DB  65,84                               ; push          %r12
1969  DB  83                                  ; push          %rbx
1970  DB  72,173                              ; lods          %ds:(%rsi),%rax
1971  DB  76,139,0                            ; mov           (%rax),%r8
1972  DB  72,133,201                          ; test          %rcx,%rcx
1973  DB  15,133,56,2,0,0                     ; jne           b2c <_sk_load_tables_avx+0x250>
; Full path: load eight dwords into ymm8.
1974  DB  196,65,124,16,4,184                 ; vmovups       (%r8,%rdi,4),%ymm8
; +0x1e: the partial-load path re-enters here with ymm8 already filled.
; ymm9 = 0xff in every lane (byte mask, reused for all three lookups).
1975  DB  187,255,0,0,0                       ; mov           $0xff,%ebx
1976  DB  197,249,110,195                     ; vmovd         %ebx,%xmm0
1977  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
1978  DB  196,99,125,24,200,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm9
; --- Lookup 1: indices = v & 0xff ---
; Spill the eight indices to GPRs: lanes 0-3 -> r11, r9, r14, r10 and
; lanes 4-7 -> r15, r12, r13, rbx (each 64-bit extract holds two indices; the
; 32-bit mov keeps the low one, the shr by 32 keeps the high one).
1979  DB  196,193,52,84,192                   ; vandps        %ymm8,%ymm9,%ymm0
1980  DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
1981  DB  69,137,203                          ; mov           %r9d,%r11d
1982  DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
1983  DB  69,137,214                          ; mov           %r10d,%r14d
1984  DB  73,193,234,32                       ; shr           $0x20,%r10
1985  DB  73,193,233,32                       ; shr           $0x20,%r9
1986  DB  196,227,125,25,192,1                ; vextractf128  $0x1,%ymm0,%xmm0
1987  DB  196,193,249,126,196                 ; vmovq         %xmm0,%r12
1988  DB  69,137,231                          ; mov           %r12d,%r15d
1989  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
1990  DB  65,137,221                          ; mov           %ebx,%r13d
1991  DB  72,193,235,32                       ; shr           $0x20,%rbx
1992  DB  73,193,236,32                       ; shr           $0x20,%r12
; rbp = first table, r8 = second table (r8 no longer needed as the load base).
1993  DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
1994  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
; Build lanes 4-7 in xmm0 and lanes 0-3 in xmm1, then join into ymm0.
1995  DB  196,161,122,16,68,189,0             ; vmovss        0x0(%rbp,%r15,4),%xmm0
1996  DB  196,163,121,33,68,165,0,16          ; vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
1997  DB  196,161,122,16,76,173,0             ; vmovss        0x0(%rbp,%r13,4),%xmm1
1998  DB  196,227,121,33,193,32               ; vinsertps     $0x20,%xmm1,%xmm0,%xmm0
1999  DB  197,250,16,76,157,0                 ; vmovss        0x0(%rbp,%rbx,4),%xmm1
2000  DB  196,227,121,33,193,48               ; vinsertps     $0x30,%xmm1,%xmm0,%xmm0
2001  DB  196,161,122,16,76,157,0             ; vmovss        0x0(%rbp,%r11,4),%xmm1
2002  DB  196,163,113,33,76,141,0,16          ; vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
2003  DB  196,161,122,16,92,181,0             ; vmovss        0x0(%rbp,%r14,4),%xmm3
2004  DB  196,227,113,33,203,32               ; vinsertps     $0x20,%xmm3,%xmm1,%xmm1
2005  DB  196,161,122,16,92,149,0             ; vmovss        0x0(%rbp,%r10,4),%xmm3
2006  DB  196,227,113,33,203,48               ; vinsertps     $0x30,%xmm3,%xmm1,%xmm1
2007  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
; --- Lookup 2: indices = (v >> 8) & 0xff ---
; The shift is done per 128-bit half (AVX1 has no 256-bit integer shift);
; xmm10 keeps the upper half of v for the later shifts.
2008  DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
2009  DB  196,67,125,25,194,1                 ; vextractf128  $0x1,%ymm8,%xmm10
2010  DB  196,193,105,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm2
2011  DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
2012  DB  197,180,84,201                      ; vandps        %ymm1,%ymm9,%ymm1
; Indices: lanes 0-3 -> r11, r9, r14, r10; lanes 4-7 -> r15, rbp, r12, rbx.
2013  DB  196,193,249,126,201                 ; vmovq         %xmm1,%r9
2014  DB  69,137,203                          ; mov           %r9d,%r11d
2015  DB  196,195,249,22,202,1                ; vpextrq       $0x1,%xmm1,%r10
2016  DB  69,137,214                          ; mov           %r10d,%r14d
2017  DB  73,193,234,32                       ; shr           $0x20,%r10
2018  DB  73,193,233,32                       ; shr           $0x20,%r9
2019  DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
2020  DB  196,225,249,126,205                 ; vmovq         %xmm1,%rbp
2021  DB  65,137,239                          ; mov           %ebp,%r15d
2022  DB  196,227,249,22,203,1                ; vpextrq       $0x1,%xmm1,%rbx
2023  DB  65,137,220                          ; mov           %ebx,%r12d
2024  DB  72,193,235,32                       ; shr           $0x20,%rbx
2025  DB  72,193,237,32                       ; shr           $0x20,%rbp
; Build lanes 4-7 in xmm1 and lanes 0-3 in xmm2 from the r8 table, join into ymm1.
2026  DB  196,129,122,16,12,184               ; vmovss        (%r8,%r15,4),%xmm1
2027  DB  196,195,113,33,12,168,16            ; vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
2028  DB  196,129,122,16,20,160               ; vmovss        (%r8,%r12,4),%xmm2
2029  DB  196,227,113,33,202,32               ; vinsertps     $0x20,%xmm2,%xmm1,%xmm1
2030  DB  196,193,122,16,20,152               ; vmovss        (%r8,%rbx,4),%xmm2
2031  DB  196,227,113,33,202,48               ; vinsertps     $0x30,%xmm2,%xmm1,%xmm1
2032  DB  196,129,122,16,20,152               ; vmovss        (%r8,%r11,4),%xmm2
2033  DB  196,131,105,33,20,136,16            ; vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
2034  DB  196,129,122,16,28,176               ; vmovss        (%r8,%r14,4),%xmm3
2035  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
2036  DB  196,129,122,16,28,144               ; vmovss        (%r8,%r10,4),%xmm3
2037  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
2038  DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
; --- Lookup 3: indices = (v >> 16) & 0xff ---
; rax = third table; the ctx pointer in rax is not needed after this load.
2039  DB  72,139,64,24                        ; mov           0x18(%rax),%rax
2040  DB  196,193,105,114,208,16              ; vpsrld        $0x10,%xmm8,%xmm2
2041  DB  196,193,97,114,210,16               ; vpsrld        $0x10,%xmm10,%xmm3
2042  DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
2043  DB  197,180,84,210                      ; vandps        %ymm2,%ymm9,%ymm2
; Indices: lanes 0-3 -> r10, r8, r11, r9; lanes 4-7 -> r14, rbp, r15, rbx.
2044  DB  196,193,249,126,208                 ; vmovq         %xmm2,%r8
2045  DB  69,137,194                          ; mov           %r8d,%r10d
2046  DB  196,195,249,22,209,1                ; vpextrq       $0x1,%xmm2,%r9
2047  DB  69,137,203                          ; mov           %r9d,%r11d
2048  DB  73,193,233,32                       ; shr           $0x20,%r9
2049  DB  73,193,232,32                       ; shr           $0x20,%r8
2050  DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
2051  DB  196,225,249,126,213                 ; vmovq         %xmm2,%rbp
2052  DB  65,137,238                          ; mov           %ebp,%r14d
2053  DB  196,227,249,22,211,1                ; vpextrq       $0x1,%xmm2,%rbx
2054  DB  65,137,223                          ; mov           %ebx,%r15d
2055  DB  72,193,235,32                       ; shr           $0x20,%rbx
2056  DB  72,193,237,32                       ; shr           $0x20,%rbp
; Build lanes 4-7 in xmm9 (the mask is no longer needed) and lanes 0-3 in
; xmm2 from the rax table, join into ymm2.
2057  DB  196,161,122,16,20,176               ; vmovss        (%rax,%r14,4),%xmm2
2058  DB  196,227,105,33,20,168,16            ; vinsertps     $0x10,(%rax,%rbp,4),%xmm2,%xmm2
2059  DB  196,161,122,16,28,184               ; vmovss        (%rax,%r15,4),%xmm3
2060  DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
2061  DB  197,250,16,28,152                   ; vmovss        (%rax,%rbx,4),%xmm3
2062  DB  196,99,105,33,203,48                ; vinsertps     $0x30,%xmm3,%xmm2,%xmm9
2063  DB  196,161,122,16,28,144               ; vmovss        (%rax,%r10,4),%xmm3
2064  DB  196,163,97,33,28,128,16             ; vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
2065  DB  196,161,122,16,20,152               ; vmovss        (%rax,%r11,4),%xmm2
2066  DB  196,227,97,33,210,32                ; vinsertps     $0x20,%xmm2,%xmm3,%xmm2
2067  DB  196,161,122,16,28,136               ; vmovss        (%rax,%r9,4),%xmm3
2068  DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
2069  DB  196,195,109,24,209,1                ; vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
; --- Top byte: ymm3 = float(v >> 24) * 0x3b808081 (float bit pattern of ~1/255) ---
; No table and no mask: the logical shift by 24 already isolates the byte.
2070  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
2071  DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
2072  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
2073  DB  197,124,91,195                      ; vcvtdq2ps     %ymm3,%ymm8
2074  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
2075  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2076  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2077  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2078  DB  197,188,89,219                      ; vmulps        %ymm3,%ymm8,%ymm3
; Fetch the next stage address, restore the pushed registers in reverse
; order, and tail-jump.
2079  DB  72,173                              ; lods          %ds:(%rsi),%rax
2080  DB  91                                  ; pop           %rbx
2081  DB  65,92                               ; pop           %r12
2082  DB  65,93                               ; pop           %r13
2083  DB  65,94                               ; pop           %r14
2084  DB  65,95                               ; pop           %r15
2085  DB  93                                  ; pop           %rbp
2086  DB  255,224                             ; jmpq          *%rax
; +0x250: partial load. bl = (rcx & 7) - 1 as an unsigned byte; values above 6
; (i.e. rcx & 7 == 0) skip the inserts and continue at +0x1e with ymm8 = 0.
2087  DB  137,203                             ; mov           %ecx,%ebx
2088  DB  128,227,7                           ; and           $0x7,%bl
2089  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2090  DB  254,203                             ; dec           %bl
2091  DB  128,251,6                           ; cmp           $0x6,%bl
2092  DB  15,135,185,253,255,255              ; ja            8fa <_sk_load_tables_avx+0x1e>
; Dispatch through the table at +0x2f8: each entry is a signed 32-bit offset
; relative to the table itself, selecting an entry point in the chain below.
2093  DB  15,182,219                          ; movzbl        %bl,%ebx
2094  DB  76,141,13,137,0,0,0                 ; lea           0x89(%rip),%r9        # bd4 <_sk_load_tables_avx+0x2f8>
2095  DB  73,99,28,153                        ; movslq        (%r9,%rbx,4),%rbx
2096  DB  76,1,203                            ; add           %r9,%rbx
2097  DB  255,227                             ; jmpq          *%rbx
; Fall-through chain filling ymm8 from dword 6 (offset 0x18) down to dword 0.
; Dwords 4-6 live in the upper 128 bits, hence the extract/insert or blend
; around each vpinsrd; dwords 0-3 are inserted into the low half and blended
; back with mask 0xf.
2098  DB  196,193,121,110,68,184,24           ; vmovd         0x18(%r8,%rdi,4),%xmm0
2099  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
2100  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2101  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2102  DB  196,99,117,12,192,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm8
2103  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
2104  DB  196,195,121,34,68,184,20,1          ; vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
2105  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
2106  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
2107  DB  196,195,121,34,68,184,16,0          ; vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
2108  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
2109  DB  196,195,57,34,68,184,12,3           ; vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
2110  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
2111  DB  196,195,57,34,68,184,8,2            ; vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
2112  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
2113  DB  196,195,57,34,68,184,4,1            ; vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
2114  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
2115  DB  196,195,57,34,4,184,0               ; vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
2116  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
2117  DB  233,38,253,255,255                  ; jmpq          8fa <_sk_load_tables_avx+0x1e>
; +0x2f8: jump-table DATA, not code. The remaining 28 bytes are seven
; little-endian 32-bit offsets (-18, -32, -46, -60, -80, -100, -128) indexed by
; (rcx & 7) - 1; the disassembly text on these lines is a decoding artifact.
2118  DB  238                                 ; out           %al,(%dx)
2119  DB  255                                 ; (bad)
2120  DB  255                                 ; (bad)
2121  DB  255,224                             ; jmpq          *%rax
2122  DB  255                                 ; (bad)
2123  DB  255                                 ; (bad)
2124  DB  255,210                             ; callq         *%rdx
2125  DB  255                                 ; (bad)
2126  DB  255                                 ; (bad)
2127  DB  255,196                             ; inc           %esp
2128  DB  255                                 ; (bad)
2129  DB  255                                 ; (bad)
2130  DB  255,176,255,255,255,156             ; pushq         -0x63000001(%rax)
2131  DB  255                                 ; (bad)
2132  DB  255                                 ; (bad)
2133  DB  255                                 ; .byte         0xff
2134  DB  128,255,255                         ; cmp           $0xff,%bh
2135  DB  255                                 ; .byte         0xff
2136
2137PUBLIC _sk_load_a8_avx
2138_sk_load_a8_avx LABEL PROC
; Pipeline stage (AVX): loads eight bytes and produces
;   ymm3 = float(byte) * (1/255)
;   ymm0 = ymm1 = ymm2 = 0
; Registers as used by the code below:
;   rsi  -> qword list consumed with lods: first a pointer whose target holds
;           the byte-buffer base, later the address of the next stage
;   rdi  =  byte offset added to the buffer base
;   rcx  =  0 selects the full 8-byte load; non-zero is used as the number of
;           bytes to gather one at a time
; Scratch: rax, r8-r11.
; NOTE(review): presumably ymm3 is alpha and ymm0-2 are r,g,b (an alpha-only
; "A8" format) - confirm against the generator source.
;
; rcx is parked in r8 because the partial-load loop reuses cl as a shift count.
2139  DB  73,137,200                          ; mov           %rcx,%r8
2140  DB  72,173                              ; lods          %ds:(%rsi),%rax
2141  DB  72,139,0                            ; mov           (%rax),%rax
2142  DB  72,1,248                            ; add           %rdi,%rax
2143  DB  77,133,192                          ; test          %r8,%r8
2144  DB  117,74                              ; jne           c4a <_sk_load_a8_avx+0x5a>
; Full path: load 8 bytes into xmm0.
2145  DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
; +0x14: the partial-load path re-enters here with xmm0 already filled.
; Zero-extend bytes 0-3 and (after moving dword 1 down) bytes 4-7 to dwords,
; join them into ymm0 and convert to float.
2146  DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
2147  DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
2148  DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
2149  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
2150  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
; ymm3 = ymm0 * broadcast(0x3b808081), the float bit pattern of ~1/255.
2151  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
2152  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2153  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2154  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
2155  DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
; Fetch the next stage address, zero ymm0-ymm2, restore rcx, and tail-jump.
2156  DB  72,173                              ; lods          %ds:(%rsi),%rax
2157  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
2158  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2159  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2160  DB  76,137,193                          ; mov           %r8,%rcx
2161  DB  255,224                             ; jmpq          *%rax
; +0x5a: partial load. Reads r8 bytes one at a time and ORs byte k into bits
; 8k..8k+7 of r9 (rcx = running shift amount, r10 = bytes remaining), then
; moves the assembled qword into xmm0 and rejoins the main path at +0x14.
2162  DB  49,201                              ; xor           %ecx,%ecx
2163  DB  77,137,194                          ; mov           %r8,%r10
2164  DB  69,49,201                           ; xor           %r9d,%r9d
2165  DB  68,15,182,24                        ; movzbl        (%rax),%r11d
2166  DB  72,255,192                          ; inc           %rax
2167  DB  73,211,227                          ; shl           %cl,%r11
2168  DB  77,9,217                            ; or            %r11,%r9
2169  DB  72,131,193,8                        ; add           $0x8,%rcx
2170  DB  73,255,202                          ; dec           %r10
2171  DB  117,234                             ; jne           c52 <_sk_load_a8_avx+0x62>
2172  DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
2173  DB  235,149                             ; jmp           c04 <_sk_load_a8_avx+0x14>
2174
2175PUBLIC _sk_store_a8_avx
2176_sk_store_a8_avx LABEL PROC
; Pipeline stage (AVX): converts the eight floats in ymm3 to bytes and stores
; them:  byte[i] = pack(cvt_to_int32(ymm3[i] * 255.0f))
; The two pack steps saturate (dword -> unsigned word, word -> unsigned byte).
; ymm0-ymm3 are left unmodified.
; Registers as used by the code below:
;   rsi  -> qword list consumed with lods: first a pointer whose target holds
;           the destination base, later the address of the next stage
;   rdi  =  byte offset added to the destination base
;   rcx  =  0 selects the full 8-byte store; otherwise (rcx & 7) bytes are
;           stored individually via the jump table at the end
; Scratch: rax, r8, r9, ymm8, xmm9.
; NOTE(review): presumably ymm3 is the alpha channel - confirm against the
; generator source.
2177  DB  72,173                              ; lods          %ds:(%rsi),%rax
2178  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = ymm3 * broadcast(0x437f0000), the float bit pattern of 255.0.
2179  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
2180  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2181  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
2182  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
2183  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
; Convert to int32, then narrow 8 dwords -> 8 words -> 8 bytes; the result
; bytes end up in the low 8 bytes of xmm8.
2184  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
2185  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
2186  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
2187  DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
2188  DB  72,133,201                          ; test          %rcx,%rcx
2189  DB  117,10                              ; jne           cb1 <_sk_store_a8_avx+0x42>
; Full path: store all 8 bytes at once.
2190  DB  196,65,123,17,4,57                  ; vmovsd        %xmm8,(%r9,%rdi,1)
; +0x3e: common exit - fetch the next stage address and tail-jump to it.
2191  DB  72,173                              ; lods          %ds:(%rsi),%rax
2192  DB  255,224                             ; jmpq          *%rax
; +0x42: partial store. r8 = (rcx & 7) - 1 as an unsigned byte; values above 6
; (i.e. rcx & 7 == 0) store nothing and go straight to the exit.
2193  DB  65,137,200                          ; mov           %ecx,%r8d
2194  DB  65,128,224,7                        ; and           $0x7,%r8b
2195  DB  65,254,200                          ; dec           %r8b
2196  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2197  DB  119,236                             ; ja            cad <_sk_store_a8_avx+0x3e>
; Widen the bytes to words, so result byte k now sits at byte position 2k -
; which is why the vpextrb immediates below are even.
2198  DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
; Dispatch through the table at +0xa5: each entry is a signed 32-bit offset
; relative to the table itself, selecting an entry point in the chain below.
2199  DB  65,15,182,192                       ; movzbl        %r8b,%eax
2200  DB  76,141,5,67,0,0,0                   ; lea           0x43(%rip),%r8        # d14 <_sk_store_a8_avx+0xa5>
2201  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
2202  DB  76,1,192                            ; add           %r8,%rax
2203  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: entering at byte k stores bytes k, k-1, ..., 0.
2204  DB  196,67,121,20,68,57,6,12            ; vpextrb       $0xc,%xmm8,0x6(%r9,%rdi,1)
2205  DB  196,67,121,20,68,57,5,10            ; vpextrb       $0xa,%xmm8,0x5(%r9,%rdi,1)
2206  DB  196,67,121,20,68,57,4,8             ; vpextrb       $0x8,%xmm8,0x4(%r9,%rdi,1)
2207  DB  196,67,121,20,68,57,3,6             ; vpextrb       $0x6,%xmm8,0x3(%r9,%rdi,1)
2208  DB  196,67,121,20,68,57,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r9,%rdi,1)
2209  DB  196,67,121,20,68,57,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r9,%rdi,1)
2210  DB  196,67,121,20,4,57,0                ; vpextrb       $0x0,%xmm8,(%r9,%rdi,1)
2211  DB  235,154                             ; jmp           cad <_sk_store_a8_avx+0x3e>
; One-byte nop after the unconditional jmp; never executed.
2212  DB  144                                 ; nop
; +0xa5: jump-table DATA, not code. The remaining 28 bytes are seven
; little-endian 32-bit offsets (-10, -18, -26, -34, -42, -50, -58) indexed by
; (rcx & 7) - 1; the disassembly text on these lines is a decoding artifact.
2213  DB  246,255                             ; idiv          %bh
2214  DB  255                                 ; (bad)
2215  DB  255                                 ; (bad)
2216  DB  238                                 ; out           %al,(%dx)
2217  DB  255                                 ; (bad)
2218  DB  255                                 ; (bad)
2219  DB  255,230                             ; jmpq          *%rsi
2220  DB  255                                 ; (bad)
2221  DB  255                                 ; (bad)
2222  DB  255                                 ; (bad)
2223  DB  222,255                             ; fdivrp        %st,%st(7)
2224  DB  255                                 ; (bad)
2225  DB  255,214                             ; callq         *%rsi
2226  DB  255                                 ; (bad)
2227  DB  255                                 ; (bad)
2228  DB  255,206                             ; dec           %esi
2229  DB  255                                 ; (bad)
2230  DB  255                                 ; (bad)
2231  DB  255,198                             ; inc           %esi
2232  DB  255                                 ; (bad)
2233  DB  255                                 ; (bad)
2234  DB  255                                 ; .byte         0xff
2235
2236PUBLIC _sk_load_565_avx
2237_sk_load_565_avx LABEL PROC
2238  DB  72,173                              ; lods          %ds:(%rsi),%rax
2239  DB  76,139,16                           ; mov           (%rax),%r10
2240  DB  72,133,201                          ; test          %rcx,%rcx
2241  DB  15,133,209,0,0,0                    ; jne           e0f <_sk_load_565_avx+0xdf>
2242  DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
2243  DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
2244  DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
2245  DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
2246  DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
2247  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
2248  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2249  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
2250  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2251  DB  197,252,84,194                      ; vandps        %ymm2,%ymm0,%ymm0
2252  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
2253  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
2254  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2255  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2256  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
2257  DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
2258  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
2259  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2260  DB  197,249,112,201,0                   ; vpshufd       $0x0,%xmm1,%xmm1
2261  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
2262  DB  197,244,84,202                      ; vandps        %ymm2,%ymm1,%ymm1
2263  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
2264  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
2265  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2266  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2267  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2268  DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
2269  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
2270  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2271  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
2272  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2273  DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
2274  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2275  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
2276  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2277  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2278  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2279  DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
2280  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
2281  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2282  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2283  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2284  DB  72,173                              ; lods          %ds:(%rsi),%rax
2285  DB  255,224                             ; jmpq          *%rax
2286  DB  65,137,200                          ; mov           %ecx,%r8d
2287  DB  65,128,224,7                        ; and           $0x7,%r8b
2288  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
2289  DB  65,254,200                          ; dec           %r8b
2290  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2291  DB  15,135,29,255,255,255               ; ja            d44 <_sk_load_565_avx+0x14>
2292  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
2293  DB  76,141,13,74,0,0,0                  ; lea           0x4a(%rip),%r9        # e7c <_sk_load_565_avx+0x14c>
2294  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
2295  DB  76,1,200                            ; add           %r9,%rax
2296  DB  255,224                             ; jmpq          *%rax
2297  DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
2298  DB  196,193,121,196,68,122,12,6         ; vpinsrw       $0x6,0xc(%r10,%rdi,2),%xmm0,%xmm0
2299  DB  196,193,121,196,68,122,10,5         ; vpinsrw       $0x5,0xa(%r10,%rdi,2),%xmm0,%xmm0
2300  DB  196,193,121,196,68,122,8,4          ; vpinsrw       $0x4,0x8(%r10,%rdi,2),%xmm0,%xmm0
2301  DB  196,193,121,196,68,122,6,3          ; vpinsrw       $0x3,0x6(%r10,%rdi,2),%xmm0,%xmm0
2302  DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
2303  DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
2304  DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
2305  DB  233,201,254,255,255                 ; jmpq          d44 <_sk_load_565_avx+0x14>
2306  DB  144                                 ; nop
2307  DB  243,255                             ; repz          (bad)
2308  DB  255                                 ; (bad)
2309  DB  255                                 ; (bad)
2310  DB  235,255                             ; jmp           e81 <_sk_load_565_avx+0x151>
2311  DB  255                                 ; (bad)
2312  DB  255,227                             ; jmpq          *%rbx
2313  DB  255                                 ; (bad)
2314  DB  255                                 ; (bad)
2315  DB  255                                 ; (bad)
2316  DB  219,255                             ; (bad)
2317  DB  255                                 ; (bad)
2318  DB  255,211                             ; callq         *%rbx
2319  DB  255                                 ; (bad)
2320  DB  255                                 ; (bad)
2321  DB  255,203                             ; dec           %ebx
2322  DB  255                                 ; (bad)
2323  DB  255                                 ; (bad)
2324  DB  255                                 ; (bad)
2325  DB  191                                 ; .byte         0xbf
2326  DB  255                                 ; (bad)
2327  DB  255                                 ; (bad)
2328  DB  255                                 ; .byte         0xff
2329
; _sk_store_565_avx
;
; Pipeline stage (AVX, no AVX2 integer ops): converts eight float lanes from
; ymm0/ymm1/ymm2 to packed 5-6-5 16-bit values and stores them at
; dst + rdi*2.
;
; Registers as used by the code below:
;   rsi  - cursor into a stream of qwords; `lods` fetches the next qword into
;          rax and advances rsi (assumes DF is clear).  The first qword is a
;          pointer whose first field is the destination base pointer; the
;          following qword is the address jumped to when this stage is done.
;   rdi  - element index, scaled by 2 (bytes per 565 pixel).
;   rcx  - 0 selects the full 8-pixel store; non-zero selects the tail path,
;          where (cl & 7) is the number of pixels to write.
;   ymm0, ymm1, ymm2 - inputs (presumably r, g, b in [0,1] - confirm against
;          the stage's C++ source); left unmodified.
;   Scratch: rax, r8, r9, xmm8-xmm11 / ymm8-ymm10, flags.
;
; Packed value per lane: (cvt(ymm0*31) << 11) | (cvt(ymm1*63) << 5) | cvt(ymm2*31).
; The 32-bit shifts are done per 128-bit half (xmm form) and re-joined with
; vinsertf128.
2330PUBLIC _sk_store_565_avx
2331_sk_store_565_avx LABEL PROC
2332  DB  72,173                              ; lods          %ds:(%rsi),%rax
2333  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = broadcast 31.0f (0x41f80000); field in bits 11..15 comes from ymm0.
2334  DB  184,0,0,248,65                      ; mov           $0x41f80000,%eax
2335  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2336  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
2337  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
2338  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
2339  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
2340  DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
2341  DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
2342  DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
2343  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
; ymm10 = broadcast 63.0f (0x427c0000); field in bits 5..10 comes from ymm1.
2344  DB  184,0,0,124,66                      ; mov           $0x427c0000,%eax
2345  DB  197,121,110,208                     ; vmovd         %eax,%xmm10
2346  DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
2347  DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
2348  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
2349  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2350  DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
2351  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
2352  DB  196,193,41,114,242,5                ; vpslld        $0x5,%xmm10,%xmm10
2353  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
2354  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
; Low field (bits 0..4) comes from ymm2, reusing the 31.0f broadcast in ymm8.
2355  DB  197,60,89,194                       ; vmulps        %ymm2,%ymm8,%ymm8
2356  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
2357  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
; Narrow eight 32-bit lanes to eight 16-bit lanes in xmm8 (unsigned saturation).
2358  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
2359  DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
; rcx != 0 -> partial (tail) store at +0x9e; otherwise store all 16 bytes.
2360  DB  72,133,201                          ; test          %rcx,%rcx
2361  DB  117,10                              ; jne           f36 <_sk_store_565_avx+0x9e>
2362  DB  196,65,122,127,4,121                ; vmovdqu       %xmm8,(%r9,%rdi,2)
; +0x9a: common exit - fetch the next qword from the stream and jump to it.
2363  DB  72,173                              ; lods          %ds:(%rsi),%rax
2364  DB  255,224                             ; jmpq          *%rax
; +0x9e: tail path.  index = (cl & 7) - 1; an index above 6 (i.e. cl & 7 == 0)
; writes nothing and goes straight to the exit at +0x9a.
2365  DB  65,137,200                          ; mov           %ecx,%r8d
2366  DB  65,128,224,7                        ; and           $0x7,%r8b
2367  DB  65,254,200                          ; dec           %r8b
2368  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2369  DB  119,236                             ; ja            f32 <_sk_store_565_avx+0x9a>
; Dispatch through the table at +0xfc: rax = table + sign-extended table[index].
2370  DB  65,15,182,192                       ; movzbl        %r8b,%eax
2371  DB  76,141,5,67,0,0,0                   ; lea           0x43(%rip),%r8        # f94 <_sk_store_565_avx+0xfc>
2372  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
2373  DB  76,1,192                            ; add           %r8,%rax
2374  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: table entry k enters at the "vpextrw $k" line and runs
; down to lane 0, so k+1 pixels are written.
2375  DB  196,67,121,21,68,121,12,6           ; vpextrw       $0x6,%xmm8,0xc(%r9,%rdi,2)
2376  DB  196,67,121,21,68,121,10,5           ; vpextrw       $0x5,%xmm8,0xa(%r9,%rdi,2)
2377  DB  196,67,121,21,68,121,8,4            ; vpextrw       $0x4,%xmm8,0x8(%r9,%rdi,2)
2378  DB  196,67,121,21,68,121,6,3            ; vpextrw       $0x3,%xmm8,0x6(%r9,%rdi,2)
2379  DB  196,67,121,21,68,121,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r9,%rdi,2)
2380  DB  196,67,121,21,68,121,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r9,%rdi,2)
2381  DB  196,67,121,21,4,121,0               ; vpextrw       $0x0,%xmm8,(%r9,%rdi,2)
2382  DB  235,159                             ; jmp           f32 <_sk_store_565_avx+0x9a>
; Unreachable padding byte; the table that follows starts at +0xfc.
2383  DB  144                                 ; nop
; Jump table (data, not code): seven little-endian signed 32-bit offsets
; relative to the table's own address.  Values for index 0..6 are
; -10, -18, -26, -34, -42, -50, -58, i.e. the vpextrw for lane 0..6 above.
; The DB line breaks below follow the disassembler's mis-decode and do not
; line up with the 4-byte entry boundaries.
2384  DB  246,255                             ; (jump-table data)
2385  DB  255                                 ; (jump-table data)
2386  DB  255                                 ; (jump-table data)
2387  DB  238                                 ; (jump-table data)
2388  DB  255                                 ; (jump-table data)
2389  DB  255                                 ; (jump-table data)
2390  DB  255,230                             ; (jump-table data)
2391  DB  255                                 ; (jump-table data)
2392  DB  255                                 ; (jump-table data)
2393  DB  255                                 ; (jump-table data)
2394  DB  222,255                             ; (jump-table data)
2395  DB  255                                 ; (jump-table data)
2396  DB  255,214                             ; (jump-table data)
2397  DB  255                                 ; (jump-table data)
2398  DB  255                                 ; (jump-table data)
2399  DB  255,206                             ; (jump-table data)
2400  DB  255                                 ; (jump-table data)
2401  DB  255                                 ; (jump-table data)
2402  DB  255,198                             ; (jump-table data)
2403  DB  255                                 ; (jump-table data)
2404  DB  255                                 ; (jump-table data)
2405  DB  255                                 ; (jump-table data)
2406
; _sk_load_8888_avx
;
; Pipeline stage (AVX, no AVX2 integer ops): loads up to eight 32-bit pixels
; from src + rdi*4 and unpacks their four bytes into float lanes.
;
; Registers as used by the code below:
;   rsi  - cursor into a stream of qwords; `lods` fetches the next qword into
;          rax and advances rsi (assumes DF is clear).  The first qword is a
;          pointer whose first field is the source base pointer; the
;          following qword is the address jumped to when this stage is done.
;   rdi  - element index, scaled by 4 (bytes per pixel).
;   rcx  - 0 selects the full 8-pixel load; non-zero selects the tail path,
;          where (cl & 7) is the number of pixels to read.
;   Outputs: ymm0 = byte 0, ymm1 = byte 1, ymm2 = byte 2, ymm3 = byte 3 of
;          each pixel, each converted to float and multiplied by 0x3b808081
;          (approximately 1/255).  Presumably r, g, b, a - confirm against the
;          stage's C++ source.
;   Scratch: rax, r8, r9, r10, ymm8-ymm11, flags.
2407PUBLIC _sk_load_8888_avx
2408_sk_load_8888_avx LABEL PROC
2409  DB  72,173                              ; lods          %ds:(%rsi),%rax
2410  DB  76,139,16                           ; mov           (%rax),%r10
; rcx != 0 -> tail load at +0xab; otherwise load all 32 bytes into ymm9.
2411  DB  72,133,201                          ; test          %rcx,%rcx
2412  DB  15,133,157,0,0,0                    ; jne           105b <_sk_load_8888_avx+0xab>
2413  DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
; +0x14: unpack.  Both the full and the tail path arrive here with the packed
; pixels in ymm9.  ymm11 = broadcast 0x000000ff mask.
2414  DB  184,255,0,0,0                       ; mov           $0xff,%eax
2415  DB  197,249,110,192                     ; vmovd         %eax,%xmm0
2416  DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
2417  DB  196,99,125,24,216,1                 ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm11
; ymm0 = float(pixel & 0xff) * scale; ymm8 = broadcast scale (0x3b808081).
2418  DB  196,193,36,84,193                   ; vandps        %ymm9,%ymm11,%ymm0
2419  DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
2420  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
2421  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2422  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2423  DB  196,99,117,24,193,1                 ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm8
2424  DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
; ymm1 = float((pixel >> 8) & 0xff) * scale.  Shifts are done per 128-bit
; half; xmm3 keeps the upper half of the packed pixels for the later shifts.
2425  DB  196,193,41,114,209,8                ; vpsrld        $0x8,%xmm9,%xmm10
2426  DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
2427  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
2428  DB  196,227,45,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm10,%ymm1
2429  DB  197,164,84,201                      ; vandps        %ymm1,%ymm11,%ymm1
2430  DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
2431  DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
; ymm2 = float((pixel >> 16) & 0xff) * scale.
2432  DB  196,193,41,114,209,16               ; vpsrld        $0x10,%xmm9,%xmm10
2433  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
2434  DB  196,227,45,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm10,%ymm2
2435  DB  197,164,84,210                      ; vandps        %ymm2,%ymm11,%ymm2
2436  DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
2437  DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
; ymm3 = float(pixel >> 24) * scale (logical shift, so no mask is needed).
2438  DB  196,193,49,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm9
2439  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
2440  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
2441  DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
2442  DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
; Exit: fetch the next qword from the stream and jump to it.
2443  DB  72,173                              ; lods          %ds:(%rsi),%rax
2444  DB  255,224                             ; jmpq          *%rax
; +0xab: tail path.  ymm9 is zeroed first so lanes that are not loaded read as
; 0.  index = (cl & 7) - 1; an index above 6 (i.e. cl & 7 == 0) loads nothing
; and unpacks the all-zero ymm9.
2445  DB  65,137,200                          ; mov           %ecx,%r8d
2446  DB  65,128,224,7                        ; and           $0x7,%r8b
2447  DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
2448  DB  65,254,200                          ; dec           %r8b
2449  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2450  DB  15,135,80,255,255,255               ; ja            fc4 <_sk_load_8888_avx+0x14>
; Dispatch through the table at +0x158: rax = table + sign-extended table[index].
2451  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
2452  DB  76,141,13,137,0,0,0                 ; lea           0x89(%rip),%r9        # 1108 <_sk_load_8888_avx+0x158>
2453  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
2454  DB  76,1,200                            ; add           %r9,%rax
2455  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: table entry k enters at the group that loads pixel k and
; runs down to pixel 0, so k+1 pixels are loaded.  xmm0/ymm0 and ymm1 are used
; as temporaries here; they are overwritten by the unpack code afterwards.
; index 6: pixel 6 -> lane 6 of an otherwise zero ymm9.
2456  DB  196,193,121,110,68,186,24           ; vmovd         0x18(%r10,%rdi,4),%xmm0
2457  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
2458  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
2459  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2460  DB  196,99,117,12,200,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm9
; index 5: pixel 5 -> lane 1 of the upper 128-bit half.
2461  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
2462  DB  196,195,121,34,68,186,20,1          ; vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
2463  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
; index 4: pixel 4 -> lane 0 of the upper 128-bit half.
2464  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
2465  DB  196,195,121,34,68,186,16,0          ; vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
2466  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
; index 3..0: pixels 3..0 -> lower half; vblendps $0xf replaces only lanes 0-3.
2467  DB  196,195,49,34,68,186,12,3           ; vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
2468  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
2469  DB  196,195,49,34,68,186,8,2            ; vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
2470  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
2471  DB  196,195,49,34,68,186,4,1            ; vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
2472  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
2473  DB  196,195,49,34,4,186,0               ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
2474  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
2475  DB  233,188,254,255,255                 ; jmpq          fc4 <_sk_load_8888_avx+0x14>
; Jump table (data, not code) at +0x158: seven little-endian signed 32-bit
; offsets relative to the table's own address.  Values for index 0..6 are
; -18, -32, -46, -60, -80, -100, -128, i.e. the start of the group that loads
; pixel 0..6 above.  The DB line breaks below follow the disassembler's
; mis-decode and do not line up with the 4-byte entry boundaries.
2476  DB  238                                 ; (jump-table data)
2477  DB  255                                 ; (jump-table data)
2478  DB  255                                 ; (jump-table data)
2479  DB  255,224                             ; (jump-table data)
2480  DB  255                                 ; (jump-table data)
2481  DB  255                                 ; (jump-table data)
2482  DB  255,210                             ; (jump-table data)
2483  DB  255                                 ; (jump-table data)
2484  DB  255                                 ; (jump-table data)
2485  DB  255,196                             ; (jump-table data)
2486  DB  255                                 ; (jump-table data)
2487  DB  255                                 ; (jump-table data)
2488  DB  255,176,255,255,255,156             ; (jump-table data)
2489  DB  255                                 ; (jump-table data)
2490  DB  255                                 ; (jump-table data)
2491  DB  255                                 ; (jump-table data)
2492  DB  128,255,255                         ; (jump-table data)
2493  DB  255                                 ; (jump-table data)
2494
; _sk_store_8888_avx
;
; Pipeline stage (AVX, no AVX2 integer ops): converts eight float lanes from
; ymm0..ymm3 to packed 32-bit pixels (one byte per input register) and stores
; them at dst + rdi*4.
;
; Registers as used by the code below:
;   rsi  - cursor into a stream of qwords; `lods` fetches the next qword into
;          rax and advances rsi (assumes DF is clear).  The first qword is a
;          pointer whose first field is the destination base pointer; the
;          following qword is the address jumped to when this stage is done.
;   rdi  - element index, scaled by 4 (bytes per pixel).
;   rcx  - 0 selects the full 8-pixel store; non-zero selects the tail path,
;          where (cl & 7) is the number of pixels to write.
;   ymm0..ymm3 - inputs (presumably r, g, b, a in [0,1] - confirm against the
;          stage's C++ source); left unmodified.
;   Scratch: rax, r8, r9, ymm8-ymm11, flags.
;
; Packed value per lane:
;   cvt(ymm0*255) | cvt(ymm1*255) << 8 | cvt(ymm2*255) << 16 | cvt(ymm3*255) << 24
; No clamping or masking is applied here before the fields are OR-ed together.
2495PUBLIC _sk_store_8888_avx
2496_sk_store_8888_avx LABEL PROC
2497  DB  72,173                              ; lods          %ds:(%rsi),%rax
2498  DB  76,139,8                            ; mov           (%rax),%r9
; ymm8 = broadcast 255.0f (0x437f0000).
2499  DB  184,0,0,127,67                      ; mov           $0x437f0000,%eax
2500  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2501  DB  196,67,121,4,192,0                  ; vpermilps     $0x0,%xmm8,%xmm8
2502  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
; Byte 0 from ymm0 (no shift).
2503  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
2504  DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
; Byte 1 from ymm1: shift left 8 per 128-bit half, then OR into ymm9.
2505  DB  197,60,89,209                       ; vmulps        %ymm1,%ymm8,%ymm10
2506  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2507  DB  196,193,33,114,242,8                ; vpslld        $0x8,%xmm10,%xmm11
2508  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
2509  DB  196,193,41,114,242,8                ; vpslld        $0x8,%xmm10,%xmm10
2510  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
2511  DB  196,65,45,86,201                    ; vorpd         %ymm9,%ymm10,%ymm9
; Byte 2 from ymm2: shift left 16, kept in ymm10.
2512  DB  197,60,89,210                       ; vmulps        %ymm2,%ymm8,%ymm10
2513  DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
2514  DB  196,193,33,114,242,16               ; vpslld        $0x10,%xmm10,%xmm11
2515  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
2516  DB  196,193,41,114,242,16               ; vpslld        $0x10,%xmm10,%xmm10
2517  DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
; Byte 3 from ymm3: shift left 24, then combine all four fields into ymm8.
2518  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
2519  DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
2520  DB  196,193,33,114,240,24               ; vpslld        $0x18,%xmm8,%xmm11
2521  DB  196,67,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm8
2522  DB  196,193,57,114,240,24               ; vpslld        $0x18,%xmm8,%xmm8
2523  DB  196,67,37,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
2524  DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
2525  DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
; rcx != 0 -> partial (tail) store at +0xa4; otherwise store all 32 bytes.
2526  DB  72,133,201                          ; test          %rcx,%rcx
2527  DB  117,10                              ; jne           11c8 <_sk_store_8888_avx+0xa4>
2528  DB  196,65,124,17,4,185                 ; vmovups       %ymm8,(%r9,%rdi,4)
; +0xa0: common exit - fetch the next qword from the stream and jump to it.
2529  DB  72,173                              ; lods          %ds:(%rsi),%rax
2530  DB  255,224                             ; jmpq          *%rax
; +0xa4: tail path.  index = (cl & 7) - 1; an index above 6 (i.e. cl & 7 == 0)
; writes nothing and goes straight to the exit at +0xa0.
2531  DB  65,137,200                          ; mov           %ecx,%r8d
2532  DB  65,128,224,7                        ; and           $0x7,%r8b
2533  DB  65,254,200                          ; dec           %r8b
2534  DB  65,128,248,6                        ; cmp           $0x6,%r8b
2535  DB  119,236                             ; ja            11c4 <_sk_store_8888_avx+0xa0>
; Dispatch through the table at +0x114: rax = table + sign-extended table[index].
2536  DB  65,15,182,192                       ; movzbl        %r8b,%eax
2537  DB  76,141,5,85,0,0,0                   ; lea           0x55(%rip),%r8        # 1238 <_sk_store_8888_avx+0x114>
2538  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
2539  DB  76,1,192                            ; add           %r8,%rax
2540  DB  255,224                             ; jmpq          *%rax
; Fall-through chain: table entry k enters at the group that stores pixel k
; and runs down to pixel 0, so k+1 pixels are written.  Pixels 6..4 come from
; the upper 128-bit half (re-extracted into xmm9 for each entry point).
2541  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
2542  DB  196,67,121,22,76,185,24,2           ; vpextrd       $0x2,%xmm9,0x18(%r9,%rdi,4)
2543  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
2544  DB  196,67,121,22,76,185,20,1           ; vpextrd       $0x1,%xmm9,0x14(%r9,%rdi,4)
2545  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
2546  DB  196,65,122,17,76,185,16             ; vmovss        %xmm9,0x10(%r9,%rdi,4)
2547  DB  196,67,121,22,68,185,12,3           ; vpextrd       $0x3,%xmm8,0xc(%r9,%rdi,4)
2548  DB  196,67,121,22,68,185,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r9,%rdi,4)
2549  DB  196,67,121,22,68,185,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r9,%rdi,4)
2550  DB  196,65,121,126,4,185                ; vmovd         %xmm8,(%r9,%rdi,4)
2551  DB  235,143                             ; jmp           11c4 <_sk_store_8888_avx+0xa0>
; Unreachable 3-byte padding; the table that follows starts at +0x114.
2552  DB  15,31,0                             ; nopl          (%rax)
; Jump table (data, not code): seven little-endian signed 32-bit offsets
; relative to the table's own address.  Values for index 0..6 are
; -11, -19, -27, -35, -48, -62, -76, i.e. the start of the group that stores
; pixel 0..6 above.  The DB line breaks below follow the disassembler's
; mis-decode and do not line up with the 4-byte entry boundaries.
2553  DB  245                                 ; (jump-table data)
2554  DB  255                                 ; (jump-table data)
2555  DB  255                                 ; (jump-table data)
2556  DB  255                                 ; (jump-table data)
2557  DB  237                                 ; (jump-table data)
2558  DB  255                                 ; (jump-table data)
2559  DB  255                                 ; (jump-table data)
2560  DB  255,229                             ; (jump-table data)
2561  DB  255                                 ; (jump-table data)
2562  DB  255                                 ; (jump-table data)
2563  DB  255                                 ; (jump-table data)
2564  DB  221,255                             ; (jump-table data)
2565  DB  255                                 ; (jump-table data)
2566  DB  255,208                             ; (jump-table data)
2567  DB  255                                 ; (jump-table data)
2568  DB  255                                 ; (jump-table data)
2569  DB  255,194                             ; (jump-table data)
2570  DB  255                                 ; (jump-table data)
2571  DB  255                                 ; (jump-table data)
2572  DB  255                                 ; (jump-table data)
2573  DB  180,255                             ; (jump-table data)
2574  DB  255                                 ; (jump-table data)
2575  DB  255                                 ; (jump-table data)
2576
; _sk_load_f16_avx
;
; Pipeline stage (AVX, no AVX2 / F16C): loads up to eight 8-byte pixels (four
; 16-bit words each) from src + rdi*8, de-interleaves the words, and expands
; each 16-bit word to a 32-bit float.
;
; Registers as used by the code below:
;   rsi  - cursor into a stream of qwords; `lods` fetches the next qword into
;          rax and advances rsi (assumes DF is clear).  The first qword is a
;          pointer whose first field is the source base pointer; the
;          following qword is the address jumped to when this stage is done.
;   rdi  - element index, scaled by 8 (bytes per pixel).
;   rcx  - 0 selects the full 8-pixel load; non-zero selects the tail path,
;          which loads min(rcx, 7) pixels and zeroes the rest.
;   Outputs: ymm0..ymm3 = word 0..3 of each pixel, converted to float
;          (presumably r, g, b, a - confirm against the stage's C++ source).
;   Scratch: rax, xmm8-xmm14 / ymm9, flags.
;
; Conversion used for each 16-bit word h:
;   h is replaced by 0 when 0x0400 > h as a signed 16-bit compare (this covers
;   values below 0x0400 and values with bit 15 set; presumably a flush of
;   half-float denormals, with negative inputs also dropped - confirm), then
;   float_bits = zero_extend(h) << 13, and result = float * 0x77800000 (2^112).
2577PUBLIC _sk_load_f16_avx
2578_sk_load_f16_avx LABEL PROC
2579  DB  72,173                              ; lods          %ds:(%rsi),%rax
2580  DB  72,139,0                            ; mov           (%rax),%rax
; rcx != 0 -> tail load at +0x110; otherwise load 64 bytes (two pixels per xmm).
2581  DB  72,133,201                          ; test          %rcx,%rcx
2582  DB  15,133,2,1,0,0                      ; jne           1364 <_sk_load_f16_avx+0x110>
2583  DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
2584  DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
2585  DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
2586  DB  197,122,111,76,248,48               ; vmovdqu       0x30(%rax,%rdi,8),%xmm9
; +0x25: transpose.  Inputs: xmm8 = pixels 0-1, xmm2 = pixels 2-3,
; xmm3 = pixels 4-5, xmm9 = pixels 6-7.  Two rounds of word interleaves give:
;   xmm8 = word0 of pixels 0-3 | word1 of pixels 0-3
;   xmm0 = word2 of pixels 0-3 | word3 of pixels 0-3
;   xmm2 = word0 of pixels 4-7 | word1 of pixels 4-7
;   xmm9 = word2 of pixels 4-7 | word3 of pixels 4-7
2587  DB  197,185,97,194                      ; vpunpcklwd    %xmm2,%xmm8,%xmm0
2588  DB  197,185,105,210                     ; vpunpckhwd    %xmm2,%xmm8,%xmm2
2589  DB  196,193,97,97,201                   ; vpunpcklwd    %xmm9,%xmm3,%xmm1
2590  DB  196,193,97,105,217                  ; vpunpckhwd    %xmm9,%xmm3,%xmm3
2591  DB  197,121,97,194                      ; vpunpcklwd    %xmm2,%xmm0,%xmm8
2592  DB  197,249,105,194                     ; vpunpckhwd    %xmm2,%xmm0,%xmm0
2593  DB  197,241,97,211                      ; vpunpcklwd    %xmm3,%xmm1,%xmm2
2594  DB  197,113,105,203                     ; vpunpckhwd    %xmm3,%xmm1,%xmm9
; xmm3 = 0x0400 in every 16-bit lane.  For each vector: mask = (0x0400 > h)
; signed, then h = h & ~mask, i.e. lanes that compare below 0x0400 become 0.
2595  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
2596  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2597  DB  197,249,112,219,0                   ; vpshufd       $0x0,%xmm3,%xmm3
2598  DB  196,193,97,101,200                  ; vpcmpgtw      %xmm8,%xmm3,%xmm1
2599  DB  196,65,113,223,192                  ; vpandn        %xmm8,%xmm1,%xmm8
2600  DB  197,225,101,200                     ; vpcmpgtw      %xmm0,%xmm3,%xmm1
2601  DB  197,241,223,192                     ; vpandn        %xmm0,%xmm1,%xmm0
2602  DB  197,225,101,202                     ; vpcmpgtw      %xmm2,%xmm3,%xmm1
2603  DB  197,241,223,202                     ; vpandn        %xmm2,%xmm1,%xmm1
2604  DB  196,193,97,101,209                  ; vpcmpgtw      %xmm9,%xmm3,%xmm2
2605  DB  196,193,105,223,209                 ; vpandn        %xmm9,%xmm2,%xmm2
; Zero-extend each group of four 16-bit words to 32 bits (low halves via
; vpmovzxwd, high halves by interleaving with the zeroed xmm3):
;   xmm10/xmm9  = word0 of pixels 0-3 / 4-7
;   xmm8 /xmm1  = word1 of pixels 0-3 / 4-7
;   xmm11/xmm12 = word2 of pixels 0-3 / 4-7
;   xmm13/xmm14 = word3 of pixels 0-3 / 4-7
2606  DB  196,66,121,51,208                   ; vpmovzxwd     %xmm8,%xmm10
2607  DB  196,98,121,51,201                   ; vpmovzxwd     %xmm1,%xmm9
2608  DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
2609  DB  197,57,105,195                      ; vpunpckhwd    %xmm3,%xmm8,%xmm8
2610  DB  197,241,105,203                     ; vpunpckhwd    %xmm3,%xmm1,%xmm1
2611  DB  196,98,121,51,216                   ; vpmovzxwd     %xmm0,%xmm11
2612  DB  196,98,121,51,226                   ; vpmovzxwd     %xmm2,%xmm12
2613  DB  197,121,105,235                     ; vpunpckhwd    %xmm3,%xmm0,%xmm13
2614  DB  197,105,105,243                     ; vpunpckhwd    %xmm3,%xmm2,%xmm14
; ymm0 = (word0 << 13) * 2^112; ymm9 = broadcast 0x77800000 (2^112 as float).
2615  DB  196,193,121,114,242,13              ; vpslld        $0xd,%xmm10,%xmm0
2616  DB  196,193,105,114,241,13              ; vpslld        $0xd,%xmm9,%xmm2
2617  DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
2618  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
2619  DB  197,249,110,208                     ; vmovd         %eax,%xmm2
2620  DB  197,249,112,210,0                   ; vpshufd       $0x0,%xmm2,%xmm2
2621  DB  196,99,109,24,202,1                 ; vinsertf128   $0x1,%xmm2,%ymm2,%ymm9
2622  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
; ymm1 = (word1 << 13) * 2^112.
2623  DB  196,193,105,114,240,13              ; vpslld        $0xd,%xmm8,%xmm2
2624  DB  197,241,114,241,13                  ; vpslld        $0xd,%xmm1,%xmm1
2625  DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
2626  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
; ymm2 = (word2 << 13) * 2^112.
2627  DB  196,193,105,114,243,13              ; vpslld        $0xd,%xmm11,%xmm2
2628  DB  196,193,97,114,244,13               ; vpslld        $0xd,%xmm12,%xmm3
2629  DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
2630  DB  197,180,89,210                      ; vmulps        %ymm2,%ymm9,%ymm2
; ymm3 = (word3 << 13) * 2^112.
2631  DB  196,193,57,114,245,13               ; vpslld        $0xd,%xmm13,%xmm8
2632  DB  196,193,97,114,246,13               ; vpslld        $0xd,%xmm14,%xmm3
2633  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
2634  DB  197,180,89,219                      ; vmulps        %ymm3,%ymm9,%ymm3
; Exit: fetch the next qword from the stream and jump to it.
2635  DB  72,173                              ; lods          %ds:(%rsi),%rax
2636  DB  255,224                             ; jmpq          *%rax
; +0x110: tail path.  Pixels are loaded one qword at a time into the same
; registers the full path uses (xmm8, xmm2, xmm3, xmm9); rcx is compared as a
; full 64-bit count.  Registers that receive no pixel are zeroed before
; rejoining the transpose at +0x25.  xmm9 is zeroed up front.
2637  DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
2638  DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
2639  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2640  DB  116,79                              ; je            13c3 <_sk_load_f16_avx+0x16f>
2641  DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
2642  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2643  DB  114,67                              ; jb            13c3 <_sk_load_f16_avx+0x16f>
2644  DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
2645  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2646  DB  116,68                              ; je            13d0 <_sk_load_f16_avx+0x17c>
2647  DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
2648  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2649  DB  114,56                              ; jb            13d0 <_sk_load_f16_avx+0x17c>
2650  DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
2651  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2652  DB  15,132,209,254,255,255              ; je            1279 <_sk_load_f16_avx+0x25>
2653  DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
2654  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2655  DB  15,130,193,254,255,255              ; jb            1279 <_sk_load_f16_avx+0x25>
2656  DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
2657  DB  233,182,254,255,255                 ; jmpq          1279 <_sk_load_f16_avx+0x25>
; +0x16f: one or two pixels loaded - zero xmm3 and xmm2 (pixels 2-5).
2658  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2659  DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
2660  DB  233,169,254,255,255                 ; jmpq          1279 <_sk_load_f16_avx+0x25>
; +0x17c: three or four pixels loaded - zero xmm3 (pixels 4-5).
2661  DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
2662  DB  233,160,254,255,255                 ; jmpq          1279 <_sk_load_f16_avx+0x25>
2663
2664PUBLIC _sk_store_f16_avx
2665_sk_store_f16_avx LABEL PROC
2666  DB  72,173                              ; lods          %ds:(%rsi),%rax
2667  DB  76,139,0                            ; mov           (%rax),%r8
2668  DB  184,0,0,128,7                       ; mov           $0x7800000,%eax
2669  DB  197,121,110,192                     ; vmovd         %eax,%xmm8
2670  DB  196,65,121,112,192,0                ; vpshufd       $0x0,%xmm8,%xmm8
2671  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8
2672  DB  197,60,89,200                       ; vmulps        %ymm0,%ymm8,%ymm9
2673  DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
2674  DB  196,193,41,114,210,13               ; vpsrld        $0xd,%xmm10,%xmm10
2675  DB  196,193,49,114,209,13               ; vpsrld        $0xd,%xmm9,%xmm9
2676  DB  197,60,89,217                       ; vmulps        %ymm1,%ymm8,%ymm11
2677  DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
2678  DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
2679  DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
2680  DB  197,60,89,234                       ; vmulps        %ymm2,%ymm8,%ymm13
2681  DB  196,67,125,25,238,1                 ; vextractf128  $0x1,%ymm13,%xmm14
2682  DB  196,193,9,114,214,13                ; vpsrld        $0xd,%xmm14,%xmm14
2683  DB  196,193,17,114,213,13               ; vpsrld        $0xd,%xmm13,%xmm13
2684  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
2685  DB  196,67,125,25,199,1                 ; vextractf128  $0x1,%ymm8,%xmm15
2686  DB  196,193,1,114,215,13                ; vpsrld        $0xd,%xmm15,%xmm15
2687  DB  196,193,57,114,208,13               ; vpsrld        $0xd,%xmm8,%xmm8
2688  DB  196,193,33,115,251,2                ; vpslldq       $0x2,%xmm11,%xmm11
2689  DB  196,65,33,235,201                   ; vpor          %xmm9,%xmm11,%xmm9
2690  DB  196,193,33,115,252,2                ; vpslldq       $0x2,%xmm12,%xmm11
2691  DB  196,65,33,235,226                   ; vpor          %xmm10,%xmm11,%xmm12
2692  DB  196,193,57,115,248,2                ; vpslldq       $0x2,%xmm8,%xmm8
2693  DB  196,65,57,235,197                   ; vpor          %xmm13,%xmm8,%xmm8
2694  DB  196,193,41,115,255,2                ; vpslldq       $0x2,%xmm15,%xmm10
2695  DB  196,65,41,235,238                   ; vpor          %xmm14,%xmm10,%xmm13
2696  DB  196,65,49,98,216                    ; vpunpckldq    %xmm8,%xmm9,%xmm11
2697  DB  196,65,49,106,208                   ; vpunpckhdq    %xmm8,%xmm9,%xmm10
2698  DB  196,65,25,98,205                    ; vpunpckldq    %xmm13,%xmm12,%xmm9
2699  DB  196,65,25,106,197                   ; vpunpckhdq    %xmm13,%xmm12,%xmm8
2700  DB  72,133,201                          ; test          %rcx,%rcx
2701  DB  117,31                              ; jne           14af <_sk_store_f16_avx+0xd6>
2702  DB  196,65,120,17,28,248                ; vmovups       %xmm11,(%r8,%rdi,8)
2703  DB  196,65,120,17,84,248,16             ; vmovups       %xmm10,0x10(%r8,%rdi,8)
2704  DB  196,65,120,17,76,248,32             ; vmovups       %xmm9,0x20(%r8,%rdi,8)
2705  DB  196,65,122,127,68,248,48            ; vmovdqu       %xmm8,0x30(%r8,%rdi,8)
2706  DB  72,173                              ; lods          %ds:(%rsi),%rax
2707  DB  255,224                             ; jmpq          *%rax
2708  DB  196,65,121,214,28,248               ; vmovq         %xmm11,(%r8,%rdi,8)
2709  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2710  DB  116,240                             ; je            14ab <_sk_store_f16_avx+0xd2>
2711  DB  196,65,121,23,92,248,8              ; vmovhpd       %xmm11,0x8(%r8,%rdi,8)
2712  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2713  DB  114,227                             ; jb            14ab <_sk_store_f16_avx+0xd2>
2714  DB  196,65,121,214,84,248,16            ; vmovq         %xmm10,0x10(%r8,%rdi,8)
2715  DB  116,218                             ; je            14ab <_sk_store_f16_avx+0xd2>
2716  DB  196,65,121,23,84,248,24             ; vmovhpd       %xmm10,0x18(%r8,%rdi,8)
2717  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2718  DB  114,205                             ; jb            14ab <_sk_store_f16_avx+0xd2>
2719  DB  196,65,121,214,76,248,32            ; vmovq         %xmm9,0x20(%r8,%rdi,8)
2720  DB  116,196                             ; je            14ab <_sk_store_f16_avx+0xd2>
2721  DB  196,65,121,23,76,248,40             ; vmovhpd       %xmm9,0x28(%r8,%rdi,8)
2722  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2723  DB  114,183                             ; jb            14ab <_sk_store_f16_avx+0xd2>
2724  DB  196,65,121,214,68,248,48            ; vmovq         %xmm8,0x30(%r8,%rdi,8)
2725  DB  235,174                             ; jmp           14ab <_sk_store_f16_avx+0xd2>
2726
; _sk_store_f32_avx
; Transposes the four 8-float vectors ymm0..ymm3 into eight 4-float groups
; {ymm0[i], ymm1[i], ymm2[i], ymm3[i]} and stores them as 16-byte records.
; (Presumably ymm0..ymm3 are the r,g,b,a channels -- not visible here.)
;   In:   rsi  -> stage list; first qword is a pointer whose first qword is
;                 the destination base address, second qword is the next
;                 stage to jump to.
;         rdi  =  record index; record i is written at base + (rdi+i)*16.
;         rcx  =  0 to store all 8 records, otherwise the number of leading
;                 records to store (the tail path handles 1..7).
;   Out:  ymm0..ymm3 unchanged; rsi advanced by 16 (two lods, DF assumed 0).
;   Clobbers: rax, r8, ymm8-ymm13, flags.
2727PUBLIC _sk_store_f32_avx
2728_sk_store_f32_avx LABEL PROC
2729  DB  72,173                              ; lods          %ds:(%rsi),%rax
2730  DB  76,139,0                            ; mov           (%rax),%r8
; rax = rdi*4, so (%r8,%rax,4) below addresses r8 + rdi*16.
2731  DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
; Stage 1: 32-bit interleave of ymm0/ymm1 and ymm2/ymm3 (per 128-bit half).
2732  DB  197,124,20,193                      ; vunpcklps     %ymm1,%ymm0,%ymm8
2733  DB  197,124,21,217                      ; vunpckhps     %ymm1,%ymm0,%ymm11
2734  DB  197,108,20,203                      ; vunpcklps     %ymm3,%ymm2,%ymm9
2735  DB  197,108,21,227                      ; vunpckhps     %ymm3,%ymm2,%ymm12
; Stage 2: 64-bit interleave. Low|high 128-bit halves now hold records:
;   ymm10 = 0|4, ymm9 = 1|5, ymm8 = 2|6, ymm11 = 3|7.
2736  DB  196,65,61,20,209                    ; vunpcklpd     %ymm9,%ymm8,%ymm10
2737  DB  196,65,61,21,201                    ; vunpckhpd     %ymm9,%ymm8,%ymm9
2738  DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
2739  DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
; rcx != 0 selects the partial-store path further down.
2740  DB  72,133,201                          ; test          %rcx,%rcx
2741  DB  117,55                              ; jne           156a <_sk_store_f32_avx+0x6d>
; Full path: regroup halves into record pairs 0|1, 2|3, 4|5, 6|7 and store
; 128 contiguous bytes with unaligned 32-byte stores.
2742  DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
2743  DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
2744  DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
2745  DB  196,67,61,6,195,49                  ; vperm2f128    $0x31,%ymm11,%ymm8,%ymm8
2746  DB  196,65,125,17,36,128                ; vmovupd       %ymm12,(%r8,%rax,4)
2747  DB  196,65,125,17,108,128,32            ; vmovupd       %ymm13,0x20(%r8,%rax,4)
2748  DB  196,65,125,17,76,128,64             ; vmovupd       %ymm9,0x40(%r8,%rax,4)
2749  DB  196,65,125,17,68,128,96             ; vmovupd       %ymm8,0x60(%r8,%rax,4)
; Common exit: fetch the next stage address from the list and tail-jump.
2750  DB  72,173                              ; lods          %ds:(%rsi),%rax
2751  DB  255,224                             ; jmpq          *%rax
; Partial path: store one 16-byte record at a time, leaving once rcx
; records are written. Each cmp feeds two branches (jb, then je after the
; intervening store); the vector stores do not modify flags.
2752  DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
2753  DB  72,131,249,1                        ; cmp           $0x1,%rcx
2754  DB  116,240                             ; je            1566 <_sk_store_f32_avx+0x69>
2755  DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
2756  DB  72,131,249,3                        ; cmp           $0x3,%rcx
2757  DB  114,227                             ; jb            1566 <_sk_store_f32_avx+0x69>
2758  DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
2759  DB  116,218                             ; je            1566 <_sk_store_f32_avx+0x69>
2760  DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
2761  DB  72,131,249,5                        ; cmp           $0x5,%rcx
2762  DB  114,205                             ; jb            1566 <_sk_store_f32_avx+0x69>
; Records 4..6 live in the upper 128-bit halves; extract straight to memory.
2763  DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
2764  DB  116,195                             ; je            1566 <_sk_store_f32_avx+0x69>
2765  DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
2766  DB  72,131,249,7                        ; cmp           $0x7,%rcx
2767  DB  114,181                             ; jb            1566 <_sk_store_f32_avx+0x69>
2768  DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
2769  DB  235,171                             ; jmp           1566 <_sk_store_f32_avx+0x69>
2770
; _sk_clamp_x_avx
; ymm0 = min(max(ymm0, 0), L') per lane, where L is the float read from the
; pointer taken from the stage list and L' is L with its 32-bit pattern
; decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm0 clamped; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2771PUBLIC _sk_clamp_x_avx
2772_sk_clamp_x_avx LABEL PROC
2773  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Lower bound: ymm9 = max(0, ymm0).
2774  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2775  DB  197,60,95,200                       ; vmaxps        %ymm0,%ymm8,%ymm9
2776  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; Add -1 (all-ones) to the integer bit pattern of every lane of L. This is
; done on each 128-bit half separately with xmm vpaddd, then rejoined.
; For a positive finite L this is the next representable float below L.
2777  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
2778  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2779  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
2780  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
2781  DB  196,227,61,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm0
; Upper bound, then tail-jump to the next stage.
2782  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
2783  DB  72,173                              ; lods          %ds:(%rsi),%rax
2784  DB  255,224                             ; jmpq          *%rax
2785
; _sk_clamp_y_avx
; Same operation as the x variant but applied to ymm1:
; ymm1 = min(max(ymm1, 0), L') per lane, where L is the float read from the
; pointer taken from the stage list and L' is L with its 32-bit pattern
; decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm1 clamped; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2786PUBLIC _sk_clamp_y_avx
2787_sk_clamp_y_avx LABEL PROC
2788  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Lower bound: ymm9 = max(0, ymm1).
2789  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2790  DB  197,60,95,201                       ; vmaxps        %ymm1,%ymm8,%ymm9
2791  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; Integer-decrement the bit pattern of L in both 128-bit halves (xmm vpaddd
; with all-ones), then rebuild the 256-bit bound in ymm1.
2792  DB  196,99,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm1
2793  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2794  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
2795  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
2796  DB  196,227,61,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm8,%ymm1
; Upper bound, then tail-jump to the next stage.
2797  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
2798  DB  72,173                              ; lods          %ds:(%rsi),%rax
2799  DB  255,224                             ; jmpq          *%rax
2800
; _sk_repeat_x_avx
; Wraps ymm0 into one period of length L:
;   ymm0 = min(ymm0 - floor(ymm0 / L) * L, L')
; where L is the float read via the pointer from the stage list and L' is L
; with its 32-bit pattern decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm0 wrapped; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2801PUBLIC _sk_repeat_x_avx
2802_sk_repeat_x_avx LABEL PROC
2803  DB  72,173                              ; lods          %ds:(%rsi),%rax
2804  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; ymm9 = ymm0 - floor(ymm0 / L) * L   (vroundps imm 1 = round toward -inf).
2805  DB  196,65,124,94,200                   ; vdivps        %ymm8,%ymm0,%ymm9
2806  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
2807  DB  196,65,52,89,200                    ; vmulps        %ymm8,%ymm9,%ymm9
2808  DB  196,65,124,92,201                   ; vsubps        %ymm9,%ymm0,%ymm9
; ymm0 = L with each lane's bit pattern decremented by one, computed on the
; two 128-bit halves separately with xmm vpaddd.
2809  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
2810  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2811  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
2812  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
2813  DB  196,227,61,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm0
; Cap the wrapped value, then tail-jump to the next stage.
2814  DB  197,180,93,192                      ; vminps        %ymm0,%ymm9,%ymm0
2815  DB  72,173                              ; lods          %ds:(%rsi),%rax
2816  DB  255,224                             ; jmpq          *%rax
2817
; _sk_repeat_y_avx
; Same operation as the x variant but applied to ymm1:
;   ymm1 = min(ymm1 - floor(ymm1 / L) * L, L')
; where L is the float read via the pointer from the stage list and L' is L
; with its 32-bit pattern decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm1 wrapped; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2818PUBLIC _sk_repeat_y_avx
2819_sk_repeat_y_avx LABEL PROC
2820  DB  72,173                              ; lods          %ds:(%rsi),%rax
2821  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
; ymm9 = ymm1 - floor(ymm1 / L) * L   (vroundps imm 1 = round toward -inf).
2822  DB  196,65,116,94,200                   ; vdivps        %ymm8,%ymm1,%ymm9
2823  DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
2824  DB  196,65,52,89,200                    ; vmulps        %ymm8,%ymm9,%ymm9
2825  DB  196,65,116,92,201                   ; vsubps        %ymm9,%ymm1,%ymm9
; ymm1 = L with each lane's bit pattern decremented by one, computed on the
; two 128-bit halves separately with xmm vpaddd.
2826  DB  196,99,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm1
2827  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2828  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
2829  DB  196,65,57,254,194                   ; vpaddd        %xmm10,%xmm8,%xmm8
2830  DB  196,227,61,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm8,%ymm1
; Cap the wrapped value, then tail-jump to the next stage.
2831  DB  197,180,93,201                      ; vminps        %ymm1,%ymm9,%ymm1
2832  DB  72,173                              ; lods          %ds:(%rsi),%rax
2833  DB  255,224                             ; jmpq          *%rax
2834
; _sk_mirror_x_avx
; Reflects ymm0 back and forth over a period of 2*L. With u = ymm0 - L:
;   ymm0 = min( | (u - floor(u / 2L) * 2L) - L | , L' )
; where L is the float read via the pointer from the stage list and L' is L
; with its 32-bit pattern decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm0 mirrored; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2835PUBLIC _sk_mirror_x_avx
2836_sk_mirror_x_avx LABEL PROC
2837  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm9 = L broadcast to all 8 lanes (scalar load + shuffle + insert).
2838  DB  197,121,110,0                       ; vmovd         (%rax),%xmm8
2839  DB  196,65,121,112,200,0                ; vpshufd       $0x0,%xmm8,%xmm9
2840  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
; ymm10 = u = ymm0 - L.
2841  DB  196,65,124,92,209                   ; vsubps        %ymm9,%ymm0,%ymm10
; ymm0 = 2L broadcast to all 8 lanes (scalar L + L, then splat).
2842  DB  196,193,58,88,192                   ; vaddss        %xmm8,%xmm8,%xmm0
2843  DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
2844  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
; ymm0 = (u - floor(u / 2L) * 2L) - L   (vroundps imm 1 = round toward -inf).
2845  DB  197,44,94,192                       ; vdivps        %ymm0,%ymm10,%ymm8
2846  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
2847  DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
2848  DB  197,172,92,192                      ; vsubps        %ymm0,%ymm10,%ymm0
2849  DB  196,193,124,92,193                  ; vsubps        %ymm9,%ymm0,%ymm0
; ymm8 = |ymm0|, formed as (0 - v) AND v, which clears the sign bit.
2850  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2851  DB  197,60,92,192                       ; vsubps        %ymm0,%ymm8,%ymm8
2852  DB  197,60,84,192                       ; vandps        %ymm0,%ymm8,%ymm8
; ymm0 = L with each lane's bit pattern decremented by one, computed on the
; two 128-bit halves separately with xmm vpaddd.
2853  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
2854  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2855  DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
2856  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
2857  DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
; Cap the mirrored value, then tail-jump to the next stage.
2858  DB  197,188,93,192                      ; vminps        %ymm0,%ymm8,%ymm0
2859  DB  72,173                              ; lods          %ds:(%rsi),%rax
2860  DB  255,224                             ; jmpq          *%rax
2861
; _sk_mirror_y_avx
; Same operation as the x variant but applied to ymm1. With u = ymm1 - L:
;   ymm1 = min( | (u - floor(u / 2L) * 2L) - L | , L' )
; where L is the float read via the pointer from the stage list and L' is L
; with its 32-bit pattern decremented by one.
;   In:   rsi -> stage list (qword 0: pointer to L, qword 1: next stage).
;   Out:  ymm1 mirrored; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm10.
2862PUBLIC _sk_mirror_y_avx
2863_sk_mirror_y_avx LABEL PROC
2864  DB  72,173                              ; lods          %ds:(%rsi),%rax
; ymm9 = L broadcast to all 8 lanes (scalar load + shuffle + insert).
2865  DB  197,121,110,0                       ; vmovd         (%rax),%xmm8
2866  DB  196,65,121,112,200,0                ; vpshufd       $0x0,%xmm8,%xmm9
2867  DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
; ymm10 = u = ymm1 - L.
2868  DB  196,65,116,92,209                   ; vsubps        %ymm9,%ymm1,%ymm10
; ymm1 = 2L broadcast to all 8 lanes (scalar L + L, then splat).
2869  DB  196,193,58,88,200                   ; vaddss        %xmm8,%xmm8,%xmm1
2870  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2871  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
; ymm1 = (u - floor(u / 2L) * 2L) - L   (vroundps imm 1 = round toward -inf).
2872  DB  197,44,94,193                       ; vdivps        %ymm1,%ymm10,%ymm8
2873  DB  196,67,125,8,192,1                  ; vroundps      $0x1,%ymm8,%ymm8
2874  DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
2875  DB  197,172,92,201                      ; vsubps        %ymm1,%ymm10,%ymm1
2876  DB  196,193,116,92,201                  ; vsubps        %ymm9,%ymm1,%ymm1
; ymm8 = |ymm1|, formed as (0 - v) AND v, which clears the sign bit.
2877  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
2878  DB  197,60,92,193                       ; vsubps        %ymm1,%ymm8,%ymm8
2879  DB  197,60,84,193                       ; vandps        %ymm1,%ymm8,%ymm8
; ymm1 = L with each lane's bit pattern decremented by one, computed on the
; two 128-bit halves separately with xmm vpaddd.
2880  DB  196,99,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm1
2881  DB  196,65,41,118,210                   ; vpcmpeqd      %xmm10,%xmm10,%xmm10
2882  DB  196,193,113,254,202                 ; vpaddd        %xmm10,%xmm1,%xmm1
2883  DB  196,65,49,254,202                   ; vpaddd        %xmm10,%xmm9,%xmm9
2884  DB  196,227,53,24,201,1                 ; vinsertf128   $0x1,%xmm1,%ymm9,%ymm1
; Cap the mirrored value, then tail-jump to the next stage.
2885  DB  197,188,93,201                      ; vminps        %ymm1,%ymm8,%ymm1
2886  DB  72,173                              ; lods          %ds:(%rsi),%rax
2887  DB  255,224                             ; jmpq          *%rax
2888
; _sk_luminance_to_alpha_avx
; Replaces ymm3 with a weighted sum of ymm0, ymm1, ymm2 and zeroes those
; three inputs:
;   ymm3 = ymm0*c0 + ymm1*c1 + ymm2*c2;  ymm0 = ymm1 = ymm2 = 0
; with c0 = 0x3e59b3d0 (~0.2126), c1 = 0x3f371759 (~0.7152),
; c2 = 0x3d93dd98 (~0.0722) as IEEE-754 single-precision bit patterns.
;   In:   rsi -> stage list (only the next-stage qword is consumed; this
;                stage reads no context pointer).
;   Out:  ymm0..ymm3 as above; rsi advanced by 8 (DF assumed 0).
;   Clobbers: rax.
2889PUBLIC _sk_luminance_to_alpha_avx
2890_sk_luminance_to_alpha_avx LABEL PROC
; Each constant is materialised via eax -> xmm, splatted within 128 bits and
; then duplicated into the upper half.
; ymm0 = ymm0 * c0
2891  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
2892  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2893  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2894  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2895  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
; ymm0 += ymm1 * c1
2896  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
2897  DB  197,249,110,216                     ; vmovd         %eax,%xmm3
2898  DB  196,227,121,4,219,0                 ; vpermilps     $0x0,%xmm3,%xmm3
2899  DB  196,227,101,24,219,1                ; vinsertf128   $0x1,%xmm3,%ymm3,%ymm3
2900  DB  197,228,89,201                      ; vmulps        %ymm1,%ymm3,%ymm1
2901  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
; ymm3 = ymm0 + ymm2 * c2
2902  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
2903  DB  197,249,110,200                     ; vmovd         %eax,%xmm1
2904  DB  196,227,121,4,201,0                 ; vpermilps     $0x0,%xmm1,%xmm1
2905  DB  196,227,117,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm1,%ymm1
2906  DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
2907  DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
; Fetch the next stage, clear the three input vectors, and tail-jump.
2908  DB  72,173                              ; lods          %ds:(%rsi),%rax
2909  DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
2910  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
2911  DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
2912  DB  255,224                             ; jmpq          *%rax
2913
; _sk_matrix_2x3_avx
; Applies a 2x3 affine transform to (ymm0, ymm1). With m = the float array
; addressed by the pointer taken from the stage list (read column by column):
;   x' = m[0]*x + (m[2]*y + m[4])
;   y' = m[1]*x + (m[3]*y + m[5])
; where x = ymm0, y = ymm1 on entry and x' -> ymm0, y' -> ymm1 on exit.
;   In:   rsi -> stage list (qword 0: pointer to m, qword 1: next stage).
;   Out:  ymm0, ymm1 transformed; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm11.
2914PUBLIC _sk_matrix_2x3_avx
2915_sk_matrix_2x3_avx LABEL PROC
2916  DB  72,173                              ; lods          %ds:(%rsi),%rax
; x' accumulated in ymm8 (ymm0 is still needed for y').
2917  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
2918  DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
2919  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
2920  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
2921  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
2922  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
2923  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; y' written directly into ymm1.
2924  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
2925  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
2926  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
2927  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
2928  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
2929  DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
2930  DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
; Fetch the next stage, move x' into place, and tail-jump.
2931  DB  72,173                              ; lods          %ds:(%rsi),%rax
2932  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2933  DB  255,224                             ; jmpq          *%rax
2934
; _sk_matrix_3x4_avx
; Applies a 3x4 affine transform to (ymm0, ymm1, ymm2). With m = the float
; array addressed by the pointer taken from the stage list (read column by
; column) and a,b,c = ymm0,ymm1,ymm2 on entry:
;   ymm0' = m[0]*a + (m[3]*b + (m[6]*c + m[9]))
;   ymm1' = m[1]*a + (m[4]*b + (m[7]*c + m[10]))
;   ymm2' = m[2]*a + (m[5]*b + (m[8]*c + m[11]))
;   In:   rsi -> stage list (qword 0: pointer to m, qword 1: next stage).
;   Out:  ymm0..ymm2 transformed; ymm3 untouched; rsi advanced by 16.
;   Clobbers: rax, ymm8-ymm13.
2935PUBLIC _sk_matrix_3x4_avx
2936_sk_matrix_3x4_avx LABEL PROC
2937  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Row 0 -> ymm8 (inputs are kept live until the last row).
2938  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
2939  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
2940  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
2941  DB  196,98,125,24,88,36                 ; vbroadcastss  0x24(%rax),%ymm11
2942  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
2943  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
2944  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
2945  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
2946  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
2947  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; Row 1 -> ymm9.
2948  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
2949  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
2950  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
2951  DB  196,98,125,24,96,40                 ; vbroadcastss  0x28(%rax),%ymm12
2952  DB  197,36,89,218                       ; vmulps        %ymm2,%ymm11,%ymm11
2953  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
2954  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
2955  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
2956  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
2957  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; Row 2 -> ymm2 (the inputs may now be overwritten in place).
2958  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
2959  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
2960  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
2961  DB  196,98,125,24,104,44                ; vbroadcastss  0x2c(%rax),%ymm13
2962  DB  197,156,89,210                      ; vmulps        %ymm2,%ymm12,%ymm2
2963  DB  196,193,108,88,213                  ; vaddps        %ymm13,%ymm2,%ymm2
2964  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
2965  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
2966  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
2967  DB  197,252,88,209                      ; vaddps        %ymm1,%ymm0,%ymm2
; Fetch the next stage, move rows 0 and 1 into place, and tail-jump.
2968  DB  72,173                              ; lods          %ds:(%rsi),%rax
2969  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
2970  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
2971  DB  255,224                             ; jmpq          *%rax
2972
; _sk_matrix_4x5_avx
; Applies a 4x5 affine transform to (ymm0, ymm1, ymm2, ymm3). With m = the
; float array addressed by the pointer taken from the stage list (read
; column by column) and a,b,c,d = ymm0..ymm3 on entry, for k = 0..3:
;   ymm<k>' = m[k]*a + (m[4+k]*b + (m[8+k]*c + (m[12+k]*d + m[16+k])))
;   In:   rsi -> stage list (qword 0: pointer to m, qword 1: next stage).
;   Out:  ymm0..ymm3 transformed; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm15.
2973PUBLIC _sk_matrix_4x5_avx
2974_sk_matrix_4x5_avx LABEL PROC
2975  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Row 0 -> ymm8 (inputs are kept live until the last row).
2976  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
2977  DB  196,98,125,24,72,16                 ; vbroadcastss  0x10(%rax),%ymm9
2978  DB  196,98,125,24,80,32                 ; vbroadcastss  0x20(%rax),%ymm10
2979  DB  196,98,125,24,88,48                 ; vbroadcastss  0x30(%rax),%ymm11
2980  DB  196,98,125,24,96,64                 ; vbroadcastss  0x40(%rax),%ymm12
2981  DB  197,36,89,219                       ; vmulps        %ymm3,%ymm11,%ymm11
2982  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
2983  DB  197,44,89,210                       ; vmulps        %ymm2,%ymm10,%ymm10
2984  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
2985  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
2986  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
2987  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
2988  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; Row 1 -> ymm9.
2989  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
2990  DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
2991  DB  196,98,125,24,88,36                 ; vbroadcastss  0x24(%rax),%ymm11
2992  DB  196,98,125,24,96,52                 ; vbroadcastss  0x34(%rax),%ymm12
2993  DB  196,98,125,24,104,68                ; vbroadcastss  0x44(%rax),%ymm13
2994  DB  197,28,89,227                       ; vmulps        %ymm3,%ymm12,%ymm12
2995  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
2996  DB  197,36,89,218                       ; vmulps        %ymm2,%ymm11,%ymm11
2997  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
2998  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
2999  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
3000  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
3001  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; Row 2 -> ymm10.
3002  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
3003  DB  196,98,125,24,88,24                 ; vbroadcastss  0x18(%rax),%ymm11
3004  DB  196,98,125,24,96,40                 ; vbroadcastss  0x28(%rax),%ymm12
3005  DB  196,98,125,24,104,56                ; vbroadcastss  0x38(%rax),%ymm13
3006  DB  196,98,125,24,112,72                ; vbroadcastss  0x48(%rax),%ymm14
3007  DB  197,20,89,235                       ; vmulps        %ymm3,%ymm13,%ymm13
3008  DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
3009  DB  197,28,89,226                       ; vmulps        %ymm2,%ymm12,%ymm12
3010  DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
3011  DB  197,36,89,217                       ; vmulps        %ymm1,%ymm11,%ymm11
3012  DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
3013  DB  197,44,89,208                       ; vmulps        %ymm0,%ymm10,%ymm10
3014  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
; Row 3 -> ymm3 (the inputs may now be overwritten in place).
3015  DB  196,98,125,24,88,12                 ; vbroadcastss  0xc(%rax),%ymm11
3016  DB  196,98,125,24,96,28                 ; vbroadcastss  0x1c(%rax),%ymm12
3017  DB  196,98,125,24,104,44                ; vbroadcastss  0x2c(%rax),%ymm13
3018  DB  196,98,125,24,112,60                ; vbroadcastss  0x3c(%rax),%ymm14
3019  DB  196,98,125,24,120,76                ; vbroadcastss  0x4c(%rax),%ymm15
3020  DB  197,140,89,219                      ; vmulps        %ymm3,%ymm14,%ymm3
3021  DB  196,193,100,88,223                  ; vaddps        %ymm15,%ymm3,%ymm3
3022  DB  197,148,89,210                      ; vmulps        %ymm2,%ymm13,%ymm2
3023  DB  197,236,88,211                      ; vaddps        %ymm3,%ymm2,%ymm2
3024  DB  197,156,89,201                      ; vmulps        %ymm1,%ymm12,%ymm1
3025  DB  197,244,88,202                      ; vaddps        %ymm2,%ymm1,%ymm1
3026  DB  197,164,89,192                      ; vmulps        %ymm0,%ymm11,%ymm0
3027  DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
; Fetch the next stage, move rows 0..2 into place, and tail-jump.
3028  DB  72,173                              ; lods          %ds:(%rsi),%rax
3029  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
3030  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
3031  DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
3032  DB  255,224                             ; jmpq          *%rax
3033
; _sk_matrix_perspective_avx
; Applies a 3x3 projective transform to (ymm0, ymm1). With m = the float
; array addressed by the pointer taken from the stage list (read row by row,
; unlike the affine matrix stages) and x,y = ymm0,ymm1 on entry:
;   X = m[0]*x + (m[1]*y + m[2])
;   Y = m[3]*x + (m[4]*y + m[5])
;   Z = m[6]*x + (m[7]*y + m[8])
;   ymm0 = X * rcp(Z),  ymm1 = Y * rcp(Z)
; rcp is vrcpps, i.e. an approximate reciprocal rather than a true divide.
;   In:   rsi -> stage list (qword 0: pointer to m, qword 1: next stage).
;   Out:  ymm0, ymm1 transformed; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8-ymm12.
3034PUBLIC _sk_matrix_perspective_avx
3035_sk_matrix_perspective_avx LABEL PROC
3036  DB  72,173                              ; lods          %ds:(%rsi),%rax
; X -> ymm8.
3037  DB  196,98,125,24,0                     ; vbroadcastss  (%rax),%ymm8
3038  DB  196,98,125,24,72,4                  ; vbroadcastss  0x4(%rax),%ymm9
3039  DB  196,98,125,24,80,8                  ; vbroadcastss  0x8(%rax),%ymm10
3040  DB  197,52,89,201                       ; vmulps        %ymm1,%ymm9,%ymm9
3041  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
3042  DB  197,60,89,192                       ; vmulps        %ymm0,%ymm8,%ymm8
3043  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
; Y -> ymm9.
3044  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
3045  DB  196,98,125,24,80,16                 ; vbroadcastss  0x10(%rax),%ymm10
3046  DB  196,98,125,24,88,20                 ; vbroadcastss  0x14(%rax),%ymm11
3047  DB  197,44,89,209                       ; vmulps        %ymm1,%ymm10,%ymm10
3048  DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
3049  DB  197,52,89,200                       ; vmulps        %ymm0,%ymm9,%ymm9
3050  DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
; Z -> ymm0 (the inputs may now be overwritten in place).
3051  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
3052  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
3053  DB  196,98,125,24,96,32                 ; vbroadcastss  0x20(%rax),%ymm12
3054  DB  197,164,89,201                      ; vmulps        %ymm1,%ymm11,%ymm1
3055  DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
3056  DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
3057  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
; Perspective divide via approximate reciprocal of Z.
3058  DB  197,252,83,200                      ; vrcpps        %ymm0,%ymm1
3059  DB  197,188,89,193                      ; vmulps        %ymm1,%ymm8,%ymm0
3060  DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
; Tail-jump to the next stage.
3061  DB  72,173                              ; lods          %ds:(%rsi),%rax
3062  DB  255,224                             ; jmpq          *%rax
3063
; _sk_linear_gradient_2stops_avx
; Evaluates four linear functions of t = ymm0. With c = the float array
; addressed by the pointer taken from the stage list, for k = 0..3:
;   ymm<k> = c[k] + c[4+k] * t
; (c[0..3] act as the value at t = 0 and c[4..7] as the per-unit-t delta;
; presumably these are colour channels -- not visible here.)
;   In:   ymm0 = t; rsi -> stage list (qword 0: pointer to c, qword 1: next
;         stage).
;   Out:  ymm0..ymm3 as above; rsi advanced by 16 (DF assumed 0).
;   Clobbers: rax, ymm8, ymm9.
3064PUBLIC _sk_linear_gradient_2stops_avx
3065_sk_linear_gradient_2stops_avx LABEL PROC
3066  DB  72,173                              ; lods          %ds:(%rsi),%rax
; k = 0 -> ymm8 (t in ymm0 is still needed below).
3067  DB  196,226,125,24,72,16                ; vbroadcastss  0x10(%rax),%ymm1
3068  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
3069  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
3070  DB  197,108,88,193                      ; vaddps        %ymm1,%ymm2,%ymm8
; k = 1 -> ymm1.
3071  DB  196,226,125,24,72,20                ; vbroadcastss  0x14(%rax),%ymm1
3072  DB  196,226,125,24,80,4                 ; vbroadcastss  0x4(%rax),%ymm2
3073  DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
3074  DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
; k = 2 -> ymm2.
3075  DB  196,226,125,24,80,24                ; vbroadcastss  0x18(%rax),%ymm2
3076  DB  196,226,125,24,88,8                 ; vbroadcastss  0x8(%rax),%ymm3
3077  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
3078  DB  197,228,88,210                      ; vaddps        %ymm2,%ymm3,%ymm2
; k = 3 -> ymm3 (last use of t, so ymm0 is consumed here).
3079  DB  196,226,125,24,88,28                ; vbroadcastss  0x1c(%rax),%ymm3
3080  DB  196,98,125,24,72,12                 ; vbroadcastss  0xc(%rax),%ymm9
3081  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
3082  DB  197,180,88,216                      ; vaddps        %ymm0,%ymm9,%ymm3
; Fetch the next stage, move the k = 0 result into place, and tail-jump.
3083  DB  72,173                              ; lods          %ds:(%rsi),%rax
3084  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
3085  DB  255,224                             ; jmpq          *%rax
3086
; _sk_start_pipeline_sse41
; Driver loop: repeatedly calls the first stage of a stage list, advancing
; an index by 4 per call, while a full step of 4 still fits below the limit.
;   In (rcx, rdx, r8, r9 -- Microsoft x64 argument registers):
;         rcx = starting index
;         rdx = pointer to the stage list; its first qword is the address of
;               the first stage, the following qwords are handed to it
;         r8  = value passed through unchanged to every stage call in rdx
;         r9  = limit (compared unsigned)
;   Out:  rax = first index that was not processed (equals the starting
;               index if not even one step of 4 fits).
;   Each stage call receives rdi = index, rsi = stage list + 8, rdx = r8,
;   and xmm0-xmm7 zeroed.
;   Saves/restores r12-r15, rsi, rdi, rbx and xmm6-xmm15. After 7 pushes and
;   the 0xa0-byte frame rsp is 16-byte aligned, as the movaps spills need.
3087PUBLIC _sk_start_pipeline_sse41
3088_sk_start_pipeline_sse41 LABEL PROC
; Prologue: save integer registers, then spill xmm6-xmm15 to the frame.
3089  DB  65,87                               ; push          %r15
3090  DB  65,86                               ; push          %r14
3091  DB  65,85                               ; push          %r13
3092  DB  65,84                               ; push          %r12
3093  DB  86                                  ; push          %rsi
3094  DB  87                                  ; push          %rdi
3095  DB  83                                  ; push          %rbx
3096  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
3097  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
3098  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
3099  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
3100  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
3101  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
3102  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
3103  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
3104  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
3105  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
3106  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
; Stash the arguments in registers that survive the stage calls:
;   r15 = limit, r14 = pass-through value, rbx = index, rsi = stage list.
3107  DB  77,137,207                          ; mov           %r9,%r15
3108  DB  77,137,198                          ; mov           %r8,%r14
3109  DB  72,137,203                          ; mov           %rcx,%rbx
3110  DB  72,137,214                          ; mov           %rdx,%rsi
; r12 = first stage address, r13 = stage list advanced past that entry.
3111  DB  72,173                              ; lods          %ds:(%rsi),%rax
3112  DB  73,137,196                          ; mov           %rax,%r12
3113  DB  73,137,245                          ; mov           %rsi,%r13
; Enter the loop only if index + 4 <= limit; otherwise return the index.
3114  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
3115  DB  76,57,248                           ; cmp           %r15,%rax
3116  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse41+0x73>
3117  DB  72,137,216                          ; mov           %rbx,%rax
3118  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse41+0xa7>
; Loop body (offset 0x73): zero xmm0-xmm7 and call the first stage.
3119  DB  15,87,192                           ; xorps         %xmm0,%xmm0
3120  DB  15,87,201                           ; xorps         %xmm1,%xmm1
3121  DB  15,87,210                           ; xorps         %xmm2,%xmm2
3122  DB  15,87,219                           ; xorps         %xmm3,%xmm3
3123  DB  15,87,228                           ; xorps         %xmm4,%xmm4
3124  DB  15,87,237                           ; xorps         %xmm5,%xmm5
3125  DB  15,87,246                           ; xorps         %xmm6,%xmm6
3126  DB  15,87,255                           ; xorps         %xmm7,%xmm7
3127  DB  72,137,223                          ; mov           %rbx,%rdi
3128  DB  76,137,238                          ; mov           %r13,%rsi
3129  DB  76,137,242                          ; mov           %r14,%rdx
3130  DB  65,255,212                          ; callq         *%r12
; rax = index + 4 (next index, also the return value on exit). rbx is
; temporarily index + 8 for the "does another step of 4 fit" test; the mov
; restoring rbx = rax does not disturb the flags consumed by jbe.
3131  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
3132  DB  72,131,195,8                        ; add           $0x8,%rbx
3133  DB  76,57,251                           ; cmp           %r15,%rbx
3134  DB  72,137,195                          ; mov           %rax,%rbx
3135  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse41+0x73>
; Epilogue (offset 0xa7): restore xmm6-xmm15 and the integer registers.
3136  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
3137  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
3138  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
3139  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
3140  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
3141  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
3142  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
3143  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
3144  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
3145  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
3146  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
3147  DB  91                                  ; pop           %rbx
3148  DB  95                                  ; pop           %rdi
3149  DB  94                                  ; pop           %rsi
3150  DB  65,92                               ; pop           %r12
3151  DB  65,93                               ; pop           %r13
3152  DB  65,94                               ; pop           %r14
3153  DB  65,95                               ; pop           %r15
3154  DB  195                                 ; retq
3155
; _sk_just_return_sse41: a stage consisting of a single `ret`.
; The other stages in this file chain to each other with `jmpq *%rax` instead
; of `call`, so this `ret` pops whatever return address is currently on top of
; the stack (presumably the one pushed by the `callq *%r12` in the
; start_pipeline loop -- confirm against the generator).
3156PUBLIC _sk_just_return_sse41
3157_sk_just_return_sse41 LABEL PROC
3158  DB  195                                 ; retq
3159
; _sk_seed_shader_sse41: initialises the eight working vector registers.
;   In:  %edi   32-bit integer, broadcast and converted to float
;        %rdx   pointer to 16 bytes read as four floats (unaligned load)
;        (%rsi) stream of 8-byte words: first a pointer to a 32-bit integer,
;               then the address of the next stage
;   Out: %xmm0 = float(%edi) + 0.5 + the four floats at (%rdx)
;        %xmm1 = float(int at first stream word) + 0.5, in all four lanes
;        %xmm2 = 1.0 in all lanes, %xmm3..%xmm7 = 0
;   Clobbers %rax, %ecx; advances %rsi by 16.
; NOTE(review): the stage name suggests %edi/(%rax) are x/y pixel coordinates
; and (%rdx) holds per-lane offsets -- confirm against the generator source.
3160PUBLIC _sk_seed_shader_sse41
3161_sk_seed_shader_sse41 LABEL PROC
; rax = first stream word (pointer dereferenced further down).
3162  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm1 = float(edi) broadcast to all lanes.
3163  DB  102,15,110,199                      ; movd          %edi,%xmm0
3164  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
3165  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
; xmm2 = 0.5f (0x3f000000) in all lanes; xmm1 += 0.5.
3166  DB  185,0,0,0,63                        ; mov           $0x3f000000,%ecx
3167  DB  102,15,110,209                      ; movd          %ecx,%xmm2
3168  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
3169  DB  15,88,202                           ; addps         %xmm2,%xmm1
; xmm0 = four floats at (%rdx) + xmm1.
3170  DB  15,16,2                             ; movups        (%rdx),%xmm0
3171  DB  15,88,193                           ; addps         %xmm1,%xmm0
; xmm1 = float(32-bit int at (%rax)) + 0.5 in all lanes.
3172  DB  102,15,110,8                        ; movd          (%rax),%xmm1
3173  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
3174  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
3175  DB  15,88,202                           ; addps         %xmm2,%xmm1
; xmm2 = 1.0f (0x3f800000) in all lanes.
3176  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3177  DB  102,15,110,208                      ; movd          %eax,%xmm2
3178  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
; rax = next stage address; zero xmm3..xmm7, then tail-jump.
3179  DB  72,173                              ; lods          %ds:(%rsi),%rax
3180  DB  15,87,219                           ; xorps         %xmm3,%xmm3
3181  DB  15,87,228                           ; xorps         %xmm4,%xmm4
3182  DB  15,87,237                           ; xorps         %xmm5,%xmm5
3183  DB  15,87,246                           ; xorps         %xmm6,%xmm6
3184  DB  15,87,255                           ; xorps         %xmm7,%xmm7
3185  DB  255,224                             ; jmpq          *%rax
3186
; _sk_constant_color_sse41: loads four floats through the pointer in the first
; stream word and broadcasts one float into each of %xmm0..%xmm3:
;   xmm0 = lane 0, xmm1 = lane 1, xmm2 = lane 2, xmm3 = lane 3 (all four lanes
;   of each register hold the same value).
; The load is unaligned (movups), so the source needs no particular alignment.
; Clobbers %rax; advances %rsi by 16; tail-jumps to the second stream word.
3187PUBLIC _sk_constant_color_sse41
3188_sk_constant_color_sse41 LABEL PROC
3189  DB  72,173                              ; lods          %ds:(%rsi),%rax
3190  DB  15,16,24                            ; movups        (%rax),%xmm3
3191  DB  15,40,195                           ; movaps        %xmm3,%xmm0
3192  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
3193  DB  15,40,203                           ; movaps        %xmm3,%xmm1
3194  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
3195  DB  15,40,211                           ; movaps        %xmm3,%xmm2
3196  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
; xmm3 is broadcast last because it was the source of the copies above.
3197  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
3198  DB  72,173                              ; lods          %ds:(%rsi),%rax
3199  DB  255,224                             ; jmpq          *%rax
3200
; _sk_clear_sse41: sets %xmm0..%xmm3 to all-zero bits; %xmm4..%xmm7 are left
; untouched. Consumes one stream word (the next stage address) and tail-jumps
; to it. Clobbers %rax; advances %rsi by 8.
3201PUBLIC _sk_clear_sse41
3202_sk_clear_sse41 LABEL PROC
3203  DB  72,173                              ; lods          %ds:(%rsi),%rax
3204  DB  15,87,192                           ; xorps         %xmm0,%xmm0
3205  DB  15,87,201                           ; xorps         %xmm1,%xmm1
3206  DB  15,87,210                           ; xorps         %xmm2,%xmm2
3207  DB  15,87,219                           ; xorps         %xmm3,%xmm3
3208  DB  255,224                             ; jmpq          *%rax
3209
; _sk_plus__sse41: lane-wise float addition of the second register group into
; the first: xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7.
; No clamping is applied. Clobbers %rax; advances %rsi by 8; tail-jumps to the
; next stage address read from the stream.
3210PUBLIC _sk_plus__sse41
3211_sk_plus__sse41 LABEL PROC
3212  DB  15,88,196                           ; addps         %xmm4,%xmm0
3213  DB  15,88,205                           ; addps         %xmm5,%xmm1
3214  DB  15,88,214                           ; addps         %xmm6,%xmm2
3215  DB  15,88,223                           ; addps         %xmm7,%xmm3
3216  DB  72,173                              ; lods          %ds:(%rsi),%rax
3217  DB  255,224                             ; jmpq          *%rax
3218
; _sk_srcover_sse41: with k = 1.0 - xmm3 (computed once, from the incoming
; xmm3), updates
;   xmm0 += xmm4 * k,  xmm1 += xmm5 * k,  xmm2 += xmm6 * k,  xmm3 += xmm7 * k.
; NOTE(review): by the stage name xmm0..3 look like source r,g,b,a and
; xmm4..7 like destination r,g,b,a -- confirm against the generator source.
; Scratch: %xmm8, %xmm9, %rax. Advances %rsi by 8 and tail-jumps onward.
3219PUBLIC _sk_srcover_sse41
3220_sk_srcover_sse41 LABEL PROC
; xmm8 = 1.0f (0x3f800000) broadcast, then k = 1.0 - xmm3.
3221  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3222  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3223  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3224  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
3225  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3226  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
3227  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
3228  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3229  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
3230  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
3231  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3232  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
3233  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
; Last use of k, so it is multiplied in place instead of copied to xmm9.
3234  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
3235  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
3236  DB  72,173                              ; lods          %ds:(%rsi),%rax
3237  DB  255,224                             ; jmpq          *%rax
3238
; _sk_dstover_sse41: with k = 1.0 - xmm7, updates
;   xmm0 = xmm0 * k + xmm4,  xmm1 = xmm1 * k + xmm5,
;   xmm2 = xmm2 * k + xmm6,  xmm3 = xmm3 * k + xmm7.
; xmm4..xmm7 are read only. Scratch: %xmm8, %rax.
; NOTE(review): by the stage name xmm0..3 look like source r,g,b,a and
; xmm4..7 like destination r,g,b,a -- confirm against the generator source.
3239PUBLIC _sk_dstover_sse41
3240_sk_dstover_sse41 LABEL PROC
; xmm8 = 1.0f (0x3f800000) broadcast, then k = 1.0 - xmm7.
3241  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3242  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3243  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3244  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
3245  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3246  DB  15,88,196                           ; addps         %xmm4,%xmm0
3247  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3248  DB  15,88,205                           ; addps         %xmm5,%xmm1
3249  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
3250  DB  15,88,214                           ; addps         %xmm6,%xmm2
3251  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
3252  DB  15,88,223                           ; addps         %xmm7,%xmm3
3253  DB  72,173                              ; lods          %ds:(%rsi),%rax
3254  DB  255,224                             ; jmpq          *%rax
3255
; _sk_clamp_0_sse41: lower-bounds xmm0..xmm3 at zero: xmmN = max(xmmN, 0.0)
; per lane. Scratch: %xmm8 (zeroed), %rax. Advances %rsi by 8 and tail-jumps
; to the next stage address read from the stream.
3256PUBLIC _sk_clamp_0_sse41
3257_sk_clamp_0_sse41 LABEL PROC
3258  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
3259  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
3260  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
3261  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
3262  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
3263  DB  72,173                              ; lods          %ds:(%rsi),%rax
3264  DB  255,224                             ; jmpq          *%rax
3265
; _sk_clamp_1_sse41: upper-bounds xmm0..xmm3 at one: xmmN = min(xmmN, 1.0)
; per lane. Scratch: %xmm8 (holds 1.0f = 0x3f800000 broadcast), %rax.
; Advances %rsi by 8 and tail-jumps to the next stage address.
3266PUBLIC _sk_clamp_1_sse41
3267_sk_clamp_1_sse41 LABEL PROC
3268  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3269  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3270  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3271  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
3272  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
3273  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
3274  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
3275  DB  72,173                              ; lods          %ds:(%rsi),%rax
3276  DB  255,224                             ; jmpq          *%rax
3277
; _sk_clamp_a_sse41: first clamps xmm3 to at most 1.0, then clamps xmm0..xmm2
; to at most the (already clamped) xmm3, lane by lane.
; NOTE(review): by the stage name xmm3 looks like alpha and xmm0..2 like
; r,g,b -- confirm against the generator source.
; Scratch: %xmm8, %rax. Advances %rsi by 8 and tail-jumps onward.
3278PUBLIC _sk_clamp_a_sse41
3279_sk_clamp_a_sse41 LABEL PROC
3280  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3281  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3282  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3283  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
3284  DB  15,93,195                           ; minps         %xmm3,%xmm0
3285  DB  15,93,203                           ; minps         %xmm3,%xmm1
3286  DB  15,93,211                           ; minps         %xmm3,%xmm2
3287  DB  72,173                              ; lods          %ds:(%rsi),%rax
3288  DB  255,224                             ; jmpq          *%rax
3289
; _sk_set_rgb_sse41: reads three consecutive floats through the pointer in the
; first stream word (offsets 0, 4 and 8) and broadcasts them into all lanes of
; xmm0, xmm1 and xmm2 respectively. xmm3 is not modified.
; Clobbers %rax; advances %rsi by 16; tail-jumps to the second stream word.
3290PUBLIC _sk_set_rgb_sse41
3291_sk_set_rgb_sse41 LABEL PROC
3292  DB  72,173                              ; lods          %ds:(%rsi),%rax
3293  DB  243,15,16,0                         ; movss         (%rax),%xmm0
3294  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
3295  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
3296  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
3297  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
3298  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
3299  DB  72,173                              ; lods          %ds:(%rsi),%rax
3300  DB  255,224                             ; jmpq          *%rax
3301
; _sk_swap_rb_sse41: exchanges the contents of xmm0 and xmm2, using xmm8 as
; the temporary. xmm1 and xmm3 are untouched.
; Clobbers %xmm8, %rax; advances %rsi by 8; tail-jumps to the next stage.
3302PUBLIC _sk_swap_rb_sse41
3303_sk_swap_rb_sse41 LABEL PROC
3304  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
3305  DB  72,173                              ; lods          %ds:(%rsi),%rax
3306  DB  15,40,194                           ; movaps        %xmm2,%xmm0
3307  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
3308  DB  255,224                             ; jmpq          *%rax
3309
; _sk_swap_sse41: exchanges the register group xmm0..xmm3 with xmm4..xmm7
; (xmm0<->xmm4, xmm1<->xmm5, xmm2<->xmm6, xmm3<->xmm7).
; xmm8..xmm11 hold the old xmm3..xmm0 while the swap is in flight.
; Clobbers %xmm8..%xmm11, %rax; advances %rsi by 8; tail-jumps onward.
3310PUBLIC _sk_swap_sse41
3311_sk_swap_sse41 LABEL PROC
; Stash old xmm0..xmm3 (xmm11 = xmm0 ... xmm8 = xmm3).
3312  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
3313  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
3314  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
3315  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
3316  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm0..xmm3 = old xmm4..xmm7.
3317  DB  15,40,196                           ; movaps        %xmm4,%xmm0
3318  DB  15,40,205                           ; movaps        %xmm5,%xmm1
3319  DB  15,40,214                           ; movaps        %xmm6,%xmm2
3320  DB  15,40,223                           ; movaps        %xmm7,%xmm3
; xmm4..xmm7 = stashed old xmm0..xmm3.
3321  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
3322  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
3323  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
3324  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
3325  DB  255,224                             ; jmpq          *%rax
3326
; _sk_move_src_dst_sse41: copies xmm0..xmm3 into xmm4..xmm7
; (xmm4 = xmm0, xmm5 = xmm1, xmm6 = xmm2, xmm7 = xmm3); xmm0..xmm3 keep
; their values. Clobbers %rax; advances %rsi by 8; tail-jumps onward.
3327PUBLIC _sk_move_src_dst_sse41
3328_sk_move_src_dst_sse41 LABEL PROC
3329  DB  72,173                              ; lods          %ds:(%rsi),%rax
3330  DB  15,40,224                           ; movaps        %xmm0,%xmm4
3331  DB  15,40,233                           ; movaps        %xmm1,%xmm5
3332  DB  15,40,242                           ; movaps        %xmm2,%xmm6
3333  DB  15,40,251                           ; movaps        %xmm3,%xmm7
3334  DB  255,224                             ; jmpq          *%rax
3335
; _sk_move_dst_src_sse41: copies xmm4..xmm7 into xmm0..xmm3
; (xmm0 = xmm4, xmm1 = xmm5, xmm2 = xmm6, xmm3 = xmm7); xmm4..xmm7 keep
; their values. Clobbers %rax; advances %rsi by 8; tail-jumps onward.
3336PUBLIC _sk_move_dst_src_sse41
3337_sk_move_dst_src_sse41 LABEL PROC
3338  DB  72,173                              ; lods          %ds:(%rsi),%rax
3339  DB  15,40,196                           ; movaps        %xmm4,%xmm0
3340  DB  15,40,205                           ; movaps        %xmm5,%xmm1
3341  DB  15,40,214                           ; movaps        %xmm6,%xmm2
3342  DB  15,40,223                           ; movaps        %xmm7,%xmm3
3343  DB  255,224                             ; jmpq          *%rax
3344
; _sk_premul_sse41: multiplies xmm0, xmm1 and xmm2 by xmm3, lane by lane.
; xmm3 itself is unchanged.
; NOTE(review): by the stage name xmm3 looks like alpha and xmm0..2 like
; r,g,b -- confirm against the generator source.
; Clobbers %rax; advances %rsi by 8; tail-jumps to the next stage.
3345PUBLIC _sk_premul_sse41
3346_sk_premul_sse41 LABEL PROC
3347  DB  15,89,195                           ; mulps         %xmm3,%xmm0
3348  DB  15,89,203                           ; mulps         %xmm3,%xmm1
3349  DB  15,89,211                           ; mulps         %xmm3,%xmm2
3350  DB  72,173                              ; lods          %ds:(%rsi),%rax
3351  DB  255,224                             ; jmpq          *%rax
3352
; _sk_unpremul_sse41: multiplies xmm0, xmm1 and xmm2 by a per-lane scale that
; is 1.0 / xmm3 where xmm3 compares not-equal to 0.0 and 0.0 where xmm3 equals
; zero (so a zero xmm3 lane yields 0 rather than inf). xmm3 is unchanged.
; Scratch: %xmm8 (mask, then scale), %xmm9 (reciprocal), %rax.
3353PUBLIC _sk_unpremul_sse41
3354_sk_unpremul_sse41 LABEL PROC
3355  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
; xmm9 = 1.0f broadcast, then xmm9 = 1.0 / xmm3 (full-precision divide).
3356  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3357  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
3358  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
3359  DB  68,15,94,203                        ; divps         %xmm3,%xmm9
; xmm8 = all-ones where 0.0 != xmm3; ANDing keeps the reciprocal only there.
3360  DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
3361  DB  69,15,84,193                        ; andps         %xmm9,%xmm8
3362  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3363  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3364  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
3365  DB  72,173                              ; lods          %ds:(%rsi),%rax
3366  DB  255,224                             ; jmpq          *%rax
3367
; _sk_from_srgb_sse41: applies the same piecewise function to each lane of
; xmm0, xmm1 and xmm2 (xmm3 is not touched):
;   lo(x) = x * C0                          C0 = 0x3d9e8391 (~1/12.92)
;   hi(x) = (0.3*x + 0.6975) * x*x + 0.0025
;   f(x)  = x < 0.055 ? lo(x) : hi(x)
; Float constants are built from their IEEE-754 bit patterns via %eax.
; blendvps uses %xmm0 implicitly as the selection mask, which is why each
; compare result is moved into %xmm0 before the blend and why the final
; results are assembled in xmm9/xmm10/xmm8 and copied back at the end.
; Scratch: %xmm8..%xmm15, %rax. Advances %rsi by 8 and tail-jumps onward.
3368PUBLIC _sk_from_srgb_sse41
3369_sk_from_srgb_sse41 LABEL PROC
; xmm11 = C0 broadcast; xmm10 = lo(xmm0); xmm14 = xmm0 * xmm0.
3370  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
3371  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
3372  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3373  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
3374  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
3375  DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
3376  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
; xmm8 = 0.3 (0x3e99999a), xmm12 = 0.6975 (0x3f328f5c), both broadcast.
3377  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
3378  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3379  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3380  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
3381  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
3382  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
; xmm9 = hi(xmm0); xmm13 = 0.0025 (0x3b23d70a) broadcast.
3383  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3384  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
3385  DB  69,15,88,204                        ; addps         %xmm12,%xmm9
3386  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
3387  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
3388  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
3389  DB  69,15,89,206                        ; mulps         %xmm14,%xmm9
3390  DB  69,15,88,205                        ; addps         %xmm13,%xmm9
; xmm14 is reused for the threshold 0.055 (0x3d6147ae); xmm0 becomes the
; mask (x < 0.055) and the blend picks lo where the mask is set.
3391  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
3392  DB  102,68,15,110,240                   ; movd          %eax,%xmm14
3393  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
3394  DB  65,15,194,198,1                     ; cmpltps       %xmm14,%xmm0
3395  DB  102,69,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm9
; Same computation for xmm1: xmm15 = lo, xmm10 = hi, result in xmm10.
3396  DB  69,15,40,251                        ; movaps        %xmm11,%xmm15
3397  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
3398  DB  15,40,193                           ; movaps        %xmm1,%xmm0
3399  DB  15,89,192                           ; mulps         %xmm0,%xmm0
3400  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
3401  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
3402  DB  69,15,88,212                        ; addps         %xmm12,%xmm10
3403  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
3404  DB  69,15,88,213                        ; addps         %xmm13,%xmm10
3405  DB  65,15,194,206,1                     ; cmpltps       %xmm14,%xmm1
3406  DB  15,40,193                           ; movaps        %xmm1,%xmm0
3407  DB  102,69,15,56,20,215                 ; blendvps      %xmm0,%xmm15,%xmm10
; Same computation for xmm2; the constants in xmm11 and xmm8 are consumed in
; place because this is their last use. Result in xmm8.
3408  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
3409  DB  15,40,194                           ; movaps        %xmm2,%xmm0
3410  DB  15,89,192                           ; mulps         %xmm0,%xmm0
3411  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
3412  DB  69,15,88,196                        ; addps         %xmm12,%xmm8
3413  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
3414  DB  69,15,88,197                        ; addps         %xmm13,%xmm8
3415  DB  65,15,194,214,1                     ; cmpltps       %xmm14,%xmm2
3416  DB  15,40,194                           ; movaps        %xmm2,%xmm0
3417  DB  102,69,15,56,20,195                 ; blendvps      %xmm0,%xmm11,%xmm8
; Fetch the next stage and move the three results back into xmm0..xmm2.
3418  DB  72,173                              ; lods          %ds:(%rsi),%rax
3419  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
3420  DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
3421  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
3422  DB  255,224                             ; jmpq          *%rax
3423
; _sk_to_srgb_sse41: applies the same piecewise function to each lane of the
; incoming xmm0, xmm1 and xmm2 (incoming xmm3..xmm7 are preserved):
;   rs   = rsqrtps(x)                 (approximate 1/sqrt(x))
;   sqrt = rcpps(rs)                  (approximate sqrt(x))
;   ftrt = rsqrtps(rs)                (approximate x^(1/4))
;   lo   = 12.46 * x
;   hi   = min(1.0, 0.411192*ftrt + 0.689206*sqrt - 0.0988)
;   f(x) = x < 0.0043 ? lo : hi
; Decimal values above are approximations of the bit patterns in the code.
; Register pressure: xmm1..xmm7 are shifted up by one register on entry
; (old xmm7 is spilled to the stack) to free xmm1 as a scratch/result
; register next to xmm0, which blendvps needs as its implicit mask. They are
; shifted back before the jump.
; Stack: 0x18 bytes are reserved and the first 16 are written with movaps,
; which requires a 16-byte aligned address. NOTE(review): that holds if %rsp
; is 8 (mod 16) on entry, as it would be right after a `call` from a
; 16-byte aligned frame -- confirm for every caller.
; Scratch: %xmm8..%xmm15, %rax. Advances %rsi by 8 and tail-jumps onward.
3424PUBLIC _sk_to_srgb_sse41
3425_sk_to_srgb_sse41 LABEL PROC
3426  DB  72,131,236,24                       ; sub           $0x18,%rsp
; Spill xmm7, then shift xmm1..xmm6 up by one register each.
3427  DB  15,41,60,36                         ; movaps        %xmm7,(%rsp)
3428  DB  15,40,254                           ; movaps        %xmm6,%xmm7
3429  DB  15,40,245                           ; movaps        %xmm5,%xmm6
3430  DB  15,40,236                           ; movaps        %xmm4,%xmm5
3431  DB  15,40,227                           ; movaps        %xmm3,%xmm4
3432  DB  15,40,218                           ; movaps        %xmm2,%xmm3
3433  DB  15,40,209                           ; movaps        %xmm1,%xmm2
; First input (xmm0): xmm9 = sqrt estimate, xmm15 = fourth-root estimate.
3434  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
3435  DB  69,15,83,200                        ; rcpps         %xmm8,%xmm9
3436  DB  69,15,82,248                        ; rsqrtps       %xmm8,%xmm15
; xmm11 = 12.46 (0x41475c29) broadcast; xmm10 = lo = 12.46 * xmm0.
3437  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
3438  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
3439  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3440  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
3441  DB  68,15,89,208                        ; mulps         %xmm0,%xmm10
; xmm8 = 1.0, xmm12 = 0.411192 (0x3ed287c2), xmm13 = 0.689206 (0x3f306fce).
3442  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3443  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3444  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3445  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
3446  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
3447  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
3448  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
3449  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
3450  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
; xmm14 = -0.0988: 0x3dca57a8 with the sign bit flipped by the xor.
3451  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
3452  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
3453  DB  102,68,15,110,240                   ; movd          %eax,%xmm14
3454  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
; xmm9 = hi = min(1.0, xmm12*ftrt + xmm13*sqrt + xmm14).
3455  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
3456  DB  69,15,88,206                        ; addps         %xmm14,%xmm9
3457  DB  69,15,89,252                        ; mulps         %xmm12,%xmm15
3458  DB  69,15,88,249                        ; addps         %xmm9,%xmm15
3459  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3460  DB  69,15,93,207                        ; minps         %xmm15,%xmm9
; xmm15 is reused for the threshold 0.0043 (0x3b8ce704); xmm0 becomes the
; mask and the blend selects lo where x < threshold. Result stays in xmm9.
3461  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
3462  DB  102,68,15,110,248                   ; movd          %eax,%xmm15
3463  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
3464  DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
3465  DB  102,69,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm9
; Second input (now in xmm2): hi is built in xmm1, lo in xmm10; result in xmm1.
3466  DB  68,15,82,210                        ; rsqrtps       %xmm2,%xmm10
3467  DB  65,15,83,194                        ; rcpps         %xmm10,%xmm0
3468  DB  69,15,82,210                        ; rsqrtps       %xmm10,%xmm10
3469  DB  65,15,89,197                        ; mulps         %xmm13,%xmm0
3470  DB  65,15,88,198                        ; addps         %xmm14,%xmm0
3471  DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
3472  DB  68,15,88,208                        ; addps         %xmm0,%xmm10
3473  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
3474  DB  65,15,93,202                        ; minps         %xmm10,%xmm1
3475  DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
3476  DB  68,15,89,210                        ; mulps         %xmm2,%xmm10
3477  DB  65,15,194,215,1                     ; cmpltps       %xmm15,%xmm2
3478  DB  15,40,194                           ; movaps        %xmm2,%xmm0
3479  DB  102,65,15,56,20,202                 ; blendvps      %xmm0,%xmm10,%xmm1
; Third input (now in xmm3): the 1.0 in xmm8 and the 12.46 in xmm11 are
; consumed in place since this is their last use. Result in xmm8.
3480  DB  15,82,195                           ; rsqrtps       %xmm3,%xmm0
3481  DB  15,83,208                           ; rcpps         %xmm0,%xmm2
3482  DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
3483  DB  65,15,88,214                        ; addps         %xmm14,%xmm2
3484  DB  15,82,192                           ; rsqrtps       %xmm0,%xmm0
3485  DB  65,15,89,196                        ; mulps         %xmm12,%xmm0
3486  DB  15,88,194                           ; addps         %xmm2,%xmm0
3487  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
3488  DB  68,15,89,219                        ; mulps         %xmm3,%xmm11
3489  DB  65,15,194,223,1                     ; cmpltps       %xmm15,%xmm3
3490  DB  15,40,195                           ; movaps        %xmm3,%xmm0
3491  DB  102,69,15,56,20,195                 ; blendvps      %xmm0,%xmm11,%xmm8
; Fetch the next stage, place the results in xmm0/xmm2 (xmm1 is already in
; place), undo the register shift and restore the spilled xmm7.
3492  DB  72,173                              ; lods          %ds:(%rsi),%rax
3493  DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
3494  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
3495  DB  15,40,220                           ; movaps        %xmm4,%xmm3
3496  DB  15,40,229                           ; movaps        %xmm5,%xmm4
3497  DB  15,40,238                           ; movaps        %xmm6,%xmm5
3498  DB  15,40,247                           ; movaps        %xmm7,%xmm6
3499  DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
3500  DB  72,131,196,24                       ; add           $0x18,%rsp
3501  DB  255,224                             ; jmpq          *%rax
3502
; _sk_scale_1_float_sse41: reads one float through the pointer in the first
; stream word, broadcasts it, and multiplies xmm0..xmm3 by it.
; Scratch: %xmm8, %rax. Advances %rsi by 16; tail-jumps to the second word.
3503PUBLIC _sk_scale_1_float_sse41
3504_sk_scale_1_float_sse41 LABEL PROC
3505  DB  72,173                              ; lods          %ds:(%rsi),%rax
3506  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3507  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3508  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3509  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3510  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
3511  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
3512  DB  72,173                              ; lods          %ds:(%rsi),%rax
3513  DB  255,224                             ; jmpq          *%rax
3514
; _sk_scale_u8_sse41: multiplies xmm0..xmm3 by a per-lane factor derived from
; four bytes in memory.
;   base   = qword stored at the address held in the first stream word
;   factor = float(zero-extended byte at base + %rdi + lane) * C
;            C = 0x3b808081 (~1/255)
; Exactly four bytes are read, starting at base + %rdi.
; Scratch: %xmm8, %xmm9, %rax. Advances %rsi by 16; tail-jumps onward.
3515PUBLIC _sk_scale_u8_sse41
3516_sk_scale_u8_sse41 LABEL PROC
3517  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Double indirection: the stream word points at a slot holding the base.
3518  DB  72,139,0                            ; mov           (%rax),%rax
3519  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
3520  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
3521  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3522  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
3523  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
3524  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
3525  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
3526  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
3527  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
3528  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
3529  DB  72,173                              ; lods          %ds:(%rsi),%rax
3530  DB  255,224                             ; jmpq          *%rax
3531
; _sk_lerp_1_float_sse41: linear interpolation with a single scalar weight t
; read through the pointer in the first stream word:
;   xmm0 = (xmm0 - xmm4) * t + xmm4      xmm1 = (xmm1 - xmm5) * t + xmm5
;   xmm2 = (xmm2 - xmm6) * t + xmm6      xmm3 = (xmm3 - xmm7) * t + xmm7
; xmm4..xmm7 are read only. Scratch: %xmm8 (t broadcast), %rax.
; Advances %rsi by 16; tail-jumps to the second stream word.
3532PUBLIC _sk_lerp_1_float_sse41
3533_sk_lerp_1_float_sse41 LABEL PROC
3534  DB  72,173                              ; lods          %ds:(%rsi),%rax
3535  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3536  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3537  DB  15,92,196                           ; subps         %xmm4,%xmm0
3538  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3539  DB  15,88,196                           ; addps         %xmm4,%xmm0
3540  DB  15,92,205                           ; subps         %xmm5,%xmm1
3541  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3542  DB  15,88,205                           ; addps         %xmm5,%xmm1
3543  DB  15,92,214                           ; subps         %xmm6,%xmm2
3544  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
3545  DB  15,88,214                           ; addps         %xmm6,%xmm2
3546  DB  15,92,223                           ; subps         %xmm7,%xmm3
3547  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
3548  DB  15,88,223                           ; addps         %xmm7,%xmm3
3549  DB  72,173                              ; lods          %ds:(%rsi),%rax
3550  DB  255,224                             ; jmpq          *%rax
3551
; _sk_lerp_u8_sse41: linear interpolation with a per-lane weight t derived
; from four bytes in memory:
;   base = qword stored at the address held in the first stream word
;   t    = float(zero-extended byte at base + %rdi + lane) * C
;          C = 0x3b808081 (~1/255)
;   xmmN = (xmmN - xmm(N+4)) * t + xmm(N+4)   for N = 0..3
; xmm4..xmm7 are read only. Scratch: %xmm8, %xmm9 (t), %rax.
; Advances %rsi by 16; tail-jumps to the second stream word.
3552PUBLIC _sk_lerp_u8_sse41
3553_sk_lerp_u8_sse41 LABEL PROC
3554  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Double indirection: the stream word points at a slot holding the base.
3555  DB  72,139,0                            ; mov           (%rax),%rax
3556  DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
3557  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
3558  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3559  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
3560  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
3561  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
3562  DB  15,92,196                           ; subps         %xmm4,%xmm0
3563  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
3564  DB  15,88,196                           ; addps         %xmm4,%xmm0
3565  DB  15,92,205                           ; subps         %xmm5,%xmm1
3566  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
3567  DB  15,88,205                           ; addps         %xmm5,%xmm1
3568  DB  15,92,214                           ; subps         %xmm6,%xmm2
3569  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
3570  DB  15,88,214                           ; addps         %xmm6,%xmm2
3571  DB  15,92,223                           ; subps         %xmm7,%xmm3
3572  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
3573  DB  15,88,223                           ; addps         %xmm7,%xmm3
3574  DB  72,173                              ; lods          %ds:(%rsi),%rax
3575  DB  255,224                             ; jmpq          *%rax
3576
; _sk_lerp_565_sse41: linear interpolation of xmm0..xmm2 toward xmm4..xmm6
; with three separate per-lane weights unpacked from four 16-bit words:
;   base = qword stored at the address held in the first stream word
;   px   = zero-extended 16-bit word at base + 2*(%rdi + lane)
;   t0   = float(px & 0xf800) * 0x37842108 (~1/63488, i.e. field / 31)
;   t1   = float(px & 0x07e0) * 0x3a020821 (~1/2016,  i.e. field / 63)
;   t2   = float(px & 0x001f) * 0x3d042108 (~1/31)
;   xmm0 = (xmm0 - xmm4)*t0 + xmm4, xmm1 = (xmm1 - xmm5)*t1 + xmm5,
;   xmm2 = (xmm2 - xmm6)*t2 + xmm6
; The masked fields are scaled without shifting; the multipliers fold the
; shift into the normalisation.
; The incoming xmm3 is discarded (used as scratch from the start) and set to
; 1.0 on exit; xmm7 is not read.
; Scratch: %xmm8..%xmm11, %rax. Advances %rsi by 16; tail-jumps onward.
3577PUBLIC _sk_lerp_565_sse41
3578_sk_lerp_565_sse41 LABEL PROC
3579  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Double indirection: the stream word points at a slot holding the base.
3580  DB  72,139,0                            ; mov           (%rax),%rax
3581  DB  102,68,15,56,51,4,120               ; pmovzxwd      (%rax,%rdi,2),%xmm8
; xmm10 = t0: bits 15..11 of each word, normalised.
3582  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
3583  DB  102,15,110,216                      ; movd          %eax,%xmm3
3584  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
3585  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
3586  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
3587  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
3588  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
3589  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3590  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
; xmm11 = t1: bits 10..5 of each word, normalised.
3591  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
3592  DB  102,15,110,216                      ; movd          %eax,%xmm3
3593  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
3594  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
3595  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
3596  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
3597  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
3598  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
3599  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
; xmm3 = t2: bits 4..0 of each word, normalised (xmm8 is consumed here).
3600  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
3601  DB  102,15,110,216                      ; movd          %eax,%xmm3
3602  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
3603  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
3604  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
3605  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
3606  DB  102,15,110,216                      ; movd          %eax,%xmm3
3607  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
3608  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Interpolate the three registers with their respective weights.
3609  DB  15,92,196                           ; subps         %xmm4,%xmm0
3610  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
3611  DB  15,88,196                           ; addps         %xmm4,%xmm0
3612  DB  15,92,205                           ; subps         %xmm5,%xmm1
3613  DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
3614  DB  15,88,205                           ; addps         %xmm5,%xmm1
3615  DB  15,92,214                           ; subps         %xmm6,%xmm2
3616  DB  15,89,211                           ; mulps         %xmm3,%xmm2
3617  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = 1.0f (0x3f800000) in all lanes.
3618  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3619  DB  102,15,110,216                      ; movd          %eax,%xmm3
3620  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
3621  DB  72,173                              ; lods          %ds:(%rsi),%rax
3622  DB  255,224                             ; jmpq          *%rax
3623
; _sk_load_tables_sse41
; For four consecutive 32-bit pixels, use bytes 0, 1 and 2 of each pixel as
; indices into three separate tables of 4-byte entries, and convert byte 3 to
; a scaled float.
;   In:   rsi -> stream of 8-byte words; the first word read is a context
;                pointer, the second is the address jumped to on exit.
;         rdi =  element index (pixels are read from ctx[0] + rdi*4).
;   Ctx:  +0x00 pixel base pointer; +0x08 / +0x10 / +0x18 table pointers used
;         for byte 0 / byte 1 / byte 2 respectively.
;   Out:  xmm0, xmm1, xmm2 = table entries selected by bytes 0, 1, 2;
;         xmm3 = byte 3 * 0x3b808081 (~1/255).
;         (presumably r,g,b,a -- TODO confirm against the stage's C++ source)
;   Clobbers: rax, rcx, r8-r11, xmm8; rsi advances by 16.
3624PUBLIC _sk_load_tables_sse41
3625_sk_load_tables_sse41 LABEL PROC
; rax = context pointer, rcx = pixel base, r8 = table for byte 0.
3626  DB  72,173                              ; lods          %ds:(%rsi),%rax
3627  DB  72,139,8                            ; mov           (%rax),%rcx
3628  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
; xmm8 = 4 packed pixels (unaligned load).
3629  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
; xmm0 = 0xff in every lane; split indices: xmm1 = (px >> 8) & 0xff,
; xmm2 = (px >> 16) & 0xff, xmm0 = px & 0xff.
3630  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
3631  DB  102,15,110,193                      ; movd          %ecx,%xmm0
3632  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
3633  DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
3634  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
3635  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
3636  DB  102,65,15,111,208                   ; movdqa        %xmm8,%xmm2
3637  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
3638  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
3639  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; Byte-0 lookup. Move the four lane indices into scalar registers:
; r11 = lane 0, r10 = lane 1, r9 = lane 2, rcx = lane 3.
3640  DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
3641  DB  65,137,201                          ; mov           %ecx,%r9d
3642  DB  72,193,233,32                       ; shr           $0x20,%rcx
3643  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
3644  DB  69,137,211                          ; mov           %r10d,%r11d
3645  DB  73,193,234,32                       ; shr           $0x20,%r10
; Gather the four table entries into lanes 0..3 of xmm0.
3646  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
3647  DB  102,67,15,58,33,4,144,16            ; insertps      $0x10,(%r8,%r10,4),%xmm0
3648  DB  102,67,15,58,33,4,136,32            ; insertps      $0x20,(%r8,%r9,4),%xmm0
3649  DB  102,65,15,58,33,4,136,48            ; insertps      $0x30,(%r8,%rcx,4),%xmm0
; Byte-1 lookup with the table at ctx+0x10.
; r11 = lane 0, r10 = lane 2, r9 = lane 3. The 24-bit masks do not change the
; values: every lane is already <= 0xff after the pand above.
; rcx >> 30 leaves lane 1 * 4 (lane 0 is < 2^30, so none of it survives the
; shift); it is therefore a byte offset and is used with scale 1 below.
3650  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
3651  DB  102,73,15,58,22,202,1               ; pextrq        $0x1,%xmm1,%r10
3652  DB  77,137,209                          ; mov           %r10,%r9
3653  DB  73,193,233,32                       ; shr           $0x20,%r9
3654  DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
3655  DB  65,137,203                          ; mov           %ecx,%r11d
3656  DB  65,129,227,255,255,255,0            ; and           $0xffffff,%r11d
3657  DB  72,193,233,30                       ; shr           $0x1e,%rcx
3658  DB  65,129,226,255,255,255,0            ; and           $0xffffff,%r10d
; Gather into xmm1; xmm3 is used as a scratch register for lanes 2 and 3.
3659  DB  243,67,15,16,12,152                 ; movss         (%r8,%r11,4),%xmm1
3660  DB  102,65,15,58,33,12,8,16             ; insertps      $0x10,(%r8,%rcx,1),%xmm1
3661  DB  243,67,15,16,28,144                 ; movss         (%r8,%r10,4),%xmm3
3662  DB  102,15,58,33,203,32                 ; insertps      $0x20,%xmm3,%xmm1
3663  DB  243,67,15,16,28,136                 ; movss         (%r8,%r9,4),%xmm3
3664  DB  102,15,58,33,203,48                 ; insertps      $0x30,%xmm3,%xmm1
; Byte-2 lookup with the table at ctx+0x18 (r9). This is the last use of the
; context pointer: rax is overwritten with lane data by the movq below.
; r10 = lane 0, rax = lane 1 * 4 (byte offset), r8 = lane 2, rcx = lane 3.
3665  DB  76,139,72,24                        ; mov           0x18(%rax),%r9
3666  DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
3667  DB  68,15,183,193                       ; movzwl        %cx,%r8d
3668  DB  72,193,233,32                       ; shr           $0x20,%rcx
3669  DB  102,72,15,126,208                   ; movq          %xmm2,%rax
3670  DB  68,15,183,208                       ; movzwl        %ax,%r10d
3671  DB  72,193,232,30                       ; shr           $0x1e,%rax
3672  DB  243,67,15,16,20,145                 ; movss         (%r9,%r10,4),%xmm2
3673  DB  102,65,15,58,33,20,1,16             ; insertps      $0x10,(%r9,%rax,1),%xmm2
3674  DB  243,67,15,16,28,129                 ; movss         (%r9,%r8,4),%xmm3
3675  DB  102,15,58,33,211,32                 ; insertps      $0x20,%xmm3,%xmm2
3676  DB  243,65,15,16,28,137                 ; movss         (%r9,%rcx,4),%xmm3
3677  DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
; xmm3 = float(px >> 24) * 0x3b808081 (~1/255); not table-driven.
3678  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
3679  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
3680  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3681  DB  102,15,110,216                      ; movd          %eax,%xmm3
3682  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
3683  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next stream word and tail-jump to it.
3684  DB  72,173                              ; lods          %ds:(%rsi),%rax
3685  DB  255,224                             ; jmpq          *%rax
3686
; _sk_load_a8_sse41
; Load four consecutive bytes, convert them to floats scaled by ~1/255 into
; xmm3, and zero xmm0, xmm1 and xmm2.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the byte buffer base, second word = address
;                jumped to on exit.
;         rdi =  element index (bytes are read from base + rdi).
;   Out:  xmm3 = byte * 0x3b808081 (~1/255) per lane; xmm0 = xmm1 = xmm2 = 0.
;         (presumably a and r,g,b -- TODO confirm against the stage's C++ source)
;   Clobbers: rax; rsi advances by 16.
3687PUBLIC _sk_load_a8_sse41
3688_sk_load_a8_sse41 LABEL PROC
3689  DB  72,173                              ; lods          %ds:(%rsi),%rax
3690  DB  72,139,0                            ; mov           (%rax),%rax
; Zero-extend 4 bytes to 4 dwords, then convert to float.
3691  DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
3692  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
; xmm3 = broadcast(0x3b808081) * value.
3693  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3694  DB  102,15,110,216                      ; movd          %eax,%xmm3
3695  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
3696  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; Fetch the next-stage address, clear xmm0..xmm2, then tail-jump.
3697  DB  72,173                              ; lods          %ds:(%rsi),%rax
3698  DB  15,87,192                           ; xorps         %xmm0,%xmm0
3699  DB  15,87,201                           ; xorps         %xmm1,%xmm1
3700  DB  15,87,210                           ; xorps         %xmm2,%xmm2
3701  DB  255,224                             ; jmpq          *%rax
3702
; _sk_store_a8_sse41
; Scale the four floats in xmm3 by 255, convert to integers, narrow to bytes
; with unsigned saturation and store the 4 bytes.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the destination base, second word = address
;                jumped to on exit.
;         rdi =  element index (bytes are written at base + rdi).
;         xmm3 = values to store (presumably alpha in [0,1] -- TODO confirm).
;   Out:  4 bytes written; xmm0..xmm3 are left untouched.
;   Clobbers: rax, rcx, xmm8; rsi advances by 16.
3703PUBLIC _sk_store_a8_sse41
3704_sk_store_a8_sse41 LABEL PROC
3705  DB  72,173                              ; lods          %ds:(%rsi),%rax
3706  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast(0x437f0000 = 255.0f) * xmm3.
3707  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
3708  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
3709  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3710  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
; float -> int32 (rounding mode comes from MXCSR), then dword -> word -> byte.
3711  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
3712  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
3713  DB  102,69,15,103,192                   ; packuswb      %xmm8,%xmm8
; Low 32 bits of xmm8 now hold the 4 result bytes.
3714  DB  102,68,15,126,4,56                  ; movd          %xmm8,(%rax,%rdi,1)
3715  DB  72,173                              ; lods          %ds:(%rsi),%rax
3716  DB  255,224                             ; jmpq          *%rax
3717
; _sk_load_565_sse41
; Load four consecutive 16-bit values and unpack their 5/6/5-bit fields into
; floats; xmm3 is set to 1.0.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the source base, second word = address jumped
;                to on exit.
;         rdi =  element index (values are read from base + rdi*2).
;   Out:  xmm0 = (v & 0xf800) * 0x37842108 (~1/(31<<11))
;         xmm1 = (v & 0x07e0) * 0x3a020821 (~1/(63<<5))
;         xmm2 = (v & 0x001f) * 0x3d042108 (~1/31)
;         xmm3 = 1.0f
;         (presumably r,g,b,a -- TODO confirm against the stage's C++ source)
;   Clobbers: rax; rsi advances by 16.
; The fields are not shifted down before conversion; the shift is folded into
; the scale constants instead.
3718PUBLIC _sk_load_565_sse41
3719_sk_load_565_sse41 LABEL PROC
3720  DB  72,173                              ; lods          %ds:(%rsi),%rax
3721  DB  72,139,0                            ; mov           (%rax),%rax
; xmm2 = 4 x 16-bit values zero-extended to dwords.
3722  DB  102,15,56,51,20,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm2
; Bits 15..11 -> xmm0.
3723  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
3724  DB  102,15,110,192                      ; movd          %eax,%xmm0
3725  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
3726  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
3727  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
3728  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
3729  DB  102,15,110,192                      ; movd          %eax,%xmm0
3730  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
3731  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; Bits 10..5 -> xmm1 (xmm3 is a temporary here).
3732  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
3733  DB  102,15,110,200                      ; movd          %eax,%xmm1
3734  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
3735  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
3736  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
3737  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
3738  DB  102,15,110,200                      ; movd          %eax,%xmm1
3739  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
3740  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; Bits 4..0 -> xmm2 (consumes the packed source held in xmm2).
3741  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
3742  DB  102,15,110,216                      ; movd          %eax,%xmm3
3743  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
3744  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
3745  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
3746  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
3747  DB  102,15,110,208                      ; movd          %eax,%xmm2
3748  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
3749  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = broadcast(1.0f).
3750  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
3751  DB  102,15,110,216                      ; movd          %eax,%xmm3
3752  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
3753  DB  72,173                              ; lods          %ds:(%rsi),%rax
3754  DB  255,224                             ; jmpq          *%rax
3755
; _sk_store_565_sse41
; Pack xmm0, xmm1, xmm2 into four 16-bit 5/6/5 values and store them.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the destination base, second word = address
;                jumped to on exit.
;         rdi =  element index (values are written at base + rdi*2).
;         xmm0, xmm1, xmm2 = inputs (presumably r,g,b in [0,1] -- TODO confirm).
;   Out:  8 bytes written; each value is
;           int(xmm0*31) << 11 | int(xmm1*63) << 5 | int(xmm2*31).
;         xmm0..xmm3 are left untouched.
;   Clobbers: rax, rcx, xmm8-xmm10; rsi advances by 16.
3756PUBLIC _sk_store_565_sse41
3757_sk_store_565_sse41 LABEL PROC
3758  DB  72,173                              ; lods          %ds:(%rsi),%rax
3759  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast(0x41f80000 = 31.0f); kept for reuse with xmm2 below.
3760  DB  185,0,0,248,65                      ; mov           $0x41f80000,%ecx
3761  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
3762  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int(xmm0 * 31) << 11.
3763  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3764  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
3765  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
3766  DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
; xmm10 = int(xmm1 * 63.0f) << 5, then OR in xmm9.
3767  DB  185,0,0,124,66                      ; mov           $0x427c0000,%ecx
3768  DB  102,68,15,110,209                   ; movd          %ecx,%xmm10
3769  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
3770  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
3771  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
3772  DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
3773  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm8 = int(xmm2 * 31) | xmm10. orpd is a plain 128-bit bitwise OR, the same
; result as por.
3774  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
3775  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
3776  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
; Narrow dwords to words (unsigned saturation) and store the low 8 bytes.
3777  DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
3778  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
3779  DB  72,173                              ; lods          %ds:(%rsi),%rax
3780  DB  255,224                             ; jmpq          *%rax
3781
; _sk_load_8888_sse41
; Load four consecutive 32-bit pixels and split each into four floats scaled
; by ~1/255.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the source base, second word = address jumped
;                to on exit.
;         rdi =  element index (pixels are read from base + rdi*4).
;   Out:  xmm0 = byte 0, xmm1 = byte 1, xmm2 = byte 2, xmm3 = byte 3 of each
;         pixel, each multiplied by 0x3b808081 (~1/255).
;         (presumably r,g,b,a -- TODO confirm against the stage's C++ source)
;   Clobbers: rax, xmm8; rsi advances by 16.
3782PUBLIC _sk_load_8888_sse41
3783_sk_load_8888_sse41 LABEL PROC
3784  DB  72,173                              ; lods          %ds:(%rsi),%rax
3785  DB  72,139,0                            ; mov           (%rax),%rax
; xmm3 = 4 packed pixels (unaligned load).
3786  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
; xmm0 = 0xff mask; xmm1 = (px >> 8) & 0xff; xmm2 = (px >> 16) & 0xff;
; xmm0 = px & 0xff.
3787  DB  184,255,0,0,0                       ; mov           $0xff,%eax
3788  DB  102,15,110,192                      ; movd          %eax,%xmm0
3789  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
3790  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
3791  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
3792  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
3793  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
3794  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
3795  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
3796  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; xmm8 = broadcast(0x3b808081); convert each channel to float and scale.
3797  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
3798  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
3799  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
3800  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
3801  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
3802  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
3803  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
3804  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
3805  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Top byte needs no mask: the logical shift already clears the upper bits.
3806  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
3807  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
3808  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
3809  DB  72,173                              ; lods          %ds:(%rsi),%rax
3810  DB  255,224                             ; jmpq          *%rax
3811
; _sk_store_8888_sse41
; Scale xmm0..xmm3 by 255, convert to integers and pack them into four 32-bit
; pixels, then store the 16 bytes.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the destination base, second word = address
;                jumped to on exit.
;         rdi =  element index (pixels are written at base + rdi*4).
;         xmm0..xmm3 = inputs (presumably r,g,b,a in [0,1] -- TODO confirm).
;   Out:  each pixel = int(xmm0*255) | int(xmm1*255) << 8
;                    | int(xmm2*255) << 16 | int(xmm3*255) << 24.
;         xmm0..xmm3 are left untouched.
;   Clobbers: rax, rcx, xmm8-xmm10; rsi advances by 16.
; NOTE(review): there is no per-channel clamp or mask before the shifts, so
; inputs are presumably expected to already lie in [0,1] -- confirm.
3812PUBLIC _sk_store_8888_sse41
3813_sk_store_8888_sse41 LABEL PROC
3814  DB  72,173                              ; lods          %ds:(%rsi),%rax
3815  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast(0x437f0000 = 255.0f).
3816  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
3817  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
3818  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int(xmm0 * 255).
3819  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3820  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
3821  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
; xmm10 = int(xmm1 * 255) << 8 | xmm9.
3822  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
3823  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
3824  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
3825  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
3826  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = int(xmm2 * 255) << 16.
3827  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3828  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
3829  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
3830  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
; xmm8 = int(xmm3 * 255) << 24, then OR in the other three channels.
3831  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
3832  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
3833  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
3834  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
3835  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
; Unaligned 16-byte store of the 4 pixels.
3836  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
3837  DB  72,173                              ; lods          %ds:(%rsi),%rax
3838  DB  255,224                             ; jmpq          *%rax
3839
; _sk_load_f16_sse41
; Load four consecutive 64-bit pixels (4 x 16-bit fields each), transpose
; them into planar form and widen every 16-bit field to a 32-bit float.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the source base, second word = address jumped
;                to on exit.
;         rdi =  element index (pixels are read from base + rdi*8, 32 bytes).
;   Out:  xmm0, xmm1, xmm2, xmm3 = fields 0, 1, 2, 3 of the four pixels.
;         (presumably r,g,b,a half floats -- TODO confirm)
;   Clobbers: rax, xmm8, xmm9; rsi advances by 16.
; Conversion: a field below 0x0400 is replaced by 0, the remaining bits are
; shifted left by 13 and the result is multiplied by 0x77800000 (2^112).
; The 0x0400 test uses a SIGNED 16-bit compare, so any field with bit 15 set
; also compares below 0x0400 and is zeroed.
3840PUBLIC _sk_load_f16_sse41
3841_sk_load_f16_sse41 LABEL PROC
3842  DB  72,173                              ; lods          %ds:(%rsi),%rax
3843  DB  72,139,0                            ; mov           (%rax),%rax
; xmm0 = pixels 0,1; xmm1 = pixels 2,3.
3844  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
3845  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of word interleaving transpose the 4x4 block of 16-bit fields:
; afterwards xmm8 = {field0 x4, field1 x4}, xmm2 = {field2 x4, field3 x4}.
3846  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
3847  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
3848  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
3849  DB  102,68,15,111,194                   ; movdqa        %xmm2,%xmm8
3850  DB  102,68,15,97,192                    ; punpcklwd     %xmm0,%xmm8
3851  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
; xmm3 = 0x0400 in every 16-bit lane. pcmpgtw builds a mask of lanes where
; 0x0400 > field; pandn keeps the field only where that mask is clear.
; Result: xmm1 = filtered fields 0/1, xmm3 = filtered fields 2/3.
3852  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
3853  DB  102,15,110,192                      ; movd          %eax,%xmm0
3854  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
3855  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
3856  DB  102,65,15,101,200                   ; pcmpgtw       %xmm8,%xmm1
3857  DB  102,65,15,223,200                   ; pandn         %xmm8,%xmm1
3858  DB  102,15,101,218                      ; pcmpgtw       %xmm2,%xmm3
3859  DB  102,15,223,218                      ; pandn         %xmm2,%xmm3
; Field 0: zero-extend the low 4 words, shift by 13, scale by xmm8 = 2^112.
3860  DB  102,15,56,51,193                    ; pmovzxwd      %xmm1,%xmm0
3861  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
3862  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
3863  DB  102,15,110,208                      ; movd          %eax,%xmm2
3864  DB  102,68,15,112,194,0                 ; pshufd        $0x0,%xmm2,%xmm8
3865  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
; Field 1: interleaving the high 4 words with zero (xmm9) zero-extends them.
3866  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
3867  DB  102,65,15,105,201                   ; punpckhwd     %xmm9,%xmm1
3868  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
3869  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
; Field 2.
3870  DB  102,15,56,51,211                    ; pmovzxwd      %xmm3,%xmm2
3871  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
3872  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Field 3.
3873  DB  102,65,15,105,217                   ; punpckhwd     %xmm9,%xmm3
3874  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
3875  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
3876  DB  72,173                              ; lods          %ds:(%rsi),%rax
3877  DB  255,224                             ; jmpq          *%rax
3878
; _sk_store_f16_sse41
; Narrow the floats in xmm0..xmm3 to 16-bit fields, interleave them into four
; 64-bit pixels and store the 32 bytes.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the destination base, second word = address
;                jumped to on exit.
;         rdi =  element index (pixels are written at base + rdi*8).
;         xmm0..xmm3 = inputs (presumably r,g,b,a -- TODO confirm).
;   Out:  32 bytes written; xmm0..xmm3 are left untouched.
;   Clobbers: rax, rcx, xmm8-xmm11; rsi advances by 16.
; Conversion: multiply by 0x07800000 (2^-112), then logical shift right by 13.
; NOTE(review): there is no sign handling, rounding or range clamp here, and
; the byte-shift merge below relies on every shifted dword fitting in 16 bits;
; presumably inputs are non-negative and within half-float range -- confirm.
3879PUBLIC _sk_store_f16_sse41
3880_sk_store_f16_sse41 LABEL PROC
3881  DB  72,173                              ; lods          %ds:(%rsi),%rax
3882  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = broadcast(0x07800000 = 2^-112).
3883  DB  185,0,0,128,7                       ; mov           $0x7800000,%ecx
3884  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
3885  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
; xmm9, xmm10, xmm11, xmm8 = converted xmm0, xmm1, xmm2, xmm3.
3886  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
3887  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
3888  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
3889  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
3890  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
3891  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
3892  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
3893  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
3894  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
3895  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
3896  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
; pslldq shifts the WHOLE register left by 2 bytes. With each dword holding a
; 16-bit value this moves it to the upper half of its dword, so
; xmm10 = field1:field0 pairs and xmm8 = field3:field2 pairs.
3897  DB  102,65,15,115,250,2                 ; pslldq        $0x2,%xmm10
3898  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
3899  DB  102,65,15,115,248,2                 ; pslldq        $0x2,%xmm8
3900  DB  102,69,15,235,195                   ; por           %xmm11,%xmm8
; Interleave dwords: low halves give pixels 0,1; high halves give pixels 2,3.
3901  DB  102,69,15,111,202                   ; movdqa        %xmm10,%xmm9
3902  DB  102,69,15,98,200                    ; punpckldq     %xmm8,%xmm9
3903  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
3904  DB  102,69,15,106,208                   ; punpckhdq     %xmm8,%xmm10
3905  DB  243,68,15,127,84,248,16             ; movdqu        %xmm10,0x10(%rax,%rdi,8)
3906  DB  72,173                              ; lods          %ds:(%rsi),%rax
3907  DB  255,224                             ; jmpq          *%rax
3908
; _sk_store_f32_sse41
; Transpose the four planar float registers xmm0..xmm3 into four interleaved
; 16-byte pixels and store the 64 bytes.
;   In:   rsi -> stream of 8-byte words; first word = context pointer whose
;                first field is the destination base, second word = address
;                jumped to on exit.
;         rdi =  element index (pixels are written at base + rdi*16).
;         xmm0..xmm3 = lanes to interleave (presumably r,g,b,a -- TODO confirm).
;   Out:  pixel i = { xmm0[i], xmm1[i], xmm2[i], xmm3[i] } for i = 0..3.
;         xmm0..xmm3 are left untouched.
;   Clobbers: rax, rcx, xmm8-xmm12; rsi advances by 16.
3909PUBLIC _sk_store_f32_sse41
3910_sk_store_f32_sse41 LABEL PROC
3911  DB  72,173                              ; lods          %ds:(%rsi),%rax
3912  DB  72,139,0                            ; mov           (%rax),%rax
; rcx = rdi * 16: byte offset, since scale 16 is not encodable in an address.
3913  DB  72,137,249                          ; mov           %rdi,%rcx
3914  DB  72,193,225,4                        ; shl           $0x4,%rcx
; First pass, 32-bit interleave:
;   xmm9  = {0[0],1[0],0[1],1[1]}   xmm11 = {2[0],3[0],2[1],3[1]}
;   xmm8  = {0[2],1[2],0[3],1[3]}   xmm10 = {2[2],3[2],2[3],3[3]}
; (n[i] = lane i of xmmn)
3915  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
3916  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
3917  DB  68,15,20,201                        ; unpcklps      %xmm1,%xmm9
3918  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
3919  DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
3920  DB  68,15,20,219                        ; unpcklps      %xmm3,%xmm11
3921  DB  68,15,21,193                        ; unpckhps      %xmm1,%xmm8
3922  DB  68,15,21,211                        ; unpckhps      %xmm3,%xmm10
; Second pass, 64-bit combine:
;   xmm12 = pixel 0, xmm11 = pixel 1, xmm9 = pixel 2, xmm10 = pixel 3.
; movhlps copies the source's high 64 bits over the destination's low 64 bits.
3923  DB  69,15,40,225                        ; movaps        %xmm9,%xmm12
3924  DB  102,69,15,20,227                    ; unpcklpd      %xmm11,%xmm12
3925  DB  69,15,18,217                        ; movhlps       %xmm9,%xmm11
3926  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
3927  DB  102,69,15,20,202                    ; unpcklpd      %xmm10,%xmm9
3928  DB  69,15,18,208                        ; movhlps       %xmm8,%xmm10
; Four unaligned 16-byte stores; movupd and movups write the same 128 bits.
3929  DB  102,68,15,17,36,8                   ; movupd        %xmm12,(%rax,%rcx,1)
3930  DB  68,15,17,92,8,16                    ; movups        %xmm11,0x10(%rax,%rcx,1)
3931  DB  102,68,15,17,76,8,32                ; movupd        %xmm9,0x20(%rax,%rcx,1)
3932  DB  68,15,17,84,8,48                    ; movups        %xmm10,0x30(%rax,%rcx,1)
3933  DB  72,173                              ; lods          %ds:(%rsi),%rax
3934  DB  255,224                             ; jmpq          *%rax
3935
; _sk_clamp_x_sse41
; Clamp every lane of xmm0 to the range [0, limit - 1 ulp].
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly, with no extra indirection),
;                second word = address jumped to on exit.
;         xmm0 = values to clamp (presumably x coordinates -- TODO confirm).
;   Out:  xmm0 = min(max(0, xmm0), limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9; rsi advances by 16.
3936PUBLIC _sk_clamp_x_sse41
3937_sk_clamp_x_sse41 LABEL PROC
3938  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = max(0, xmm0).
3939  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
3940  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
; xmm9 = broadcast(limit).
3941  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
3942  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm0 = all-ones (-1 as int32) + limit's bit pattern. For a positive finite
; float that is the next representable value below limit.
3943  DB  102,15,118,192                      ; pcmpeqd       %xmm0,%xmm0
3944  DB  102,65,15,254,193                   ; paddd         %xmm9,%xmm0
3945  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
; Fetch the next-stage address, move the result into xmm0, then tail-jump.
3946  DB  72,173                              ; lods          %ds:(%rsi),%rax
3947  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
3948  DB  255,224                             ; jmpq          *%rax
3949
; _sk_clamp_y_sse41
; Clamp every lane of xmm1 to the range [0, limit - 1 ulp].
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly, with no extra indirection),
;                second word = address jumped to on exit.
;         xmm1 = values to clamp (presumably y coordinates -- TODO confirm).
;   Out:  xmm1 = min(max(0, xmm1), limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9; rsi advances by 16.
3950PUBLIC _sk_clamp_y_sse41
3951_sk_clamp_y_sse41 LABEL PROC
3952  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = max(0, xmm1).
3953  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
3954  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
; xmm9 = broadcast(limit).
3955  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
3956  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm1 = all-ones (-1 as int32) + limit's bit pattern. For a positive finite
; float that is the next representable value below limit.
3957  DB  102,15,118,201                      ; pcmpeqd       %xmm1,%xmm1
3958  DB  102,65,15,254,201                   ; paddd         %xmm9,%xmm1
3959  DB  68,15,93,193                        ; minps         %xmm1,%xmm8
; Fetch the next-stage address, move the result into xmm1, then tail-jump.
3960  DB  72,173                              ; lods          %ds:(%rsi),%rax
3961  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
3962  DB  255,224                             ; jmpq          *%rax
3963
; _sk_repeat_x_sse41
; Wrap every lane of xmm0 into [0, limit) by subtracting a whole multiple of
; limit, then cap the result at limit - 1 ulp.
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly), second word = address jumped
;                to on exit.
;         xmm0 = values to wrap (presumably x coordinates -- TODO confirm).
;   Out:  xmm0 = min(xmm0 - floor(xmm0 / limit) * limit, limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9; rsi advances by 16.
3964PUBLIC _sk_repeat_x_sse41
3965_sk_repeat_x_sse41 LABEL PROC
3966  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = broadcast(limit).
3967  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3968  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = floor(xmm0 / limit) * limit; roundps mode 1 rounds toward -infinity.
3969  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
3970  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
3971  DB  102,69,15,58,8,201,1                ; roundps       $0x1,%xmm9,%xmm9
3972  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
3973  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm9 = limit's bit pattern - 1 (next float below a positive finite limit);
; the min keeps the result strictly below limit.
3974  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
3975  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
3976  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
3977  DB  72,173                              ; lods          %ds:(%rsi),%rax
3978  DB  255,224                             ; jmpq          *%rax
3979
; _sk_repeat_y_sse41
; Wrap every lane of xmm1 into [0, limit) by subtracting a whole multiple of
; limit, then cap the result at limit - 1 ulp.
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly), second word = address jumped
;                to on exit.
;         xmm1 = values to wrap (presumably y coordinates -- TODO confirm).
;   Out:  xmm1 = min(xmm1 - floor(xmm1 / limit) * limit, limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9; rsi advances by 16.
3980PUBLIC _sk_repeat_y_sse41
3981_sk_repeat_y_sse41 LABEL PROC
3982  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = broadcast(limit).
3983  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
3984  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = floor(xmm1 / limit) * limit; roundps mode 1 rounds toward -infinity.
3985  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
3986  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
3987  DB  102,69,15,58,8,201,1                ; roundps       $0x1,%xmm9,%xmm9
3988  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
3989  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; xmm9 = limit's bit pattern - 1 (next float below a positive finite limit);
; the min keeps the result strictly below limit.
3990  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
3991  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
3992  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
3993  DB  72,173                              ; lods          %ds:(%rsi),%rax
3994  DB  255,224                             ; jmpq          *%rax
3995
; _sk_mirror_x_sse41
; Reflect every lane of xmm0 back and forth across [0, limit) with period
; 2*limit, then cap the result at limit - 1 ulp.
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly), second word = address jumped
;                to on exit.
;         xmm0 = values to mirror (presumably x coordinates -- TODO confirm).
;   Out:  with t = xmm0 - limit:
;         xmm0 = min(|t - floor(t / (2*limit)) * (2*limit) - limit|,
;                    limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9, xmm10; rsi advances by 16.
3996PUBLIC _sk_mirror_x_sse41
3997_sk_mirror_x_sse41 LABEL PROC
3998  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm9 = broadcast(limit); xmm0 -= limit.
3999  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
4000  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4001  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4002  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; xmm8 = broadcast(2*limit): doubled as a scalar first, then broadcast.
4003  DB  243,69,15,88,192                    ; addss         %xmm8,%xmm8
4004  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm0 -= floor(xmm0 / (2*limit)) * (2*limit); roundps mode 1 = toward -inf.
4005  DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
4006  DB  69,15,94,208                        ; divps         %xmm8,%xmm10
4007  DB  102,69,15,58,8,210,1                ; roundps       $0x1,%xmm10,%xmm10
4008  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
4009  DB  65,15,92,194                        ; subps         %xmm10,%xmm0
4010  DB  65,15,92,193                        ; subps         %xmm9,%xmm0
; Absolute value: v AND (0 - v) clears the sign bit, because v and -v differ
; only in that bit.
4011  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
4012  DB  68,15,92,192                        ; subps         %xmm0,%xmm8
4013  DB  65,15,84,192                        ; andps         %xmm8,%xmm0
; xmm8 = limit's bit pattern - 1 (next float below a positive finite limit).
4014  DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
4015  DB  102,69,15,254,193                   ; paddd         %xmm9,%xmm8
4016  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
4017  DB  72,173                              ; lods          %ds:(%rsi),%rax
4018  DB  255,224                             ; jmpq          *%rax
4019
; _sk_mirror_y_sse41
; Reflect every lane of xmm1 back and forth across [0, limit) with period
; 2*limit, then cap the result at limit - 1 ulp.
;   In:   rsi -> stream of 8-byte words; first word = pointer to a single
;                float `limit` (used directly), second word = address jumped
;                to on exit.
;         xmm1 = values to mirror (presumably y coordinates -- TODO confirm).
;   Out:  with t = xmm1 - limit:
;         xmm1 = min(|t - floor(t / (2*limit)) * (2*limit) - limit|,
;                    limit - 1 ulp).
;   Clobbers: rax, xmm8, xmm9, xmm10; rsi advances by 16.
4020PUBLIC _sk_mirror_y_sse41
4021_sk_mirror_y_sse41 LABEL PROC
4022  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm9 = broadcast(limit); xmm1 -= limit.
4023  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
4024  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4025  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4026  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; xmm8 = broadcast(2*limit): doubled as a scalar first, then broadcast.
4027  DB  243,69,15,88,192                    ; addss         %xmm8,%xmm8
4028  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm1 -= floor(xmm1 / (2*limit)) * (2*limit); roundps mode 1 = toward -inf.
4029  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
4030  DB  69,15,94,208                        ; divps         %xmm8,%xmm10
4031  DB  102,69,15,58,8,210,1                ; roundps       $0x1,%xmm10,%xmm10
4032  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
4033  DB  65,15,92,202                        ; subps         %xmm10,%xmm1
4034  DB  65,15,92,201                        ; subps         %xmm9,%xmm1
; Absolute value: v AND (0 - v) clears the sign bit, because v and -v differ
; only in that bit.
4035  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
4036  DB  68,15,92,193                        ; subps         %xmm1,%xmm8
4037  DB  65,15,84,200                        ; andps         %xmm8,%xmm1
; xmm8 = limit's bit pattern - 1 (next float below a positive finite limit).
4038  DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
4039  DB  102,69,15,254,193                   ; paddd         %xmm9,%xmm8
4040  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
4041  DB  72,173                              ; lods          %ds:(%rsi),%rax
4042  DB  255,224                             ; jmpq          *%rax
4043
; _sk_luminance_to_alpha_sse41
; Replace xmm3 with a weighted sum of xmm0, xmm1 and xmm2, then zero
; xmm0, xmm1 and xmm2.
;   In:   xmm0, xmm1, xmm2 = inputs (presumably r,g,b -- TODO confirm).
;         rsi -> stream of 8-byte words; only one word is consumed here (the
;                address jumped to on exit); this stage reads no context.
;   Out:  xmm3 = xmm0 * 0x3e59b3d0 (~0.2126)
;              + xmm1 * 0x3f371759 (~0.7152)
;              + xmm2 * 0x3d93dd98 (~0.0722)
;         xmm0 = xmm1 = xmm2 = 0.
;   Clobbers: rax; rsi advances by 8.
4044PUBLIC _sk_luminance_to_alpha_sse41
4045_sk_luminance_to_alpha_sse41 LABEL PROC
; xmm3 = ~0.2126 * xmm0.
4046  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
4047  DB  102,15,110,216                      ; movd          %eax,%xmm3
4048  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
4049  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; xmm0 = ~0.7152 * xmm1 + xmm3 (xmm0 is reused as the accumulator).
4050  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
4051  DB  102,15,110,192                      ; movd          %eax,%xmm0
4052  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4053  DB  15,89,193                           ; mulps         %xmm1,%xmm0
4054  DB  15,88,195                           ; addps         %xmm3,%xmm0
; xmm3 = ~0.0722 * xmm2 + xmm0.
4055  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
4056  DB  102,15,110,216                      ; movd          %eax,%xmm3
4057  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
4058  DB  15,89,218                           ; mulps         %xmm2,%xmm3
4059  DB  15,88,216                           ; addps         %xmm0,%xmm3
; Fetch the next-stage address, clear xmm0..xmm2, then tail-jump.
4060  DB  72,173                              ; lods          %ds:(%rsi),%rax
4061  DB  15,87,192                           ; xorps         %xmm0,%xmm0
4062  DB  15,87,201                           ; xorps         %xmm1,%xmm1
4063  DB  15,87,210                           ; xorps         %xmm2,%xmm2
4064  DB  255,224                             ; jmpq          *%rax
4065
; _sk_matrix_2x3_sse41
; Apply a 2x3 affine transform to the pair (xmm0, xmm1).
;   In:   xmm0, xmm1 = inputs (presumably x,y coordinates -- TODO confirm).
;         rsi -> stream of 8-byte words; first word = pointer to six floats
;                m[0..5] (used directly), second word = address jumped to on
;                exit.
;   Out:  xmm0 = m[0]*x + (m[2]*y + m[4])
;         xmm1 = m[1]*x + (m[3]*y + m[5])
;         where x, y are the original xmm0, xmm1.
;   Clobbers: rax, xmm8-xmm11; rsi advances by 16.
4066PUBLIC _sk_matrix_2x3_sse41
4067_sk_matrix_2x3_sse41 LABEL PROC
; Save the inputs (xmm9 = y, xmm8 = x); xmm0/xmm1 are reused for coefficients.
4068  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
4069  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
4070  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm0 = m[0] (broadcast below); xmm1 = m[1] (broadcast later, before use).
4071  DB  243,15,16,0                         ; movss         (%rax),%xmm0
4072  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
4073  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
; First output: xmm10 = m[2]*y + m[4]; xmm0 = m[0]*x + xmm10.
4074  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
4075  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4076  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
4077  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4078  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4079  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4080  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4081  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output: xmm10 = m[3]*y + m[5]; xmm1 = m[1]*x + xmm10.
4082  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4083  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
4084  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4085  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
4086  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4087  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4088  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4089  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4090  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
4091  DB  72,173                              ; lods          %ds:(%rsi),%rax
4092  DB  255,224                             ; jmpq          *%rax
4093
; _sk_matrix_3x4_sse41
; Applies a 3x4 affine transform to xmm0 (x), xmm1 (y) and xmm2 (z).
; The first lods loads a pointer from (%rsi); twelve floats m[0..11] are read
; through it (byte offsets 0x0..0x2c), each broadcast to all four lanes:
;   xmm0 = m[0]*x + (m[3]*y + (m[6]*z + m[9]))
;   xmm1 = m[1]*x + (m[4]*y + (m[7]*z + m[10]))
;   xmm2 = m[2]*x + (m[5]*y + (m[8]*z + m[11]))
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0..xmm2, xmm8..xmm13.
4094PUBLIC _sk_matrix_3x4_sse41
4095_sk_matrix_3x4_sse41 LABEL PROC
; Keep the inputs: xmm9 = y, xmm8 = x (z stays in xmm2 until the end).
4096  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
4097  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
4098  DB  72,173                              ; lods          %ds:(%rsi),%rax
; First output into xmm0; m[1] is preloaded into xmm1.
4099  DB  243,15,16,0                         ; movss         (%rax),%xmm0
4100  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
4101  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4102  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
4103  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4104  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
4105  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4106  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
4107  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4108  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
4109  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4110  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4111  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4112  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4113  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output into xmm1.
4114  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4115  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
4116  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4117  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
4118  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4119  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
4120  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4121  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
4122  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4123  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4124  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4125  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4126  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output is accumulated in xmm10 because xmm2 (z) is still being read.
4127  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
4128  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4129  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
4130  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4131  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
4132  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4133  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
4134  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
4135  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
4136  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
4137  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
4138  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4139  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
4140  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Load the next entry, move the third output into xmm2, and jump.
4141  DB  72,173                              ; lods          %ds:(%rsi),%rax
4142  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
4143  DB  255,224                             ; jmpq          *%rax
4144
; _sk_matrix_4x5_sse41
; Applies a 4x5 affine transform to xmm0 (x), xmm1 (y), xmm2 (z) and xmm3 (w).
; The first lods loads a pointer from (%rsi); twenty floats m[0..19] are read
; through it (byte offsets 0x0..0x4c), each broadcast to all four lanes:
;   xmm0 = m[0]*x + (m[4]*y + (m[8]*z  + (m[12]*w + m[16])))
;   xmm1 = m[1]*x + (m[5]*y + (m[9]*z  + (m[13]*w + m[17])))
;   xmm2 = m[2]*x + (m[6]*y + (m[10]*z + (m[14]*w + m[18])))
;   xmm3 = m[3]*x + (m[7]*y + (m[11]*z + (m[15]*w + m[19])))
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0..xmm3, xmm8..xmm15.
4145PUBLIC _sk_matrix_4x5_sse41
4146_sk_matrix_4x5_sse41 LABEL PROC
; Keep the inputs: xmm9 = y, xmm8 = x (z and w stay in xmm2/xmm3 until the end).
4147  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
4148  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
4149  DB  72,173                              ; lods          %ds:(%rsi),%rax
; First output into xmm0; m[1] is preloaded into xmm1.
4150  DB  243,15,16,0                         ; movss         (%rax),%xmm0
4151  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
4152  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4153  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
4154  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4155  DB  243,68,15,16,88,32                  ; movss         0x20(%rax),%xmm11
4156  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4157  DB  243,68,15,16,96,48                  ; movss         0x30(%rax),%xmm12
4158  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4159  DB  243,68,15,16,104,64                 ; movss         0x40(%rax),%xmm13
4160  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
4161  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
4162  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
4163  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
4164  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4165  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4166  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4167  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4168  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Second output into xmm1.
4169  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4170  DB  243,68,15,16,80,20                  ; movss         0x14(%rax),%xmm10
4171  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4172  DB  243,68,15,16,88,36                  ; movss         0x24(%rax),%xmm11
4173  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4174  DB  243,68,15,16,96,52                  ; movss         0x34(%rax),%xmm12
4175  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4176  DB  243,68,15,16,104,68                 ; movss         0x44(%rax),%xmm13
4177  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
4178  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
4179  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
4180  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
4181  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4182  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
4183  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4184  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4185  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Third output is accumulated in xmm10 because xmm2 (z) is still being read.
4186  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
4187  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4188  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
4189  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4190  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
4191  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4192  DB  243,68,15,16,104,56                 ; movss         0x38(%rax),%xmm13
4193  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
4194  DB  243,68,15,16,112,72                 ; movss         0x48(%rax),%xmm14
4195  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
4196  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
4197  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
4198  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
4199  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
4200  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
4201  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4202  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
4203  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Fourth output is accumulated in xmm11 because xmm3 (w) is still being read.
4204  DB  243,68,15,16,88,12                  ; movss         0xc(%rax),%xmm11
4205  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4206  DB  243,68,15,16,96,28                  ; movss         0x1c(%rax),%xmm12
4207  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4208  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
4209  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
4210  DB  243,68,15,16,112,60                 ; movss         0x3c(%rax),%xmm14
4211  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
4212  DB  243,68,15,16,120,76                 ; movss         0x4c(%rax),%xmm15
4213  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
4214  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
4215  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
4216  DB  68,15,89,234                        ; mulps         %xmm2,%xmm13
4217  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
4218  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
4219  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
4220  DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
4221  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
; Load the next entry, move outputs three and four into xmm2/xmm3, and jump.
4222  DB  72,173                              ; lods          %ds:(%rsi),%rax
4223  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
4224  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
4225  DB  255,224                             ; jmpq          *%rax
4226
; _sk_matrix_perspective_sse41
; Applies a 3x3 projective transform to xmm0 (x) and xmm1 (y).
; The first lods loads a pointer from (%rsi); nine floats m[0..8] are read
; through it (byte offsets 0x0..0x20), each broadcast to all four lanes:
;   X = m[0]*x + (m[1]*y + m[2])
;   Y = m[3]*x + (m[4]*y + m[5])
;   Z = m[6]*x + (m[7]*y + m[8])
;   xmm0 = X * rcpps(Z),  xmm1 = Y * rcpps(Z)
; rcpps is the SSE approximate-reciprocal instruction, so the divide by Z is
; approximate. The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0, xmm1, xmm8..xmm12.
4227PUBLIC _sk_matrix_perspective_sse41
4228_sk_matrix_perspective_sse41 LABEL PROC
; Keep x in xmm8; y stays in xmm1 until the reciprocal overwrites it.
4229  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
4230  DB  72,173                              ; lods          %ds:(%rsi),%rax
; X into xmm0.
4231  DB  243,15,16,0                         ; movss         (%rax),%xmm0
4232  DB  243,68,15,16,72,4                   ; movss         0x4(%rax),%xmm9
4233  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4234  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4235  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
4236  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4237  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
4238  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
4239  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4240  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
; Y into xmm9.
4241  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9
4242  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4243  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
4244  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4245  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
4246  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4247  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
4248  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
4249  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
4250  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
; Z into xmm10.
4251  DB  243,68,15,16,80,24                  ; movss         0x18(%rax),%xmm10
4252  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4253  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
4254  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4255  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
4256  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
4257  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
4258  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
4259  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
4260  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Scale X and Y by the approximate reciprocal of Z.
4261  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
4262  DB  15,89,193                           ; mulps         %xmm1,%xmm0
4263  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
; Load the next entry, move the scaled Y into xmm1, and jump.
4264  DB  72,173                              ; lods          %ds:(%rsi),%rax
4265  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
4266  DB  255,224                             ; jmpq          *%rax
4267
; _sk_linear_gradient_2stops_sse41
; Evaluates four linear functions of the incoming xmm0 (call it t).
; The first lods loads a pointer from (%rsi). Two unaligned 4-float vectors
; are read through it: c0 at offset 0x0 and c1 at offset 0x10. For each lane
; index i in 0..3 the result register receives, in all four lanes,
;   xmm<i> = c1[i] * t + c0[i]
; NOTE(review): presumably c0 is the start colour and c1 the per-channel
; delta, with xmm0..xmm3 = r, g, b, a - confirm against the generator source.
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0..xmm3, xmm8..xmm10.
4268PUBLIC _sk_linear_gradient_2stops_sse41
4269_sk_linear_gradient_2stops_sse41 LABEL PROC
4270  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm9 = c0, xmm3 = c1.
4271  DB  68,15,16,8                          ; movups        (%rax),%xmm9
4272  DB  15,16,88,16                         ; movups        0x10(%rax),%xmm3
; Lane 0 result goes to xmm8 because xmm0 (t) is still needed below.
4273  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
4274  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4275  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
4276  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4277  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
4278  DB  68,15,88,193                        ; addps         %xmm1,%xmm8
; Lane 1 result into xmm1.
4279  DB  15,40,203                           ; movaps        %xmm3,%xmm1
4280  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
4281  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
4282  DB  15,198,210,85                       ; shufps        $0x55,%xmm2,%xmm2
4283  DB  15,89,200                           ; mulps         %xmm0,%xmm1
4284  DB  15,88,202                           ; addps         %xmm2,%xmm1
; Lane 2 result into xmm2.
4285  DB  15,40,211                           ; movaps        %xmm3,%xmm2
4286  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
4287  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
4288  DB  69,15,198,210,170                   ; shufps        $0xaa,%xmm10,%xmm10
4289  DB  15,89,208                           ; mulps         %xmm0,%xmm2
4290  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
; Lane 3 result into xmm3 (c0 and c1 are consumed in place).
4291  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
4292  DB  69,15,198,201,255                   ; shufps        $0xff,%xmm9,%xmm9
4293  DB  15,89,216                           ; mulps         %xmm0,%xmm3
4294  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
; Load the next entry, move the lane 0 result into xmm0, and jump.
4295  DB  72,173                              ; lods          %ds:(%rsi),%rax
4296  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
4297  DB  255,224                             ; jmpq          *%rax
4298
; _sk_start_pipeline_sse2
; Driver loop: repeatedly calls the first pipeline entry in steps of 4.
; Inputs as read by the code (rcx, rdx, r8, r9 are the first four integer
; argument registers of the Microsoft x64 calling convention):
;   rcx -> rbx : running position (call it x)
;   rdx -> rsi : pointer to a list of 8-byte entries; the first is loaded
;                into r12 and called, r13 keeps the pointer just past it
;   r8  -> r14 : passed through to every call in rdx
;   r9  -> r15 : upper bound (call it limit)
; Behaviour:
;   while (x + 4 <= limit) { zero xmm0..xmm7; call r12(rdi=x, rsi=r13, rdx=r14); x += 4; }
;   return x in rax (the first x for which x + 4 > limit, unsigned compare).
; Saves and restores r12..r15, rsi, rdi, rbx and xmm6..xmm15, which are
; callee-saved under the Microsoft x64 convention. After the seven pushes the
; stack is 16-byte aligned, so the movaps spills to (%rsp)+N are aligned.
4299PUBLIC _sk_start_pipeline_sse2
4300_sk_start_pipeline_sse2 LABEL PROC
; Prologue: save callee-saved integer registers.
4301  DB  65,87                               ; push          %r15
4302  DB  65,86                               ; push          %r14
4303  DB  65,85                               ; push          %r13
4304  DB  65,84                               ; push          %r12
4305  DB  86                                  ; push          %rsi
4306  DB  87                                  ; push          %rdi
4307  DB  83                                  ; push          %rbx
; Reserve 0xa0 bytes and spill xmm6..xmm15 (10 registers x 16 bytes).
4308  DB  72,129,236,160,0,0,0                ; sub           $0xa0,%rsp
4309  DB  68,15,41,188,36,144,0,0,0           ; movaps        %xmm15,0x90(%rsp)
4310  DB  68,15,41,180,36,128,0,0,0           ; movaps        %xmm14,0x80(%rsp)
4311  DB  68,15,41,108,36,112                 ; movaps        %xmm13,0x70(%rsp)
4312  DB  68,15,41,100,36,96                  ; movaps        %xmm12,0x60(%rsp)
4313  DB  68,15,41,92,36,80                   ; movaps        %xmm11,0x50(%rsp)
4314  DB  68,15,41,84,36,64                   ; movaps        %xmm10,0x40(%rsp)
4315  DB  68,15,41,76,36,48                   ; movaps        %xmm9,0x30(%rsp)
4316  DB  68,15,41,68,36,32                   ; movaps        %xmm8,0x20(%rsp)
4317  DB  15,41,124,36,16                     ; movaps        %xmm7,0x10(%rsp)
4318  DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
; Move the four arguments into registers that survive the calls below.
4319  DB  77,137,207                          ; mov           %r9,%r15
4320  DB  77,137,198                          ; mov           %r8,%r14
4321  DB  72,137,203                          ; mov           %rcx,%rbx
4322  DB  72,137,214                          ; mov           %rdx,%rsi
; r12 = first entry of the list, r13 = pointer to the entry after it.
4323  DB  72,173                              ; lods          %ds:(%rsi),%rax
4324  DB  73,137,196                          ; mov           %rax,%r12
4325  DB  73,137,245                          ; mov           %rsi,%r13
; If x + 4 > limit there is nothing to do: return x unchanged.
4326  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
4327  DB  76,57,248                           ; cmp           %r15,%rax
4328  DB  118,5                               ; jbe           73 <_sk_start_pipeline_sse2+0x73>
4329  DB  72,137,216                          ; mov           %rbx,%rax
4330  DB  235,52                              ; jmp           a7 <_sk_start_pipeline_sse2+0xa7>
; Loop body (offset 0x73): clear xmm0..xmm7 before each call.
4331  DB  15,87,192                           ; xorps         %xmm0,%xmm0
4332  DB  15,87,201                           ; xorps         %xmm1,%xmm1
4333  DB  15,87,210                           ; xorps         %xmm2,%xmm2
4334  DB  15,87,219                           ; xorps         %xmm3,%xmm3
4335  DB  15,87,228                           ; xorps         %xmm4,%xmm4
4336  DB  15,87,237                           ; xorps         %xmm5,%xmm5
4337  DB  15,87,246                           ; xorps         %xmm6,%xmm6
4338  DB  15,87,255                           ; xorps         %xmm7,%xmm7
; Call the first entry with rdi = x, rsi = remaining list, rdx = r14.
4339  DB  72,137,223                          ; mov           %rbx,%rdi
4340  DB  76,137,238                          ; mov           %r13,%rsi
4341  DB  76,137,242                          ; mov           %r14,%rdx
4342  DB  65,255,212                          ; callq         *%r12
; rax = x + 4; compare x + 8 against limit; then x = rax. The mov does not
; alter flags, so jbe still tests the cmp: loop while (new x) + 4 <= limit.
4343  DB  72,141,67,4                         ; lea           0x4(%rbx),%rax
4344  DB  72,131,195,8                        ; add           $0x8,%rbx
4345  DB  76,57,251                           ; cmp           %r15,%rbx
4346  DB  72,137,195                          ; mov           %rax,%rbx
4347  DB  118,204                             ; jbe           73 <_sk_start_pipeline_sse2+0x73>
; Epilogue (offset 0xa7): restore xmm6..xmm15 and the integer registers.
4348  DB  15,40,52,36                         ; movaps        (%rsp),%xmm6
4349  DB  15,40,124,36,16                     ; movaps        0x10(%rsp),%xmm7
4350  DB  68,15,40,68,36,32                   ; movaps        0x20(%rsp),%xmm8
4351  DB  68,15,40,76,36,48                   ; movaps        0x30(%rsp),%xmm9
4352  DB  68,15,40,84,36,64                   ; movaps        0x40(%rsp),%xmm10
4353  DB  68,15,40,92,36,80                   ; movaps        0x50(%rsp),%xmm11
4354  DB  68,15,40,100,36,96                  ; movaps        0x60(%rsp),%xmm12
4355  DB  68,15,40,108,36,112                 ; movaps        0x70(%rsp),%xmm13
4356  DB  68,15,40,180,36,128,0,0,0           ; movaps        0x80(%rsp),%xmm14
4357  DB  68,15,40,188,36,144,0,0,0           ; movaps        0x90(%rsp),%xmm15
4358  DB  72,129,196,160,0,0,0                ; add           $0xa0,%rsp
4359  DB  91                                  ; pop           %rbx
4360  DB  95                                  ; pop           %rdi
4361  DB  94                                  ; pop           %rsi
4362  DB  65,92                               ; pop           %r12
4363  DB  65,93                               ; pop           %r13
4364  DB  65,94                               ; pop           %r14
4365  DB  65,95                               ; pop           %r15
4366  DB  195                                 ; retq
4367
; _sk_just_return_sse2
; Returns immediately without loading or jumping to a further entry.
; NOTE(review): presumably placed last in a pipeline so control returns to
; the callq in the start_pipeline driver - confirm against the caller.
4368PUBLIC _sk_just_return_sse2
4369_sk_just_return_sse2 LABEL PROC
4370  DB  195                                 ; retq
4371
; _sk_seed_shader_sse2
; Initialises the working registers from edi, a pointer loaded from (%rsi),
; and four floats at (%rdx):
;   xmm0 = float(edi) + 0.5 + 4 floats loaded from (%rdx)   (per lane)
;   xmm1 = float(int32 at (%rax)) + 0.5                      (all lanes)
;   xmm2 = 1.0 (0x3f800000) in all lanes
;   xmm3..xmm7 = 0
; NOTE(review): presumably edi is the x position, the int32 is the y
; position, and (%rdx) holds per-lane offsets such as {0,1,2,3} - confirm
; against the generator source.
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, ecx, rsi (advanced twice), xmm0..xmm7.
4372PUBLIC _sk_seed_shader_sse2
4373_sk_seed_shader_sse2 LABEL PROC
4374  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm1 = float(splat(edi)) + 0.5 (xmm2 holds the 0.5 splat).
4375  DB  102,15,110,199                      ; movd          %edi,%xmm0
4376  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
4377  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
4378  DB  185,0,0,0,63                        ; mov           $0x3f000000,%ecx
4379  DB  102,15,110,209                      ; movd          %ecx,%xmm2
4380  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
4381  DB  15,88,202                           ; addps         %xmm2,%xmm1
; xmm0 = 4 floats at (%rdx) + xmm1.
4382  DB  15,16,2                             ; movups        (%rdx),%xmm0
4383  DB  15,88,193                           ; addps         %xmm1,%xmm0
; xmm1 = float(splat(int32 at (%rax))) + 0.5.
4384  DB  102,15,110,8                        ; movd          (%rax),%xmm1
4385  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
4386  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
4387  DB  15,88,202                           ; addps         %xmm2,%xmm1
; xmm2 = splat(1.0f).
4388  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4389  DB  102,15,110,208                      ; movd          %eax,%xmm2
4390  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
; Load the next entry, zero xmm3..xmm7, and jump.
4391  DB  72,173                              ; lods          %ds:(%rsi),%rax
4392  DB  15,87,219                           ; xorps         %xmm3,%xmm3
4393  DB  15,87,228                           ; xorps         %xmm4,%xmm4
4394  DB  15,87,237                           ; xorps         %xmm5,%xmm5
4395  DB  15,87,246                           ; xorps         %xmm6,%xmm6
4396  DB  15,87,255                           ; xorps         %xmm7,%xmm7
4397  DB  255,224                             ; jmpq          *%rax
4398
; _sk_constant_color_sse2
; Loads four floats through the pointer fetched from (%rsi) and broadcasts
; float i to all lanes of xmm<i>, for i = 0..3.
; NOTE(review): presumably the four floats are an r, g, b, a colour - confirm.
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0..xmm3.
4399PUBLIC _sk_constant_color_sse2
4400_sk_constant_color_sse2 LABEL PROC
4401  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm3 holds all four floats; each shufps selects one lane and replicates it.
4402  DB  15,16,24                            ; movups        (%rax),%xmm3
4403  DB  15,40,195                           ; movaps        %xmm3,%xmm0
4404  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4405  DB  15,40,203                           ; movaps        %xmm3,%xmm1
4406  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
4407  DB  15,40,211                           ; movaps        %xmm3,%xmm2
4408  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
; xmm3 is shuffled last, in place, since it was the source for the others.
4409  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
4410  DB  72,173                              ; lods          %ds:(%rsi),%rax
4411  DB  255,224                             ; jmpq          *%rax
4412
; _sk_clear_sse2
; Zeroes xmm0..xmm3, then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3.
4413PUBLIC _sk_clear_sse2
4414_sk_clear_sse2 LABEL PROC
4415  DB  72,173                              ; lods          %ds:(%rsi),%rax
4416  DB  15,87,192                           ; xorps         %xmm0,%xmm0
4417  DB  15,87,201                           ; xorps         %xmm1,%xmm1
4418  DB  15,87,210                           ; xorps         %xmm2,%xmm2
4419  DB  15,87,219                           ; xorps         %xmm3,%xmm3
4420  DB  255,224                             ; jmpq          *%rax
4421
; _sk_plus__sse2
; Lane-wise addition: xmm0 += xmm4, xmm1 += xmm5, xmm2 += xmm6, xmm3 += xmm7.
; NOTE(review): presumably xmm0..xmm3 are the source r,g,b,a and xmm4..xmm7
; the destination r,g,b,a - confirm against the generator source.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3.
4422PUBLIC _sk_plus__sse2
4423_sk_plus__sse2 LABEL PROC
4424  DB  15,88,196                           ; addps         %xmm4,%xmm0
4425  DB  15,88,205                           ; addps         %xmm5,%xmm1
4426  DB  15,88,214                           ; addps         %xmm6,%xmm2
4427  DB  15,88,223                           ; addps         %xmm7,%xmm3
4428  DB  72,173                              ; lods          %ds:(%rsi),%rax
4429  DB  255,224                             ; jmpq          *%rax
4430
; _sk_srcover_sse2
; With k = 1.0 - xmm3 (computed from the incoming xmm3):
;   xmm0 += xmm4 * k,  xmm1 += xmm5 * k,  xmm2 += xmm6 * k,  xmm3 += xmm7 * k
; NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7 are
; destination r,g,b,a, making this the source-over blend - confirm.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3, xmm8, xmm9.
4431PUBLIC _sk_srcover_sse2
4432_sk_srcover_sse2 LABEL PROC
; xmm8 = splat(1.0f) - xmm3.
4433  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4434  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4435  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4436  DB  68,15,92,195                        ; subps         %xmm3,%xmm8
; xmm9 is a scratch copy of k for each of the first three registers.
4437  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4438  DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
4439  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
4440  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4441  DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
4442  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
4443  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4444  DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
4445  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
; Last use of k, so xmm8 is multiplied in place.
4446  DB  68,15,89,199                        ; mulps         %xmm7,%xmm8
4447  DB  65,15,88,216                        ; addps         %xmm8,%xmm3
4448  DB  72,173                              ; lods          %ds:(%rsi),%rax
4449  DB  255,224                             ; jmpq          *%rax
4450
; _sk_dstover_sse2
; With k = 1.0 - xmm7:
;   xmm0 = xmm0 * k + xmm4,  xmm1 = xmm1 * k + xmm5,
;   xmm2 = xmm2 * k + xmm6,  xmm3 = xmm3 * k + xmm7
; NOTE(review): presumably xmm0..xmm3 are source r,g,b,a and xmm4..xmm7 are
; destination r,g,b,a, making this the destination-over blend - confirm.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3, xmm8.
4451PUBLIC _sk_dstover_sse2
4452_sk_dstover_sse2 LABEL PROC
; xmm8 = splat(1.0f) - xmm7.
4453  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4454  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4455  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4456  DB  68,15,92,199                        ; subps         %xmm7,%xmm8
4457  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4458  DB  15,88,196                           ; addps         %xmm4,%xmm0
4459  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4460  DB  15,88,205                           ; addps         %xmm5,%xmm1
4461  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
4462  DB  15,88,214                           ; addps         %xmm6,%xmm2
4463  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
4464  DB  15,88,223                           ; addps         %xmm7,%xmm3
4465  DB  72,173                              ; lods          %ds:(%rsi),%rax
4466  DB  255,224                             ; jmpq          *%rax
4467
; _sk_clamp_0_sse2
; Clamps xmm0..xmm3 from below: each becomes maxps(itself, 0.0) per lane.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3, xmm8.
4468PUBLIC _sk_clamp_0_sse2
4469_sk_clamp_0_sse2 LABEL PROC
; xmm8 = 0.0 in all lanes.
4470  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
4471  DB  65,15,95,192                        ; maxps         %xmm8,%xmm0
4472  DB  65,15,95,200                        ; maxps         %xmm8,%xmm1
4473  DB  65,15,95,208                        ; maxps         %xmm8,%xmm2
4474  DB  65,15,95,216                        ; maxps         %xmm8,%xmm3
4475  DB  72,173                              ; lods          %ds:(%rsi),%rax
4476  DB  255,224                             ; jmpq          *%rax
4477
; _sk_clamp_1_sse2
; Clamps xmm0..xmm3 from above: each becomes minps(itself, 1.0) per lane.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3, xmm8.
4478PUBLIC _sk_clamp_1_sse2
4479_sk_clamp_1_sse2 LABEL PROC
; xmm8 = splat(1.0f).
4480  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4481  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4482  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4483  DB  65,15,93,192                        ; minps         %xmm8,%xmm0
4484  DB  65,15,93,200                        ; minps         %xmm8,%xmm1
4485  DB  65,15,93,208                        ; minps         %xmm8,%xmm2
4486  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
4487  DB  72,173                              ; lods          %ds:(%rsi),%rax
4488  DB  255,224                             ; jmpq          *%rax
4489
; _sk_clamp_a_sse2
; First limits xmm3 to at most 1.0, then limits xmm0..xmm2 to at most the
; (already clamped) xmm3, per lane:
;   xmm3 = min(xmm3, 1.0);  xmm0 = min(xmm0, xmm3);  likewise xmm1, xmm2
; NOTE(review): presumably xmm3 is alpha and this keeps premultiplied colour
; channels <= alpha - confirm against the generator source.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm3, xmm8.
4490PUBLIC _sk_clamp_a_sse2
4491_sk_clamp_a_sse2 LABEL PROC
; xmm8 = splat(1.0f).
4492  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4493  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4494  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4495  DB  65,15,93,216                        ; minps         %xmm8,%xmm3
4496  DB  15,93,195                           ; minps         %xmm3,%xmm0
4497  DB  15,93,203                           ; minps         %xmm3,%xmm1
4498  DB  15,93,211                           ; minps         %xmm3,%xmm2
4499  DB  72,173                              ; lods          %ds:(%rsi),%rax
4500  DB  255,224                             ; jmpq          *%rax
4501
; _sk_set_rgb_sse2
; Loads three floats through the pointer fetched from (%rsi) (byte offsets
; 0x0, 0x4, 0x8) and broadcasts them to all lanes of xmm0, xmm1 and xmm2
; respectively. xmm3 is left untouched.
; The second lods loads the next entry, which is jumped to.
; Clobbers: rax, rsi (advanced twice), xmm0..xmm2.
4502PUBLIC _sk_set_rgb_sse2
4503_sk_set_rgb_sse2 LABEL PROC
4504  DB  72,173                              ; lods          %ds:(%rsi),%rax
4505  DB  243,15,16,0                         ; movss         (%rax),%xmm0
4506  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
4507  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4508  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4509  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
4510  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
4511  DB  72,173                              ; lods          %ds:(%rsi),%rax
4512  DB  255,224                             ; jmpq          *%rax
4513
; _sk_swap_rb_sse2
; Exchanges the contents of xmm0 and xmm2, using xmm8 as the temporary.
; xmm1 and xmm3 are left untouched.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0, xmm2, xmm8.
4514PUBLIC _sk_swap_rb_sse2
4515_sk_swap_rb_sse2 LABEL PROC
4516  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
4517  DB  72,173                              ; lods          %ds:(%rsi),%rax
4518  DB  15,40,194                           ; movaps        %xmm2,%xmm0
4519  DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
4520  DB  255,224                             ; jmpq          *%rax
4521
; _sk_swap_sse2
; Exchanges the register group xmm0..xmm3 with the group xmm4..xmm7
; (xmm0<->xmm4, xmm1<->xmm5, xmm2<->xmm6, xmm3<->xmm7), using xmm8..xmm11
; as temporaries.
; Then jumps to the next entry loaded from (%rsi).
; Clobbers: rax, rsi (advanced by lods), xmm0..xmm11.
4522PUBLIC _sk_swap_sse2
4523_sk_swap_sse2 LABEL PROC
; Stash xmm3..xmm0 in xmm8..xmm11.
4524  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
4525  DB  68,15,40,202                        ; movaps        %xmm2,%xmm9
4526  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
4527  DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
4528  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm0..xmm3 = old xmm4..xmm7.
4529  DB  15,40,196                           ; movaps        %xmm4,%xmm0
4530  DB  15,40,205                           ; movaps        %xmm5,%xmm1
4531  DB  15,40,214                           ; movaps        %xmm6,%xmm2
4532  DB  15,40,223                           ; movaps        %xmm7,%xmm3
; xmm4..xmm7 = old xmm0..xmm3 from the stash.
4533  DB  65,15,40,227                        ; movaps        %xmm11,%xmm4
4534  DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
4535  DB  65,15,40,241                        ; movaps        %xmm9,%xmm6
4536  DB  65,15,40,248                        ; movaps        %xmm8,%xmm7
4537  DB  255,224                             ; jmpq          *%rax
4538
; _sk_move_src_dst_sse2
; Copies xmm0..xmm3 into xmm4..xmm7. xmm0..xmm3 are left unchanged.
; NOTE(review): presumably "src -> dst" in pipeline terms (from the name).
4539PUBLIC _sk_move_src_dst_sse2
4540_sk_move_src_dst_sse2 LABEL PROC
; Fetch the jump target first; the copies below do not use rax.
4541  DB  72,173                              ; lods          %ds:(%rsi),%rax
4542  DB  15,40,224                           ; movaps        %xmm0,%xmm4
4543  DB  15,40,233                           ; movaps        %xmm1,%xmm5
4544  DB  15,40,242                           ; movaps        %xmm2,%xmm6
4545  DB  15,40,251                           ; movaps        %xmm3,%xmm7
4546  DB  255,224                             ; jmpq          *%rax
4547
; _sk_move_dst_src_sse2
; Copies xmm4..xmm7 into xmm0..xmm3. xmm4..xmm7 are left unchanged.
; NOTE(review): presumably "dst -> src" in pipeline terms (from the name).
4548PUBLIC _sk_move_dst_src_sse2
4549_sk_move_dst_src_sse2 LABEL PROC
; Fetch the jump target first; the copies below do not use rax.
4550  DB  72,173                              ; lods          %ds:(%rsi),%rax
4551  DB  15,40,196                           ; movaps        %xmm4,%xmm0
4552  DB  15,40,205                           ; movaps        %xmm5,%xmm1
4553  DB  15,40,214                           ; movaps        %xmm6,%xmm2
4554  DB  15,40,223                           ; movaps        %xmm7,%xmm3
4555  DB  255,224                             ; jmpq          *%rax
4556
; _sk_premul_sse2
; Multiplies xmm0, xmm1 and xmm2 lane-wise by xmm3. xmm3 itself is unchanged.
; NOTE(review): presumably r,g,b *= a (premultiply by alpha), per the name.
4557PUBLIC _sk_premul_sse2
4558_sk_premul_sse2 LABEL PROC
4559  DB  15,89,195                           ; mulps         %xmm3,%xmm0
4560  DB  15,89,203                           ; mulps         %xmm3,%xmm1
4561  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; Fetch the next entry from the rsi stream and tail-jump to it.
4562  DB  72,173                              ; lods          %ds:(%rsi),%rax
4563  DB  255,224                             ; jmpq          *%rax
4564
; _sk_unpremul_sse2
; Multiplies xmm0, xmm1 and xmm2 lane-wise by (1.0 / xmm3), except that lanes
; where xmm3 compares equal to 0.0 are multiplied by 0.0 instead (avoiding a
; multiply by infinity). xmm3 is unchanged; xmm8, xmm9 and eax are scratch.
; NOTE(review): presumably r,g,b /= a (undo premultiplication), per the name.
4565PUBLIC _sk_unpremul_sse2
4566_sk_unpremul_sse2 LABEL PROC
; xmm8 = 0.0 in every lane.
4567  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
; xmm9 = 1.0f (bit pattern 0x3f800000) in every lane.
4568  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4569  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
4570  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm9 = 1.0 / xmm3
4571  DB  68,15,94,203                        ; divps         %xmm3,%xmm9
; xmm8 = all-ones mask in lanes where 0.0 != xmm3, zero elsewhere.
4572  DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
; xmm8 = (xmm3 != 0) ? 1/xmm3 : 0
4573  DB  69,15,84,193                        ; andps         %xmm9,%xmm8
4574  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4575  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4576  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; Fetch the next entry from the rsi stream and tail-jump to it.
4577  DB  72,173                              ; lods          %ds:(%rsi),%rax
4578  DB  255,224                             ; jmpq          *%rax
4579
; _sk_from_srgb_sse2
; Applies the same piecewise function independently to each lane of xmm0,
; xmm1 and xmm2 (xmm3 is not touched):
;
;     lo = x * K_LO                               K_LO   ~= 0.0774  (~ 1/12.92)
;     hi = (x*x) * (x * 0.3 + 0.6975) + 0.0025
;     x  = (x < 0.055) ? lo : hi
;
; The select is done branch-free with a compare mask and and/andn/or.
; Scratch: eax, xmm8..xmm15.
; NOTE(review): per the stage name this is presumably an approximation of the
; sRGB -> linear transfer function; confirm against the generating source.
4580PUBLIC _sk_from_srgb_sse2
4581_sk_from_srgb_sse2 LABEL PROC
; xmm8 = K_LO (0x3d9e8391 ~= 0.0774) in every lane.
4582  DB  184,145,131,158,61                  ; mov           $0x3d9e8391,%eax
4583  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4584  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; ---- channel in xmm0 ----
; xmm13 = lo = K_LO * x
4585  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
4586  DB  68,15,89,232                        ; mulps         %xmm0,%xmm13
; xmm12 = x * x
4587  DB  68,15,40,224                        ; movaps        %xmm0,%xmm12
4588  DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
; xmm9 = 0.3 (0x3e99999a) in every lane.
4589  DB  184,154,153,153,62                  ; mov           $0x3e99999a,%eax
4590  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
4591  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm10 = 0.6975 (0x3f328f5c) in every lane.
4592  DB  184,92,143,50,63                    ; mov           $0x3f328f5c,%eax
4593  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
4594  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
; xmm14 = 0.3 * x + 0.6975
4595  DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
4596  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
4597  DB  69,15,88,242                        ; addps         %xmm10,%xmm14
; xmm11 = 0.0025 (0x3b23d70a) in every lane.
4598  DB  184,10,215,35,59                    ; mov           $0x3b23d70a,%eax
4599  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
4600  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
; xmm14 = hi = (x*x) * (0.3*x + 0.6975) + 0.0025
4601  DB  69,15,89,244                        ; mulps         %xmm12,%xmm14
4602  DB  69,15,88,243                        ; addps         %xmm11,%xmm14
; xmm12 = threshold 0.055 (0x3d6147ae) in every lane (x*x no longer needed).
4603  DB  184,174,71,97,61                    ; mov           $0x3d6147ae,%eax
4604  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
4605  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
; xmm0 = mask (x < 0.055); then xmm0 = (mask & lo) | (~mask & hi).
4606  DB  65,15,194,196,1                     ; cmpltps       %xmm12,%xmm0
4607  DB  68,15,84,232                        ; andps         %xmm0,%xmm13
4608  DB  65,15,85,198                        ; andnps        %xmm14,%xmm0
4609  DB  65,15,86,197                        ; orps          %xmm13,%xmm0
; ---- channel in xmm1 (same computation; lo in xmm13, x*x in xmm14, hi in xmm15) ----
4610  DB  69,15,40,232                        ; movaps        %xmm8,%xmm13
4611  DB  68,15,89,233                        ; mulps         %xmm1,%xmm13
4612  DB  68,15,40,241                        ; movaps        %xmm1,%xmm14
4613  DB  69,15,89,246                        ; mulps         %xmm14,%xmm14
4614  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
4615  DB  68,15,89,249                        ; mulps         %xmm1,%xmm15
4616  DB  69,15,88,250                        ; addps         %xmm10,%xmm15
4617  DB  69,15,89,254                        ; mulps         %xmm14,%xmm15
4618  DB  69,15,88,251                        ; addps         %xmm11,%xmm15
4619  DB  65,15,194,204,1                     ; cmpltps       %xmm12,%xmm1
4620  DB  68,15,84,233                        ; andps         %xmm1,%xmm13
4621  DB  65,15,85,207                        ; andnps        %xmm15,%xmm1
4622  DB  65,15,86,205                        ; orps          %xmm13,%xmm1
; ---- channel in xmm2 ----
; Last use of the constants, so xmm8 (K_LO) and xmm9 (0.3) are consumed in
; place: lo ends up in xmm8, hi in xmm9, x*x in xmm13.
4623  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
4624  DB  68,15,40,234                        ; movaps        %xmm2,%xmm13
4625  DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
4626  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
4627  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
4628  DB  69,15,89,205                        ; mulps         %xmm13,%xmm9
4629  DB  69,15,88,203                        ; addps         %xmm11,%xmm9
4630  DB  65,15,194,212,1                     ; cmpltps       %xmm12,%xmm2
4631  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
4632  DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
4633  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
; Fetch the next entry from the rsi stream and tail-jump to it.
4634  DB  72,173                              ; lods          %ds:(%rsi),%rax
4635  DB  255,224                             ; jmpq          *%rax
4636
; _sk_to_srgb_sse2
; Applies the same piecewise function independently to each lane of xmm0,
; xmm1 and xmm2 (xmm3 is not touched):
;
;     rs   = rsqrtps(x)            ; approximate x^(-1/2)
;     sqrt = rcpps(rs)             ; approximate x^(1/2)
;     ftrt = rsqrtps(rs)           ; approximate x^(1/4)
;     lo   = 12.46 * x
;     hi   = min(1.0, 0.411192 * ftrt + 0.689206 * sqrt - 0.0988)
;     x    = (x < 0.0043) ? lo : hi
;
; rsqrtps/rcpps are hardware approximations, so results are not exact.
; The select is done branch-free with a compare mask and and/andn/or.
; Scratch: eax, xmm8..xmm15.
; NOTE(review): per the stage name this is presumably an approximation of the
; linear -> sRGB transfer function; confirm against the generating source.
4637PUBLIC _sk_to_srgb_sse2
4638_sk_to_srgb_sse2 LABEL PROC
; ---- channel in xmm0 ----
; xmm8 = rs, xmm15 = sqrt, xmm13 = ftrt
4639  DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
4640  DB  69,15,83,248                        ; rcpps         %xmm8,%xmm15
4641  DB  69,15,82,232                        ; rsqrtps       %xmm8,%xmm13
; xmm8 = 12.46 (0x41475c29) in every lane.
4642  DB  184,41,92,71,65                     ; mov           $0x41475c29,%eax
4643  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
4644  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm14 = lo = 12.46 * x
4645  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
4646  DB  68,15,89,240                        ; mulps         %xmm0,%xmm14
; xmm9 = 1.0 (0x3f800000) in every lane.
4647  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4648  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
4649  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm10 = 0.411192 (0x3ed287c2) in every lane.
4650  DB  184,194,135,210,62                  ; mov           $0x3ed287c2,%eax
4651  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
4652  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
; xmm11 = 0.689206 (0x3f306fce) in every lane.
4653  DB  184,206,111,48,63                   ; mov           $0x3f306fce,%eax
4654  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
4655  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
; xmm12 = -0.0988: 0x3dca57a8 (~0.0988) with the sign bit flipped by the xor.
4656  DB  184,168,87,202,61                   ; mov           $0x3dca57a8,%eax
4657  DB  53,0,0,0,128                        ; xor           $0x80000000,%eax
4658  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
4659  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
; xmm13 = 0.411192*ftrt + (0.689206*sqrt - 0.0988)
4660  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
4661  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
4662  DB  69,15,89,234                        ; mulps         %xmm10,%xmm13
4663  DB  69,15,88,239                        ; addps         %xmm15,%xmm13
; xmm15 = hi = min(1.0, xmm13)
4664  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
4665  DB  69,15,93,253                        ; minps         %xmm13,%xmm15
; xmm13 = threshold 0.0043 (0x3b8ce704) in every lane.
4666  DB  184,4,231,140,59                    ; mov           $0x3b8ce704,%eax
4667  DB  102,68,15,110,232                   ; movd          %eax,%xmm13
4668  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
; xmm0 = mask (x < 0.0043); then xmm0 = (mask & lo) | (~mask & hi).
4669  DB  65,15,194,197,1                     ; cmpltps       %xmm13,%xmm0
4670  DB  68,15,84,240                        ; andps         %xmm0,%xmm14
4671  DB  65,15,85,199                        ; andnps        %xmm15,%xmm0
4672  DB  65,15,86,198                        ; orps          %xmm14,%xmm0
; ---- channel in xmm1 (same computation, constants reused) ----
; xmm14 = rs, xmm15 = sqrt, then xmm14 = ftrt
4673  DB  68,15,82,241                        ; rsqrtps       %xmm1,%xmm14
4674  DB  69,15,83,254                        ; rcpps         %xmm14,%xmm15
4675  DB  69,15,82,246                        ; rsqrtps       %xmm14,%xmm14
4676  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
4677  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
4678  DB  69,15,89,242                        ; mulps         %xmm10,%xmm14
4679  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
; xmm15 = hi = min(1.0, ...)
4680  DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
4681  DB  69,15,93,254                        ; minps         %xmm14,%xmm15
; xmm14 = lo = 12.46 * x
4682  DB  69,15,40,240                        ; movaps        %xmm8,%xmm14
4683  DB  68,15,89,241                        ; mulps         %xmm1,%xmm14
4684  DB  65,15,194,205,1                     ; cmpltps       %xmm13,%xmm1
4685  DB  68,15,84,241                        ; andps         %xmm1,%xmm14
4686  DB  65,15,85,207                        ; andnps        %xmm15,%xmm1
4687  DB  65,15,86,206                        ; orps          %xmm14,%xmm1
; ---- channel in xmm2 ----
; Last use of the constants, so they are consumed in place: xmm11 (0.689206)
; is overwritten with ftrt after its final use, xmm9 (1.0) receives hi, and
; xmm8 (12.46) receives lo.
4688  DB  68,15,82,242                        ; rsqrtps       %xmm2,%xmm14
4689  DB  69,15,83,254                        ; rcpps         %xmm14,%xmm15
4690  DB  69,15,89,251                        ; mulps         %xmm11,%xmm15
4691  DB  69,15,88,252                        ; addps         %xmm12,%xmm15
4692  DB  69,15,82,222                        ; rsqrtps       %xmm14,%xmm11
4693  DB  69,15,89,218                        ; mulps         %xmm10,%xmm11
4694  DB  69,15,88,223                        ; addps         %xmm15,%xmm11
4695  DB  69,15,93,203                        ; minps         %xmm11,%xmm9
4696  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
4697  DB  65,15,194,213,1                     ; cmpltps       %xmm13,%xmm2
4698  DB  68,15,84,194                        ; andps         %xmm2,%xmm8
4699  DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
4700  DB  65,15,86,208                        ; orps          %xmm8,%xmm2
; Fetch the next entry from the rsi stream and tail-jump to it.
4701  DB  72,173                              ; lods          %ds:(%rsi),%rax
4702  DB  255,224                             ; jmpq          *%rax
4703
; _sk_scale_1_float_sse2
; Fetches one 8-byte entry from the stream at rsi, treats it as a pointer to a
; single float, broadcasts that float to all lanes, and multiplies xmm0, xmm1,
; xmm2 and xmm3 by it. Scratch: xmm8.
4704PUBLIC _sk_scale_1_float_sse2
4705_sk_scale_1_float_sse2 LABEL PROC
; rax = pointer to the scale factor; rsi advances past it.
4706  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = *rax replicated into all four lanes.
4707  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
4708  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4709  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4710  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4711  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
4712  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4713  DB  72,173                              ; lods          %ds:(%rsi),%rax
4714  DB  255,224                             ; jmpq          *%rax
4715
; _sk_scale_u8_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, reads 4 bytes at base + rdi, converts each byte to a
; float scaled by ~1/255, and multiplies xmm0..xmm3 lane-wise by the result.
; Scratch: eax/rax, xmm8, xmm9.
; NOTE(review): rdi is presumably the current pixel index and the bytes an
; 8-bit coverage/mask row; confirm against the caller.
4716PUBLIC _sk_scale_u8_sse2
4717_sk_scale_u8_sse2 LABEL PROC
; rax = stage operand, then rax = base pointer stored at that address.
4718  DB  72,173                              ; lods          %ds:(%rsi),%rax
4719  DB  72,139,0                            ; mov           (%rax),%rax
; Load 4 bytes from base + rdi into the low dword of xmm8.
4720  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
; Zero-extend the 4 bytes to 4 dwords by interleaving with zero twice.
4721  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
4722  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
4723  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
4724  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
; xmm9 = 0x3b808081 (~1/255) in every lane, then xmm9 = byte * (1/255).
4725  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4726  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
4727  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4728  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
4729  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
4730  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
4731  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
4732  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4733  DB  72,173                              ; lods          %ds:(%rsi),%rax
4734  DB  255,224                             ; jmpq          *%rax
4735
; _sk_lerp_1_float_sse2
; Fetches one 8-byte entry from the stream at rsi, treats it as a pointer to a
; single float t, and computes for each register pair
;     xmmN = (xmmN - xmm(N+4)) * t + xmm(N+4)      for N = 0..3
; i.e. a linear interpolation from xmm4..xmm7 (t = 0) to xmm0..xmm3 (t = 1).
; xmm4..xmm7 are unchanged. Scratch: xmm8.
4736PUBLIC _sk_lerp_1_float_sse2
4737_sk_lerp_1_float_sse2 LABEL PROC
; rax = pointer to t; rsi advances past it.
4738  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = t replicated into all four lanes.
4739  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
4740  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm0 = (xmm0 - xmm4) * t + xmm4
4741  DB  15,92,196                           ; subps         %xmm4,%xmm0
4742  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
4743  DB  15,88,196                           ; addps         %xmm4,%xmm0
; xmm1 = (xmm1 - xmm5) * t + xmm5
4744  DB  15,92,205                           ; subps         %xmm5,%xmm1
4745  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
4746  DB  15,88,205                           ; addps         %xmm5,%xmm1
; xmm2 = (xmm2 - xmm6) * t + xmm6
4747  DB  15,92,214                           ; subps         %xmm6,%xmm2
4748  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
4749  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = (xmm3 - xmm7) * t + xmm7
4750  DB  15,92,223                           ; subps         %xmm7,%xmm3
4751  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
4752  DB  15,88,223                           ; addps         %xmm7,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4753  DB  72,173                              ; lods          %ds:(%rsi),%rax
4754  DB  255,224                             ; jmpq          *%rax
4755
; _sk_lerp_u8_sse2
; Like a per-lane lerp with a byte-valued factor: fetches one 8-byte entry
; from the stream at rsi, loads a base pointer from the address it names,
; reads 4 bytes at base + rdi, converts each to t = byte * (~1/255), and
; computes
;     xmmN = (xmmN - xmm(N+4)) * t + xmm(N+4)      for N = 0..3
; xmm4..xmm7 are unchanged. Scratch: eax/rax, xmm8, xmm9.
; NOTE(review): rdi is presumably the current pixel index; confirm.
4756PUBLIC _sk_lerp_u8_sse2
4757_sk_lerp_u8_sse2 LABEL PROC
; rax = stage operand, then rax = base pointer stored at that address.
4758  DB  72,173                              ; lods          %ds:(%rsi),%rax
4759  DB  72,139,0                            ; mov           (%rax),%rax
; Load 4 bytes from base + rdi and zero-extend them to 4 dwords.
4760  DB  102,68,15,110,4,56                  ; movd          (%rax,%rdi,1),%xmm8
4761  DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
4762  DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
4763  DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
4764  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
; xmm9 = t = byte * 0x3b808081 (~1/255)
4765  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4766  DB  102,68,15,110,200                   ; movd          %eax,%xmm9
4767  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
4768  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
; xmm0 = (xmm0 - xmm4) * t + xmm4
4769  DB  15,92,196                           ; subps         %xmm4,%xmm0
4770  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
4771  DB  15,88,196                           ; addps         %xmm4,%xmm0
; xmm1 = (xmm1 - xmm5) * t + xmm5
4772  DB  15,92,205                           ; subps         %xmm5,%xmm1
4773  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
4774  DB  15,88,205                           ; addps         %xmm5,%xmm1
; xmm2 = (xmm2 - xmm6) * t + xmm6
4775  DB  15,92,214                           ; subps         %xmm6,%xmm2
4776  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
4777  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = (xmm3 - xmm7) * t + xmm7
4778  DB  15,92,223                           ; subps         %xmm7,%xmm3
4779  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
4780  DB  15,88,223                           ; addps         %xmm7,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4781  DB  72,173                              ; lods          %ds:(%rsi),%rax
4782  DB  255,224                             ; jmpq          *%rax
4783
; _sk_lerp_565_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, and reads four 16-bit values at base + rdi*2. Each
; value is split into three bit fields that become per-channel lerp factors:
;     t0 = (v & 0xf800) * ~1/63488     (bits 11..15, scaled to 0..1)
;     t1 = (v & 0x07e0) * ~1/2016      (bits  5..10, scaled to 0..1)
;     t2 = (v & 0x001f) * ~1/31        (bits  0..4,  scaled to 0..1)
; Then:
;     xmm0 = (xmm0 - xmm4) * t0 + xmm4
;     xmm1 = (xmm1 - xmm5) * t1 + xmm5
;     xmm2 = (xmm2 - xmm6) * t2 + xmm6
;     xmm3 = 1.0 in every lane
; The incoming xmm3 is discarded: xmm3 is used as scratch throughout and
; finally overwritten with 1.0. xmm4..xmm7 are unchanged.
; Scratch: eax/rax, xmm8..xmm11.
; NOTE(review): rdi is presumably the current pixel index and the 16-bit
; values RGB565 coverage; confirm against the generating source.
4784PUBLIC _sk_lerp_565_sse2
4785_sk_lerp_565_sse2 LABEL PROC
; rax = stage operand, then rax = base pointer stored at that address.
4786  DB  72,173                              ; lods          %ds:(%rsi),%rax
4787  DB  72,139,0                            ; mov           (%rax),%rax
; Load 8 bytes (four 16-bit values) and zero-extend them to 4 dwords in xmm8.
4788  DB  243,68,15,126,4,120                 ; movq          (%rax,%rdi,2),%xmm8
4789  DB  102,15,239,219                      ; pxor          %xmm3,%xmm3
4790  DB  102,68,15,97,195                    ; punpcklwd     %xmm3,%xmm8
; xmm10 = t0 = float(v & 0xf800) * 0x37842108 (~1/63488)
4791  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
4792  DB  102,15,110,216                      ; movd          %eax,%xmm3
4793  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
4794  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
4795  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
4796  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
4797  DB  102,68,15,110,208                   ; movd          %eax,%xmm10
4798  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4799  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
; xmm11 = t1 = float(v & 0x7e0) * 0x3a020821 (~1/2016)
4800  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
4801  DB  102,15,110,216                      ; movd          %eax,%xmm3
4802  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
4803  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
4804  DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
4805  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
4806  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
4807  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
4808  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
; xmm3 = t2 = float(v & 0x1f) * 0x3d042108 (~1/31)
4809  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
4810  DB  102,15,110,216                      ; movd          %eax,%xmm3
4811  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
4812  DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
4813  DB  68,15,91,195                        ; cvtdq2ps      %xmm3,%xmm8
4814  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
4815  DB  102,15,110,216                      ; movd          %eax,%xmm3
4816  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
4817  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; xmm0 = (xmm0 - xmm4) * t0 + xmm4
4818  DB  15,92,196                           ; subps         %xmm4,%xmm0
4819  DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
4820  DB  15,88,196                           ; addps         %xmm4,%xmm0
; xmm1 = (xmm1 - xmm5) * t1 + xmm5
4821  DB  15,92,205                           ; subps         %xmm5,%xmm1
4822  DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
4823  DB  15,88,205                           ; addps         %xmm5,%xmm1
; xmm2 = (xmm2 - xmm6) * t2 + xmm6
4824  DB  15,92,214                           ; subps         %xmm6,%xmm2
4825  DB  15,89,211                           ; mulps         %xmm3,%xmm2
4826  DB  15,88,214                           ; addps         %xmm6,%xmm2
; xmm3 = 1.0 (0x3f800000) in every lane.
4827  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4828  DB  102,15,110,216                      ; movd          %eax,%xmm3
4829  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4830  DB  72,173                              ; lods          %ds:(%rsi),%rax
4831  DB  255,224                             ; jmpq          *%rax
4832
; _sk_load_tables_sse2
; Fetches one 8-byte entry from the stream at rsi and treats it as a pointer
; to a record of four pointers:
;     +0x00  base of 32-bit packed values (read at base + rdi*4, 16 bytes)
;     +0x08  float table indexed by byte 0 (bits  0..7)  -> xmm0
;     +0x10  float table indexed by byte 1 (bits  8..15) -> xmm1
;     +0x18  float table indexed by byte 2 (bits 16..23) -> xmm2
; Byte 3 (bits 24..31) is not looked up; it is converted to float and scaled
; by ~1/255 into xmm3.
; SSE2 has no gather instruction, so each table lookup moves the four indices
; to general registers, does four scalar movss loads, and recombines them
; with unpcklps.
; Scratch: rax, rcx, r8, r9, r10, r11, xmm8, xmm9, xmm10.
; NOTE(review): rdi is presumably the current pixel index and the packed
; values 8888 pixels with per-channel lookup tables; confirm.
4833PUBLIC _sk_load_tables_sse2
4834_sk_load_tables_sse2 LABEL PROC
; rax = record pointer; rcx = packed-value base; r8 = first table.
4835  DB  72,173                              ; lods          %ds:(%rsi),%rax
4836  DB  72,139,8                            ; mov           (%rax),%rcx
4837  DB  76,139,64,8                         ; mov           0x8(%rax),%r8
; xmm8 = four packed 32-bit values (unaligned load).
4838  DB  243,68,15,111,4,185                 ; movdqu        (%rcx,%rdi,4),%xmm8
; xmm0 = 0x000000ff mask in every lane.
4839  DB  185,255,0,0,0                       ; mov           $0xff,%ecx
4840  DB  102,15,110,193                      ; movd          %ecx,%xmm0
4841  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm9 = (v >> 8) & 0xff, xmm10 = (v >> 16) & 0xff, xmm0 = v & 0xff.
4842  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
4843  DB  102,65,15,114,209,8                 ; psrld         $0x8,%xmm9
4844  DB  102,68,15,219,200                   ; pand          %xmm0,%xmm9
4845  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
4846  DB  102,65,15,114,210,16                ; psrld         $0x10,%xmm10
4847  DB  102,68,15,219,208                   ; pand          %xmm0,%xmm10
4848  DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
; ---- lookup 1: indices in xmm0, table in r8, result -> xmm0 ----
; pshufd $0x4e swaps the two 64-bit halves so lanes 2,3 can be read via movq.
; r9 = lane 2, rcx = lane 3, r11 = lane 0, r10 = lane 1.
4849  DB  102,15,112,216,78                   ; pshufd        $0x4e,%xmm0,%xmm3
4850  DB  102,72,15,126,217                   ; movq          %xmm3,%rcx
4851  DB  65,137,201                          ; mov           %ecx,%r9d
4852  DB  72,193,233,32                       ; shr           $0x20,%rcx
4853  DB  102,73,15,126,194                   ; movq          %xmm0,%r10
4854  DB  69,137,211                          ; mov           %r10d,%r11d
4855  DB  73,193,234,32                       ; shr           $0x20,%r10
; Four scalar loads, interleaved back into lane order 0,1,2,3.
4856  DB  243,67,15,16,28,144                 ; movss         (%r8,%r10,4),%xmm3
4857  DB  243,65,15,16,4,136                  ; movss         (%r8,%rcx,4),%xmm0
4858  DB  15,20,216                           ; unpcklps      %xmm0,%xmm3
4859  DB  243,67,15,16,4,152                  ; movss         (%r8,%r11,4),%xmm0
4860  DB  243,67,15,16,12,136                 ; movss         (%r8,%r9,4),%xmm1
4861  DB  15,20,193                           ; unpcklps      %xmm1,%xmm0
4862  DB  15,20,195                           ; unpcklps      %xmm3,%xmm0
; ---- lookup 2: indices in xmm9, table at record+0x10, result -> xmm1 ----
4863  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
4864  DB  102,65,15,112,201,78                ; pshufd        $0x4e,%xmm9,%xmm1
4865  DB  102,73,15,126,202                   ; movq          %xmm1,%r10
4866  DB  77,137,209                          ; mov           %r10,%r9
4867  DB  73,193,233,32                       ; shr           $0x20,%r9
4868  DB  102,76,15,126,201                   ; movq          %xmm9,%rcx
4869  DB  65,137,203                          ; mov           %ecx,%r11d
; The 0xffffff masks leave the index unchanged (each lane is already <= 0xff
; after the pand above) and zero-extend it to 64 bits.
4870  DB  65,129,227,255,255,255,0            ; and           $0xffffff,%r11d
; rcx holds lanes 1:0; since lane 0 <= 0xff, shifting right by 30 yields
; lane1 * 4, i.e. a ready-made byte offset (hence the scale-1 load below).
4871  DB  72,193,233,30                       ; shr           $0x1e,%rcx
4872  DB  65,129,226,255,255,255,0            ; and           $0xffffff,%r10d
4873  DB  243,65,15,16,28,8                   ; movss         (%r8,%rcx,1),%xmm3
4874  DB  243,67,15,16,12,136                 ; movss         (%r8,%r9,4),%xmm1
4875  DB  15,20,217                           ; unpcklps      %xmm1,%xmm3
4876  DB  243,67,15,16,12,152                 ; movss         (%r8,%r11,4),%xmm1
4877  DB  243,67,15,16,20,144                 ; movss         (%r8,%r10,4),%xmm2
4878  DB  15,20,202                           ; unpcklps      %xmm2,%xmm1
4879  DB  15,20,203                           ; unpcklps      %xmm3,%xmm1
; ---- lookup 3: indices in xmm10, table at record+0x18, result -> xmm2 ----
; rax (the record pointer) is read for the last time here and then reused
; as scratch.
4880  DB  76,139,72,24                        ; mov           0x18(%rax),%r9
4881  DB  102,65,15,112,210,78                ; pshufd        $0x4e,%xmm10,%xmm2
4882  DB  102,72,15,126,209                   ; movq          %xmm2,%rcx
; movzwl keeps the low 16 bits; the index is <= 0xff so nothing is lost.
4883  DB  68,15,183,193                       ; movzwl        %cx,%r8d
4884  DB  72,193,233,32                       ; shr           $0x20,%rcx
4885  DB  102,76,15,126,208                   ; movq          %xmm10,%rax
4886  DB  68,15,183,208                       ; movzwl        %ax,%r10d
; Same shift-by-30 trick as above: rax = lane1 * 4 (byte offset).
4887  DB  72,193,232,30                       ; shr           $0x1e,%rax
4888  DB  243,69,15,16,12,1                   ; movss         (%r9,%rax,1),%xmm9
4889  DB  243,65,15,16,20,137                 ; movss         (%r9,%rcx,4),%xmm2
4890  DB  68,15,20,202                        ; unpcklps      %xmm2,%xmm9
4891  DB  243,67,15,16,20,145                 ; movss         (%r9,%r10,4),%xmm2
4892  DB  243,67,15,16,28,129                 ; movss         (%r9,%r8,4),%xmm3
4893  DB  15,20,211                           ; unpcklps      %xmm3,%xmm2
4894  DB  65,15,20,209                        ; unpcklps      %xmm9,%xmm2
; ---- byte 3: xmm3 = float(v >> 24) * 0x3b808081 (~1/255) ----
4895  DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
4896  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
4897  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4898  DB  102,15,110,216                      ; movd          %eax,%xmm3
4899  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
4900  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4901  DB  72,173                              ; lods          %ds:(%rsi),%rax
4902  DB  255,224                             ; jmpq          *%rax
4903
; _sk_load_a8_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, reads 4 bytes at base + rdi, and sets
;     xmm3 = float(byte) * ~1/255
;     xmm0 = xmm1 = xmm2 = 0
; Scratch: eax/rax.
; NOTE(review): rdi is presumably the current pixel index and the bytes 8-bit
; alpha values; confirm.
4904PUBLIC _sk_load_a8_sse2
4905_sk_load_a8_sse2 LABEL PROC
; rax = stage operand, then rax = base pointer stored at that address.
4906  DB  72,173                              ; lods          %ds:(%rsi),%rax
4907  DB  72,139,0                            ; mov           (%rax),%rax
; Load 4 bytes from base + rdi and zero-extend them to 4 dwords in xmm0.
4908  DB  102,15,110,4,56                     ; movd          (%rax,%rdi,1),%xmm0
4909  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
4910  DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
4911  DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
4912  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
; xmm3 = byte * 0x3b808081 (~1/255)
4913  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
4914  DB  102,15,110,216                      ; movd          %eax,%xmm3
4915  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
4916  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; Fetch the jump target, then clear xmm0..xmm2 before leaving.
4917  DB  72,173                              ; lods          %ds:(%rsi),%rax
4918  DB  15,87,192                           ; xorps         %xmm0,%xmm0
4919  DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
4920  DB  15,87,210                           ; xorps         %xmm2,%xmm2
4921  DB  255,224                             ; jmpq          *%rax
4922
; _sk_store_a8_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, converts xmm3 * 255.0 to integers, packs the four
; results down to bytes, and writes those 4 bytes to base + rdi.
; xmm0..xmm3 are unchanged. Scratch: rax, ecx, xmm8.
; NOTE(review): rdi is presumably the current pixel index; confirm.
4923PUBLIC _sk_store_a8_sse2
4924_sk_store_a8_sse2 LABEL PROC
; rax = stage operand, then rax = destination base pointer.
4925  DB  72,173                              ; lods          %ds:(%rsi),%rax
4926  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = 255.0 (0x437f0000) in every lane, then xmm8 = int32(255.0 * xmm3).
4927  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
4928  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
4929  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
4930  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
4931  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
; Sign-extend the low 16 bits of each dword so the signed-saturating
; packssdw below passes the low word through unchanged.
4932  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
4933  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
; 32 -> 16 bits, then 16 -> 8 bits (unsigned saturation).
4934  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
4935  DB  102,69,15,103,192                   ; packuswb      %xmm8,%xmm8
; Store the 4 packed bytes.
4936  DB  102,68,15,126,4,56                  ; movd          %xmm8,(%rax,%rdi,1)
; Fetch the next entry from the rsi stream and tail-jump to it.
4937  DB  72,173                              ; lods          %ds:(%rsi),%rax
4938  DB  255,224                             ; jmpq          *%rax
4939
; _sk_load_565_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, reads four 16-bit values at base + rdi*2, and unpacks
; three bit fields from each into floats in 0..1:
;     xmm0 = (v & 0xf800) * ~1/63488     (bits 11..15)
;     xmm1 = (v & 0x07e0) * ~1/2016      (bits  5..10)
;     xmm2 = (v & 0x001f) * ~1/31        (bits  0..4)
;     xmm3 = 1.0 in every lane
; All of xmm0..xmm3 are overwritten. Scratch: eax/rax.
; NOTE(review): rdi is presumably the current pixel index and the 16-bit
; values RGB565 pixels; confirm.
4940PUBLIC _sk_load_565_sse2
4941_sk_load_565_sse2 LABEL PROC
; rax = stage operand, then rax = source base pointer.
4942  DB  72,173                              ; lods          %ds:(%rsi),%rax
4943  DB  72,139,0                            ; mov           (%rax),%rax
; Load 8 bytes (four 16-bit values) and zero-extend them to 4 dwords in xmm2.
4944  DB  243,15,126,20,120                   ; movq          (%rax,%rdi,2),%xmm2
4945  DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
4946  DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
; xmm0 = float(v & 0xf800) * 0x37842108 (~1/63488)
4947  DB  184,0,248,0,0                       ; mov           $0xf800,%eax
4948  DB  102,15,110,192                      ; movd          %eax,%xmm0
4949  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
4950  DB  102,15,219,194                      ; pand          %xmm2,%xmm0
4951  DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
4952  DB  184,8,33,132,55                     ; mov           $0x37842108,%eax
4953  DB  102,15,110,192                      ; movd          %eax,%xmm0
4954  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
4955  DB  15,89,193                           ; mulps         %xmm1,%xmm0
; xmm1 = float(v & 0x7e0) * 0x3a020821 (~1/2016)
4956  DB  184,224,7,0,0                       ; mov           $0x7e0,%eax
4957  DB  102,15,110,200                      ; movd          %eax,%xmm1
4958  DB  102,15,112,201,0                    ; pshufd        $0x0,%xmm1,%xmm1
4959  DB  102,15,219,202                      ; pand          %xmm2,%xmm1
4960  DB  15,91,217                           ; cvtdq2ps      %xmm1,%xmm3
4961  DB  184,33,8,2,58                       ; mov           $0x3a020821,%eax
4962  DB  102,15,110,200                      ; movd          %eax,%xmm1
4963  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
4964  DB  15,89,203                           ; mulps         %xmm3,%xmm1
; xmm2 = float(v & 0x1f) * 0x3d042108 (~1/31)
4965  DB  184,31,0,0,0                        ; mov           $0x1f,%eax
4966  DB  102,15,110,216                      ; movd          %eax,%xmm3
4967  DB  102,15,112,219,0                    ; pshufd        $0x0,%xmm3,%xmm3
4968  DB  102,15,219,218                      ; pand          %xmm2,%xmm3
4969  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
4970  DB  184,8,33,4,61                       ; mov           $0x3d042108,%eax
4971  DB  102,15,110,208                      ; movd          %eax,%xmm2
4972  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
4973  DB  15,89,211                           ; mulps         %xmm3,%xmm2
; xmm3 = 1.0 (0x3f800000) in every lane.
4974  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
4975  DB  102,15,110,216                      ; movd          %eax,%xmm3
4976  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
; Fetch the next entry from the rsi stream and tail-jump to it.
4977  DB  72,173                              ; lods          %ds:(%rsi),%rax
4978  DB  255,224                             ; jmpq          *%rax
4979
; _sk_store_565_sse2
; Fetches one 8-byte entry from the stream at rsi, loads a base pointer from
; the address it names, and writes four 16-bit values to base + rdi*2, each
; built as
;     (int(xmm0 * 31.0) << 11) | (int(xmm1 * 63.0) << 5) | int(xmm2 * 31.0)
; No clamping is performed here; inputs outside 0..1 would spill into
; neighbouring bit fields. xmm0..xmm3 are unchanged.
; Scratch: rax, ecx, xmm8, xmm9, xmm10.
; NOTE(review): rdi is presumably the current pixel index and the output
; RGB565 pixels; confirm.
4980PUBLIC _sk_store_565_sse2
4981_sk_store_565_sse2 LABEL PROC
; rax = stage operand, then rax = destination base pointer.
4982  DB  72,173                              ; lods          %ds:(%rsi),%rax
4983  DB  72,139,0                            ; mov           (%rax),%rax
; xmm8 = 31.0 (0x41f80000) in every lane.
4984  DB  185,0,0,248,65                      ; mov           $0x41f80000,%ecx
4985  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
4986  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int32(31.0 * xmm0) << 11
4987  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
4988  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
4989  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
4990  DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
; xmm10 = (int32(63.0 * xmm1) << 5) | xmm9     (63.0 = 0x427c0000)
4991  DB  185,0,0,124,66                      ; mov           $0x427c0000,%ecx
4992  DB  102,68,15,110,209                   ; movd          %ecx,%xmm10
4993  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
4994  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
4995  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
4996  DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
4997  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm8 = int32(31.0 * xmm2) | xmm10
; (orpd is a plain bitwise OR; the floating-point form does not change the bits.)
4998  DB  68,15,89,194                        ; mulps         %xmm2,%xmm8
4999  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
5000  DB  102,69,15,86,194                    ; orpd          %xmm10,%xmm8
; Sign-extend the low 16 bits of each dword so the signed-saturating
; packssdw below passes the low word through unchanged.
5001  DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
5002  DB  102,65,15,114,224,16                ; psrad         $0x10,%xmm8
5003  DB  102,69,15,107,192                   ; packssdw      %xmm8,%xmm8
; Store the four packed 16-bit values (8 bytes).
5004  DB  102,68,15,214,4,120                 ; movq          %xmm8,(%rax,%rdi,2)
; Fetch the next entry from the rsi stream and tail-jump to it.
5005  DB  72,173                              ; lods          %ds:(%rsi),%rax
5006  DB  255,224                             ; jmpq          *%rax
5007
; _sk_load_8888_sse2
; Pipeline stage: reads four 32-bit values (16 bytes, unaligned load) from
; (src + rdi*4) and expands each of their four bytes to a float scaled by
; 0x3b808081 (approximately 1/255).
;   rsi  - stage program cursor; rdi - element index (set outside this stage).
;   Out: xmm0 = bits 7..0, xmm1 = bits 15..8, xmm2 = bits 23..16,
;        xmm3 = bits 31..24, each converted to float and multiplied by ~1/255.
; Clobbers rax and xmm8. Previous contents of xmm0-xmm3 are overwritten.
; The trailing disassembly comments use AT&T operand order (source, destination).
5008PUBLIC _sk_load_8888_sse2
5009_sk_load_8888_sse2 LABEL PROC
; rax = pointer stored in the context word; xmm3 = four packed 32-bit values.
5010  DB  72,173                              ; lods          %ds:(%rsi),%rax
5011  DB  72,139,0                            ; mov           (%rax),%rax
5012  DB  243,15,111,28,184                   ; movdqu        (%rax,%rdi,4),%xmm3
; xmm0 = 0x000000ff in every lane, used as the per-byte mask.
5013  DB  184,255,0,0,0                       ; mov           $0xff,%eax
5014  DB  102,15,110,192                      ; movd          %eax,%xmm0
5015  DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
; xmm1 = (v >> 8) & 0xff, xmm2 = (v >> 16) & 0xff, xmm0 = v & 0xff.
5016  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
5017  DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
5018  DB  102,15,219,200                      ; pand          %xmm0,%xmm1
5019  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
5020  DB  102,15,114,210,16                   ; psrld         $0x10,%xmm2
5021  DB  102,15,219,208                      ; pand          %xmm0,%xmm2
5022  DB  102,15,219,195                      ; pand          %xmm3,%xmm0
; Convert each channel to float and multiply by xmm8 = ~1/255.
5023  DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
5024  DB  184,129,128,128,59                  ; mov           $0x3b808081,%eax
5025  DB  102,68,15,110,192                   ; movd          %eax,%xmm8
5026  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
5027  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
5028  DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
5029  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
5030  DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
5031  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
; The top byte needs no mask: the logical right shift by 24 already isolates it.
5032  DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
5033  DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
5034  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
; Load the next stage's address from the program and tail-jump to it.
5035  DB  72,173                              ; lods          %ds:(%rsi),%rax
5036  DB  255,224                             ; jmpq          *%rax
5037
; _sk_store_8888_sse2
; Pipeline stage: multiplies xmm0..xmm3 by 255.0f, converts them to integers,
; packs the four results of each lane into one 32-bit value and stores four
; such values (16 bytes, unaligned store) at (dst + rdi*4).
;   rsi  - stage program cursor; rdi - element index (set outside this stage).
;   xmm0 -> bits 7..0, xmm1 -> bits 15..8, xmm2 -> bits 23..16, xmm3 -> bits 31..24.
; Clobbers rax, rcx, xmm8-xmm10; xmm0-xmm3 are left untouched.
; No clamping happens here: inputs outside [0,1] would spill into other bytes.
; The trailing disassembly comments use AT&T operand order (source, destination).
5038PUBLIC _sk_store_8888_sse2
5039_sk_store_8888_sse2 LABEL PROC
; rax = pointer stored in the context word; xmm8 = 255.0f (0x437f0000) in every lane.
5040  DB  72,173                              ; lods          %ds:(%rsi),%rax
5041  DB  72,139,0                            ; mov           (%rax),%rax
5042  DB  185,0,0,127,67                      ; mov           $0x437f0000,%ecx
5043  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
5044  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = int(xmm0 * 255)
5045  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
5046  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
5047  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
; xmm10 = (int(xmm1 * 255) << 8) | xmm9
5048  DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
5049  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
5050  DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
5051  DB  102,65,15,114,242,8                 ; pslld         $0x8,%xmm10
5052  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
; xmm9 = int(xmm2 * 255) << 16
5053  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
5054  DB  68,15,89,202                        ; mulps         %xmm2,%xmm9
5055  DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
5056  DB  102,65,15,114,241,16                ; pslld         $0x10,%xmm9
; xmm8 = (int(xmm3 * 255) << 24) | xmm9 | xmm10
5057  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
5058  DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
5059  DB  102,65,15,114,240,24                ; pslld         $0x18,%xmm8
5060  DB  102,69,15,235,193                   ; por           %xmm9,%xmm8
5061  DB  102,69,15,235,194                   ; por           %xmm10,%xmm8
; Store the four packed 32-bit values.
5062  DB  243,68,15,127,4,184                 ; movdqu        %xmm8,(%rax,%rdi,4)
; Load the next stage's address from the program and tail-jump to it.
5063  DB  72,173                              ; lods          %ds:(%rsi),%rax
5064  DB  255,224                             ; jmpq          *%rax
5065
; _sk_load_f16_sse2
; Pipeline stage: reads 32 bytes from (src + rdi*8) -- four elements of four
; 16-bit values each -- de-interleaves them into four planes and widens each
; 16-bit value to a 32-bit float in xmm0..xmm3.
;   rsi  - stage program cursor; rdi - element index (set outside this stage).
;   Out: xmm0..xmm3 = 1st..4th 16-bit component of the four elements, as floats.
; Widening is done by zero-extending to 32 bits, shifting left by 13 and
; multiplying by 2^112 (0x77800000); this looks like a binary16 -> binary32
; conversion with exponent re-bias -- confirm against the stage source.
; Any 16-bit input that compares below 0x0400 as a SIGNED word is replaced by
; zero first: that covers values with a zero exponent field and also every
; value with bit 15 set.
; Clobbers rax, xmm8, xmm9.
; The trailing disassembly comments use AT&T operand order (source, destination).
5066PUBLIC _sk_load_f16_sse2
5067_sk_load_f16_sse2 LABEL PROC
; rax = pointer stored in the context word; load elements 0-1 and 2-3.
5068  DB  72,173                              ; lods          %ds:(%rsi),%rax
5069  DB  72,139,0                            ; mov           (%rax),%rax
5070  DB  243,15,111,4,248                    ; movdqu        (%rax,%rdi,8),%xmm0
5071  DB  243,15,111,76,248,16                ; movdqu        0x10(%rax,%rdi,8),%xmm1
; Two rounds of 16-bit interleaving transpose the data so that
; xmm8 = {component 0 x4, component 1 x4} and xmm2 = {component 2 x4, component 3 x4}.
5072  DB  102,15,111,208                      ; movdqa        %xmm0,%xmm2
5073  DB  102,15,97,209                       ; punpcklwd     %xmm1,%xmm2
5074  DB  102,15,105,193                      ; punpckhwd     %xmm1,%xmm0
5075  DB  102,68,15,111,194                   ; movdqa        %xmm2,%xmm8
5076  DB  102,68,15,97,192                    ; punpcklwd     %xmm0,%xmm8
5077  DB  102,15,105,208                      ; punpckhwd     %xmm0,%xmm2
; Broadcast 0x0400 to every word, then zero each word w where 0x0400 > w (signed):
; xmm1 = filtered components 0/1, xmm3 = filtered components 2/3.
5078  DB  184,0,4,0,4                         ; mov           $0x4000400,%eax
5079  DB  102,15,110,192                      ; movd          %eax,%xmm0
5080  DB  102,15,112,216,0                    ; pshufd        $0x0,%xmm0,%xmm3
5081  DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
5082  DB  102,65,15,101,200                   ; pcmpgtw       %xmm8,%xmm1
5083  DB  102,65,15,223,200                   ; pandn         %xmm8,%xmm1
5084  DB  102,15,101,218                      ; pcmpgtw       %xmm2,%xmm3
5085  DB  102,15,223,218                      ; pandn         %xmm2,%xmm3
; xmm8 = 0 (zero-extension source); xmm9 = 2^112 in every lane.
; For each plane: zero-extend words to dwords, shift left 13, multiply by xmm9.
5086  DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
5087  DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
5088  DB  102,65,15,97,192                    ; punpcklwd     %xmm8,%xmm0
5089  DB  102,15,114,240,13                   ; pslld         $0xd,%xmm0
5090  DB  184,0,0,128,119                     ; mov           $0x77800000,%eax
5091  DB  102,15,110,208                      ; movd          %eax,%xmm2
5092  DB  102,68,15,112,202,0                 ; pshufd        $0x0,%xmm2,%xmm9
5093  DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
5094  DB  102,65,15,105,200                   ; punpckhwd     %xmm8,%xmm1
5095  DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
5096  DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
5097  DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
5098  DB  102,65,15,97,208                    ; punpcklwd     %xmm8,%xmm2
5099  DB  102,15,114,242,13                   ; pslld         $0xd,%xmm2
5100  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
5101  DB  102,65,15,105,216                   ; punpckhwd     %xmm8,%xmm3
5102  DB  102,15,114,243,13                   ; pslld         $0xd,%xmm3
5103  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
; Load the next stage's address from the program and tail-jump to it.
5104  DB  72,173                              ; lods          %ds:(%rsi),%rax
5105  DB  255,224                             ; jmpq          *%rax
5106
; _sk_store_f16_sse2
; Pipeline stage: narrows the floats in xmm0..xmm3 to 16-bit values, interleaves
; them element by element and stores 32 bytes (four elements of four 16-bit
; values) at (dst + rdi*8).
;   rsi  - stage program cursor; rdi - element index (set outside this stage).
; Narrowing multiplies by 2^-112 (0x07800000) and logically shifts the float
; bit pattern right by 13; this looks like a binary32 -> binary16 conversion
; with exponent re-bias -- confirm against the stage source.
; NOTE(review): nothing masks the shifted result to 16 bits. A lane with bits
; left above bit 15 (e.g. a set sign bit) would leak into the neighbouring
; 16-bit slot through the pslldq/por below -- confirm inputs are restricted.
; Clobbers rax, rcx, xmm8-xmm11; xmm0-xmm3 are left untouched.
; The trailing disassembly comments use AT&T operand order (source, destination).
5107PUBLIC _sk_store_f16_sse2
5108_sk_store_f16_sse2 LABEL PROC
; rax = pointer stored in the context word; xmm8 = 2^-112 in every lane.
5109  DB  72,173                              ; lods          %ds:(%rsi),%rax
5110  DB  72,139,0                            ; mov           (%rax),%rax
5111  DB  185,0,0,128,7                       ; mov           $0x7800000,%ecx
5112  DB  102,68,15,110,193                   ; movd          %ecx,%xmm8
5113  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
; xmm9, xmm10, xmm11, xmm8 = bits(xmm0, xmm1, xmm2, xmm3 * 2^-112) >> 13.
5114  DB  102,69,15,111,200                   ; movdqa        %xmm8,%xmm9
5115  DB  68,15,89,200                        ; mulps         %xmm0,%xmm9
5116  DB  102,65,15,114,209,13                ; psrld         $0xd,%xmm9
5117  DB  102,69,15,111,208                   ; movdqa        %xmm8,%xmm10
5118  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
5119  DB  102,65,15,114,210,13                ; psrld         $0xd,%xmm10
5120  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
5121  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
5122  DB  102,65,15,114,211,13                ; psrld         $0xd,%xmm11
5123  DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
5124  DB  102,65,15,114,208,13                ; psrld         $0xd,%xmm8
; Shift the whole register left by 2 bytes to move each low word into the high
; half of its dword, then OR in the partner plane:
; xmm10 = {from xmm0 (low word), from xmm1 (high word)} per lane,
; xmm8  = {from xmm2 (low word), from xmm3 (high word)} per lane.
5125  DB  102,65,15,115,250,2                 ; pslldq        $0x2,%xmm10
5126  DB  102,69,15,235,209                   ; por           %xmm9,%xmm10
5127  DB  102,65,15,115,248,2                 ; pslldq        $0x2,%xmm8
5128  DB  102,69,15,235,195                   ; por           %xmm11,%xmm8
; Interleave dwords so each 8-byte group holds one element's four values;
; elements 0-1 go to offset 0 and elements 2-3 to offset 16.
5129  DB  102,69,15,111,202                   ; movdqa        %xmm10,%xmm9
5130  DB  102,69,15,98,200                    ; punpckldq     %xmm8,%xmm9
5131  DB  243,68,15,127,12,248                ; movdqu        %xmm9,(%rax,%rdi,8)
5132  DB  102,69,15,106,208                   ; punpckhdq     %xmm8,%xmm10
5133  DB  243,68,15,127,84,248,16             ; movdqu        %xmm10,0x10(%rax,%rdi,8)
; Load the next stage's address from the program and tail-jump to it.
5134  DB  72,173                              ; lods          %ds:(%rsi),%rax
5135  DB  255,224                             ; jmpq          *%rax
5136
; _sk_store_f32_sse2
; Pipeline stage: transposes the 4x4 float matrix held in xmm0..xmm3 (one plane
; per register, one element per lane) into element-major order and stores the
; 64 bytes at (dst + rdi*16): lane i of xmm0, xmm1, xmm2, xmm3 becomes the four
; consecutive floats of element i.
;   rsi  - stage program cursor; rdi - element index (set outside this stage).
; Clobbers rax, rcx, xmm8-xmm12; xmm0-xmm3 are left untouched.
; The trailing disassembly comments use AT&T operand order (source, destination).
5137PUBLIC _sk_store_f32_sse2
5138_sk_store_f32_sse2 LABEL PROC
; rax = pointer stored in the context word; rcx = rdi * 16 (byte offset).
5139  DB  72,173                              ; lods          %ds:(%rsi),%rax
5140  DB  72,139,0                            ; mov           (%rax),%rax
5141  DB  72,137,249                          ; mov           %rdi,%rcx
5142  DB  72,193,225,4                        ; shl           $0x4,%rcx
; First pass, 32-bit interleave:
;   xmm9  = {x0[0], x1[0], x0[1], x1[1]}    xmm11 = {x2[0], x3[0], x2[1], x3[1]}
;   xmm8  = {x0[2], x1[2], x0[3], x1[3]}    xmm10 = {x2[2], x3[2], x2[3], x3[3]}
; (xN[i] = lane i of xmmN)
5143  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
5144  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
5145  DB  68,15,20,201                        ; unpcklps      %xmm1,%xmm9
5146  DB  68,15,40,210                        ; movaps        %xmm2,%xmm10
5147  DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
5148  DB  68,15,20,219                        ; unpcklps      %xmm3,%xmm11
5149  DB  68,15,21,193                        ; unpckhps      %xmm1,%xmm8
5150  DB  68,15,21,211                        ; unpckhps      %xmm3,%xmm10
; Second pass, 64-bit combine: xmm12 = element 0, xmm11 = element 1,
; xmm9 = element 2, xmm10 = element 3.
5151  DB  69,15,40,225                        ; movaps        %xmm9,%xmm12
5152  DB  102,69,15,20,227                    ; unpcklpd      %xmm11,%xmm12
5153  DB  69,15,18,217                        ; movhlps       %xmm9,%xmm11
5154  DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
5155  DB  102,69,15,20,202                    ; unpcklpd      %xmm10,%xmm9
5156  DB  69,15,18,208                        ; movhlps       %xmm8,%xmm10
; Unaligned stores of the four elements at offsets 0, 16, 32 and 48.
5157  DB  102,68,15,17,36,8                   ; movupd        %xmm12,(%rax,%rcx,1)
5158  DB  68,15,17,92,8,16                    ; movups        %xmm11,0x10(%rax,%rcx,1)
5159  DB  102,68,15,17,76,8,32                ; movupd        %xmm9,0x20(%rax,%rcx,1)
5160  DB  68,15,17,84,8,48                    ; movups        %xmm10,0x30(%rax,%rcx,1)
; Load the next stage's address from the program and tail-jump to it.
5161  DB  72,173                              ; lods          %ds:(%rsi),%rax
5162  DB  255,224                             ; jmpq          *%rax
5163
; _sk_clamp_x_sse2
; Pipeline stage: xmm0 = min(max(0, xmm0), L'), where L is the float that the
; context word points at and L' is L's bit pattern minus one.
; For a positive finite L, L' is the largest float strictly below L, so the
; result stays in [0, L).
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8, xmm9.
; The trailing disassembly comments use AT&T operand order (source, destination).
5164PUBLIC _sk_clamp_x_sse2
5165_sk_clamp_x_sse2 LABEL PROC
5166  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = max(0, xmm0)
5167  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
5168  DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
; xmm9 = L broadcast to every lane.
5169  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
5170  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm0 = all-ones (-1 per lane) + bits(L), i.e. L with its bit pattern decremented.
5171  DB  102,15,118,192                      ; pcmpeqd       %xmm0,%xmm0
5172  DB  102,65,15,254,193                   ; paddd         %xmm9,%xmm0
5173  DB  68,15,93,192                        ; minps         %xmm0,%xmm8
; Fetch the next stage's address, move the result into xmm0 and tail-jump.
5174  DB  72,173                              ; lods          %ds:(%rsi),%rax
5175  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
5176  DB  255,224                             ; jmpq          *%rax
5177
; _sk_clamp_y_sse2
; Pipeline stage: xmm1 = min(max(0, xmm1), L'), where L is the float that the
; context word points at and L' is L's bit pattern minus one.
; For a positive finite L, L' is the largest float strictly below L, so the
; result stays in [0, L). Same code as the x variant, applied to xmm1.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8, xmm9.
; The trailing disassembly comments use AT&T operand order (source, destination).
5178PUBLIC _sk_clamp_y_sse2
5179_sk_clamp_y_sse2 LABEL PROC
5180  DB  72,173                              ; lods          %ds:(%rsi),%rax
; xmm8 = max(0, xmm1)
5181  DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
5182  DB  68,15,95,193                        ; maxps         %xmm1,%xmm8
; xmm9 = L broadcast to every lane.
5183  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
5184  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm1 = all-ones (-1 per lane) + bits(L), i.e. L with its bit pattern decremented.
5185  DB  102,15,118,201                      ; pcmpeqd       %xmm1,%xmm1
5186  DB  102,65,15,254,201                   ; paddd         %xmm9,%xmm1
5187  DB  68,15,93,193                        ; minps         %xmm1,%xmm8
; Fetch the next stage's address, move the result into xmm1 and tail-jump.
5188  DB  72,173                              ; lods          %ds:(%rsi),%rax
5189  DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
5190  DB  255,224                             ; jmpq          *%rax
5191
; _sk_repeat_x_sse2
; Pipeline stage: xmm0 = min(xmm0 - floor(xmm0 / L) * L, L'), where L is the
; float that the context word points at and L' is L's bit pattern minus one
; (the largest float below L when L is positive and finite).
; floor() is built from a truncating float->int32 conversion plus a correction,
; so it is only meaningful while xmm0 / L fits in a signed 32-bit integer.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm11.
; The trailing disassembly comments use AT&T operand order (source, destination).
5192PUBLIC _sk_repeat_x_sse2
5193_sk_repeat_x_sse2 LABEL PROC
; xmm8 = L broadcast to every lane.
5194  DB  72,173                              ; lods          %ds:(%rsi),%rax
5195  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
5196  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = q = xmm0 / L;  xmm10 = trunc(q) converted back to float.
5197  DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
5198  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
5199  DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
5200  DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
; Where q < trunc(q) (negative non-integers) subtract 1.0f, giving floor(q).
5201  DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
5202  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
5203  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
5204  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5205  DB  69,15,84,217                        ; andps         %xmm9,%xmm11
5206  DB  69,15,92,211                        ; subps         %xmm11,%xmm10
; xmm0 -= floor(q) * L
5207  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
5208  DB  65,15,92,194                        ; subps         %xmm10,%xmm0
; xmm9 = bits(L) - 1; clamp the result from above with it.
5209  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
5210  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
5211  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
; Load the next stage's address from the program and tail-jump to it.
5212  DB  72,173                              ; lods          %ds:(%rsi),%rax
5213  DB  255,224                             ; jmpq          *%rax
5214
; _sk_repeat_y_sse2
; Pipeline stage: xmm1 = min(xmm1 - floor(xmm1 / L) * L, L'), where L is the
; float that the context word points at and L' is L's bit pattern minus one
; (the largest float below L when L is positive and finite).
; Same code as the x variant, applied to xmm1. floor() is built from a
; truncating float->int32 conversion plus a correction, so it is only
; meaningful while xmm1 / L fits in a signed 32-bit integer.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm11.
; The trailing disassembly comments use AT&T operand order (source, destination).
5215PUBLIC _sk_repeat_y_sse2
5216_sk_repeat_y_sse2 LABEL PROC
; xmm8 = L broadcast to every lane.
5217  DB  72,173                              ; lods          %ds:(%rsi),%rax
5218  DB  243,68,15,16,0                      ; movss         (%rax),%xmm8
5219  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
; xmm9 = q = xmm1 / L;  xmm10 = trunc(q) converted back to float.
5220  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
5221  DB  69,15,94,200                        ; divps         %xmm8,%xmm9
5222  DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
5223  DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
; Where q < trunc(q) (negative non-integers) subtract 1.0f, giving floor(q).
5224  DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
5225  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
5226  DB  102,68,15,110,216                   ; movd          %eax,%xmm11
5227  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5228  DB  69,15,84,217                        ; andps         %xmm9,%xmm11
5229  DB  69,15,92,211                        ; subps         %xmm11,%xmm10
; xmm1 -= floor(q) * L
5230  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
5231  DB  65,15,92,202                        ; subps         %xmm10,%xmm1
; xmm9 = bits(L) - 1; clamp the result from above with it.
5232  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
5233  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
5234  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
; Load the next stage's address from the program and tail-jump to it.
5235  DB  72,173                              ; lods          %ds:(%rsi),%rax
5236  DB  255,224                             ; jmpq          *%rax
5237
; _sk_mirror_x_sse2
; Pipeline stage: with L = the float that the context word points at,
;   t    = xmm0 - L
;   t    = t - floor(t / (2*L)) * (2*L)
;   xmm0 = min(|t - L|, L')
; where L' is L's bit pattern minus one (the largest float below L when L is
; positive and finite). This folds the input into a triangle wave of period 2*L.
; floor() is built from a truncating float->int32 conversion plus a correction,
; so it is only meaningful while t / (2*L) fits in a signed 32-bit integer.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm12.
; The trailing disassembly comments use AT&T operand order (source, destination).
5238PUBLIC _sk_mirror_x_sse2
5239_sk_mirror_x_sse2 LABEL PROC
; xmm8 = L in every lane; xmm0 = xmm0 - L; xmm9 = 2*L in every lane.
5240  DB  72,173                              ; lods          %ds:(%rsi),%rax
5241  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
5242  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
5243  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
5244  DB  65,15,92,192                        ; subps         %xmm8,%xmm0
5245  DB  243,69,15,88,201                    ; addss         %xmm9,%xmm9
5246  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm10 = q = t / (2*L);  xmm11 = trunc(q) converted back to float.
5247  DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
5248  DB  69,15,94,209                        ; divps         %xmm9,%xmm10
5249  DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
5250  DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
; Where q < trunc(q) subtract 1.0f, giving floor(q); xmm10 is then reused as zero.
5251  DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
5252  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
5253  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
5254  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5255  DB  69,15,84,226                        ; andps         %xmm10,%xmm12
5256  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
5257  DB  69,15,92,220                        ; subps         %xmm12,%xmm11
; t -= floor(q) * 2L;  t -= L
5258  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
5259  DB  65,15,92,195                        ; subps         %xmm11,%xmm0
5260  DB  65,15,92,192                        ; subps         %xmm8,%xmm0
; |t| computed as t & (0 - t): the two operands differ only in the sign bit.
5261  DB  68,15,92,208                        ; subps         %xmm0,%xmm10
5262  DB  65,15,84,194                        ; andps         %xmm10,%xmm0
; xmm9 = bits(L) - 1; clamp the result from above with it.
5263  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
5264  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
5265  DB  65,15,93,193                        ; minps         %xmm9,%xmm0
; Load the next stage's address from the program and tail-jump to it.
5266  DB  72,173                              ; lods          %ds:(%rsi),%rax
5267  DB  255,224                             ; jmpq          *%rax
5268
; _sk_mirror_y_sse2
; Pipeline stage: with L = the float that the context word points at,
;   t    = xmm1 - L
;   t    = t - floor(t / (2*L)) * (2*L)
;   xmm1 = min(|t - L|, L')
; where L' is L's bit pattern minus one (the largest float below L when L is
; positive and finite). Same code as the x variant, applied to xmm1.
; floor() is built from a truncating float->int32 conversion plus a correction,
; so it is only meaningful while t / (2*L) fits in a signed 32-bit integer.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm12.
; The trailing disassembly comments use AT&T operand order (source, destination).
5269PUBLIC _sk_mirror_y_sse2
5270_sk_mirror_y_sse2 LABEL PROC
; xmm8 = L in every lane; xmm1 = xmm1 - L; xmm9 = 2*L in every lane.
5271  DB  72,173                              ; lods          %ds:(%rsi),%rax
5272  DB  243,68,15,16,8                      ; movss         (%rax),%xmm9
5273  DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
5274  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
5275  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
5276  DB  243,69,15,88,201                    ; addss         %xmm9,%xmm9
5277  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
; xmm10 = q = t / (2*L);  xmm11 = trunc(q) converted back to float.
5278  DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
5279  DB  69,15,94,209                        ; divps         %xmm9,%xmm10
5280  DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
5281  DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
; Where q < trunc(q) subtract 1.0f, giving floor(q); xmm10 is then reused as zero.
5282  DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
5283  DB  184,0,0,128,63                      ; mov           $0x3f800000,%eax
5284  DB  102,68,15,110,224                   ; movd          %eax,%xmm12
5285  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5286  DB  69,15,84,226                        ; andps         %xmm10,%xmm12
5287  DB  69,15,87,210                        ; xorps         %xmm10,%xmm10
5288  DB  69,15,92,220                        ; subps         %xmm12,%xmm11
; t -= floor(q) * 2L;  t -= L
5289  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
5290  DB  65,15,92,203                        ; subps         %xmm11,%xmm1
5291  DB  65,15,92,200                        ; subps         %xmm8,%xmm1
; |t| computed as t & (0 - t): the two operands differ only in the sign bit.
5292  DB  68,15,92,209                        ; subps         %xmm1,%xmm10
5293  DB  65,15,84,202                        ; andps         %xmm10,%xmm1
; xmm9 = bits(L) - 1; clamp the result from above with it.
5294  DB  102,69,15,118,201                   ; pcmpeqd       %xmm9,%xmm9
5295  DB  102,69,15,254,200                   ; paddd         %xmm8,%xmm9
5296  DB  65,15,93,201                        ; minps         %xmm9,%xmm1
; Load the next stage's address from the program and tail-jump to it.
5297  DB  72,173                              ; lods          %ds:(%rsi),%rax
5298  DB  255,224                             ; jmpq          *%rax
5299
; _sk_luminance_to_alpha_sse2
; Pipeline stage: xmm3 = c0*xmm0 + c1*xmm1 + c2*xmm2, then xmm0 = xmm1 = xmm2 = 0.
;   c0 = 0x3e59b3d0 (~0.2126), c1 = 0x3f371759 (~0.7152), c2 = 0x3d93dd98 (~0.0722).
; The previous contents of xmm3 are discarded. This stage takes no context word:
; the only lods is the one that fetches the next stage's address.
; Clobbers rax.
; The trailing disassembly comments use AT&T operand order (source, destination).
5300PUBLIC _sk_luminance_to_alpha_sse2
5301_sk_luminance_to_alpha_sse2 LABEL PROC
; xmm3 = c0 * xmm0
5302  DB  184,208,179,89,62                   ; mov           $0x3e59b3d0,%eax
5303  DB  102,15,110,216                      ; movd          %eax,%xmm3
5304  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
5305  DB  15,89,216                           ; mulps         %xmm0,%xmm3
; xmm0 = c1 * xmm1 + xmm3   (xmm0's input value is no longer needed)
5306  DB  184,89,23,55,63                     ; mov           $0x3f371759,%eax
5307  DB  102,15,110,192                      ; movd          %eax,%xmm0
5308  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
5309  DB  15,89,193                           ; mulps         %xmm1,%xmm0
5310  DB  15,88,195                           ; addps         %xmm3,%xmm0
; xmm3 = c2 * xmm2 + xmm0
5311  DB  184,152,221,147,61                  ; mov           $0x3d93dd98,%eax
5312  DB  102,15,110,216                      ; movd          %eax,%xmm3
5313  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
5314  DB  15,89,218                           ; mulps         %xmm2,%xmm3
5315  DB  15,88,216                           ; addps         %xmm0,%xmm3
; Fetch the next stage's address, zero xmm0-xmm2 and tail-jump.
5316  DB  72,173                              ; lods          %ds:(%rsi),%rax
5317  DB  15,87,192                           ; xorps         %xmm0,%xmm0
5318  DB  15,87,201                           ; xorps         %xmm1,%xmm1
5319  DB  15,87,210                           ; xorps         %xmm2,%xmm2
5320  DB  255,224                             ; jmpq          *%rax
5321
; _sk_matrix_2x3_sse2
; Pipeline stage: applies a 2x3 affine matrix to (xmm0, xmm1). With m = the
; array of six floats that the context word points at, and x, y = the incoming
; xmm0, xmm1:
;   xmm0 = m[0]*x + m[2]*y + m[4]
;   xmm1 = m[1]*x + m[3]*y + m[5]
; i.e. the coefficients are read in column-major order.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm11. xmm2 and xmm3 are left untouched.
; The trailing disassembly comments use AT&T operand order (source, destination).
5322PUBLIC _sk_matrix_2x3_sse2
5323_sk_matrix_2x3_sse2 LABEL PROC
; Save the inputs (xmm8 = x, xmm9 = y) because xmm0/xmm1 are reused for outputs.
5324  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
5325  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
5326  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Row 0: xmm0 = m[0]*x + (m[2]*y + m[4]).  m[1] is loaded early into xmm1.
5327  DB  243,15,16,0                         ; movss         (%rax),%xmm0
5328  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
5329  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
5330  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
5331  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5332  DB  243,68,15,16,88,16                  ; movss         0x10(%rax),%xmm11
5333  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5334  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5335  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5336  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
5337  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Row 1: xmm1 = m[1]*x + (m[3]*y + m[5])
5338  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
5339  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
5340  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5341  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
5342  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5343  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5344  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5345  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
5346  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Load the next stage's address from the program and tail-jump to it.
5347  DB  72,173                              ; lods          %ds:(%rsi),%rax
5348  DB  255,224                             ; jmpq          *%rax
5349
; _sk_matrix_3x4_sse2
; Pipeline stage: applies a 3x4 affine matrix to (xmm0, xmm1, xmm2). With m =
; the array of twelve floats that the context word points at, and x, y, z =
; the incoming xmm0, xmm1, xmm2:
;   xmm0 = m[0]*x + m[3]*y + m[6]*z + m[9]
;   xmm1 = m[1]*x + m[4]*y + m[7]*z + m[10]
;   xmm2 = m[2]*x + m[5]*y + m[8]*z + m[11]
; i.e. the coefficients are read in column-major order.
;   rsi - stage program cursor. The context pointer is read directly as a float*.
; Clobbers rax, xmm8-xmm13. xmm3 is left untouched.
; The trailing disassembly comments use AT&T operand order (source, destination).
5350PUBLIC _sk_matrix_3x4_sse2
5351_sk_matrix_3x4_sse2 LABEL PROC
; Save x and y (xmm8, xmm9); z stays live in xmm2 until the very end.
5352  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
5353  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
5354  DB  72,173                              ; lods          %ds:(%rsi),%rax
; Row 0: xmm0 = m[0]*x + (m[3]*y + (m[6]*z + m[9])).  m[1] is loaded early into xmm1.
5355  DB  243,15,16,0                         ; movss         (%rax),%xmm0
5356  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
5357  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
5358  DB  243,68,15,16,80,12                  ; movss         0xc(%rax),%xmm10
5359  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5360  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
5361  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5362  DB  243,68,15,16,96,36                  ; movss         0x24(%rax),%xmm12
5363  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5364  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
5365  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5366  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5367  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5368  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
5369  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
; Row 1: xmm1 = m[1]*x + (m[4]*y + (m[7]*z + m[10]))
5370  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
5371  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
5372  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5373  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
5374  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5375  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
5376  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5377  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
5378  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5379  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5380  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5381  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
5382  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
; Row 2: xmm10 = m[2]*x + (m[5]*y + (m[8]*z + m[11])), kept in xmm10 because
; xmm2 still holds z while it is being read.
5383  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
5384  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5385  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
5386  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5387  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
5388  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5389  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
5390  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
5391  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
5392  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
5393  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
5394  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5395  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
5396  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
; Fetch the next stage's address, move row 2 into xmm2 and tail-jump.
5397  DB  72,173                              ; lods          %ds:(%rsi),%rax
5398  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
5399  DB  255,224                             ; jmpq          *%rax
5400
5401PUBLIC _sk_matrix_4x5_sse2
5402_sk_matrix_4x5_sse2 LABEL PROC
5403  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
5404  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
5405  DB  72,173                              ; lods          %ds:(%rsi),%rax
5406  DB  243,15,16,0                         ; movss         (%rax),%xmm0
5407  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
5408  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
5409  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
5410  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5411  DB  243,68,15,16,88,32                  ; movss         0x20(%rax),%xmm11
5412  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5413  DB  243,68,15,16,96,48                  ; movss         0x30(%rax),%xmm12
5414  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5415  DB  243,68,15,16,104,64                 ; movss         0x40(%rax),%xmm13
5416  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
5417  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
5418  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
5419  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
5420  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5421  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5422  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5423  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
5424  DB  65,15,88,194                        ; addps         %xmm10,%xmm0
5425  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
5426  DB  243,68,15,16,80,20                  ; movss         0x14(%rax),%xmm10
5427  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5428  DB  243,68,15,16,88,36                  ; movss         0x24(%rax),%xmm11
5429  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5430  DB  243,68,15,16,96,52                  ; movss         0x34(%rax),%xmm12
5431  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5432  DB  243,68,15,16,104,68                 ; movss         0x44(%rax),%xmm13
5433  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
5434  DB  68,15,89,227                        ; mulps         %xmm3,%xmm12
5435  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
5436  DB  68,15,89,218                        ; mulps         %xmm2,%xmm11
5437  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5438  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
5439  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5440  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
5441  DB  65,15,88,202                        ; addps         %xmm10,%xmm1
5442  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
5443  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5444  DB  243,68,15,16,88,24                  ; movss         0x18(%rax),%xmm11
5445  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5446  DB  243,68,15,16,96,40                  ; movss         0x28(%rax),%xmm12
5447  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5448  DB  243,68,15,16,104,56                 ; movss         0x38(%rax),%xmm13
5449  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
5450  DB  243,68,15,16,112,72                 ; movss         0x48(%rax),%xmm14
5451  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
5452  DB  68,15,89,235                        ; mulps         %xmm3,%xmm13
5453  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
5454  DB  68,15,89,226                        ; mulps         %xmm2,%xmm12
5455  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
5456  DB  69,15,89,217                        ; mulps         %xmm9,%xmm11
5457  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5458  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
5459  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5460  DB  243,68,15,16,88,12                  ; movss         0xc(%rax),%xmm11
5461  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5462  DB  243,68,15,16,96,28                  ; movss         0x1c(%rax),%xmm12
5463  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5464  DB  243,68,15,16,104,44                 ; movss         0x2c(%rax),%xmm13
5465  DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
5466  DB  243,68,15,16,112,60                 ; movss         0x3c(%rax),%xmm14
5467  DB  69,15,198,246,0                     ; shufps        $0x0,%xmm14,%xmm14
5468  DB  243,68,15,16,120,76                 ; movss         0x4c(%rax),%xmm15
5469  DB  69,15,198,255,0                     ; shufps        $0x0,%xmm15,%xmm15
5470  DB  68,15,89,243                        ; mulps         %xmm3,%xmm14
5471  DB  69,15,88,247                        ; addps         %xmm15,%xmm14
5472  DB  68,15,89,234                        ; mulps         %xmm2,%xmm13
5473  DB  69,15,88,238                        ; addps         %xmm14,%xmm13
5474  DB  69,15,89,225                        ; mulps         %xmm9,%xmm12
5475  DB  69,15,88,229                        ; addps         %xmm13,%xmm12
5476  DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
5477  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5478  DB  72,173                              ; lods          %ds:(%rsi),%rax
5479  DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
5480  DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
5481  DB  255,224                             ; jmpq          *%rax
5482
; _sk_matrix_perspective_sse2
;
; Raw-byte encoding of one stage; the trailing comment on each DB line is the
; AT&T-syntax disassembly (source operand first, destination last).
;
; Inputs:
;   xmm0  four packed floats, called x below
;   xmm1  four packed floats, called y below
;   rsi   points at two consecutive 8-byte entries: first a pointer to nine
;         consecutive 32-bit floats m[0..8], then the address to jump to when
;         this stage is done
; Outputs:
;   xmm0 = (m[0]*x + (m[1]*y + m[2])) * r
;   xmm1 = (m[3]*x + (m[4]*y + m[5])) * r
;          where r = rcpps(m[6]*x + (m[7]*y + m[8])), an approximate
;          reciprocal, so this is a divide by the third row only to the
;          precision rcpps provides
;   rsi  advanced past both entries (two lods; assumes the direction flag is
;        clear -- not established in this stage)
; Overwritten: rax, xmm8-xmm12.  xmm2 and xmm3 are not touched.
; Exit: indirect jump through rax, no ret.
; NOTE(review): the m[] layout reads like a row-major 3x3 matrix; confirm
; against the code that fills the context.
5483PUBLIC _sk_matrix_perspective_sse2
5484_sk_matrix_perspective_sse2 LABEL PROC
  ; Keep x in xmm8 because xmm0 is reused for the first result.
5485  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
  ; rax = pointer to m[]; rsi moves on to the next entry.
5486  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; First sum: each scalar is loaded with movss and replicated to all four
  ; lanes with shufps $0 before use.
5487  DB  243,15,16,0                         ; movss         (%rax),%xmm0
5488  DB  243,68,15,16,72,4                   ; movss         0x4(%rax),%xmm9
5489  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
5490  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
5491  DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
5492  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
  ; xmm0 = m[0]*x + (m[1]*y + m[2]); the y term and constant are summed first.
5493  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
5494  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
5495  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
5496  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
  ; Second sum, built in xmm9 because xmm1 still holds y:
  ; xmm9 = m[3]*x + (m[4]*y + m[5]).
5497  DB  243,68,15,16,72,12                  ; movss         0xc(%rax),%xmm9
5498  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
5499  DB  243,68,15,16,80,16                  ; movss         0x10(%rax),%xmm10
5500  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5501  DB  243,68,15,16,88,20                  ; movss         0x14(%rax),%xmm11
5502  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5503  DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
5504  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
5505  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
5506  DB  69,15,88,202                        ; addps         %xmm10,%xmm9
  ; Third sum (the divisor): xmm10 = m[6]*x + (m[7]*y + m[8]).
5507  DB  243,68,15,16,80,24                  ; movss         0x18(%rax),%xmm10
5508  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
5509  DB  243,68,15,16,88,28                  ; movss         0x1c(%rax),%xmm11
5510  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
5511  DB  243,68,15,16,96,32                  ; movss         0x20(%rax),%xmm12
5512  DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
5513  DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
5514  DB  69,15,88,220                        ; addps         %xmm12,%xmm11
5515  DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
5516  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
  ; y is no longer needed, so xmm1 takes the approximate reciprocal and both
  ; sums are scaled by it (multiply by reciprocal instead of divps).
5517  DB  65,15,83,202                        ; rcpps         %xmm10,%xmm1
5518  DB  15,89,193                           ; mulps         %xmm1,%xmm0
5519  DB  68,15,89,201                        ; mulps         %xmm1,%xmm9
  ; Fetch the jump target, move the second result into xmm1, and leave.
5520  DB  72,173                              ; lods          %ds:(%rsi),%rax
5521  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
5522  DB  255,224                             ; jmpq          *%rax
5523
; _sk_linear_gradient_2stops_sse2
;
; Raw-byte encoding of one stage; the trailing comment on each DB line is the
; AT&T-syntax disassembly (source operand first, destination last).
;
; Inputs:
;   xmm0  four packed floats, called t below
;   rsi   points at two consecutive 8-byte entries: first a pointer to eight
;         consecutive 32-bit floats (B[0..3] at offset 0, F[0..3] at offset
;         0x10, both read with unaligned loads), then the address to jump to
;         when this stage is done
; Outputs (every lane of register k uses the same B[k], F[k]):
;   xmm0 = F[0]*t + B[0]
;   xmm1 = F[1]*t + B[1]
;   xmm2 = F[2]*t + B[2]
;   xmm3 = F[3]*t + B[3]
;   rsi  advanced past both entries (two lods; assumes the direction flag is
;        clear -- not established in this stage)
; Overwritten: rax, xmm8-xmm10.  The incoming values of xmm1, xmm2 and xmm3
; are replaced without being read.
; Exit: indirect jump through rax, no ret.
; NOTE(review): presumably B is the first stop's color and F the per-channel
; delta to the second stop, with xmm0..xmm3 being r,g,b,a; confirm against
; the code that fills the context.
5524PUBLIC _sk_linear_gradient_2stops_sse2
5525_sk_linear_gradient_2stops_sse2 LABEL PROC
  ; rax = pointer to the eight floats; rsi moves on to the next entry.
5526  DB  72,173                              ; lods          %ds:(%rsi),%rax
  ; xmm9 = B[0..3], xmm3 = F[0..3].
5527  DB  68,15,16,8                          ; movups        (%rax),%xmm9
5528  DB  15,16,88,16                         ; movups        0x10(%rax),%xmm3
  ; Lane 0 result is built in xmm8 because xmm0 must keep t for the remaining
  ; multiplies: xmm8 = F[0]*t + B[0].
5529  DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
5530  DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
5531  DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
5532  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
5533  DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
5534  DB  68,15,88,193                        ; addps         %xmm1,%xmm8
  ; xmm1 = F[1]*t + B[1]  (shufps $0x55 replicates lane 1; xmm2 is scratch).
5535  DB  15,40,203                           ; movaps        %xmm3,%xmm1
5536  DB  15,198,201,85                       ; shufps        $0x55,%xmm1,%xmm1
5537  DB  65,15,40,209                        ; movaps        %xmm9,%xmm2
5538  DB  15,198,210,85                       ; shufps        $0x55,%xmm2,%xmm2
5539  DB  15,89,200                           ; mulps         %xmm0,%xmm1
5540  DB  15,88,202                           ; addps         %xmm2,%xmm1
  ; xmm2 = F[2]*t + B[2]  (shufps $0xaa replicates lane 2; xmm10 is scratch).
5541  DB  15,40,211                           ; movaps        %xmm3,%xmm2
5542  DB  15,198,210,170                      ; shufps        $0xaa,%xmm2,%xmm2
5543  DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
5544  DB  69,15,198,210,170                   ; shufps        $0xaa,%xmm10,%xmm10
5545  DB  15,89,208                           ; mulps         %xmm0,%xmm2
5546  DB  65,15,88,210                        ; addps         %xmm10,%xmm2
  ; xmm3 = F[3]*t + B[3].  Last use of F and B, so both are replicated in
  ; place (shufps $0xff = lane 3) rather than copied first.
5547  DB  15,198,219,255                      ; shufps        $0xff,%xmm3,%xmm3
5548  DB  69,15,198,201,255                   ; shufps        $0xff,%xmm9,%xmm9
5549  DB  15,89,216                           ; mulps         %xmm0,%xmm3
5550  DB  65,15,88,217                        ; addps         %xmm9,%xmm3
  ; Fetch the jump target, move the lane 0 result into xmm0, and leave.
5551  DB  72,173                              ; lods          %ds:(%rsi),%rax
5552  DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
5553  DB  255,224                             ; jmpq          *%rax
5554ENDIF
5555END
5556