1.text
2
# _mul_1x1: file-local helper (no .globl for it here) that multiplies two
# 64-bit values without carries. Table entries and partial products are
# combined only with XOR, i.e. this is polynomial multiplication over GF(2).
#
# Private register convention (not the C ABI):
#   In:   %rax = operand a
#         %rbp = operand b
#         %r8  = mask used to extract table indices from b; the caller in
#                this file passes 15, so b is consumed four bits at a time
#   Out:  %rax = low 64 bits of the product, %rdx = high 64 bits
#   Overwrites: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags
#   Leaves %r8 unchanged (it is only read).
#   Stack: reserves 136 bytes; 0..127(%rsp) hold a table of 16 qwords.
#
# Outline:
#   - a1 = a with its top three bits cleared, so that a1 times any 4-bit
#     polynomial still fits in 64 bits; a2, a4, a8 are a1 shifted by 1, 2, 3.
#   - The three removed top bits of a are handled up front by conditionally
#     XORing b, shifted by 63, 62 and 61, into the result.
#   - tab[i] (i = 0..15) is the XOR of a1/a2/a4/a8 selected by the bits of i.
#   - b is then walked from the low end; each index selects a table entry
#     that is shifted into place and XORed into the result. Even-numbered
#     indices are accumulated in %xmm0, odd-numbered ones in %rax/%rdx.
3.type	_mul_1x1,@function
4.align	16
5_mul_1x1:
6	subq	$128+8,%rsp	# room for the 16-entry table plus 8 bytes
7	movq	$-1,%r9
8	leaq	(%rax,%rax,1),%rsi	# rsi = a<<1 (bit 62 of a now in the sign bit)
9	shrq	$3,%r9	# r9 = 0x1fffffffffffffff
10	leaq	(,%rax,4),%rdi	# rdi = a<<2 (bit 61 of a now in the sign bit)
11	andq	%rax,%r9	# a1 = a with the top three bits cleared
12	leaq	(,%rax,8),%r12	# a8 = a<<3
13	sarq	$63,%rax	# rax = bit 63 of a replicated into all 64 bits
14	leaq	(%r9,%r9,1),%r10	# a2 = a1<<1
15	sarq	$63,%rsi	# rsi = bit 62 of a replicated
16	leaq	(,%r9,4),%r11	# a4 = a1<<2
17	andq	%rbp,%rax	# rax = b if bit 63 of a is set, else 0
18	sarq	$63,%rdi	# rdi = bit 61 of a replicated
19	movq	%rax,%rdx	# from here %rax is the low half, %rdx the high half
20	shlq	$63,%rax	# lo  = (b or 0) << 63
21	andq	%rbp,%rsi	# rsi = b if bit 62 of a is set, else 0
22	shrq	$1,%rdx	# hi  = (b or 0) >> 1
23	movq	%rsi,%rcx
24	shlq	$62,%rsi
25	andq	%rbp,%rdi	# rdi = b if bit 61 of a is set, else 0
26	shrq	$2,%rcx
27	xorq	%rsi,%rax	# lo ^= (b or 0) << 62
28	movq	%rdi,%rbx
29	shlq	$61,%rdi
30	xorq	%rcx,%rdx	# hi ^= (b or 0) >> 2
31	shrq	$3,%rbx
32	xorq	%rdi,%rax	# lo ^= (b or 0) << 61
33	xorq	%rbx,%rdx	# hi ^= (b or 0) >> 3
34
# Build the table. Stores are interleaved with the XORs that prepare the
# following entries.
35	movq	%r9,%r13
36	movq	$0,0(%rsp)	# tab[0]  = 0
37	xorq	%r10,%r13	# r13 = a1^a2
38	movq	%r9,8(%rsp)	# tab[1]  = a1
39	movq	%r11,%r14
40	movq	%r10,16(%rsp)	# tab[2]  = a2
41	xorq	%r12,%r14	# r14 = a4^a8
42	movq	%r13,24(%rsp)	# tab[3]  = a1^a2
43
44	xorq	%r11,%r9	# r9  = a1^a4
45	movq	%r11,32(%rsp)	# tab[4]  = a4
46	xorq	%r11,%r10	# r10 = a2^a4
47	movq	%r9,40(%rsp)	# tab[5]  = a1^a4
48	xorq	%r11,%r13	# r13 = a1^a2^a4
49	movq	%r10,48(%rsp)	# tab[6]  = a2^a4
50	xorq	%r14,%r9	# r9  = a1^a8 (the a4 terms cancel)
51	movq	%r13,56(%rsp)	# tab[7]  = a1^a2^a4
52	xorq	%r14,%r10	# r10 = a2^a8 (the a4 terms cancel)
53
54	movq	%r12,64(%rsp)	# tab[8]  = a8
55	xorq	%r14,%r13	# r13 = a1^a2^a8 (the a4 terms cancel)
56	movq	%r9,72(%rsp)	# tab[9]  = a1^a8
57	xorq	%r11,%r9	# r9  = a1^a4^a8
58	movq	%r10,80(%rsp)	# tab[10] = a2^a8
59	xorq	%r11,%r10	# r10 = a2^a4^a8
60	movq	%r13,88(%rsp)	# tab[11] = a1^a2^a8
61
62	xorq	%r11,%r13	# r13 = a1^a2^a4^a8
63	movq	%r14,96(%rsp)	# tab[12] = a4^a8
64	movq	%r8,%rsi
65	movq	%r9,104(%rsp)	# tab[13] = a1^a4^a8
66	andq	%rbp,%rsi	# rsi = b & mask: first table index
67	movq	%r10,112(%rsp)	# tab[14] = a2^a4^a8
68	shrq	$4,%rbp
69	movq	%r13,120(%rsp)	# tab[15] = a1^a2^a4^a8
70	movq	%r8,%rdi
71	andq	%rbp,%rdi	# rdi = (b>>4) & mask: second table index
72	shrq	$4,%rbp
73
# Lookup phase. %rsi always carries the next even-numbered index (its entry
# goes to %xmm1 and is moved into place with a byte shift), %rdi the next
# odd-numbered index (its entry goes to %rcx/%rbx and is split across lo/hi).
74	movq	(%rsp,%rsi,8),%xmm0	# xmm0 = tab[index 0], unshifted
75	movq	%r8,%rsi
76	andq	%rbp,%rsi	# index 2
77	shrq	$4,%rbp
# index 1 shifted by 4 bits (integer), index 2 shifted by 1 byte (SSE)
78	movq	(%rsp,%rdi,8),%rcx
79	movq	%r8,%rdi
80	movq	%rcx,%rbx
81	shlq	$4,%rcx
82	andq	%rbp,%rdi
83	movq	(%rsp,%rsi,8),%xmm1
84	shrq	$60,%rbx
85	xorq	%rcx,%rax
86	pslldq	$1,%xmm1
87	movq	%r8,%rsi
88	shrq	$4,%rbp
89	xorq	%rbx,%rdx
90	andq	%rbp,%rsi
91	shrq	$4,%rbp
92	pxor	%xmm1,%xmm0
# index 3 shifted by 12 bits, index 4 shifted by 2 bytes
93	movq	(%rsp,%rdi,8),%rcx
94	movq	%r8,%rdi
95	movq	%rcx,%rbx
96	shlq	$12,%rcx
97	andq	%rbp,%rdi
98	movq	(%rsp,%rsi,8),%xmm1
99	shrq	$52,%rbx
100	xorq	%rcx,%rax
101	pslldq	$2,%xmm1
102	movq	%r8,%rsi
103	shrq	$4,%rbp
104	xorq	%rbx,%rdx
105	andq	%rbp,%rsi
106	shrq	$4,%rbp
107	pxor	%xmm1,%xmm0
# index 5 shifted by 20 bits, index 6 shifted by 3 bytes
108	movq	(%rsp,%rdi,8),%rcx
109	movq	%r8,%rdi
110	movq	%rcx,%rbx
111	shlq	$20,%rcx
112	andq	%rbp,%rdi
113	movq	(%rsp,%rsi,8),%xmm1
114	shrq	$44,%rbx
115	xorq	%rcx,%rax
116	pslldq	$3,%xmm1
117	movq	%r8,%rsi
118	shrq	$4,%rbp
119	xorq	%rbx,%rdx
120	andq	%rbp,%rsi
121	shrq	$4,%rbp
122	pxor	%xmm1,%xmm0
# index 7 shifted by 28 bits, index 8 shifted by 4 bytes
123	movq	(%rsp,%rdi,8),%rcx
124	movq	%r8,%rdi
125	movq	%rcx,%rbx
126	shlq	$28,%rcx
127	andq	%rbp,%rdi
128	movq	(%rsp,%rsi,8),%xmm1
129	shrq	$36,%rbx
130	xorq	%rcx,%rax
131	pslldq	$4,%xmm1
132	movq	%r8,%rsi
133	shrq	$4,%rbp
134	xorq	%rbx,%rdx
135	andq	%rbp,%rsi
136	shrq	$4,%rbp
137	pxor	%xmm1,%xmm0
# index 9 shifted by 36 bits, index 10 shifted by 5 bytes
138	movq	(%rsp,%rdi,8),%rcx
139	movq	%r8,%rdi
140	movq	%rcx,%rbx
141	shlq	$36,%rcx
142	andq	%rbp,%rdi
143	movq	(%rsp,%rsi,8),%xmm1
144	shrq	$28,%rbx
145	xorq	%rcx,%rax
146	pslldq	$5,%xmm1
147	movq	%r8,%rsi
148	shrq	$4,%rbp
149	xorq	%rbx,%rdx
150	andq	%rbp,%rsi
151	shrq	$4,%rbp
152	pxor	%xmm1,%xmm0
# index 11 shifted by 44 bits, index 12 shifted by 6 bytes
153	movq	(%rsp,%rdi,8),%rcx
154	movq	%r8,%rdi
155	movq	%rcx,%rbx
156	shlq	$44,%rcx
157	andq	%rbp,%rdi
158	movq	(%rsp,%rsi,8),%xmm1
159	shrq	$20,%rbx
160	xorq	%rcx,%rax
161	pslldq	$6,%xmm1
162	movq	%r8,%rsi
163	shrq	$4,%rbp
164	xorq	%rbx,%rdx
165	andq	%rbp,%rsi
166	shrq	$4,%rbp
167	pxor	%xmm1,%xmm0
# index 13 shifted by 52 bits, index 14 shifted by 7 bytes.
# The %rsi value formed in this group is never used as an index: %rsi is
# overwritten by the move out of %xmm0 further down.
168	movq	(%rsp,%rdi,8),%rcx
169	movq	%r8,%rdi
170	movq	%rcx,%rbx
171	shlq	$52,%rcx
172	andq	%rbp,%rdi
173	movq	(%rsp,%rsi,8),%xmm1
174	shrq	$12,%rbx
175	xorq	%rcx,%rax
176	pslldq	$7,%xmm1
177	movq	%r8,%rsi
178	shrq	$4,%rbp
179	xorq	%rbx,%rdx
180	andq	%rbp,%rsi
181	shrq	$4,%rbp
182	pxor	%xmm1,%xmm0
# index 15 shifted by 60 bits, then fold the SSE accumulator into lo/hi
183	movq	(%rsp,%rdi,8),%rcx
184	movq	%rcx,%rbx
185	shlq	$60,%rcx
186.byte	102,72,15,126,198	# encodes: movq %xmm0,%rsi (low qword of the SSE sum)
187	shrq	$4,%rbx
188	xorq	%rcx,%rax
189	psrldq	$8,%xmm0	# bring the high qword of the SSE sum down
190	xorq	%rbx,%rdx
191.byte	102,72,15,126,199	# encodes: movq %xmm0,%rdi (high qword of the SSE sum)
192	xorq	%rsi,%rax	# lo ^= low qword of the SSE sum
193	xorq	%rdi,%rdx	# hi ^= high qword of the SSE sum
194
195	addq	$128+8,%rsp	# release the table
196	.byte	0xf3,0xc3	# encodes: rep ret
197.Lend_mul_1x1:
198.size	_mul_1x1,.-_mul_1x1
199
# bn_GF2m_mul_2x2: exported routine that forms the 256-bit carry-less
# (XOR-based, polynomial over GF(2)) product of two 128-bit values and
# stores it as four qwords.
#
# Arguments arrive in the first five System V AMD64 integer registers:
#   %rdi = destination; 32 bytes are written at offsets 0, 8, 16, 24
#          (least significant qword at offset 0)
#   %rsi = high qword of the first operand   (a1)
#   %rdx = low  qword of the first operand   (a0)
#   %rcx = high qword of the second operand  (b1)
#   %r8  = low  qword of the second operand  (b0)
#
# Both code paths use three 64x64 multiplications, Karatsuba style:
#   a1*b1, a0*b0 and (a1^a0)*(b1^b0).
#
# Dispatch: bit 33 of the qword at OPENSSL_ia32cap_P chooses the path. When
# it is set the pclmulqdq-based path runs, otherwise the table-driven path
# built on _mul_1x1. NOTE(review): OPENSSL_ia32cap_P is defined outside this
# file; presumably bit 33 is the PCLMULQDQ capability flag. Confirm against
# the OpenSSL documentation.
200.globl	bn_GF2m_mul_2x2
201.type	bn_GF2m_mul_2x2,@function
202.align	16
203bn_GF2m_mul_2x2:
204	movq	OPENSSL_ia32cap_P(%rip),%rax
205	btq	$33,%rax
206	jnc	.Lvanilla_mul_2x2	# bit clear: take the table-driven path
207
# pclmulqdq path. Uses only %xmm0-%xmm5 and does not touch the stack.
208.byte	102,72,15,110,198	# encodes: movq %rsi,%xmm0   (a1)
209.byte	102,72,15,110,201	# encodes: movq %rcx,%xmm1   (b1)
210.byte	102,72,15,110,210	# encodes: movq %rdx,%xmm2   (a0)
211.byte	102,73,15,110,216	# encodes: movq %r8,%xmm3    (b0)
212	movdqa	%xmm0,%xmm4
213	movdqa	%xmm1,%xmm5
214.byte	102,15,58,68,193,0	# encodes: pclmulqdq $0,%xmm1,%xmm0   xmm0 = a1*b1
215	pxor	%xmm2,%xmm4	# xmm4 = a1^a0
216	pxor	%xmm3,%xmm5	# xmm5 = b1^b0
217.byte	102,15,58,68,211,0	# encodes: pclmulqdq $0,%xmm3,%xmm2   xmm2 = a0*b0
218.byte	102,15,58,68,229,0	# encodes: pclmulqdq $0,%xmm5,%xmm4   xmm4 = (a1^a0)*(b1^b0)
219	xorps	%xmm0,%xmm4
220	xorps	%xmm2,%xmm4	# xmm4 = middle term
221	movdqa	%xmm4,%xmm5
222	pslldq	$8,%xmm4	# low qword of the middle term moved to the upper half
223	psrldq	$8,%xmm5	# high qword of the middle term moved to the lower half
224	pxor	%xmm4,%xmm2	# xmm2 = low 128 bits of the result
225	pxor	%xmm5,%xmm0	# xmm0 = high 128 bits of the result
226	movdqu	%xmm2,0(%rdi)	# unaligned stores: no alignment is required of %rdi
227	movdqu	%xmm0,16(%rdi)
228	.byte	0xf3,0xc3	# encodes: rep ret
229
230.align	16
# Table-driven path. Frame layout after the 136-byte adjustment:
#   0(%rsp),  8(%rsp)    low/high of a0*b0
#   16(%rsp), 24(%rsp)   low/high of a1*b1
#   32(%rsp)..64(%rsp)   incoming %rdi, %rsi, %rdx, %rcx, %r8
#   80(%rsp)..112(%rsp)  saved %r14, %r13, %r12, %rbp, %rbx
# Those five registers are saved because _mul_1x1 overwrites them.
231.Lvanilla_mul_2x2:
232	leaq	-136(%rsp),%rsp
233	movq	%r14,80(%rsp)
234	movq	%r13,88(%rsp)
235	movq	%r12,96(%rsp)
236	movq	%rbp,104(%rsp)
237	movq	%rbx,112(%rsp)
238.Lbody_mul_2x2:
239	movq	%rdi,32(%rsp)	# the argument registers are overwritten by _mul_1x1
240	movq	%rsi,40(%rsp)
241	movq	%rdx,48(%rsp)
242	movq	%rcx,56(%rsp)
243	movq	%r8,64(%rsp)
244
245	movq	$15,%r8	# 4-bit index mask; _mul_1x1 leaves %r8 unchanged
246	movq	%rsi,%rax
247	movq	%rcx,%rbp
248	call	_mul_1x1	# rdx:rax = a1*b1
249	movq	%rax,16(%rsp)
250	movq	%rdx,24(%rsp)
251
252	movq	48(%rsp),%rax
253	movq	64(%rsp),%rbp
254	call	_mul_1x1	# rdx:rax = a0*b0
255	movq	%rax,0(%rsp)
256	movq	%rdx,8(%rsp)
257
258	movq	40(%rsp),%rax
259	movq	56(%rsp),%rbp
260	xorq	48(%rsp),%rax	# rax = a1^a0
261	xorq	64(%rsp),%rbp	# rbp = b1^b0
262	call	_mul_1x1	# rdx:rax = (a1^a0)*(b1^b0), called m below
263	movq	0(%rsp),%rbx	# rbx = lo(a0*b0)
264	movq	8(%rsp),%rcx	# rcx = hi(a0*b0)
265	movq	16(%rsp),%rdi	# rdi = lo(a1*b1)
266	movq	24(%rsp),%rsi	# rsi = hi(a1*b1)
267	movq	32(%rsp),%rbp	# rbp = destination pointer
268
# Recombine. Words 0 and 3 come straight from a0*b0 and a1*b1; words 1 and 2
# additionally receive the middle term m ^ a0*b0 ^ a1*b1.
269	xorq	%rdx,%rax
270	xorq	%rcx,%rdx
271	xorq	%rbx,%rax
272	movq	%rbx,0(%rbp)	# word 0 = lo(a0*b0)
273	xorq	%rdi,%rdx
274	movq	%rsi,24(%rbp)	# word 3 = hi(a1*b1)
275	xorq	%rsi,%rax
276	xorq	%rsi,%rdx	# rdx = hi(m)^hi(a0*b0)^lo(a1*b1)^hi(a1*b1)
277	xorq	%rdx,%rax	# rax = lo(m)^lo(a0*b0)^hi(a0*b0)^lo(a1*b1)
278	movq	%rdx,16(%rbp)	# word 2
279	movq	%rax,8(%rbp)	# word 1
280
281	movq	80(%rsp),%r14
282	movq	88(%rsp),%r13
283	movq	96(%rsp),%r12
284	movq	104(%rsp),%rbp
285	movq	112(%rsp),%rbx
286	leaq	136(%rsp),%rsp	# lea releases the frame without changing flags
287	.byte	0xf3,0xc3	# encodes: rep ret
288.Lend_mul_2x2:
289.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
# NUL-terminated ASCII identification string; the bytes decode to
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
# Nothing in the visible code references it.
290.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
291.align	16
292