1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build !math_big_pure_go
6
7#include "textflag.h"
8
9// This file provides fast assembly versions for the elementary
10// arithmetic operations on vectors implemented in arith.go.
11
// func mulWW(x, y Word) (z1, z0 Word)
// mulWW multiplies the two words x and y and returns the double-word
// product: z1 receives the high word, z0 the low word.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVL y+4(FP), AX
	MULL x+0(FP)		// DX:AX = y * x (unsigned)
	MOVL AX, z0+12(FP)	// low word of the product
	MOVL DX, z1+8(FP)	// high word of the product
	RET
19
20
// func divWW(x1, x0, y Word) (q, r Word)
// divWW divides the double word x1:x0 (x1 is the high word) by y and
// stores the quotient in q and the remainder in r.
// NOTE(review): presumably callers guarantee y != 0 and x1 < y so that the
// quotient fits in a single word — confirm against the Go callers.
TEXT ·divWW(SB),NOSPLIT,$0
	MOVL y+8(FP), CX	// divisor
	MOVL x0+4(FP), AX	// low word of the dividend
	MOVL x1+0(FP), DX	// high word of the dividend
	DIVL CX			// AX = DX:AX / y, DX = DX:AX % y
	MOVL DX, r+16(FP)
	MOVL AX, q+12(FP)
	RET
29
30
// func addVV(z, x, y []Word) (c Word)
// addVV computes z[i] = x[i] + y[i] + carry for 0 <= i < len(z), chaining
// the carry from one word to the next, and returns the final carry (0 or 1)
// in c. The carry is kept in DX as a mask (0 or -1) between iterations
// because the loop test overwrites the CPU flags.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL y+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX		// carry mask = 0
	XORL BX, BX		// i = 0

addvv_loop:
	CMPL BX, BP
	JGE addvv_done		// leave once i >= n

	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// mask + mask: CF = 1 iff mask was -1
	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL does not touch CF)
	SBBL DX, DX		// DX = -CF: park the new carry as a mask
	ADDL $1, BX		// i++
	JMP addvv_loop

addvv_done:
	NEGL DX			// mask (0 or -1) -> carry (0 or 1)
	MOVL DX, c+36(FP)
	RET
54
55
// func subVV(z, x, y []Word) (c Word)
// subVV computes z[i] = x[i] - y[i] - borrow for 0 <= i < len(z), chaining
// the borrow from one word to the next, and returns the final borrow
// (0 or 1) in c. Structurally this is addVV with SBBL in place of ADCL.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL y+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX		// borrow mask = 0
	XORL BX, BX		// i = 0

subvv_loop:
	CMPL BX, BP
	JGE subvv_done		// leave once i >= n

	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// mask + mask: CF = 1 iff mask was -1
	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL does not touch CF)
	SBBL DX, DX		// DX = -CF: park the new borrow as a mask
	ADDL $1, BX		// i++
	JMP subvv_loop

subvv_done:
	NEGL DX			// mask (0 or -1) -> borrow (0 or 1)
	MOVL DX, c+36(FP)
	RET
80
81
// func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x: y is used as the carry into
// word 0, each z[i] receives x[i] plus the incoming carry, and the carry out
// of the last word is returned in c. When len(z) is 0 the loop body never
// runs and c is simply y.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL y+24(FP), AX	// running carry, seeded with y
	XORL BX, BX		// i = 0

addvw_loop:
	CMPL BX, BP
	JGE addvw_done		// leave once i >= n

	ADDL (SI)(BX*4), AX	// AX = carry + x[i], CF = carry out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL does not touch CF)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// AX = CF, i.e. the next carry (0 or 1)
	ADDL $1, BX		// i++
	JMP addvw_loop

addvw_done:
	MOVL AX, c+28(FP)
	RET
102
103
// func subVW(z, x []Word, y Word) (c Word)
// subVW subtracts the single word y from the vector x: y is used as the
// borrow into word 0, each z[i] receives x[i] minus the incoming borrow, and
// the borrow out of the last word is returned in c. When len(z) is 0 the
// loop body never runs and c is simply y.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL y+24(FP), AX	// running borrow, seeded with y
	XORL BX, BX		// i = 0

subvw_loop:
	CMPL BX, BP
	JGE subvw_done		// leave once i >= n

	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX = x[i] - borrow, CF = borrow out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL does not touch CF)
	SBBL AX, AX		// AX = -CF
	NEGL AX			// AX = CF, i.e. the next borrow (0 or 1)
	ADDL $1, BX		// i++
	JMP subvw_loop

subvw_done:
	MOVL AX, c+28(FP)
	RET
125
126
// func shlVU(z, x []Word, s uint) (c Word)
// shlVU shifts the vector x left by s bits into z, working from index
// len(z)-1 down to index 0, and returns in c the bits shifted out of
// x[len(z)-1]. For len(z) <= 0 nothing is written and c is 0.
// In the comments, ŝ is the complementary shift count (word size minus s).
// NOTE(review): presumably callers keep s below the word size — confirm.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n = len(z)
	SUBL $1, BX		// i = n-1
	JGE shlvu_nonempty	// n > 0

	// n <= 0: nothing to shift
	MOVL $0, c+28(FP)
	RET

shlvu_nonempty:
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	SHLL CX, DX:AX		// DX = w1>>ŝ, the bits pushed out of the top word
	MOVL DX, c+28(FP)

	TESTL BX, BX
	JLE shlvu_last		// i <= 0: only one word, skip the pairwise loop

	// i > 0: each z[i] combines x[i] with the top bits of x[i-1]
shlvu_loop:
	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// DX = w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	SUBL $1, BX		// i--
	JG shlvu_loop		// repeat while i > 0

	// i <= 0: the lowest word has nothing below it to pull bits from
shlvu_last:
	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET
160
161
// func shrVU(z, x []Word, s uint) (c Word)
// shrVU shifts the vector x right by s bits into z, working from index 0 up
// to index len(z)-1, and returns in c the bits shifted out of x[0] (placed
// at the top of the word). For len(z) <= 0 nothing is written and c is 0.
// In the comments, ŝ is the complementary shift count (word size minus s).
// NOTE(review): presumably callers keep s below the word size — confirm.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// n = len(z)
	SUBL $1, BP		// last = n-1
	JGE shrvu_nonempty	// n > 0

	// n <= 0: nothing to shift
	MOVL $0, c+28(FP)
	RET

shrvu_nonempty:
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL DX, DX
	MOVL (SI), AX		// w1 = x[0]
	SHRL CX, DX:AX		// DX = w1<<ŝ, the bits pushed out of the bottom word
	MOVL DX, c+28(FP)
	XORL BX, BX		// i = 0

	// i < n-1: each z[i] combines x[i] with the low bits of x[i+1]
shrvu_loop:
	CMPL BX, BP
	JGE shrvu_last		// leave once i >= n-1

	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// DX = w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	ADDL $1, BX		// i++
	JMP shrvu_loop

	// i >= n-1: the highest word has nothing above it to pull bits from
shrvu_last:
	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET
197
198
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW computes z = x*y + r word by word: r is the carry into word 0,
// each z[i] receives the low word of x[i]*y + carry, the high word becomes
// the next carry, and the final carry is returned in c.
// Both base pointers are advanced to one past the last element and the
// index runs from -len(z) up to 0.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n = len(z)
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r

muladdvww_loop:
	TESTL BX, BX
	JGE muladdvww_done	// leave once i >= 0

	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX
	ADCL $0, DX		// DX:AX += c
	MOVL DX, CX		// c = high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	ADDL $1, BX		// i++
	JMP muladdvww_loop

muladdvww_done:
	MOVL CX, c+32(FP)
	RET
224
225
// func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW computes z += x*y word by word: each z[i] is increased by the
// low word of x[i]*y + carry, the high word (plus any carry out of that
// addition) becomes the next carry, and the final carry is returned in c.
// Both base pointers are advanced to one past the last element and the
// index runs from -len(z) up to 0.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n = len(z)
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL y+24(FP), BP
	XORL CX, CX		// c = 0

addmulvvw_loop:
	TESTL BX, BX
	JGE addmulvvw_done	// leave once i >= 0

	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX
	ADCL $0, DX		// DX:AX += c
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// fold the carry of that add into the high word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++
	JMP addmulvvw_loop

addmulvvw_done:
	MOVL CX, c+28(FP)
	RET
252
253
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// divWVW divides the vector x, extended by xn as the initial high word, by
// the single word y. It walks from index len(z)-1 down to 0: at each step
// the running remainder and x[i] form a double-word dividend, the quotient
// word is stored in z[i], and the remainder carries to the next step. The
// final remainder is returned in r (it is xn when len(z) is 0).
// NOTE(review): presumably callers guarantee y != 0 and xn < y so that each
// quotient fits in a single word — confirm against the Go callers.
TEXT ·divWVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = len(z)
	MOVL y+28(FP), CX
	MOVL x+16(FP), SI
	MOVL z+0(FP), DI
	MOVL xn+12(FP), DX	// r = xn

divwvw_loop:
	SUBL $1, BX		// i--
	JL divwvw_done		// leave once i < 0

	MOVL (SI)(BX*4), AX	// DX:AX = r:x[i]
	DIVL CX			// AX = quotient, DX = remainder
	MOVL AX, (DI)(BX*4)	// z[i] = quotient
	JMP divwvw_loop

divwvw_done:
	MOVL DX, r+32(FP)
	RET
272
// func bitLen(x Word) (n int)
// bitLen returns the number of bits needed to represent x: one more than
// the index of the highest set bit, or 0 when x is 0.
TEXT ·bitLen(SB),NOSPLIT,$0
	BSRL x+0(FP), AX	// AX = index of highest set bit; ZF set if x == 0
	JNZ bitlen_nonzero

	// x == 0: no bits set
	MOVL $0, n+4(FP)
	RET

bitlen_nonzero:
	ADDL $1, AX		// bit index -> bit count
	MOVL AX, n+4(FP)
	RET
283