// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
// +build !math_big_pure_go
6
#include "textflag.h"
8
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
11
// func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] + carry into z[i] for i = 0 .. len(z)-1,
// propagating the carry from word to word, and returns the final
// carry (0 or 1) in c. Only len(z) is consulted; x and y are read for
// the same number of words without any length check of their own.
//
// Register roles:
//	R1 = write cursor into z	R4 = address one past the last word of z
//	R2 = read cursor into x		R3 = read cursor into y
//	R5, R6 = operand words		R0 = scratch, then the result carry
TEXT ·addVV(SB),NOSPLIT,$0
	ADD.S	$0, R0		// clear carry flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z), the end-of-z sentinel
	B E1			// test before the first iteration so len(z) == 0 does no work
L1:
	MOVW.P	4(R2), R5	// R5 = *x, then advance x by one word
	MOVW.P	4(R3), R6	// R6 = *y, then advance y by one word
	ADC.S	R6, R5		// R5 += R6 + carry-in; carry flag becomes carry-out
	MOVW.P	R5, 4(R1)	// *z = R5, then advance z by one word
E1:
	TEQ	R1, R4		// end-of-vector test; leaves the carry flag intact
	BNE L1

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 only if the last add carried out
	MOVW	R0, c+36(FP)
	RET
34
35
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
//
// subVV stores x[i] - y[i] - borrow into z[i] for i = 0 .. len(z)-1
// and returns the final borrow (0 or 1) in c. Only len(z) is consulted.
//
// On ARM the carry flag is the inverse of a borrow: carry set means
// "no borrow". That is why the flag is primed with SUB.S (which sets
// carry) and why the result is produced with the CC (carry clear)
// condition.
//
// Register roles:
//	R1 = write cursor into z	R4 = address one past the last word of z
//	R2 = read cursor into x		R3 = read cursor into y
//	R5, R6 = operand words		R0 = scratch, then the result borrow
TEXT ·subVV(SB),NOSPLIT,$0
	SUB.S	$0, R0		// clear borrow flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z), the end-of-z sentinel
	B E2			// test before the first iteration so len(z) == 0 does no work
L2:
	MOVW.P	4(R2), R5	// R5 = *x, then advance x by one word
	MOVW.P	4(R3), R6	// R6 = *y, then advance y by one word
	SBC.S	R6, R5		// R5 = R5 - R6 - borrow-in; flags carry the borrow-out
	MOVW.P	R5, 4(R1)	// *z = R5, then advance z by one word
E2:
	TEQ	R1, R4		// end-of-vector test; leaves the carry flag intact
	BNE L2

	MOVW	$0, R0
	MOVW.CC	$1, R0		// carry clear == borrow out, so c = 1
	MOVW	R0, c+36(FP)
	RET
59
60
// func addVW(z, x []Word, y Word) (c Word)
//
// addVW adds the single word y to the vector x, storing the result in
// z: z[0] = x[0] + y, and every later word is x[i] plus the carry out
// of the previous word. It returns the final carry (0 or 1) in c.
// When len(z) == 0 nothing is stored and c = y.
//
// Register roles:
//	R1 = write cursor into z	R4 = address one past the last word of z
//	R2 = read cursor into x		R3 = y
//	R5 = current word		R0 = result carry
TEXT ·addVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z), the end-of-z sentinel
	TEQ	R1, R4
	BNE L3a
	MOVW	R3, c+28(FP)	// empty vector: y passes through as the carry
	RET
L3a:
	// First word: plain add of y, which also seeds the carry flag.
	MOVW.P	4(R2), R5
	ADD.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E3
L3:
	// Remaining words: only the carry is added.
	MOVW.P	4(R2), R5
	ADC.S	$0, R5
	MOVW.P	R5, 4(R1)
E3:
	TEQ	R1, R4		// end-of-vector test; leaves the carry flag intact
	BNE	L3

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 only if the last add carried out
	MOVW	R0, c+28(FP)
	RET
89
90
// func subVW(z, x []Word, y Word) (c Word)
//
// subVW subtracts the single word y from the vector x, storing the
// result in z: z[0] = x[0] - y, and every later word is x[i] minus the
// borrow out of the previous word. It returns the final borrow (0 or 1)
// in c. When len(z) == 0 nothing is stored and c = y.
//
// ARM's carry flag is the inverse of a borrow (carry set == no borrow),
// hence the CC condition when materialising the result.
//
// Register roles:
//	R1 = write cursor into z	R4 = address one past the last word of z
//	R2 = read cursor into x		R3 = y
//	R5 = current word		R0 = result borrow
TEXT ·subVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z), the end-of-z sentinel
	TEQ	R1, R4
	BNE L4a
	MOVW	R3, c+28(FP)	// empty vector: y passes through as the borrow
	RET
L4a:
	// First word: plain subtract of y, which also seeds the borrow.
	MOVW.P	4(R2), R5
	SUB.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E4
L4:
	// Remaining words: only the borrow is subtracted.
	MOVW.P	4(R2), R5
	SBC.S	$0, R5
	MOVW.P	R5, 4(R1)
E4:
	TEQ	R1, R4		// end-of-vector test; leaves the carry flag intact
	BNE	L4

	MOVW	$0, R0
	MOVW.CC	$1, R0		// carry clear == borrow out, so c = 1
	MOVW	R0, c+28(FP)
	RET
119
120
// func shlVU(z, x []Word, s uint) (c Word)
//
// shlVU shifts the vector x left by s bits into z and returns in c the
// bits shifted out of the most significant word, i.e.
// x[len(z)-1] >> (32 - s). The vectors are walked from the highest
// word down to the lowest. With len(z) == 0 nothing is stored and
// c = 0; with s == 0 the words are copied unchanged and c = 0.
// NOTE(review): the code makes no attempt to handle s >= 32; callers
// are presumably expected to keep s below the word size.
//
// Register roles:
//	R1 = stop address (&z[0], or &z[1] on the shifting path)
//	R2 = read cursor into x, starting one past the end
//	R5 = write cursor into z, starting one past the end
//	R3 = s				R4 = 32 - s
//	R6 = word just loaded		R7 = partially assembled output word
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X7

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R2, R2	// R2 = &x[len(z)]
	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ	Y7
	ADD	$4, R1	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s, the complementary right shift

	// Top word: its high bits become the result c, its low bits start
	// the first output word.
	MOVW.W	-4(R2), R6
	MOVW	R6<<R3, R7
	MOVW	R6>>R4, R6
	MOVW	R6, c+28(FP)
	B E7

L7:
	MOVW.W	-4(R2), R6	// step x down one word and load it
	ORR	R6>>R4, R7	// complete the pending word with this word's high bits
	MOVW.W	R7, -4(R5)	// step z down one word and store it
	MOVW	R6<<R3, R7	// start the next output word
E7:
	TEQ	R1, R5
	BNE	L7

	MOVW	R7, -4(R5)	// lowest word z[0]: nothing below it to OR in
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W	-4(R2), R6
	MOVW.W	R6, -4(R5)
	TEQ	R1, R5
	BNE Y7

X7:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET
167
168
// func shrVU(z, x []Word, s uint) (c Word)
//
// shrVU shifts the vector x right by s bits into z and returns in c
// the bits shifted out of the least significant word, i.e.
// x[0] << (32 - s). The vectors are walked from the lowest word up to
// the highest. With len(z) == 0 nothing is stored and c = 0; with
// s == 0 the words are copied unchanged and c = 0.
// NOTE(review): the code makes no attempt to handle s >= 32; callers
// are presumably expected to keep s below the word size.
//
// Register roles:
//	R1 = write cursor into z
//	R2 = read cursor into x
//	R5 = stop address (&z[len(z)], or &z[len(z)-1] on the shifting path)
//	R3 = s				R4 = 32 - s
//	R6 = word just loaded		R7 = partially assembled output word
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X6

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ Y6
	SUB	$4, R5	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s, the complementary left shift

	// first word
	// Its low bits become the result c, its high bits start the first
	// output word.
	MOVW.P	4(R2), R6
	MOVW	R6>>R3, R7
	MOVW	R6<<R4, R6
	MOVW	R6, c+28(FP)
	B E6

	// word loop
L6:
	MOVW.P	4(R2), R6	// load the next word of x and advance
	ORR	R6<<R4, R7	// complete the pending word with this word's low bits
	MOVW.P	R7, 4(R1)	// store it to z and advance
	MOVW	R6>>R3, R7	// start the next output word
E6:
	TEQ	R1, R5
	BNE	L6

	MOVW	R7, 0(R1)	// highest word: nothing above it to OR in
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P	4(R2), R6
	MOVW.P	R6, 4(R1)
	TEQ R1, R5
	BNE Y6

X6:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET
216
217
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// mulAddVWW computes z = x*y + r one word at a time: for each i from
// 0 to len(z)-1 it forms the two-word product x[i]*y, adds the running
// carry (initially r), stores the low word in z[i] and keeps the high
// word as the carry into the next position. The last carry is returned
// in c; with len(z) == 0 that is simply r.
//
// Register roles:
//	R0 = constant 0, used to fold the carry flag into the high word
//	R1 = write cursor into z	R5 = address one past the last word of z
//	R2 = read cursor into x		R3 = y
//	R4 = running carry word		R7:R6 = high:low product words
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	r+28(FP), R4
	ADD	R5<<2, R1, R5	// R5 = R1 + 4*len(z), the end-of-z sentinel
	B E8

	// word loop
L8:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6		// low word += carry-in
	ADC	R0, R7		// propagate that add's carry into the high word
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4		// high word is the carry into the next position
E8:
	TEQ	R1, R5
	BNE	L8

	MOVW	R4, c+32(FP)
	RET
243
244
// func addMulVVW(z, x []Word, y Word) (c Word)
//
// addMulVVW computes z += x*y one word at a time: for each i from 0 to
// len(z)-1 it forms the two-word product x[i]*y, adds the running carry
// (initially 0) and the existing z[i], stores the low word back into
// z[i] and keeps the high word as the carry into the next position.
// The last carry is returned in c; with len(z) == 0 that is 0.
//
// Register roles:
//	R0 = constant 0, used to fold the carry flag into the high word
//	R1 = read/write cursor into z	R5 = address one past the last word of z
//	R2 = read cursor into x		R3 = y
//	R4 = running carry word, briefly reused to hold the old z[i]
//	R7:R6 = high:low product words
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R5<<2, R1, R5	// R5 = R1 + 4*len(z), the end-of-z sentinel
	MOVW	$0, R4		// no carry into the first word
	B E9

	// word loop
L9:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6		// low word += carry-in
	ADC	R0, R7		// propagate that add's carry into the high word
	MOVW	0(R1), R4	// carry-in is consumed; reuse R4 for the old z[i]
	ADD.S	R4, R6		// low word += old z[i]
	ADC	R0, R7		// propagate that add's carry into the high word
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4		// high word is the carry into the next position
E9:
	TEQ	R1, R5
	BNE	L9

	MOVW	R4, c+28(FP)
	RET
273
274
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// divWVW has no assembly implementation on this target. It branches
// (B, not a call) straight to divWVW_g, so that routine sees this
// function's argument frame unchanged and returns directly to the
// original caller.
TEXT ·divWVW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B ·divWVW_g(SB)
279
280
// func divWW(x1, x0, y Word) (q, r Word)
//
// divWW has no assembly implementation on this target. It branches
// (B, not a call) straight to divWW_g, so that routine sees this
// function's argument frame unchanged and returns directly to the
// original caller.
TEXT ·divWW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B ·divWW_g(SB)
285
286
// func mulWW(x, y Word) (z1, z0 Word)
//
// mulWW returns the full two-word unsigned product of x and y:
// z1 receives the high word and z0 the low word, so z1:z0 = x * y.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVW	x+0(FP), R1
	MOVW	y+4(FP), R2
	MULLU	R1, R2, (R4, R3)	// R4:R3 = R1 * R2 (high:low)
	MOVW	R4, z1+8(FP)
	MOVW	R3, z0+12(FP)
	RET
295