1/*
2Copyright (C) 1996-1997 Id Software, Inc.
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18
19*/
20//
21// math.s
22// x86 assembly-language math routines.
23
24#include "asm_i386.h"
25#include "quakeasm.h"
26
27
28#if	id386
29
30	.data
31
32	.align	4
// Dispatch table used by BoxOnPlaneSide: entry N is the address of LcaseN,
// and the table is indexed by the plane signbits value (0-7), 4 bytes per
// entry.
Ljmptab:
	.long	Lcase0		// signbits 0
	.long	Lcase1		// signbits 1
	.long	Lcase2		// signbits 2
	.long	Lcase3		// signbits 3
	.long	Lcase4		// signbits 4
	.long	Lcase5		// signbits 5
	.long	Lcase6		// signbits 6
	.long	Lcase7		// signbits 7
35
36	.text
37
// TODO: rounding needed?
//
// Invert24To16(val)
//
// Divides the 64-bit constant 0x10000000000 (2^40, held in %edx:%eax) by
// val using an unsigned 64-by-32 divide and returns the 32-bit quotient in
// %eax.  The quotient is truncated, not rounded.
//
// val is the single stack argument.  It is range-checked as a signed
// 32-bit value: any val <= 0x100 returns 0xFFFFFFFF without dividing.
// (For a divisor no larger than the high dword of the dividend the
// quotient would not fit in 32 bits and divl would raise a divide error.)
//
// Presumably an 8.24 to 16.16 fixed-point reciprocal, going by the name -
// TODO confirm against the callers.
//
// Clobbers %eax, %ecx, %edx and the flags.

// stack parameter offset
#define	val	4

.globl C(Invert24To16)
C(Invert24To16):

	movl	$0x100,%edx			// high dword of the dividend, also the limit
	movl	val(%esp),%ecx		// divisor
	cmpl	%edx,%ecx
	jle		LOutOfRange			// signed compare: val <= 0x100

	xorl	%eax,%eax			// low dword = 0, so %edx:%eax = 0x10000000000
	divl	%ecx				// %eax = quotient, %edx = remainder

	ret

LOutOfRange:
	movl	$0xFFFFFFFF,%eax	// saturated result for out-of-range val
	ret
58
//
// TransformVector(in, out)
//
// in and out are the two stack arguments; three floats are read through in
// and three floats are written through out:
//	out[0] = in[0]*vright[0] + in[1]*vright[1] + in[2]*vright[2]
//	out[1] = in[0]*vup[0]    + in[1]*vup[1]    + in[2]*vup[2]
//	out[2] = in[0]*vpn[0]    + in[1]*vpn[1]    + in[2]*vpn[2]
// The three sums are accumulated side by side on the x87 stack.  Every
// load through in happens before the first store through out.
//
// Uses %eax, %edx and up to six x87 stack registers; the x87 stack is back
// at its entry depth on return.  Stack comments are listed top-first,
// entries separated by |.
//

// stack parameter offsets
#define	in	4
#define out	8

	.align 2
.globl C(TransformVector)
C(TransformVector):
	movl	in(%esp),%eax		// %eax = in
	movl	out(%esp),%edx		// %edx = out

	flds	(%eax)		// in[0]
	fmuls	C(vright)		// in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vright[0]
	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]

	flds	4(%eax)		// in[1] | ...
	fmuls	C(vright)+4	// in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
	fmuls	C(vup)+4		// in[1]*vup[1] | in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fmuls	C(vpn)+4		// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...

// fold the three new products into the matching accumulators below them
	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

	flds	8(%eax)		// in[2] | ...
	fmuls	C(vright)+8	// in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
	fmuls	C(vup)+8		// in[2]*vup[2] | in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fmuls	C(vpn)+8		// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...

	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

// results are popped top-first, so they are stored in reverse order
	fstps	8(%edx)		// out[2] = vpn_accum
	fstps	4(%edx)		// out[1] = vup_accum
	fstps	(%edx)		// out[0] = vright_accum

	ret
104
105
//
// BoxOnPlaneSide(emins, emaxs, p)
//
// Arguments are on the stack in the order emins, emaxs, p.  Three floats
// are read through each of emins and emaxs; p is a plane record accessed
// through the pl_normal, pl_dist and pl_signbits offsets (defined outside
// this file).
//
// Returns in %eax:
//	bit 0 (value 1) set when dist1 >= p->dist
//	bit 1 (value 2) set when dist2 <  p->dist
// dist1 and dist2 are dot products of p->normal with a per-axis mix of
// emins and emaxs components chosen by p->signbits; the exact formulas
// precede each LcaseN below.  The tests read x87 condition bit C0, which
// is also set by an unordered compare, so a NaN distance counts as
// "less than".
//
// p->signbits must be 0-7 (it indexes the 8-entry Ljmptab); any other
// value goes to Lerror.
//
// Register use: %eax = signbits and scratch, %ebx = emaxs (saved and
// restored), %ecx = emins, later the result, %edx = p.  Up to six x87
// stack registers are used and the x87 stack is back at its entry depth on
// the normal return path.
//
// Stack comments list the x87 stack top-first, entries separated by |,
// with n0 n1 n2 = p->normal[0..2], minsN = emins[N], maxsN = emaxs[N].
//

// argument offsets from %esp once %ebx has been pushed
#define EMINS	4+4
#define EMAXS	4+8
#define P		4+12

	.align 2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
	pushl	%ebx

	movl	P(%esp),%edx
	movl	EMINS(%esp),%ecx
	xorl	%eax,%eax				// clear the upper bytes of the table index
	movl	EMAXS(%esp),%ebx
	movb	pl_signbits(%edx),%al	// %eax = p->signbits, zero-extended
	cmpb	$8,%al
	jae		Lerror				// unsigned test: also rejects 0x80-0xFF, which
								//  a signed test lets through to index far
								//  past the end of Ljmptab
	flds	pl_normal(%edx)		// n0
	fld		%st(0)				// n0 | n0
// NOTE(review): GAS normally expects an asterisk in front of an indirect
// jmp operand and may warn about this form - confirm with every assembler
// the build uses before changing it.
	jmp		Ljmptab(,%eax,4)


//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
	fmuls	(%ebx)				// n0*maxs0 | n0
	flds	pl_normal+4(%edx)	// n1 | n0*maxs0 | n0
	fxch	%st(2)				// n0 | n0*maxs0 | n1
	fmuls	(%ecx)				// n0*mins0 | n0*maxs0 | n1
	fxch	%st(2)				// n1 | n0*maxs0 | n0*mins0
	fld		%st(0)				// n1 | n1 | n0*maxs0 | n0*mins0
	fmuls	4(%ebx)				// n1*maxs1 | n1 | n0*maxs0 | n0*mins0
	flds	pl_normal+8(%edx)	// n2 | n1*maxs1 | n1 | n0*maxs0 | n0*mins0
	fxch	%st(2)				// n1 | n1*maxs1 | n2 | n0*maxs0 | n0*mins0
	fmuls	4(%ecx)				// n1*mins1 | n1*maxs1 | n2 | n0*maxs0 |
								//  n0*mins0
	fxch	%st(2)				// n2 | n1*maxs1 | n1*mins1 | n0*maxs0 |
								//  n0*mins0
	fld		%st(0)				// n2 | n2 | n1*maxs1 | n1*mins1 | n0*maxs0 |
								//  n0*mins0
	fmuls	8(%ebx)				// n2*maxs2 | n2 | n1*maxs1 | n1*mins1 |
								//  n0*maxs0 | n0*mins0
	fxch	%st(5)				// n0*mins0 | n2 | n1*maxs1 | n1*mins1 |
								//  n0*maxs0 | n2*maxs2
	faddp	%st(0),%st(3)		// n2 | n1*maxs1 | n1*mins1+n0*mins0 |
								//  n0*maxs0 | n2*maxs2
	fmuls	8(%ecx)				// n2*mins2 | n1*maxs1 | n1*mins1+n0*mins0 |
								//  n0*maxs0 | n2*maxs2
	fxch	%st(1)				// n1*maxs1 | n2*mins2 | n1*mins1+n0*mins0 |
								//  n0*maxs0 | n2*maxs2
	faddp	%st(0),%st(3)		// n2*mins2 | n1*mins1+n0*mins0 |
								//  n0*maxs0+n1*maxs1 | n2*maxs2
	fxch	%st(3)				// n2*maxs2 | n1*mins1+n0*mins0 |
								//  n0*maxs0+n1*maxs1 | n2*mins2
	faddp	%st(0),%st(2)		// n1*mins1+n0*mins0 | dist1 | n2*mins2

	jmp		LSetSides

// Lcase1 through Lcase7 repeat the Lcase0 instruction schedule exactly; only
// the emins/emaxs operand of each fmuls changes.  In every case the 1st,
// 3rd and 5th fmuls produce the dist1 terms and the 2nd, 4th and 6th
// produce the dist2 terms, and the stack on exit is
//	(first two dist2 terms summed) | dist1 | third dist2 term

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
	fmuls	(%ecx)				// n0*emins[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// n0*emaxs[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// n1*emins[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// n2*emins[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
	fmuls	(%ebx)				// n0*emaxs[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// n0*emins[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// n1*emins[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// n2*emins[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
	fmuls	(%ecx)				// n0*emins[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// n0*emaxs[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// n1*emins[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// n2*emins[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
	fmuls	(%ebx)				// n0*emaxs[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// n0*emins[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// n1*emins[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// n2*emins[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
	fmuls	(%ecx)				// n0*emins[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// n0*emaxs[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// n1*emins[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// n2*emins[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
	fmuls	(%ebx)				// n0*emaxs[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// n0*emins[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// n1*emins[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// n2*emins[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
	fmuls	(%ecx)				// n0*emins[0]  (dist1 term)
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// n0*emaxs[0]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// n1*emins[1]  (dist1 term)
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// n1*emaxs[1]  (dist2 term)
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// n2*emins[2]  (dist1 term)
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// n2*emaxs[2]  (dist2 term)
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

// Lcase7 falls straight through into LSetSides

LSetSides:

//	sides = 0;
//	if (dist1 >= p->dist)
//		sides = 1;
//	if (dist2 < p->dist)
//		sides |= 2;

	faddp	%st(0),%st(2)		// dist1 | dist2
	fcomps	pl_dist(%edx)		// compare dist1 with p->dist and pop: dist2
	xorl	%ecx,%ecx			// sides = 0
	fnstsw	%ax					// bit 0 of %ah = C0 = (dist1 < p->dist)
	fcomps	pl_dist(%edx)		// compare dist2 with p->dist and pop; the
								//  first result is already safe in %ax
	andb	$1,%ah
	xorb	$1,%ah				// 1 when dist1 >= p->dist
	addb	%ah,%cl				// sides = 0 or 1

	fnstsw	%ax					// bit 0 of %ah = C0 = (dist2 < p->dist)
	andb	$1,%ah
	addb	%ah,%ah				// 2 when dist2 < p->dist
	addb	%ah,%cl				// sides |= 2

//	return sides;

	popl	%ebx
	movl	%ecx,%eax	// return status

	ret


// Reached when p->signbits is not 0-7.  The saved %ebx is still on the
// stack and nothing follows the call, so BOPS_Error presumably never
// returns - TODO confirm; if it did, execution would run off the end of
// this routine.
Lerror:
	call	C(BOPS_Error)
416
417#endif	// id386
418