1/*
2Copyright (C) 1996-1997 Id Software, Inc.
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18
19*/
20//
21// math.s
22// x86 assembly-language math routines.
23
24#define GLQUAKE	1	// don't include unneeded defs
25#include "asm_i386.h"
26#include "quakeasm.h"
27
28
29#if	id386
30
31	.data
32
33	.align	4
34Ljmptab:	.long	Lcase0, Lcase1, Lcase2, Lcase3
35			.long	Lcase4, Lcase5, Lcase6, Lcase7
36
37	.text
38
39// TODO: rounding needed?
40// stack parameter offset
41#define	val	4
42
43.globl C(Invert24To16)
44C(Invert24To16):
45
46	movl	val(%esp),%ecx
47	movl	$0x100,%edx		// 0x10000000000 as dividend
48	cmpl	%edx,%ecx
49	jle		LOutOfRange
50
51	subl	%eax,%eax
52	divl	%ecx
53
54	ret
55
56LOutOfRange:
57	movl	$0xFFFFFFFF,%eax
58	ret
59
// TransformVector (in, out)
//
// In:    in  at 4(%esp): pointer to three consecutive floats
//        out at 8(%esp): pointer to three consecutive floats
// Out:   out[0] = in . vright, out[1] = in . vup, out[2] = in . vpn
// Clobb: %eax, %edx, FPU stack (left empty on return)
//
// Every element of in is read before anything is written to out.
// Stack comments list st(0) first; r, u and p are the running sums for
// vright, vup and vpn.

#define	in	4
#define out	8

	.align 2
.globl C(TransformVector)
C(TransformVector):
	movl	in(%esp),%eax
	movl	out(%esp),%edx

// element 0 seeds the three sums
	flds	(%eax)				// in[0]
	fld		%st(0)				// in[0] | in[0]
	fld		%st(0)				// in[0] | in[0] | in[0]
	fmuls	C(vright)			// r | in[0] | in[0]
	fxch	%st(2)				// in[0] | in[0] | r
	fmuls	C(vup)				// u | in[0] | r
	fxch	%st(1)				// in[0] | u | r
	fmuls	C(vpn)				// p | u | r

// element 1: three products first, then fold them into the sums
	flds	4(%eax)				// in[1] | p | u | r
	fld		%st(0)				// in[1] | in[1] | p | u | r
	fld		%st(0)				// in[1] | in[1] | in[1] | p | u | r
	fmuls	C(vright)+4			// in[1]*vright[1] | in[1] | in[1] | p | u | r
	fxch	%st(2)				// in[1] | in[1] | in[1]*vright[1] | p | u | r
	fmuls	C(vup)+4			// in[1]*vup[1] | in[1] | in[1]*vright[1] | ...
	fxch	%st(1)				// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fmuls	C(vpn)+4			// in[1]*vpn[1] | in[1]*vup[1] |
								//  in[1]*vright[1] | p | u | r
	faddp	%st(0),%st(3)		// in[1]*vup[1] | in[1]*vright[1] | p | u | r
	faddp	%st(0),%st(3)		// in[1]*vright[1] | p | u | r
	faddp	%st(0),%st(3)		// p | u | r

// element 2: same pattern
	flds	8(%eax)				// in[2] | p | u | r
	fld		%st(0)				// in[2] | in[2] | p | u | r
	fld		%st(0)				// in[2] | in[2] | in[2] | p | u | r
	fmuls	C(vright)+8			// in[2]*vright[2] | in[2] | in[2] | p | u | r
	fxch	%st(2)				// in[2] | in[2] | in[2]*vright[2] | p | u | r
	fmuls	C(vup)+8			// in[2]*vup[2] | in[2] | in[2]*vright[2] | ...
	fxch	%st(1)				// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fmuls	C(vpn)+8			// in[2]*vpn[2] | in[2]*vup[2] |
								//  in[2]*vright[2] | p | u | r
	faddp	%st(0),%st(3)		// in[2]*vup[2] | in[2]*vright[2] | p | u | r
	faddp	%st(0),%st(3)		// in[2]*vright[2] | p | u | r
	faddp	%st(0),%st(3)		// p | u | r

	fstps	8(%edx)				// out[2] = p
	fstps	4(%edx)				// out[1] = u
	fstps	(%edx)				// out[0] = r

	ret
105
106
// BoxOnPlaneSide (emins, emaxs, p)
//
// In:    emins at 4(%esp), emaxs at 8(%esp): pointers to three floats each
//        p at 12(%esp): pointer to a plane record; the pl_normal, pl_dist
//        and pl_signbits field offsets are not defined in this file
//        (presumably they come from the included headers)
// Out:   %eax = sides: bit 0 set when dist1 >= p->dist,
//                      bit 1 set when dist2 <  p->dist
// Clobb: %ecx, %edx, flags, FPU stack (left empty on return); %ebx is saved
//        and restored
//
// The byte at pl_signbits selects, through Ljmptab, which of emins/emaxs
// feeds each term of dist1 and dist2. Values of 8 and above are routed to
// BOPS_Error.
//
// The parameter offsets below include the 4 bytes taken by the pushed %ebx.
// Stack comments list st(0) first.

#define EMINS	4+4
#define EMAXS	4+8
#define P		4+12

	.align 2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
	pushl	%ebx

	movl	P(%esp),%edx
	movl	EMINS(%esp),%ecx
	xorl	%eax,%eax			// clear the upper bits of the table index
	movl	EMAXS(%esp),%ebx
	movb	pl_signbits(%edx),%al
	cmpb	$8,%al
	jae		Lerror				// unsigned test: the index is zero-extended,
								//  so 0x80..0xFF must be rejected as well
	flds	pl_normal(%edx)		// p->normal[0]
	fld		%st(0)				// p->normal[0] | p->normal[0]
	jmp		*Ljmptab(,%eax,4)	// indirect jump through the table


//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]
	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
								//  p->normal[1]
	fmuls	(%ecx)				// p->normal[0]*emins[0] |
								//  p->normal[0]*emaxs[0] | p->normal[1]
	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[2] | p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(5)				// p->normal[0]*emins[0] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fxch	%st(1)				//p->normal[1]*emaxs[1] |
								// p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emaxs[2]
	fxch	%st(3)				//p->normal[2]*emaxs[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emins[2]
	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// dist1 | p->normal[2]*emins[2]

	jmp		LSetSides

// Cases 1 through 7 run the same instruction schedule as case 0 and leave
// the FPU stack in the same layout (partial dist2 | dist1 | last dist2
// term); only the choice of emins (%ecx) or emaxs (%ebx) per term differs.

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)
								// case 7 falls through into LSetSides

LSetSides:

//	sides = 0;
//	if (dist1 >= p->dist)
//		sides = 1;
//	if (dist2 < p->dist)
//		sides |= 2;

	faddp	%st(0),%st(2)		// dist1 | dist2
	fcomps	pl_dist(%edx)		// compare dist1 with p->dist, pop
	xorl	%ecx,%ecx			// sides = 0 (emins pointer no longer needed)
	fnstsw	%ax					// C0 lands in bit 0 of %ah
	fcomps	pl_dist(%edx)		// compare dist2 with p->dist, pop
	andb	$1,%ah				// C0 is set when dist1 is below p->dist
	xorb	$1,%ah				// invert: 1 when dist1 >= p->dist
	addb	%ah,%cl

	fnstsw	%ax
	andb	$1,%ah				// 1 when dist2 is below p->dist
	addb	%ah,%ah				// move it to bit 1
	addb	%ah,%cl

//	return sides;

	popl	%ebx
	movl	%ecx,%eax	// return status

	ret


Lerror:
	call	C(BOPS_Error)
// BOPS_Error is not expected to come back. If it ever does, unwind the
// saved register and report no sides instead of running off the end of
// the function.
	popl	%ebx
	xorl	%eax,%eax
	ret
417
418#endif	// id386
419