1fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/*
2fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Mesa 3-D graphics library
3b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul * Version:  6.1
4fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes *
5b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
6fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes *
7fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Permission is hereby granted, free of charge, to any person obtaining a
8fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * copy of this software and associated documentation files (the "Software"),
9fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * to deal in the Software without restriction, including without limitation
10fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * and/or sell copies of the Software, and to permit persons to whom the
12fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Software is furnished to do so, subject to the following conditions:
13fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes *
14fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * The above copyright notice and this permission notice shall be included
15fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * in all copies or substantial portions of the Software.
16fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes *
17fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes *
2422144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes * Authors:
2505a4b37707d2c598ea68c05d07a3d65bcbf5a076Brian Paul *    Gareth Hughes
26fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */
27fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
28fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#ifndef __M_DEBUG_UTIL_H__
29fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define __M_DEBUG_UTIL_H__
30fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
31fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
3281a22ef53953d950052c7bd5a282e96107a25f24Brian Paul#ifdef DEBUG_MATH  /* This code only used for debugging */
33fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
34fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* The cycle counter benchmark is switched on automatically for GCC builds
 * that use the 32-bit x86 or the SPARC assembly code paths.  Comment out
 * the definition of RUN_DEBUG_BENCHMARK below to deactivate it.
 * NOTE: on x86 it needs a CPU that implements the 'rdtsc' instruction
 * (586 or higher).
 */
#if defined(__GNUC__)
#  if (defined(__i386__) && defined(USE_X86_ASM)) || \
      (defined(__sparc__) && defined(USE_SPARC_ASM))
#    define RUN_DEBUG_BENCHMARK
#  endif
#endif
44fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
#define TEST_COUNT              128     /* size of the tested vector array   */

/* Precision is counted in significand bits, the unit in which
 * significand_match() reports its result (MAX_PRECISION for an exact
 * match).
 * NOTE(review): this used to be annotated "allow 4 bits to miss", but
 * MAX_PRECISION - REQUIRED_PRECISION is 14 -- confirm the intended
 * tolerance against the code that compares with REQUIRED_PRECISION.
 */
#define REQUIRED_PRECISION      10      /* presumably the minimum number of
                                         * matching bits that is accepted   */
#define MAX_PRECISION           24      /* max. precision possible           */
49fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
50fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
51fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#ifdef  RUN_DEBUG_BENCHMARK
/* Value of the environment variable MESA_PROFILE.  It decides whether the
 * functions are benchmarked in addition to being verified for correctness.
 */
extern char *mesa_profile;

/* Overhead of the profiling counter, in cycles.  The timing macros below
 * adjust it to the machine at run time and subtract it from every
 * measurement; counter initialization should give very consistent results.
 */
extern long counter_overhead;
63fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
64fab1f07d6ad01463897ae792f4b33738afb07369Jeff Smith/* Modify the number of tests if you like.
65fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * We take the minimum of all results, because every error should be
66fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * positive (time used by other processes, task switches etc).
67fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * It is assumed that all calculations are done in the cache.
68fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */
69fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
707943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#if defined(__i386__)
717943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul
72fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#if 1 /* PPro, PII, PIII version */
73fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* Profiling on the P6 architecture requires a little more work, due to
 * the internal out-of-order execution.  We must perform a serializing
 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
 * sure no other uops are executed when we sample the timestamp counter.
 *
 * INIT_COUNTER() takes two such samples back to back, eight times over,
 * and leaves the smallest difference in counter_overhead.
 *
 * %ebx is saved and restored by hand around 'cpuid' rather than being
 * listed as a clobber.  Since that 'push' moves %esp, the samples are
 * handed back in %esi and %edi: a memory operand that the compiler
 * addresses relative to %esp would refer to the wrong stack slot between
 * the 'push' and the matching 'pop'.
 */
#define  INIT_COUNTER()                                                 \
   do {                                                                 \
      int cycle_i;                                                      \
      counter_overhead = LONG_MAX;                                      \
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {                   \
         long cycle_tmp1 = 0, cycle_tmp2 = 0;                           \
         __asm__ __volatile__ ( "push %%ebx       \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "rdtsc            \n"                   \
                                "mov %%eax, %0    \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "pop %%ebx        \n"                   \
                                "push %%ebx       \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "rdtsc            \n"                   \
                                "mov %%eax, %1    \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "pop %%ebx        \n"                   \
                                : "=&S" (cycle_tmp1), "=&D" (cycle_tmp2) \
                                : : "eax", "ecx", "edx" );              \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
            counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
         }                                                              \
      }                                                                 \
   } while (0)
108fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* BEGIN_RACE(x) and END_RACE(x) bracket the statements to be timed.  The
 * pair expands to a loop that runs those statements ten times, samples
 * the timestamp counter before and after each run, and leaves in x the
 * smallest difference minus counter_overhead.  The caller has to provide
 * the loop variable 'cycle_i'.
 *
 * The sample is returned in %esi instead of through a memory operand
 * because the hand-written 'push %ebx' moves %esp; a stack slot addressed
 * relative to %esp would be off by one word inside the asm.
 */
#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      long cycle_tmp1 = 0, cycle_tmp2 = 0;                              \
      __asm__ __volatile__ ( "push %%ebx       \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "rdtsc            \n"                      \
                             "mov %%eax, %0    \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "pop %%ebx        \n"                      \
                             : "=&S" (cycle_tmp1)                       \
                             : : "eax", "ecx", "edx" );

#define END_RACE(x)                                                     \
      __asm__ __volatile__ ( "push %%ebx       \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "rdtsc            \n"                      \
                             "mov %%eax, %0    \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "pop %%ebx        \n"                      \
                             : "=&S" (cycle_tmp2)                       \
                             : : "eax", "ecx", "edx" );                 \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;
140fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
141fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#else /* PPlain, PMMX version */
142fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* To ensure accurate results, we stall the pipelines with the
 * non-pairable 'cdq' instruction.  This ensures all the code being
 * profiled is complete when the 'rdtsc' instruction executes.
 *
 * INIT_COUNTER(x) takes two samples back to back, 32 times over, and
 * leaves the smallest difference in x.
 *
 * Every asm statement is volatile so that the compiler can neither merge
 * the two identical 'rdtsc' reads nor move them, and 'cdq' declares the
 * %edx it overwrites.
 */
#define  INIT_COUNTER(x)                                                \
   do {                                                                 \
      int cycle_i;                                                      \
      x = LONG_MAX;                                                     \
      for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {                  \
         long cycle_tmp1 = 0, cycle_tmp2 = 0, dummy = 0;                \
         __asm__ __volatile__ ( "cdq" : : : "edx" );                    \
         __asm__ __volatile__ ( "cdq" : : : "edx" );                    \
         __asm__ __volatile__ ( "rdtsc"                                 \
                                : "=a" (cycle_tmp1), "=d" (dummy) );    \
         __asm__ __volatile__ ( "cdq" : : : "edx" );                    \
         __asm__ __volatile__ ( "cdq" : : : "edx" );                    \
         __asm__ __volatile__ ( "rdtsc"                                 \
                                : "=a" (cycle_tmp2), "=d" (dummy) );    \
         (void) dummy;   /* high half of the counter is not used */     \
         if ( x > (cycle_tmp2 - cycle_tmp1) )                           \
            x = cycle_tmp2 - cycle_tmp1;                                \
      }                                                                 \
   } while (0)
165fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* BEGIN_RACE(x) and END_RACE(x) bracket the statements to be timed.  The
 * pair expands to a loop that runs those statements 16 times and leaves
 * in x the smallest 'rdtsc' difference minus counter_overhead.  The
 * caller has to provide the loop variable 'cycle_i'.
 *
 * The asm statements are volatile so the compiler keeps both 'rdtsc'
 * reads where they are written, and 'cdq' declares the %edx it
 * overwrites.
 */
#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                     \
      long cycle_tmp1 = 0, cycle_tmp2 = 0, dummy = 0;                   \
      __asm__ __volatile__ ( "cdq" : : : "edx" );                       \
      __asm__ __volatile__ ( "cdq" : : : "edx" );                       \
      __asm__ __volatile__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );


#define END_RACE(x)                                                     \
      __asm__ __volatile__ ( "cdq" : : : "edx" );                       \
      __asm__ __volatile__ ( "cdq" : : : "edx" );                       \
      __asm__ __volatile__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
      (void) dummy;      /* high half of the counter is not used */     \
      if ( x > (cycle_tmp2 - cycle_tmp1) )                              \
         x = cycle_tmp2 - cycle_tmp1;                                   \
   }                                                                    \
   x -= counter_overhead;
185fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
186fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif
187fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
188118c2bc860037f084166d3406039d82198ddf3d6Brian Paul#elif defined(__x86_64__)
18942fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul
/* Read the time stamp counter into val: 'rdtsc' returns the low half in
 * %eax and the high half in %edx, which are combined into one 64-bit
 * value.  The halves are widened to unsigned long long so the shift by 32
 * stays defined even where 'long' is only 32 bits wide, and the
 * temporaries carry a suffix so they cannot capture a caller variable
 * that happens to be called 'a' or 'd'.
 */
#define rdtscll(val) do { \
     unsigned int rdtsc_lo_, rdtsc_hi_; \
     __asm__ __volatile__("rdtsc" : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_)); \
     (val) = ((unsigned long long)rdtsc_lo_) | \
             (((unsigned long long)rdtsc_hi_) << 32); \
} while(0)
19542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul
/* x86-64 calibration of the counter overhead, modelled on the i386 PIII
 * version: two time stamp samples are taken back to back via rdtscll(),
 * sixteen times over, and the smallest gap ends up in counter_overhead.
 */
#define  INIT_COUNTER()                                                 \
   do {                                                                 \
      int calib_pass = 0;                                               \
      counter_overhead = LONG_MAX;                                      \
      while ( calib_pass < 16 ) {                                       \
         unsigned long stamp_first, stamp_second;                       \
         rdtscll(stamp_first);                                          \
         rdtscll(stamp_second);                                         \
         if ( (stamp_second - stamp_first) < counter_overhead ) {       \
            counter_overhead = stamp_second - stamp_first;              \
         }                                                              \
         calib_pass++;                                                  \
      }                                                                 \
   } while (0)
21042fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul
21142fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul
/* BEGIN_RACE(x) opens, and END_RACE(x) closes, a loop around the
 * statements to be timed.  The loop body runs ten times; each pass reads
 * the time stamp counter before and after, and x keeps the smallest
 * difference seen.  After the loop counter_overhead is subtracted from x.
 * The loop variable 'cycle_i' must be declared by the caller.
 */
#define  BEGIN_RACE(x)                                                  \
   (x) = LONG_MAX;                                                      \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      unsigned long cycle_tmp1, cycle_tmp2;                             \
      rdtscll(cycle_tmp1);

#define END_RACE(x)                                                     \
      rdtscll(cycle_tmp2);                                              \
      if ( (x) > (cycle_tmp2 - cycle_tmp1) ) {                          \
         (x) = cycle_tmp2 - cycle_tmp1;                                 \
      }                                                                 \
   }                                                                    \
   (x) -= counter_overhead;
22542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul
2267943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#elif defined(__sparc__)
2277943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul
/* SPARC: nothing is measured here; counter_overhead is simply set to the
 * constant 5.
 */
#define  INIT_COUNTER()                                                 \
   do {                                                                 \
      counter_overhead = 5;                                             \
   } while (0)
2307943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul
/* SPARC timing brackets.  BEGIN_RACE(x) opens and END_RACE(x) closes a
 * ten-pass loop around the statements to be timed.  The two samples live
 * in the fixed registers %l0 and %l1 and are filled by hand-encoded
 * 'rd %tick' instructions; x keeps the smallest difference and finally
 * has counter_overhead subtracted.  The caller declares 'cycle_i'.
 */
#define  BEGIN_RACE(x)                                                  \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      register long cycle_tmp1 __asm__("l0");                           \
      register long cycle_tmp2 __asm__("l1");                           \
      /* rd %tick, %l0  --  save timestamp */                           \
      __asm__ __volatile__ ( ".word 0xa1410000" : "=r" (cycle_tmp1) );

#define END_RACE(x)                                                     \
      /* rd %tick, %l1 */                                               \
      __asm__ __volatile__ ( ".word 0xa3410000" : "=r" (cycle_tmp2) );  \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;
2457943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul
2467943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#else
2477943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#error Your processor is not supported for RUN_XFORM_BENCHMARK
2487943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#endif
2497943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul
250fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#else
251fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* Benchmarking is disabled: both timing brackets expand to nothing, so
 * the statements between them simply run once.
 */
#define BEGIN_RACE(x)   /* nothing */
#define END_RACE(x)     /* nothing */
254fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
255fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif
256fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
257fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
2585e23af22f708a66695c0e44e599c26f02d8d4dcdGareth Hughes/* =============================================================
259fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Helper functions
260fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */
261fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
262fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesstatic GLfloat rnd( void )
263fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes{
264fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
265fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   GLfloat gran = (GLfloat)(1 << 13);
266fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
267fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   f = (GLfloat)(GLint)(f * gran) / gran;
268fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
269fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   return f * 2.0 - 1.0;
270fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes}
271fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
272fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesstatic int significand_match( GLfloat a, GLfloat b )
273fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes{
274fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   GLfloat d = a - b;
275fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   int a_ex, b_ex, d_ex;
276fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
277fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   if ( d == 0.0F ) {
278fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes      return MAX_PRECISION;   /* Exact match */
279fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   }
280fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
281fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   if ( a == 0.0F || b == 0.0F ) {
282fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes      /* It would probably be better to check if the
283fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes       * non-zero number is denormalized and return
284fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes       * the index of the highest set bit here.
285fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes       */
286fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes      return 0;
287fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   }
288fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
2896fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul   FREXPF( a, &a_ex );
2906fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul   FREXPF( b, &b_ex );
2916fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul   FREXPF( d, &d_ex );
292fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
293fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   if ( a_ex < b_ex ) {
294fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes      return a_ex - d_ex;
295fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   } else {
296fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes      return b_ex - d_ex;
297fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes   }
298fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes}
299fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* Small integer tags.  NOTE(review): their users are not part of this
 * header -- presumably they classify elements of a test template as zero,
 * +1, -1 or variable; confirm against the code that includes this file.
 */
enum {
   NEG = -1,
   NIL = 0,
   ONE = 1,
   VAR = 2
};
301fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
/* Ensure our arrays are correctly aligned.
 *
 * ALIGN16(type, array) expands to a declaration of 'array' with the given
 * type (which may include a storage class) and asks for 16-byte alignment
 * where the compiler has a way to express it.  Every branch defines the
 * same two-argument macro and always emits the declaration, so code
 * written with ALIGN16 still compiles -- merely without the alignment --
 * on compilers that cannot provide it.
 */
#if defined(__GNUC__)
#  define ALIGN16(type, array)	type array __attribute__ ((aligned (16)))
#elif defined(_MSC_VER)
   /* __declspec goes in front of the declaration. */
#  define ALIGN16(type, array)	__declspec(align(16)) type array
#elif defined(__WATCOMC__)
   /* Watcom does not support alignment requests; declare the array as is. */
#  define ALIGN16(type, array)	type array
#elif defined(__xlC__)
#  define ALIGN16(type, array)       type __align (16) array
#else
#  warning "ALIGN16 will not 16-byte align!\n"
#  define ALIGN16(type, array)	type array
#endif
316fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
317fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
31881a22ef53953d950052c7bd5a282e96107a25f24Brian Paul#endif /* DEBUG_MATH */
319fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes
320fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif /* __M_DEBUG_UTIL_H__ */
321