1fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* 2fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Mesa 3-D graphics library 3b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul * Version: 6.1 4fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 5b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. 6fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 7fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Permission is hereby granted, free of charge, to any person obtaining a 8fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * copy of this software and associated documentation files (the "Software"), 9fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * to deal in the Software without restriction, including without limitation 10fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * and/or sell copies of the Software, and to permit persons to whom the 12fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Software is furnished to do so, subject to the following conditions: 13fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 14fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * The above copyright notice and this permission notice shall be included 15fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * in all copies or substantial portions of the Software. 16fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 17fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 21fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 2422144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes * Authors: 2505a4b37707d2c598ea68c05d07a3d65bcbf5a076Brian Paul * Gareth Hughes 26fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 27fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 28fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#ifndef __M_DEBUG_UTIL_H__ 29fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define __M_DEBUG_UTIL_H__ 30fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 31fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 3281a22ef53953d950052c7bd5a282e96107a25f24Brian Paul#ifdef DEBUG_MATH /* This code only used for debugging */ 33fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 34fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 35fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* Comment this out to deactivate the cycle counter. 36fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher) 37fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * (hope, you don't try to debug Mesa on a 386 ;) 38fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 397943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#if defined(__GNUC__) && \ 407943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul ((defined(__i386__) && defined(USE_X86_ASM)) || \ 417943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul (defined(__sparc__) && defined(USE_SPARC_ASM))) 42fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define RUN_DEBUG_BENCHMARK 43fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif 44fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 45fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define TEST_COUNT 128 /* size of the tested vector array */ 46fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 47fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define REQUIRED_PRECISION 10 /* allow 4 bits to miss */ 48fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define MAX_PRECISION 24 /* max. precision possible */ 49fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 50fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 51fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#ifdef RUN_DEBUG_BENCHMARK 52fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* Overhead of profiling counter in cycles. Automatically adjusted to 53fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * your machine at run time - counter initialization should give very 54fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * consistent results. 55fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 56fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesextern long counter_overhead; 57fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 58fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* This is the value of the environment variable MESA_PROFILE, and is 59fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * used to determine if we should benchmark the functions as well as 60fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * verify their correctness. 61fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 62fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesextern char *mesa_profile; 63fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 64fab1f07d6ad01463897ae792f4b33738afb07369Jeff Smith/* Modify the number of tests if you like. 65fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * We take the minimum of all results, because every error should be 66fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * positive (time used by other processes, task switches etc). 67fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * It is assumed that all calculations are done in the cache. 68fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 69fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 707943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#if defined(__i386__) 717943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 72fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#if 1 /* PPro, PII, PIII version */ 73fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 74fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* Profiling on the P6 architecture requires a little more work, due to 75fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * the internal out-of-order execution. We must perform a serializing 76fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * 'cpuid' instruction before and after the 'rdtsc' instructions to make 77fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * sure no other uops are executed when we sample the timestamp counter. 78fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 79fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define INIT_COUNTER() \ 80fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes do { \ 81fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes int cycle_i; \ 82fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes counter_overhead = LONG_MAX; \ 83fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) { \ 84fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes long cycle_tmp1 = 0, cycle_tmp2 = 0; \ 85fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ __volatile__ ( "push %%ebx \n" \ 86fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 87fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 88fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "rdtsc \n" \ 89fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "mov %%eax, %0 \n" \ 90fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 91fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 92fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "pop %%ebx \n" \ 93fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "push %%ebx \n" \ 94fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 95fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 96fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "rdtsc \n" \ 97fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "mov %%eax, %1 \n" \ 98fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 99fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 100fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "pop %%ebx \n" \ 101fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : "=m" (cycle_tmp1), "=m" (cycle_tmp2) \ 102fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : : "eax", "ecx", "edx" ); \ 103fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \ 104fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes counter_overhead = cycle_tmp2 - cycle_tmp1; \ 105fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 106fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 107fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } while (0) 108fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 109fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define BEGIN_RACE(x) \ 110fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = LONG_MAX; \ 111fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \ 112fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes long cycle_tmp1 = 0, cycle_tmp2 = 0; \ 113fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ __volatile__ ( "push %%ebx \n" \ 114fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 115fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 116fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "rdtsc \n" \ 117fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "mov %%eax, %0 \n" \ 118fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 119fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 120fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "pop %%ebx \n" \ 121fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : "=m" (cycle_tmp1) \ 122fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : : "eax", "ecx", "edx" ); 123fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 124fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define END_RACE(x) \ 125fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ __volatile__ ( "push %%ebx \n" \ 126fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 127fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 128fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "rdtsc \n" \ 129fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "mov %%eax, %0 \n" \ 130fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "xor %%eax, %%eax \n" \ 131fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "cpuid \n" \ 132fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes "pop %%ebx \n" \ 133fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : "=m" (cycle_tmp2) \ 134fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes : : "eax", "ecx", "edx" ); \ 135fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( x > (cycle_tmp2 - cycle_tmp1) ) { \ 136fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = cycle_tmp2 - cycle_tmp1; \ 137fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 138fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 139fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x -= counter_overhead; 140fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 141fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#else /* PPlain, PMMX version */ 142fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 143fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* To ensure accurate results, we stall the pipelines with the 144fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * non-pairable 'cdq' instruction. This ensures all the code being 145fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * profiled is complete when the 'rdtsc' instruction executes. 146fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 147fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define INIT_COUNTER(x) \ 148fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes do { \ 149fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes int cycle_i; \ 150fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = LONG_MAX; \ 151fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) { \ 152fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes long cycle_tmp1, cycle_tmp2, dummy; \ 153fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \ 154fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \ 155fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 156fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 157fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); \ 158fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 159fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 160fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \ 161fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( x > (cycle_tmp2 - cycle_tmp1) ) \ 162fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = cycle_tmp2 - cycle_tmp1; \ 163fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 164fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } while (0) 165fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 166fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define BEGIN_RACE(x) \ 167fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = LONG_MAX; \ 168fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \ 169fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes long cycle_tmp1, cycle_tmp2, dummy; \ 170fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \ 171fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \ 172fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 173fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 174fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); 175fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 176fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 177fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define END_RACE(x) \ 178fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 179fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "cdq" ); \ 180fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \ 181fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( x > (cycle_tmp2 - cycle_tmp1) ) \ 182fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x = cycle_tmp2 - cycle_tmp1; \ 183fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } \ 184fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes x -= counter_overhead; 185fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 186fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif 187fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 188118c2bc860037f084166d3406039d82198ddf3d6Brian Paul#elif defined(__x86_64__) 18942fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 19042fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul#define rdtscll(val) do { \ 19142fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul unsigned int a,d; \ 19242fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); \ 19342fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \ 19442fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul} while(0) 19542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 19642fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul/* Copied from i386 PIII version */ 19742fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul#define INIT_COUNTER() \ 19842fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul do { \ 19942fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul int cycle_i; \ 20042fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul counter_overhead = LONG_MAX; \ 20142fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \ 20242fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul unsigned long cycle_tmp1, cycle_tmp2; \ 20342fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul rdtscll(cycle_tmp1); \ 20442fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul rdtscll(cycle_tmp2); \ 20542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \ 20642fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul counter_overhead = cycle_tmp2 - cycle_tmp1; \ 20742fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul } \ 20842fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul } \ 20942fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul } while (0) 21042fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 21142fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 21242fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul#define BEGIN_RACE(x) \ 21342fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul x = LONG_MAX; \ 21442fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \ 21542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul unsigned long cycle_tmp1, cycle_tmp2; \ 21642fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul rdtscll(cycle_tmp1); \ 21742fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 21842fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul#define END_RACE(x) \ 21942fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul rdtscll(cycle_tmp2); \ 22042fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul if ( x > (cycle_tmp2 - cycle_tmp1) ) { \ 22142fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul x = cycle_tmp2 - cycle_tmp1; \ 22242fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul } \ 22342fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul } \ 22442fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul x -= counter_overhead; 22542fa81275c67d7d1ad8d255120af0ffeeb46b963Brian Paul 2267943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#elif defined(__sparc__) 2277943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 2287943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#define INIT_COUNTER() \ 2297943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul do { counter_overhead = 5; } while(0) 2307943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 2317943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#define BEGIN_RACE(x) \ 2327943b349d696f8030f0d2f836ad42a762f4c6026Brian Paulx = LONG_MAX; \ 2337943b349d696f8030f0d2f836ad42a762f4c6026Brian Paulfor (cycle_i = 0; cycle_i <10; cycle_i++) { \ 23418e01393b83b2a1b78a0c0cee7fd3713cb3bc373David S. Miller register long cycle_tmp1 __asm__("l0"); \ 23518e01393b83b2a1b78a0c0cee7fd3713cb3bc373David S. Miller register long cycle_tmp2 __asm__("l1"); \ 2367943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul /* rd %tick, %l0 */ \ 2377943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1)); /* save timestamp */ 2387943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 2397943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#define END_RACE(x) \ 2407943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul /* rd %tick, %l1 */ \ 2417943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2)); \ 2427943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1; \ 2437943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul} \ 2447943b349d696f8030f0d2f836ad42a762f4c6026Brian Paulx -= counter_overhead; 2457943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 2467943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#else 2477943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#error Your processor is not supported for RUN_XFORM_BENCHMARK 2487943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul#endif 2497943b349d696f8030f0d2f836ad42a762f4c6026Brian Paul 250fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#else 251fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 252fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define BEGIN_RACE(x) 253fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#define END_RACE(x) 254fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 255fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif 256fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 257fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 2585e23af22f708a66695c0e44e599c26f02d8d4dcdGareth Hughes/* ============================================================= 259fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * Helper functions 260fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 261fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 262fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesstatic GLfloat rnd( void ) 263fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes{ 264fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX; 265fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes GLfloat gran = (GLfloat)(1 << 13); 266fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 267fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes f = (GLfloat)(GLint)(f * gran) / gran; 268fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 269fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes return f * 2.0 - 1.0; 270fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes} 271fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 272fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesstatic int significand_match( GLfloat a, GLfloat b ) 273fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes{ 274fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes GLfloat d = a - b; 275fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes int a_ex, b_ex, d_ex; 276fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 277fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( d == 0.0F ) { 278fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes return MAX_PRECISION; /* Exact match */ 279fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } 280fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 281fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( a == 0.0F || b == 0.0F ) { 282fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes /* It would probably be better to check if the 283fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * non-zero number is denormalized and return 284fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes * the index of the highest set bit here. 285fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 286fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes return 0; 287fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } 288fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 2896fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul FREXPF( a, &a_ex ); 2906fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul FREXPF( b, &b_ex ); 2916fe7a0dc01887e39f028d8d400b98d199744c18fBrian Paul FREXPF( d, &d_ex ); 292fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 293fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes if ( a_ex < b_ex ) { 294fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes return a_ex - d_ex; 295fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } else { 296fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes return b_ex - d_ex; 297fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes } 298fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes} 299fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 300fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughesenum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 }; 301fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 302fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes/* Ensure our arrays are correctly aligned. 303fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes */ 304fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#if defined(__GNUC__) 305b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul# define ALIGN16(type, array) type array __attribute__ ((aligned (16))) 306f8e50dd796a72c396bb22d414feba75c426c5e7eJosé Fonseca#elif defined(_MSC_VER) 307b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul# define ALIGN16(type, array) type array __declspec(align(16)) /* GH: Does this work? */ 3082cdd699a77f7801b1e37b72108a33fa95354e67aKendall Bennett#elif defined(__WATCOMC__) 309b8f29f29eb611c92d43aaf8ffcd2d9743b3af967Brian Paul# define ALIGN16(type, array) /* Watcom does not support this */ 310cbc96b705d1dc1c6aedba29ad6ce16d526e3b50eBrian Paul#elif defined(__xlC__) 311cbc96b705d1dc1c6aedba29ad6ce16d526e3b50eBrian Paul# define ALIGN16(type, array) type __align (16) array 312fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#else 313fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes# warning "ALIGN16 will not 16-byte align!\n" 314fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes# define ALIGN16 315fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif 316fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 317fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 31881a22ef53953d950052c7bd5a282e96107a25f24Brian Paul#endif /* DEBUG_MATH */ 319fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes 320fe69cb4b9bff800b6078ea7da5ea18bab05678d8Gareth Hughes#endif /* __M_DEBUG_UTIL_H__ */ 321