1/* File "FastTimes.c" - Original code by Matt Slot <fprefect@ambrosiasw.com>  */
2/* Created 4/24/99    - This file is hereby placed in the public domain       */
3/* Updated 5/21/99    - Calibrate to VIA, add TBR support, renamed functions  */
4/* Updated 10/4/99    - Use AbsoluteToNanoseconds() in case Absolute = double */
5/* Updated 2/15/00    - Check for native Time Manager, no need to calibrate   */
6/* Updated 2/19/00    - Fixed default value for gScale under native Time Mgr  */
7/* Updated 3/21/00    - Fixed ns conversion, create 2 different scale factors */
8/* Updated 5/03/00    - Added copyright and placed into PD. No code changes   */
9/* Updated 8/01/00    - Made "Carbon-compatible" by replacing LMGetTicks()    */
10
11/* This file is Copyright (C) Matt Slot, 1999-2012. It is hereby placed into
12   the public domain. The author makes no warranty as to fitness or stability */
13
14#include <Gestalt.h>
15#include <LowMem.h>
16#include <CodeFragments.h>
17#include <DriverServices.h>
18#include <Timer.h>
19
20#include "FastTimes.h"
21
/* This file tests GENERATINGPOWERPC throughout. When TARGET_CPU_PPC is
   defined, make GENERATINGPOWERPC mirror it; any earlier definition is
   removed first so the redefinition does not draw a compiler complaint. */
#if defined(TARGET_CPU_PPC)
#  undef GENERATINGPOWERPC
#  define GENERATINGPOWERPC TARGET_CPU_PPC
#endif
26
27/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
28/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
29/*
30	On 680x0 machines, we just use Microseconds().
31
32	On PowerPC machines, we try several methods:
33	  * DriverServicesLib is available on all PCI PowerMacs, and perhaps
34	    some NuBus PowerMacs. If it is, we use UpTime() : Overhead = 2.1 µsec.
35	  * The PowerPC 601 has a built-in "real time clock" RTC, and we fall
36	    back to that, accessing it directly from asm. Overhead = 1.3 µsec.
37	  * Later PowerPCs have an accurate "time base register" TBR, and we
38	    fall back to that, access it from PowerPC asm. Overhead = 1.3 µsec.
39	  * We can also try Microseconds() which is emulated : Overhead = 36 µsec.
40
41	On PowerPC machines, we avoid the following:
42	  * OpenTransport is available on all PCI and some NuBus PowerMacs, but it
43	    uses UpTime() if available and falls back to Microseconds() otherwise.
44	  * InputSprocket is available on many PowerMacs, but again it uses
45	    UpTime() if available and falls back to Microseconds() otherwise.
46
47	Another PowerPC note: certain configurations, especially 3rd party upgrade
48	cards, may return inaccurate timings for the CPU or memory bus -- causing
49	skew in various system routines (up to 20% drift!). The VIA chip is very
50	accurate, and it's the basis for the Time Manager and Microseconds().
51	Unfortunately, it's also very slow because the MacOS has to (a) switch to
52	68K and (b) poll for a VIA event.
53
	We compensate for the drift by calibrating a floating point scale factor
	between our fast method and the accurate timer at startup, then convert
	each sample quickly on the fly. I'd rather not have the initialization
	overhead (the calibration busy-waits for 60 ticks) -- but it's simply
	necessary for accurate timing. You can drop the calibration interval
	down to 30 ticks if you prefer, but that's as low as I'd recommend.
59
60	Under MacOS 9, "new world" Macs (iMacs, B+W G3s and G+W G4s) have a native
61	Time Manager implementation: UpTime(), Microseconds(), and TickCount() are
62	all based on the same underlying counter. This makes it silly to calibrate
63	UpTime() against TickCount(). We now check for this feature using Gestalt(),
64	and skip the whole calibration step if possible.
65
66*/
67/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
68/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
69
/* Combine a {hi, lo} pair into a single double: the .hi field is multiplied
   by one billion and the .lo field is added to it. Used on values read from
   the 601 real time clock. */
#define RTCToNano(w)	(((double) (w).hi * 1.0e9) + (double) (w).lo)

/* Reinterpret the storage of a two-word "wide" value as one UInt64. The
   macro takes the operand's address, so the operand must be an lvalue. */
#define WideTo64bit(w)	(*((UInt64 *) &(w)))
72
/* LMGetTicks() is not in Carbon and TickCount() has a fair bit of overhead,
   so for speed we always read the tick counter straight out of low memory.
   This is a Mac OS X no-no, but it always works on those systems that don't
   have a native Time Manager (ie, anything before MacOS 9) -- regardless of
   whether we are in Carbon or not! */
#define kLowMemTicksAddress	0x16A
#define MyLMGetTicks()	(*(volatile UInt32 *) kLowMemTicksAddress)
78
#if GENERATINGPOWERPC

/* Routines defined further down in this file */
static asm UnsignedWide PollRTC(void);
static asm UnsignedWide PollTBR(void);
static Ptr FindFunctionInSharedLib(StringPtr libName, StringPtr funcName);

/* Signatures of the two routines looked up in DriverServicesLib */
typedef AbsoluteTime 	(*UpTimeProcPtr)(void);
typedef Nanoseconds 	(*A2NSProcPtr)(AbsoluteTime);

/* Entry points resolved by FastInitialize(); NULL until (and unless) found */
static UpTimeProcPtr 	gUpTime = NULL;
static A2NSProcPtr 		gA2NS = NULL;

/* Set once FastInitialize() has run to completion */
static Boolean			gInited = false;

/* Which time source FastInitialize() picked */
static Boolean			gNative = false;	/* UpTime() with a native Time Manager */
static Boolean			gUseRTC = false;	/* poll the 601 real time clock */
static Boolean			gUseTBR = false;	/* poll the time base register */

/* Multipliers applied to a raw reading; the defaults assume nanoseconds */
static double			gScaleUSec = 1.0 / 1000.0;    /* 1 / ( nsec / usec) */
static double			gScaleMSec = 1.0 / 1000000.0; /* 1 / ( nsec / msec) */

#endif /* GENERATINGPOWERPC */
99
100/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
101/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
102
#if GENERATINGPOWERPC
/* Take one raw reading from the time source being calibrated: the value of
   RTCToNano() when the 601 RTC is selected, otherwise the unscaled 64-bit
   value returned by AbsoluteToNanoseconds(UpTime()) or by the TBR. The
   reading stays in the integer domain so that subtracting two samples does
   not lose low-order bits to floating point rounding. */
static UInt64 ReadCalibrationSource(void) {
	UnsignedWide	wide;

	if (gUpTime) {
		wide = (*gA2NS)((*gUpTime)());
		return(WideTo64bit(wide));
		}
	if (gUseRTC) {
		wide = PollRTC();
		return((UInt64) RTCToNano(wide));
		}
	wide = PollTBR();
	return(WideTo64bit(wide));
	}
#endif /* GENERATINGPOWERPC */

/* Choose the time source used by FastMicroseconds() / FastMilliseconds() and
   compute the scale factors that convert its raw readings.

   On PowerPC builds this:
     1. looks up UpTime() and AbsoluteToNanoseconds() in DriverServicesLib;
     2. if found, asks Gestalt() whether the Time Manager is native (in which
        case no calibration is done and the default scale factors are kept);
     3. otherwise asks Gestalt() for the CPU type and selects the RTC (601)
        or the TBR (anything newer);
     4. calibrates the selected source against the low-memory tick counter by
        busy-waiting for 60 ticks.
   If the selected source fails to advance during calibration it is dropped,
   so the accessors fall back to Microseconds().

   On other builds there is nothing to set up and the routine does nothing
   (the state it manages only exists on PowerPC).

   Calling it more than once is harmless: only the first call does any work. */
void FastInitialize() {
#if GENERATINGPOWERPC
	SInt32			result;

	if (gInited) return;

	/* Initialize the feature flags */
	gNative = gUseRTC = gUseTBR = false;

	/* We use CFM to find and load needed symbols from shared libraries, so
	   the application doesn't have to weak-link them, for convenience.   */
	gUpTime = (UpTimeProcPtr) FindFunctionInSharedLib(
			"\pDriverServicesLib", "\pUpTime");
	if (gUpTime) gA2NS = (A2NSProcPtr) FindFunctionInSharedLib(
			"\pDriverServicesLib", "\pAbsoluteToNanoseconds");
	if (!gA2NS) gUpTime = NULL; /* UpTime() is useless without the converter */

	if (gUpTime) {
		/* If we loaded UpTime(), then we need to know if the system has
		   a native implementation of the Time Manager. If so, then it's
		   pointless to calculate a scale factor against the missing VIA */

		/* gestaltNativeTimeMgr = 4 in some future version of the headers */
		if (!Gestalt(gestaltTimeMgrVersion, &result) &&
				(result > gestaltExtendedTimeMgr))
			gNative = true;
		}
	  else {
		/* If no DriverServicesLib, use Gestalt() to get the processor type.
		   Only NuBus PowerMacs with old System Software won't have DSL, so
		   we know it should either be a 601 or 603. */
		if (!Gestalt(gestaltNativeCPUtype, &result)) {
			if (result == gestaltCPU601) gUseRTC = true;
			  else if (result > gestaltCPU601) gUseTBR = true;
			}
		}

	/* Now calculate a scale factor to keep us accurate. */
	if ((gUpTime && !gNative) || gUseRTC || gUseTBR) {
		UInt64			tick, first, last;

		/* Wait for the beginning of the very next tick */
		for(tick = MyLMGetTicks() + 1; tick > MyLMGetTicks(); );

		/* Poll the selected timer */
		first = ReadCalibrationSource();

		/* Wait for the exact 60th tick to roll over */
		while(tick + 60 > MyLMGetTicks());

		/* Poll the selected timer again */
		last = ReadCalibrationSource();

		if (last > first) {
			/* Calculate a scale value that will give microseconds per second.
			   Remember, there are actually 60.15 ticks in a second, not 60. */
			gScaleUSec = (60.0 * 1000000.0) /
					((double) (last - first) * 60.15);
			gScaleMSec = gScaleUSec / 1000.0;
			}
		  else {
			/* The source stood still (or went backwards) across the whole
			   interval. Dividing by that delta would give an infinite or
			   nonsensical scale, so abandon the fast sources entirely and
			   let the accessors use Microseconds() instead. */
			gUpTime = NULL;
			gA2NS = NULL;
			gUseRTC = gUseTBR = false;
			}
		}

	/* We've initialized our globals */
	gInited = true;
#endif /* GENERATINGPOWERPC */
	}
176
177/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
178/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
179
180UInt64 FastMicroseconds() {
181	UnsignedWide	wide;
182	UInt64			usec;
183
184#if GENERATINGPOWERPC
185	/* Initialize globals the first time we are called */
186	if (!gInited) FastInitialize();
187
188	if (gNative) {
189		/* Use DriverServices if it's available -- it's fast and compatible */
190		wide = (*gA2NS)((*gUpTime)());
191		usec = (double) WideTo64bit(wide) * gScaleUSec + 0.5;
192		}
193	  else if (gUpTime) {
194		/* Use DriverServices if it's available -- it's fast and compatible */
195		wide = (*gA2NS)((*gUpTime)());
196		usec = (double) WideTo64bit(wide) * gScaleUSec + 0.5;
197		}
198	  else if (gUseTBR) {
199		/* On a recent PowerPC, we poll the TBR directly */
200		wide = PollTBR();
201		usec = (double) WideTo64bit(wide) * gScaleUSec + 0.5;
202		}
203	  else if (gUseRTC) {
204		/* On a 601, we can poll the RTC instead */
205		wide = PollRTC();
206		usec = (double) RTCToNano(wide) * gScaleUSec + 0.5;
207		}
208	  else
209#endif /* GENERATINGPOWERPC */
210		{
211		/* If all else fails, suffer the mixed mode overhead */
212		Microseconds(&wide);
213		usec = WideTo64bit(wide);
214		}
215
216	return(usec);
217	}
218
219/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
220/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
221
222UInt64 FastMilliseconds() {
223	UnsignedWide	wide;
224	UInt64			msec;
225
226#if GENERATINGPOWERPC
227	/* Initialize globals the first time we are called */
228	if (!gInited) FastInitialize();
229
230	if (gNative) {
231		/* Use DriverServices if it's available -- it's fast and compatible */
232		wide = (*gA2NS)((*gUpTime)());
233		msec = (double) WideTo64bit(wide) * gScaleMSec + 0.5;
234		}
235	  else if (gUpTime) {
236		/* Use DriverServices if it's available -- it's fast and compatible */
237		wide = (*gA2NS)((*gUpTime)());
238		msec = (double) WideTo64bit(wide) * gScaleMSec + 0.5;
239		}
240	  else if (gUseTBR) {
241		/* On a recent PowerPC, we poll the TBR directly */
242		wide = PollTBR();
243		msec = (double) WideTo64bit(wide) * gScaleMSec + 0.5;
244		}
245	  else if (gUseRTC) {
246		/* On a 601, we can poll the RTC instead */
247		wide = PollRTC();
248		msec = (double) RTCToNano(wide) * gScaleMSec + 0.5;
249		}
250	  else
251#endif /* GENERATINGPOWERPC */
252		{
253		/* If all else fails, suffer the mixed mode overhead */
254		Microseconds(&wide);
255		msec = ((double) WideTo64bit(wide) + 500.0) / 1000.0;
256		}
257
258	return(msec);
259	}
260
261/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
262/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
263
264StringPtr FastMethod() {
265	StringPtr	method = "\p<Unknown>";
266
267#if GENERATINGPOWERPC
268	/* Initialize globals the first time we are called */
269	if (!gInited) FastInitialize();
270
271	if (gNative) {
272		/* The Time Manager and UpTime() are entirely native on this machine */
273		method = "\pNative UpTime()";
274		}
275	  else if (gUpTime) {
276		/* Use DriverServices if it's available -- it's fast and compatible */
277		method = "\pUpTime()";
278		}
279	  else if (gUseTBR) {
280		/* On a recent PowerPC, we poll the TBR directly */
281		method = "\pPowerPC TBR";
282		}
283	  else if (gUseRTC) {
284		/* On a 601, we can poll the RTC instead */
285		method = "\pPowerPC RTC";
286		}
287	  else
288#endif /* GENERATINGPOWERPC */
289		{
290		/* If all else fails, suffer the mixed mode overhead */
291		method = "\pMicroseconds()";
292		}
293
294	return(method);
295	}
296
297/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
298/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
299#pragma mark -
300
301#if GENERATINGPOWERPC
302asm static UnsignedWide PollRTC_() {
303entry PollRTC /* Avoid CodeWarrior glue */
304	machine 601
305@AGAIN:
306	mfrtcu	r4 /* RTCU = SPR 4 */
307	mfrtcl	r5 /* RTCL = SPR 5 */
308	mfrtcu	r6
309	cmpw	r4,r6
310	bne		@AGAIN
311	stw		r4,0(r3)
312	stw		r5,4(r3)
313	blr
314	}
315
316/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
317/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
318
319asm static UnsignedWide PollTBR_() {
320entry PollTBR /* Avoid CodeWarrior glue */
321	machine 604
322@AGAIN:
323	mftbu	r4 /* TBRU = SPR 268 */
324	mftb	r5 /* TBRL = SPR 269 */
325	mftbu	r6
326	cmpw	r4,r6
327	bne		@AGAIN
328	stw		r4,0(r3)
329	stw		r5,4(r3)
330	blr
331	}
332
333/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
334/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */
335
336static Ptr FindFunctionInSharedLib(StringPtr libName, StringPtr funcName) {
337	OSErr				error = noErr;
338	Str255				errorStr;
339	Ptr					func = NULL;
340	Ptr					entry = NULL;
341	CFragSymbolClass	symClass;
342	CFragConnectionID	connID;
343
344	/* Find CFM containers for the current archecture -- CFM-PPC or CFM-68K */
345	if (/* error = */ GetSharedLibrary(libName, kCompiledCFragArch,
346			kLoadCFrag, &connID, &entry, errorStr)) return(NULL);
347	if (/* error = */ FindSymbol(connID, funcName, &func, &symClass))
348		return(NULL);
349
350	return(func);
351	}
352#endif /* GENERATINGPOWERPC */
353