1#include <stdio.h> 2#include <stdlib.h> 3#include <assert.h> 4#include <cutils/memory.h> 5#include <time.h> 6 7/* 8 * All systems must implement or emulate the rdhwr instruction to read 9 * the userlocal register. Systems that emulate also return teh count register 10 * when accessing register $2 so this should work on most systems 11 */ 12#define USE_RDHWR 13 14#ifdef USE_RDHWR 15#define UNITS "cycles" 16#define SCALE 2 /* Most CPU's */ 17static inline uint32_t 18get_count(void) 19{ 20 uint32_t res; 21 asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory"); 22 return res; 23} 24#else 25#define UNITS "ns" 26#define SCALE 1 27static inline uint32_t 28get_count(void) 29{ 30 struct timespec now; 31 uint32_t res; 32 clock_gettime(CLOCK_REALTIME, &now); 33 res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec); 34 // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res); 35 return res; 36} 37#endif 38 39uint32_t overhead; 40void 41measure_overhead(void) 42{ 43 int i; 44 uint32_t start, stop, delta; 45 for (i = 0; i < 32; i++) { 46 start = get_count(); 47 stop = get_count(); 48 delta = stop - start; 49 if (overhead == 0 || delta < overhead) 50 overhead = delta; 51 } 52 printf("overhead is %d"UNITS"\n", overhead); 53} 54 55uint32_t 56timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes) 57{ 58 uint32_t start, stop, delta; 59 start = get_count(); 60 (*fn)(d, val, bytes); 61 stop = get_count(); 62 delta = stop - start - overhead; 63 // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta); 64 return delta * SCALE; 65} 66 67/* define VERIFY to check that memset only touches the bytes it's supposed to */ 68/*#define VERIFY*/ 69 70/* 71 * Using a big arena means that memset will most likely miss in the cache 72 * NB Enabling verification effectively warms up the cache... 73 */ 74#define ARENASIZE 0x1000000 75#ifdef VERIFY 76char arena[ARENASIZE+8]; /* Allow space for guard words */ 77#else 78char arena[ARENASIZE]; 79#endif 80 81void 82testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold) 83{ 84 int offset; 85 void *d; 86 void *p; 87 uint32_t v, notv = 0; 88 uint32_t n; 89 int i, units; 90 int totalunits = 0, totalbytes = 0, samples = 0; 91 92 /* Reset RNG to ensure each test uses same random values */ 93 srand(0); /* FIXME should be able to use some other seed than 0 */ 94 95 for (i = 0; i < trials; i++) { 96 n = minbytes + (rand() % (maxbytes-minbytes)); /* How many bytes to do */ 97 offset = ((rand() % (ARENASIZE-n))); /* Where to start */ 98 99#ifdef VERIFY 100 offset += 4; /* Allow space for guard word at beginning */ 101#endif 102 v = rand(); 103 104 /* Adjust alignment and sizes based on transfer size */ 105 switch (size) { 106 case 1: 107 v &= 0xff; 108 notv = ~v & 0xff; 109 break; 110 case 2: 111 v &= 0xffff; 112 notv = ~v & 0xffff; 113 offset &= ~1; 114 n &= ~1; 115 break; 116 case 4: 117 notv = ~v; 118 offset &= ~3; 119 n &= ~3; 120 break; 121 } 122 123 d = &arena[offset]; 124 125#ifdef VERIFY 126 /* Initialise the area and guard words */ 127 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) { 128 if (size == 1) 129 *(uint8_t *)p = notv; 130 else if (size == 2) 131 *(uint16_t *)p = notv; 132 else if (size == 4) 133 *(uint32_t *)p = notv; 134 } 135#endif 136 units = timeone(fn, d, v, n); 137#ifdef VERIFY 138 /* Check the area and guard words */ 139 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) { 140 uint32_t got = 0; 141 if (size == 1) 142 got = *(uint8_t *)p; 143 else if (size == 2) 144 got = *(uint16_t *)p; 145 else if (size == 4) 146 got = *(uint32_t *)p; 147 if (p < (void *)&arena[offset]) { 148 if (got != notv) 149 printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n); 150 } 151 else if (p < (void *)&arena[offset+n]) { 152 if (got != v) 153 printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n); 154 } 155 else { 156 if (got != notv) 157 printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n); 158 } 159 } 160#endif 161 162 /* If the cycle count looks reasonable include it in the statistics */ 163 if (units < threshold) { 164 totalbytes += n; 165 totalunits += units; 166 samples++; 167 } 168 } 169 170 printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n", 171 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits); 172} 173 174extern void android_memset32_dumb(uint32_t* dst, uint32_t value, size_t size); 175extern void android_memset16_dumb(uint32_t* dst, uint16_t value, size_t size); 176extern void android_memset32_test(uint32_t* dst, uint32_t value, size_t size); 177extern void android_memset16_test(uint32_t* dst, uint16_t value, size_t size); 178extern void memset_cmips(void* dst, int value, size_t size); 179extern void memset_omips(void* dst, int value, size_t size); 180 181int 182main(int argc, char **argv) 183{ 184 int i; 185 struct { 186 char *type; 187 int trials; 188 int minbytes, maxbytes; 189 } *pp, params[] = { 190 {"small", 10000, 0, 64}, 191 {"medium", 10000, 64, 512}, 192 {"large", 10000, 512, 1280}, 193 {"varied", 10000, 0, 1280}, 194 }; 195#define NPARAMS (sizeof(params)/sizeof(params[0])) 196 struct { 197 char *name; 198 void (*fn)(); 199 int size; 200 } *fp, functions[] = { 201 {"dmemset16", (void (*)())android_memset16_dumb, 2}, 202 {"tmemset16", (void (*)())android_memset16_test, 2}, 203 {"lmemset16", (void (*)())android_memset16, 2}, 204 205 {"dmemset32", (void (*)())android_memset32_dumb, 4}, 206 {"tmemset32", (void (*)())android_memset32_test, 4}, 207 {"lmemset32", (void (*)())android_memset32, 4}, 208 209 {"cmemset", (void (*)())memset_cmips, 1}, 210 {"omemset", (void (*)())memset_omips, 1}, 211 {"lmemset", (void (*)())memset, 1}, 212 }; 213#define NFUNCTIONS (sizeof(functions)/sizeof(functions[0])) 214 char tag[40]; 215 int threshold; 216 217 measure_overhead(); 218 219 /* Warm up the page cache */ 220 memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */ 221 222 for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) { 223 (fp->fn)(arena, 0xffffffff, ARENASIZE); /* one call to get the code into Icache */ 224 for (pp = params; pp < ¶ms[NPARAMS]; pp++) { 225 sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes); 226 227 /* Set the cycle threshold */ 228 threshold = pp->maxbytes * 4 * 10; /* reasonable for cycles and ns */ 229 testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold); 230 } 231 printf ("\n"); 232 } 233 234 return 0; 235} 236