#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <cutils/memory.h>
6
/*
 * All systems must implement or emulate the rdhwr instruction to read
 * the userlocal register. Systems that emulate it also return the count
 * register when accessing register $2, so this should work on most systems.
 */
12#define USE_RDHWR
13
14#ifdef USE_RDHWR
15#define UNITS "cycles"
16#define SCALE 2			/* Most CPU's */
/*
 * Read the 32-bit counter used as the benchmark time base.
 *
 * Issues `rdhwr` on hardware register $2 and returns whatever it yields.
 * The comment above USE_RDHWR states that $2 provides the count register,
 * either directly or through emulation.
 *
 * The result is only 32 bits wide, so it is meaningful only as the unsigned
 * difference between two nearby samples.
 * NOTE(review): SCALE (2, "Most CPU's") presumably reflects the counter
 * ticking once every two CPU cycles - confirm for the target core.
 */
static inline uint32_t
get_count(void)
{
  uint32_t res;
  /*
   * .set push / .set pop save and restore the assembler options around a
   * temporary switch to mips32r2 (presumably so that rdhwr is accepted
   * whatever ISA level the file is built for).  `volatile` plus the "memory"
   * clobber stop the compiler from dropping the read or moving memory
   * accesses across it.
   */
  asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory");
  return res;
}
24#else
25#define UNITS "ns"
26#define SCALE 1
/*
 * Fallback time base: the current time in nanoseconds, truncated to 32 bits.
 *
 * The truncation is deliberate.  The value wraps about every 4.29 seconds,
 * but callers only use the unsigned difference of two nearby samples, which
 * stays correct across a wrap.
 *
 * CLOCK_MONOTONIC is used instead of CLOCK_REALTIME so that an interval
 * cannot be corrupted by the wall clock being stepped (NTP, settimeofday)
 * between the two reads.
 *
 * Returns 0 if the clock cannot be read.
 */
static inline uint32_t
get_count(void)
{
  struct timespec now;

  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    return 0;

  /* 64-bit arithmetic first, then keep only the low 32 bits */
  return (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec);
}
37#endif
38
39uint32_t overhead;
40void
41measure_overhead(void)
42{
43  int i;
44  uint32_t start, stop, delta;
45  for (i = 0; i < 32; i++) {
46    start = get_count();
47    stop = get_count();
48    delta = stop - start;
49    if (overhead == 0 || delta < overhead)
50      overhead = delta;
51  }
52  printf("overhead is %d"UNITS"\n", overhead);
53}
54
55uint32_t
56timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes)
57{
58  uint32_t start, stop, delta;
59  start = get_count();
60  (*fn)(d, val, bytes);
61  stop = get_count();
62  delta = stop - start - overhead;
63  // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta);
64  return delta * SCALE;
65}
66
67/* define VERIFY to check that memset only touches the bytes it's supposed to */
68/*#define VERIFY*/
69
70/*
71 * Using a big arena means that memset will most likely miss in the cache
72 * NB Enabling verification effectively warms up the cache...
73 */
/* Size in bytes of the region that testone() picks its random targets from */
#define ARENASIZE 0x1000000
/*
 * Backing store for every timed fill.  With VERIFY defined, testone() shifts
 * each target up by 4 bytes and checks 4 bytes on either side of it, hence
 * the 8 extra bytes.
 */
#ifdef VERIFY
char arena[ARENASIZE+8];	/* Allow space for guard words */
#else
char arena[ARENASIZE];
#endif
80
81void
82testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold)
83{
84  int offset;
85  void *d;
86  void *p;
87  uint32_t v, notv = 0;
88  uint32_t n;
89  int i, units;
90  int totalunits = 0, totalbytes = 0, samples = 0;
91
92  /* Reset RNG to ensure each test uses same random values */
93  srand(0);			/* FIXME should be able to use some other seed than 0 */
94
95  for (i = 0; i < trials; i++) {
96    n = minbytes + (rand() % (maxbytes-minbytes));	/* How many bytes to do */
97    offset = ((rand() % (ARENASIZE-n)));		/* Where to start */
98
99#ifdef VERIFY
100    offset += 4;		/* Allow space for guard word at beginning */
101#endif
102    v = rand();
103
104    /* Adjust alignment and sizes based on transfer size */
105    switch (size) {
106    case 1:
107      v &= 0xff;
108      notv = ~v & 0xff;
109      break;
110    case 2:
111      v &= 0xffff;
112      notv = ~v & 0xffff;
113      offset &= ~1;
114      n &= ~1;
115      break;
116    case 4:
117      notv = ~v;
118      offset &= ~3;
119      n &= ~3;
120      break;
121    }
122
123    d = &arena[offset];
124
125#ifdef VERIFY
126    /* Initialise the area and guard words */
127    for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
128      if (size == 1)
129	*(uint8_t *)p = notv;
130      else if (size == 2)
131	*(uint16_t *)p = notv;
132      else if (size == 4)
133	*(uint32_t *)p = notv;
134    }
135#endif
136    units = timeone(fn, d, v, n);
137#ifdef VERIFY
138    /* Check the area and guard words */
139    for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
140      uint32_t got = 0;
141      if (size == 1)
142	got = *(uint8_t *)p;
143      else if (size == 2)
144	got = *(uint16_t *)p;
145      else if (size == 4)
146	got = *(uint32_t *)p;
147      if (p < (void *)&arena[offset]) {
148	if (got != notv)
149	  printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n);
150      }
151      else if (p < (void *)&arena[offset+n]) {
152	if (got != v)
153	  printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
154      }
155      else {
156	if (got != notv)
157	  printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
158      }
159    }
160#endif
161
162    /* If the cycle count looks reasonable include it in the statistics */
163    if (units < threshold) {
164      totalbytes += n;
165      totalunits += units;
166      samples++;
167    }
168  }
169
170  printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n",
171	 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits);
172}
173
/*
 * Routines under test that this file only declares; their definitions are
 * expected to be linked in from elsewhere.  Each takes a destination, a fill
 * value and a size.
 *
 * NOTE(review): the 16-bit variants are declared with a uint32_t *
 * destination - confirm this matches their definitions.
 */

/* 16-bit fill variants */
void android_memset16_dumb(uint32_t *dst, uint16_t value, size_t size);
void android_memset16_test(uint32_t *dst, uint16_t value, size_t size);

/* 32-bit fill variants */
void android_memset32_dumb(uint32_t *dst, uint32_t value, size_t size);
void android_memset32_test(uint32_t *dst, uint32_t value, size_t size);

/* Byte fill variants */
void memset_cmips(void *dst, int value, size_t size);
void memset_omips(void *dst, int value, size_t size);
180
181int
182main(int argc, char **argv)
183{
184  int i;
185  struct {
186    char *type;
187    int trials;
188    int minbytes, maxbytes;
189  } *pp, params[] = {
190    {"small",  10000,   0,   64},
191    {"medium", 10000,  64,  512},
192    {"large",  10000, 512, 1280},
193    {"varied", 10000,   0, 1280},
194  };
195#define NPARAMS (sizeof(params)/sizeof(params[0]))
196  struct {
197    char *name;
198    void (*fn)();
199    int size;
200  } *fp, functions[] = {
201    {"dmemset16", (void (*)())android_memset16_dumb, 2},
202    {"tmemset16", (void (*)())android_memset16_test, 2},
203    {"lmemset16", (void (*)())android_memset16,      2},
204
205    {"dmemset32", (void (*)())android_memset32_dumb, 4},
206    {"tmemset32", (void (*)())android_memset32_test, 4},
207    {"lmemset32", (void (*)())android_memset32,      4},
208
209    {"cmemset",    (void (*)())memset_cmips,         1},
210    {"omemset",    (void (*)())memset_omips,         1},
211    {"lmemset",    (void (*)())memset,               1},
212  };
213#define NFUNCTIONS (sizeof(functions)/sizeof(functions[0]))
214  char tag[40];
215  int threshold;
216
217  measure_overhead();
218
219  /* Warm up the page cache */
220  memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */
221
222  for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) {
223    (fp->fn)(arena, 0xffffffff, ARENASIZE);	/* one call to get the code into Icache */
224    for (pp = params; pp < &params[NPARAMS]; pp++) {
225      sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes);
226
227      /* Set the cycle threshold */
228      threshold = pp->maxbytes * 4 * 10;	/* reasonable for cycles and ns */
229      testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold);
230    }
231    printf ("\n");
232  }
233
234  return 0;
235}
236