memtest.cpp revision 7341494707810f709855ea85ce03a8ec3ac8dbaf
189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project/*
289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * Copyright (C) 2007 The Android Open Source Project
389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project *
489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License");
589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * you may not use this file except in compliance with the License.
689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * You may obtain a copy of the License at
789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project *
889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project *      http://www.apache.org/licenses/LICENSE-2.0
989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project *
1089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * Unless required by applicable law or agreed to in writing, software
1189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS,
1289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * See the License for the specific language governing permissions and
1489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project * limitations under the License.
1589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project */
1689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
1789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <stdio.h>
1889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <stdlib.h>
1989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <string.h>
2089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sys/time.h>
2189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <time.h>
2289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <unistd.h>
2389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sched.h>
2489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sys/resource.h>
2589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sys/syscall.h>
2689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sys/types.h>
2789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#include <sys/mman.h>
2889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
2989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#if 0
3089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int DCACHE_SIZE = 8*1024;
3189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int CPU_FREQ_EST = 195;
3289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int BRANCH_CYCLE = 3;
3389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#else
3489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int DCACHE_SIZE  = 32*1024;
3589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int CPU_FREQ_EST = 384;
3689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectconst int BRANCH_CYCLE = 2;
3789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#endif
3889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
/* Signed 64-bit quantity used throughout this file for nanosecond counts. */
typedef long long nsecs_t;

/*
 * Returns the current CLOCK_MONOTONIC reading expressed in nanoseconds.
 *
 * The timespec is zeroed before the call, so if clock_gettime() leaves it
 * untouched the function returns 0.
 */
static nsecs_t system_time()
{
    struct timespec now;
    now.tv_nsec = 0;
    now.tv_sec = 0;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const nsecs_t whole_seconds = (nsecs_t)now.tv_sec;
    return whole_seconds * 1000000000LL + now.tv_nsec;
}
4889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
4989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectnsecs_t loop_overhead(size_t count) __attribute__((noinline));
5089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectnsecs_t loop_overhead(size_t count)
5189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project{
5289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    nsecs_t overhead = -system_time();
5389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    do {
5489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project        asm volatile ("":::"memory");
5589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    } while (--count);
5689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    overhead += system_time();
5789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    return overhead;
5889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project}
5989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
/*
 * Reads one byte out of every 32 in addr[0 .. s-1] and discards it.
 *
 * addr is volatile so the reads are actually performed. The callers in this
 * file use it to pull a buffer into (or push other data out of) the data
 * cache before timing.
 */
static void preload(volatile char* addr, size_t s)
{
    size_t offset = 0;
    while (offset < s) {
        const char touched = addr[offset];
        (void)touched;
        offset += 32;
    }
}
6789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
6889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectstatic void usage(char* p) {
6989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    printf( "Usage: %s <test> <options>\n"
7089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "<test> is one of the following:\n"
7189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       cpufreq\n"
7289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       memcpy [perf [fast] | test]\n"
7389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       memset [perf | test]\n"
7489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       memcmp [perf | test]\n"
7589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       strlen [perf | test]\n"
7689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       malloc [fill]\n"
7789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       madvise\n"
7889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       resampler\n"
7989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       crash\n"
8089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       stack (stack smasher)\n"
8189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            "       crawl\n"
8289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project            , p);
8389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project}
8489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
8589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint cpufreq_test(int argc, char** argv);
8689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint memcpy_test(int argc, char** argv);
8789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint memset_test(int argc, char** argv);
8889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint memcmp_test(int argc, char** argv);
8989fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint strlen_test(int argc, char** argv);
9089fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint malloc_test(int argc, char** argv);
9189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint madvise_test(int argc, char** argv);
9289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint crash_test(int argc, char** argv);
9389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint stack_smasher_test(int argc, char** argv);
9489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Projectint crawl_test(int argc, char** argv);
9589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
9689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#if 0
970049acfc8b1c33eb90afc925dacd08a487618e17Glenn Kasten#pragma mark -
9889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#pragma mark main
99a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten#endif
100a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten
101a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kastenint main(int argc, char** argv)
102a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten{
103a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    if (argc == 1) {
104a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten        usage(argv[0]);
105a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten        return 0;
106a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    }
107a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    int err = -1;
108a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    if      (!strcmp(argv[1], "cpufreq"))   err = cpufreq_test(argc-1, argv+1);
109a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "memcpy"))    err = memcpy_test(argc-1, argv+1);
110a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "memset"))    err = memset_test(argc-1, argv+1);
111a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "memcmp"))    err = memcmp_test(argc-1, argv+1);
112a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "strlen"))    err = strlen_test(argc-1, argv+1);
113a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "malloc"))    err = malloc_test(argc-1, argv+1);
114a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "madvise"))   err = madvise_test(argc-1, argv+1);
115a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "crash"))     err = crash_test(argc-1, argv+1);
116a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "stack"))     err = stack_smasher_test(argc-1, argv+1);
117a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    else if (!strcmp(argv[1], "crawl"))     err = crawl_test(argc-1, argv+1);
118a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    if (err) {
119a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten        usage(argv[0]);
120a23856c0ad1f49a6ebcb71d3f63f329edc999a72Glenn Kasten    }
12189fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project    return 0;
12289fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project}
12389fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
12489fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#if 0
12589fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#pragma mark -
12689fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#pragma mark memcpy
12789fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project#endif
12889fa4ad53f2f4d57adbc97ae1149fc00c9b6f3c5The Android Open Source Project
129int validate_memcpy(char* s, char* d, size_t size);
130int validate_memset(char* s, char c, size_t size);
131
132int memcpy_test(int argc, char** argv)
133{
134    int option = 0;
135    if (argc >= 2) {
136        if (!strcmp(argv[1], "perf"))       option = 0;
137        else if (!strcmp(argv[1], "test"))  option = 1;
138        else                                return -1;
139    }
140
141    const int MAX_SIZE = 1024*1024; // 1MB
142    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 150 MB/s
143    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // 60 MB/s
144    char* src = (char*)malloc(MAX_SIZE+4+8+32);
145    char* dst = (char*)malloc(MAX_SIZE+4+8+32);
146    memset(src, 0, MAX_SIZE+4+8+32);
147    memset(dst, 0, MAX_SIZE+4+8+32);
148
149    if (option == 0) {
150        bool fast = (argc>=3 && !strcmp(argv[2], "fast"));
151        printf("memcpy() performance test is running, please wait...\n");
152        fflush(stdout);
153        usleep(10000);
154        setpriority(PRIO_PROCESS, 0, -20);
155        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
156
157        struct result_t { int size; float res; };
158        result_t* results = (result_t*)src;
159        int nbr = 0;
160        int size = 0;
161        for (int i=0 ; ; i++) {
162            if (!fast) {
163                if (size<128)          size += 8;
164                else if (size<1024)    size += 128;
165                else if (size<16384)   size += 1024;
166                else                   size <<= 1;
167            } else {
168                if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
169                    break;
170                size = FAST_SIZES[i];
171            }
172            if (size > MAX_SIZE) {
173                break;
174            }
175
176            const int REPEAT = (((size < DCACHE_SIZE) ?
177                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
178                                // ~0.5 second per test
179
180            const nsecs_t overhead = loop_overhead(REPEAT);
181
182            // tweak to make it a bad case
183            char* ddd = (char*)((long(dst+31)&~31) + 4);
184            char* sss = (char*)((long(src+31)&~31) + 28);
185
186            for (int offset=0 ; offset<=2 ; offset +=2 ) {
187                memcpy(dst, src, size); // just make sure to load the caches I/D
188                nsecs_t t = -system_time();
189                register int count = REPEAT;
190                do {
191                    memcpy(ddd, sss+offset, size);
192                } while (--count);
193                t += system_time() - overhead;
194                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
195                results[nbr].size = size;
196                results[nbr].res = throughput;
197                nbr++;
198            }
199        }
200
201        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
202        for (int i=0 ; i<nbr ; i+=2) {
203            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
204        }
205    } else if (option == 1) {
206        printf("memcpy() validation test is running, please wait...\n");
207        fflush(stdout);
208        char* curr = (char*)src;
209        for (int i=0 ; i<MAX_SIZE ; i++) {
210            char c = rand();
211            *curr++ = c != 0x55 ? c : 0xAA;
212        }
213        char* s = src + 1024;
214        char* d = dst + 1024;
215        int nb = 0;
216        for (int size=0 ; size<4096 && !nb ; size++) {
217            nb += validate_memcpy(s, d, size);
218            for (int o=1 ; o<32 && !nb ; o++) {
219                nb += validate_memcpy(s+o, d, size);
220                nb += validate_memcpy(s, d+o, size);
221                nb += validate_memcpy(s+o, d+o, size);
222            }
223        }
224        if (nb) printf("%d error(s) found\n", nb);
225        else    printf("success!\n");
226    }
227    fflush(stdout);
228    free(dst);
229    free(src);
230    return 0;
231}
232
/*
 * Checks one memcpy() call of `size` bytes from s to d.
 *
 * The destination, plus four guard bytes on each side, is pre-filled with
 * 0x55. After the copy the destination must equal the source and all eight
 * guard bytes must still be 0x55. The caller must therefore provide at least
 * four writable bytes before d and after d+size, and s and d must not
 * overlap.
 *
 * Returns the number of failed checks (0 to 3); each failure is also
 * reported on stdout.
 */
int validate_memcpy(char* s, char* d, size_t size)
{
    int nberr = 0;

    memset(d-4, 0x55, size+8);
    // Copy source INTO destination. The arguments used to be swapped, which
    // overwrote the source with the fill pattern and made the comparison
    // below pass trivially.
    memcpy(d, s, size);
    if (memcmp(s,d,size)) {
        printf("*** memcpy(%p,%p,%zu) destination != source\n",(void*)d,(void*)s,size);
        nberr++;
    }

    int intact = (d[size]==0x55)&&(d[size+1]==0x55)&&(d[size+2]==0x55)&&(d[size+3]==0x55);
    if (!intact) {
        printf("*** memcpy(%p,%p,%zu) clobbered past end of destination!\n",(void*)d,(void*)s,size);
        nberr++;
    }

    intact = (d[-1]==0x55)&&(d[-2]==0x55)&&(d[-3]==0x55)&&(d[-4]==0x55);
    if (!intact) {
        printf("*** memcpy(%p,%p,%zu) clobbered before start of destination!\n",(void*)d,(void*)s,size);
        nberr++;
    }
    return nberr;
}
254
255
256#if 0
257#pragma mark -
258#pragma mark memset
259#endif
260
261int memset_test(int argc, char** argv)
262{
263    int option = 0;
264    if (argc >= 2) {
265        if (!strcmp(argv[1], "perf"))       option = 0;
266        else if (!strcmp(argv[1], "test"))  option = 1;
267        else                                return -1;
268    }
269
270    const int MAX_SIZE = 1024*1024; // 1MB
271    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
272    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
273    char* dst = (char*)malloc(MAX_SIZE+4+8);
274
275    if (option == 0) {
276        printf("memset() performance test is running, please wait...\n");
277        fflush(stdout);
278        usleep(10000);
279        setpriority(PRIO_PROCESS, 0, -20);
280
281        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
282        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
283        struct result_t { int size; float res; };
284        result_t results[FAST_SIZES_COUNT*2];
285        int nbr = 0;
286        int size = 0;
287        for (int i=0 ; ; i++) {
288            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
289                break;
290            size = FAST_SIZES[i];
291            if (size > MAX_SIZE) {
292                break;
293            }
294            const int REPEAT = (((size < DCACHE_SIZE) ?
295                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
296                                // ~0.5 second per test
297
298            const nsecs_t overhead = loop_overhead(REPEAT);
299
300            for (int j=0 ; j<2 ; j++) {
301                if (j==0)   preload(dst, DCACHE_SIZE*4);   // flush D
302                else        preload(dst, size);            // load D
303                nsecs_t t = -system_time();
304                size_t count = REPEAT;
305                do {
306                    memset(dst, 0, size);
307                } while (--count);
308                t += system_time() - overhead;
309
310                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
311                results[nbr].size = size;
312                results[nbr].res = throughput;
313                nbr++;
314            }
315        }
316
317        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
318        for (int i=0 ; i<nbr ; i+=2) {
319            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
320        }
321    } else if (option == 1) {
322        printf("memset() validation test is running, please wait...\n");
323        fflush(stdout);
324        char* d = dst + 1024;
325        int nb = 0;
326        for (int o=1 ; o<32 ; o++) {
327            for (int size=0 ; size<4096 && !nb ; size++) {
328                nb += validate_memset(d, char(o), size);
329                nb += validate_memset(d+o, char(o), size);
330            }
331        }
332        if (nb) printf("%d error(s) found\n", nb);
333        else    printf("success!\n");
334    }
335    fflush(stdout);
336    free(dst);
337    return 0;
338}
339
/*
 * Checks one memset(d, c, size) call.
 *
 * The region is pre-filled with 0xaa and a 0x55 guard byte is planted
 * directly before it (d[-1]) and directly after it (d[size]). After the call
 * both guards must be intact and every byte of the region must equal c. The
 * caller must provide one writable byte on each side of the region.
 *
 * Returns the number of failed checks (0 to 3); each failure is also
 * reported on stdout.
 */
int validate_memset(char* d, char c, size_t size)
{
    int nberr = 0;
    const unsigned fill = (unsigned char)c;   // for the %02x in the messages

    for (size_t i=0 ; i<size ; i++) {
        d[i] = (char)0xaa;
    }
    d[-1] = 0x55;
    // The guard sits on the first byte past the region. It used to be at
    // d[size+1], which let a one-byte overrun into d[size] go unnoticed.
    d[size] = 0x55;

    memset(d, c, size);

    if (d[size]!=0x55) {
        printf("*** memset(%p,%02x,%zu) clobbered past end of destination!\n",(void*)d,fill,size);
        nberr++;
    }
    if (d[-1]!=0x55) {
        printf("*** memset(%p,%02x,%zu) clobbered before start of destination!\n",(void*)d,fill,size);
        nberr++;
    }
    for (size_t i=0 ; i<size ; i++) {
        if (d[i] != c) {
            // Only the first bad offset is reported.
            printf("*** memset(%p,%02x,%zu) failed at offset %zu\n",(void*)d,fill,size,i);
            nberr++;
            break;
        }
    }
    return nberr;
}
364
365#if 0
366#pragma mark -
367#pragma mark memcmp
368#endif
369
/*
 * Straightforward byte-by-byte reference implementation of memcmp().
 *
 * Bytes are compared as unsigned char. Returns the difference between the
 * first pair of differing bytes, or 0 when the first n bytes are equal
 * (including n == 0).
 */
static int ref_memcmp(const void *s1, const void *s2, size_t n)
{
  const unsigned char *c1 = (const unsigned char *)s1, *c2 = (const unsigned char *)s2;
  int d = 0;

  while ( n-- ) {
    d = (int)*c1++ - (int)*c2++;
    if ( d )
      break;
  }

  return d;
}

/*
 * Compares the library memcmp() against ref_memcmp() for one input.
 *
 * memcmp() is only specified to return a negative, zero or positive value,
 * so the two results are compared by sign rather than by exact value.
 *
 * Returns 0 when they agree, 1 (after printing a message) when they do not.
 */
int validate_memcmp(const char* s, const char* d, size_t size)
{
    const int a = ref_memcmp(s, d, size);
    const int b = memcmp(s, d, size);
    const int sign_a = (a > 0) - (a < 0);
    const int sign_b = (b > 0) - (b < 0);

    if (sign_a != sign_b) {
        printf("*** memcmp(%p,%p,%zu) failed %d should be %d\n",
                (const void*)s,(const void*)d,size,b,a);
        return 1;
    }
    return 0;
}
396
397int memcmp_test(int argc, char** argv)
398{
399    int option = 0;
400    if (argc >= 2) {
401        if (!strcmp(argv[1], "perf"))       option = 0;
402        else if (!strcmp(argv[1], "test"))  option = 1;
403        else                                return -1;
404    }
405
406    const int MAX_SIZE = 1024*1024; // 1MB
407    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 150 MB/s
408    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // 60 MB/s
409    char* src = (char*)malloc(MAX_SIZE+4+8+32);
410    char* dst = (char*)malloc(MAX_SIZE+4+8+32);
411
412    if (option == 0) {
413        printf("memcmp() performance test is running, please wait...\n");
414        fflush(stdout);
415        usleep(10000);
416        setpriority(PRIO_PROCESS, 0, -20);
417
418        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
419
420        struct result_t { int size; float res; };
421        result_t* results = (result_t*)src;
422        int nbr = 0;
423        int size = 0;
424        for (int i=0 ; ; i++) {
425            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
426                break;
427            size = FAST_SIZES[i];
428            if (size > MAX_SIZE) {
429                break;
430            }
431
432            const int REPEAT = (((size < DCACHE_SIZE) ?
433                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
434                                // ~0.5 second per test
435
436            const nsecs_t overhead = loop_overhead(REPEAT);
437
438            // tweak to make it a bad case
439            char* ddd = (char*)((long(dst+31)&~31) + 4);
440            char* sss = (char*)((long(src+31)&~31) + 28);
441
442            for (int offset=0 ; offset<=2 ; offset +=2 ) {
443                memcpy(ddd, sss+offset, size); // just make sure to load the caches I/D
444                nsecs_t t = -system_time();
445                register int count = REPEAT;
446                char c;
447                c = memcmp(ddd, sss+offset, size);
448                //printf("size %d, memcmp -> %d\n", size, (int)c);
449                do {
450                    c = memcmp(ddd, sss+offset, size);
451                    asm volatile (""::"r"(c):"memory");
452                } while (--count);
453                t += system_time() - overhead;
454                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
455                results[nbr].size = size;
456                results[nbr].res = throughput;
457                nbr++;
458            }
459        }
460
461        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
462        for (int i=0 ; i<nbr ; i+=2) {
463            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
464        }
465    } else {
466        printf("memcmp() validation test is running, please wait...\n");
467        fflush(stdout);
468
469        const char* const s = (const char*)src + 1024;
470        const char* const d = (const char*)dst + 1024;
471        int nb = 0;
472        for (int j=0 ; j<32 ; j++) {
473
474            char *curr0 = (char*)src;
475            char *curr1 = (char*)dst;
476            for (int i=0 ; i<MAX_SIZE ; i++) {
477                char c = rand();
478                *curr0++ = c;
479                *curr1++ = c;
480            }
481            if (j) {
482                src[1024 + j] ^= 0xFF;
483            }
484
485
486            for (int size=0 ; size<32 && !nb ; size++) {
487                for (int o=0 ; o<4 ; o++) {
488                    nb += validate_memcmp(s+o, d+o, size);
489                }
490               // memmove((char*)d+1, d, size);
491                for (int o=0 ; o<4 ; o++) {
492                    nb += validate_memcmp(s, d+o, size);
493                }
494            }
495        }
496        if (nb) printf("%d error(s) found\n", nb);
497        else    printf("success!\n");
498    }
499    fflush(stdout);
500    free(dst);
501    free(src);
502    return 0;
503}
504
505#if 0
506#pragma mark -
507#pragma mark strlen
508#endif
509
510int strlen_test(int argc, char** argv)
511{
512    int option = 0;
513    if (argc >= 2) {
514        if (!strcmp(argv[1], "perf"))       option = 0;
515        else if (!strcmp(argv[1], "test"))  option = 1;
516        else                                return -1;
517    }
518
519    const int MAX_SIZE = 1024*1024; // 1MB
520    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
521    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
522    char* str = (char*)calloc(MAX_SIZE+4+8, 1);
523
524    if (option == 0) {
525        printf("strlen() performance test is running, please wait...\n");
526        fflush(stdout);
527        usleep(10000);
528        setpriority(PRIO_PROCESS, 0, -20);
529
530        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
531        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
532        struct result_t { int size; float res; };
533        result_t results[FAST_SIZES_COUNT*2];
534        int nbr = 0;
535        int size = 0;
536        for (int i=0 ; ; i++) {
537            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
538                break;
539            size = FAST_SIZES[i];
540            if (size > MAX_SIZE) {
541                break;
542            }
543            const int REPEAT = (((size < DCACHE_SIZE) ?
544                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
545                                // ~0.5 second per test
546
547            const nsecs_t overhead = loop_overhead(REPEAT);
548
549            for (int j=0 ; j<2 ; j++) {
550                memset(str, 'A', size-1);
551                if (j==0)   preload(str, DCACHE_SIZE*4);   // flush D
552                else        preload(str, size);            // load D
553
554                nsecs_t t = -system_time();
555                size_t count = REPEAT;
556                int c=0;
557                do {
558                    c = strlen(str);
559                    asm volatile (""::"r"(c):"memory");
560                } while (--count);
561                t += system_time() - overhead;
562
563                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
564                results[nbr].size = size;
565                results[nbr].res = throughput;
566                nbr++;
567            }
568        }
569
570        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
571        for (int i=0 ; i<nbr ; i+=2) {
572            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
573        }
574    }
575
576    fflush(stdout);
577    free(str);
578    return 0;
579}
580
581
582#if 0
583#pragma mark -
584#pragma mark malloc
585#endif
586
/*
 * Allocates memory until malloc() refuses even a one-byte request, and
 * reports the total obtained.
 *
 * Requests start at 1GB (0x40000000); each failure halves the request size
 * and the loop ends when the size reaches 0. With argv[1] == "fill", every
 * successful allocation is also zero-filled so its pages are really touched.
 * Nothing is freed. Always returns 0.
 */
int malloc_test(int argc, char** argv)
{
    bool fill = (argc>=2 && !strcmp(argv[1], "fill"));
    size_t total = 0;
    size_t size = 0x40000000;
    while (size) {
        void* addr = malloc(size);
        if (addr == NULL) {
            printf("size = %9zu failed\n", size);
            size >>= 1;
        } else {
            total += size;
            printf("size = %9zu, addr = %p (total = %9zu (%zu MB))\n",
                    size, addr, total, total / (1024*1024));
            if (fill) {
                printf("filling...\n");
                fflush(stdout);
                memset(addr, 0, size);
            }
            // This was written `size + size>>1`, which parses as
            // (size + size) >> 1 and so leaves size unchanged after a
            // success. The grouping is now explicit; behaviour is the same.
            // NOTE(review): size + (size >> 1) may have been intended --
            // confirm before changing.
            size = (size + size) >> 1;
        }
    }
    printf("done. allocated %zu MB\n", total / (1024*1024));
    return 0;
}
612
613#if 0
614#pragma mark -
615#pragma mark madvise
616#endif
617
/*
 * Exercises mmap()/madvise(MADV_DONTNEED)/munmap() on anonymous memory.
 *
 * Runs twice, first with a 4096-byte mapping and then with 48MB. Each pass
 * maps a region, fills it with 0x55, advises DONTNEED, reports what the
 * first word reads back as, then maps, fills and unmaps a second region
 * before touching and unmapping the first one again.
 *
 * A mapping that fails is reported and skipped rather than dereferenced.
 * Arguments are unused. Always returns 0.
 */
int madvise_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // A long with every byte set to 0x55, whatever the width of long is.
    long untouched_pattern;
    memset(&untouched_pattern, 0x55, sizeof(untouched_pattern));

    for (int i=0 ; i<2 ; i++) {
        size_t size = i==0 ? 4096 : 48*1024*1024; // 48 MB
        printf("Allocating %zu MB... ", size/(1024*1024)); fflush(stdout);
        void* addr1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr1, addr1==MAP_FAILED ? "failed" : "OK"); fflush(stdout);
        if (addr1 == MAP_FAILED) {
            continue;   // nothing to touch or unmap for this size
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);

        printf("advising DONTNEED...\n"); fflush(stdout);
        madvise(addr1, size, MADV_DONTNEED);

        printf("reading back %p...\n", addr1); fflush(stdout);
        long first_word;
        memcpy(&first_word, addr1, sizeof(first_word));
        if (first_word == 0) {
            printf("madvise freed some pages\n");
        } else if (first_word == untouched_pattern) {
            printf("pages are still there\n");
        } else {
            printf("getting garbage back\n");
        }

        printf("Allocating %zu MB... ", size/(1024*1024)); fflush(stdout);
        void* addr2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr2, addr2==MAP_FAILED ? "failed" : "OK"); fflush(stdout);

        if (addr2 != MAP_FAILED) {
            printf("touching %p...\n", addr2); fflush(stdout);
            memset(addr2, 0xAA, size);

            printf("unmap %p ...\n", addr2); fflush(stdout);
            munmap(addr2, size);
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);

        printf("unmap %p ...\n", addr1); fflush(stdout);
        munmap(addr1, size);
    }

    printf("Done\n"); fflush(stdout);
    return 0;
}
661
662#if 0
663#pragma mark -
664#pragma mark cpufreq
665#endif
666
667int cpufreq_test(int argc, char** argv)
668{
669    struct timespec res;
670    clock_getres(CLOCK_REALTIME, &res);
671    printf("CLOCK_REALTIME  resolution: %lu ns\n", res.tv_nsec);
672    clock_getres(CLOCK_MONOTONIC, &res);
673    printf("CLOCK_MONOTONIC resolution: %lu ns\n", res.tv_nsec);
674    clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res);
675    printf("CLOCK_PROCESS_CPUTIME_ID resolution: %lu ns\n", res.tv_nsec);
676    clock_getres(CLOCK_THREAD_CPUTIME_ID, &res);
677    printf("CLOCK_THREAD_CPUTIME_ID  resolution: %lu ns\n", res.tv_nsec);
678
679    if (clock_getres(CLOCK_REALTIME_HR, &res) != 0)
680        printf("CLOCK_REALTIME_HR   resolution: %lu ns\n", res.tv_nsec);
681    else
682        printf("CLOCK_REALTIME_HR   not supported\n");
683
684    if (clock_getres(CLOCK_MONOTONIC_HR, &res) != 0)
685        printf("CLOCK_MONOTONIC_HR  resolution: %lu ns\n", res.tv_nsec);
686    else
687        printf("CLOCK_MONOTONIC_HR  not supported\n");
688
689    printf("\nEstimating the CPU frequency, please wait...\n");
690    fflush(stdout);
691    usleep(10000);
692    setpriority(PRIO_PROCESS, 0, -20);
693
694    const int LOOP_CYCLES = 1+BRANCH_CYCLE; // 1 cycle + 3 cycles for the branch
695    const size_t REPEAT = CPU_FREQ_EST*1000000;   // ~4 seconds (4cycles/loop)
696    register size_t count = REPEAT;
697    nsecs_t t = system_time();
698    do { // this loop generates 1+3 cycles
699        asm volatile ("":::"memory");
700    } while (--count);
701    t = system_time() - t;
702    const float freq = t ? (1000.0f*float(REPEAT)*LOOP_CYCLES) / t : 0;
703    printf("this CPU frequency: %ld MHz\n", long(freq+0.5f));
704    return 0;
705}
706
707#if 0
708#pragma mark -
709#pragma mark crash_test
710#endif
711
/*
 * Deliberately crashes the process.
 *
 * Loads 0..3 into r0..r3 and then reads through r0, i.e. from address 0.
 * The instructions are ARM assembly. Arguments are unused; the return
 * statement is only reached if the load does not fault.
 */
int crash_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    printf("about to crash...\n");
    // Push the message out now: buffered output would be lost once the
    // process is killed by the fault.
    fflush(stdout);
    asm volatile(
        "mov r0,  #0 \n"
        "mov r1,  #1 \n"
        "mov r2,  #2 \n"
        "mov r3,  #3 \n"
        "ldr r12, [r0] \n"
    );

    return 0;
}
725
/*
 * Deliberately corrupts its own stack frame.
 *
 * Stores a long long through the address of an int local, so the write
 * spills past the variable. Arguments are unused. Returns 0 if execution
 * survives the store.
 */
int stack_smasher_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    int dummy = 0;
    printf("corrupting our stack...\n");
    // Push the message out now: buffered output would be lost if the
    // corruption kills the process.
    fflush(stdout);
    *(volatile long long*)&dummy = 0;
    return 0;
}
733
734// --------------------------------------------------------------------
735
736extern "C" void thumb_function_1(int*p);
737extern "C" void thumb_function_2(int*p);
738extern "C" void arm_function_3(int*p);
739extern "C" void arm_function_2(int*p);
740extern "C" void arm_function_1(int*p);
741
742void arm_function_3(int*p) {
743    int a = 0;
744    thumb_function_2(&a);
745}
746
747void arm_function_2(int*p) {
748    int a = 0;
749    thumb_function_1(&a);
750}
751
752void arm_function_1(int*p) {
753    int a = 0;
754    arm_function_2(&a);
755}
756
757int crawl_test(int argc, char** argv)
758{
759    int a = 0;
760    arm_function_1(&a);
761    return 0;
762}
763
764