// memtest.cpp, revision 7a91aed580a9d3f992bc6ca2186a0a007a07a14c
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sched.h>
#include <unistd.h>
2859c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
/*
 * Tuning estimates used to size the benchmark loops.
 *
 *   DCACHE_SIZE   byte count the tests use as the data-cache size
 *   CPU_FREQ_EST  scaled by 1024*1024 to get a bytes-per-second guess
 *                 (presumably a clock estimate in MHz -- confirm)
 *   BRANCH_CYCLE  presumably cycles per taken branch -- confirm against
 *                 the cpufreq test
 *
 * The second profile is kept for reference and is compiled out.
 */
#if 1
const int DCACHE_SIZE  = 32 * 1024;
const int CPU_FREQ_EST = 384;
const int BRANCH_CYCLE = 2;
#else
const int DCACHE_SIZE  = 8 * 1024;
const int CPU_FREQ_EST = 195;
const int BRANCH_CYCLE = 3;
#endif

/*
 * Copy routine exercised by the memcpy tests.  Point this at another
 * implementation (for example one declared as
 * `extern "C" void* xmemcpy(void*, void*, size_t);`) to test that instead.
 */
#define MEMCPY  memcpy

/* Signed time value or duration, in nanoseconds. */
typedef long long nsecs_t;
4359c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
4459c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautnerstatic nsecs_t system_time()
4559c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner{
4659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    struct timespec t;
4759c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    t.tv_sec = t.tv_nsec = 0;
4859c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    clock_gettime(CLOCK_MONOTONIC, &t);
4959c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
5059c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner}
51dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
5259c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautnernsecs_t loop_overhead(size_t count) __attribute__((noinline));
5359c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautnernsecs_t loop_overhead(size_t count)
5459c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner{
5559c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    nsecs_t overhead = -system_time();
5659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    do {
5759c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner        asm volatile ("":::"memory");
5859c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    } while (--count);
5959c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    overhead += system_time();
6059c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    return overhead;
6159c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner}
6259c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
/*
 * Reads one byte out of every 32 in [addr, addr + s) and discards it.
 * The pointer is volatile-qualified so each read is actually performed.
 * The 32-byte step is presumably the cache-line size -- confirm for the
 * target CPU.
 */
static void preload(volatile char* addr, size_t s)
{
    const size_t STEP = 32;
    size_t pos = 0;
    while (pos < s) {
        char touched = addr[pos];
        (void)touched;
        pos += STEP;
    }
}
70d46747a1c64b6ca3282e8841833980ab91829436Jeff Brown
712d5618c22101cfc4d6478cfe1d846798389540c1Craig Mautnerstatic void usage(char* p) {
722d5618c22101cfc4d6478cfe1d846798389540c1Craig Mautner    printf( "Usage: %s <test> <options>\n"
7359c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner            "<test> is one of the following:\n"
746601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner            "       cpufreq\n"
75bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            "       memcpy [perf [fast] | test]\n"
766601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner            "       memset [perf | test]\n"
77398341927f3dca68d71024483aa276d10af4c080Craig Mautner            "       memcmp [perf | test]\n"
78398341927f3dca68d71024483aa276d10af4c080Craig Mautner            "       strlen [perf | test]\n"
7976a7165719dc3ccce902953f6244e2c2668aa753Craig Mautner            "       malloc [fill]\n"
8069b0818179201fadc9d2a384d692d8ae4aecd85cCraig Mautner            "       madvise\n"
81398341927f3dca68d71024483aa276d10af4c080Craig Mautner            "       resampler\n"
82dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner            "       crash\n"
83b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner            "       stack (stack smasher)\n"
84b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner            "       crawl\n"
85bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            , p);
86cf910b0c714b2ca90ea0013e5695850506a1d36fCraig Mautner}
87dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
88c00204b4d14d49a0417b44ca21aee4f0d4c466e0Craig Mautnerint cpufreq_test(int argc, char** argv);
89bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautnerint memcpy_test(int argc, char** argv);
90bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautnerint memset_test(int argc, char** argv);
91de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautnerint memcmp_test(int argc, char** argv);
92d5d5d0f4b8c75c9ed4fea320b4f31740b88dd37eCraig Mautnerint strlen_test(int argc, char** argv);
93e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwaleint malloc_test(int argc, char** argv);
94e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwaleint madvise_test(int argc, char** argv);
95cf910b0c714b2ca90ea0013e5695850506a1d36fCraig Mautnerint crash_test(int argc, char** argv);
966601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautnerint stack_smasher_test(int argc, char** argv);
976601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautnerint crawl_test(int argc, char** argv);
986601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner
996601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner#if 0
1006601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner#pragma mark -
1016601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner#pragma mark main
102dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner#endif
103dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
104dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautnerint main(int argc, char** argv)
1059d808b1f4823879ce8b52aefb90c55346017cdc7Craig Mautner{
1069d808b1f4823879ce8b52aefb90c55346017cdc7Craig Mautner    if (argc == 1) {
10795da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        usage(argv[0]);
10895da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        return 0;
1091bf2b873470d2ba8a4ac218da73516cc2b20aa76Craig Mautner    }
110b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner    int err = -1;
1112d5618c22101cfc4d6478cfe1d846798389540c1Craig Mautner    if      (!strcmp(argv[1], "cpufreq"))   err = cpufreq_test(argc-1, argv+1);
112df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner    else if (!strcmp(argv[1], "memcpy"))    err = memcpy_test(argc-1, argv+1);
1132d5618c22101cfc4d6478cfe1d846798389540c1Craig Mautner    else if (!strcmp(argv[1], "memset"))    err = memset_test(argc-1, argv+1);
1149d808b1f4823879ce8b52aefb90c55346017cdc7Craig Mautner    else if (!strcmp(argv[1], "memcmp"))    err = memcmp_test(argc-1, argv+1);
115b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautner    else if (!strcmp(argv[1], "strlen"))    err = strlen_test(argc-1, argv+1);
116b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautner    else if (!strcmp(argv[1], "malloc"))    err = malloc_test(argc-1, argv+1);
117b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautner    else if (!strcmp(argv[1], "madvise"))   err = madvise_test(argc-1, argv+1);
11869b0818179201fadc9d2a384d692d8ae4aecd85cCraig Mautner    else if (!strcmp(argv[1], "crash"))     err = crash_test(argc-1, argv+1);
1199d808b1f4823879ce8b52aefb90c55346017cdc7Craig Mautner    else if (!strcmp(argv[1], "stack"))     err = stack_smasher_test(argc-1, argv+1);
12059c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    else if (!strcmp(argv[1], "crawl"))     err = crawl_test(argc-1, argv+1);
12159c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    if (err) {
12259c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner        usage(argv[0]);
12359c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    }
12459c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    return 0;
12559c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner}
12659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
12759c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner#if 0
12859c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner#pragma mark -
12959c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner#pragma mark memcpy
130b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautner#endif
131b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautner
132b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautnerint validate_memcpy(char* s, char* d, size_t size);
133b47bbc3d80badb94229bc4ce7a2d5006faa9ef15Craig Mautnerint validate_memset(char* s, char c, size_t size);
13459c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner
13559c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautnerint memcpy_test(int argc, char** argv)
13659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner{
13759c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner    int option = 0;
138a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown    if (argc >= 2) {
139a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown        if (!strcmp(argv[1], "perf"))       option = 0;
140a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown        else if (!strcmp(argv[1], "test"))  option = 1;
141a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown        else                                return -1;
142a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown    }
143a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown
144a506a6ec94863a35acca9feb165db76ddac3892cJeff Brown    const int MAX_SIZE = 1024*1024; // 1MB
145a446bf0e8c7b5f5441aeb11b359ba6776b9b3061keunyoung    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 150 MB/s
146a446bf0e8c7b5f5441aeb11b359ba6776b9b3061keunyoung    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // 60 MB/s
147a446bf0e8c7b5f5441aeb11b359ba6776b9b3061keunyoung    char* src = (char*)malloc(MAX_SIZE+4+8+32);
148a446bf0e8c7b5f5441aeb11b359ba6776b9b3061keunyoung    char* dst = (char*)malloc(MAX_SIZE+4+8+32);
149dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner    memset(src, 0, MAX_SIZE+4+8+32);
150dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner    memset(dst, 0, MAX_SIZE+4+8+32);
151dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
152dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner    if (option == 0) {
15300af9fe6ae0da5b716212fa754163d90b60c1ee6Craig Mautner        bool fast = (argc>=3 && !strcmp(argv[2], "fast"));
1544cd0c13f8f765118a24e31548c058b5029481beaCraig Mautner        printf("memcpy() performance test is running, please wait...\n");
15500af9fe6ae0da5b716212fa754163d90b60c1ee6Craig Mautner        fflush(stdout);
15600af9fe6ae0da5b716212fa754163d90b60c1ee6Craig Mautner        usleep(10000);
157c00204b4d14d49a0417b44ca21aee4f0d4c466e0Craig Mautner        setpriority(PRIO_PROCESS, 0, -20);
158dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
159dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
160dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner        struct result_t { int size; float res; };
161dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner        result_t* results = (result_t*)src;
162d9a22881fda77e208f54f893a804d0001d27a27eCraig Mautner        int nbr = 0;
163dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner        int size = 0;
164c00204b4d14d49a0417b44ca21aee4f0d4c466e0Craig Mautner        for (int i=0 ; ; i++) {
165c00204b4d14d49a0417b44ca21aee4f0d4c466e0Craig Mautner            if (!fast) {
166d5d5d0f4b8c75c9ed4fea320b4f31740b88dd37eCraig Mautner                if (size<128)          size += 8;
167333c2ec88305a71ab1d59bf0482193273fac61afCraig Mautner                else if (size<1024)    size += 128;
168e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                else if (size<16384)   size += 1024;
169e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                else                   size <<= 1;
170de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner            } else {
171d5d5d0f4b8c75c9ed4fea320b4f31740b88dd37eCraig Mautner                if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
172d5d5d0f4b8c75c9ed4fea320b4f31740b88dd37eCraig Mautner                    break;
17346ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                size = FAST_SIZES[i];
174722285e199a9fc74b9b3343b7505c00666848c88Craig Mautner            }
175bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            if (size > MAX_SIZE) {
176b660b9d8cf6b951b85a35599d636c470795e9a1aCraig Mautner                break;
177bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            }
178722285e199a9fc74b9b3343b7505c00666848c88Craig Mautner
179722285e199a9fc74b9b3343b7505c00666848c88Craig Mautner            const int REPEAT = (((size < DCACHE_SIZE) ?
18046ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
18146ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                                // ~0.5 second per test
1824a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner
1834a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner            const nsecs_t overhead = loop_overhead(REPEAT);
1844a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner
1854a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner            // tweak to make it a bad case
1864a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner            char* ddd = (char*)((long(dst+31)&~31) + 4);
18746ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner            char* sss = (char*)((long(src+31)&~31) + 28);
1884a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner
18946ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner            for (int offset=0 ; offset<=2 ; offset +=2 ) {
1904a1cb22056112f7ffd5f4fad8b7a092b96e7cc7bCraig Mautner                memcpy(dst, src, size); // just make sure to load the caches I/D
19146ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                nsecs_t t = -system_time();
19246ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                register int count = REPEAT;
19346ac6fa614131d567bed93d1d2067d765ecef85dCraig Mautner                do {
194ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale                    MEMCPY(ddd, sss+offset, size);
195ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale                } while (--count);
196df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner                t += system_time() - overhead;
197bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
198df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner                results[nbr].size = size;
199de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner                results[nbr].res = throughput;
200df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner                nbr++;
201b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner            }
202ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale        }
203ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale
204ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
205ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale        for (int i=0 ; i<nbr ; i+=2) {
206ddc1cb2c15549ed23dce9d416680a009fa6ae23cWale Ogunwale            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
207bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        }
208b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner    } else if (option == 1) {
209b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner        printf("memcpy() validation test is running, please wait...\n");
210bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        fflush(stdout);
211bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        char* curr = (char*)src;
212bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        for (int i=0 ; i<MAX_SIZE ; i++) {
21305d6272bad2d707b488a6f8784ce8aea5e25b110Craig Mautner            char c = rand();
21405d6272bad2d707b488a6f8784ce8aea5e25b110Craig Mautner            *curr++ = c != 0x55 ? c : 0xAA;
215df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner        }
216df88d73092c62a1a3cd2b2056ca63ae2e70cc238Craig Mautner        char* s = src + 1024;
21700af9fe6ae0da5b716212fa754163d90b60c1ee6Craig Mautner        char* d = dst + 1024;
21800af9fe6ae0da5b716212fa754163d90b60c1ee6Craig Mautner        int nb = 0;
219967212cb542e6eeb308678367b53381bff984c31Craig Mautner        for (int size=0 ; size<4096 && !nb ; size++) {
220bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            nb += validate_memcpy(s, d, size);
221967212cb542e6eeb308678367b53381bff984c31Craig Mautner            for (int o=1 ; o<32 && !nb ; o++) {
222967212cb542e6eeb308678367b53381bff984c31Craig Mautner                nb += validate_memcpy(s+o, d, size);
223bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner                nb += validate_memcpy(s, d+o, size);
224bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner                nb += validate_memcpy(s+o, d+o, size);
225967212cb542e6eeb308678367b53381bff984c31Craig Mautner            }
226967212cb542e6eeb308678367b53381bff984c31Craig Mautner        }
227e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        if (nb) printf("%d error(s) found\n", nb);
228bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        else    printf("success!\n");
229e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    }
230e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    fflush(stdout);
231e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    free(dst);
232e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    free(src);
233e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    return 0;
234e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale}
235e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale
236bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautnerint validate_memcpy(char* s, char* d, size_t size)
237bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner{
238bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner    int nberr = 0;
239cf910b0c714b2ca90ea0013e5695850506a1d36fCraig Mautner    memset(d-4, 0x55, size+8);
240cf910b0c714b2ca90ea0013e5695850506a1d36fCraig Mautner    MEMCPY(s, d, size);
241e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    if (memcmp(s,d,size)) {
2426601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner        printf("*** memcpy(%p,%p,%zd) destination != source\n",s,d,size);
2436601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner        nberr++;
2446601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner    }
2456601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner    bool r = (d[size]==0x55)&&(d[size+1]==0x55)&&(d[size+2]==0x55)&&(d[size+3]==0x55);
246e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    if (!r) {
247e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        printf("*** memcpy(%p,%p,%zd) clobbered past end of destination!\n",s,d,size);
2486601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner        nberr++;
24906d582d4e42893e7e061477004d991d35b5f0d78Doris Liu    }
2506601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner    r = (d[-1]==0x55)&&(d[-2]==0x55)&&(d[-3]==0x55)&&(d[-4]==0x55);
2516601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner    if (!r) {
2526601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner        printf("*** memcpy(%p,%p,%zd) clobbered before start of destination!\n",s,d,size);
2536601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner        nberr++;
2541bef389f70de89d82189b94967eccee9b3b58d68Craig Mautner    }
2551bef389f70de89d82189b94967eccee9b3b58d68Craig Mautner    return nberr;
2561bef389f70de89d82189b94967eccee9b3b58d68Craig Mautner}
2576601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner
2586601b7bdeb46756fd83ad4c1966ef966c52b46e4Craig Mautner
259498e8c92d71ff4ccfd2a77bdc53160e8dc93c7feWale Ogunwale#if 0
260858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner#pragma mark -
261858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner#pragma mark memset
262858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner#endif
263858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner
264498e8c92d71ff4ccfd2a77bdc53160e8dc93c7feWale Ogunwaleint memset_test(int argc, char** argv)
265498e8c92d71ff4ccfd2a77bdc53160e8dc93c7feWale Ogunwale{
266858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner    int option = 0;
267858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner    if (argc >= 2) {
268858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner        if (!strcmp(argv[1], "perf"))       option = 0;
269ac6f843c917b68ea8805711965b149a9338e3a0eCraig Mautner        else if (!strcmp(argv[1], "test"))  option = 1;
270bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        else                                return -1;
271498e8c92d71ff4ccfd2a77bdc53160e8dc93c7feWale Ogunwale    }
272858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner
273858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner    const int MAX_SIZE = 1024*1024; // 1MB
274858d8a6583b0c91c66960167b84c67b6c4e2d3c6Craig Mautner    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
27505d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
276bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner    char* dst = (char*)malloc(MAX_SIZE+4+8);
277bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner
27805d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner    if (option == 0) {
27905d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        printf("memset() performance test is running, please wait...\n");
28005d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        fflush(stdout);
28105d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        usleep(10000);
28205d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        setpriority(PRIO_PROCESS, 0, -20);
283bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner
284e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
285e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
286e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        struct result_t { int size; float res; };
287e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        result_t results[FAST_SIZES_COUNT*2];
288e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        int nbr = 0;
289e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        int size = 0;
290e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        for (int i=0 ; ; i++) {
291e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
292e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                break;
293e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            size = FAST_SIZES[i];
294e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            if (size > MAX_SIZE) {
29505d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                break;
29605d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner            }
29705d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner            const int REPEAT = (((size < DCACHE_SIZE) ?
29805d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
29905d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                                // ~0.5 second per test
300bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner
301e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            const nsecs_t overhead = loop_overhead(REPEAT);
302e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale
303e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            for (int j=0 ; j<2 ; j++) {
304e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                if (j==0)   preload(dst, DCACHE_SIZE*4);   // flush D
30505d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                else        preload(dst, size);            // load D
30605d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                nsecs_t t = -system_time();
30705d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                size_t count = REPEAT;
30805d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                do {
309bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner                    memset(dst, 0, size);
310e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                } while (--count);
311e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                t += system_time() - overhead;
312e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale
313e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
314e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale                results[nbr].size = size;
315b660b9d8cf6b951b85a35599d636c470795e9a1aCraig Mautner                results[nbr].res = throughput;
31605d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner                nbr++;
317b660b9d8cf6b951b85a35599d636c470795e9a1aCraig Mautner            }
31805d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        }
31905d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner
32005d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
321bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        for (int i=0 ; i<nbr ; i+=2) {
322e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
323e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        }
324e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale    } else if (option == 1) {
325e4a0c5722b1d8db95dfc842d716452dbbf02c86dWale Ogunwale        printf("memset() validation test is running, please wait...\n");
32605d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        fflush(stdout);
32705d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        char* d = dst + 1024;
32805d290365f0b9ed781ffcb30b38a0c7c6e450e9dCraig Mautner        int nb = 0;
3292eb15342be5b075dda3df29b2b014a92ce13a5f8Craig Mautner        for (int o=1 ; o<32 ; o++) {
330bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner            for (int size=0 ; size<4096 && !nb ; size++) {
331bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner                nb += validate_memset(d, char(o), size);
3322eb15342be5b075dda3df29b2b014a92ce13a5f8Craig Mautner                nb += validate_memset(d+o, char(o), size);
3332eb15342be5b075dda3df29b2b014a92ce13a5f8Craig Mautner            }
3342eb15342be5b075dda3df29b2b014a92ce13a5f8Craig Mautner        }
33595da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        if (nb) printf("%d error(s) found\n", nb);
33695da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        else    printf("success!\n");
33795da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner    }
33895da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner    fflush(stdout);
33995da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner    free(dst);
34095da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner    return 0;
34195da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner}
34295da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner
/*
 * Fills d[0..size) with `c` using memset() and checks the result.
 *
 * Before the call the region is set to 0xaa and guard bytes holding 0x55
 * are placed at d[-1], d[size] and d[size+1].  Afterwards the guards must
 * be intact and every byte of the region must equal `c`.
 *
 * The caller must make d[-1] .. d[size+1] writable.
 *
 * Returns the number of failed checks (0 when everything is fine).
 */
int validate_memset(char* d, char c, size_t size)
{
    int nberr = 0;
    for (size_t i=0 ; i<size ; i++) {
        d[i] = (char)0xaa;
    }
    d[-1] = 0x55;
    // Guard the byte immediately after the region as well as the next
    // one, so that a single-byte overrun is caught.
    d[size] = 0x55;
    d[size+1] = 0x55;
    memset(d, c, size);
    if (d[size]!=0x55 || d[size+1]!=0x55) {
        printf("*** memset(%p,%02x,%zu) clobbered past end of destination!\n",(void*)d,(unsigned)(unsigned char)c,size);
        nberr++;
    }
    if (d[-1]!=0x55) {
        printf("*** memset(%p,%02x,%zu) clobbered before start of destination!\n",(void*)d,(unsigned)(unsigned char)c,size);
        nberr++;
    }
    for (size_t i=0 ; i<size ; i++) {
        if (d[i] != c) {
            printf("*** memset(%p,%02x,%zu) failed at offset %zu\n",(void*)d,(unsigned)(unsigned char)c,size, i);
            nberr++;
            break;  // report only the first bad byte
        }
    }
    return nberr;
}
36795da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner
36895da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner#if 0
36995da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner#pragma mark -
37095da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner#pragma mark memcmp
37195da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner#endif
37295da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner
/*
 * Reference byte-wise comparison used to cross-check memcmp().
 *
 * Compares the first n bytes of s1 and s2 as unsigned chars and returns
 * -1, 0 or 1 according to the first differing byte (0 when n is 0 or
 * all n bytes match).
 */
static int ref_memcmp(const void *s1, const void *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *)s1;
  const unsigned char *rhs = (const unsigned char *)s2;

  for (size_t i = 0; i < n; i++) {
    if (lhs[i] != rhs[i])
      return (lhs[i] < rhs[i]) ? -1 : 1;
  }

  return 0;
}
386a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner
387a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautnerint validate_memcmp(const char* s, const char* d, size_t size)
388a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner{
389a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner
390a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    int a = ref_memcmp(s, d, size);
391a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    int b = memcmp(s, d, size);
392d46747a1c64b6ca3282e8841833980ab91829436Jeff Brown    b = (b < 0 ? -1 : (b > 0 ? 1 : 0));
393d46747a1c64b6ca3282e8841833980ab91829436Jeff Brown    //printf("%d, %d\n", a, b);
394d46747a1c64b6ca3282e8841833980ab91829436Jeff Brown    if (a != b) {
395a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner        printf("*** memcmp(%p,%p,%zd) failed %d should be %d\n",s,d,size,b,a);
396a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner        return 1;
397a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    }
398a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    return 0;
399a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner}
400a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner
401a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautnerint memcmp_test(int argc, char** argv)
402a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner{
403a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    int option = 0;
404a91f9e2959ee905f97977a88fe45bde6ffb874b0Craig Mautner    if (argc >= 2) {
40595da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        if (!strcmp(argv[1], "perf"))       option = 0;
40695da1087ed3c7b9983b571bc5409827ae390f15fCraig Mautner        else if (!strcmp(argv[1], "test"))  option = 1;
407bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner        else                                return -1;
408bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner    }
409bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner
410bdc748af8ce62778d2ad15040ecdfada6e4635fdCraig Mautner    const int MAX_SIZE = 1024*1024; // 1MB
411de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 150 MB/s
412dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // 60 MB/s
413e8b85fd41904ceaaa4ac09200b2b39106f9c5c37Craig Mautner    char* src = (char*)malloc(MAX_SIZE+4+8+32);
414dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner    char* dst = (char*)malloc(MAX_SIZE+4+8+32);
415e8b85fd41904ceaaa4ac09200b2b39106f9c5c37Craig Mautner
416bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner    if (option == 0) {
417bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        printf("memcmp() performance test is running, please wait...\n");
418bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        fflush(stdout);
419dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner        usleep(10000);
420bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        setpriority(PRIO_PROCESS, 0, -20);
42183162a90278d9d52d8fca7ee20ba314b452261deCraig Mautner
422bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
423bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner
424de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner        struct result_t { int size; float res; };
425bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        result_t* results = (result_t*)src;
426de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner        int nbr = 0;
427bcb6eb9e26c923333b25074d39722a5dfa8c0320Craig Mautner        int size = 0;
428b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner        for (int i=0 ; ; i++) {
429b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
430de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner                break;
431dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner            size = FAST_SIZES[i];
432dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner            if (size > MAX_SIZE) {
433dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner                break;
434dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner            }
435dc548483ae90ba26ad9e2e2cb79f4673140edb49Craig Mautner
436de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner            const int REPEAT = (((size < DCACHE_SIZE) ?
437de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
438de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner                                // ~0.5 second per test
439de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner
440de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner            const nsecs_t overhead = loop_overhead(REPEAT);
441de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner
442de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner            // tweak to make it a bad case
443de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner            char* ddd = (char*)((long(dst+31)&~31) + 4);
444b1fd65c0ff5784b90d765edb7e3c3115d767dff0Craig Mautner            char* sss = (char*)((long(src+31)&~31) + 28);
445de4ef020ec5c3acdc90c4ba43011dda20d98d4ddCraig Mautner
44659c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner            for (int offset=0 ; offset<=2 ; offset +=2 ) {
44759c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner                memcpy(ddd, sss+offset, size); // just make sure to load the caches I/D
448e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                nsecs_t t = -system_time();
449e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                register int count = REPEAT;
450e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                char c;
451e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                c = memcmp(ddd, sss+offset, size);
452e0a3884cb627efc650e19fbe76b1b3343468cf57Craig Mautner                //printf("size %d, memcmp -> %d\n", size, (int)c);
45359c009776dae5ccbdfb93d7151ff2065ca049dc3Craig Mautner                do {
454                    c = memcmp(ddd, sss+offset, size);
455                    asm volatile (""::"r"(c):"memory");
456                } while (--count);
457                t += system_time() - overhead;
458                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
459                results[nbr].size = size;
460                results[nbr].res = throughput;
461                nbr++;
462            }
463        }
464
465        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
466        for (int i=0 ; i<nbr ; i+=2) {
467            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
468        }
469    } else {
470        printf("memcmp() validation test is running, please wait...\n");
471        fflush(stdout);
472
473        const char* const s = (const char*)src + 1024;
474        const char* const d = (const char*)dst + 1024;
475        int nb = 0;
476        for (int j=0 ; j<32 ; j++) {
477
478            char *curr0 = (char*)src;
479            char *curr1 = (char*)dst;
480            for (int i=0 ; i<MAX_SIZE ; i++) {
481                char c = rand();
482                *curr0++ = c;
483                *curr1++ = c;
484            }
485            if (j) {
486                src[1024 + j] ^= 0xFF;
487            }
488
489
490            for (int size=0 ; size<32 && !nb ; size++) {
491                for (int o=0 ; o<4 ; o++) {
492                    nb += validate_memcmp(s+o, d+o, size);
493                }
494               // memmove((char*)d+1, d, size);
495                for (int o=0 ; o<4 ; o++) {
496                    nb += validate_memcmp(s, d+o, size);
497                }
498            }
499        }
500        if (nb) printf("%d error(s) found\n", nb);
501        else    printf("success!\n");
502    }
503    fflush(stdout);
504    free(dst);
505    free(src);
506    return 0;
507}
508
509#if 0
510#pragma mark -
511#pragma mark strlen
512#endif
513
514int strlen_test(int argc, char** argv)
515{
516    int option = 0;
517    if (argc >= 2) {
518        if (!strcmp(argv[1], "perf"))       option = 0;
519        else if (!strcmp(argv[1], "test"))  option = 1;
520        else                                return -1;
521    }
522
523    const int MAX_SIZE = 1024*1024; // 1MB
524    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
525    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // 195 MB/s
526    char* str = (char*)calloc(MAX_SIZE+4+8, 1);
527
528    if (option == 0) {
529        printf("strlen() performance test is running, please wait...\n");
530        fflush(stdout);
531        usleep(10000);
532        setpriority(PRIO_PROCESS, 0, -20);
533
534        static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
535        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
536        struct result_t { int size; float res; };
537        result_t results[FAST_SIZES_COUNT*2];
538        int nbr = 0;
539        int size = 0;
540        for (int i=0 ; ; i++) {
541            if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
542                break;
543            size = FAST_SIZES[i];
544            if (size > MAX_SIZE) {
545                break;
546            }
547            const int REPEAT = (((size < DCACHE_SIZE) ?
548                        (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
549                                // ~0.5 second per test
550
551            const nsecs_t overhead = loop_overhead(REPEAT);
552
553            for (int j=0 ; j<2 ; j++) {
554                memset(str, 'A', size-1);
555                if (j==0)   preload(str, DCACHE_SIZE*4);   // flush D
556                else        preload(str, size);            // load D
557
558                nsecs_t t = -system_time();
559                size_t count = REPEAT;
560                int c=0;
561                do {
562                    c = strlen(str);
563                    asm volatile (""::"r"(c):"memory");
564                } while (--count);
565                t += system_time() - overhead;
566
567                const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
568                results[nbr].size = size;
569                results[nbr].res = throughput;
570                nbr++;
571            }
572        }
573
574        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
575        for (int i=0 ; i<nbr ; i+=2) {
576            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
577        }
578    }
579
580    fflush(stdout);
581    free(str);
582    return 0;
583}
584
585
586#if 0
587#pragma mark -
588#pragma mark malloc
589#endif
590
/*
 * Allocates memory until malloc() can no longer satisfy any request.
 *
 * Starts at 0x40000000 bytes. A failed request halves the next size; a
 * successful one grows it by half. The loop ends when the size reaches
 * zero. Blocks are never freed. When argv[1] is "fill", every obtained
 * block is also zeroed with memset().
 *
 * Progress and the final total are printed on stdout. Always returns 0.
 */
int malloc_test(int argc, char** argv)
{
    const bool fill = (argc >= 2) && (strcmp(argv[1], "fill") == 0);
    size_t total = 0;

    for (size_t size = 0x40000000; size != 0; ) {
        void* const addr = malloc(size);
        if (!addr) {
            printf("size = %9zd failed\n", size);
            size >>= 1;
            continue;
        }

        total += size;
        printf("size = %9zd, addr = %p (total = %9zd (%zd MB))\n",
                size, addr, total, total / (1024*1024));
        if (fill) {
            printf("filling...\n");
            fflush(stdout);
            memset(addr, 0, size);
        }
        size += size >> 1;
    }

    printf("done. allocated %zd MB\n", total / (1024*1024));
    return 0;
}
616
617#if 0
618#pragma mark -
619#pragma mark madvise
620#endif
621
/*
 * Exercises mmap()/madvise(MADV_DONTNEED)/munmap() on anonymous private
 * mappings of two sizes (4096 bytes, then 48 MB).
 *
 * For each size: map a region, fill it with 0x55, advise DONTNEED, read
 * the first long back and report what it contains, then map/touch/unmap
 * a second region, touch the first one again and unmap it.
 *
 * A mapping that fails is reported and not touched. Everything is
 * reported on stdout; always returns 0.
 */
int madvise_test(int argc, char** argv)
{
    // What a long reads as when every byte is 0x55. Built with memset so
    // it matches regardless of sizeof(long).
    long touched_pattern;
    memset(&touched_pattern, 0x55, sizeof(touched_pattern));

    for (int i=0 ; i<2 ; i++) {
        size_t size = i==0 ? 4096 : 48*1024*1024; // 48 MB
        printf("Allocating %zd MB... ", size/(1024*1024)); fflush(stdout);
        void* addr1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr1, addr1==MAP_FAILED ? "failed" : "OK"); fflush(stdout);
        if (addr1 == MAP_FAILED) {
            // Nothing to touch, advise or unmap for this size.
            continue;
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);

        printf("advising DONTNEED...\n"); fflush(stdout);
        if (madvise(addr1, size, MADV_DONTNEED) != 0) {
            printf("madvise failed\n"); fflush(stdout);
        }

        printf("reading back %p...\n", addr1); fflush(stdout);
        const long first_word = *(long*)addr1;
        if (first_word == 0) {
            printf("madvise freed some pages\n");
        } else if (first_word == touched_pattern) {
            printf("pages are still there\n");
        } else {
            printf("getting garbage back\n");
        }

        printf("Allocating %zd MB... ", size/(1024*1024)); fflush(stdout);
        void* addr2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr2, addr2==MAP_FAILED ? "failed" : "OK"); fflush(stdout);

        if (addr2 != MAP_FAILED) {
            printf("touching %p...\n", addr2); fflush(stdout);
            memset(addr2, 0xAA, size);

            printf("unmap %p ...\n", addr2); fflush(stdout);
            munmap(addr2, size);
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);

        printf("unmap %p ...\n", addr1); fflush(stdout);
        munmap(addr1, size);
    }

    printf("Done\n"); fflush(stdout);
    return 0;
}
665
666#if 0
667#pragma mark -
668#pragma mark cpufreq
669#endif
670
671int cpufreq_test(int argc, char** argv)
672{
673    struct timespec res;
674    clock_getres(CLOCK_REALTIME, &res);
675    printf("CLOCK_REALTIME  resolution: %lu ns\n", res.tv_nsec);
676    clock_getres(CLOCK_MONOTONIC, &res);
677    printf("CLOCK_MONOTONIC resolution: %lu ns\n", res.tv_nsec);
678    clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res);
679    printf("CLOCK_PROCESS_CPUTIME_ID resolution: %lu ns\n", res.tv_nsec);
680    clock_getres(CLOCK_THREAD_CPUTIME_ID, &res);
681    printf("CLOCK_THREAD_CPUTIME_ID  resolution: %lu ns\n", res.tv_nsec);
682
683    if (clock_getres(CLOCK_REALTIME_HR, &res) != 0)
684        printf("CLOCK_REALTIME_HR   resolution: %lu ns\n", res.tv_nsec);
685    else
686        printf("CLOCK_REALTIME_HR   not supported\n");
687
688    if (clock_getres(CLOCK_MONOTONIC_HR, &res) != 0)
689        printf("CLOCK_MONOTONIC_HR  resolution: %lu ns\n", res.tv_nsec);
690    else
691        printf("CLOCK_MONOTONIC_HR  not supported\n");
692
693    printf("\nEstimating the CPU frequency, please wait...\n");
694    fflush(stdout);
695    usleep(10000);
696    setpriority(PRIO_PROCESS, 0, -20);
697
698    const int LOOP_CYCLES = 1+BRANCH_CYCLE; // 1 cycle + 3 cycles for the branch
699    const size_t REPEAT = CPU_FREQ_EST*1000000;   // ~4 seconds (4cycles/loop)
700    register size_t count = REPEAT;
701    nsecs_t t = system_time();
702    do { // this loop generates 1+3 cycles
703        asm volatile ("":::"memory");
704    } while (--count);
705    t = system_time() - t;
706    const float freq = t ? (1000.0f*float(REPEAT)*LOOP_CYCLES) / t : 0;
707    printf("this CPU frequency: %ld MHz\n", long(freq+0.5f));
708    return 0;
709}
710
711#if 0
712#pragma mark -
713#pragma mark crash_test
714#endif
715
/*
 * Deliberately faults the process with a load from address 0.
 *
 * Prints a notice, then runs an inline-assembly sequence that puts the
 * values 0..3 into r0..r3 and loads a word from the address held in r0
 * (i.e. 0) into r12. The `return 0` is only reached if that load does
 * not fault.
 *
 * NOTE(review): the mnemonics and register names are ARM-specific, and
 * the asm statement declares no clobbers — presumably acceptable because
 * the process is expected to die here.
 */
int crash_test(int argc, char** argv)
{
    printf("about to crash...\n");
    asm volatile(
        "mov r0,  #0 \n"
        "mov r1,  #1 \n"
        "mov r2,  #2 \n"
        "mov r3,  #3 \n"
        "ldr r12, [r0] \n"
    );

    return 0;
}
729
/*
 * Deliberately writes past a local variable on the stack.
 *
 * Stores a zero of type long long through a pointer to the int local
 * `dummy`. NOTE(review): where long long is wider than int the store
 * spills over whatever sits next to `dummy` in the frame; what that is,
 * and how the damage shows up, depends on the compiler and target.
 * Always returns 0 if it returns at all.
 */
int stack_smasher_test(int argc, char** argv)
{
    int dummy = 0;
    printf("corrupting our stack...\n");
    // volatile keeps the oversized store from being optimized away
    *(volatile long long*)&dummy = 0;
    return 0;
}
737
738// --------------------------------------------------------------------
739
// Call chain used by crawl_test(), declared with C linkage.
// thumb_function_1/2 are not defined in this part of the file;
// NOTE(review): presumably Thumb-mode routines, going by the name — confirm.
extern "C" void thumb_function_1(int*p);
extern "C" void thumb_function_2(int*p);
extern "C" void arm_function_3(int*p);
extern "C" void arm_function_2(int*p);
extern "C" void arm_function_1(int*p);
745
746void arm_function_3(int*p) {
747    int a = 0;
748    thumb_function_2(&a);
749}
750
751void arm_function_2(int*p) {
752    int a = 0;
753    thumb_function_1(&a);
754}
755
756void arm_function_1(int*p) {
757    int a = 0;
758    arm_function_2(&a);
759}
760
761int crawl_test(int argc, char** argv)
762{
763    int a = 0;
764    arm_function_1(&a);
765    return 0;
766}
767
768