182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris/*
282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** Copyright 2010 The Android Open Source Project
382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris**
482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** Licensed under the Apache License, Version 2.0 (the "License");
582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** you may not use this file except in compliance with the License.
682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** You may obtain a copy of the License at
782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris**
882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris**     http://www.apache.org/licenses/LICENSE-2.0
982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris**
1082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** Unless required by applicable law or agreed to in writing, software
1182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** distributed under the License is distributed on an "AS IS" BASIS,
1282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** See the License for the specific language governing permissions and
1482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris** limitations under the License.
1582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris*/
1682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
1782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris/*
1825ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris * Micro-benchmarking of sleep/cpu speed/memcpy/memset/memory reads/strcmp.
1982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris */
2082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
2182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <stdio.h>
2282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <stdlib.h>
2382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <ctype.h>
2482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <math.h>
2582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <sched.h>
2682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <sys/resource.h>
2782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <time.h>
2882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris#include <unistd.h>
2982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// The default size of data that will be manipulated in each iteration of
// a memory benchmark. Can be modified with the --data_size option.
#define DEFAULT_DATA_SIZE       1000000000

// The amount of memory allocated for the cold benchmarks to use.
// The product is parenthesized so that it stays a single operand wherever
// the macro is expanded (for example on the right-hand side of '/' or '%').
#define DEFAULT_COLD_DATA_SIZE  (128*1024*1024)

// The default size of the stride between each buffer for cold benchmarks.
#define DEFAULT_COLD_STRIDE_SIZE  4096

// Number of nanoseconds in a second.
#define NS_PER_SEC              1000000000

// The maximum number of arguments that a benchmark will accept.
#define MAX_ARGS    2

// Default memory alignment of malloc.
#define DEFAULT_MALLOC_MEMORY_ALIGNMENT   8
48991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
4982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris// Contains information about benchmark options.
5082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferristypedef struct {
5182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    bool print_average;
5282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    bool print_each_iter;
5382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
5482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int dst_align;
5525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    int dst_or_mask;
5682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int src_align;
5725ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    int src_or_mask;
5882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
5982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int cpu_to_lock;
6082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
6182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int data_size;
627401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    int dst_str_size;
63991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    int cold_data_size;
64991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    int cold_stride_size;
6582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
6682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int args[MAX_ARGS];
6782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    int num_args;
6882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris} command_data_t;
6982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Function pointer types for the routines that can be benchmarked.

// Generic function pointer used to store any of the types below.
typedef void *(*void_func_t)();
// memcpy-shaped function: (destination, source, byte count).
typedef void *(*memcpy_func_t)(void *, const void *, size_t);
// memset-shaped function: (destination, fill value, byte count).
typedef void *(*memset_func_t)(void *, int, size_t);
// strcmp-shaped function: compares two C strings.
typedef int (*strcmp_func_t)(const char *, const char *);
// strcpy/strcat-shaped function: (destination, source).
typedef char *(*str_func_t)(char *, const char *);
// strlen-shaped function: returns the length of a C string.
typedef size_t (*strlen_func_t)(const char *);
76014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
7782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris// Struct that contains a mapping of benchmark name to benchmark function.
7882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferristypedef struct {
7982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    const char *name;
80014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    int (*ptr)(const char *, const command_data_t &, void_func_t func);
81014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    void_func_t func;
8282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris} function_t;
8382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
8482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris// Get the current time in nanoseconds.
8582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferrisuint64_t nanoTime() {
8682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris  struct timespec t;
8782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
8882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris  t.tv_sec = t.tv_nsec = 0;
8982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris  clock_gettime(CLOCK_MONOTONIC, &t);
9082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris  return static_cast<uint64_t>(t.tv_sec) * NS_PER_SEC + t.tv_nsec;
9182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
9282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Adjust an existing pointer so that it has a specific alignment. Nothing is
// allocated here.
// This function assumes an alignment value that is a power of 2.
// If the alignment is 0 (or negative), orig_ptr is returned unchanged.
// Otherwise the address is advanced to the next multiple of "alignment"
// (a full "alignment" is added when it is already a multiple), and then the
// "alignment" bit and the bits of "or_mask" are OR'ed in. Setting the
// "alignment" bit keeps the result from being aligned to anything larger
// than the alignment chosen.
uint8_t *getAlignedMemory(uint8_t *orig_ptr, int alignment, int or_mask) {
  if (alignment <= 0) {
    return orig_ptr;
  }

  uint64_t addr = reinterpret_cast<uint64_t>(orig_ptr);
  const uint64_t past_boundary = addr & (alignment - 1);
  addr += alignment - past_boundary;
  addr |= alignment | or_mask;

  return reinterpret_cast<uint8_t*>(addr);
}
10882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
10925ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris// Allocate memory with a specific alignment and return that pointer.
11025ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris// This function assumes an alignment value that is a power of 2.
11125ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris// If the alignment is 0, then use the pointer returned by malloc.
11225ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferrisuint8_t *allocateAlignedMemory(size_t size, int alignment, int or_mask) {
11325ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris  uint64_t ptr = reinterpret_cast<uint64_t>(malloc(size + 3 * alignment));
11425ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris  if (!ptr)
11525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris      return NULL;
11625ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris  return getAlignedMemory((uint8_t*)ptr, alignment, or_mask);
11725ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris}
11825ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
// Fill "buf" with a NUL-terminated string that occupies "size" bytes.
// The first size - 1 bytes cycle through the 96 character codes 32..127 and
// the last byte is set to '\0'.
// A size of 0 leaves the buffer untouched; without this guard size - 1 would
// wrap around and the loop would write far past the end of the buffer.
void initString(uint8_t *buf, size_t size) {
    if (size == 0) {
        return;
    }

    const size_t last = size - 1;
    for (size_t i = 0; i < last; i++) {
        buf[i] = static_cast<char>(32 + (i % 96));
    }
    buf[last] = '\0';
}
1257401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
126991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisstatic inline double computeAverage(uint64_t time_ns, size_t size, size_t copies) {
127014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    return ((size/1024.0) * copies) / ((double)time_ns/NS_PER_SEC);
128014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris}
12982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Fold a new sample into a running mean.
//   avg         - the new sample.
//   running_avg - mean of the cur_idx samples seen so far.
//   cur_idx     - zero-based index of the new sample.
// Returns the mean of all cur_idx + 1 samples.
static inline double computeRunningAvg(double avg, double running_avg, size_t cur_idx) {
    const double old_share = (running_avg / (cur_idx + 1)) * cur_idx;
    const double new_share = avg / (cur_idx + 1);
    return old_share + new_share;
}
13382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Fold the square of a new sample into a running mean of squares.
//   avg        - the new sample (it is squared here).
//   square_avg - mean of the squares of the cur_idx samples seen so far.
//   cur_idx    - zero-based index of the new sample.
// Returns the mean of the squares of all cur_idx + 1 samples.
static inline double computeRunningSquareAvg(double avg, double square_avg, size_t cur_idx) {
    const double old_share = (square_avg / (cur_idx + 1)) * cur_idx;
    const double new_share = (avg / (cur_idx + 1)) * avg;
    return old_share + new_share;
}
137014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
// Standard deviation computed from the mean of the squares ("square_avg")
// and the mean ("running_avg") of a set of samples.
// The difference square_avg - running_avg^2 can come out slightly negative
// through floating point rounding when the samples are nearly identical;
// it is clamped to zero so that the result is 0 rather than NaN.
static inline double computeStdDev(double square_avg, double running_avg) {
    const double variance = square_avg - running_avg * running_avg;
    if (variance <= 0.0) {
        return 0.0;
    }
    return sqrt(variance);
}
141014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
142991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisstatic inline void printIter(uint64_t time_ns, const char *name, size_t size, size_t copies, double avg) {
143991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    printf("%s %ux%u bytes took %.06f seconds (%f MB/s)\n",
144014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris           name, copies, size, (double)time_ns/NS_PER_SEC, avg/1024.0);
145014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris}
146014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
// Print the statistics gathered over all iterations of a benchmark.
//   time_ns     - not used by this function.
//   name        - benchmark name.
//   size        - size in bytes of each buffer.
//   copies      - number of buffers processed per iteration.
//   running_avg, std_dev, min, max - statistics in units of 1024 bytes per
//                 second; each is divided by 1024 for the MB/s figure.
// size_t values are printed with %zu; %u does not match size_t where size_t
// is wider than unsigned int.
static inline void printSummary(uint64_t time_ns, const char *name, size_t size, size_t copies, double running_avg, double std_dev, double min, double max) {
    (void)time_ns;
    printf("  %s %zux%zu bytes average %.2f MB/s std dev %.4f min %.2f MB/s max %.2f MB/s\n",
           name, copies, size, running_avg/1024.0, std_dev/1024.0, min/1024.0,
           max/1024.0);
}
15282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
153991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris// For the cold benchmarks, a large buffer will be created which
154991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris// contains many "size" buffers. This function will figure out the increment
155991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris// needed between each buffer so that each one is aligned to "alignment".
156991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint getAlignmentIncrement(size_t size, int alignment) {
157991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    if (alignment == 0) {
158991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        alignment = DEFAULT_MALLOC_MEMORY_ALIGNMENT;
159991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    }
160991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    alignment *= 2;
161991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return size + alignment - (size % alignment);
162991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
163991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
164991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisuint8_t *getColdBuffer(int num_buffers, size_t incr, int alignment, int or_mask) {
165991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    uint8_t *buffers = reinterpret_cast<uint8_t*>(malloc(num_buffers * incr + 3 * alignment));
166991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    if (!buffers) {
167991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        return NULL;
168991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    }
169991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return getAlignedMemory(buffers, alignment, or_mask);
170991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
171991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
172991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisstatic inline double computeColdAverage(uint64_t time_ns, size_t size, size_t copies, size_t num_buffers) {
173991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return ((size/1024.0) * copies * num_buffers) / ((double)time_ns/NS_PER_SEC);
174991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
175991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
176991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisstatic void inline printColdIter(uint64_t time_ns, const char *name, size_t size, size_t copies, size_t num_buffers, double avg) {
177991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    printf("%s %ux%ux%u bytes took %.06f seconds (%f MB/s)\n",
178991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris           name, copies, num_buffers, size, (double)time_ns/NS_PER_SEC, avg/1024.0);
179991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
180991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
181991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisstatic void inline printColdSummary(
182991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        uint64_t time_ns, const char *name, size_t size, size_t copies, size_t num_buffers,
183991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        double running_avg, double square_avg, double min, double max) {
184991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    printf("  %s %ux%ux%u bytes average %.2f MB/s std dev %.4f min %.2f MB/s max %.2f MB/s\n",
185991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris           name, copies, num_buffers, size, running_avg/1024.0,
186991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris           computeStdDev(running_avg, square_avg)/1024.0, min/1024.0, max/1024.0);
187991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
188991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
// Core timing loop shared by the benchmark macros. It expands to a sequence
// of statements rather than a single block, so time_ns, iters,
// print_average, print_each_iter, min, max, running_avg, square_avg and avg
// are declared in the scope where the macro is used, and the loop counter
// "i" is visible to the code passed in.
//   cmd_data    - benchmark options; args[1] is the number of iterations
//                 (-1 loops forever) and the print flags select the output.
//   BENCH       - the code that is timed on every iteration.
//   COMPUTE_AVG - expression giving the throughput of the iteration.
//   PRINT_ITER  - run after every iteration when print_each_iter is set.
//   PRINT_AVG   - run once after the loop when print_average is set.
#define MAINLOOP(cmd_data, BENCH, COMPUTE_AVG, PRINT_ITER, PRINT_AVG)     \
    uint64_t time_ns;                                                     \
    int iters = cmd_data.args[1];                                         \
    bool print_average = cmd_data.print_average;                          \
    bool print_each_iter = cmd_data.print_each_iter;                      \
    double min = 0.0;                                                     \
    double max = 0.0;                                                     \
    double running_avg = 0.0;                                             \
    double square_avg = 0.0;                                              \
    double avg;                                                           \
    for (int i = 0; iters == -1 || i < iters; i++) {                      \
        time_ns = nanoTime();                                             \
        BENCH;                                                            \
        time_ns = nanoTime() - time_ns;                                   \
        avg = COMPUTE_AVG;                                                \
        if (print_average) {                                              \
            running_avg = computeRunningAvg(avg, running_avg, i);         \
            square_avg = computeRunningSquareAvg(avg, square_avg, i);     \
            /* A min of 0.0 means that no sample has been stored yet. */  \
            if (min == 0.0 || avg < min) {                                \
                min = avg;                                                \
            }                                                             \
            if (avg > max) {                                              \
                max = avg;                                                \
            }                                                             \
        }                                                                 \
        if (print_each_iter) {                                            \
            PRINT_ITER;                                                   \
        }                                                                 \
    }                                                                     \
    if (print_average) {                                                  \
        PRINT_AVG;                                                        \
    }
21882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Timing loop for the benchmarks that work on buffers of "size" bytes.
// Each timed iteration runs BENCH "copies" times, where copies is
// cmd_data.data_size divided by size. Declares "copies" and the counter "j"
// in the scope where the macro is used, in addition to everything that
// MAINLOOP declares.
//   name     - benchmark name used in the output.
//   cmd_data - benchmark options.
//   size     - size in bytes of the data handled by one BENCH execution.
//   BENCH    - the code to benchmark.
#define MAINLOOP_DATA(name, cmd_data, size, BENCH)                        \
    size_t copies = cmd_data.data_size/size;                              \
    size_t j;                                                             \
    MAINLOOP(cmd_data,                                                    \
             for (j = 0; j < copies; j++) {                               \
                 BENCH;                                                   \
             },                                                           \
             computeAverage(time_ns, size, copies),                       \
             printIter(time_ns, name, size, copies, avg),                 \
             double std_dev = computeStdDev(square_avg, running_avg);     \
             printSummary(time_ns, name, size, copies, running_avg,       \
                          std_dev, min, max));
231014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
// Timing loop for the cold benchmarks. Expects a size_t variable named
// "num_buffers" to exist where the macro is used; it is rounded down here to
// num_incrs * num_strides. Declares num_strides, copies, j and k in the
// scope where the macro is used, in addition to everything that MAINLOOP
// declares. Each timed iteration runs BENCH "copies" x "num_incrs" times.
// The enclosing function returns -1 when the options leave no stride.
//   name      - benchmark name used in the output.
//   cmd_data  - benchmark options.
//   size      - size in bytes of the data handled by one BENCH execution.
//   num_incrs - number of BENCH executions in the inner loop.
//   BENCH     - the code to benchmark.
// The stride count is only decremented when it is non-zero, so it cannot
// wrap around, and the zero check is done before num_buffers is used as a
// divisor.
#define MAINLOOP_COLD(name, cmd_data, size, num_incrs, BENCH)                 \
    size_t num_strides = num_buffers / num_incrs;                             \
    if ((num_buffers % num_incrs) != 0 && num_strides > 0) {                  \
        num_strides--;                                                        \
    }                                                                         \
    if (num_strides == 0) {                                                   \
        printf("%s: Chosen options lead to no copies, aborting.\n", name);    \
        return -1;                                                            \
    }                                                                         \
    size_t copies = 1;                                                        \
    num_buffers = num_incrs * num_strides;                                    \
    if (num_buffers * size < static_cast<size_t>(cmd_data.data_size)) {       \
        copies = cmd_data.data_size / (num_buffers * size);                   \
    }                                                                         \
    size_t j, k;                                                              \
    MAINLOOP(cmd_data,                                                        \
             for (j = 0; j < copies; j++) {                                   \
                 for (k = 0; k < num_incrs; k++) {                            \
                     BENCH;                                                   \
                }                                                             \
            },                                                                \
            computeColdAverage(time_ns, size, copies, num_buffers),           \
            printColdIter(time_ns, name, size, copies, num_buffers, avg),     \
            printColdSummary(time_ns, name, size, copies, num_buffers,        \
                             running_avg, square_avg, min, max));
257991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
// Benchmark code that works on one buffer. The buffer has cmd_data.args[0]
// bytes and is allocated with the dst alignment and or-mask from cmd_data.
// The variables "size" and "buf" are declared in the scope where the macro
// is used; "buf" points to the buffer and should be used by the BENCH code.
// The enclosing function returns -1 when the allocation fails.
// INIT - Any specialized code needed to initialize the data. This will only
//        be executed once.
// BENCH - The actual code to benchmark and is timed.
#define BENCH_ONE_BUF(name, cmd_data, INIT, BENCH)                            \
    size_t size = cmd_data.args[0];                                           \
    uint8_t *buf = allocateAlignedMemory(size, cmd_data.dst_align,            \
                                         cmd_data.dst_or_mask);               \
    if (!buf) {                                                               \
        return -1;                                                            \
    }                                                                         \
    INIT;                                                                     \
    MAINLOOP_DATA(name, cmd_data, size, BENCH);
271991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
// Benchmark code that works on two buffers. "buf1" has cmd_data.args[0]
// bytes and uses the src alignment and or-mask; "buf2" uses the dst
// alignment and or-mask and is cmd_data.dst_str_size bytes larger when that
// value is greater than zero. The variables "size", "buf1", "total_size" and
// "buf2" are declared in the scope where the macro is used; "buf1" and
// "buf2" should be used by the BENCH code.
// The enclosing function returns -1 when either allocation fails.
// INIT - Any specialized code needed to initialize the data. This will only
//        be executed once.
// BENCH - The actual code to benchmark and is timed.
#define BENCH_TWO_BUFS(name, cmd_data, INIT, BENCH)                           \
    size_t size = cmd_data.args[0];                                           \
    uint8_t *buf1 = allocateAlignedMemory(size, cmd_data.src_align,           \
                                          cmd_data.src_or_mask);              \
    if (!buf1) {                                                              \
        return -1;                                                            \
    }                                                                         \
    size_t total_size = size;                                                 \
    if (cmd_data.dst_str_size > 0) {                                          \
        total_size += cmd_data.dst_str_size;                                  \
    }                                                                         \
    uint8_t *buf2 = allocateAlignedMemory(total_size, cmd_data.dst_align,     \
                                          cmd_data.dst_or_mask);              \
    if (!buf2) {                                                              \
        return -1;                                                            \
    }                                                                         \
    INIT;                                                                     \
    MAINLOOP_DATA(name, cmd_data, size, BENCH);
291991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
// Cold-cache benchmark scaffold for code that works on a single buffer.
//
// One large region, sized from cmd_data.cold_data_size so that it is meant
// to exceed the largest cache in the system, is divided into "num_buffers"
// slices of "incr" bytes each. BENCH operates on "buf", which always points
// at a slice of the requested size and alignment. Consecutive "buf" values
// are "stride_incr" bytes apart, which is always more than
// cmd_data.cold_stride_size, so an implementation that prefetches past the
// end of its own slice does not warm the slice that is used next.
//
// Names declared here and usable from INIT and BENCH:
//   size, incr, num_buffers, buffer_size, buffer, num_incrs, stride_incr,
//   buf, l
// "k" and "num_strides" are not declared here; they are expected to come
// from MAINLOOP_COLD.
//
// INIT  - Code that prepares the data. It is expanded once, ahead of the
//         benchmark loop.
// BENCH - The code being benchmarked; this is the part that is timed.
#define COLD_ONE_BUF(name, cmd_data, INIT, BENCH)                             \
    size_t size = cmd_data.args[0];                                           \
    size_t incr = getAlignmentIncrement(size, cmd_data.dst_align);            \
    size_t num_buffers = cmd_data.cold_data_size / incr;                      \
    size_t buffer_size = num_buffers * incr;                                  \
    uint8_t *buffer = getColdBuffer(num_buffers, incr, cmd_data.dst_align, cmd_data.dst_or_mask); \
    if (buffer == NULL) {                                                     \
        return -1;                                                            \
    }                                                                         \
    size_t num_incrs = cmd_data.cold_stride_size / incr + 1;                  \
    size_t stride_incr = num_incrs * incr;                                    \
    size_t l;                                                                 \
    uint8_t *buf;                                                             \
    INIT;                                                                     \
    MAINLOOP_COLD(name, cmd_data, size, num_incrs,                            \
                  buf = buffer + (k * incr);                                  \
                  for (l = 0; l < num_strides; l++) {                         \
                      BENCH;                                                  \
                      buf += stride_incr;                                     \
                  });
324991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
// Cold-cache benchmark scaffold for code that works on a source and a
// destination buffer.
//
// Two large regions are obtained, each meant to exceed the largest cache in
// the system. "buffer1" is divided into slices of "buf1_incr" bytes and
// "buffer2" into slices of "buf2_incr" bytes; the destination slice is sized
// for "size" plus cmd_data.dst_str_size when that option is positive. BENCH
// operates on "buf1" and "buf2". Between consecutive uses both pointers
// advance by "num_incrs" slices, a distance that is more than
// cmd_data.cold_stride_size for either buffer, so an implementation that
// prefetches past the end of its slice does not warm the slices used next.
//
// Names declared here and usable from INIT and BENCH:
//   size, total_size, buf1_incr, buf2_incr, max_incr, min_incr, num_buffers,
//   buffer1_size, buffer2_size, buffer1, buffer2, num_incrs,
//   buf1_stride_incr, buf2_stride_incr, l, buf1, buf2
// "k" and "num_strides" are not declared here; they are expected to come
// from MAINLOOP_COLD.
//
// INIT  - Code that prepares the data. It is expanded once, ahead of the
//         benchmark loop.
// BENCH - The code being benchmarked; this is the part that is timed.
#define COLD_TWO_BUFS(name, cmd_data, INIT, BENCH)                            \
    size_t size = cmd_data.args[0];                                           \
    size_t total_size = size;                                                 \
    if (cmd_data.dst_str_size > 0) {                                          \
        total_size += cmd_data.dst_str_size;                                  \
    }                                                                         \
    size_t buf1_incr = getAlignmentIncrement(size, cmd_data.src_align);       \
    size_t buf2_incr = getAlignmentIncrement(total_size, cmd_data.dst_align); \
    size_t max_incr = (buf2_incr < buf1_incr) ? buf1_incr : buf2_incr;        \
    size_t min_incr = (buf2_incr > buf1_incr) ? buf1_incr : buf2_incr;        \
    size_t num_buffers = cmd_data.cold_data_size / max_incr;                  \
    size_t buffer1_size = num_buffers * buf1_incr;                            \
    size_t buffer2_size = num_buffers * buf2_incr;                            \
    uint8_t *buffer1 = getColdBuffer(num_buffers, buf1_incr, cmd_data.src_align, cmd_data.src_or_mask); \
    if (buffer1 == NULL) {                                                    \
        return -1;                                                            \
    }                                                                         \
    uint8_t *buffer2 = getColdBuffer(num_buffers, buf2_incr, cmd_data.dst_align, cmd_data.dst_or_mask); \
    if (buffer2 == NULL) {                                                    \
        return -1;                                                            \
    }                                                                         \
    size_t num_incrs = cmd_data.cold_stride_size / min_incr + 1;              \
    size_t buf1_stride_incr = num_incrs * buf1_incr;                          \
    size_t buf2_stride_incr = num_incrs * buf2_incr;                          \
    size_t l;                                                                 \
    uint8_t *buf1;                                                            \
    uint8_t *buf2;                                                            \
    INIT;                                                                     \
    MAINLOOP_COLD(name, cmd_data, size, num_incrs,                            \
                  buf1 = buffer1 + (k * buf1_incr);                           \
                  buf2 = buffer2 + (k * buf2_incr);                           \
                  for (l = 0; l < num_strides; l++) {                         \
                      BENCH;                                                  \
                      buf1 += buf1_stride_incr;                               \
                      buf2 += buf2_stride_incr;                               \
                  });
371991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
372014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkSleep(const char *name, const command_data_t &cmd_data, void_func_t func) {
373014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    int delay = cmd_data.args[0];
374014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    MAINLOOP(cmd_data, sleep(delay),
375014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             (double)time_ns/NS_PER_SEC,
376014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             printf("sleep(%d) took %.06f seconds\n", delay, avg);,
377014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             printf("  sleep(%d) average %.06f seconds std dev %f min %.06f seconds max %0.6f seconds\n", \
378014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris                    delay, running_avg, computeStdDev(square_avg, running_avg), \
379014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris                    min, max));
380014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
38182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return 0;
38282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
38382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
384014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkCpu(const char *name, const command_data_t &cmd_data, void_func_t func) {
38582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    // Use volatile so that the loop is not optimized away by the compiler.
38682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    volatile int cpu_foo;
38782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
388014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    MAINLOOP(cmd_data,
389014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             for (cpu_foo = 0; cpu_foo < 100000000; cpu_foo++),
390014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             (double)time_ns/NS_PER_SEC,
391014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             printf("cpu took %.06f seconds\n", avg),
392014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris             printf("  cpu average %.06f seconds std dev %f min %0.6f seconds max %0.6f seconds\n", \
393014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris                    running_avg, computeStdDev(square_avg, running_avg), min, max));
39482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
39582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return 0;
39682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
39782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
398014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkMemset(const char *name, const command_data_t &cmd_data, void_func_t func) {
399014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
400991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_ONE_BUF(name, cmd_data, ;, memset_func(buf, i, size));
40182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
402991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
403991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
40482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
405991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkMemsetCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
406991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
407991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_ONE_BUF(name, cmd_data, ;, memset_func(buf, l, size));
40882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
40982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return 0;
41082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
41182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
412014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkMemcpy(const char *name, const command_data_t &cmd_data, void_func_t func) {
413014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    memcpy_func_t memcpy_func = reinterpret_cast<memcpy_func_t>(func);
41482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
415991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_TWO_BUFS(name, cmd_data,
416991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   memset(buf1, 0xff, size); \
417991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   memset(buf2, 0, size),
418991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   memcpy_func(buf2, buf1, size));
41982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
420991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
421991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
42225ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
423991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkMemcpyCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
424991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    memcpy_func_t memcpy_func = reinterpret_cast<memcpy_func_t>(func);
425991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
426991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_TWO_BUFS(name, cmd_data,
427991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer1, 0xff, buffer1_size); \
428991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer2, 0x0, buffer2_size),
429991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memcpy_func(buf2, buf1, size));
43082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
43125ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    return 0;
43225ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris}
43325ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
434014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkMemread(const char *name, const command_data_t &cmd_data, void_func_t func) {
43525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    int size = cmd_data.args[0];
43625ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
437014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    uint32_t *src = reinterpret_cast<uint32_t*>(malloc(size));
438014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    if (!src)
43925ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        return -1;
440014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    memset(src, 0xff, size);
44125ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
442014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    // Use volatile so the compiler does not optimize away the reads.
443014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    volatile int foo;
444014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    size_t k;
445014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    MAINLOOP_DATA(name, cmd_data, size,
446991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  for (k = 0; k < size/sizeof(uint32_t); k++) foo = src[k]);
447014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
448014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    return 0;
449014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris}
450014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
451014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferrisint benchmarkStrcmp(const char *name, const command_data_t &cmd_data, void_func_t func) {
452014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    strcmp_func_t strcmp_func = reinterpret_cast<strcmp_func_t>(func);
453014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
454014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    int retval;
455991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_TWO_BUFS(name, cmd_data,
456991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   initString(buf1, size); \
457991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   initString(buf2, size),
458991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   retval = strcmp_func(reinterpret_cast<char*>(buf1), reinterpret_cast<char*>(buf2)); \
459991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   if (retval != 0) printf("%s failed, return value %d\n", name, retval));
46025ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris
46182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return 0;
46282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
46382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
464991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkStrcmpCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
465991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    strcmp_func_t strcmp_func = reinterpret_cast<strcmp_func_t>(func);
466991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
467991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    int retval;
468991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_TWO_BUFS(name, cmd_data,
469991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer1, 'a', buffer1_size); \
470991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer2, 'a', buffer2_size); \
471991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  for (size_t i =0; i < num_buffers; i++) { \
472991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                      buffer1[size-1+buf1_incr*i] = '\0'; \
473991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                      buffer2[size-1+buf2_incr*i] = '\0'; \
474991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  },
475991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  retval = strcmp_func(reinterpret_cast<char*>(buf1), reinterpret_cast<char*>(buf2)); \
476991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  if (retval != 0) printf("%s failed, return value %d\n", name, retval));
4777401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
478991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
479991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
4807401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
481991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkStrlen(const char *name, const command_data_t &cmd_data, void_func_t func) {
4827401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    size_t real_size;
483991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    strlen_func_t strlen_func = reinterpret_cast<strlen_func_t>(func);
484991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_ONE_BUF(name, cmd_data,
485991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  initString(buf, size),
486991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  real_size = strlen_func(reinterpret_cast<char*>(buf)); \
4877401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris                  if (real_size + 1 != size) { \
4887401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris                      printf("%s failed, expected %u, got %u\n", name, size, real_size); \
4897401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris                      return -1; \
490991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  });
491991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
492991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
493991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
4947401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
495991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkStrlenCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
496991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    strlen_func_t strlen_func = reinterpret_cast<strlen_func_t>(func);
497991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    size_t real_size;
498991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_ONE_BUF(name, cmd_data,
499991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 memset(buffer, 'a', buffer_size); \
500991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 for (size_t i = 0; i < num_buffers; i++) { \
501991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                     buffer[size-1+incr*i] = '\0'; \
502991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 },
503991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 real_size = strlen_func(reinterpret_cast<char*>(buf)); \
504991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 if (real_size + 1 != size) { \
505991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                     printf("%s failed, expected %u, got %u\n", name, size, real_size); \
506991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                     return -1; \
507991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                 });
5087401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    return 0;
5097401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris}
5107401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
5117401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferrisint benchmarkStrcat(const char *name, const command_data_t &cmd_data, void_func_t func) {
5127401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    str_func_t str_func = reinterpret_cast<str_func_t>(func);
5137401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
5147401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    int dst_str_size = cmd_data.dst_str_size;
5157401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    if (dst_str_size <= 0) {
5167401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris        printf("%s requires --dst_str_size to be set to a non-zero value.\n",
5177401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris               name);
5187401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris        return -1;
5197401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    }
520991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_TWO_BUFS(name, cmd_data,
521991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   initString(buf1, size); \
522991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   initString(buf2, dst_str_size),
523991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   str_func(reinterpret_cast<char*>(buf2), reinterpret_cast<char*>(buf1)); buf2[dst_str_size-1] = '\0');
52482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
525991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
526991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
52782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
528991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkStrcatCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
529991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    str_func_t str_func = reinterpret_cast<str_func_t>(func);
530991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
531991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    int dst_str_size = cmd_data.dst_str_size;
532991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    if (dst_str_size <= 0) {
533991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        printf("%s requires --dst_str_size to be set to a non-zero value.\n",
534991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris               name);
535991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris        return -1;
536991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    }
537991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_TWO_BUFS(name, cmd_data,
538991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer1, 'a', buffer1_size); \
539991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer2, 'b', buffer2_size); \
540991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  for (size_t i = 0; i < num_buffers; i++) { \
541991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                      buffer1[size-1+buf1_incr*i] = '\0'; \
542991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                      buffer2[dst_str_size-1+buf2_incr*i] = '\0'; \
543991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  },
544991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  str_func(reinterpret_cast<char*>(buf2), reinterpret_cast<char*>(buf1)); buf2[dst_str_size-1] = '\0');
54582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
54682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return 0;
54782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
54882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
549991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
5507401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferrisint benchmarkStrcpy(const char *name, const command_data_t &cmd_data, void_func_t func) {
5517401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    str_func_t str_func = reinterpret_cast<str_func_t>(func);
5527401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
553991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    BENCH_TWO_BUFS(name, cmd_data,
554991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   initString(buf1, size); \
555991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   memset(buf2, 0, size),
556991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                   str_func(reinterpret_cast<char*>(buf2), reinterpret_cast<char*>(buf1)));
557991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
558991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    return 0;
559991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris}
5607401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
561991bcd464d67c3cff76477140331619ff138aafcChristopher Ferrisint benchmarkStrcpyCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
562991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    str_func_t str_func = reinterpret_cast<str_func_t>(func);
563991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris
564991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    COLD_TWO_BUFS(name, cmd_data,
565991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer1, 'a', buffer1_size); \
566991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  for (size_t i = 0; i < num_buffers; i++) { \
567991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                     buffer1[size-1+buf1_incr*i] = '\0'; \
568991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  } \
569991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  memset(buffer2, 0, buffer2_size),
570991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                  str_func(reinterpret_cast<char*>(buf2), reinterpret_cast<char*>(buf1)));
5717401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris
5727401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    return 0;
5737401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris}
574014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris
57582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris// Create the mapping structure.
57682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferrisfunction_t function_table[] = {
577014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    { "cpu", benchmarkCpu, NULL },
578991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "memcpy", benchmarkMemcpy, reinterpret_cast<void_func_t>(memcpy) },
579991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "memcpy_cold", benchmarkMemcpyCold, reinterpret_cast<void_func_t>(memcpy) },
580014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    { "memread", benchmarkMemread, NULL },
581014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    { "memset", benchmarkMemset, reinterpret_cast<void_func_t>(memset) },
582991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "memset_cold", benchmarkMemsetCold, reinterpret_cast<void_func_t>(memset) },
583991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "sleep", benchmarkSleep, NULL },
584991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strcat", benchmarkStrcat, reinterpret_cast<void_func_t>(strcat) },
585991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strcat_cold", benchmarkStrcatCold, reinterpret_cast<void_func_t>(strcat) },
586014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    { "strcmp", benchmarkStrcmp, reinterpret_cast<void_func_t>(strcmp) },
587991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strcmp_cold", benchmarkStrcmpCold, reinterpret_cast<void_func_t>(strcmp) },
588014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    { "strcpy", benchmarkStrcpy, reinterpret_cast<void_func_t>(strcpy) },
589991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strcpy_cold", benchmarkStrcpyCold, reinterpret_cast<void_func_t>(strcpy) },
590991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strlen", benchmarkStrlen, reinterpret_cast<void_func_t>(strlen) },
591991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    { "strlen_cold", benchmarkStrlenCold, reinterpret_cast<void_func_t>(strlen) },
59282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris};
59382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Prints the command-line help text to stdout: the global options first,
// followed by the per-benchmark synopsis lines.
void usage() {
    // Synopsis of the options shared by all benchmarks.
    printf("Usage:\n");
    printf("  micro_bench [--data_size DATA_BYTES] [--print_average]\n");
    printf("              [--no_print_each_iter] [--lock_to_cpu CORE]\n");
    printf("              [--src_align ALIGN] [--src_or_mask OR_MASK]\n");
    printf("              [--dst_align ALIGN] [--dst_or_mask OR_MASK]\n");
    printf("              [--dst_str_size SIZE] [--cold_data_size DATA_BYTES]\n");
    printf("              [--cold_stride_size SIZE]\n");

    // Detailed description of each option.
    printf("    --data_size DATA_BYTES\n");
    printf("      For the data benchmarks (memcpy/memset/memread) the approximate\n");
    printf("      size of data, in bytes, that will be manipulated in each iteration.\n");
    printf("    --print_average\n");
    printf("      Print the average and standard deviation of all iterations.\n");
    printf("    --no_print_each_iter\n");
    printf("      Do not print any values in each iteration.\n");
    printf("    --lock_to_cpu CORE\n");
    printf("      Lock to the specified CORE. The default is to use the last core found.\n");
    printf("    --dst_align ALIGN\n");
    printf("      If the command supports it, align the destination pointer to ALIGN.\n");
    printf("      The default is to use the value returned by malloc.\n");
    printf("    --dst_or_mask OR_MASK\n");
    printf("      If the command supports it, or in the OR_MASK on to the destination pointer.\n");
    printf("      The OR_MASK must be smaller than the dst_align value.\n");
    printf("      The default value is 0.\n");
    printf("    --src_align ALIGN\n");
    printf("      If the command supports it, align the source pointer to ALIGN. The default is to use the\n");
    printf("      value returned by malloc.\n");
    printf("    --src_or_mask OR_MASK\n");
    printf("      If the command supports it, or in the OR_MASK on to the source pointer.\n");
    printf("      The OR_MASK must be smaller than the src_align value.\n");
    printf("      The default value is 0.\n");
    printf("    --dst_str_size SIZE\n");
    printf("      If the command supports it, create a destination string of this length.\n");
    printf("      The default is to not update the destination string.\n");
    // The operand name matches the synopsis above (DATA_BYTES).
    printf("    --cold_data_size DATA_BYTES\n");
    printf("      For _cold benchmarks, use this as the total amount of memory to use.\n");
    printf("      The default is 128MB, and the number should be larger than the cache on the chip.\n");
    printf("      This value is specified in bytes.\n");
    printf("    --cold_stride_size SIZE\n");
    printf("      For _cold benchmarks, use this as the minimum stride between iterations.\n");
    printf("      The default is 4096 bytes and the number should be larger than the amount of data\n");
    printf("      pulled in to the cache by each run of the benchmark.\n");
    printf("    ITERS\n");
    printf("      The number of iterations to execute each benchmark. If not\n");
    printf("      passed in then run forever.\n");

    // Per-benchmark invocations.
    printf("  micro_bench cpu UNUSED [ITERS]\n");
    printf("  micro_bench [--dst_align ALIGN] [--dst_or_mask OR_MASK] memcpy NUM_BYTES [ITERS]\n");
    printf("  micro_bench memread NUM_BYTES [ITERS]\n");
    printf("  micro_bench [--dst_align ALIGN] [--dst_or_mask OR_MASK] memset NUM_BYTES [ITERS]\n");
    printf("  micro_bench sleep TIME_TO_SLEEP [ITERS]\n");
    printf("    TIME_TO_SLEEP\n");
    printf("      The time in seconds to sleep.\n");
    printf("  micro_bench [--src_align ALIGN] [--src_or_mask OR_MASK] [--dst_align ALIGN] [--dst_or_mask OR_MASK] [--dst_str_size SIZE] strcat NUM_BYTES [ITERS]\n");
    printf("  micro_bench [--src_align ALIGN] [--src_or_mask OR_MASK] [--dst_align ALIGN] [--dst_or_mask OR_MASK] strcmp NUM_BYTES [ITERS]\n");
    printf("  micro_bench [--src_align ALIGN] [--src_or_mask OR_MASK] [--dst_align ALIGN] [--dst_or_mask OR_MASK] strcpy NUM_BYTES [ITERS]\n");
    printf("  micro_bench [--dst_align ALIGN] [--dst_or_mask OR_MASK] strlen NUM_BYTES [ITERS]\n");
    printf("\n");
    // Lists every benchmark that has a "_cold" entry in function_table.
    printf("  In addition, memcpy/memset/strcat/strcmp/strcpy/strlen have _cold versions\n");
    printf("  that will execute the function on a buffer not in the cache.\n");
}
65582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
65682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferrisfunction_t *processOptions(int argc, char **argv, command_data_t *cmd_data) {
65782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    function_t *command = NULL;
65882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
65982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    // Initialize the command_flags.
66082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->print_average = false;
66182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->print_each_iter = true;
66282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->dst_align = 0;
66382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->src_align = 0;
66425ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    cmd_data->src_or_mask = 0;
66525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    cmd_data->dst_or_mask = 0;
66682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->num_args = 0;
66782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->cpu_to_lock = -1;
66882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    cmd_data->data_size = DEFAULT_DATA_SIZE;
6697401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris    cmd_data->dst_str_size = -1;
670991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    cmd_data->cold_data_size = DEFAULT_COLD_DATA_SIZE;
671991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris    cmd_data->cold_stride_size = DEFAULT_COLD_STRIDE_SIZE;
67282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    for (int i = 0; i < MAX_ARGS; i++) {
67382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        cmd_data->args[i] = -1;
67482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    }
67582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
67682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    for (int i = 1; i < argc; i++) {
67782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        if (argv[i][0] == '-') {
67882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            int *save_value = NULL;
67982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            if (strcmp(argv[i], "--print_average") == 0) {
680991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                cmd_data->print_average = true;
68182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else if (strcmp(argv[i], "--no_print_each_iter") == 0) {
682991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                cmd_data->print_each_iter = false;
68382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else if (strcmp(argv[i], "--dst_align") == 0) {
684991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->dst_align;
68582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else if (strcmp(argv[i], "--src_align") == 0) {
686991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->src_align;
68725ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris            } else if (strcmp(argv[i], "--dst_or_mask") == 0) {
688991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->dst_or_mask;
68925ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris            } else if (strcmp(argv[i], "--src_or_mask") == 0) {
690991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->src_or_mask;
69182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else if (strcmp(argv[i], "--lock_to_cpu") == 0) {
692991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->cpu_to_lock;
69382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else if (strcmp(argv[i], "--data_size") == 0) {
694991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->data_size;
6957401bc1263d9b9d9f7605a3c5bdaaf3171c7e9c0Christopher Ferris            } else if (strcmp(argv[i], "--dst_str_size") == 0) {
696991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->dst_str_size;
697991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris            } else if (strcmp(argv[i], "--cold_data_size") == 0) {
698991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->cold_data_size;
699991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris            } else if (strcmp(argv[i], "--cold_stride_size") == 0) {
700991bcd464d67c3cff76477140331619ff138aafcChristopher Ferris                save_value = &cmd_data->cold_stride_size;
70182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            } else {
70282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                printf("Unknown option %s\n", argv[i]);
70382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                return NULL;
70482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            }
70582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            if (save_value) {
70682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                // Checking both characters without a strlen() call should be
70782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                // safe since as long as the argument exists, one character will
70882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                // be present (\0). And if the first character is '-', then
70982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                // there will always be a second character (\0 again).
71082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                if (i == argc - 1 || (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) {
71182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                    printf("The option %s requires one argument.\n",
71282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                           argv[i]);
71382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                    return NULL;
71482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                }
71525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris                *save_value = (int)strtol(argv[++i], NULL, 0);
71682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            }
71782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        } else if (!command) {
718014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris            for (size_t j = 0; j < sizeof(function_table)/sizeof(function_t); j++) {
719014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris                if (strcmp(argv[i], function_table[j].name) == 0) {
720014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris                    command = &function_table[j];
72182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                    break;
72282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                }
72382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            }
72482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            if (!command) {
72582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                printf("Uknown command %s\n", argv[i]);
72682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris                return NULL;
72782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            }
72882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        } else if (cmd_data->num_args > MAX_ARGS) {
72982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            printf("More than %d number arguments passed in.\n", MAX_ARGS);
73082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            return NULL;
73182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        } else {
73282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris            cmd_data->args[cmd_data->num_args++] = atoi(argv[i]);
73382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        }
73482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    }
73582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
73682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    // Check the arguments passed in make sense.
73782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    if (cmd_data->num_args != 1 && cmd_data->num_args != 2) {
73882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("Not enough arguments passed in.\n");
73982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
74082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    } else if (cmd_data->dst_align < 0) {
74182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("The --dst_align option must be greater than or equal to 0.\n");
74282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
74382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    } else if (cmd_data->src_align < 0) {
74482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("The --src_align option must be greater than or equal to 0.\n");
74582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
74682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    } else if (cmd_data->data_size <= 0) {
74782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("The --data_size option must be a positive number.\n");
74882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
74982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    } else if ((cmd_data->dst_align & (cmd_data->dst_align - 1))) {
75082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("The --dst_align option must be a power of 2.\n");
75182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
75282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    } else if ((cmd_data->src_align & (cmd_data->src_align - 1))) {
75382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        printf("The --src_align option must be a power of 2.\n");
75482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris        return NULL;
75525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    } else if (!cmd_data->src_align && cmd_data->src_or_mask) {
75625ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        printf("The --src_or_mask option requires that --src_align be set.\n");
75725ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        return NULL;
75825ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    } else if (!cmd_data->dst_align && cmd_data->dst_or_mask) {
75925ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        printf("The --dst_or_mask option requires that --dst_align be set.\n");
76025ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        return NULL;
76125ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    } else if (cmd_data->src_or_mask > cmd_data->src_align) {
76225ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        printf("The value of --src_or_mask cannot be larger that --src_align.\n");
76325ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        return NULL;
76425ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris    } else if (cmd_data->dst_or_mask > cmd_data->dst_align) {
76525ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        printf("The value of --src_or_mask cannot be larger that --src_align.\n");
76625ada90c4b99bd5471e3677542b62cef8d439399Christopher Ferris        return NULL;
76782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    }
76882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
76982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    return command;
77082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
77182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
// Raises the process to the highest scheduling priority (nice -20) and pins
// it to a single CPU.
//
// cpu_to_lock: index of the CPU to pin to. A negative value selects the
//              highest-numbered CPU in the process's current affinity mask.
//
// Returns true on success. On failure a diagnostic is printed and false is
// returned.
bool raisePriorityAndLock(int cpu_to_lock) {
    if (setpriority(PRIO_PROCESS, 0, -20)) {
        // perror() appends ": <error text>\n" itself, so the message must
        // not end with a newline.
        perror("Unable to raise priority of process");
        return false;
    }

    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_getaffinity failed");
        return false;
    }

    if (cpu_to_lock < 0) {
        // Lock to the last active core we find: scan from the top so the
        // first hit is the highest-numbered CPU in the mask.
        for (int i = CPU_SETSIZE - 1; i >= 0; i--) {
            if (CPU_ISSET(i, &cpuset)) {
                cpu_to_lock = i;
                break;
            }
        }
        if (cpu_to_lock < 0) {
            printf("Cannot find any valid cpu to lock.\n");
            return false;
        }
    } else if (cpu_to_lock >= CPU_SETSIZE || !CPU_ISSET(cpu_to_lock, &cpuset)) {
        // The range check comes first so the CPU_* macros are never given an
        // index beyond the bits held by cpu_set_t.
        printf("Cpu %d does not exist.\n", cpu_to_lock);
        return false;
    }

    // Restrict the affinity mask to the chosen CPU only.
    CPU_ZERO(&cpuset);
    CPU_SET(cpu_to_lock, &cpuset);
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity failed");
        return false;
    }

    return true;
}
81282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
81382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferrisint main(int argc, char **argv) {
81482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    command_data_t cmd_data;
81582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
81682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    function_t *command = processOptions(argc, argv, &cmd_data);
81782ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    if (!command) {
81882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris      usage();
81982ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris      return -1;
82082ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    }
82182ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
82282ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    if (!raisePriorityAndLock(cmd_data.cpu_to_lock)) {
82382ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris      return -1;
82482ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    }
82582ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris
82682ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris    printf("%s\n", command->name);
827014cf9dc226a775e27b2e6ace2461c510d9c6c19Christopher Ferris    return (*command->ptr)(command->name, cmd_data, command->func);
82882ac1af86d8ca3b7a42af9ca02e4d0b556681bc8Christopher Ferris}
829