1/* 2 * mem-memcpy.c 3 * 4 * memcpy: Simple memory copy in various ways 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9#include "../perf.h" 10#include "../util/util.h" 11#include "../util/parse-options.h" 12#include "../util/header.h" 13#include "bench.h" 14#include "mem-memcpy-arch.h" 15 16#include <stdio.h> 17#include <stdlib.h> 18#include <string.h> 19#include <sys/time.h> 20#include <errno.h> 21 22#define K 1024 23 24static const char *length_str = "1MB"; 25static const char *routine = "default"; 26static int iterations = 1; 27static bool use_cycle; 28static int cycle_fd; 29static bool only_prefault; 30static bool no_prefault; 31 32static const struct option options[] = { 33 OPT_STRING('l', "length", &length_str, "1MB", 34 "Specify length of memory to copy. " 35 "Available units: B, KB, MB, GB and TB (upper and lower)"), 36 OPT_STRING('r', "routine", &routine, "default", 37 "Specify routine to copy"), 38 OPT_INTEGER('i', "iterations", &iterations, 39 "repeat memcpy() invocation this number of times"), 40 OPT_BOOLEAN('c', "cycle", &use_cycle, 41 "Use cycles event instead of gettimeofday() for measuring"), 42 OPT_BOOLEAN('o', "only-prefault", &only_prefault, 43 "Show only the result with page faults before memcpy()"), 44 OPT_BOOLEAN('n', "no-prefault", &no_prefault, 45 "Show only the result without page faults before memcpy()"), 46 OPT_END() 47}; 48 49typedef void *(*memcpy_t)(void *, const void *, size_t); 50 51struct routine { 52 const char *name; 53 const char *desc; 54 memcpy_t fn; 55}; 56 57struct routine routines[] = { 58 { "default", 59 "Default memcpy() provided by glibc", 60 memcpy }, 61#ifdef ARCH_X86_64 62 63#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, 64#include "mem-memcpy-x86-64-asm-def.h" 65#undef MEMCPY_FN 66 67#endif 68 69 { NULL, 70 NULL, 71 NULL } 72}; 73 74static const char * const bench_mem_memcpy_usage[] = { 75 "perf bench mem memcpy <options>", 76 NULL 77}; 78 79static struct perf_event_attr cycle_attr = { 80 .type = PERF_TYPE_HARDWARE, 81 .config = PERF_COUNT_HW_CPU_CYCLES 82}; 83 84static void init_cycle(void) 85{ 86 cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); 87 88 if (cycle_fd < 0 && errno == ENOSYS) 89 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 90 else 91 BUG_ON(cycle_fd < 0); 92} 93 94static u64 get_cycle(void) 95{ 96 int ret; 97 u64 clk; 98 99 ret = read(cycle_fd, &clk, sizeof(u64)); 100 BUG_ON(ret != sizeof(u64)); 101 102 return clk; 103} 104 105static double timeval2double(struct timeval *ts) 106{ 107 return (double)ts->tv_sec + 108 (double)ts->tv_usec / (double)1000000; 109} 110 111static void alloc_mem(void **dst, void **src, size_t length) 112{ 113 *dst = zalloc(length); 114 if (!*dst) 115 die("memory allocation failed - maybe length is too large?\n"); 116 117 *src = zalloc(length); 118 if (!*src) 119 die("memory allocation failed - maybe length is too large?\n"); 120 /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ 121 memset(*src, 0, length); 122} 123 124static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) 125{ 126 u64 cycle_start = 0ULL, cycle_end = 0ULL; 127 void *src = NULL, *dst = NULL; 128 int i; 129 130 alloc_mem(&src, &dst, len); 131 132 if (prefault) 133 fn(dst, src, len); 134 135 cycle_start = get_cycle(); 136 for (i = 0; i < iterations; ++i) 137 fn(dst, src, len); 138 cycle_end = get_cycle(); 139 140 free(src); 141 free(dst); 142 return cycle_end - cycle_start; 143} 144 145static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) 146{ 147 struct timeval tv_start, tv_end, tv_diff; 148 void *src = NULL, *dst = NULL; 149 int i; 150 151 alloc_mem(&src, &dst, len); 152 153 if (prefault) 154 fn(dst, src, len); 155 156 BUG_ON(gettimeofday(&tv_start, NULL)); 157 for (i = 0; i < iterations; ++i) 158 fn(dst, src, len); 159 BUG_ON(gettimeofday(&tv_end, NULL)); 160 161 timersub(&tv_end, &tv_start, &tv_diff); 162 163 free(src); 164 free(dst); 165 return (double)((double)len / timeval2double(&tv_diff)); 166} 167 168#define pf (no_prefault ? 0 : 1) 169 170#define print_bps(x) do { \ 171 if (x < K) \ 172 printf(" %14lf B/Sec", x); \ 173 else if (x < K * K) \ 174 printf(" %14lfd KB/Sec", x / K); \ 175 else if (x < K * K * K) \ 176 printf(" %14lf MB/Sec", x / K / K); \ 177 else \ 178 printf(" %14lf GB/Sec", x / K / K / K); \ 179 } while (0) 180 181int bench_mem_memcpy(int argc, const char **argv, 182 const char *prefix __maybe_unused) 183{ 184 int i; 185 size_t len; 186 double result_bps[2]; 187 u64 result_cycle[2]; 188 189 argc = parse_options(argc, argv, options, 190 bench_mem_memcpy_usage, 0); 191 192 if (use_cycle) 193 init_cycle(); 194 195 len = (size_t)perf_atoll((char *)length_str); 196 197 result_cycle[0] = result_cycle[1] = 0ULL; 198 result_bps[0] = result_bps[1] = 0.0; 199 200 if ((s64)len <= 0) { 201 fprintf(stderr, "Invalid length:%s\n", length_str); 202 return 1; 203 } 204 205 /* same to without specifying either of prefault and no-prefault */ 206 if (only_prefault && no_prefault) 207 only_prefault = no_prefault = false; 208 209 for (i = 0; routines[i].name; i++) { 210 if (!strcmp(routines[i].name, routine)) 211 break; 212 } 213 if (!routines[i].name) { 214 printf("Unknown routine:%s\n", routine); 215 printf("Available routines...\n"); 216 for (i = 0; routines[i].name; i++) { 217 printf("\t%s ... %s\n", 218 routines[i].name, routines[i].desc); 219 } 220 return 1; 221 } 222 223 if (bench_format == BENCH_FORMAT_DEFAULT) 224 printf("# Copying %s Bytes ...\n\n", length_str); 225 226 if (!only_prefault && !no_prefault) { 227 /* show both of results */ 228 if (use_cycle) { 229 result_cycle[0] = 230 do_memcpy_cycle(routines[i].fn, len, false); 231 result_cycle[1] = 232 do_memcpy_cycle(routines[i].fn, len, true); 233 } else { 234 result_bps[0] = 235 do_memcpy_gettimeofday(routines[i].fn, 236 len, false); 237 result_bps[1] = 238 do_memcpy_gettimeofday(routines[i].fn, 239 len, true); 240 } 241 } else { 242 if (use_cycle) { 243 result_cycle[pf] = 244 do_memcpy_cycle(routines[i].fn, 245 len, only_prefault); 246 } else { 247 result_bps[pf] = 248 do_memcpy_gettimeofday(routines[i].fn, 249 len, only_prefault); 250 } 251 } 252 253 switch (bench_format) { 254 case BENCH_FORMAT_DEFAULT: 255 if (!only_prefault && !no_prefault) { 256 if (use_cycle) { 257 printf(" %14lf Cycle/Byte\n", 258 (double)result_cycle[0] 259 / (double)len); 260 printf(" %14lf Cycle/Byte (with prefault)\n", 261 (double)result_cycle[1] 262 / (double)len); 263 } else { 264 print_bps(result_bps[0]); 265 printf("\n"); 266 print_bps(result_bps[1]); 267 printf(" (with prefault)\n"); 268 } 269 } else { 270 if (use_cycle) { 271 printf(" %14lf Cycle/Byte", 272 (double)result_cycle[pf] 273 / (double)len); 274 } else 275 print_bps(result_bps[pf]); 276 277 printf("%s\n", only_prefault ? " (with prefault)" : ""); 278 } 279 break; 280 case BENCH_FORMAT_SIMPLE: 281 if (!only_prefault && !no_prefault) { 282 if (use_cycle) { 283 printf("%lf %lf\n", 284 (double)result_cycle[0] / (double)len, 285 (double)result_cycle[1] / (double)len); 286 } else { 287 printf("%lf %lf\n", 288 result_bps[0], result_bps[1]); 289 } 290 } else { 291 if (use_cycle) { 292 printf("%lf\n", (double)result_cycle[pf] 293 / (double)len); 294 } else 295 printf("%lf\n", result_bps[pf]); 296 } 297 break; 298 default: 299 /* reaching this means there's some disaster: */ 300 die("unknown format: %d\n", bench_format); 301 break; 302 } 303 304 return 0; 305} 306