icache_main.cpp revision 61fd2ab615ab17f6de593e8a8cf475465967ac9c
#include <stdio.h>
#include <sys/time.h>
#include <getopt.h>

#include <chrono>
#include <thread>
#include <iostream>
#include <iomanip>

#include <sched.h>

#include "Profiler.h"
12
// Benchmark kernel, defined outside this file with C linkage.
// main() passes REPETITIONS as `count` and the current footprint, in cache lines, as `step`.
// NOTE(review): presumably implemented in assembly -- confirm against the definition.
extern "C" void icache_test(long count, long step);

// Largest code footprint exercised by the benchmark, in bytes (128 KiB).
static constexpr size_t MAX_CODE_SIZE = 128*1024;
// Cache line size, in bytes, used to convert between byte sizes and line counts.
static constexpr size_t CACHE_LINE_SIZE = 64;
// Number of cache lines in MAX_CODE_SIZE; upper bound of the sweep performed by main().
static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE;
// `count` passed to every icache_test() call; also the divisor used to report
// per-repetition averages.
static constexpr size_t REPETITIONS = 0x800000L;


// NOTE(review): presumably brings utils::Profiler into scope -- confirm in Profiler.h.
using namespace utils;

// CPU affinity mask requested on the command line. Cleared at the start of main(), filled in
// by the option parser, and applied only if at least one CPU is set.
static cpu_set_t g_cpu_set;
24
// Prints the command-line help text to stdout.
//
// Every occurrence of the placeholder "ICACHE" in the help text is replaced by the name the
// program was invoked with.
//
// name: executable name as invoked (typically argv[0]). If null, the placeholder text is
//       printed unchanged.
static void printUsage(char* name) {
    const std::string exec_name(name ? name : "ICACHE");
    std::string usage(
            "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n"
            "(Make sure security.perf_harden is set to 0)\n\n"
            "Usages:\n"
            "    ICACHE [options]\n"
            "\n"
            "Options:\n"
            "   --help, -h\n"
            "       print this message\n\n"
            "   --affinity=[big|little], -a [big|little]\n"
            "       Specify which CPU the test should run on.\n"
            "       (assumes \"little\" is the first CPU, and \"big\" the last one)\n\n"
    );
    const std::string from("ICACHE");
    // Resume the search *after* the text that was just inserted. Restarting at `pos` would
    // match the placeholder again inside an executable name that itself contains "ICACHE"
    // and never terminate.
    for (size_t pos = usage.find(from); pos != std::string::npos;
            pos = usage.find(from, pos + exec_name.length())) {
        usage.replace(pos, from.length(), exec_name);
    }
    printf("%s", usage.c_str());
}
46
47static int handleCommandLineArgments(int argc, char* argv[]) {
48    static constexpr const char* OPTSTR = "ha:";
49    static const struct option OPTIONS[] = {
50            { "help",                 no_argument, 0, 'h' },
51            { "affinity",       required_argument, 0, 'a' },
52            { 0, 0, 0, 0 }  // termination of the option list
53    };
54    int opt;
55    int option_index = 0;
56    while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, &option_index)) >= 0) {
57        std::string arg(optarg ? optarg : "");
58        switch (opt) {
59            default:
60            case 'h':
61                printUsage(argv[0]);
62                exit(0);
63                break;
64            case 'a':
65                if (arg == "big" || arg == "gold") {
66                    CPU_SET(std::thread::hardware_concurrency()-1, &g_cpu_set);
67                } else if (arg == "little" || arg == "silver") {
68                    CPU_SET(0, &g_cpu_set);
69                } else {
70                    std::cerr << "affinity must be either \"big\" or \"little\"" << std::endl;
71                    exit(0);
72                }
73                break;
74        }
75    }
76    return optind;
77}
78
79int main(int argc, char* argv[]) {
80    CPU_ZERO(&g_cpu_set);
81
82    [[maybe_unused]] int option_index = handleCommandLineArgments(argc, argv);
83    [[maybe_unused]] int num_args = argc - option_index;
84
85    if (CPU_COUNT(&g_cpu_set)) {
86        sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set);
87    }
88
89    Profiler& profiler = Profiler::get();
90    profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES);
91
92    if (!profiler.isValid()) {
93        fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n");
94        exit(0);
95    }
96
97    size_t const stepInBytes = 1024;    // 1 KiB steps
98    size_t const step = stepInBytes / CACHE_LINE_SIZE;
99
100    std::cout << std::fixed << std::setprecision(2);
101
102    printf("[KiB]\t[cyc]\t[refs]\t[hit%%]\t[ns]\n");
103
104    Profiler::Counters counters;
105
106    for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) {
107        profiler.reset();
108
109        auto now = std::chrono::steady_clock::now();
110        profiler.start();
111        icache_test(REPETITIONS, i);
112        profiler.stop();
113        auto duration = std::chrono::steady_clock::now() - now;
114
115        profiler.readCounters(&counters);
116
117        std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t"
118            << counters.getCpuCycles()/double(REPETITIONS) << "\t"
119            << counters.getL1IReferences()/double(REPETITIONS) << "\t"
120            << counters.getL1IHitRate()*100 << "\t"
121            << duration.count()/double(REPETITIONS) << "\t"
122            << std::endl;
123    }
124
125    return 0;
126}
127