#include <binder/Binder.h>
#include <binder/IBinder.h>
#include <binder/IPCThreadState.h>
#include <binder/IServiceManager.h>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
16
17using namespace std;
18using namespace android;
19
20enum BinderWorkerServiceCode {
21    BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION,
22};
23
// Fatal check. When `cond` is false, prints the enclosing function name, the
// source line and the condition text to stderr, then terminates the process
// with EXIT_FAILURE. The do/while(0) wrapper makes the macro behave as a
// single statement (safe inside an unbraced if/else).
#define ASSERT_TRUE(cond)                                                      \
    do {                                                                       \
        if (!(cond)) {                                                         \
            std::cerr << __func__ << ":" << __LINE__ << " condition:" << #cond \
                      << " failed\n" << std::endl;                             \
            std::exit(EXIT_FAILURE);                                           \
        }                                                                      \
    } while (0)
31
32class BinderWorkerService : public BBinder
33{
34public:
35    BinderWorkerService() {}
36    ~BinderWorkerService() {}
37    virtual status_t onTransact(uint32_t code,
38                                const Parcel& data, Parcel* reply,
39                                uint32_t flags = 0) {
40        (void)flags;
41        (void)data;
42        (void)reply;
43        switch (code) {
44        case BINDER_NOP:
45            return NO_ERROR;
46        default:
47            return UNKNOWN_TRANSACTION;
48        };
49    }
50};
51
52class Pipe {
53    int m_readFd;
54    int m_writeFd;
55    Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {}
56    Pipe(const Pipe &) = delete;
57    Pipe& operator=(const Pipe &) = delete;
58    Pipe& operator=(const Pipe &&) = delete;
59public:
60    Pipe(Pipe&& rval) noexcept {
61        m_readFd = rval.m_readFd;
62        m_writeFd = rval.m_writeFd;
63        rval.m_readFd = 0;
64        rval.m_writeFd = 0;
65    }
66    ~Pipe() {
67        if (m_readFd)
68            close(m_readFd);
69        if (m_writeFd)
70            close(m_writeFd);
71    }
72    void signal() {
73        bool val = true;
74        int error = write(m_writeFd, &val, sizeof(val));
75        ASSERT_TRUE(error >= 0);
76    };
77    void wait() {
78        bool val = false;
79        int error = read(m_readFd, &val, sizeof(val));
80        ASSERT_TRUE(error >= 0);
81    }
82    template <typename T> void send(const T& v) {
83        int error = write(m_writeFd, &v, sizeof(T));
84        ASSERT_TRUE(error >= 0);
85    }
86    template <typename T> void recv(T& v) {
87        int error = read(m_readFd, &v, sizeof(T));
88        ASSERT_TRUE(error >= 0);
89    }
90    static tuple<Pipe, Pipe> createPipePair() {
91        int a[2];
92        int b[2];
93
94        int error1 = pipe(a);
95        int error2 = pipe(b);
96        ASSERT_TRUE(error1 >= 0);
97        ASSERT_TRUE(error2 >= 0);
98
99        return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1]));
100    }
101};
102
// Histogram configuration shared by every ProcResults instance. All times are
// in nanoseconds (dump() divides by 1.0E6 to print milliseconds).

// Number of latency buckets in the histogram.
static const uint32_t num_buckets = 128;
// Upper edge of the histogram range. Mutable: it is reassigned at runtime
// elsewhere in this file, together with time_per_bucket.
static uint64_t max_time_bucket = 50ull * 1000000;
// Width of one bucket; must be recomputed whenever max_time_bucket changes.
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

// Latency statistics gathered by one process (or merged from several).
// The struct holds only plain integers because instances are copied between
// processes as raw bytes elsewhere in this file.
struct ProcResults {
    uint64_t m_worst = 0;                    // longest observed time
    uint32_t m_buckets[num_buckets] = {0};   // histogram of observed times
    uint64_t m_transactions = 0;             // number of recorded samples
    uint64_t m_long_transactions = 0;        // samples above max_time_bucket
    uint64_t m_total_time = 0;               // sum of all recorded times
    uint64_t m_best = max_time_bucket;       // shortest observed time

    // Maps a time to its histogram slot, always inside [0, num_buckets).
    // Times at or beyond max_time_bucket land in the last bucket. The final
    // clamp matters when max_time_bucket is not a multiple of num_buckets:
    // integer division then yields a bucket width whose 128 multiples stop
    // short of max_time_bucket, so time / width can reach num_buckets.
    // A zero bucket width (range smaller than num_buckets) would divide by
    // zero, so that degenerate configuration also uses the last bucket.
    static uint32_t bucket_index(uint64_t time) {
        const uint32_t last = num_buckets - 1;
        if (time >= max_time_bucket || time_per_bucket == 0) {
            return last;
        }
        return static_cast<uint32_t>(std::min<uint64_t>(time / time_per_bucket, last));
    }

    // Records one sample of `time` nanoseconds.
    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        m_buckets[bucket_index(time)] += 1;
        m_best = std::min(time, m_best);
        m_worst = std::max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }

    // Returns the element-wise merge of `a` and `b`: counters and buckets are
    // summed, best/worst take the minimum/maximum.
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = std::max(a.m_worst, b.m_worst);
        ret.m_best = std::min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }

    // Prints average/worst/best latency and the 50/90/95/99th percentile
    // estimates (bucket midpoints, in milliseconds) to stdout.
    void dump() {
        if (m_transactions == 0) {
            // Nothing to average; avoid printing 0/0 results.
            std::cout << "no transactions recorded" << std::endl;
            return;
        }

        if (m_long_transactions > 0) {
            // Scale by 100 so the printed number really is a percentage.
            std::cout << 100.0 * m_long_transactions / m_transactions << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                 << m_worst / 1000 << " microseconds" << std::endl;
        }

        const double best = (double)m_best / 1.0E6;
        const double worst = (double)m_worst / 1.0E6;
        const double average = (double)m_total_time / m_transactions / 1.0E6;
        std::cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << std::endl;

        struct Percentile {
            float fraction;
            const char* label;
        };
        static const Percentile kPercentiles[] = {
            {0.5f, "50%: "}, {0.9f, "90%: "}, {0.95f, "95%: "}, {0.99f, "99%: "},
        };

        uint64_t seen = 0;  // samples in all buckets before the current one
        const float time_per_bucket_ms = time_per_bucket / 1.0E6;
        for (uint32_t i = 0; i < num_buckets; i++) {
            const float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
            const uint64_t seen_after = seen + m_buckets[i];
            // A percentile is reported for the bucket in which the running
            // count first reaches its share of all samples.
            for (const Percentile& p : kPercentiles) {
                const float threshold = p.fraction * m_transactions;
                if ((seen < threshold) && (seen_after >= threshold)) {
                    std::cout << p.label << cur_time << " ";
                }
            }
            seen = seen_after;
        }
        std::cout << std::endl;
    }
};
170
171String16 generateServiceName(int num)
172{
173    char num_str[32];
174    snprintf(num_str, sizeof(num_str), "%d", num);
175    String16 serviceName = String16("binderWorker") + String16(num_str);
176    return serviceName;
177}
178
179void worker_fx(int num,
180               int worker_count,
181               int iterations,
182               int payload_size,
183               bool cs_pair,
184               Pipe p)
185{
186    // Create BinderWorkerService and for go.
187    ProcessState::self()->startThreadPool();
188    sp<IServiceManager> serviceMgr = defaultServiceManager();
189    sp<BinderWorkerService> service = new BinderWorkerService;
190    serviceMgr->addService(generateServiceName(num), service);
191
192    srand(num);
193    p.signal();
194    p.wait();
195
196    // If client/server pairs, then half the workers are
197    // servers and half are clients
198    int server_count = cs_pair ? worker_count / 2 : worker_count;
199
200    // Get references to other binder services.
201    cout << "Created BinderWorker" << num << endl;
202    (void)worker_count;
203    vector<sp<IBinder> > workers;
204    for (int i = 0; i < server_count; i++) {
205        if (num == i)
206            continue;
207        workers.push_back(serviceMgr->getService(generateServiceName(i)));
208    }
209
210    // Run the benchmark if client
211    ProcResults results;
212    chrono::time_point<chrono::high_resolution_clock> start, end;
213    for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
214        Parcel data, reply;
215        int target = cs_pair ? num % server_count : rand() % workers.size();
216        int sz = payload_size;
217
218        while (sz >= sizeof(uint32_t)) {
219            data.writeInt32(0);
220            sz -= sizeof(uint32_t);
221        }
222        start = chrono::high_resolution_clock::now();
223        status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
224        end = chrono::high_resolution_clock::now();
225
226        uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count());
227        results.add_time(cur_time);
228
229        if (ret != NO_ERROR) {
230           cout << "thread " << num << " failed " << ret << "i : " << i << endl;
231           exit(EXIT_FAILURE);
232        }
233    }
234
235    // Signal completion to master and wait.
236    p.signal();
237    p.wait();
238
239    // Send results to master and wait for go to exit.
240    p.send(results);
241    p.wait();
242
243    exit(EXIT_SUCCESS);
244}
245
246Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair)
247{
248    auto pipe_pair = Pipe::createPipePair();
249    pid_t pid = fork();
250    if (pid) {
251        /* parent */
252        return move(get<0>(pipe_pair));
253    } else {
254        /* child */
255        worker_fx(num, worker_count, iterations, payload_size, cs_pair, move(get<1>(pipe_pair)));
256        /* never get here */
257        return move(get<0>(pipe_pair));
258    }
259
260}
261
262void wait_all(vector<Pipe>& v)
263{
264    for (int i = 0; i < v.size(); i++) {
265        v[i].wait();
266    }
267}
268
269void signal_all(vector<Pipe>& v)
270{
271    for (int i = 0; i < v.size(); i++) {
272        v[i].signal();
273    }
274}
275
276void run_main(int iterations,
277              int workers,
278              int payload_size,
279              int cs_pair,
280              bool training_round=false)
281{
282    vector<Pipe> pipes;
283    // Create all the workers and wait for them to spawn.
284    for (int i = 0; i < workers; i++) {
285        pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
286    }
287    wait_all(pipes);
288
289    // Run the workers and wait for completion.
290    chrono::time_point<chrono::high_resolution_clock> start, end;
291    cout << "waiting for workers to complete" << endl;
292    start = chrono::high_resolution_clock::now();
293    signal_all(pipes);
294    wait_all(pipes);
295    end = chrono::high_resolution_clock::now();
296
297    // Calculate overall throughput.
298    double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9);
299    cout << "iterations per sec: " << iterations_per_sec << endl;
300
301    // Collect all results from the workers.
302    cout << "collecting results" << endl;
303    signal_all(pipes);
304    ProcResults tot_results;
305    for (int i = 0; i < workers; i++) {
306        ProcResults tmp_results;
307        pipes[i].recv(tmp_results);
308        tot_results = ProcResults::combine(tot_results, tmp_results);
309    }
310
311    // Kill all the workers.
312    cout << "killing workers" << endl;
313    signal_all(pipes);
314    for (int i = 0; i < workers; i++) {
315        int status;
316        wait(&status);
317        if (status != 0) {
318            cout << "nonzero child status" << status << endl;
319        }
320    }
321    if (training_round) {
322        // sets max_time_bucket to 2 * m_worst from the training round.
323        // Also needs to adjust time_per_bucket accordingly.
324        max_time_bucket = 2 * tot_results.m_worst;
325        time_per_bucket = max_time_bucket / num_buckets;
326        cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
327    } else {
328            tot_results.dump();
329    }
330}
331
332int main(int argc, char *argv[])
333{
334    int workers = 2;
335    int iterations = 10000;
336    int payload_size = 0;
337    bool cs_pair = false;
338    bool training_round = false;
339    (void)argc;
340    (void)argv;
341
342    // Parse arguments.
343    for (int i = 1; i < argc; i++) {
344        if (string(argv[i]) == "--help") {
345            cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
346            cout << "\t-i N    : Specify number of iterations." << endl;
347            cout << "\t-m N    : Specify expected max latency in microseconds." << endl;
348            cout << "\t-p      : Split workers into client/server pairs." << endl;
349            cout << "\t-s N    : Specify payload size." << endl;
350            cout << "\t-t N    : Run training round." << endl;
351            cout << "\t-w N    : Specify total number of workers." << endl;
352            return 0;
353        }
354        if (string(argv[i]) == "-w") {
355            workers = atoi(argv[i+1]);
356            i++;
357            continue;
358        }
359        if (string(argv[i]) == "-i") {
360            iterations = atoi(argv[i+1]);
361            i++;
362            continue;
363        }
364        if (string(argv[i]) == "-s") {
365            payload_size = atoi(argv[i+1]);
366            i++;
367        }
368        if (string(argv[i]) == "-p") {
369            // client/server pairs instead of spreading
370            // requests to all workers. If true, half
371            // the workers become clients and half servers
372            cs_pair = true;
373        }
374        if (string(argv[i]) == "-t") {
375            // Run one training round before actually collecting data
376            // to get an approximation of max latency.
377            training_round = true;
378        }
379        if (string(argv[i]) == "-m") {
380            // Caller specified the max latency in microseconds.
381            // No need to run training round in this case.
382            if (atoi(argv[i+1]) > 0) {
383                max_time_bucket = strtoull(argv[i+1], (char **)NULL, 10) * 1000;
384                time_per_bucket = max_time_bucket / num_buckets;
385                i++;
386            } else {
387                cout << "Max latency -m must be positive." << endl;
388                exit(EXIT_FAILURE);
389            }
390        }
391    }
392
393    if (training_round) {
394        cout << "Start training round" << endl;
395        run_main(iterations, workers, payload_size, cs_pair, training_round=true);
396        cout << "Completed training round" << endl << endl;
397    }
398
399    run_main(iterations, workers, payload_size, cs_pair);
400    return 0;
401}
402