1#include <stdio.h>
2#include <vector>
3#include <pthread.h>
4#include <malloc.h>
5#include <algorithm>
6
7using namespace std;
8
// Number of threads started (and later joined) by standalone_malloc_test().
const size_t kNumThreds = 16;
// Exclusive upper bound of the iteration counter in each MallocThread() loop;
// MallocThread() also reserves this many bookkeeping slots up front.
const size_t kNumIters = 1 << 23;
11
// Optimization barrier: an empty inline-asm statement that takes `arg` as a
// register input and declares a "memory" clobber. No instructions are
// emitted, but the compiler has to materialize `arg` and may not assume
// anything about memory contents across this call.
inline void break_optimization(void *arg) {
  asm volatile(""            // empty template: nothing is executed
               :             // no outputs
               : "r"(arg)    // input: `arg` held in a general register
               : "memory");  // clobber: memory may be read or written
}
15
16__attribute__((noinline))
17static void *MallocThread(void *t) {
18  size_t total_malloced = 0, total_freed = 0;
19  size_t max_in_use = 0;
20  size_t tid = reinterpret_cast<size_t>(t);
21  vector<pair<char *, size_t> > allocated;
22  allocated.reserve(kNumIters);
23  for (size_t i = 1; i < kNumIters; i++) {
24    if ((i % (kNumIters / 4)) == 0 && tid == 0)
25      fprintf(stderr, "   T[%ld] iter %ld\n", tid, i);
26    bool allocate = (i % 5) <= 2;  // 60% malloc, 40% free
27    if (i > kNumIters / 4)
28      allocate = i % 2;  // then switch to 50% malloc, 50% free
29    if (allocate) {
30      size_t size = 1 + (i % 200);
31      if ((i % 10001) == 0)
32        size *= 4096;
33      total_malloced += size;
34      char *x = new char[size];
35      x[0] = x[size - 1] = x[size / 2] = 0;
36      allocated.push_back(make_pair(x, size));
37      max_in_use = max(max_in_use, total_malloced - total_freed);
38    } else {
39      if (allocated.empty()) continue;
40      size_t slot = i % allocated.size();
41      char *p = allocated[slot].first;
42      p[0] = 0;  // emulate last user touch of the block
43      size_t size = allocated[slot].second;
44      total_freed += size;
45      swap(allocated[slot], allocated.back());
46      allocated.pop_back();
47      delete [] p;
48    }
49  }
50  if (tid == 0)
51    fprintf(stderr, "   T[%ld] total_malloced: %ldM in use %ldM max %ldM\n",
52           tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
53           max_in_use >> 20);
54  for (size_t i = 0; i < allocated.size(); i++)
55    delete [] allocated[i].first;
56  return 0;
57}
58
// Recursive helper: DeepStack<depth>::run(t) calls DeepStack<depth - 1>::run(t),
// so the chain only ends where an explicit specialization stops the recursion.
// run() has the signature of a pthread start routine.
template <int depth>
struct DeepStack {
  // Forwards `t` one level down and always returns 0; the callee's return
  // value is discarded. The function is marked noinline, and the recursive
  // call is bracketed by break_optimization() barriers -- presumably so that
  // every level keeps its own stack frame instead of being inlined or turned
  // into a tail call.
  __attribute__((noinline))
  static void *run(void *t) {
    break_optimization(0);
    DeepStack<depth - 1>::run(t);
    break_optimization(0);
    return 0;
  }
};
69
// Base case of the DeepStack recursion: depth 0 runs the actual thread body.
template<>
struct DeepStack<0> {
  // Calls MallocThread(t), ignores its result and returns 0.
  // Unlike the primary template's run(), this one is not marked noinline.
  static void *run(void *t) {
    MallocThread(t);
    return 0;
  }
};
77
78// Build with -Dstandalone_malloc_test=main to make it a separate program.
79int standalone_malloc_test() {
80  pthread_t t[kNumThreds];
81  for (size_t i = 0; i < kNumThreds; i++)
82    pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i));
83  for (size_t i = 0; i < kNumThreds; i++)
84    pthread_join(t[i], 0);
85  malloc_stats();
86  return 0;
87}
88