1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Sanjay Ghemawat
32//
33// A fast map from addresses to values.  Assumes that addresses are
34// clustered.  The main use is intended to be for heap-profiling.
35// May be too memory-hungry for other uses.
36//
37// We use a user-defined allocator/de-allocator so that we can use
38// this data structure during heap-profiling.
39//
40// IMPLEMENTATION DETAIL:
41//
42// Some default definitions/parameters:
43//  * Block      -- aligned 128-byte region of the address space
44//  * Cluster    -- aligned 1-MB region of the address space
45//  * Block-ID   -- block-number within a cluster
46//  * Cluster-ID -- Starting address of cluster divided by cluster size
47//
48// We use a three-level map to represent the state:
49//  1. A hash-table maps from a cluster-ID to the data for that cluster.
50//  2. For each non-empty cluster we keep an array indexed by
51//     block-ID tht points to the first entry in the linked-list
52//     for the block.
53//  3. At the bottom, we keep a singly-linked list of all
54//     entries in a block (for non-empty blocks).
55//
56//    hash table
57//  +-------------+
58//  | id->cluster |---> ...
59//  |     ...     |
60//  | id->cluster |--->  Cluster
61//  +-------------+     +-------+    Data for one block
62//                      |  nil  |   +------------------------------------+
63//                      |   ----+---|->[addr/value]-->[addr/value]-->... |
64//                      |  nil  |   +------------------------------------+
65//                      |   ----+--> ...
66//                      |  nil  |
67//                      |  ...  |
68//                      +-------+
69//
70// Note that we require zero-bytes of overhead for completely empty
71// clusters.  The minimum space requirement for a cluster is the size
72// of the hash-table entry plus a pointer value for each block in
73// the cluster.  Empty blocks impose no extra space requirement.
74//
75// The cost of a lookup is:
76//      a. A hash-table lookup to find the cluster
77//      b. An array access in the cluster structure
78//      c. A traversal over the linked-list for a block
79
80#ifndef BASE_ADDRESSMAP_INL_H_
81#define BASE_ADDRESSMAP_INL_H_
82
83#include "config.h"
84#include <stddef.h>
85#include <string.h>
86#if defined HAVE_STDINT_H
87#include <stdint.h>             // to get uint16_t (ISO naming madness)
88#elif defined HAVE_INTTYPES_H
89#include <inttypes.h>           // another place uint16_t might be defined
90#else
91#include <sys/types.h>          // our last best hope
92#endif
93
94// This class is thread-unsafe -- that is, instances of this class can
95// not be accessed concurrently by multiple threads -- because the
96// callback function for Iterate() may mutate contained values. If the
97// callback functions you pass do not mutate their Value* argument,
98// AddressMap can be treated as thread-compatible -- that is, it's
99// safe for multiple threads to call "const" methods on this class,
100// but not safe for one thread to call const methods on this class
101// while another thread is calling non-const methods on the class.
102template <class Value>
103class AddressMap {
104 public:
105  typedef void* (*Allocator)(size_t size);
106  typedef void  (*DeAllocator)(void* ptr);
107  typedef const void* Key;
108
109  // Create an AddressMap that uses the specified allocator/deallocator.
110  // The allocator/deallocator should behave like malloc/free.
111  // For instance, the allocator does not need to return initialized memory.
112  AddressMap(Allocator alloc, DeAllocator dealloc);
113  ~AddressMap();
114
115  // If the map contains an entry for "key", return it. Else return NULL.
116  inline const Value* Find(Key key) const;
117  inline Value* FindMutable(Key key);
118
119  // Insert <key,value> into the map.  Any old value associated
120  // with key is forgotten.
121  void Insert(Key key, Value value);
122
123  // Remove any entry for key in the map.  If an entry was found
124  // and removed, stores the associated value in "*removed_value"
125  // and returns true.  Else returns false.
126  bool FindAndRemove(Key key, Value* removed_value);
127
128  // Similar to Find but we assume that keys are addresses of non-overlapping
129  // memory ranges whose sizes are given by size_func.
130  // If the map contains a range into which "key" points
131  // (at its start or inside of it, but not at the end),
132  // return the address of the associated value
133  // and store its key in "*res_key".
134  // Else return NULL.
135  // max_size specifies largest range size possibly in existence now.
136  typedef size_t (*ValueSizeFunc)(const Value& v);
137  const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
138                          Key key, Key* res_key);
139
140  // Iterate over the address map calling 'callback'
141  // for all stored key-value pairs and passing 'arg' to it.
142  // We don't use full Closure/Callback machinery not to add
143  // unnecessary dependencies to this class with low-level uses.
144  template<class Type>
145  inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
146
147 private:
148  typedef uintptr_t Number;
149
150  // The implementation assumes that addresses inserted into the map
151  // will be clustered.  We take advantage of this fact by splitting
152  // up the address-space into blocks and using a linked-list entry
153  // for each block.
154
155  // Size of each block.  There is one linked-list for each block, so
156  // do not make the block-size too big.  Oterwise, a lot of time
157  // will be spent traversing linked lists.
158  static const int kBlockBits = 7;
159  static const int kBlockSize = 1 << kBlockBits;
160
161  // Entry kept in per-block linked-list
162  struct Entry {
163    Entry* next;
164    Key    key;
165    Value  value;
166  };
167
168  // We further group a sequence of consecutive blocks into a cluster.
169  // The data for a cluster is represented as a dense array of
170  // linked-lists, one list per contained block.
171  static const int kClusterBits = 13;
172  static const Number kClusterSize = 1 << (kBlockBits + kClusterBits);
173  static const int kClusterBlocks = 1 << kClusterBits;
174
175  // We use a simple chaining hash-table to represent the clusters.
176  struct Cluster {
177    Cluster* next;                      // Next cluster in hash table chain
178    Number   id;                        // Cluster ID
179    Entry*   blocks[kClusterBlocks];    // Per-block linked-lists
180  };
181
182  // Number of hash-table entries.  With the block-size/cluster-size
183  // defined above, each cluster covers 1 MB, so an 4K entry
184  // hash-table will give an average hash-chain length of 1 for 4GB of
185  // in-use memory.
186  static const int kHashBits = 12;
187  static const int kHashSize = 1 << 12;
188
189  // Number of entry objects allocated at a time
190  static const int ALLOC_COUNT = 64;
191
192  Cluster**     hashtable_;              // The hash-table
193  Entry*        free_;                   // Free list of unused Entry objects
194
195  // Multiplicative hash function:
196  // The value "kHashMultiplier" is the bottom 32 bits of
197  //    int((sqrt(5)-1)/2 * 2^32)
198  // This is a good multiplier as suggested in CLR, Knuth.  The hash
199  // value is taken to be the top "k" bits of the bottom 32 bits
200  // of the muliplied value.
201  static const uint32_t kHashMultiplier = 2654435769u;
202  static int HashInt(Number x) {
203    // Multiply by a constant and take the top bits of the result.
204    const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier;
205    return static_cast<int>(m >> (32 - kHashBits));
206  }
207
208  // Find cluster object for specified address.  If not found
209  // and "create" is true, create the object.  If not found
210  // and "create" is false, return NULL.
211  //
212  // This method is bitwise-const if create is false.
213  Cluster* FindCluster(Number address, bool create) {
214    // Look in hashtable
215    const Number cluster_id = address >> (kBlockBits + kClusterBits);
216    const int h = HashInt(cluster_id);
217    for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
218      if (c->id == cluster_id) {
219        return c;
220      }
221    }
222
223    // Create cluster if necessary
224    if (create) {
225      Cluster* c = New<Cluster>(1);
226      c->id = cluster_id;
227      c->next = hashtable_[h];
228      hashtable_[h] = c;
229      return c;
230    }
231    return NULL;
232  }
233
234  // Return the block ID for an address within its cluster
235  static int BlockID(Number address) {
236    return (address >> kBlockBits) & (kClusterBlocks - 1);
237  }
238
239  //--------------------------------------------------------------
240  // Memory management -- we keep all objects we allocate linked
241  // together in a singly linked list so we can get rid of them
242  // when we are all done.  Furthermore, we allow the client to
243  // pass in custom memory allocator/deallocator routines.
244  //--------------------------------------------------------------
245  struct Object {
246    Object* next;
247    // The real data starts here
248  };
249
250  Allocator     alloc_;                 // The allocator
251  DeAllocator   dealloc_;               // The deallocator
252  Object*       allocated_;             // List of allocated objects
253
254  // Allocates a zeroed array of T with length "num".  Also inserts
255  // the allocated block into a linked list so it can be deallocated
256  // when we are all done.
257  template <class T> T* New(int num) {
258    void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T));
259    memset(ptr, 0, sizeof(Object) + num*sizeof(T));
260    Object* obj = reinterpret_cast<Object*>(ptr);
261    obj->next = allocated_;
262    allocated_ = obj;
263    return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1);
264  }
265};
266
267// More implementation details follow:
268
269template <class Value>
270AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc)
271  : free_(NULL),
272    alloc_(alloc),
273    dealloc_(dealloc),
274    allocated_(NULL) {
275  hashtable_ = New<Cluster*>(kHashSize);
276}
277
278template <class Value>
279AddressMap<Value>::~AddressMap() {
280  // De-allocate all of the objects we allocated
281  for (Object* obj = allocated_; obj != NULL; /**/) {
282    Object* next = obj->next;
283    (*dealloc_)(obj);
284    obj = next;
285  }
286}
287
288template <class Value>
289inline const Value* AddressMap<Value>::Find(Key key) const {
290  return const_cast<AddressMap*>(this)->FindMutable(key);
291}
292
293template <class Value>
294inline Value* AddressMap<Value>::FindMutable(Key key) {
295  const Number num = reinterpret_cast<Number>(key);
296  const Cluster* const c = FindCluster(num, false/*do not create*/);
297  if (c != NULL) {
298    for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
299      if (e->key == key) {
300        return &e->value;
301      }
302    }
303  }
304  return NULL;
305}
306
307template <class Value>
308void AddressMap<Value>::Insert(Key key, Value value) {
309  const Number num = reinterpret_cast<Number>(key);
310  Cluster* const c = FindCluster(num, true/*create*/);
311
312  // Look in linked-list for this block
313  const int block = BlockID(num);
314  for (Entry* e = c->blocks[block]; e != NULL; e = e->next) {
315    if (e->key == key) {
316      e->value = value;
317      return;
318    }
319  }
320
321  // Create entry
322  if (free_ == NULL) {
323    // Allocate a new batch of entries and add to free-list
324    Entry* array = New<Entry>(ALLOC_COUNT);
325    for (int i = 0; i < ALLOC_COUNT-1; i++) {
326      array[i].next = &array[i+1];
327    }
328    array[ALLOC_COUNT-1].next = free_;
329    free_ = &array[0];
330  }
331  Entry* e = free_;
332  free_ = e->next;
333  e->key = key;
334  e->value = value;
335  e->next = c->blocks[block];
336  c->blocks[block] = e;
337}
338
339template <class Value>
340bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
341  const Number num = reinterpret_cast<Number>(key);
342  Cluster* const c = FindCluster(num, false/*do not create*/);
343  if (c != NULL) {
344    for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) {
345      Entry* e = *p;
346      if (e->key == key) {
347        *removed_value = e->value;
348        *p = e->next;         // Remove e from linked-list
349        e->next = free_;      // Add e to free-list
350        free_ = e;
351        return true;
352      }
353    }
354  }
355  return false;
356}
357
358template <class Value>
359const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func,
360                                           size_t max_size,
361                                           Key key,
362                                           Key* res_key) {
363  const Number key_num = reinterpret_cast<Number>(key);
364  Number num = key_num;  // we'll move this to move back through the clusters
365  while (1) {
366    const Cluster* c = FindCluster(num, false/*do not create*/);
367    if (c != NULL) {
368      while (1) {
369        const int block = BlockID(num);
370        bool had_smaller_key = false;
371        for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) {
372          const Number e_num = reinterpret_cast<Number>(e->key);
373          if (e_num <= key_num) {
374            if (e_num == key_num  ||  // to handle 0-sized ranges
375                key_num < e_num + (*size_func)(e->value)) {
376              *res_key = e->key;
377              return &e->value;
378            }
379            had_smaller_key = true;
380          }
381        }
382        if (had_smaller_key) return NULL;  // got a range before 'key'
383                                           // and it did not contain 'key'
384        if (block == 0) break;
385        // try address-wise previous block
386        num |= kBlockSize - 1;  // start at the last addr of prev block
387        num -= kBlockSize;
388        if (key_num - num > max_size) return NULL;
389      }
390    }
391    if (num < kClusterSize) return NULL;  // first cluster
392    // go to address-wise previous cluster to try
393    num |= kClusterSize - 1;  // start at the last block of previous cluster
394    num -= kClusterSize;
395    if (key_num - num > max_size) return NULL;
396      // Having max_size to limit the search is crucial: else
397      // we have to traverse a lot of empty clusters (or blocks).
398      // We can avoid needing max_size if we put clusters into
399      // a search tree, but performance suffers considerably
400      // if we use this approach by using stl::set.
401  }
402}
403
404template <class Value>
405template <class Type>
406inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
407                                       Type arg) const {
408  // We could optimize this by traversing only non-empty clusters and/or blocks
409  // but it does not speed up heap-checker noticeably.
410  for (int h = 0; h < kHashSize; ++h) {
411    for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
412      for (int b = 0; b < kClusterBlocks; ++b) {
413        for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
414          callback(e->key, &e->value, arg);
415        }
416      }
417    }
418  }
419}
420
421#endif  // BASE_ADDRESSMAP_INL_H_
422