odb.h revision 10e23eebca4175a8dfe3a788b2bebacb1fcfce54
1/**
2 * @file odb.h
3 * This file contains various definitions and interface for management
4 * of in-memory, through mmaped file, growable hash table, that stores
5 * sample files.
6 *
7 * @remark Copyright 2002 OProfile authors
8 * @remark Read the file COPYING
9 *
10 * @author Philippe Elie
11 */
12
13#ifndef ODB_HASH_H
14#define ODB_HASH_H
15
16#include <stddef.h>
17#include <stdint.h>
18
19#include "op_list.h"
20
21/** the type of key. 64-bit because CG needs 32-bit pair {from,to} */
22typedef uint64_t odb_key_t;
23/** the type of an information in the database */
24typedef unsigned int odb_value_t;
25/** the type of index (node number), list are implemented through index */
26typedef unsigned int odb_index_t;
27/** the type store node number */
28typedef odb_index_t odb_node_nr_t;
29/** store the hash mask, hash table size are always power of two */
30typedef odb_index_t odb_hash_mask_t;
31
32/* there is (bucket factor * nr node) entry in hash table, this can seem
33 * excessive but hash coding eip don't give a good distributions and our
34 * goal is to get a O(1) amortized insert time. bucket factor must be a
35 * power of two. FIXME: see big comment in odb_hash_add_node, you must
36 * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you
37 * want to read the comment in odb_hash_add_node() if you tune this define)
38 */
39#define BUCKET_FACTOR 1
40
41/** a db hash node */
42typedef struct {
43	odb_key_t key;			/**< eip */
44	odb_value_t value;		/**< samples count */
45	odb_index_t next;		/**< next entry for this bucket */
46} odb_node_t;
47
48/** the minimal information which must be stored in the file to reload
49 * properly the data base, following this header is the node array then
50 * the hash table (when growing we avoid to copy node array)
51 */
52typedef struct {
53	odb_node_nr_t size;		/**< in node nr (power of two) */
54	odb_node_nr_t current_size;	/**< nr used node + 1, node 0 unused */
55	int padding[6];			/**< for padding and future use */
56} odb_descr_t;
57
58/** a "database". this is an in memory only description.
59 *
60 * We allow to manage a database inside a mapped file with an "header" of
61 * unknown size so odb_open get a parameter to specify the size of this header.
62 * A typical use is:
63 *
64 * struct header { int etc; ... };
65 * odb_open(&hash, filename, ODB_RW, sizeof(header));
66 * so on this library have no dependency on the header type.
67 *
68 * the internal memory layout from base_memory is:
69 *  the unknown header (sizeof_header)
70 *  odb_descr_t
71 *  the node array: (descr->size * sizeof(odb_node_t) entries
72 *  the hash table: array of odb_index_t indexing the node array
73 *    (descr->size * BUCKET_FACTOR) entries
74 */
75typedef struct odb_data {
76	odb_node_t * node_base;		/**< base memory area of the page */
77	odb_index_t * hash_base;	/**< base memory of hash table */
78	odb_descr_t * descr;		/**< the current state of database */
79	odb_hash_mask_t hash_mask;	/**< == descr->size - 1 */
80	unsigned int sizeof_header;	/**< from base_memory to odb header */
81	unsigned int offset_node;	/**< from base_memory to node array */
82	void * base_memory;		/**< base memory of the maped memory */
83	int fd;				/**< mmaped memory file descriptor */
84	char * filename;                /**< full path name of sample file */
85	int ref_count;                  /**< reference count */
86	struct list_head list;          /**< hash bucket list */
87} odb_data_t;
88
89typedef struct {
90	odb_data_t * data;
91} odb_t;
92
93#ifdef __cplusplus
94extern "C" {
95#endif
96
97/* db_manage.c */
98
99/** how to open the DB file */
100enum odb_rw {
101	ODB_RDONLY = 0,	/**< open for read only */
102	ODB_RDWR = 1	/**< open for read and/or write */
103};
104
105/**
106 * odb_init - initialize a DB file
107 * @param odb the DB file to init
108 */
109void odb_init(odb_t * odb);
110
111/**
112 * odb_open - open a DB file
113 * @param odb the data base object to setup
114 * @param filename the filename where go the maped memory
115 * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY
116 * @param sizeof_header size of the file header if any
117 *
118 * The sizeof_header parameter allows the data file to have a header
119 * at the start of the file which is skipped.
120 * odb_open() always preallocate a few number of pages.
121 * returns 0 on success, errno on failure
122 */
123int odb_open(odb_t * odb, char const * filename,
124             enum odb_rw rw, size_t sizeof_header);
125
126/** Close the given ODB file */
127void odb_close(odb_t * odb);
128
129/** return the number of times this sample file is open */
130int odb_open_count(odb_t const * odb);
131
132/** return the start of the mapped data */
133void * odb_get_data(odb_t * odb);
134
135/** issue a msync on the used size of the mmaped file */
136void odb_sync(odb_t const * odb);
137
138/**
139 * grow the hashtable in such way current_size is the index of the first free
140 * node. Take care all node pointer can be invalidated by this call.
141 *
142 * Node allocation is done in a two step way 1st) ensure a free node exist
143 * eventually, caller can setup it, 2nd) commit the node allocation with
144 * odb_commit_reservation().
145 * This is done in this way to ensure node setup is visible from another
146 * process like pp tools in an atomic way.
147 *
148 * returns 0 on success, non zero on failure in this case this function do
149 * nothing and errno is set by the first libc call failure allowing to retry
150 * after cleanup some program resource.
151 */
152int odb_grow_hashtable(odb_data_t * data);
153/**
154 * commit a previously successfull node reservation. This can't fail.
155 */
156static __inline void odb_commit_reservation(odb_data_t * data)
157{
158	++data->descr->current_size;
159}
160
161/** "immpossible" node number to indicate an error from odb_hash_add_node() */
162#define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1)
163
164/* db_debug.c */
165/** check that the hash is well built */
166int odb_check_hash(odb_t const * odb);
167
168/* db_stat.c */
169typedef struct odb_hash_stat_t odb_hash_stat_t;
170odb_hash_stat_t * odb_hash_stat(odb_t const * odb);
171void odb_hash_display_stat(odb_hash_stat_t const * stats);
172void odb_hash_free_stat(odb_hash_stat_t * stats);
173
174/* db_insert.c */
175/** update info at key by incrementing its associated value by one,
176 * if the key does not exist a new node is created and the value associated
177 * is set to one.
178 *
179 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
180 */
181int odb_update_node(odb_t * odb, odb_key_t key);
182
183/** Add a new node w/o regarding if a node with the same key already exists
184 *
185 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
186 */
187int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value);
188
189/* db_travel.c */
190/**
191 * return a base pointer to the node array and number of node in this array
192 * caller then will iterate through:
193 *
194 * odb_node_nr_t node_nr, pos;
195 * odb_node_t * node = odb_get_iterator(odb, &node_nr);
196 *	for ( pos = 0 ; pos < node_nr ; ++pos)
197 *		// do something
198 *
199 *  note than caller does not need to filter nil key as it's a valid key,
200 * The returned range is all valid (i.e. should never contain zero value).
201 */
202odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr);
203
204static __inline unsigned int
205odb_do_hash(odb_data_t const * data, odb_key_t value)
206{
207	/* FIXME: better hash for eip value, needs to instrument code
208	 * and do a lot of tests ... */
209	/* trying to combine high order bits his a no-op: inside a binary image
210	 * high order bits don't vary a lot, hash table start with 7 bits mask
211	 * so this hash coding use bits 0-7, 8-15. Hash table is stored in
212	 * files avoiding to rebuilding them at profiling re-start so
213	 * on changing do_hash() change the file format!
214	 */
215	uint32_t temp = (value >> 32) ^ value;
216	return ((temp << 0) ^ (temp >> 8)) & data->hash_mask;
217}
218
219#ifdef __cplusplus
220}
221#endif
222
223#endif /* !ODB_H */
224