1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- Segment name management                 aspacemgr-segnames.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2015-2015  Florian Krohm
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/* Segment names are stored in a string table.
32
33   The string table is organised into slots of varying length. Slots are
34   adjacent and there are no holes between slots.
35   A slot consists of two parts:
36
37   (1) a fixed size overhead of length 4 bytes
38   (2) a variable size payload of up to 65535 bytes
39
40   The segment name is stored in the payload area. Therefore:
41   a segment name cannot be longer than 65535 bytes including the '\0'
42   terminator. This looks like a reasonable limitation.
43
44   Overall slot layout:
45
46       |          4 bytes            |    max 65535 bytes      |
47       +-----------------------------+-------------------------+
48       |          overhead           |         payload         |
49       +-----------------------------+-------------------------+
50       ^                             ^
51       |                             |
52      -4                             +----- seg->fnIdx
53
54   Each slot is uniquely identified by an index which points to the first
55   byte of the payload area. It is this value that is stored in seg->fnIdx.
56   Note, that this value is at least 4.
57
58   A slot either holds a string or it is free. The status of a slot is
59   identified by the leftmost bit in the overhead field, the so called F-bit.
60   F-bit == 1 means that slot is free; otherwise it is occupied and holds a
61   string.
62
63   Slot containing a string (segment name):
64
65  bits | 1 |      15      |    16    |
66       +---+--------------+----------+-------------------------+
67       | 0 |   refcount   | slotsize | the string including \0 |
68       +---+--------------+----------+-------------------------+
69       ^                  ^          ^
70       |                  |          |
71      -4                 -2          +----- seg->fnIdx
72
73   Segment names are reference counted. 15 bits are available which allows
74   for up to 32767 references. If the string is referenced more than 32767
75   times, the reference count will be frozen and the slot can never
76   become free. I'm not unduly concerned.
77   Two bytes are reserved to hold the size of the slot. Well, it's actually
78   the size of the payload aread (i.e. the size of the slot minus the
79   overhead). Ah well -- the name sticks.
80   With two bytes to store the size, the payload area can be at most 65535
81   bytes large.
82
83   A free slot looks like this:
84
85  bits | 1 |           31            |    16    |
86       +---+-------------------------+----------+--------------+
87       | 1 | index of next free slot | slotsize | .. unused .. |
88       +---+-------------------------+----------+--------------+
89       ^                             ^
90       |                             |
91      -4                             +----- seg->fnIdx
92
93   Free slots are chained together in a singly linked list. An index of
94   zero indicates the end of the chain. Note that zero cannot conflict
95   with an index into the string table as the minumum index is at least
96   four (see above).
97
98   The typical way to traverse the segment names is:
99
100   for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
101      if (is_freeslot(ix))
102         do this
103      else
104         do that
105   }
106
107   Important detail: there is a sentinel at the end of the list, namely a
108   slot with a zero-sized payload area.
109
110   Whenever a new segment name needs to be stashed away, the list of free
111   slots is traversed and the first slot which is large enough is being taken
112   (first fit). There will be no splitting of slots, as that complicates
113   matters and without slot coalescing would lead to memory fragmentation.
114   So we leave it as is until a use case comes up that needs something better.
115*/
116
117#include "pub_core_basics.h"     // types
118#include "priv_aspacemgr.h"
119
120// A few constants.
121enum {
122   refcount_size = sizeof(UShort),
123   slotsize_size = sizeof(UShort),
124   overhead = refcount_size + slotsize_size,
125   max_refcount  = 0x7fff,      // 2 bytes - F-bit
126   max_slotsize  = 0xffff,      // 2 bytes
127   max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
128   fbit_mask = 0x80,
129   end_of_chain = 0
130};
131
132/* The old segname implementation allowed for 1000 names on Android and
133   6000 names on other platforms. Each name was allowed to be 1000 characters
134   long. That was very wasteful. */
135#define VG_TABLE_SIZE 1000000
136
137/* String table for segment names */
138
139static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
140static SizeT segnames_used = 0;        /* number of bytes used */
141static UInt  num_segnames = 0;         /* number of names in string table */
142static UInt  num_slots = 0;            /* number of slots in string table */
143static UInt  freeslot_chain = end_of_chain;
144
145static Bool
146is_freeslot(UInt ix)
147{
148   aspacem_assert(ix >= overhead && ix <= segnames_used);
149   return (segnames[ix - 4] & fbit_mask) != 0;
150}
151
152static void
153put_slotindex(UInt ix, UInt slotindex)
154{
155   aspacem_assert(ix >= overhead && ix <= segnames_used);
156   if (slotindex != 0)
157      aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
158
159   slotindex |= fbit_mask << 24;
160   segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
161   segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
162   segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
163   segnames[ix - 4] = slotindex & 0xFF;
164}
165
166static UInt
167get_slotindex(UInt ix)
168{
169   aspacem_assert(ix >= overhead && ix <= segnames_used);
170   aspacem_assert(is_freeslot(ix));
171
172   // Avoid unexpected sign extension
173   const UChar *unames = (const UChar *)segnames;
174
175   UInt slotindex = 0;
176   slotindex |= unames[ix - 4];   slotindex <<= 8;
177   slotindex |= unames[ix - 3];   slotindex <<= 8;
178   slotindex |= unames[ix - 2];   slotindex <<= 8;
179   slotindex |= unames[ix - 1];
180
181   return slotindex & max_slotindex;   // removes the F-bit
182}
183
184static void
185put_slotsize(UInt ix, UInt size)
186{
187   aspacem_assert(ix >= overhead && ix <= segnames_used);
188   aspacem_assert(size <= max_slotsize);
189   segnames[ix - 1] = size & 0xff;
190   segnames[ix - 2] = size >> 8;
191}
192
193static UInt
194get_slotsize(UInt ix)
195{
196   aspacem_assert(ix >= overhead && ix <= segnames_used);
197
198   // Avoid unexpected sign extension
199   const UChar *unames = (const UChar *)segnames;
200   if (is_freeslot(ix))
201      return (unames[ix] << 8) | unames[ix+1];
202   else
203      return (unames[ix - 2] << 8) | unames[ix - 1];
204}
205
206static void
207put_refcount(UInt ix, UInt rc)
208{
209   aspacem_assert(ix >= overhead && ix <= segnames_used);
210   aspacem_assert(rc <= max_refcount);
211   // rc <= max_refcount ensures that the F-bit is zero
212   segnames[ix - 3] = rc & 0xff;
213   segnames[ix - 4] = rc >> 8;
214}
215
216static UInt
217get_refcount(UInt ix)
218{
219   aspacem_assert(ix >= overhead && ix <= segnames_used);
220   // must not be a free slot
221   aspacem_assert(! is_freeslot(ix));
222
223   // Avoid unexpected sign extension
224   const UChar *unames = (const UChar *)segnames;
225   return (unames[ix - 4] << 8) | unames[ix - 3];
226}
227
228static void
229inc_refcount(UInt ix)
230{
231   aspacem_assert(ix >= overhead && ix <= segnames_used);
232   UInt rc = get_refcount(ix);
233   if (rc != max_refcount)
234      put_refcount(ix, rc + 1);
235}
236
237static void
238dec_refcount(UInt ix)
239{
240   aspacem_assert(ix >= overhead && ix <= segnames_used);
241   UInt rc = get_refcount(ix);
242   aspacem_assert(rc > 0);
243   if (rc != max_refcount) {
244      --rc;
245      if (rc != 0) {
246         put_refcount(ix, rc);
247      } else {
248         UInt size = get_slotsize(ix);
249         /* Chain this slot in the freelist */
250         put_slotindex(ix, freeslot_chain);
251         get_slotindex(ix);
252         put_slotsize(ix + slotsize_size, size);
253         get_slotindex(ix);
254         freeslot_chain = ix;
255         --num_segnames;
256         if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
257      }
258   }
259}
260
261static void
262put_sentinel(UInt ix)
263{
264   aspacem_assert(ix >= overhead && ix <= segnames_used);
265
266   put_refcount(ix, 0);
267   put_slotsize(ix, 0);
268}
269
270
271/* Searches the string table to find an index for the given name.
272   If none is found, an index is allocated and the name stored.
273   If running ouf of memory, return -1. */
274Int
275ML_(am_allocate_segname)(const HChar *name)
276{
277   UInt len, ix, size, next_freeslot;
278
279   aspacem_assert(name);
280
281   if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
282
283   len = VG_(strlen)(name);
284
285   /* First see if we already have the name. */
286   for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
287      if (is_freeslot(ix)) continue;
288      if (VG_(strcmp)(name, segnames + ix) == 0) {
289         inc_refcount(ix);
290         return ix;
291      }
292   }
293
294   /* Is there a free slot in the string table from a previously "freed"
295      segment name ? */
296   Int prev;
297   for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
298        prev = ix, ix = next_freeslot) {
299      next_freeslot = get_slotindex(ix);  // next in chain
300      size = get_slotsize(ix);
301
302      if (size >= len + 1) {
303         /* Note, if the size of the slot is a lot larger than the length
304            of the string we're about to store in it, we could split the
305            slot into two. But that complicates matters and as we're not
306            doing any coalescing of adjacent free slots this could lead to
307            fragmentation. */
308         if (prev == -1)
309            freeslot_chain = next_freeslot;
310         else
311            put_slotindex(prev, next_freeslot);
312         put_refcount(ix, 1);
313         put_slotsize(ix, size);
314         VG_(strcpy)(segnames + ix, name);
315         ++num_segnames;
316         return ix;
317      }
318   }
319
320   /* We need to add a new name. */
321
322   /* Note, that we need at least two bytes in the payload. The reason is
323      that the payload area will be used to store the size of the slot when
324      the slot is on the freelist. */
325   if (len == 0) len = 1;
326
327   /* Is there enough room in the string table? The OVERHEAD is for the
328      sentinel following the payload of new slot. */
329   SizeT need = len + 1 + overhead;
330   if (need > (sizeof segnames) - segnames_used) {
331      return -1;
332   }
333
334   ++num_segnames;
335   ++num_slots;
336
337   /* copy it in */
338   ix = segnames_used;
339   put_refcount(ix, 1);
340   put_slotsize(ix, len + 1);
341   VG_(strcpy)(segnames + ix, name);
342   segnames_used += need;
343
344   /* Add sentinel at end of segment name list */
345   put_sentinel(segnames_used);
346
347   return ix;
348}
349
350/* Debugging output */
351void
352ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
353{
354   UInt size, ix, i;
355
356   VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
357                 num_segnames, num_slots);
358
359   if (freeslot_chain == end_of_chain)
360      VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
361   else
362      VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
363                    freeslot_chain);
364   for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
365        ix += size + overhead, ++i) {
366      if (is_freeslot(ix))
367         VG_(debugLog)(logLevel, "aspacem",
368                       "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
369                       get_slotsize(ix), get_slotindex(ix));
370      else
371         VG_(debugLog)(logLevel, "aspacem",
372                       "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
373                       segnames + ix);
374   }
375}
376
377/* Returns a sequence number for the fnIdx position in segnames.
378   Used in aspacemgr debug output to associate a segment with
379   a segment name. */
380Int
381ML_(am_segname_get_seqnr)(Int fnIdx)
382{
383   SizeT ix, size;
384   Int seqnr = -1;
385
386   if (fnIdx == -1) return -1;   // shortcut
387
388   for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
389      seqnr++;
390      if (ix == fnIdx)
391         return seqnr;
392   }
393
394   // We should always find the given index; something's busted
395   aspacem_assert(0);
396   return -1;
397}
398
399/* Initialise the string table for segment names. It contains an empty
400   string which is not referenced. */
401void
402ML_(am_segnames_init)(void)
403{
404   aspacem_assert(sizeof segnames >= overhead);
405
406   segnames_used = overhead;
407   put_sentinel(segnames_used);
408}
409
410/* Increase reference count of segment name identified by IX. */
411void
412ML_(am_inc_refcount)(Int ix)
413{
414   if (ix != -1)
415      inc_refcount(ix);
416}
417
418/* Decrease reference count of segment name identified by IX. */
419void
420ML_(am_dec_refcount)(Int ix)
421{
422   if (ix != -1)
423      dec_refcount(ix);
424}
425
426Bool
427ML_(am_sane_segname)(Int ix)
428{
429   return ix == -1 || (ix >= overhead && ix < segnames_used);
430}
431
432const HChar *
433ML_(am_get_segname)(Int ix)
434{
435   return (ix == -1) ? NULL : segnames + ix;
436}
437
438/*--------------------------------------------------------------------*/
439/*--- end                                                          ---*/
440/*--------------------------------------------------------------------*/
441