1
2/*--------------------------------------------------------------------*/
3/*--- Demangling of C++ mangled names.                  demangle.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2013 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics.h"
32#include "pub_core_demangle.h"
33#include "pub_core_libcassert.h"
34#include "pub_core_libcbase.h"
35#include "pub_core_libcprint.h"
36#include "pub_core_mallocfree.h"
37#include "pub_core_options.h"
38
39#include "vg_libciface.h"
40#include "demangle.h"
41
42/* The demangler's job is to take a raw symbol name and turn it into
43   something a Human Bean can understand.  There are two levels of
44   mangling.
45
46   1. First, C++ names are mangled by the compiler.  So we'll have to
47      undo that.
48
49   2. Optionally, in relatively rare cases, the resulting name is then
50      itself encoded using Z-escaping (see pub_core_redir.h) so as to
51      become part of a redirect-specification.
52
53   Therefore, VG_(demangle) first tries to undo (2).  If successful,
54   the soname part is discarded (humans don't want to see that).
55   Then, it tries to undo (1) (using demangling code from GNU/FSF).
56
57   Finally, change the name of all symbols which are known to be
58   functions below main() to "(below main)".  This helps reduce
59   variability of stack traces, something which has been a problem for
60   the testsuite for a long time.
61
62   --------
   If do_cxx_demangling == True, does all the above stages:
   - undo (2) [Z-encoding] (only if do_z_demangling == True)
   - undo (1) [C++ mangling]
   - do the below-main hack

   If do_cxx_demangling == False, the middle stage is skipped:
   - undo (2) [Z-encoding] (only if do_z_demangling == True)
   - do the below-main hack
71*/
72
73/* Note that the C++ demangler is from GNU libiberty and is almost
74   completely unmodified.  We use vg_libciface.h as a way to
75   impedance-match the libiberty code into our own framework.
76
77   The current code is from libiberty in the gcc tree, gcc svn
78   r181975, dated 12 Dec 2011 (when the gcc trunk was in Stage 3
79   leading up to a gcc-4.7 release).  As of r141363, libiberty is LGPL
80   2.1, which AFAICT is compatible with "GPL 2 or later" and so is OK
81   for inclusion in Valgrind.
82
   To update to a newer libiberty, it might be simplest to svn diff
   the gcc tree's libiberty against r181975 and then apply those
   diffs here. */
86
87/* This is the main, standard demangler entry point. */
88
89void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
90                     HChar* orig, HChar* result, Int result_size )
91{
92#  define N_ZBUF 4096
93   HChar* demangled = NULL;
94   HChar z_demangled[N_ZBUF];
95
96   /* Possibly undo (2) */
97   /* Z-Demangling was requested.
98      The fastest way to see if it's a Z-mangled name is just to attempt
99      to Z-demangle it (with NULL for the soname buffer, since we're not
100      interested in that). */
101   if (do_z_demangling) {
102      if (VG_(maybe_Z_demangle)( orig, NULL,0,/*soname*/
103                                 z_demangled, N_ZBUF, NULL, NULL, NULL )) {
104         orig = z_demangled;
105      }
106   }
107
108   /* Possibly undo (1) */
109   if (do_cxx_demangling && VG_(clo_demangle)) {
110      demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
111   } else {
112      demangled = NULL;
113   }
114   if (demangled) {
115      VG_(strncpy_safely)(result, demangled, result_size);
116      VG_(arena_free) (VG_AR_DEMANGLE, demangled);
117   } else {
118      VG_(strncpy_safely)(result, orig, result_size);
119   }
120
121   // 13 Mar 2005: We used to check here that the demangler wasn't leaking
122   // by calling the (now-removed) function VG_(is_empty_arena)().  But,
123   // very rarely (ie. I've heard of it twice in 3 years), the demangler
124   // does leak.  But, we can't do much about it, and it's not a disaster,
125   // so we just let it slide without aborting or telling the user.
126#  undef N_ZBUF
127}
128
129
130/*------------------------------------------------------------*/
131/*--- DEMANGLE Z-ENCODED NAMES                             ---*/
132/*------------------------------------------------------------*/
133
134/* Demangle a Z-encoded name as described in pub_tool_redir.h.
135   Z-encoded names are used by Valgrind for doing function
136   interception/wrapping.
137
138   Demangle 'sym' into its soname and fnname parts, putting them in
139   the specified buffers.  Returns a Bool indicating whether the
140   demangled failed or not.  A failure can occur because the prefix
141   isn't recognised, the internal Z-escaping is wrong, or because one
142   or the other (or both) of the output buffers becomes full.  Passing
143   'so' as NULL is acceptable if the caller is only interested in the
144   function name part. */
145
146Bool VG_(maybe_Z_demangle) ( const HChar* sym,
147                             /*OUT*/HChar* so, Int soLen,
148                             /*OUT*/HChar* fn, Int fnLen,
149                             /*OUT*/Bool* isWrap,
150                             /*OUT*/Int*  eclassTag,
151                             /*OUT*/Int*  eclassPrio )
152{
153#  define EMITSO(ch)                           \
154      do {                                     \
155         if (so) {                             \
156            if (soi >= soLen) {                \
157               so[soLen-1] = 0; oflow = True;  \
158            } else {                           \
159               so[soi++] = ch; so[soi] = 0;    \
160            }                                  \
161         }                                     \
162      } while (0)
163#  define EMITFN(ch)                           \
164      do {                                     \
165         if (fni >= fnLen) {                   \
166            fn[fnLen-1] = 0; oflow = True;     \
167         } else {                              \
168            fn[fni++] = ch; fn[fni] = 0;       \
169         }                                     \
170      } while (0)
171
172   Bool error, oflow, valid, fn_is_encoded, is_VG_Z_prefixed;
173   Int  soi, fni, i;
174
175   vg_assert(soLen > 0 || (soLen == 0 && so == NULL));
176   vg_assert(fnLen > 0);
177   error = False;
178   oflow = False;
179   soi = 0;
180   fni = 0;
181
182   valid =     sym[0] == '_'
183           &&  sym[1] == 'v'
184           &&  sym[2] == 'g'
185           && (sym[3] == 'r' || sym[3] == 'w')
186           &&  VG_(isdigit)(sym[4])
187           &&  VG_(isdigit)(sym[5])
188           &&  VG_(isdigit)(sym[6])
189           &&  VG_(isdigit)(sym[7])
190           &&  VG_(isdigit)(sym[8])
191           &&  sym[9] == 'Z'
192           && (sym[10] == 'Z' || sym[10] == 'U')
193           &&  sym[11] == '_';
194
195   if (valid
196       && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
197       && sym[8] != '0') {
198      /* If the eclass tag is 0000 (meaning "no eclass"), the priority
199         must be 0 too. */
200      valid = False;
201   }
202
203   if (!valid)
204      return False;
205
206   fn_is_encoded = sym[10] == 'Z';
207
208   if (isWrap)
209      *isWrap = sym[3] == 'w';
210
211   if (eclassTag) {
212      *eclassTag =    1000 * ((Int)sym[4] - '0')
213                   +  100 * ((Int)sym[5] - '0')
214                   +  10 * ((Int)sym[6] - '0')
215                   +  1 * ((Int)sym[7] - '0');
216      vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
217   }
218
219   if (eclassPrio) {
220      *eclassPrio = ((Int)sym[8]) - '0';
221      vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
222   }
223
224   /* Now check the soname prefix isn't "VG_Z_", as described in
225      pub_tool_redir.h. */
226   is_VG_Z_prefixed =
227      sym[12] == 'V' &&
228      sym[13] == 'G' &&
229      sym[14] == '_' &&
230      sym[15] == 'Z' &&
231      sym[16] == '_';
232   if (is_VG_Z_prefixed) {
233      vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
234                    "see pub_tool_redir.h for an explanation.", sym);
235   }
236
237   /* Now scan the Z-encoded soname. */
238   i = 12;
239   while (True) {
240
241      if (sym[i] == '_')
242      /* Found the delimiter.  Move on to the fnname loop. */
243         break;
244
245      if (sym[i] == 0) {
246         error = True;
247         goto out;
248      }
249
250      if (sym[i] != 'Z') {
251         EMITSO(sym[i]);
252         i++;
253         continue;
254      }
255
256      /* We've got a Z-escape. */
257      i++;
258      switch (sym[i]) {
259         case 'a': EMITSO('*'); break;
260         case 'c': EMITSO(':'); break;
261         case 'd': EMITSO('.'); break;
262         case 'h': EMITSO('-'); break;
263         case 'p': EMITSO('+'); break;
264         case 's': EMITSO(' '); break;
265         case 'u': EMITSO('_'); break;
266         case 'A': EMITSO('@'); break;
267         case 'D': EMITSO('$'); break;
268         case 'L': EMITSO('('); break;
269         case 'R': EMITSO(')'); break;
270         case 'Z': EMITSO('Z'); break;
271         default: error = True; goto out;
272      }
273      i++;
274   }
275
276   vg_assert(sym[i] == '_');
277   i++;
278
279   /* Now deal with the function name part. */
280   if (!fn_is_encoded) {
281
282      /* simple; just copy. */
283      while (True) {
284         if (sym[i] == 0)
285            break;
286         EMITFN(sym[i]);
287         i++;
288      }
289      goto out;
290
291   }
292
293   /* else use a Z-decoding loop like with soname */
294   while (True) {
295
296      if (sym[i] == 0)
297         break;
298
299      if (sym[i] != 'Z') {
300         EMITFN(sym[i]);
301         i++;
302         continue;
303      }
304
305      /* We've got a Z-escape. */
306      i++;
307      switch (sym[i]) {
308         case 'a': EMITFN('*'); break;
309         case 'c': EMITFN(':'); break;
310         case 'd': EMITFN('.'); break;
311         case 'h': EMITFN('-'); break;
312         case 'p': EMITFN('+'); break;
313         case 's': EMITFN(' '); break;
314         case 'u': EMITFN('_'); break;
315         case 'A': EMITFN('@'); break;
316         case 'D': EMITFN('$'); break;
317         case 'L': EMITFN('('); break;
318         case 'R': EMITFN(')'); break;
319         case 'Z': EMITFN('Z'); break;
320         default: error = True; goto out;
321      }
322      i++;
323   }
324
325  out:
326   EMITSO(0);
327   EMITFN(0);
328
329   if (error) {
330      /* Something's wrong.  Give up. */
331      VG_(message)(Vg_UserMsg,
332                   "m_demangle: error Z-demangling: %s\n", sym);
333      return False;
334   }
335   if (oflow) {
336      /* It didn't fit.  Give up. */
337      VG_(message)(Vg_UserMsg,
338                   "m_demangle: oflow Z-demangling: %s\n", sym);
339      return False;
340   }
341
342   return True;
343}
344
345
346/*--------------------------------------------------------------------*/
347/*--- end                                                          ---*/
348/*--------------------------------------------------------------------*/
349