1/* Get macro information.
2   Copyright (C) 2002-2009, 2014 Red Hat, Inc.
3   This file is part of elfutils.
4   Written by Ulrich Drepper <drepper@redhat.com>, 2002.
5
6   This file is free software; you can redistribute it and/or modify
7   it under the terms of either
8
9     * the GNU Lesser General Public License as published by the Free
10       Software Foundation; either version 3 of the License, or (at
11       your option) any later version
12
13   or
14
15     * the GNU General Public License as published by the Free
16       Software Foundation; either version 2 of the License, or (at
17       your option) any later version
18
19   or both in parallel, as here.
20
21   elfutils is distributed in the hope that it will be useful, but
22   WITHOUT ANY WARRANTY; without even the implied warranty of
23   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24   General Public License for more details.
25
26   You should have received copies of the GNU General Public License and
27   the GNU Lesser General Public License along with this program.  If
28   not, see <http://www.gnu.org/licenses/>.  */
29
30#ifdef HAVE_CONFIG_H
31# include <config.h>
32#endif
33
34#include <assert.h>
35#include <dwarf.h>
36#include <search.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include <libdwP.h>
41
42static int
43get_offset_from (Dwarf_Die *die, int name, Dwarf_Word *retp)
44{
45  /* Get the appropriate attribute.  */
46  Dwarf_Attribute attr;
47  if (INTUSE(dwarf_attr) (die, name, &attr) == NULL)
48    return -1;
49
50  /* Offset into the corresponding section.  */
51  return INTUSE(dwarf_formudata) (&attr, retp);
52}
53
54static int
55macro_op_compare (const void *p1, const void *p2)
56{
57  const Dwarf_Macro_Op_Table *t1 = (const Dwarf_Macro_Op_Table *) p1;
58  const Dwarf_Macro_Op_Table *t2 = (const Dwarf_Macro_Op_Table *) p2;
59
60  if (t1->offset < t2->offset)
61    return -1;
62  if (t1->offset > t2->offset)
63    return 1;
64
65  if (t1->sec_index < t2->sec_index)
66    return -1;
67  if (t1->sec_index > t2->sec_index)
68    return 1;
69
70  return 0;
71}
72
73static void
74build_table (Dwarf_Macro_Op_Table *table,
75	     Dwarf_Macro_Op_Proto op_protos[static 255])
76{
77  unsigned ct = 0;
78  for (unsigned i = 1; i < 256; ++i)
79    if (op_protos[i - 1].forms != NULL)
80      table->table[table->opcodes[i - 1] = ct++] = op_protos[i - 1];
81    else
82      table->opcodes[i - 1] = 0xff;
83}
84
/* Declare a Dwarf_Macro_Op_Proto variable NAME whose operand forms are
   the DW_FORM_* constants given as the remaining arguments.  The form
   bytes live in a function-local static array, so the FORMS pointer
   stays valid after the enclosing function returns.  Relies on GNU
   statement expressions.  */
#define MACRO_PROTO(NAME, ...)					\
  Dwarf_Macro_Op_Proto NAME = ({				\
      static const uint8_t forms_[] = {__VA_ARGS__};		\
      (Dwarf_Macro_Op_Proto) {					\
	.nforms = sizeof forms_,				\
	.forms = forms_,					\
      };							\
    })
90
91enum { macinfo_data_size = offsetof (Dwarf_Macro_Op_Table, table[5]) };
92static unsigned char macinfo_data[macinfo_data_size]
93	__attribute__ ((aligned (__alignof (Dwarf_Macro_Op_Table))));
94
95static __attribute__ ((constructor)) void
96init_macinfo_table (void)
97{
98  MACRO_PROTO (p_udata_str, DW_FORM_udata, DW_FORM_string);
99  MACRO_PROTO (p_udata_udata, DW_FORM_udata, DW_FORM_udata);
100  MACRO_PROTO (p_none);
101
102  Dwarf_Macro_Op_Proto op_protos[255] =
103    {
104      [DW_MACINFO_define - 1] = p_udata_str,
105      [DW_MACINFO_undef - 1] = p_udata_str,
106      [DW_MACINFO_vendor_ext - 1] = p_udata_str,
107      [DW_MACINFO_start_file - 1] = p_udata_udata,
108      [DW_MACINFO_end_file - 1] = p_none,
109      /* If you are adding more elements to this array, increase
110	 MACINFO_DATA_SIZE above.  */
111    };
112
113  Dwarf_Macro_Op_Table *macinfo_table = (void *) macinfo_data;
114  memset (macinfo_table, 0, sizeof macinfo_data);
115  build_table (macinfo_table, op_protos);
116  macinfo_table->sec_index = IDX_debug_macinfo;
117}
118
119static Dwarf_Macro_Op_Table *
120get_macinfo_table (Dwarf *dbg, Dwarf_Word macoff, Dwarf_Die *cudie)
121{
122  assert (cudie != NULL);
123
124  Dwarf_Attribute attr_mem, *attr
125    = INTUSE(dwarf_attr) (cudie, DW_AT_stmt_list, &attr_mem);
126  Dwarf_Off line_offset = (Dwarf_Off) -1;
127  if (attr != NULL)
128    if (unlikely (INTUSE(dwarf_formudata) (attr, &line_offset) != 0))
129      return NULL;
130
131  Dwarf_Macro_Op_Table *table = libdw_alloc (dbg, Dwarf_Macro_Op_Table,
132					     macinfo_data_size, 1);
133  memcpy (table, macinfo_data, macinfo_data_size);
134
135  table->offset = macoff;
136  table->sec_index = IDX_debug_macinfo;
137  table->line_offset = line_offset;
138  table->is_64bit = cudie->cu->address_size == 8;
139  table->comp_dir = __libdw_getcompdir (cudie);
140
141  return table;
142}
143
144static Dwarf_Macro_Op_Table *
145get_table_for_offset (Dwarf *dbg, Dwarf_Word macoff,
146		      const unsigned char *readp,
147		      const unsigned char *const endp,
148		      Dwarf_Die *cudie)
149{
150  const unsigned char *startp = readp;
151
152  /* Request at least 3 bytes for header.  */
153  if (readp + 3 > endp)
154    {
155    invalid_dwarf:
156      __libdw_seterrno (DWARF_E_INVALID_DWARF);
157      return NULL;
158    }
159
160  uint16_t version = read_2ubyte_unaligned_inc (dbg, readp);
161  if (version != 4)
162    {
163      __libdw_seterrno (DWARF_E_INVALID_VERSION);
164      return NULL;
165    }
166
167  uint8_t flags = *readp++;
168  bool is_64bit = (flags & 0x1) != 0;
169
170  Dwarf_Off line_offset = (Dwarf_Off) -1;
171  if ((flags & 0x2) != 0)
172    {
173      line_offset = read_addr_unaligned_inc (is_64bit ? 8 : 4, dbg, readp);
174      if (readp > endp)
175	goto invalid_dwarf;
176    }
177  else if (cudie != NULL)
178    {
179      Dwarf_Attribute attr_mem, *attr
180	= INTUSE(dwarf_attr) (cudie, DW_AT_stmt_list, &attr_mem);
181      if (attr != NULL)
182	if (unlikely (INTUSE(dwarf_formudata) (attr, &line_offset) != 0))
183	  return NULL;
184    }
185
186  /* """The macinfo entry types defined in this standard may, but
187     might not, be described in the table""".
188
189     I.e. these may be present.  It's tempting to simply skip them,
190     but it's probably more correct to tolerate that a producer tweaks
191     the way certain opcodes are encoded, for whatever reasons.  */
192
193  MACRO_PROTO (p_udata_str, DW_FORM_udata, DW_FORM_string);
194  MACRO_PROTO (p_udata_strp, DW_FORM_udata, DW_FORM_strp);
195  MACRO_PROTO (p_udata_udata, DW_FORM_udata, DW_FORM_udata);
196  MACRO_PROTO (p_secoffset, DW_FORM_sec_offset);
197  MACRO_PROTO (p_none);
198
199  Dwarf_Macro_Op_Proto op_protos[255] =
200    {
201      [DW_MACRO_GNU_define - 1] = p_udata_str,
202      [DW_MACRO_GNU_undef - 1] = p_udata_str,
203      [DW_MACRO_GNU_define_indirect - 1] = p_udata_strp,
204      [DW_MACRO_GNU_undef_indirect - 1] = p_udata_strp,
205      [DW_MACRO_GNU_start_file - 1] = p_udata_udata,
206      [DW_MACRO_GNU_end_file - 1] = p_none,
207      [DW_MACRO_GNU_transparent_include - 1] = p_secoffset,
208      /* N.B. DW_MACRO_undef_indirectx, DW_MACRO_define_indirectx
209	 should be added when 130313.1 is supported.  */
210    };
211
212  if ((flags & 0x4) != 0)
213    {
214      unsigned count = *readp++;
215      for (unsigned i = 0; i < count; ++i)
216	{
217	  unsigned opcode = *readp++;
218
219	  Dwarf_Macro_Op_Proto e;
220	  if (readp >= endp)
221	    goto invalid;
222	  get_uleb128 (e.nforms, readp, endp);
223	  e.forms = readp;
224	  op_protos[opcode - 1] = e;
225
226	  readp += e.nforms;
227	  if (readp > endp)
228	    {
229	    invalid:
230	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
231	      return NULL;
232	    }
233	}
234    }
235
236  size_t ct = 0;
237  for (unsigned i = 1; i < 256; ++i)
238    if (op_protos[i - 1].forms != NULL)
239      ++ct;
240
241  /* We support at most 0xfe opcodes defined in the table, as 0xff is
242     a value that means that given opcode is not stored at all.  But
243     that should be fine, as opcode 0 is not allocated.  */
244  assert (ct < 0xff);
245
246  size_t macop_table_size = offsetof (Dwarf_Macro_Op_Table, table[ct]);
247
248  Dwarf_Macro_Op_Table *table = libdw_alloc (dbg, Dwarf_Macro_Op_Table,
249					     macop_table_size, 1);
250
251  *table = (Dwarf_Macro_Op_Table) {
252    .offset = macoff,
253    .sec_index = IDX_debug_macro,
254    .line_offset = line_offset,
255    .header_len = readp - startp,
256    .version = version,
257    .is_64bit = is_64bit,
258
259    /* NULL if CUDIE is NULL or DW_AT_comp_dir is absent.  */
260    .comp_dir = __libdw_getcompdir (cudie),
261  };
262  build_table (table, op_protos);
263
264  return table;
265}
266
267static Dwarf_Macro_Op_Table *
268cache_op_table (Dwarf *dbg, int sec_index, Dwarf_Off macoff,
269		const unsigned char *startp,
270		const unsigned char *const endp,
271		Dwarf_Die *cudie)
272{
273  Dwarf_Macro_Op_Table fake = { .offset = macoff, .sec_index = sec_index };
274  Dwarf_Macro_Op_Table **found = tfind (&fake, &dbg->macro_ops,
275					macro_op_compare);
276  if (found != NULL)
277    return *found;
278
279  Dwarf_Macro_Op_Table *table = sec_index == IDX_debug_macro
280    ? get_table_for_offset (dbg, macoff, startp, endp, cudie)
281    : get_macinfo_table (dbg, macoff, cudie);
282
283  if (table == NULL)
284    return NULL;
285
286  Dwarf_Macro_Op_Table **ret = tsearch (table, &dbg->macro_ops,
287					macro_op_compare);
288  if (unlikely (ret == NULL))
289    {
290      __libdw_seterrno (DWARF_E_NOMEM);
291      return NULL;
292    }
293
294  return *ret;
295}
296
297static ptrdiff_t
298read_macros (Dwarf *dbg, int sec_index,
299	     Dwarf_Off macoff, int (*callback) (Dwarf_Macro *, void *),
300	     void *arg, ptrdiff_t offset, bool accept_0xff,
301	     Dwarf_Die *cudie)
302{
303  Elf_Data *d = dbg->sectiondata[sec_index];
304  if (unlikely (d == NULL || d->d_buf == NULL))
305    {
306      __libdw_seterrno (DWARF_E_NO_ENTRY);
307      return -1;
308    }
309
310  if (unlikely (macoff >= d->d_size))
311    {
312      __libdw_seterrno (DWARF_E_INVALID_DWARF);
313      return -1;
314    }
315
316  const unsigned char *const startp = d->d_buf + macoff;
317  const unsigned char *const endp = d->d_buf + d->d_size;
318
319  Dwarf_Macro_Op_Table *table = cache_op_table (dbg, sec_index, macoff,
320						startp, endp, cudie);
321  if (table == NULL)
322    return -1;
323
324  if (offset == 0)
325    offset = table->header_len;
326
327  assert (offset >= 0);
328  assert (offset < endp - startp);
329  const unsigned char *readp = startp + offset;
330
331  while (readp < endp)
332    {
333      unsigned int opcode = *readp++;
334      if (opcode == 0)
335	/* Nothing more to do.  */
336	return 0;
337
338      if (unlikely (opcode == 0xff && ! accept_0xff))
339	{
340	  /* See comment below at dwarf_getmacros for explanation of
341	     why we are doing this.  */
342	  __libdw_seterrno (DWARF_E_INVALID_OPCODE);
343	  return -1;
344	}
345
346      unsigned int idx = table->opcodes[opcode - 1];
347      if (idx == 0xff)
348	{
349	  __libdw_seterrno (DWARF_E_INVALID_OPCODE);
350	  return -1;
351	}
352
353      Dwarf_Macro_Op_Proto *proto = &table->table[idx];
354
355      /* A fake CU with bare minimum data to fool dwarf_formX into
356	 doing the right thing with the attributes that we put out.
357	 We arbitrarily pretend it's version 4.  */
358      Dwarf_CU fake_cu = {
359	.dbg = dbg,
360	.version = 4,
361	.offset_size = table->is_64bit ? 8 : 4,
362	.startp = (void *) startp + offset,
363	.endp = (void *) endp,
364      };
365
366      Dwarf_Attribute *attributes;
367      Dwarf_Attribute *attributesp = NULL;
368      Dwarf_Attribute nattributes[8];
369      if (unlikely (proto->nforms > 8))
370	{
371	  attributesp = malloc (sizeof (Dwarf_Attribute) * proto->nforms);
372	  if (attributesp == NULL)
373	    {
374	      __libdw_seterrno (DWARF_E_NOMEM);
375	      return -1;
376	    }
377	  attributes = attributesp;
378	}
379      else
380	attributes = &nattributes[0];
381
382      for (Dwarf_Word i = 0; i < proto->nforms; ++i)
383	{
384	  /* We pretend this is a DW_AT_GNU_macros attribute so that
385	     DW_FORM_sec_offset forms get correctly interpreted as
386	     offset into .debug_macro.  */
387	  attributes[i].code = DW_AT_GNU_macros;
388	  attributes[i].form = proto->forms[i];
389	  attributes[i].valp = (void *) readp;
390	  attributes[i].cu = &fake_cu;
391
392	  size_t len = __libdw_form_val_len (&fake_cu, proto->forms[i], readp);
393	  if (unlikely (len == (size_t) -1))
394	    {
395	      free (attributesp);
396	      return -1;
397	    }
398
399	  readp += len;
400	}
401
402      Dwarf_Macro macro = {
403	.table = table,
404	.opcode = opcode,
405	.attributes = attributes,
406      };
407
408      int res = callback (&macro, arg);
409      if (unlikely (attributesp != NULL))
410	free (attributesp);
411
412      if (res != DWARF_CB_OK)
413	return readp - startp;
414    }
415
416  return 0;
417}
418
419/* Token layout:
420
421   - The highest bit is used for distinguishing between callers that
422     know that opcode 0xff may have one of two incompatible meanings.
423     The mask that we use for selecting this bit is
424     DWARF_GETMACROS_START.
425
426   - The rest of the token (31 or 63 bits) encodes address inside the
427     macro unit.
428
429   Besides, token value of 0 signals end of iteration and -1 is
430   reserved for signaling errors.  That means it's impossible to
431   represent maximum offset of a .debug_macro unit to new-style
432   callers (which in practice decreases the permissible macro unit
433   size by another 1 byte).  */
434
435static ptrdiff_t
436token_from_offset (ptrdiff_t offset, bool accept_0xff)
437{
438  if (offset == -1 || offset == 0)
439    return offset;
440
441  /* Make sure the offset didn't overflow into the flag bit.  */
442  if ((offset & DWARF_GETMACROS_START) != 0)
443    {
444      __libdw_seterrno (DWARF_E_TOO_BIG);
445      return -1;
446    }
447
448  if (accept_0xff)
449    offset |= DWARF_GETMACROS_START;
450
451  return offset;
452}
453
454static ptrdiff_t
455offset_from_token (ptrdiff_t token, bool *accept_0xffp)
456{
457  *accept_0xffp = (token & DWARF_GETMACROS_START) != 0;
458  token &= ~DWARF_GETMACROS_START;
459
460  return token;
461}
462
463static ptrdiff_t
464gnu_macros_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
465			  int (*callback) (Dwarf_Macro *, void *),
466			  void *arg, ptrdiff_t offset, bool accept_0xff,
467			  Dwarf_Die *cudie)
468{
469  assert (offset >= 0);
470
471  if (macoff >= dbg->sectiondata[IDX_debug_macro]->d_size)
472    {
473      __libdw_seterrno (DWARF_E_INVALID_OFFSET);
474      return -1;
475    }
476
477  return read_macros (dbg, IDX_debug_macro, macoff,
478		      callback, arg, offset, accept_0xff, cudie);
479}
480
481static ptrdiff_t
482macro_info_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
483			  int (*callback) (Dwarf_Macro *, void *),
484			  void *arg, ptrdiff_t offset, Dwarf_Die *cudie)
485{
486  assert (offset >= 0);
487
488  return read_macros (dbg, IDX_debug_macinfo, macoff,
489		      callback, arg, offset, true, cudie);
490}
491
492ptrdiff_t
493dwarf_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
494		     int (*callback) (Dwarf_Macro *, void *),
495		     void *arg, ptrdiff_t token)
496{
497  if (dbg == NULL)
498    {
499      __libdw_seterrno (DWARF_E_NO_DWARF);
500      return -1;
501    }
502
503  bool accept_0xff;
504  ptrdiff_t offset = offset_from_token (token, &accept_0xff);
505  assert (accept_0xff);
506
507  offset = gnu_macros_getmacros_off (dbg, macoff, callback, arg, offset,
508				     accept_0xff, NULL);
509
510  return token_from_offset (offset, accept_0xff);
511}
512
513ptrdiff_t
514dwarf_getmacros (Dwarf_Die *cudie, int (*callback) (Dwarf_Macro *, void *),
515		 void *arg, ptrdiff_t token)
516{
517  if (cudie == NULL)
518    {
519      __libdw_seterrno (DWARF_E_NO_DWARF);
520      return -1;
521    }
522
523  /* This function might be called from a code that expects to see
524     DW_MACINFO_* opcodes, not DW_MACRO_{GNU_,}* ones.  It is fine to
525     serve most DW_MACRO_{GNU_,}* opcodes to such code, because those
526     whose values are the same as DW_MACINFO_* ones also have the same
527     behavior.  It is not very likely that a .debug_macro section
528     would only use the part of opcode space that it shares with
529     .debug_macinfo, but it is possible.  Serving the opcodes that are
530     only valid in DW_MACRO_{GNU_,}* domain is OK as well, because
531     clients in general need to be ready that newer standards define
532     more opcodes, and have coping mechanisms for unfamiliar opcodes.
533
534     The one exception to the above rule is opcode 0xff, which has
535     concrete semantics in .debug_macinfo, but falls into vendor block
536     in .debug_macro, and can be assigned to do whatever.  There is
537     some small probability that the two opcodes would look
538     superficially similar enough that a client would be confused and
539     misbehave as a result.  For this reason, we refuse to serve
540     through this interface 0xff's originating from .debug_macro
541     unless the TOKEN that we obtained indicates the call originates
542     from a new-style caller.  See above for details on what
543     information is encoded into tokens.  */
544
545  bool accept_0xff;
546  ptrdiff_t offset = offset_from_token (token, &accept_0xff);
547
548  /* DW_AT_macro_info */
549  if (dwarf_hasattr (cudie, DW_AT_macro_info))
550    {
551      Dwarf_Word macoff;
552      if (get_offset_from (cudie, DW_AT_macro_info, &macoff) != 0)
553	return -1;
554      offset = macro_info_getmacros_off (cudie->cu->dbg, macoff,
555					 callback, arg, offset, cudie);
556    }
557  else
558    {
559      /* DW_AT_GNU_macros, DW_AT_macros */
560      Dwarf_Word macoff;
561      if (get_offset_from (cudie, DW_AT_GNU_macros, &macoff) != 0)
562	return -1;
563      offset = gnu_macros_getmacros_off (cudie->cu->dbg, macoff,
564					 callback, arg, offset, accept_0xff,
565					 cudie);
566    }
567
568  return token_from_offset (offset, accept_0xff);
569}
570