HashedNameToDIE.h revision cc152b20d7b07aa4c229977b7b0d8d94cbda2ff5
1//===-- HashedNameToDIE.h ---------------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef SymbolFileDWARF_HashedNameToDIE_h_
11#define SymbolFileDWARF_HashedNameToDIE_h_
12
13#include <vector>
14
15#include "DWARFDefines.h"
16#include "DWARFFormValue.h"
17
18#include "lldb/lldb-defines.h"
19#include "lldb/Core/dwarf.h"
20#include "lldb/Core/RegularExpression.h"
21#include "lldb/Core/MappedHash.h"
22
23
24class SymbolFileDWARF;
25class DWARFCompileUnit;
26class DWARFDebugInfoEntry;
27
28struct DWARFMappedHash
29{
30    struct DIEInfo
31    {
32        dw_offset_t offset;  // The DIE offset
33        dw_tag_t tag;
34        uint32_t type_flags; // Any flags for this DIEInfo
35
36        DIEInfo () :
37            offset (DW_INVALID_OFFSET),
38            tag (0),
39            type_flags (0)
40        {
41        }
42
43        DIEInfo (dw_offset_t o, dw_tag_t t, uint32_t f) :
44            offset(o),
45            tag (t),
46            type_flags (f)
47        {
48        }
49
50        void
51        Clear()
52        {
53            offset = DW_INVALID_OFFSET;
54            tag = 0;
55            type_flags = 0;
56        }
57    };
58
59    typedef std::vector<DIEInfo> DIEInfoArray;
60    typedef std::vector<uint32_t> DIEArray;
61
62    static void
63    ExtractDIEArray (const DIEInfoArray &die_info_array,
64                     DIEArray &die_offsets)
65    {
66        const size_t count = die_info_array.size();
67        for (size_t i=0; i<count; ++i)
68        {
69            die_offsets.push_back (die_info_array[i].offset);
70        }
71    }
72
73    static void
74    ExtractTypesFromDIEArray (const DIEInfoArray &die_info_array,
75                              uint32_t type_flag_mask,
76                              uint32_t type_flag_value,
77                              DIEArray &die_offsets)
78    {
79        const size_t count = die_info_array.size();
80        for (size_t i=0; i<count; ++i)
81        {
82            if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
83                die_offsets.push_back (die_info_array[i].offset);
84        }
85    }
86
87    enum AtomType
88    {
89        eAtomTypeNULL       = 0u,
90        eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
91        eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that contains the item in question
92        eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
93        eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
94        eAtomTypeTypeFlags  = 5u    // Flags from enum TypeFlags
95    };
96
97    // Bit definitions for the eAtomTypeTypeFlags flags
98    enum TypeFlags
99    {
100        // If the name contains the namespace and class scope or the type
101        // exists in the global namespace, then this bits should be set
102        eTypeFlagNameIsFullyQualified   = ( 1u << 0 ),
103
104        // Always set for C++, only set for ObjC if this is the
105        // @implementation for class
106        eTypeFlagClassIsImplementation  = ( 1u << 1 )
107    };
108
109    struct Atom
110    {
111        uint16_t type;
112        dw_form_t form;
113
114        Atom (uint16_t t = eAtomTypeNULL, dw_form_t f = 0) :
115            type (t),
116            form (f)
117        {
118        }
119    };
120
121    typedef std::vector<Atom> AtomArray;
122
123    static uint32_t
124    GetTypeFlags (SymbolFileDWARF *dwarf2Data,
125                  const DWARFCompileUnit* cu,
126                  const DWARFDebugInfoEntry* die);
127
128
129    static const char *
130    GetAtomTypeName (uint16_t atom)
131    {
132        switch (atom)
133        {
134            case eAtomTypeNULL:         return "NULL";
135            case eAtomTypeDIEOffset:    return "die-offset";
136            case eAtomTypeCUOffset:     return "cu-offset";
137            case eAtomTypeTag:          return "die-tag";
138            case eAtomTypeNameFlags:    return "name-flags";
139            case eAtomTypeTypeFlags:    return "type-flags";
140        }
141        return "<invalid>";
142    }
143    struct Prologue
144    {
145        // DIE offset base so die offsets in hash_data can be CU relative
146        dw_offset_t die_base_offset;
147        AtomArray atoms;
148        size_t min_hash_data_byte_size;
149        bool hash_data_has_fixed_byte_size;
150
151        Prologue (dw_offset_t _die_base_offset = 0) :
152            die_base_offset (_die_base_offset),
153            atoms(),
154            min_hash_data_byte_size(0),
155            hash_data_has_fixed_byte_size(true)
156        {
157            // Define an array of DIE offsets by first defining an array,
158            // and then define the atom type for the array, in this case
159            // we have an array of DIE offsets
160            AppendAtom (eAtomTypeDIEOffset, DW_FORM_data4);
161        }
162
163        virtual ~Prologue()
164        {
165        }
166
167        void
168        ClearAtoms ()
169        {
170            hash_data_has_fixed_byte_size = true;
171            min_hash_data_byte_size = 0;
172            atoms.clear();
173        }
174
175        virtual void
176        Clear ()
177        {
178            die_base_offset = 0;
179            ClearAtoms ();
180        }
181
182        void
183        AppendAtom (AtomType type, dw_form_t form)
184        {
185            atoms.push_back (Atom(type, form));
186            switch (form)
187            {
188                case DW_FORM_indirect:
189                case DW_FORM_exprloc:
190                case DW_FORM_flag_present:
191                case DW_FORM_ref_sig8:
192                    assert (!"Unhandled atom form");
193                    break;
194
195                case DW_FORM_string:
196                case DW_FORM_block:
197                case DW_FORM_block1:
198                case DW_FORM_sdata:
199                case DW_FORM_udata:
200                case DW_FORM_ref_udata:
201                    hash_data_has_fixed_byte_size = false;
202                    // Fall through to the cases below...
203                case DW_FORM_flag:
204                case DW_FORM_data1:
205                case DW_FORM_ref1:
206                case DW_FORM_sec_offset:
207                    min_hash_data_byte_size += 1;
208                    break;
209
210                case DW_FORM_block2:
211                    hash_data_has_fixed_byte_size = false;
212                    // Fall through to the cases below...
213                case DW_FORM_data2:
214                case DW_FORM_ref2:
215                    min_hash_data_byte_size += 2;
216                    break;
217
218                case DW_FORM_block4:
219                    hash_data_has_fixed_byte_size = false;
220                    // Fall through to the cases below...
221                case DW_FORM_data4:
222                case DW_FORM_ref4:
223                case DW_FORM_addr:
224                case DW_FORM_ref_addr:
225                case DW_FORM_strp:
226                    min_hash_data_byte_size += 4;
227                    break;
228
229                case DW_FORM_data8:
230                case DW_FORM_ref8:
231                    min_hash_data_byte_size += 8;
232                    break;
233
234            }
235        }
236
237//        void
238//        Dump (std::ostream* ostrm_ptr);
239
240        uint32_t
241        Read (const lldb_private::DataExtractor &data, uint32_t offset)
242        {
243            ClearAtoms ();
244
245            die_base_offset = data.GetU32 (&offset);
246
247            const uint32_t atom_count = data.GetU32 (&offset);
248            if (atom_count == 0x00060003u)
249            {
250                // Old format, deal with contents of old pre-release format
251                while (data.GetU32(&offset))
252                    /* do nothing */;
253
254                // Hardcode to the only known value for now.
255                AppendAtom (eAtomTypeDIEOffset, DW_FORM_data4);
256            }
257            else
258            {
259                for (uint32_t i=0; i<atom_count; ++i)
260                {
261                    AtomType type = (AtomType)data.GetU16 (&offset);
262                    dw_form_t form = (dw_form_t)data.GetU16 (&offset);
263                    AppendAtom (type, form);
264                }
265            }
266            return offset;
267        }
268
269//        virtual void
270//        Write (BinaryStreamBuf &s);
271
272        size_t
273        GetByteSize () const
274        {
275            // Add an extra count to the atoms size for the zero termination Atom that gets
276            // written to disk
277            return sizeof(die_base_offset) + sizeof(uint32_t) + atoms.size() * sizeof(Atom);
278        }
279
280        size_t
281        GetMinumumHashDataByteSize () const
282        {
283            return min_hash_data_byte_size;
284        }
285
286        bool
287        HashDataHasFixedByteSize() const
288        {
289            return hash_data_has_fixed_byte_size;
290        }
291    };
292
293    struct Header : public MappedHash::Header<Prologue>
294    {
295        Header (dw_offset_t _die_base_offset = 0)
296        {
297        }
298
299        virtual
300        ~Header()
301        {
302        }
303
304        virtual size_t
305        GetByteSize (const HeaderData &header_data)
306        {
307            return header_data.GetByteSize();
308        }
309
310        //        virtual void
311        //        Dump (std::ostream* ostrm_ptr);
312        //
313        virtual uint32_t
314        Read (lldb_private::DataExtractor &data, uint32_t offset)
315        {
316            offset = MappedHash::Header<Prologue>::Read (data, offset);
317            if (offset != UINT32_MAX)
318            {
319                offset = header_data.Read (data, offset);
320            }
321            return offset;
322        }
323
324        bool
325        Read (const lldb_private::DataExtractor &data,
326              uint32_t *offset_ptr,
327              DIEInfo &hash_data) const
328        {
329            const size_t num_atoms = header_data.atoms.size();
330            if (num_atoms == 0)
331                return false;
332
333            for (size_t i=0; i<num_atoms; ++i)
334            {
335                DWARFFormValue form_value (header_data.atoms[i].form);
336
337                if (!form_value.ExtractValue(data, offset_ptr, NULL))
338                    return false;
339
340                switch (header_data.atoms[i].type)
341                {
342                    case eAtomTypeDIEOffset:    // DIE offset, check form for encoding
343                        hash_data.offset = form_value.Reference (header_data.die_base_offset);
344                        break;
345
346                    case eAtomTypeTag:          // DW_TAG value for the DIE
347                        hash_data.tag = form_value.Unsigned ();
348
349                    case eAtomTypeTypeFlags:    // Flags from enum TypeFlags
350                        hash_data.type_flags = form_value.Unsigned ();
351                        break;
352                    default:
353                        return false;
354                        break;
355                }
356            }
357            return true;
358        }
359
360        void
361        Dump (lldb_private::Stream& strm, const DIEInfo &hash_data) const
362        {
363            const size_t num_atoms = header_data.atoms.size();
364            for (size_t i=0; i<num_atoms; ++i)
365            {
366                if (i > 0)
367                    strm.PutCString (", ");
368
369                DWARFFormValue form_value (header_data.atoms[i].form);
370                switch (header_data.atoms[i].type)
371                {
372                    case eAtomTypeDIEOffset:    // DIE offset, check form for encoding
373                        strm.Printf ("{0x%8.8x}", hash_data.offset);
374                        break;
375
376                    case eAtomTypeTag:          // DW_TAG value for the DIE
377                        {
378                            const char *tag_cstr = lldb_private::DW_TAG_value_to_name (hash_data.tag);
379                            if (tag_cstr)
380                                strm.PutCString (tag_cstr);
381                            else
382                                strm.Printf ("DW_TAG_(0x%4.4x)", hash_data.tag);
383                        }
384                        break;
385
386                    case eAtomTypeTypeFlags:    // Flags from enum TypeFlags
387                        strm.Printf ("0x%2.2x", hash_data.type_flags);
388                        if (hash_data.type_flags)
389                        {
390                            strm.PutCString (" (");
391                            if (hash_data.type_flags & eTypeFlagNameIsFullyQualified)
392                                strm.PutCString (" qualified");
393
394                            if (hash_data.type_flags & eTypeFlagClassIsImplementation)
395                                strm.PutCString (" implementation");
396                            strm.PutCString (" )");
397                        }
398                        break;
399
400                    default:
401                        strm.Printf ("AtomType(0x%x)", header_data.atoms[i].type);
402                        break;
403                }
404            }
405        }
406
407    };
408
409//    class ExportTable
410//    {
411//    public:
412//        ExportTable ();
413//
414//        void
415//        AppendNames (DWARFDebugPubnamesSet &pubnames_set,
416//                     StringTable &string_table);
417//
418//        void
419//        AppendNamesEntry (SymbolFileDWARF *dwarf2Data,
420//                          const DWARFCompileUnit* cu,
421//                          const DWARFDebugInfoEntry* die,
422//                          StringTable &string_table);
423//
424//        void
425//        AppendTypesEntry (DWARFData *dwarf2Data,
426//                          const DWARFCompileUnit* cu,
427//                          const DWARFDebugInfoEntry* die,
428//                          StringTable &string_table);
429//
430//        size_t
431//        Save (BinaryStreamBuf &names_data, const StringTable &string_table);
432//
433//        void
434//        AppendName (const char *name,
435//                    uint32_t die_offset,
436//                    StringTable &string_table,
437//                    dw_offset_t name_debug_str_offset = DW_INVALID_OFFSET); // If "name" has already been looked up, then it can be supplied
438//        void
439//        AppendType (const char *name,
440//                    uint32_t die_offset,
441//                    StringTable &string_table);
442//
443//
444//    protected:
445//        struct Entry
446//        {
447//            uint32_t hash;
448//            uint32_t str_offset;
449//            uint32_t die_offset;
450//        };
451//
452//        // Map uniqued .debug_str offset to the corresponding DIE offsets
453//        typedef std::map<uint32_t, DIEInfoArray> NameInfo;
454//        // Map a name hash to one or more name infos
455//        typedef std::map<uint32_t, NameInfo> BucketEntry;
456//
457//        static uint32_t
458//        GetByteSize (const NameInfo &name_info);
459//
460//        typedef std::vector<BucketEntry> BucketEntryColl;
461//        typedef std::vector<Entry> EntryColl;
462//        EntryColl m_entries;
463//
464//    };
465
466
467    // A class for reading and using a saved hash table from a block of data
468    // in memory
469    class MemoryTable : public MappedHash::MemoryTable<uint32_t, DWARFMappedHash::Header, DIEInfoArray>
470    {
471    public:
472
473        MemoryTable (lldb_private::DataExtractor &table_data,
474                     const lldb_private::DataExtractor &string_table,
475                     const char *name) :
476            MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray> (table_data),
477            m_data (table_data),
478            m_string_table (string_table),
479            m_name (name)
480        {
481        }
482
483        virtual
484        ~MemoryTable ()
485        {
486        }
487
488        virtual const char *
489        GetStringForKeyType (KeyType key) const
490        {
491            // The key in the DWARF table is the .debug_str offset for the string
492            return m_string_table.PeekCStr (key);
493        }
494
495        virtual Result
496        GetHashDataForName (const char *name,
497                            uint32_t* hash_data_offset_ptr,
498                            Pair &pair) const
499        {
500            pair.key = m_data.GetU32 (hash_data_offset_ptr);
501            pair.value.clear();
502
503            // If the key is zero, this terminates our chain of HashData objects
504            // for this hash value.
505            if (pair.key == 0)
506                return eResultEndOfHashData;
507
508            // There definitely should be a string for this string offset, if
509            // there isn't, there is something wrong, return and error
510            const char *strp_cstr = m_string_table.PeekCStr (pair.key);
511            if (strp_cstr == NULL)
512            {
513                *hash_data_offset_ptr = UINT32_MAX;
514                return eResultError;
515            }
516
517            const uint32_t count = m_data.GetU32 (hash_data_offset_ptr);
518            const uint32_t min_total_hash_data_size = count * m_header.header_data.GetMinumumHashDataByteSize();
519            if (count > 0 && m_data.ValidOffsetForDataOfSize (*hash_data_offset_ptr, min_total_hash_data_size))
520            {
521                // We have at least one HashData entry, and we have enough
522                // data to parse at leats "count" HashData enties.
523
524                // First make sure the entire C string matches...
525                const bool match = strcmp (name, strp_cstr) == 0;
526
527                if (!match && m_header.header_data.HashDataHasFixedByteSize())
528                {
529                    // If the string doesn't match and we have fixed size data,
530                    // we can just add the total byte size of all HashData objects
531                    // to the hash data offset and be done...
532                    *hash_data_offset_ptr += min_total_hash_data_size;
533                }
534                else
535                {
536                    // If the string does match, or we don't have fixed size data
537                    // then we need to read the hash data as a stream. If the
538                    // string matches we also append all HashData objects to the
539                    // value array.
540                    for (uint32_t i=0; i<count; ++i)
541                    {
542                        DIEInfo die_info;
543                        if (m_header.Read(m_data, hash_data_offset_ptr, die_info))
544                        {
545                            // Only happend the HashData if the string matched...
546                            if (match)
547                                pair.value.push_back (die_info);
548                        }
549                        else
550                        {
551                            // Something went wrong while reading the data
552                            *hash_data_offset_ptr = UINT32_MAX;
553                            return eResultError;
554                        }
555                    }
556                }
557                // Return the correct response depending on if the string matched
558                // or not...
559                if (match)
560                    return eResultKeyMatch;     // The key (cstring) matches and we have lookup results!
561                else
562                    return eResultKeyMismatch;  // The key doesn't match, this function will get called
563                                                // again for the next key/value or the key terminator
564                                                // which in our case is a zero .debug_str offset.
565            }
566            else
567            {
568                *hash_data_offset_ptr = UINT32_MAX;
569                return eResultError;
570            }
571        }
572
573        virtual Result
574        AppendHashDataForRegularExpression (const lldb_private::RegularExpression& regex,
575                                            uint32_t* hash_data_offset_ptr,
576                                            Pair &pair) const
577        {
578            pair.key = m_data.GetU32 (hash_data_offset_ptr);
579            // If the key is zero, this terminates our chain of HashData objects
580            // for this hash value.
581            if (pair.key == 0)
582                return eResultEndOfHashData;
583
584            // There definitely should be a string for this string offset, if
585            // there isn't, there is something wrong, return and error
586            const char *strp_cstr = m_string_table.PeekCStr (pair.key);
587            if (strp_cstr == NULL)
588                return eResultError;
589
590            const uint32_t count = m_data.GetU32 (hash_data_offset_ptr);
591            const uint32_t min_total_hash_data_size = count * m_header.header_data.GetMinumumHashDataByteSize();
592            if (count > 0 && m_data.ValidOffsetForDataOfSize (*hash_data_offset_ptr, min_total_hash_data_size))
593            {
594                const bool match = regex.Execute(strp_cstr);
595
596                if (!match && m_header.header_data.HashDataHasFixedByteSize())
597                {
598                    // If the regex doesn't match and we have fixed size data,
599                    // we can just add the total byte size of all HashData objects
600                    // to the hash data offset and be done...
601                    *hash_data_offset_ptr += min_total_hash_data_size;
602                }
603                else
604                {
605                    // If the string does match, or we don't have fixed size data
606                    // then we need to read the hash data as a stream. If the
607                    // string matches we also append all HashData objects to the
608                    // value array.
609                    for (uint32_t i=0; i<count; ++i)
610                    {
611                        DIEInfo die_info;
612                        if (m_header.Read(m_data, hash_data_offset_ptr, die_info))
613                        {
614                            // Only happend the HashData if the string matched...
615                            if (match)
616                                pair.value.push_back (die_info);
617                        }
618                        else
619                        {
620                            // Something went wrong while reading the data
621                            *hash_data_offset_ptr = UINT32_MAX;
622                            return eResultError;
623                        }
624                    }
625                }
626                // Return the correct response depending on if the string matched
627                // or not...
628                if (match)
629                    return eResultKeyMatch;     // The key (cstring) matches and we have lookup results!
630                else
631                    return eResultKeyMismatch;  // The key doesn't match, this function will get called
632                                                // again for the next key/value or the key terminator
633                                                // which in our case is a zero .debug_str offset.
634            }
635            else
636            {
637                *hash_data_offset_ptr = UINT32_MAX;
638                return eResultError;
639            }
640        }
641
642        size_t
643        AppendAllDIEsThatMatchingRegex (const lldb_private::RegularExpression& regex,
644                                        DIEInfoArray &die_info_array) const
645        {
646            const uint32_t hash_count = m_header.hashes_count;
647            Pair pair;
648            for (uint32_t offset_idx=0; offset_idx<hash_count; ++offset_idx)
649            {
650                uint32_t hash_data_offset = GetHashDataOffset (offset_idx);
651                while (hash_data_offset != UINT32_MAX)
652                {
653                    const uint32_t prev_hash_data_offset = hash_data_offset;
654                    Result hash_result = AppendHashDataForRegularExpression (regex, &hash_data_offset, pair);
655                    if (prev_hash_data_offset == hash_data_offset)
656                        break;
657
658                    // Check the result of getting our hash data
659                    switch (hash_result)
660                    {
661                        case eResultKeyMatch:
662                        case eResultKeyMismatch:
663                            // Whether we matches or not, it doesn't matter, we
664                            // keep looking.
665                            break;
666
667                        case eResultEndOfHashData:
668                        case eResultError:
669                            hash_data_offset = UINT32_MAX;
670                            break;
671                    }
672                }
673            }
674            die_info_array.swap (pair.value);
675            return die_info_array.size();
676        }
677
678        size_t
679        AppendAllDIEsInRange (const uint32_t die_offset_start,
680                              const uint32_t die_offset_end,
681                              DIEInfoArray &die_info_array) const
682        {
683            const uint32_t hash_count = m_header.hashes_count;
684            for (uint32_t offset_idx=0; offset_idx<hash_count; ++offset_idx)
685            {
686                bool done = false;
687                uint32_t hash_data_offset = GetHashDataOffset (offset_idx);
688                while (!done && hash_data_offset != UINT32_MAX)
689                {
690                    KeyType key = m_data.GetU32 (&hash_data_offset);
691                    // If the key is zero, this terminates our chain of HashData objects
692                    // for this hash value.
693                    if (key == 0)
694                        break;
695
696                    const uint32_t count = m_data.GetU32 (&hash_data_offset);
697                    for (uint32_t i=0; i<count; ++i)
698                    {
699                        DIEInfo die_info;
700                        if (m_header.Read(m_data, &hash_data_offset, die_info))
701                        {
702                            if (die_info.offset == 0)
703                                done = true;
704                            if (die_offset_start <= die_info.offset && die_info.offset < die_offset_end)
705                                die_info_array.push_back(die_info);
706                        }
707                    }
708                }
709            }
710            return die_info_array.size();
711        }
712
713        size_t
714        FindByName (const char *name, DIEArray &die_offsets)
715        {
716            DIEInfoArray die_info_array;
717            if (FindByName(name, die_info_array))
718                DWARFMappedHash::ExtractDIEArray (die_info_array, die_offsets);
719            return die_info_array.size();
720        }
721
722        size_t
723        FindCompleteObjCClassByName (const char *name, DIEArray &die_offsets)
724        {
725            DIEInfoArray die_info_array;
726            if (FindByName(name, die_info_array))
727            {
728                if (GetHeader().header_data.atoms.size() == 2)
729                {
730                    // If we have two atoms, then we have the DIE offset and
731                    // the type flags so we can find the objective C class
732                    // efficiently.
733                    DWARFMappedHash::ExtractTypesFromDIEArray (die_info_array,
734                                                               UINT32_MAX,
735                                                               eTypeFlagNameIsFullyQualified | eTypeFlagClassIsImplementation,
736                                                               die_offsets);
737                }
738                else
739                {
740                    // WE don't have the type flags, just return everything
741                    DWARFMappedHash::ExtractDIEArray (die_info_array, die_offsets);
742                }
743            }
744            return die_offsets.size();
745        }
746
747        size_t
748        FindByName (const char *name, DIEInfoArray &die_info_array)
749        {
750            Pair kv_pair;
751            size_t old_size = die_info_array.size();
752            if (Find (name, kv_pair))
753            {
754                die_info_array.swap(kv_pair.value);
755                return die_info_array.size() - old_size;
756            }
757            return 0;
758        }
759
760    protected:
761        const lldb_private::DataExtractor &m_data;
762        const lldb_private::DataExtractor &m_string_table;
763        std::string m_name;
764    };
765};
766
767
768#endif  // SymbolFileDWARF_HashedNameToDIE_h_
769