1//===- NaClBitcodeParser.h -----------------------------------*- C++ -*-===//
2//     Low-level bitcode driver to parse PNaCl bitcode files.
3//
4//                     The LLVM Compiler Infrastructure
5//
6// This file is distributed under the University of Illinois Open Source
7// License. See LICENSE.TXT for details.
8//
9//===----------------------------------------------------------------------===//
10//
11// Parses and processes low-level PNaCl bitcode files. Defines class
12// NaClBitcodeParser.
13//
14// The concepts of PNaCl bitcode files are basically the same as for
15// LLVM bitcode files (see http://llvm.org/docs/BitCodeFormat.html for
16// details).
17//
18// The bitstream format is an abstract encoding of structured data,
19// very similar to XML in some ways. Like XML, bitstream files contain
20// tags, and nested structures, and you can parse the file without
21// having to understand the tags. Unlike XML, the bitstream format is
22// a binary encoding, and provides a mechanism for the file to
23// self-describe "abbreviations".  Abbreviations are effectively size
24// optimizations for the content.
25//
26// The bitcode file is conceptually a sequence of "blocks", defining
27// the content. Blocks contain a sequence of records and
28// blocks. Nested content is defined using nested blocks.  A (data)
29// "record" is a tag, and a vector of (unsigned integer) values.
30//
31// Blocks are identified using Block IDs. Each kind of block has a
32// unique block "ID". Records have two elements:
33//
34//   a) A "code" identifying what type of record it is.
35//   b) A vector of "values" defining the contents of the record.
36//
37// The bitstream "reader" (defined in NaClBitstreamReader.h) defines
38// the implementation that converts the low-level bit file into
39// records and blocks. The bit stream is processed by moving a
40// "cursor" over the sequence of bits.
41//
42// The bitstream reader assumes that each block/record is read in by
43// first reading the "entry". The entry defines whether it corresponds
44// to one of the following:
45//
46//    a) At the beginning of a (possibly nested) block
47//    b) At the end of the current block.
//    c) The input defines an abbreviation.
49//    d) The input defines a record.
50//
51// An entry contains two values, a "kind" and an "ID". The kind
52// defines which of the four cases above occurs. The ID provides
53// identifying information on how to further process the input. For
// case (a), the ID is the identifier associated with the block
55// being processed. For case (b) and (c) the ID is ignored. For case
56// (d) the ID identifies the abbreviation that should be used to parse
57// the values.
58//
59// The class NaClBitcodeParser defines a bitcode parser that extracts
60// the blocks and records, which are then processed using virtual
61// callbacks. In general, you will want to implement derived classes
62// for each type of block, so that the corresponding data is processed
63// appropriately.
64//
65// The class NaClBitcodeParser parses a bitcode block, and defines a
66// set of callbacks for that block, including:
67//
68//    a) EnterBlock: What to do once we have entered the block.
69//    b) ProcessRecord: What to do with each parsed record.
70//    c) ParseBlock: Parse the (nested) block with the given ID.
71//    d) ExitBlock: What to do once we have finished processing the block.
72//
73// Note that a separate instance of NaClBitcodeParser (or a
74// corresponding derived class) is created for each nested block. Each
75// instance is responsible for only parsing a single block. Method
76// ParseBlock creates new instances to parse nested blocks. Method
77// GetEnclosingParser() can be used to refer to the parser associated
78// with the enclosing block.
79//
80// Currently, the default processing of abbreviations is handled by
81// the PNaCl bitstream reader, rather than by the parser.
82//
83// If you need to process abbreviations processed by the PNaCl
84// bitstream reader, you must explicitly define a
85// NaClBitcodeParserListener to listen (within the bitstream reader),
86// and make appropriate call backs to the NaClBitcodeParser.
87// The listener is glued to parsers using method SetListener.
88//
89// TODO(kschimpf): Define an intermediate derived class of
90// NaClBitcodeParser that defines callbacks based on the actual
91// structure of PNaCl bitcode files.  That is, it has callbacks for
92// each of the types of blocks (i.e. module, types, global variables,
// function, symbol tables etc). This derived class can then be used
94// as the base class for the bitcode reader.
95// ===----------------------------------------------------------------------===//
96
97#ifndef LLVM_BITCODE_NACL_NACLBITCODEPARSER_H
98#define LLVM_BITCODE_NACL_NACLBITCODEPARSER_H
99
100#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
101#include "llvm/Bitcode/NaCl/NaClBitcodeDefs.h"
102#include "llvm/Support/raw_ostream.h"
103#include <vector>
104
105namespace llvm {
106
107class NaClBitcodeRecord;
108class NaClBitcodeParser;
109class NaClBitcodeParserListener;
110
111// Defines the base class for data extracted from the input bitstream
112// (i.e blocks and records).
113class NaClBitcodeData {
114  void operator=(const NaClBitcodeData&) = delete;
115
116public:
117  /// Create data element to be read from input cursor.
118  explicit NaClBitcodeData(NaClBitstreamCursor &Cursor)
119      : Cursor(Cursor), StartBit(Cursor.GetCurrentBitNo())
120  {}
121
122  /// Create copy of the given data element.
123  explicit NaClBitcodeData(const NaClBitcodeData &Data)
124      : Cursor(Data.Cursor), StartBit(Data.StartBit)
125  {}
126
127  /// Returns the bitstream reader being used.
128  NaClBitstreamReader &GetReader() const {
129    return *Cursor.getBitStreamReader();
130  }
131
132  /// Returns the cursor position within the bitstream.
133  NaClBitstreamCursor &GetCursor() const {
134    return Cursor;
135  }
136
137  /// Returns the number of bits defined by the data.
138  uint64_t GetNumBits() const {
139    return GetCursor().GetCurrentBitNo() - StartBit;
140  }
141
142  /// Returns the first bit of the stream data.
143  uint64_t GetStartBit() const {
144    return StartBit;
145  }
146
147protected:
148  /// Change the start bit for the data to the new value.
149  void SetStartBit(uint64_t NewValue) {
150    StartBit = NewValue;
151  }
152
153private:
154  // The bitstream cursor defining location within the bitcode file.
155  NaClBitstreamCursor &Cursor;
156
157  // Start bit for the record.
158  uint64_t StartBit;
159};
160
161/// Models the block defined by a (begin) block record, through the
162/// (end) block record.
163class NaClBitcodeBlock : public NaClBitcodeData {
164  NaClBitcodeBlock(const NaClBitcodeBlock &) = delete;
165  void operator=(const NaClBitcodeBlock &) = delete;
166
167public:
168  /// Given the found (begin) block record for block BlockID, create
169  /// the corresponding data associated with that block.
170  NaClBitcodeBlock(unsigned BlockID, const NaClBitcodeRecord &Record);
171
172  /// Create block data for block BlockID, using the input cursor.
173  NaClBitcodeBlock(unsigned BlockID, NaClBitstreamCursor &Cursor)
174      : NaClBitcodeData(Cursor),
175        BlockID(BlockID),
176        EnclosingBlock(0)
177  {
178    LocalStartBit = GetStartBit();
179  }
180
181  /// Print the contents out to the given stream.
182  void Print(raw_ostream& os) const;
183
184  /// Returns pointer to the enclosing block.
185  const NaClBitcodeBlock *GetEnclosingBlock() const {
186    return EnclosingBlock;
187  }
188
189  /// Returns the block ID of the block.
190  unsigned GetBlockID() const {
191    return BlockID;
192  }
193
194  /// Returns the number of bits in the block associated with the
195  /// bitcode parser parsing this block, excluding nested blocks.
196  unsigned GetLocalNumBits() const {
197    return GetCursor().GetCurrentBitNo() - LocalStartBit;
198  }
199
200protected:
201  // The block ID associated with this record.
202  unsigned BlockID;
203  // The enclosing block, if defined.
204  const NaClBitcodeBlock *EnclosingBlock;
205  // Start bit for the block, updated to skip nested blocks.
206  uint64_t LocalStartBit;
207
208  // Note: We friend class NaClBitcodeParser, so that it can
209  // update field LocalStartBit.
210  friend class NaClBitcodeParser;
211};
212
213typedef NaClBitcodeRecordVector NaClRecordVector;
214
215class NaClBitcodeRecordData {
216  NaClBitcodeRecordData &operator=(const NaClBitcodeRecordData &) = delete;
217public:
218  NaClBitcodeRecordData(unsigned Code, const NaClRecordVector &Values)
219      : Code(Code), Values(Values) {}
220  explicit NaClBitcodeRecordData(const NaClBitcodeRecordData &Record)
221      : Code(Record.Code), Values(Record.Values) {}
222  NaClBitcodeRecordData() : Code(0) {}
223  // The selector code associated with the record.
224  unsigned Code;
225  // The sequence of values defining the parsed record.
226  NaClRecordVector Values;
227
228  void Print(raw_ostream &strm) const;
229};
230
231inline raw_ostream &operator<<(raw_ostream &Strm,
232                               const NaClBitcodeRecordData &Data) {
233  Data.Print(Strm);
234  return Strm;
235}
236
237/// Simple container class to convert the values of the corresponding
238/// read record to a simpler form, only containing values.
239struct NaClBitcodeValues {
240public:
241  NaClBitcodeValues(const NaClBitcodeRecordData &Record)
242      : Record(Record) {}
243
244  size_t size() const {
245    return Record.Values.size()+1;
246  }
247
248  uint64_t operator[](size_t index) const {
249    return index == 0 ? Record.Code : Record.Values[index-1];
250  }
251
252private:
253  const NaClBitcodeRecordData &Record;
254};
255
256/// Defines the data associated with reading a block record in the
257/// PNaCl bitcode stream.
258class NaClBitcodeRecord : public NaClBitcodeData {
259public:
260  /// Type for vector of values representing a record.
261  typedef NaClRecordVector RecordVector;
262
263  /// Creates a bitcode record, starting at the position defined
264  /// by cursor.
265  explicit NaClBitcodeRecord(const NaClBitcodeBlock &Block)
266      : NaClBitcodeData(Block.GetCursor()),
267        Block(Block)
268  {}
269
270  /// Print the contents out to the given stream.
271  void Print(raw_ostream& os) const;
272
273  /// The block the record appears in.
274  const NaClBitcodeBlock &GetBlock() const {
275    return Block;
276  }
277
278  /// Returns the block ID associated with the record.
279  unsigned GetBlockID() const {
280    return Block.GetBlockID();
281  }
282
283  /// Returns the kind of entry read from the input stream.
284  unsigned GetEntryKind() const {
285    return Entry.Kind;
286  }
287
288  /// Returns the code value (i.e. selector) associated with the
289  /// record.
290  unsigned GetCode() const {
291    return Data.Code;
292  }
293
294  /// Returns the EntryID (e.g. abbreviation if !=
295  /// naclbitc::UNABBREV_RECORD) associated with the record. Note:
296  /// for block-enter, block-exit, and define-abbreviation, EntryID is
297  /// not the corresponding abbreviation.
298  unsigned GetEntryID() const {
299    return Entry.ID;
300  }
301
302  /// Returns the (value) record associated with the read record.
303  const RecordVector &GetValues() const {
304    return Data.Values;
305  }
306
307  /// Allows lower level access to data representing record.
308  const NaClBitcodeRecordData &GetRecordData() const {
309    return Data;
310  }
311
312  /// Returns true if the record was read using an abbreviation.
313  bool UsedAnAbbreviation() const {
314    return GetEntryKind() == NaClBitstreamEntry::Record &&
315        GetEntryID() != naclbitc::UNABBREV_RECORD;
316  }
317
318  /// Returns the abbrevation index used to read the record.
319  /// Returns naclbitc::UNABBREV_RECORD if not applicable.
320  unsigned GetAbbreviationIndex() const {
321    return UsedAnAbbreviation()
322        ? GetEntryID() : static_cast<unsigned>(naclbitc::UNABBREV_RECORD);
323  }
324
325  /// Destructively change the abbreviation ID to the given value.
326  void SetAbbreviationIndex(unsigned Index) {
327    Entry.ID = Index;
328  }
329
330protected:
331  // The block associated with the record.
332  const NaClBitcodeBlock &Block;
333  // The data of the record.
334  NaClBitcodeRecordData Data;
335  // The entry (i.e. value(s) preceding the record that define what
336  // value comes next).
337  NaClBitstreamEntry Entry;
338
339private:
340  // Allows class NaClBitcodeParser to read values into the
341  // record, thereby hiding the details of how to read values.
342  friend class NaClBitcodeParser;
343  friend class NaClBitcodeParserListener;
344
345  /// Read bitstream entry. Defines what construct appears next in the
346  /// bitstream.
347  void ReadEntry() {
348    SetStartBit(GetCursor().GetCurrentBitNo());
349    Entry = GetCursor().
350        advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs, 0);
351  }
352
353  /// Reads in a record's values, if the entry defines a record (Must
354  /// be called after ReadEntry).
355  void ReadValues() {
356    Data.Values.clear();
357    Data.Code = GetCursor().readRecord(Entry.ID, Data.Values);
358  }
359
360  NaClBitcodeRecord(const NaClBitcodeRecord &Rcd) = delete;
361  void operator=(const NaClBitcodeRecord &Rcd) = delete;
362};
363
364inline raw_ostream &operator<<(raw_ostream &Strm,
365                               const NaClBitcodeRecord &Record) {
366  Record.Print(Strm);
367  return Strm;
368}
369
370/// Defines a listener to handle abbreviations within a bitcode file.
371/// In particular, abbreviations and the BlockInfo block are made more
372/// explicit, and then sent to the parser through virtuals
373/// ProcessAbbreviation and SetBID.
374class NaClBitcodeParserListener : public NaClAbbrevListener {
375  friend class NaClBitcodeParser;
376public:
377  // Constructs a listener for the given parser.  Note: All nested
378  // parsers automatically inherit this listener.
379  NaClBitcodeParserListener(NaClBitcodeParser *Parser)
380      : Parser(Parser), GlobalBlockID(naclbitc::BLOCKINFO_BLOCK_ID) {
381  }
382
383  virtual ~NaClBitcodeParserListener() {}
384
385private:
386  virtual void BeginBlockInfoBlock(unsigned NumWords);
387
388  virtual void SetBID();
389
390  virtual void EndBlockInfoBlock();
391
392  virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrev,
393                                   bool IsLocal);
394
395  /// The block parser currently being listened to.
396  NaClBitcodeParser *Parser;
397
398  /// The block ID to use if a global abbreviation. Note: This field is
399  /// updated by calls to method SetBID.
400  unsigned GlobalBlockID;
401};
402
403/// Parses a block in the PNaCl bitcode stream.
404class NaClBitcodeParser {
405  // Allow listener privledges, so that it can update/call the parser
406  // using a clean API.
407  friend class NaClBitcodeParserListener;
408
409  // Implements an error handler for errors in the bitstream reader.
410  // Redirects bitstream reader errors to corresponding parrser error
411  // reporting function.
412  class ErrorHandler : public NaClBitstreamCursor::ErrorHandler {
413    NaClBitcodeParser *Parser;
414  public:
415    ErrorHandler(NaClBitcodeParser *Parser,
416                 NaClBitstreamCursor &Cursor):
417        NaClBitstreamCursor::ErrorHandler(Cursor), Parser(Parser) {}
418    LLVM_ATTRIBUTE_NORETURN
419    void Fatal(const std::string &ErrorMessage) const final {
420      Parser->FatalAt(getCurrentBitNo(), ErrorMessage);
421      llvm_unreachable("GCC treats noreturn virtual functions as returning");
422    }
423    ~ErrorHandler() override {}
424  };
425
426public:
427  // Creates a parser to parse the the block at the given cursor in
428  // the PNaCl bitcode stream. This instance is a "dummy" instance
429  // that starts the parser.
430  explicit NaClBitcodeParser(NaClBitstreamCursor &Cursor)
431      : EnclosingParser(0),
432        Block(ILLEGAL_BLOCK_ID, Cursor),
433        Record(Block),
434        Listener(0),
435        ErrStream(&errs()) {
436    std::unique_ptr<NaClBitstreamCursor::ErrorHandler>
437        ErrHandler(new ErrorHandler(this, Cursor));
438    Cursor.setErrorHandler(ErrHandler);
439  }
440
441  virtual ~NaClBitcodeParser();
442
443  /// Reads the (top-level) block associated with the given block
444  /// record at the stream cursor. Returns true if unable to parse.
445  /// Can be called multiple times to parse multiple blocks.
446  bool Parse();
447
448  // Called once the bitstream reader has entered the corresponding
449  // subblock.  Argument NumWords is set to the number of words in the
450  // corresponding subblock.
451  virtual void EnterBlock(unsigned /*NumWords*/) {}
452
453  // Called when the corresponding EndBlock of the block being parsed
454  // is found.
455  virtual void ExitBlock() {}
456
457  // Called after each record (within the block) is read (into field Record).
458  virtual void ProcessRecord() {}
459
460  // Called if a SetBID record is encountered in the BlockInfo block,
461  // and the parser has a listener.
462  virtual void SetBID() {}
463
464  // Called to process an abbreviation if the parser has a listener.
465  virtual void ProcessAbbreviation(unsigned /*BlockID*/,
466                                   NaClBitCodeAbbrev * /*Abbrev*/,
467                                   bool /*IsLocal*/) {}
468
469  // Creates an instance of the NaClBitcodeParser to use to parse the
470  // block with the given block ID, and then call's method
471  // ParseThisBlock() to parse the corresponding block. Note:
472  // Each derived class should define it's own version of this
473  // method, following the pattern below.
474  virtual bool ParseBlock(unsigned BlockID) {
475    // Default implementation just builds a parser that does nothing.
476    NaClBitcodeParser Parser(BlockID, this);
477    return Parser.ParseThisBlock();
478  }
479
480  // Changes the stream to print errors to, and returns the old error stream.
481  // There are two use cases:
482  // 1) To change (from the default errs()) inside the constructor of the
483  //    derived class. In this context, it will be used for all error
484  //    messages for the derived class.
485  // 2) Temporarily modify it for a single error message.
486  raw_ostream &setErrStream(raw_ostream &Stream) {
487    raw_ostream &OldErrStream = *ErrStream;
488    ErrStream = &Stream;
489    return OldErrStream;
490  }
491
492  // Called when an error occurs. BitPosition is the bit position the
493  // error was found, and Message is the error to report. Always
494  // returns true (the error return value of Parse). Level is
495  // the severity of the error.
496  virtual bool ErrorAt(naclbitc::ErrorLevel Level, uint64_t BitPosition,
497                       const std::string &Message);
498
499  bool ErrorAt(uint64_t BitPosition, const std::string &Message) {
500    return ErrorAt(naclbitc::Error, BitPosition, Message);
501  }
502
503  // Called when an error occurs. Message is the error to
504  // report. Always returns true (the error return value of Parse).
505  bool Error(const std::string &Message) {
506    return ErrorAt(Record.GetStartBit(), Message);
507  }
508
509  // Called when a fatal error occurs. BitPosition is the bit position
510  // the error was found, and Message is the error to report. Does not
511  // return.
512  LLVM_ATTRIBUTE_NORETURN
513  void FatalAt(uint64_t BitPosition, const std::string &Message) {
514    ErrorAt(naclbitc::Fatal, BitPosition, Message);
515    llvm_unreachable("Fatal errors should not return");
516  }
517
518  // Called when a fatal error occurs. Message is the error to
519  // report. Does not return.
520  LLVM_ATTRIBUTE_NORETURN
521  void Fatal(const std::string &Message) {
522    FatalAt(Record.GetStartBit(), Message);
523    llvm_unreachable("GCC treats noreturn virtual functions as returning");
524  }
525
526  // Generates fatal generic error message.
527  LLVM_ATTRIBUTE_NORETURN
528  void Fatal() {
529    Fatal("Fatal error occurred!");
530  }
531
532  // Returns the number of bits in this block, including nested blocks.
533  unsigned GetBlockNumBits() const {
534    return Block.GetNumBits();
535  }
536
537  // Returns the number of bits in this block, excluding nested blocks.
538  unsigned GetBlockLocalNumBits() const {
539    return Block.GetLocalNumBits();
540  }
541
542  /// Returns the block ID associated with the Parser.
543  unsigned GetBlockID() const {
544    return Block.GetBlockID();
545  }
546
547  NaClBitcodeBlock &GetBlock() {
548    return Block;
549  }
550
551  /// Returns the enclosing parser of this block.
552  NaClBitcodeParser *GetEnclosingParser() const {
553    // Note: The top-level parser instance is a dummy instance
554    // and is not considered an enclosing parser.
555    return EnclosingParser->EnclosingParser ? EnclosingParser : 0;
556  }
557
558  // Parses the block using the parser defined by
559  // ParseBlock(unsigned).  Returns true if unable to parse the
560  // block. Note: Should only be called by virtual ParseBlock(unsigned).
561  bool ParseThisBlock() {
562    bool Results;
563    if (Listener) {
564      NaClBitcodeParser *CallingParser = Listener->Parser;
565      Listener->Parser = this;
566      Results = ParseThisBlockInternal();
567      Listener->Parser = CallingParser;
568    } else {
569      Results = ParseThisBlockInternal();
570    }
571    return Results;
572  }
573
574  /// Skips the current block, assuming the parser is at the beginning
575  /// of the block. That is, Record.GetEntryKind() equals
576  /// NaClBitstreamEntry::SubBlock. Returns false if
577  /// successful. Otherwise returns 1.
578  bool SkipBlock() {
579    if (Record.GetEntryKind() != NaClBitstreamEntry::SubBlock)
580      return Error("SkipBlock on non-block record");
581    return Record.GetCursor().SkipBlock();
582  }
583
584protected:
585  // The containing parser.
586  NaClBitcodeParser *EnclosingParser;
587
588  // The block the parser is associated with.
589  NaClBitcodeBlock Block;
590
591  // The current record (within the block) being processed.
592  NaClBitcodeRecord Record;
593
594  // The listener (if any) to use.
595  NaClBitcodeParserListener *Listener;
596
597  // The error stream to use if non-null (uses errs() if null).
598  raw_ostream *ErrStream;
599
600  // Creates a block parser to parse the block associated with the bitcode entry
601  // that defines the beginning of a block. This instance actually parses the
602  // corresponding block. Inherits the bitstream cursor from the
603  // EnclosingParser.
604  NaClBitcodeParser(unsigned BlockID, NaClBitcodeParser *EnclosingParser)
605      : EnclosingParser(EnclosingParser),
606        Block(BlockID, EnclosingParser->Record),
607        Record(Block),
608        Listener(EnclosingParser->Listener),
609        ErrStream(EnclosingParser->ErrStream)
610  {}
611
612  // Same as above, but use the supplied bitstream cursor (instead of
613  // inheriting from the enclosing parser). This constructor allows
614  // parallel parsing of subblocks, by allowing the caller to generate
615  // a different Cursor for each block to be parsed in parallel.
616  NaClBitcodeParser(unsigned BlockID, NaClBitcodeParser *EnclosingParser,
617                    NaClBitstreamCursor &Cursor)
618      : EnclosingParser(EnclosingParser),
619        Block(BlockID, Cursor),
620        Record(Block),
621        Listener(EnclosingParser->Listener),
622        ErrStream(EnclosingParser->ErrStream)
623  {}
624
625  /// Defines the listener for this block, and all enclosing blocks,
626  /// to be the given listener. Should be set in the constructor.
627  void SetListener(NaClBitcodeParserListener* UseListener) {
628    Listener = UseListener;
629  }
630
631private:
632  // Special constant identifying the top-level instance.
633  static const unsigned ILLEGAL_BLOCK_ID = UINT_MAX;
634
635  // Parses the block. Returns true if unable to parse the
636  // block. Note: Should only be called by virtual ParseThisBlock.
637  bool ParseThisBlockInternal() {
638    bool Results;
639    if (GetBlockID() == naclbitc::BLOCKINFO_BLOCK_ID) {
640      Results = ParseBlockInfoInternal();
641    } else {
642      Results = ParseBlockInternal();
643      ExitBlock();
644    }
645    return Results;
646  }
647
648  // Parses a BlockInfo block, where processing is handled through
649  // a listener in the bitstream reader.
650  bool ParseBlockInfoInternal();
651
652  // Parses the non-BlockInfo block. Returns true if unable to parse the
653  // block.
654  bool ParseBlockInternal();
655
656  void operator=(const NaClBitcodeParser &Parser) = delete;
657  NaClBitcodeParser(const NaClBitcodeParser &Parser) = delete;
658
659};
660
661}  // namespace llvm
662
663#endif
664