1//===------------ ARMDecoderEmitter.cpp - Decoder Generator ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the ARM Disassembler.
11// It contains the tablegen backend that emits the decoder functions for ARM and
12// Thumb.  The disassembler core includes the auto-generated file, invokes the
13// decoder functions, and builds up the MCInst based on the decoded Opcode.
14//
15//===----------------------------------------------------------------------===//
16
17#define DEBUG_TYPE "arm-decoder-emitter"
18
19#include "ARMDecoderEmitter.h"
20#include "CodeGenTarget.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/raw_ostream.h"
24#include "llvm/TableGen/Record.h"
25
26#include <vector>
27#include <map>
28#include <string>
29
30using namespace llvm;
31
32/////////////////////////////////////////////////////
33//                                                 //
34//  Enums and Utilities for ARM Instruction Format //
35//                                                 //
36/////////////////////////////////////////////////////
37
38#define ARM_FORMATS                   \
39  ENTRY(ARM_FORMAT_PSEUDO,         0) \
40  ENTRY(ARM_FORMAT_MULFRM,         1) \
41  ENTRY(ARM_FORMAT_BRFRM,          2) \
42  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \
43  ENTRY(ARM_FORMAT_DPFRM,          4) \
44  ENTRY(ARM_FORMAT_DPSOREGREGFRM,     5) \
45  ENTRY(ARM_FORMAT_LDFRM,          6) \
46  ENTRY(ARM_FORMAT_STFRM,          7) \
47  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \
48  ENTRY(ARM_FORMAT_STMISCFRM,      9) \
49  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \
50  ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \
51  ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \
52  ENTRY(ARM_FORMAT_SATFRM,        13) \
53  ENTRY(ARM_FORMAT_EXTFRM,        14) \
54  ENTRY(ARM_FORMAT_VFPUNARYFRM,   15) \
55  ENTRY(ARM_FORMAT_VFPBINARYFRM,  16) \
56  ENTRY(ARM_FORMAT_VFPCONV1FRM,   17) \
57  ENTRY(ARM_FORMAT_VFPCONV2FRM,   18) \
58  ENTRY(ARM_FORMAT_VFPCONV3FRM,   19) \
59  ENTRY(ARM_FORMAT_VFPCONV4FRM,   20) \
60  ENTRY(ARM_FORMAT_VFPCONV5FRM,   21) \
61  ENTRY(ARM_FORMAT_VFPLDSTFRM,    22) \
62  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
63  ENTRY(ARM_FORMAT_VFPMISCFRM,    24) \
64  ENTRY(ARM_FORMAT_THUMBFRM,      25) \
65  ENTRY(ARM_FORMAT_MISCFRM,       26) \
66  ENTRY(ARM_FORMAT_NEONGETLNFRM,  27) \
67  ENTRY(ARM_FORMAT_NEONSETLNFRM,  28) \
68  ENTRY(ARM_FORMAT_NEONDUPFRM,    29) \
69  ENTRY(ARM_FORMAT_NLdSt,         30) \
70  ENTRY(ARM_FORMAT_N1RegModImm,   31) \
71  ENTRY(ARM_FORMAT_N2Reg,         32) \
72  ENTRY(ARM_FORMAT_NVCVT,         33) \
73  ENTRY(ARM_FORMAT_NVecDupLn,     34) \
74  ENTRY(ARM_FORMAT_N2RegVecShL,   35) \
75  ENTRY(ARM_FORMAT_N2RegVecShR,   36) \
76  ENTRY(ARM_FORMAT_N3Reg,         37) \
77  ENTRY(ARM_FORMAT_N3RegVecSh,    38) \
78  ENTRY(ARM_FORMAT_NVecExtract,   39) \
79  ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
80  ENTRY(ARM_FORMAT_NVTBL,         41) \
81  ENTRY(ARM_FORMAT_DPSOREGIMMFRM, 42)
82
83// ARM instruction format specifies the encoding used by the instruction.
84#define ENTRY(n, v) n = v,
85typedef enum {
86  ARM_FORMATS
87  ARM_FORMAT_NA
88} ARMFormat;
89#undef ENTRY
90
91// Converts enum to const char*.
92static const char *stringForARMFormat(ARMFormat form) {
93#define ENTRY(n, v) case n: return #n;
94  switch(form) {
95    ARM_FORMATS
96  case ARM_FORMAT_NA:
97  default:
98    return "";
99  }
100#undef ENTRY
101}
102
103enum {
104  IndexModeNone = 0,
105  IndexModePre  = 1,
106  IndexModePost = 2,
107  IndexModeUpd  = 3
108};
109
110/////////////////////////
111//                     //
112//  Utility functions  //
113//                     //
114/////////////////////////
115
116/// byteFromBitsInit - Return the byte value from a BitsInit.
117/// Called from getByteField().
118static uint8_t byteFromBitsInit(BitsInit &init) {
119  int width = init.getNumBits();
120
121  assert(width <= 8 && "Field is too large for uint8_t!");
122
123  int index;
124  uint8_t mask = 0x01;
125
126  uint8_t ret = 0;
127
128  for (index = 0; index < width; index++) {
129    if (static_cast<BitInit*>(init.getBit(index))->getValue())
130      ret |= mask;
131
132    mask <<= 1;
133  }
134
135  return ret;
136}
137
138static uint8_t getByteField(const Record &def, const char *str) {
139  BitsInit *bits = def.getValueAsBitsInit(str);
140  return byteFromBitsInit(*bits);
141}
142
143static BitsInit &getBitsField(const Record &def, const char *str) {
144  BitsInit *bits = def.getValueAsBitsInit(str);
145  return *bits;
146}
147
148/// sameStringExceptSuffix - Return true if the two strings differ only in RHS's
149/// suffix.  ("VST4d8", "VST4d8_UPD", "_UPD") as input returns true.
150static
151bool sameStringExceptSuffix(const StringRef LHS, const StringRef RHS,
152                            const StringRef Suffix) {
153
154  if (RHS.startswith(LHS) && RHS.endswith(Suffix))
155    return RHS.size() == LHS.size() + Suffix.size();
156
157  return false;
158}
159
160/// thumbInstruction - Determine whether we have a Thumb instruction.
161/// See also ARMInstrFormats.td.
162static bool thumbInstruction(uint8_t Form) {
163  return Form == ARM_FORMAT_THUMBFRM;
164}
165
166// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
167// for a bit value.
168//
169// BIT_UNFILTERED is used as the init value for a filter position.  It is used
170// only for filter processings.
171typedef enum {
172  BIT_TRUE,      // '1'
173  BIT_FALSE,     // '0'
174  BIT_UNSET,     // '?'
175  BIT_UNFILTERED // unfiltered
176} bit_value_t;
177
178static bool ValueSet(bit_value_t V) {
179  return (V == BIT_TRUE || V == BIT_FALSE);
180}
181static bool ValueNotSet(bit_value_t V) {
182  return (V == BIT_UNSET);
183}
184static int Value(bit_value_t V) {
185  return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
186}
187static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
188  if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
189    return bit->getValue() ? BIT_TRUE : BIT_FALSE;
190
191  // The bit is uninitialized.
192  return BIT_UNSET;
193}
194// Prints the bit value for each position.
195static void dumpBits(raw_ostream &o, BitsInit &bits) {
196  unsigned index;
197
198  for (index = bits.getNumBits(); index > 0; index--) {
199    switch (bitFromBits(bits, index - 1)) {
200    case BIT_TRUE:
201      o << "1";
202      break;
203    case BIT_FALSE:
204      o << "0";
205      break;
206    case BIT_UNSET:
207      o << "_";
208      break;
209    default:
210      assert(0 && "unexpected return value from bitFromBits");
211    }
212  }
213}
214
215// Enums for the available target names.
216typedef enum {
217  TARGET_ARM = 0,
218  TARGET_THUMB
219} TARGET_NAME_t;
220
221// FIXME: Possibly auto-detected?
222#define BIT_WIDTH 32
223
224// Forward declaration.
225class ARMFilterChooser;
226
227// Representation of the instruction to work on.
228typedef bit_value_t insn_t[BIT_WIDTH];
229
230/// Filter - Filter works with FilterChooser to produce the decoding tree for
231/// the ISA.
232///
233/// It is useful to think of a Filter as governing the switch stmts of the
234/// decoding tree in a certain level.  Each case stmt delegates to an inferior
235/// FilterChooser to decide what further decoding logic to employ, or in another
236/// words, what other remaining bits to look at.  The FilterChooser eventually
237/// chooses a best Filter to do its job.
238///
239/// This recursive scheme ends when the number of Opcodes assigned to the
240/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
241/// the Filter/FilterChooser combo does not know how to distinguish among the
242/// Opcodes assigned.
243///
244/// An example of a conflict is
245///
246/// Conflict:
247///                     111101000.00........00010000....
248///                     111101000.00........0001........
249///                     1111010...00........0001........
250///                     1111010...00....................
251///                     1111010.........................
252///                     1111............................
253///                     ................................
254///     VST4q8a         111101000_00________00010000____
255///     VST4q8b         111101000_00________00010000____
256///
257/// The Debug output shows the path that the decoding tree follows to reach the
258/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
259/// even registers, while VST4q8b is a vst4 to double-spaced odd regsisters.
260///
261/// The encoding info in the .td files does not specify this meta information,
262/// which could have been used by the decoder to resolve the conflict.  The
263/// decoder could try to decode the even/odd register numbering and assign to
264/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
265/// version and return the Opcode since the two have the same Asm format string.
266class ARMFilter {
267protected:
268  ARMFilterChooser *Owner; // points to the FilterChooser who owns this filter
269  unsigned StartBit; // the starting bit position
270  unsigned NumBits; // number of bits to filter
271  bool Mixed; // a mixed region contains both set and unset bits
272
273  // Map of well-known segment value to the set of uid's with that value.
274  std::map<uint64_t, std::vector<unsigned> > FilteredInstructions;
275
276  // Set of uid's with non-constant segment values.
277  std::vector<unsigned> VariableInstructions;
278
279  // Map of well-known segment value to its delegate.
280  std::map<unsigned, ARMFilterChooser*> FilterChooserMap;
281
282  // Number of instructions which fall under FilteredInstructions category.
283  unsigned NumFiltered;
284
285  // Keeps track of the last opcode in the filtered bucket.
286  unsigned LastOpcFiltered;
287
288  // Number of instructions which fall under VariableInstructions category.
289  unsigned NumVariable;
290
291public:
292  unsigned getNumFiltered() { return NumFiltered; }
293  unsigned getNumVariable() { return NumVariable; }
294  unsigned getSingletonOpc() {
295    assert(NumFiltered == 1);
296    return LastOpcFiltered;
297  }
298  // Return the filter chooser for the group of instructions without constant
299  // segment values.
300  ARMFilterChooser &getVariableFC() {
301    assert(NumFiltered == 1);
302    assert(FilterChooserMap.size() == 1);
303    return *(FilterChooserMap.find((unsigned)-1)->second);
304  }
305
306  ARMFilter(const ARMFilter &f);
307  ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
308            bool mixed);
309
310  ~ARMFilter();
311
312  // Divides the decoding task into sub tasks and delegates them to the
313  // inferior FilterChooser's.
314  //
315  // A special case arises when there's only one entry in the filtered
316  // instructions.  In order to unambiguously decode the singleton, we need to
317  // match the remaining undecoded encoding bits against the singleton.
318  void recurse();
319
320  // Emit code to decode instructions given a segment or segments of bits.
321  void emit(raw_ostream &o, unsigned &Indentation);
322
323  // Returns the number of fanout produced by the filter.  More fanout implies
324  // the filter distinguishes more categories of instructions.
325  unsigned usefulness() const;
326}; // End of class Filter
327
328// These are states of our finite state machines used in FilterChooser's
329// filterProcessor() which produces the filter candidates to use.
330typedef enum {
331  ATTR_NONE,
332  ATTR_FILTERED,
333  ATTR_ALL_SET,
334  ATTR_ALL_UNSET,
335  ATTR_MIXED
336} bitAttr_t;
337
338/// ARMFilterChooser - FilterChooser chooses the best filter among a set of Filters
339/// in order to perform the decoding of instructions at the current level.
340///
341/// Decoding proceeds from the top down.  Based on the well-known encoding bits
342/// of instructions available, FilterChooser builds up the possible Filters that
343/// can further the task of decoding by distinguishing among the remaining
344/// candidate instructions.
345///
346/// Once a filter has been chosen, it is called upon to divide the decoding task
347/// into sub-tasks and delegates them to its inferior FilterChoosers for further
348/// processings.
349///
350/// It is useful to think of a Filter as governing the switch stmts of the
351/// decoding tree.  And each case is delegated to an inferior FilterChooser to
352/// decide what further remaining bits to look at.
353class ARMFilterChooser {
354  static TARGET_NAME_t TargetName;
355
356protected:
357  friend class ARMFilter;
358
359  // Vector of codegen instructions to choose our filter.
360  const std::vector<const CodeGenInstruction*> &AllInstructions;
361
362  // Vector of uid's for this filter chooser to work on.
363  const std::vector<unsigned> Opcodes;
364
365  // Vector of candidate filters.
366  std::vector<ARMFilter> Filters;
367
368  // Array of bit values passed down from our parent.
369  // Set to all BIT_UNFILTERED's for Parent == NULL.
370  bit_value_t FilterBitValues[BIT_WIDTH];
371
372  // Links to the FilterChooser above us in the decoding tree.
373  ARMFilterChooser *Parent;
374
375  // Index of the best filter from Filters.
376  int BestIndex;
377
378public:
379  static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; }
380
381  ARMFilterChooser(const ARMFilterChooser &FC) :
382      AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
383      Filters(FC.Filters), Parent(FC.Parent), BestIndex(FC.BestIndex) {
384    memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues));
385  }
386
387  ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
388                const std::vector<unsigned> &IDs) :
389      AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(NULL),
390      BestIndex(-1) {
391    for (unsigned i = 0; i < BIT_WIDTH; ++i)
392      FilterBitValues[i] = BIT_UNFILTERED;
393
394    doFilter();
395  }
396
397  ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
398                   const std::vector<unsigned> &IDs,
399                   bit_value_t (&ParentFilterBitValues)[BIT_WIDTH],
400                   ARMFilterChooser &parent) :
401      AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(&parent),
402      BestIndex(-1) {
403    for (unsigned i = 0; i < BIT_WIDTH; ++i)
404      FilterBitValues[i] = ParentFilterBitValues[i];
405
406    doFilter();
407  }
408
409  // The top level filter chooser has NULL as its parent.
410  bool isTopLevel() { return Parent == NULL; }
411
412  // This provides an opportunity for target specific code emission.
413  void emitTopHook(raw_ostream &o);
414
415  // Emit the top level typedef and decodeInstruction() function.
416  void emitTop(raw_ostream &o, unsigned &Indentation);
417
418  // This provides an opportunity for target specific code emission after
419  // emitTop().
420  void emitBot(raw_ostream &o, unsigned &Indentation);
421
422protected:
423  // Populates the insn given the uid.
424  void insnWithID(insn_t &Insn, unsigned Opcode) const {
425    if (AllInstructions[Opcode]->isPseudo)
426      return;
427
428    BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
429
430    for (unsigned i = 0; i < BIT_WIDTH; ++i)
431      Insn[i] = bitFromBits(Bits, i);
432
433    // Set Inst{21} to 1 (wback) when IndexModeBits == IndexModeUpd.
434    Record *R = AllInstructions[Opcode]->TheDef;
435    if (R->getValue("IndexModeBits") &&
436        getByteField(*R, "IndexModeBits") == IndexModeUpd)
437      Insn[21] = BIT_TRUE;
438  }
439
440  // Returns the record name.
441  const std::string &nameWithID(unsigned Opcode) const {
442    return AllInstructions[Opcode]->TheDef->getName();
443  }
444
445  // Populates the field of the insn given the start position and the number of
446  // consecutive bits to scan for.
447  //
448  // Returns false if there exists any uninitialized bit value in the range.
449  // Returns true, otherwise.
450  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
451      unsigned NumBits) const;
452
453  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
454  /// filter array as a series of chars.
455  void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[BIT_WIDTH]);
456
457  /// dumpStack - dumpStack traverses the filter chooser chain and calls
458  /// dumpFilterArray on each filter chooser up to the top level one.
459  void dumpStack(raw_ostream &o, const char *prefix);
460
461  ARMFilter &bestFilter() {
462    assert(BestIndex != -1 && "BestIndex not set");
463    return Filters[BestIndex];
464  }
465
466  // Called from Filter::recurse() when singleton exists.  For debug purpose.
467  void SingletonExists(unsigned Opc);
468
469  bool PositionFiltered(unsigned i) {
470    return ValueSet(FilterBitValues[i]);
471  }
472
473  // Calculates the island(s) needed to decode the instruction.
474  // This returns a lit of undecoded bits of an instructions, for example,
475  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
476  // decoded bits in order to verify that the instruction matches the Opcode.
477  unsigned getIslands(std::vector<unsigned> &StartBits,
478      std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
479      insn_t &Insn);
480
481  // The purpose of this function is for the API client to detect possible
482  // Load/Store Coprocessor instructions.  If the coprocessor number is of
483  // the instruction is either 10 or 11, the decoder should not report the
484  // instruction as LDC/LDC2/STC/STC2, but should match against Advanced SIMD or
485  // VFP instructions.
486  bool LdStCopEncoding1(unsigned Opc) {
487    const std::string &Name = nameWithID(Opc);
488    if (Name == "LDC_OFFSET" || Name == "LDC_OPTION" ||
489        Name == "LDC_POST" || Name == "LDC_PRE" ||
490        Name == "LDCL_OFFSET" || Name == "LDCL_OPTION" ||
491        Name == "LDCL_POST" || Name == "LDCL_PRE" ||
492        Name == "STC_OFFSET" || Name == "STC_OPTION" ||
493        Name == "STC_POST" || Name == "STC_PRE" ||
494        Name == "STCL_OFFSET" || Name == "STCL_OPTION" ||
495        Name == "STCL_POST" || Name == "STCL_PRE")
496      return true;
497    else
498      return false;
499  }
500
501  // Emits code to decode the singleton.  Return true if we have matched all the
502  // well-known bits.
503  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
504
505  // Emits code to decode the singleton, and then to decode the rest.
506  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
507                            ARMFilter &Best);
508
509  // Assign a single filter and run with it.
510  void runSingleFilter(ARMFilterChooser &owner, unsigned startBit,
511                       unsigned numBit, bool mixed);
512
513  // reportRegion is a helper function for filterProcessor to mark a region as
514  // eligible for use as a filter region.
515  void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
516      bool AllowMixed);
517
518  // FilterProcessor scans the well-known encoding bits of the instructions and
519  // builds up a list of candidate filters.  It chooses the best filter and
520  // recursively descends down the decoding tree.
521  bool filterProcessor(bool AllowMixed, bool Greedy = true);
522
523  // Decides on the best configuration of filter(s) to use in order to decode
524  // the instructions.  A conflict of instructions may occur, in which case we
525  // dump the conflict set to the standard error.
526  void doFilter();
527
528  // Emits code to decode our share of instructions.  Returns true if the
529  // emitted code causes a return, which occurs if we know how to decode
530  // the instruction at this level or the instruction is not decodeable.
531  bool emit(raw_ostream &o, unsigned &Indentation);
532};
533
534///////////////////////////
535//                       //
536// Filter Implmenetation //
537//                       //
538///////////////////////////
539
540ARMFilter::ARMFilter(const ARMFilter &f) :
541  Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
542  FilteredInstructions(f.FilteredInstructions),
543  VariableInstructions(f.VariableInstructions),
544  FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
545  LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
546}
547
548ARMFilter::ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
549    bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
550                  Mixed(mixed) {
551  assert(StartBit + NumBits - 1 < BIT_WIDTH);
552
553  NumFiltered = 0;
554  LastOpcFiltered = 0;
555  NumVariable = 0;
556
557  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
558    insn_t Insn;
559
560    // Populates the insn given the uid.
561    Owner->insnWithID(Insn, Owner->Opcodes[i]);
562
563    uint64_t Field;
564    // Scans the segment for possibly well-specified encoding bits.
565    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
566
567    if (ok) {
568      // The encoding bits are well-known.  Lets add the uid of the
569      // instruction into the bucket keyed off the constant field value.
570      LastOpcFiltered = Owner->Opcodes[i];
571      FilteredInstructions[Field].push_back(LastOpcFiltered);
572      ++NumFiltered;
573    } else {
574      // Some of the encoding bit(s) are unspecfied.  This contributes to
575      // one additional member of "Variable" instructions.
576      VariableInstructions.push_back(Owner->Opcodes[i]);
577      ++NumVariable;
578    }
579  }
580
581  assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
582         && "Filter returns no instruction categories");
583}
584
585ARMFilter::~ARMFilter() {
586  std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
587  for (filterIterator = FilterChooserMap.begin();
588       filterIterator != FilterChooserMap.end();
589       filterIterator++) {
590    delete filterIterator->second;
591  }
592}
593
594// Divides the decoding task into sub tasks and delegates them to the
595// inferior FilterChooser's.
596//
597// A special case arises when there's only one entry in the filtered
598// instructions.  In order to unambiguously decode the singleton, we need to
599// match the remaining undecoded encoding bits against the singleton.
600void ARMFilter::recurse() {
601  std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
602
603  bit_value_t BitValueArray[BIT_WIDTH];
604  // Starts by inheriting our parent filter chooser's filter bit values.
605  memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
606
607  unsigned bitIndex;
608
609  if (VariableInstructions.size()) {
610    // Conservatively marks each segment position as BIT_UNSET.
611    for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
612      BitValueArray[StartBit + bitIndex] = BIT_UNSET;
613
614    // Delegates to an inferior filter chooser for further processing on this
615    // group of instructions whose segment values are variable.
616    FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
617                              (unsigned)-1,
618                              new ARMFilterChooser(Owner->AllInstructions,
619                                                   VariableInstructions,
620                                                   BitValueArray,
621                                                   *Owner)
622                              ));
623  }
624
625  // No need to recurse for a singleton filtered instruction.
626  // See also Filter::emit().
627  if (getNumFiltered() == 1) {
628    //Owner->SingletonExists(LastOpcFiltered);
629    assert(FilterChooserMap.size() == 1);
630    return;
631  }
632
633  // Otherwise, create sub choosers.
634  for (mapIterator = FilteredInstructions.begin();
635       mapIterator != FilteredInstructions.end();
636       mapIterator++) {
637
638    // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
639    for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
640      if (mapIterator->first & (1ULL << bitIndex))
641        BitValueArray[StartBit + bitIndex] = BIT_TRUE;
642      else
643        BitValueArray[StartBit + bitIndex] = BIT_FALSE;
644    }
645
646    // Delegates to an inferior filter chooser for further processing on this
647    // category of instructions.
648    FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
649                              mapIterator->first,
650                              new ARMFilterChooser(Owner->AllInstructions,
651                                                   mapIterator->second,
652                                                   BitValueArray,
653                                                   *Owner)
654                              ));
655  }
656}
657
658// Emit code to decode instructions given a segment or segments of bits.
659void ARMFilter::emit(raw_ostream &o, unsigned &Indentation) {
660  o.indent(Indentation) << "// Check Inst{";
661
662  if (NumBits > 1)
663    o << (StartBit + NumBits - 1) << '-';
664
665  o << StartBit << "} ...\n";
666
667  o.indent(Indentation) << "switch (fieldFromInstruction(insn, "
668                        << StartBit << ", " << NumBits << ")) {\n";
669
670  std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
671
672  bool DefaultCase = false;
673  for (filterIterator = FilterChooserMap.begin();
674       filterIterator != FilterChooserMap.end();
675       filterIterator++) {
676
677    // Field value -1 implies a non-empty set of variable instructions.
678    // See also recurse().
679    if (filterIterator->first == (unsigned)-1) {
680      DefaultCase = true;
681
682      o.indent(Indentation) << "default:\n";
683      o.indent(Indentation) << "  break; // fallthrough\n";
684
685      // Closing curly brace for the switch statement.
686      // This is unconventional because we want the default processing to be
687      // performed for the fallthrough cases as well, i.e., when the "cases"
688      // did not prove a decoded instruction.
689      o.indent(Indentation) << "}\n";
690
691    } else
692      o.indent(Indentation) << "case " << filterIterator->first << ":\n";
693
694    // We arrive at a category of instructions with the same segment value.
695    // Now delegate to the sub filter chooser for further decodings.
696    // The case may fallthrough, which happens if the remaining well-known
697    // encoding bits do not match exactly.
698    if (!DefaultCase) { ++Indentation; ++Indentation; }
699
700    bool finished = filterIterator->second->emit(o, Indentation);
701    // For top level default case, there's no need for a break statement.
702    if (Owner->isTopLevel() && DefaultCase)
703      break;
704    if (!finished)
705      o.indent(Indentation) << "break;\n";
706
707    if (!DefaultCase) { --Indentation; --Indentation; }
708  }
709
710  // If there is no default case, we still need to supply a closing brace.
711  if (!DefaultCase) {
712    // Closing curly brace for the switch statement.
713    o.indent(Indentation) << "}\n";
714  }
715}
716
717// Returns the number of fanout produced by the filter.  More fanout implies
718// the filter distinguishes more categories of instructions.
719unsigned ARMFilter::usefulness() const {
720  if (VariableInstructions.size())
721    return FilteredInstructions.size();
722  else
723    return FilteredInstructions.size() + 1;
724}
725
726//////////////////////////////////
727//                              //
728// Filterchooser Implementation //
729//                              //
730//////////////////////////////////
731
732// Define the symbol here.
733TARGET_NAME_t ARMFilterChooser::TargetName;
734
735// This provides an opportunity for target specific code emission.
736void ARMFilterChooser::emitTopHook(raw_ostream &o) {
737  if (TargetName == TARGET_ARM) {
738    // Emit code that references the ARMFormat data type.
739    o << "static const ARMFormat ARMFormats[] = {\n";
740    for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) {
741      const Record &Def = *(AllInstructions[i]->TheDef);
742      const std::string &Name = Def.getName();
743      if (Def.isSubClassOf("InstARM") || Def.isSubClassOf("InstThumb"))
744        o.indent(2) <<
745          stringForARMFormat((ARMFormat)getByteField(Def, "Form"));
746      else
747        o << "  ARM_FORMAT_NA";
748
749      o << ",\t// Inst #" << i << " = " << Name << '\n';
750    }
751    o << "  ARM_FORMAT_NA\t// Unreachable.\n";
752    o << "};\n\n";
753  }
754}
755
756// Emit the top level typedef and decodeInstruction() function.
757void ARMFilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
758  // Run the target specific emit hook.
759  emitTopHook(o);
760
761  switch (BIT_WIDTH) {
762  case 8:
763    o.indent(Indentation) << "typedef uint8_t field_t;\n";
764    break;
765  case 16:
766    o.indent(Indentation) << "typedef uint16_t field_t;\n";
767    break;
768  case 32:
769    o.indent(Indentation) << "typedef uint32_t field_t;\n";
770    break;
771  case 64:
772    o.indent(Indentation) << "typedef uint64_t field_t;\n";
773    break;
774  default:
775    assert(0 && "Unexpected instruction size!");
776  }
777
778  o << '\n';
779
780  o.indent(Indentation) << "static field_t " <<
781    "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n";
782
783  o.indent(Indentation) << "{\n";
784
785  ++Indentation; ++Indentation;
786  o.indent(Indentation) << "assert(startBit + numBits <= " << BIT_WIDTH
787                        << " && \"Instruction field out of bounds!\");\n";
788  o << '\n';
789  o.indent(Indentation) << "field_t fieldMask;\n";
790  o << '\n';
791  o.indent(Indentation) << "if (numBits == " << BIT_WIDTH << ")\n";
792
793  ++Indentation; ++Indentation;
794  o.indent(Indentation) << "fieldMask = (field_t)-1;\n";
795  --Indentation; --Indentation;
796
797  o.indent(Indentation) << "else\n";
798
799  ++Indentation; ++Indentation;
800  o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
801  --Indentation; --Indentation;
802
803  o << '\n';
804  o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n";
805  --Indentation; --Indentation;
806
807  o.indent(Indentation) << "}\n";
808
809  o << '\n';
810
811  o.indent(Indentation) <<"static uint16_t decodeInstruction(field_t insn) {\n";
812
813  ++Indentation; ++Indentation;
814  // Emits code to decode the instructions.
815  emit(o, Indentation);
816
817  o << '\n';
818  o.indent(Indentation) << "return 0;\n";
819  --Indentation; --Indentation;
820
821  o.indent(Indentation) << "}\n";
822
823  o << '\n';
824}
825
826// This provides an opportunity for target specific code emission after
827// emitTop().
828void ARMFilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) {
829  if (TargetName != TARGET_THUMB) return;
830
831  // Emit code that decodes the Thumb ISA.
832  o.indent(Indentation)
833    << "static uint16_t decodeThumbInstruction(field_t insn) {\n";
834
835  ++Indentation; ++Indentation;
836
837  // Emits code to decode the instructions.
838  emit(o, Indentation);
839
840  o << '\n';
841  o.indent(Indentation) << "return 0;\n";
842
843  --Indentation; --Indentation;
844
845  o.indent(Indentation) << "}\n";
846}
847
848// Populates the field of the insn given the start position and the number of
849// consecutive bits to scan for.
850//
851// Returns false if and on the first uninitialized bit value encountered.
852// Returns true, otherwise.
853bool ARMFilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
854    unsigned StartBit, unsigned NumBits) const {
855  Field = 0;
856
857  for (unsigned i = 0; i < NumBits; ++i) {
858    if (Insn[StartBit + i] == BIT_UNSET)
859      return false;
860
861    if (Insn[StartBit + i] == BIT_TRUE)
862      Field = Field | (1ULL << i);
863  }
864
865  return true;
866}
867
868/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
869/// filter array as a series of chars.
870void ARMFilterChooser::dumpFilterArray(raw_ostream &o,
871    bit_value_t (&filter)[BIT_WIDTH]) {
872  unsigned bitIndex;
873
874  for (bitIndex = BIT_WIDTH; bitIndex > 0; bitIndex--) {
875    switch (filter[bitIndex - 1]) {
876    case BIT_UNFILTERED:
877      o << ".";
878      break;
879    case BIT_UNSET:
880      o << "_";
881      break;
882    case BIT_TRUE:
883      o << "1";
884      break;
885    case BIT_FALSE:
886      o << "0";
887      break;
888    }
889  }
890}
891
892/// dumpStack - dumpStack traverses the filter chooser chain and calls
893/// dumpFilterArray on each filter chooser up to the top level one.
894void ARMFilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
895  ARMFilterChooser *current = this;
896
897  while (current) {
898    o << prefix;
899    dumpFilterArray(o, current->FilterBitValues);
900    o << '\n';
901    current = current->Parent;
902  }
903}
904
905// Called from Filter::recurse() when singleton exists.  For debug purpose.
906void ARMFilterChooser::SingletonExists(unsigned Opc) {
907  insn_t Insn0;
908  insnWithID(Insn0, Opc);
909
910  errs() << "Singleton exists: " << nameWithID(Opc)
911         << " with its decoding dominating ";
912  for (unsigned i = 0; i < Opcodes.size(); ++i) {
913    if (Opcodes[i] == Opc) continue;
914    errs() << nameWithID(Opcodes[i]) << ' ';
915  }
916  errs() << '\n';
917
918  dumpStack(errs(), "\t\t");
919  for (unsigned i = 0; i < Opcodes.size(); i++) {
920    const std::string &Name = nameWithID(Opcodes[i]);
921
922    errs() << '\t' << Name << " ";
923    dumpBits(errs(),
924             getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
925    errs() << '\n';
926  }
927}
928
929// Calculates the island(s) needed to decode the instruction.
930// This returns a list of undecoded bits of an instructions, for example,
931// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
932// decoded bits in order to verify that the instruction matches the Opcode.
933unsigned ARMFilterChooser::getIslands(std::vector<unsigned> &StartBits,
934    std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
935    insn_t &Insn) {
936  unsigned Num, BitNo;
937  Num = BitNo = 0;
938
939  uint64_t FieldVal = 0;
940
941  // 0: Init
942  // 1: Water (the bit value does not affect decoding)
943  // 2: Island (well-known bit value needed for decoding)
944  int State = 0;
945  int Val = -1;
946
947  for (unsigned i = 0; i < BIT_WIDTH; ++i) {
948    Val = Value(Insn[i]);
949    bool Filtered = PositionFiltered(i);
950    switch (State) {
951    default:
952      assert(0 && "Unreachable code!");
953      break;
954    case 0:
955    case 1:
956      if (Filtered || Val == -1)
957        State = 1; // Still in Water
958      else {
959        State = 2; // Into the Island
960        BitNo = 0;
961        StartBits.push_back(i);
962        FieldVal = Val;
963      }
964      break;
965    case 2:
966      if (Filtered || Val == -1) {
967        State = 1; // Into the Water
968        EndBits.push_back(i - 1);
969        FieldVals.push_back(FieldVal);
970        ++Num;
971      } else {
972        State = 2; // Still in Island
973        ++BitNo;
974        FieldVal = FieldVal | Val << BitNo;
975      }
976      break;
977    }
978  }
979  // If we are still in Island after the loop, do some housekeeping.
980  if (State == 2) {
981    EndBits.push_back(BIT_WIDTH - 1);
982    FieldVals.push_back(FieldVal);
983    ++Num;
984  }
985
986  assert(StartBits.size() == Num && EndBits.size() == Num &&
987         FieldVals.size() == Num);
988  return Num;
989}
990
991// Emits code to decode the singleton.  Return true if we have matched all the
992// well-known bits.
993bool ARMFilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
994                                         unsigned Opc) {
995  std::vector<unsigned> StartBits;
996  std::vector<unsigned> EndBits;
997  std::vector<uint64_t> FieldVals;
998  insn_t Insn;
999  insnWithID(Insn, Opc);
1000
1001  // This provides a good opportunity to check for possible Ld/St Coprocessor
1002  // Opcode and escapes if the coproc # is either 10 or 11.  It is a NEON/VFP
1003  // instruction is disguise.
1004  if (TargetName == TARGET_ARM && LdStCopEncoding1(Opc)) {
1005    o.indent(Indentation);
1006    // A8.6.51 & A8.6.188
1007    // If coproc = 0b101?, i.e, slice(insn, 11, 8) = 10 or 11, escape.
1008    o << "if (fieldFromInstruction(insn, 9, 3) == 5) break; // fallthrough\n";
1009  }
1010
1011  // Look for islands of undecoded bits of the singleton.
1012  getIslands(StartBits, EndBits, FieldVals, Insn);
1013
1014  unsigned Size = StartBits.size();
1015  unsigned I, NumBits;
1016
1017  // If we have matched all the well-known bits, just issue a return.
1018  if (Size == 0) {
1019    o.indent(Indentation) << "return " << Opc << "; // " << nameWithID(Opc)
1020                          << '\n';
1021    return true;
1022  }
1023
1024  // Otherwise, there are more decodings to be done!
1025
1026  // Emit code to match the island(s) for the singleton.
1027  o.indent(Indentation) << "// Check ";
1028
1029  for (I = Size; I != 0; --I) {
1030    o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
1031    if (I > 1)
1032      o << "&& ";
1033    else
1034      o << "for singleton decoding...\n";
1035  }
1036
1037  o.indent(Indentation) << "if (";
1038
1039  for (I = Size; I != 0; --I) {
1040    NumBits = EndBits[I-1] - StartBits[I-1] + 1;
1041    o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits
1042      << ") == " << FieldVals[I-1];
1043    if (I > 1)
1044      o << " && ";
1045    else
1046      o << ")\n";
1047  }
1048
1049  o.indent(Indentation) << "  return " << Opc << "; // " << nameWithID(Opc)
1050                        << '\n';
1051
1052  return false;
1053}
1054
1055// Emits code to decode the singleton, and then to decode the rest.
1056void ARMFilterChooser::emitSingletonDecoder(raw_ostream &o,
1057                                            unsigned &Indentation,
1058                                            ARMFilter &Best) {
1059
1060  unsigned Opc = Best.getSingletonOpc();
1061
1062  emitSingletonDecoder(o, Indentation, Opc);
1063
1064  // Emit code for the rest.
1065  o.indent(Indentation) << "else\n";
1066
1067  Indentation += 2;
1068  Best.getVariableFC().emit(o, Indentation);
1069  Indentation -= 2;
1070}
1071
1072// Assign a single filter and run with it.  Top level API client can initialize
1073// with a single filter to start the filtering process.
1074void ARMFilterChooser::runSingleFilter(ARMFilterChooser &owner,
1075                                       unsigned startBit,
1076                                       unsigned numBit, bool mixed) {
1077  Filters.clear();
1078  ARMFilter F(*this, startBit, numBit, true);
1079  Filters.push_back(F);
1080  BestIndex = 0; // Sole Filter instance to choose from.
1081  bestFilter().recurse();
1082}
1083
1084// reportRegion is a helper function for filterProcessor to mark a region as
1085// eligible for use as a filter region.
1086void ARMFilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1087                                    unsigned BitIndex, bool AllowMixed) {
1088  if (RA == ATTR_MIXED && AllowMixed)
1089    Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, true));
1090  else if (RA == ATTR_ALL_SET && !AllowMixed)
1091    Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, false));
1092}
1093
1094// FilterProcessor scans the well-known encoding bits of the instructions and
1095// builds up a list of candidate filters.  It chooses the best filter and
1096// recursively descends down the decoding tree.
1097bool ARMFilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1098  Filters.clear();
1099  BestIndex = -1;
1100  unsigned numInstructions = Opcodes.size();
1101
1102  assert(numInstructions && "Filter created with no instructions");
1103
1104  // No further filtering is necessary.
1105  if (numInstructions == 1)
1106    return true;
1107
1108  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
1109  // instructions is 3.
1110  if (AllowMixed && !Greedy) {
1111    assert(numInstructions == 3);
1112
1113    for (unsigned i = 0; i < Opcodes.size(); ++i) {
1114      std::vector<unsigned> StartBits;
1115      std::vector<unsigned> EndBits;
1116      std::vector<uint64_t> FieldVals;
1117      insn_t Insn;
1118
1119      insnWithID(Insn, Opcodes[i]);
1120
1121      // Look for islands of undecoded bits of any instruction.
1122      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
1123        // Found an instruction with island(s).  Now just assign a filter.
1124        runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
1125                        true);
1126        return true;
1127      }
1128    }
1129  }
1130
1131  unsigned BitIndex, InsnIndex;
1132
1133  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1134  // The automaton consumes the corresponding bit from each
1135  // instruction.
1136  //
1137  //   Input symbols: 0, 1, and _ (unset).
1138  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1139  //   Initial state: NONE.
1140  //
1141  // (NONE) ------- [01] -> (ALL_SET)
1142  // (NONE) ------- _ ----> (ALL_UNSET)
1143  // (ALL_SET) ---- [01] -> (ALL_SET)
1144  // (ALL_SET) ---- _ ----> (MIXED)
1145  // (ALL_UNSET) -- [01] -> (MIXED)
1146  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1147  // (MIXED) ------ . ----> (MIXED)
1148  // (FILTERED)---- . ----> (FILTERED)
1149
1150  bitAttr_t bitAttrs[BIT_WIDTH];
1151
1152  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1153  // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1154  for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex)
1155    if (FilterBitValues[BitIndex] == BIT_TRUE ||
1156        FilterBitValues[BitIndex] == BIT_FALSE)
1157      bitAttrs[BitIndex] = ATTR_FILTERED;
1158    else
1159      bitAttrs[BitIndex] = ATTR_NONE;
1160
1161  for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
1162    insn_t insn;
1163
1164    insnWithID(insn, Opcodes[InsnIndex]);
1165
1166    for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) {
1167      switch (bitAttrs[BitIndex]) {
1168      case ATTR_NONE:
1169        if (insn[BitIndex] == BIT_UNSET)
1170          bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1171        else
1172          bitAttrs[BitIndex] = ATTR_ALL_SET;
1173        break;
1174      case ATTR_ALL_SET:
1175        if (insn[BitIndex] == BIT_UNSET)
1176          bitAttrs[BitIndex] = ATTR_MIXED;
1177        break;
1178      case ATTR_ALL_UNSET:
1179        if (insn[BitIndex] != BIT_UNSET)
1180          bitAttrs[BitIndex] = ATTR_MIXED;
1181        break;
1182      case ATTR_MIXED:
1183      case ATTR_FILTERED:
1184        break;
1185      }
1186    }
1187  }
1188
1189  // The regionAttr automaton consumes the bitAttrs automatons' state,
1190  // lowest-to-highest.
1191  //
1192  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1193  //   States:        NONE, ALL_SET, MIXED
1194  //   Initial state: NONE
1195  //
1196  // (NONE) ----- F --> (NONE)
1197  // (NONE) ----- S --> (ALL_SET)     ; and set region start
1198  // (NONE) ----- U --> (NONE)
1199  // (NONE) ----- M --> (MIXED)       ; and set region start
1200  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
1201  // (ALL_SET) -- S --> (ALL_SET)
1202  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
1203  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
1204  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
1205  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
1206  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
1207  // (MIXED) ---- M --> (MIXED)
1208
1209  bitAttr_t RA = ATTR_NONE;
1210  unsigned StartBit = 0;
1211
1212  for (BitIndex = 0; BitIndex < BIT_WIDTH; BitIndex++) {
1213    bitAttr_t bitAttr = bitAttrs[BitIndex];
1214
1215    assert(bitAttr != ATTR_NONE && "Bit without attributes");
1216
1217    switch (RA) {
1218    case ATTR_NONE:
1219      switch (bitAttr) {
1220      case ATTR_FILTERED:
1221        break;
1222      case ATTR_ALL_SET:
1223        StartBit = BitIndex;
1224        RA = ATTR_ALL_SET;
1225        break;
1226      case ATTR_ALL_UNSET:
1227        break;
1228      case ATTR_MIXED:
1229        StartBit = BitIndex;
1230        RA = ATTR_MIXED;
1231        break;
1232      default:
1233        assert(0 && "Unexpected bitAttr!");
1234      }
1235      break;
1236    case ATTR_ALL_SET:
1237      switch (bitAttr) {
1238      case ATTR_FILTERED:
1239        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1240        RA = ATTR_NONE;
1241        break;
1242      case ATTR_ALL_SET:
1243        break;
1244      case ATTR_ALL_UNSET:
1245        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1246        RA = ATTR_NONE;
1247        break;
1248      case ATTR_MIXED:
1249        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1250        StartBit = BitIndex;
1251        RA = ATTR_MIXED;
1252        break;
1253      default:
1254        assert(0 && "Unexpected bitAttr!");
1255      }
1256      break;
1257    case ATTR_MIXED:
1258      switch (bitAttr) {
1259      case ATTR_FILTERED:
1260        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1261        StartBit = BitIndex;
1262        RA = ATTR_NONE;
1263        break;
1264      case ATTR_ALL_SET:
1265        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1266        StartBit = BitIndex;
1267        RA = ATTR_ALL_SET;
1268        break;
1269      case ATTR_ALL_UNSET:
1270        reportRegion(RA, StartBit, BitIndex, AllowMixed);
1271        RA = ATTR_NONE;
1272        break;
1273      case ATTR_MIXED:
1274        break;
1275      default:
1276        assert(0 && "Unexpected bitAttr!");
1277      }
1278      break;
1279    case ATTR_ALL_UNSET:
1280      assert(0 && "regionAttr state machine has no ATTR_UNSET state");
1281    case ATTR_FILTERED:
1282      assert(0 && "regionAttr state machine has no ATTR_FILTERED state");
1283    }
1284  }
1285
1286  // At the end, if we're still in ALL_SET or MIXED states, report a region
1287  switch (RA) {
1288  case ATTR_NONE:
1289    break;
1290  case ATTR_FILTERED:
1291    break;
1292  case ATTR_ALL_SET:
1293    reportRegion(RA, StartBit, BitIndex, AllowMixed);
1294    break;
1295  case ATTR_ALL_UNSET:
1296    break;
1297  case ATTR_MIXED:
1298    reportRegion(RA, StartBit, BitIndex, AllowMixed);
1299    break;
1300  }
1301
1302  // We have finished with the filter processings.  Now it's time to choose
1303  // the best performing filter.
1304  BestIndex = 0;
1305  bool AllUseless = true;
1306  unsigned BestScore = 0;
1307
1308  for (unsigned i = 0, e = Filters.size(); i != e; ++i) {
1309    unsigned Usefulness = Filters[i].usefulness();
1310
1311    if (Usefulness)
1312      AllUseless = false;
1313
1314    if (Usefulness > BestScore) {
1315      BestIndex = i;
1316      BestScore = Usefulness;
1317    }
1318  }
1319
1320  if (!AllUseless)
1321    bestFilter().recurse();
1322
1323  return !AllUseless;
1324} // end of FilterChooser::filterProcessor(bool)
1325
1326// Decides on the best configuration of filter(s) to use in order to decode
1327// the instructions.  A conflict of instructions may occur, in which case we
1328// dump the conflict set to the standard error.
1329void ARMFilterChooser::doFilter() {
1330  unsigned Num = Opcodes.size();
1331  assert(Num && "FilterChooser created with no instructions");
1332
1333  // Heuristics: Use Inst{31-28} as the top level filter for ARM ISA.
1334  if (TargetName == TARGET_ARM && Parent == NULL) {
1335    runSingleFilter(*this, 28, 4, false);
1336    return;
1337  }
1338
1339  // Try regions of consecutive known bit values first.
1340  if (filterProcessor(false))
1341    return;
1342
1343  // Then regions of mixed bits (both known and unitialized bit values allowed).
1344  if (filterProcessor(true))
1345    return;
1346
1347  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1348  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1349  // well-known encoding pattern.  In such case, we backtrack and scan for the
1350  // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1351  if (Num == 3 && filterProcessor(true, false))
1352    return;
1353
1354  // If we come to here, the instruction decoding has failed.
1355  // Set the BestIndex to -1 to indicate so.
1356  BestIndex = -1;
1357}
1358
1359// Emits code to decode our share of instructions.  Returns true if the
1360// emitted code causes a return, which occurs if we know how to decode
1361// the instruction at this level or the instruction is not decodeable.
1362bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
1363  if (Opcodes.size() == 1)
1364    // There is only one instruction in the set, which is great!
1365    // Call emitSingletonDecoder() to see whether there are any remaining
1366    // encodings bits.
1367    return emitSingletonDecoder(o, Indentation, Opcodes[0]);
1368
1369  // Choose the best filter to do the decodings!
1370  if (BestIndex != -1) {
1371    ARMFilter &Best = bestFilter();
1372    if (Best.getNumFiltered() == 1)
1373      emitSingletonDecoder(o, Indentation, Best);
1374    else
1375      bestFilter().emit(o, Indentation);
1376    return false;
1377  }
1378
1379  // If we reach here, there is a conflict in decoding.  Let's resolve the known
1380  // conflicts!
1381  if ((TargetName == TARGET_ARM || TargetName == TARGET_THUMB) &&
1382      Opcodes.size() == 2) {
1383    // Resolve the known conflict sets:
1384    //
1385    // 1. source registers are identical => VMOVDneon; otherwise => VORRd
1386    // 2. source registers are identical => VMOVQ; otherwise => VORRq
1387    // 3. LDR, LDRcp => return LDR for now.
1388    // FIXME: How can we distinguish between LDR and LDRcp?  Do we need to?
1389    // 4. tLDMIA, tLDMIA_UPD => Rn = Inst{10-8}, reglist = Inst{7-0},
1390    //    wback = registers<Rn> = 0
1391    // NOTE: (tLDM, tLDM_UPD) resolution must come before Advanced SIMD
1392    //       addressing mode resolution!!!
1393    // 5. VLD[234]LN*/VST[234]LN* vs. VLD[234]LN*_UPD/VST[234]LN*_UPD conflicts
1394    //    are resolved returning the non-UPD versions of the instructions if the
1395    //    Rm field, i.e., Inst{3-0} is 0b1111.  This is specified in A7.7.1
1396    //    Advanced SIMD addressing mode.
1397    const std::string &name1 = nameWithID(Opcodes[0]);
1398    const std::string &name2 = nameWithID(Opcodes[1]);
1399    if ((name1 == "VMOVDneon" && name2 == "VORRd") ||
1400        (name1 == "VMOVQ" && name2 == "VORRq")) {
1401      // Inserting the opening curly brace for this case block.
1402      --Indentation; --Indentation;
1403      o.indent(Indentation) << "{\n";
1404      ++Indentation; ++Indentation;
1405
1406      o.indent(Indentation)
1407        << "field_t N = fieldFromInstruction(insn, 7, 1), "
1408        << "M = fieldFromInstruction(insn, 5, 1);\n";
1409      o.indent(Indentation)
1410        << "field_t Vn = fieldFromInstruction(insn, 16, 4), "
1411        << "Vm = fieldFromInstruction(insn, 0, 4);\n";
1412      o.indent(Indentation)
1413        << "return (N == M && Vn == Vm) ? "
1414        << Opcodes[0] << " /* " << name1 << " */ : "
1415        << Opcodes[1] << " /* " << name2 << " */ ;\n";
1416
1417      // Inserting the closing curly brace for this case block.
1418      --Indentation; --Indentation;
1419      o.indent(Indentation) << "}\n";
1420      ++Indentation; ++Indentation;
1421
1422      return true;
1423    }
1424    if (name1 == "LDR" && name2 == "LDRcp") {
1425      o.indent(Indentation)
1426        << "return " << Opcodes[0]
1427        << "; // Returning LDR for {LDR, LDRcp}\n";
1428      return true;
1429    }
1430    if (name1 == "tLDMIA" && name2 == "tLDMIA_UPD") {
1431      // Inserting the opening curly brace for this case block.
1432      --Indentation; --Indentation;
1433      o.indent(Indentation) << "{\n";
1434      ++Indentation; ++Indentation;
1435
1436      o.indent(Indentation)
1437        << "unsigned Rn = fieldFromInstruction(insn, 8, 3), "
1438        << "list = fieldFromInstruction(insn, 0, 8);\n";
1439      o.indent(Indentation)
1440        << "return ((list >> Rn) & 1) == 0 ? "
1441        << Opcodes[1] << " /* " << name2 << " */ : "
1442        << Opcodes[0] << " /* " << name1 << " */ ;\n";
1443
1444      // Inserting the closing curly brace for this case block.
1445      --Indentation; --Indentation;
1446      o.indent(Indentation) << "}\n";
1447      ++Indentation; ++Indentation;
1448
1449      return true;
1450    }
1451    if (sameStringExceptSuffix(name1, name2, "_UPD")) {
1452      o.indent(Indentation)
1453        << "return fieldFromInstruction(insn, 0, 4) == 15 ? " << Opcodes[0]
1454        << " /* " << name1 << " */ : " << Opcodes[1] << "/* " << name2
1455        << " */ ; // Advanced SIMD addressing mode\n";
1456      return true;
1457    }
1458
1459    // Otherwise, it does not belong to the known conflict sets.
1460  }
1461
1462  // We don't know how to decode these instructions!  Return 0 and dump the
1463  // conflict set!
1464  o.indent(Indentation) << "return 0;" << " // Conflict set: ";
1465  for (int i = 0, N = Opcodes.size(); i < N; ++i) {
1466    o << nameWithID(Opcodes[i]);
1467    if (i < (N - 1))
1468      o << ", ";
1469    else
1470      o << '\n';
1471  }
1472
1473  // Print out useful conflict information for postmortem analysis.
1474  errs() << "Decoding Conflict:\n";
1475
1476  dumpStack(errs(), "\t\t");
1477
1478  for (unsigned i = 0; i < Opcodes.size(); i++) {
1479    const std::string &Name = nameWithID(Opcodes[i]);
1480
1481    errs() << '\t' << Name << " ";
1482    dumpBits(errs(),
1483             getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
1484    errs() << '\n';
1485  }
1486
1487  return true;
1488}
1489
1490
1491////////////////////////////////////////////
1492//                                        //
1493//  ARMDEBackend                          //
1494//  (Helper class for ARMDecoderEmitter)  //
1495//                                        //
1496////////////////////////////////////////////
1497
1498class ARMDecoderEmitter::ARMDEBackend {
1499public:
1500  ARMDEBackend(ARMDecoderEmitter &frontend, RecordKeeper &Records) :
1501    NumberedInstructions(),
1502    Opcodes(),
1503    Frontend(frontend),
1504    Target(Records),
1505    FC(NULL)
1506  {
1507    if (Target.getName() == "ARM")
1508      TargetName = TARGET_ARM;
1509    else {
1510      errs() << "Target name " << Target.getName() << " not recognized\n";
1511      assert(0 && "Unknown target");
1512    }
1513
1514    // Populate the instructions for our TargetName.
1515    populateInstructions();
1516  }
1517
1518  ~ARMDEBackend() {
1519    if (FC) {
1520      delete FC;
1521      FC = NULL;
1522    }
1523  }
1524
1525  void getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
1526                                                &NumberedInstructions) {
1527    // We must emit the PHI opcode first...
1528    std::string Namespace = Target.getInstNamespace();
1529    assert(!Namespace.empty() && "No instructions defined.");
1530
1531    NumberedInstructions = Target.getInstructionsByEnumValue();
1532  }
1533
1534  bool populateInstruction(const CodeGenInstruction &CGI, TARGET_NAME_t TN);
1535
1536  void populateInstructions();
1537
1538  // Emits disassembler code for instruction decoding.  This delegates to the
1539  // FilterChooser instance to do the heavy lifting.
1540  void emit(raw_ostream &o);
1541
1542protected:
1543  std::vector<const CodeGenInstruction*> NumberedInstructions;
1544  std::vector<unsigned> Opcodes;
1545  // Special case for the ARM chip, which supports ARM and Thumb ISAs.
1546  // Opcodes2 will be populated with the Thumb opcodes.
1547  std::vector<unsigned> Opcodes2;
1548  ARMDecoderEmitter &Frontend;
1549  CodeGenTarget Target;
1550  ARMFilterChooser *FC;
1551
1552  TARGET_NAME_t TargetName;
1553};
1554
1555bool ARMDecoderEmitter::
1556ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
1557                                  TARGET_NAME_t TN) {
1558  const Record &Def = *CGI.TheDef;
1559  const StringRef Name = Def.getName();
1560  uint8_t Form = getByteField(Def, "Form");
1561
1562  BitsInit &Bits = getBitsField(Def, "Inst");
1563
1564  // If all the bit positions are not specified; do not decode this instruction.
1565  // We are bound to fail!  For proper disassembly, the well-known encoding bits
1566  // of the instruction must be fully specified.
1567  //
1568  // This also removes pseudo instructions from considerations of disassembly,
1569  // which is a better design and less fragile than the name matchings.
1570  if (Bits.allInComplete()) return false;
1571
1572  // Ignore "asm parser only" instructions.
1573  if (Def.getValueAsBit("isAsmParserOnly"))
1574    return false;
1575
1576  if (TN == TARGET_ARM) {
1577    if (Form == ARM_FORMAT_PSEUDO)
1578      return false;
1579    if (thumbInstruction(Form))
1580      return false;
1581
1582    // Tail calls are other patterns that generate existing instructions.
1583    if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" ||
1584        Name == "TCRETURNri" || Name == "TCRETURNriND" ||
1585        Name == "TAILJMPd"  || Name == "TAILJMPdt" ||
1586        Name == "TAILJMPdND" || Name == "TAILJMPdNDt" ||
1587        Name == "TAILJMPr"  || Name == "TAILJMPrND" ||
1588        Name == "MOVr_TC")
1589      return false;
1590
1591    // Delegate ADR disassembly to the more generic ADDri/SUBri instructions.
1592    if (Name == "ADR")
1593      return false;
1594
1595    //
1596    // The following special cases are for conflict resolutions.
1597    //
1598
1599    // A8-598: VEXT
1600    // Vector Extract extracts elements from the bottom end of the second
1601    // operand vector and the top end of the first, concatenates them and
1602    // places the result in the destination vector.  The elements of the
1603    // vectors are treated as being 8-bit bitfields.  There is no distinction
1604    // between data types.  The size of the operation can be specified in
1605    // assembler as vext.size.  If the value is 16, 32, or 64, the syntax is
1606    // a pseudo-instruction for a VEXT instruction specifying the equivalent
1607    // number of bytes.
1608    //
1609    // Variants VEXTd16, VEXTd32, VEXTd8, and VEXTdf are reduced to VEXTd8;
1610    // variants VEXTq16, VEXTq32, VEXTq8, and VEXTqf are reduced to VEXTq8.
1611    if (Name == "VEXTd16" || Name == "VEXTd32" || Name == "VEXTdf" ||
1612        Name == "VEXTq16" || Name == "VEXTq32" || Name == "VEXTqf")
1613      return false;
1614  } else if (TN == TARGET_THUMB) {
1615    if (!thumbInstruction(Form))
1616      return false;
1617
1618    // A8.6.25 BX.  Use the generic tBX_Rm, ignore tBX_RET and tBX_RET_vararg.
1619    if (Name == "tBX_RET" || Name == "tBX_RET_vararg")
1620      return false;
1621
1622    // Ignore tADR, prefer tADDrPCi.
1623    if (Name == "tADR")
1624      return false;
1625
1626    // Delegate t2ADR disassembly to the more generic t2ADDri12/t2SUBri12
1627    // instructions.
1628    if (Name == "t2ADR")
1629      return false;
1630
1631    // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
1632    // Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s].
1633    // Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s].
1634    if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" ||
1635        Name == "t2SUBrSPs" || Name == "t2ADDrSPs")
1636      return false;
1637
1638    // FIXME: Use ldr.n to work around a Darwin assembler bug.
1639    // Introduce a workaround with tLDRpciDIS opcode.
1640    if (Name == "tLDRpci")
1641      return false;
1642
1643    // Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST.
1644    if (Name == "t2LDRDpci")
1645      return false;
1646
1647    // Resolve conflicts:
1648    //
1649    //   t2LDMIA_RET conflict with t2LDM (ditto)
1650    //   tMOVCCi conflicts with tMOVi8
1651    //   tMOVCCr conflicts with tMOVgpr2gpr
1652    //   tLDRcp conflicts with tLDRspi
1653    //   t2MOVCCi16 conflicts with tMOVi16
1654    if (Name == "t2LDMIA_RET" ||
1655        Name == "tMOVCCi" || Name == "tMOVCCr" ||
1656        Name == "tLDRcp" ||
1657        Name == "t2MOVCCi16")
1658      return false;
1659  }
1660
1661  DEBUG({
1662      // Dumps the instruction encoding format.
1663      switch (TargetName) {
1664      case TARGET_ARM:
1665      case TARGET_THUMB:
1666        errs() << Name << " " << stringForARMFormat((ARMFormat)Form);
1667        break;
1668      }
1669
1670      errs() << " ";
1671
1672      // Dumps the instruction encoding bits.
1673      dumpBits(errs(), Bits);
1674
1675      errs() << '\n';
1676
1677      // Dumps the list of operand info.
1678      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
1679        const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
1680        const std::string &OperandName = Info.Name;
1681        const Record &OperandDef = *Info.Rec;
1682
1683        errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
1684      }
1685    });
1686
1687  return true;
1688}
1689
1690void ARMDecoderEmitter::ARMDEBackend::populateInstructions() {
1691  getInstructionsByEnumValue(NumberedInstructions);
1692
1693  unsigned numUIDs = NumberedInstructions.size();
1694  if (TargetName == TARGET_ARM) {
1695    for (unsigned uid = 0; uid < numUIDs; uid++) {
1696      // filter out intrinsics
1697      if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM"))
1698        continue;
1699
1700      if (populateInstruction(*NumberedInstructions[uid], TargetName))
1701        Opcodes.push_back(uid);
1702    }
1703
1704    // Special handling for the ARM chip, which supports two modes of execution.
1705    // This branch handles the Thumb opcodes.
1706    for (unsigned uid = 0; uid < numUIDs; uid++) {
1707      // filter out intrinsics
1708      if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM")
1709          && !NumberedInstructions[uid]->TheDef->isSubClassOf("InstThumb"))
1710        continue;
1711
1712      if (populateInstruction(*NumberedInstructions[uid], TARGET_THUMB))
1713        Opcodes2.push_back(uid);
1714    }
1715
1716    return;
1717  }
1718
1719  // For other targets.
1720  for (unsigned uid = 0; uid < numUIDs; uid++) {
1721    Record *R = NumberedInstructions[uid]->TheDef;
1722    if (R->getValueAsString("Namespace") == "TargetOpcode")
1723      continue;
1724
1725    if (populateInstruction(*NumberedInstructions[uid], TargetName))
1726      Opcodes.push_back(uid);
1727  }
1728}
1729
1730// Emits disassembler code for instruction decoding.  This delegates to the
1731// FilterChooser instance to do the heavy lifting.
1732void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) {
1733  switch (TargetName) {
1734  case TARGET_ARM:
1735    Frontend.EmitSourceFileHeader("ARM/Thumb Decoders", o);
1736    break;
1737  default:
1738    assert(0 && "Unreachable code!");
1739  }
1740
1741  o << "#include \"llvm/Support/DataTypes.h\"\n";
1742  o << "#include <assert.h>\n";
1743  o << '\n';
1744  o << "namespace llvm {\n\n";
1745
1746  ARMFilterChooser::setTargetName(TargetName);
1747
1748  switch (TargetName) {
1749  case TARGET_ARM: {
1750    // Emit common utility and ARM ISA decoder.
1751    FC = new ARMFilterChooser(NumberedInstructions, Opcodes);
1752    // Reset indentation level.
1753    unsigned Indentation = 0;
1754    FC->emitTop(o, Indentation);
1755    delete FC;
1756
1757    // Emit Thumb ISA decoder as well.
1758    ARMFilterChooser::setTargetName(TARGET_THUMB);
1759    FC = new ARMFilterChooser(NumberedInstructions, Opcodes2);
1760    // Reset indentation level.
1761    Indentation = 0;
1762    FC->emitBot(o, Indentation);
1763    break;
1764  }
1765  default:
1766    assert(0 && "Unreachable code!");
1767  }
1768
1769  o << "\n} // End llvm namespace \n";
1770}
1771
1772/////////////////////////
1773//  Backend interface  //
1774/////////////////////////
1775
1776void ARMDecoderEmitter::initBackend()
1777{
1778  Backend = new ARMDEBackend(*this, Records);
1779}
1780
1781void ARMDecoderEmitter::run(raw_ostream &o)
1782{
1783  Backend->emit(o);
1784}
1785
1786void ARMDecoderEmitter::shutdownBackend()
1787{
1788  delete Backend;
1789  Backend = NULL;
1790}
1791