1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "courgette/assembly_program.h"
6
7#include <memory.h>
8#include <algorithm>
9#include <map>
10#include <set>
11#include <sstream>
12#include <vector>
13
14#include "base/logging.h"
15#include "base/memory/scoped_ptr.h"
16
17#include "courgette/courgette.h"
18#include "courgette/encoded_program.h"
19
20namespace courgette {
21
22// Opcodes of simple assembly language
23enum OP {
24  ORIGIN,         // ORIGIN <rva> - set current address for assembly.
25  MAKEPERELOCS,   // Generates a base relocation table.
26  MAKEELFRELOCS,  // Generates a base relocation table.
27  DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
28  REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
29  ABS32,          // REL32 <label> - emit am abs32 encoded reference to 'label'.
30  REL32ARM,       // REL32ARM <c_op> <label> - arm-specific rel32 reference
31  MAKEELFARMRELOCS, // Generates a base relocation table.
32  DEFBYTES,       // Emits any number of byte literals
33  LAST_OP
34};
35
36// Base class for instructions.  Because we have so many instructions we want to
37// keep them as small as possible.  For this reason we avoid virtual functions.
38//
39class Instruction {
40 public:
41  OP op() const { return static_cast<OP>(op_); }
42
43 protected:
44  explicit Instruction(OP op) : op_(op), info_(0) {}
45  Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
46
47  uint32 op_   : 4;    // A few bits to store the OP code.
48  uint32 info_ : 28;   // Remaining bits in first word available to subclass.
49
50 private:
51  DISALLOW_COPY_AND_ASSIGN(Instruction);
52};
53
54namespace {
55
56// Sets the current address for the emitting instructions.
57class OriginInstruction : public Instruction {
58 public:
59  explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
60  RVA origin_rva() const { return rva_; }
61 private:
62  RVA  rva_;
63};
64
65// Emits an entire PE base relocation table.
66class PeRelocsInstruction : public Instruction {
67 public:
68  PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
69};
70
71// Emits an ELF relocation table.
72class ElfRelocsInstruction : public Instruction {
73 public:
74  ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
75};
76
77// Emits an ELF ARM relocation table.
78class ElfARMRelocsInstruction : public Instruction {
79 public:
80  ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
81};
82
83// Emits a single byte.
84class ByteInstruction : public Instruction {
85 public:
86  explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
87  uint8 byte_value() const { return info_; }
88};
89
90// Emits a single byte.
91class BytesInstruction : public Instruction {
92 public:
93  BytesInstruction(const uint8* values, uint32 len)
94      : Instruction(DEFBYTES, 0),
95        values_(values),
96        len_(len) {}
97  const uint8* byte_values() const { return values_; }
98  uint32 len() const { return len_; }
99
100 private:
101  const uint8* values_;
102  uint32 len_;
103};
104
105// A ABS32 to REL32 instruction emits a reference to a label's address.
106class InstructionWithLabel : public Instruction {
107 public:
108  InstructionWithLabel(OP op, Label* label)
109    : Instruction(op, 0), label_(label) {
110    if (label == NULL) NOTREACHED();
111  }
112  Label* label() const { return label_; }
113 protected:
114  Label* label_;
115};
116
117// An ARM REL32 instruction emits a reference to a label's address and
118// a specially-compressed ARM op.
119class InstructionWithLabelARM : public InstructionWithLabel {
120 public:
121  InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
122                          const uint8* arm_op, uint16 op_size)
123    : InstructionWithLabel(op, label), compressed_op_(compressed_op),
124      arm_op_(arm_op), op_size_(op_size) {
125    if (label == NULL) NOTREACHED();
126  }
127  uint16 compressed_op() const { return compressed_op_; }
128  const uint8* arm_op() const { return arm_op_; }
129  uint16 op_size() const { return op_size_; }
130 private:
131  uint16 compressed_op_;
132  const uint8* arm_op_;
133  uint16 op_size_;
134};
135
136}  // namespace
137
138AssemblyProgram::AssemblyProgram(ExecutableType kind)
139  : kind_(kind), image_base_(0) {
140}
141
142static void DeleteContainedLabels(const RVAToLabel& labels) {
143  for (RVAToLabel::const_iterator p = labels.begin();  p != labels.end();  ++p)
144    delete p->second;
145}
146
147AssemblyProgram::~AssemblyProgram() {
148  for (size_t i = 0;  i < instructions_.size();  ++i) {
149    Instruction* instruction = instructions_[i];
150    if (instruction->op() != DEFBYTE)  // Will be in byte_instruction_cache_.
151      delete instruction;
152  }
153  if (byte_instruction_cache_.get()) {
154    for (size_t i = 0;  i < 256;  ++i)
155      delete byte_instruction_cache_[i];
156  }
157  DeleteContainedLabels(rel32_labels_);
158  DeleteContainedLabels(abs32_labels_);
159}
160
161CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
162  return Emit(new(std::nothrow) PeRelocsInstruction());
163}
164
165CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
166  return Emit(new(std::nothrow) ElfRelocsInstruction());
167}
168
169CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
170  return Emit(new(std::nothrow) ElfARMRelocsInstruction());
171}
172
173CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
174  return Emit(new(std::nothrow) OriginInstruction(rva));
175}
176
177CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
178  return Emit(GetByteInstruction(byte));
179}
180
181CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
182                                                uint32 len) {
183  return Emit(new(std::nothrow) BytesInstruction(values, len));
184}
185
186CheckBool AssemblyProgram::EmitRel32(Label* label) {
187  return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
188}
189
190CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
191                                        const uint8* arm_op, uint16 op_size) {
192  return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
193                                                        arm_op, op_size));
194}
195
196CheckBool AssemblyProgram::EmitAbs32(Label* label) {
197  return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
198}
199
200Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
201  return FindLabel(rva, &abs32_labels_);
202}
203
204Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
205  return FindLabel(rva, &rel32_labels_);
206}
207
208void AssemblyProgram::DefaultAssignIndexes() {
209  DefaultAssignIndexes(&abs32_labels_);
210  DefaultAssignIndexes(&rel32_labels_);
211}
212
213void AssemblyProgram::UnassignIndexes() {
214  UnassignIndexes(&abs32_labels_);
215  UnassignIndexes(&rel32_labels_);
216}
217
218void AssemblyProgram::AssignRemainingIndexes() {
219  AssignRemainingIndexes(&abs32_labels_);
220  AssignRemainingIndexes(&rel32_labels_);
221}
222
223Label* AssemblyProgram::InstructionAbs32Label(
224    const Instruction* instruction) const {
225  if (instruction->op() == ABS32)
226    return static_cast<const InstructionWithLabel*>(instruction)->label();
227  return NULL;
228}
229
230Label* AssemblyProgram::InstructionRel32Label(
231    const Instruction* instruction) const {
232  if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
233    Label* label =
234        static_cast<const InstructionWithLabel*>(instruction)->label();
235    return label;
236  }
237  return NULL;
238}
239
240CheckBool AssemblyProgram::Emit(Instruction* instruction) {
241  if (!instruction)
242    return false;
243  bool ok = instructions_.push_back(instruction);
244  if (!ok)
245    delete instruction;
246  return ok;
247}
248
249Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
250  Label*& slot = (*labels)[rva];
251  if (slot == NULL) {
252    slot = new(std::nothrow) Label(rva);
253  }
254  slot->count_++;
255  return slot;
256}
257
258void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
259  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
260    Label* current = p->second;
261    current->index_ = Label::kNoIndex;
262  }
263}
264
265// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
266// address order.
267//
268void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
269  int index = 0;
270  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
271    Label* current = p->second;
272    if (current->index_ != Label::kNoIndex)
273      NOTREACHED();
274    current->index_ = index;
275    ++index;
276  }
277}
278
279// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
280// yet assigned an index.
281//
282void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
283  // An address table compresses best when each index is associated with an
284  // address that is slight larger than the previous index.
285
286  // First see which indexes have not been used.  The 'available' vector could
287  // grow even bigger, but the number of addresses is a better starting size
288  // than empty.
289  std::vector<bool> available(labels->size(), true);
290  int used = 0;
291
292  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
293    int index = p->second->index_;
294    if (index != Label::kNoIndex) {
295      while (static_cast<size_t>(index) >= available.size())
296        available.push_back(true);
297      available.at(index) = false;
298      ++used;
299    }
300  }
301
302  VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
303
304  // Are there any unused labels that happen to be adjacent following a used
305  // label?
306  //
307  int fill_forward_count = 0;
308  Label* prev = 0;
309  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
310    Label* current = p->second;
311    if (current->index_ == Label::kNoIndex) {
312      int index = 0;
313      if (prev  &&  prev->index_ != Label::kNoIndex)
314        index = prev->index_ + 1;
315      if (index < static_cast<int>(available.size()) && available.at(index)) {
316        current->index_ = index;
317        available.at(index) = false;
318        ++fill_forward_count;
319      }
320    }
321    prev = current;
322  }
323
324  // Are there any unused labels that happen to be adjacent preceeding a used
325  // label?
326  //
327  int fill_backward_count = 0;
328  prev = 0;
329  for (RVAToLabel::reverse_iterator p = labels->rbegin();
330       p != labels->rend();
331       ++p) {
332    Label* current = p->second;
333    if (current->index_ == Label::kNoIndex) {
334      int prev_index;
335      if (prev)
336        prev_index = prev->index_;
337      else
338        prev_index = static_cast<uint32>(available.size());
339      if (prev_index != 0  &&
340          prev_index != Label::kNoIndex  &&
341          available.at(prev_index - 1)) {
342        current->index_ = prev_index - 1;
343        available.at(current->index_) = false;
344        ++fill_backward_count;
345      }
346    }
347    prev = current;
348  }
349
350  // Fill in any remaining indexes
351  int fill_infill_count = 0;
352  int index = 0;
353  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
354    Label* current = p->second;
355    if (current->index_ == Label::kNoIndex) {
356      while (!available.at(index)) {
357        ++index;
358      }
359      current->index_ = index;
360      available.at(index) = false;
361      ++index;
362      ++fill_infill_count;
363    }
364  }
365
366  VLOG(1) << "  fill forward " << fill_forward_count
367          << "  backward " << fill_backward_count
368          << "  infill " << fill_infill_count;
369}
370
371typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
372
373#if defined(OS_WIN)
374__declspec(noinline)
375#endif
376static CheckBool DefineLabels(const RVAToLabel& labels,
377                              EncodedProgram* encoded_format,
378                              DefineLabelMethod define_label) {
379  bool ok = true;
380  for (RVAToLabel::const_iterator p = labels.begin();
381       ok && p != labels.end();
382       ++p) {
383    Label* label = p->second;
384    ok = (encoded_format->*define_label)(label->index_, label->rva_);
385  }
386  return ok;
387}
388
389EncodedProgram* AssemblyProgram::Encode() const {
390  scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
391  if (!encoded.get())
392    return NULL;
393
394  encoded->set_image_base(image_base_);
395
396  if (!DefineLabels(abs32_labels_, encoded.get(),
397                    &EncodedProgram::DefineAbs32Label) ||
398      !DefineLabels(rel32_labels_, encoded.get(),
399                    &EncodedProgram::DefineRel32Label)) {
400    return NULL;
401  }
402
403  encoded->EndLabels();
404
405  for (size_t i = 0;  i < instructions_.size();  ++i) {
406    Instruction* instruction = instructions_[i];
407
408    switch (instruction->op()) {
409      case ORIGIN: {
410        OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
411        if (!encoded->AddOrigin(org->origin_rva()))
412          return NULL;
413        break;
414      }
415      case DEFBYTE: {
416        uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
417        if (!encoded->AddCopy(1, &b))
418          return NULL;
419        break;
420      }
421      case DEFBYTES: {
422        const uint8* byte_values =
423          static_cast<BytesInstruction*>(instruction)->byte_values();
424        uint32 len = static_cast<BytesInstruction*>(instruction)->len();
425
426        if (!encoded->AddCopy(len, byte_values))
427          return NULL;
428        break;
429      }
430      case REL32: {
431        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
432        if (!encoded->AddRel32(label->index_))
433          return NULL;
434        break;
435      }
436      case REL32ARM: {
437        Label* label =
438            static_cast<InstructionWithLabelARM*>(instruction)->label();
439        uint16 compressed_op =
440          static_cast<InstructionWithLabelARM*>(instruction)->
441          compressed_op();
442        if (!encoded->AddRel32ARM(compressed_op, label->index_))
443          return NULL;
444        break;
445      }
446      case ABS32: {
447        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
448        if (!encoded->AddAbs32(label->index_))
449          return NULL;
450        break;
451      }
452      case MAKEPERELOCS: {
453        if (!encoded->AddPeMakeRelocs(kind_))
454          return NULL;
455        break;
456      }
457      case MAKEELFRELOCS: {
458        if (!encoded->AddElfMakeRelocs())
459          return NULL;
460        break;
461      }
462      case MAKEELFARMRELOCS: {
463        if (!encoded->AddElfARMMakeRelocs())
464          return NULL;
465        break;
466      }
467      default: {
468        NOTREACHED() << "Unknown Insn OP kind";
469      }
470    }
471  }
472
473  return encoded.release();
474}
475
476Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
477  if (!byte_instruction_cache_.get()) {
478    byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
479    if (!byte_instruction_cache_.get())
480      return NULL;
481
482    for (int i = 0; i < 256; ++i) {
483      byte_instruction_cache_[i] =
484          new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
485      if (!byte_instruction_cache_[i]) {
486        for (int j = 0; j < i; ++j)
487          delete byte_instruction_cache_[j];
488        byte_instruction_cache_.reset();
489        return NULL;
490      }
491    }
492  }
493
494  return byte_instruction_cache_[byte];
495}
496
497// Chosen empirically to give the best reduction in payload size for
498// an update from daisy_3701.98.0 to daisy_4206.0.0.
499const int AssemblyProgram::kLabelLowerLimit = 5;
500
501CheckBool AssemblyProgram::TrimLabels() {
502  // For now only trim for ARM binaries
503  if (kind() != EXE_ELF_32_ARM)
504    return true;
505
506  int lower_limit = kLabelLowerLimit;
507
508  VLOG(1) << "TrimLabels: threshold " << lower_limit;
509
510  // Remove underused labels from the list of labels
511  RVAToLabel::iterator it = rel32_labels_.begin();
512  while (it != rel32_labels_.end()) {
513    if (it->second->count_ <= lower_limit) {
514      rel32_labels_.erase(it++);
515    } else {
516      ++it;
517    }
518  }
519
520  // Walk through the list of instructions, replacing trimmed labels
521  // with the original machine instruction
522  for (size_t i = 0; i < instructions_.size(); ++i) {
523    Instruction* instruction = instructions_[i];
524    switch (instruction->op()) {
525      case REL32ARM: {
526        Label* label =
527            static_cast<InstructionWithLabelARM*>(instruction)->label();
528        if (label->count_ <= lower_limit) {
529          const uint8* arm_op =
530              static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
531          uint16 op_size =
532              static_cast<InstructionWithLabelARM*>(instruction)->op_size();
533
534          if (op_size < 1)
535            return false;
536          BytesInstruction* new_instruction =
537            new(std::nothrow) BytesInstruction(arm_op, op_size);
538          instructions_[i] = new_instruction;
539        }
540        break;
541      }
542      default:
543        break;
544    }
545  }
546
547  return true;
548}
549
550void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
551  for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
552       ++p) {
553    Label* current = p->second;
554    if (current->index_ != Label::kNoIndex)
555      printf("%d\n", current->count_);
556  }
557}
558
559void AssemblyProgram::CountRel32ARM() {
560  PrintLabelCounts(&rel32_labels_);
561}
562
563////////////////////////////////////////////////////////////////////////////////
564
565Status TrimLabels(AssemblyProgram* program) {
566  if (program->TrimLabels())
567    return C_OK;
568  else
569    return C_TRIM_FAILED;
570}
571
572Status Encode(AssemblyProgram* program, EncodedProgram** output) {
573  *output = NULL;
574  EncodedProgram *encoded = program->Encode();
575  if (encoded) {
576    *output = encoded;
577    return C_OK;
578  } else {
579    return C_GENERAL_ERROR;
580  }
581}
582
583}  // namespace courgette
584