assembler_test.h revision f0d30ed8fcffc839ceb91359a547c0efb7641d53
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
18#define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
19
20#include "assembler.h"
21
22#include "common_runtime_test.h"  // For ScratchFile
23
24#include <cstdio>
25#include <cstdlib>
26#include <fstream>
27#include <iostream>
28#include <iterator>
29#include <sys/stat.h>
30
31namespace art {
32
33// Use a global static variable to keep the same name for all test data. Else we'll just spam the
34// temp directory.
35static std::string tmpnam_;
36
37template<typename Ass, typename Reg, typename Imm>
38class AssemblerTest : public testing::Test {
39 public:
40  Ass* GetAssembler() {
41    return assembler_.get();
42  }
43
44  typedef std::string (*TestFn)(Ass* assembler);
45
46  void DriverFn(TestFn f, std::string test_name) {
47    Driver(f(assembler_.get()), test_name);
48  }
49
50  // This driver assumes the assembler has already been called.
51  void DriverStr(std::string assembly_string, std::string test_name) {
52    Driver(assembly_string, test_name);
53  }
54
55  std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
56    const std::vector<Reg*> registers = GetRegisters();
57    std::string str;
58    for (auto reg : registers) {
59      (assembler_.get()->*f)(*reg);
60      std::string base = fmt;
61
62      size_t reg_index = base.find("{reg}");
63      if (reg_index != std::string::npos) {
64        std::ostringstream sreg;
65        sreg << *reg;
66        std::string reg_string = sreg.str();
67        base.replace(reg_index, 5, reg_string);
68      }
69
70      if (str.size() > 0) {
71        str += "\n";
72      }
73      str += base;
74    }
75    // Add a newline at the end.
76    str += "\n";
77    return str;
78  }
79
80  std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
81    const std::vector<Reg*> registers = GetRegisters();
82    std::string str;
83    for (auto reg1 : registers) {
84      for (auto reg2 : registers) {
85        (assembler_.get()->*f)(*reg1, *reg2);
86        std::string base = fmt;
87
88        size_t reg1_index = base.find("{reg1}");
89        if (reg1_index != std::string::npos) {
90          std::ostringstream sreg;
91          sreg << *reg1;
92          std::string reg_string = sreg.str();
93          base.replace(reg1_index, 6, reg_string);
94        }
95
96        size_t reg2_index = base.find("{reg2}");
97        if (reg2_index != std::string::npos) {
98          std::ostringstream sreg;
99          sreg << *reg2;
100          std::string reg_string = sreg.str();
101          base.replace(reg2_index, 6, reg_string);
102        }
103
104        if (str.size() > 0) {
105          str += "\n";
106        }
107        str += base;
108      }
109    }
110    // Add a newline at the end.
111    str += "\n";
112    return str;
113  }
114
115  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
116    const std::vector<Reg*> registers = GetRegisters();
117    std::string str;
118    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
119    for (auto reg : registers) {
120      for (int64_t imm : imms) {
121        Imm* new_imm = CreateImmediate(imm);
122        (assembler_.get()->*f)(*reg, *new_imm);
123        delete new_imm;
124        std::string base = fmt;
125
126        size_t reg_index = base.find("{reg}");
127        if (reg_index != std::string::npos) {
128          std::ostringstream sreg;
129          sreg << *reg;
130          std::string reg_string = sreg.str();
131          base.replace(reg_index, 5, reg_string);
132        }
133
134        size_t imm_index = base.find("{imm}");
135        if (imm_index != std::string::npos) {
136          std::ostringstream sreg;
137          sreg << imm;
138          std::string imm_string = sreg.str();
139          base.replace(imm_index, 5, imm_string);
140        }
141
142        if (str.size() > 0) {
143          str += "\n";
144        }
145        str += base;
146      }
147    }
148    // Add a newline at the end.
149    str += "\n";
150    return str;
151  }
152
153  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) {
154    std::string str;
155    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
156    for (int64_t imm : imms) {
157      Imm* new_imm = CreateImmediate(imm);
158      (assembler_.get()->*f)(*new_imm);
159      delete new_imm;
160      std::string base = fmt;
161
162      size_t imm_index = base.find("{imm}");
163      if (imm_index != std::string::npos) {
164        std::ostringstream sreg;
165        sreg << imm;
166        std::string imm_string = sreg.str();
167        base.replace(imm_index, 5, imm_string);
168      }
169
170      if (str.size() > 0) {
171        str += "\n";
172      }
173      str += base;
174    }
175    // Add a newline at the end.
176    str += "\n";
177    return str;
178  }
179
180  // This is intended to be run as a test.
181  bool CheckTools() {
182    if (!FileExists(GetAssemblerCommand())) {
183      return false;
184    }
185    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
186
187    if (!FileExists(GetObjdumpCommand())) {
188      return false;
189    }
190    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
191
192    // Disassembly is optional.
193    std::string disassembler = GetDisassembleCommand();
194    if (disassembler.length() != 0) {
195      if (!FileExists(disassembler)) {
196        return false;
197      }
198      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
199    } else {
200      LOG(INFO) << "No disassembler given.";
201    }
202
203    return true;
204  }
205
206 protected:
207  void SetUp() OVERRIDE {
208    assembler_.reset(new Ass());
209
210    // Fake a runtime test for ScratchFile
211    std::string android_data;
212    CommonRuntimeTest::SetEnvironmentVariables(android_data);
213
214    SetUpHelpers();
215  }
216
217  // Override this to set up any architecture-specific things, e.g., register vectors.
218  virtual void SetUpHelpers() {}
219
220  virtual std::vector<Reg*> GetRegisters() = 0;
221
222  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
223  virtual std::string GetArchitectureString() = 0;
224
225  // Get the name of the assembler, e.g., "as" by default.
226  virtual std::string GetAssemblerCmdName() {
227    return "as";
228  }
229
230  // Switches to the assembler command. Default none.
231  virtual std::string GetAssemblerParameters() {
232    return "";
233  }
234
235  // Return the host assembler command for this test.
236  virtual std::string GetAssemblerCommand() {
237    // Already resolved it once?
238    if (resolved_assembler_cmd_.length() != 0) {
239      return resolved_assembler_cmd_;
240    }
241
242    std::string line = FindTool(GetAssemblerCmdName());
243    if (line.length() == 0) {
244      return line;
245    }
246
247    resolved_assembler_cmd_ = line + GetAssemblerParameters();
248
249    return line;
250  }
251
252  // Get the name of the objdump, e.g., "objdump" by default.
253  virtual std::string GetObjdumpCmdName() {
254    return "objdump";
255  }
256
257  // Switches to the objdump command. Default is " -h".
258  virtual std::string GetObjdumpParameters() {
259    return " -h";
260  }
261
262  // Return the host objdump command for this test.
263  virtual std::string GetObjdumpCommand() {
264    // Already resolved it once?
265    if (resolved_objdump_cmd_.length() != 0) {
266      return resolved_objdump_cmd_;
267    }
268
269    std::string line = FindTool(GetObjdumpCmdName());
270    if (line.length() == 0) {
271      return line;
272    }
273
274    resolved_objdump_cmd_ = line + GetObjdumpParameters();
275
276    return line;
277  }
278
279  // Get the name of the disassembler, e.g., "objdump" by default.
280  virtual std::string GetDisassembleCmdName() {
281    return "objdump";
282  }
283
284  // Switches to the objdump command. As it's a binary, one needs to push the architecture and
285  // such to objdump, so it's architecture-specific and there is no default.
286  virtual std::string GetDisassembleParameters() = 0;
287
288  // Return the host disassembler command for this test.
289  virtual std::string GetDisassembleCommand() {
290    // Already resolved it once?
291    if (resolved_disassemble_cmd_.length() != 0) {
292      return resolved_disassemble_cmd_;
293    }
294
295    std::string line = FindTool(GetDisassembleCmdName());
296    if (line.length() == 0) {
297      return line;
298    }
299
300    resolved_disassemble_cmd_ = line + GetDisassembleParameters();
301
302    return line;
303  }
304
305  // Create a couple of immediate values up to the number of bytes given.
306  virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) {
307    std::vector<int64_t> res;
308    res.push_back(0);
309    res.push_back(-1);
310    res.push_back(0x12);
311    if (imm_bytes >= 2) {
312      res.push_back(0x1234);
313      res.push_back(-0x1234);
314      if (imm_bytes >= 4) {
315        res.push_back(0x12345678);
316        res.push_back(-0x12345678);
317        if (imm_bytes >= 6) {
318          res.push_back(0x123456789ABC);
319          res.push_back(-0x123456789ABC);
320          if (imm_bytes >= 8) {
321            res.push_back(0x123456789ABCDEF0);
322            res.push_back(-0x123456789ABCDEF0);
323          }
324        }
325      }
326    }
327    return res;
328  }
329
330  // Create an immediate from the specific value.
331  virtual Imm* CreateImmediate(int64_t imm_value) = 0;
332
333 private:
334  // Driver() assembles and compares the results. If the results are not equal and we have a
335  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
336  // we just warn).
337  void Driver(std::string assembly_text, std::string test_name) {
338    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
339
340    NativeAssemblerResult res;
341    Compile(assembly_text, &res, test_name);
342
343    EXPECT_TRUE(res.ok) << res.error_msg;
344    if (!res.ok) {
345      // No way of continuing.
346      return;
347    }
348
349    size_t cs = assembler_->CodeSize();
350    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
351    MemoryRegion code(&(*data)[0], data->size());
352    assembler_->FinalizeInstructions(code);
353
354    if (*data == *res.code) {
355      Clean(&res);
356    } else {
357      if (DisassembleBinaries(*data, *res.code, test_name)) {
358        if (data->size() > res.code->size()) {
359          // Fail this test with a fancy colored warning being printed.
360          EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
361              "is equal: this implies sub-optimal encoding! Our code size=" << data->size() <<
362              ", gcc size=" << res.code->size();
363        } else {
364          // Otherwise just print an info message and clean up.
365          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
366              "same.";
367          Clean(&res);
368        }
369      } else {
370        // This will output the assembly.
371        EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical.";
372      }
373    }
374  }
375
376  // Structure to store intermediates and results.
377  struct NativeAssemblerResult {
378    bool ok;
379    std::string error_msg;
380    std::string base_name;
381    std::unique_ptr<std::vector<uint8_t>> code;
382    uintptr_t length;
383  };
384
385  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
386  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
387    bool have_assembler = FileExists(GetAssemblerCommand());
388    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
389    if (!have_assembler) {
390      return false;
391    }
392
393    std::vector<std::string> args;
394
395    args.push_back(GetAssemblerCommand());
396    args.push_back("-o");
397    args.push_back(to_file);
398    args.push_back(from_file);
399
400    return Exec(args, error_msg);
401  }
402
403  // Runs objdump -h on the binary file and extracts the first line with .text.
404  // Returns "" on failure.
405  std::string Objdump(std::string file) {
406    bool have_objdump = FileExists(GetObjdumpCommand());
407    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
408    if (!have_objdump) {
409      return "";
410    }
411
412    std::string error_msg;
413    std::vector<std::string> args;
414
415    args.push_back(GetObjdumpCommand());
416    args.push_back(file);
417    args.push_back(">");
418    args.push_back(file+".dump");
419    std::string cmd = Join(args, ' ');
420
421    args.clear();
422    args.push_back("/bin/sh");
423    args.push_back("-c");
424    args.push_back(cmd);
425
426    if (!Exec(args, &error_msg)) {
427      EXPECT_TRUE(false) << error_msg;
428    }
429
430    std::ifstream dump(file+".dump");
431
432    std::string line;
433    bool found = false;
434    while (std::getline(dump, line)) {
435      if (line.find(".text") != line.npos) {
436        found = true;
437        break;
438      }
439    }
440
441    dump.close();
442
443    if (found) {
444      return line;
445    } else {
446      return "";
447    }
448  }
449
450  // Disassemble both binaries and compare the text.
451  bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as,
452                           std::string test_name) {
453    std::string disassembler = GetDisassembleCommand();
454    if (disassembler.length() == 0) {
455      LOG(WARNING) << "No dissassembler command.";
456      return false;
457    }
458
459    std::string data_name = WriteToFile(data, test_name + ".ass");
460    std::string error_msg;
461    if (!DisassembleBinary(data_name, &error_msg)) {
462      LOG(INFO) << "Error disassembling: " << error_msg;
463      std::remove(data_name.c_str());
464      return false;
465    }
466
467    std::string as_name = WriteToFile(as, test_name + ".gcc");
468    if (!DisassembleBinary(as_name, &error_msg)) {
469      LOG(INFO) << "Error disassembling: " << error_msg;
470      std::remove(data_name.c_str());
471      std::remove((data_name + ".dis").c_str());
472      std::remove(as_name.c_str());
473      return false;
474    }
475
476    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
477
478    if (result) {
479      std::remove(data_name.c_str());
480      std::remove(as_name.c_str());
481      std::remove((data_name + ".dis").c_str());
482      std::remove((as_name + ".dis").c_str());
483    }
484
485    return result;
486  }
487
488  bool DisassembleBinary(std::string file, std::string* error_msg) {
489    std::vector<std::string> args;
490
491    args.push_back(GetDisassembleCommand());
492    args.push_back(file);
493    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
494    args.push_back(">");
495    args.push_back(file+".dis");
496    std::string cmd = Join(args, ' ');
497
498    args.clear();
499    args.push_back("/bin/sh");
500    args.push_back("-c");
501    args.push_back(cmd);
502
503    return Exec(args, error_msg);
504  }
505
506  std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) {
507    std::string file_name = GetTmpnam() + std::string("---") + test_name;
508    const char* data = reinterpret_cast<char*>(buffer.data());
509    std::ofstream s_out(file_name + ".o");
510    s_out.write(data, buffer.size());
511    s_out.close();
512    return file_name + ".o";
513  }
514
515  bool CompareFiles(std::string f1, std::string f2) {
516    std::ifstream f1_in(f1);
517    std::ifstream f2_in(f2);
518
519    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
520                             std::istreambuf_iterator<char>(),
521                             std::istreambuf_iterator<char>(f2_in));
522
523    f1_in.close();
524    f2_in.close();
525
526    return result;
527  }
528
529  // Compile the given assembly code and extract the binary, if possible. Put result into res.
530  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
531    res->ok = false;
532    res->code.reset(nullptr);
533
534    res->base_name = GetTmpnam() + std::string("---") + test_name;
535
536    // TODO: Lots of error checking.
537
538    std::ofstream s_out(res->base_name + ".S");
539    s_out << assembly_code;
540    s_out.close();
541
542    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
543                  &res->error_msg)) {
544      res->error_msg = "Could not compile.";
545      return false;
546    }
547
548    std::string odump = Objdump(res->base_name + ".o");
549    if (odump.length() == 0) {
550      res->error_msg = "Objdump failed.";
551      return false;
552    }
553
554    std::istringstream iss(odump);
555    std::istream_iterator<std::string> start(iss);
556    std::istream_iterator<std::string> end;
557    std::vector<std::string> tokens(start, end);
558
559    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
560      res->error_msg = "Objdump output not recognized: too few tokens.";
561      return false;
562    }
563
564    if (tokens[1] != ".text") {
565      res->error_msg = "Objdump output not recognized: .text not second token.";
566      return false;
567    }
568
569    std::string lengthToken = "0x" + tokens[2];
570    std::istringstream(lengthToken) >> std::hex >> res->length;
571
572    std::string offsetToken = "0x" + tokens[5];
573    uintptr_t offset;
574    std::istringstream(offsetToken) >> std::hex >> offset;
575
576    std::ifstream obj(res->base_name + ".o");
577    obj.seekg(offset);
578    res->code.reset(new std::vector<uint8_t>(res->length));
579    obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
580    obj.close();
581
582    res->ok = true;
583    return true;
584  }
585
586  // Remove temporary files.
587  void Clean(const NativeAssemblerResult* res) {
588    std::remove((res->base_name + ".S").c_str());
589    std::remove((res->base_name + ".o").c_str());
590    std::remove((res->base_name + ".o.dump").c_str());
591  }
592
593  // Check whether file exists. Is used for commands, so strips off any parameters: anything after
594  // the first space. We skip to the last slash for this, so it should work with directories with
595  // spaces.
596  static bool FileExists(std::string file) {
597    if (file.length() == 0) {
598      return false;
599    }
600
601    // Need to strip any options.
602    size_t last_slash = file.find_last_of('/');
603    if (last_slash == std::string::npos) {
604      // No slash, start looking at the start.
605      last_slash = 0;
606    }
607    size_t space_index = file.find(' ', last_slash);
608
609    if (space_index == std::string::npos) {
610      std::ifstream infile(file.c_str());
611      return infile.good();
612    } else {
613      std::string copy = file.substr(0, space_index - 1);
614
615      struct stat buf;
616      return stat(copy.c_str(), &buf) == 0;
617    }
618  }
619
620  static std::string GetGCCRootPath() {
621    return "prebuilts/gcc/linux-x86";
622  }
623
624  static std::string GetRootPath() {
625    // 1) Check ANDROID_BUILD_TOP
626    char* build_top = getenv("ANDROID_BUILD_TOP");
627    if (build_top != nullptr) {
628      return std::string(build_top) + "/";
629    }
630
631    // 2) Do cwd
632    char temp[1024];
633    return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
634  }
635
636  std::string FindTool(std::string tool_name) {
637    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
638    std::string gcc_path = GetRootPath() + GetGCCRootPath();
639    std::vector<std::string> args;
640    args.push_back("find");
641    args.push_back(gcc_path);
642    args.push_back("-name");
643    args.push_back(GetArchitectureString() + "*" + tool_name);
644    args.push_back("|");
645    args.push_back("sort");
646    args.push_back("|");
647    args.push_back("tail");
648    args.push_back("-n");
649    args.push_back("1");
650    std::string tmp_file = GetTmpnam();
651    args.push_back(">");
652    args.push_back(tmp_file);
653    std::string sh_args = Join(args, ' ');
654
655    args.clear();
656    args.push_back("/bin/sh");
657    args.push_back("-c");
658    args.push_back(sh_args);
659
660    std::string error_msg;
661    if (!Exec(args, &error_msg)) {
662      EXPECT_TRUE(false) << error_msg;
663      return "";
664    }
665
666    std::ifstream in(tmp_file.c_str());
667    std::string line;
668    if (!std::getline(in, line)) {
669      in.close();
670      std::remove(tmp_file.c_str());
671      return "";
672    }
673    in.close();
674    std::remove(tmp_file.c_str());
675    return line;
676  }
677
678  // Use a consistent tmpnam, so store it.
679  std::string GetTmpnam() {
680    if (tmpnam_.length() == 0) {
681      ScratchFile tmp;
682      tmpnam_ = tmp.GetFilename() + "asm";
683    }
684    return tmpnam_;
685  }
686
687  std::unique_ptr<Ass> assembler_;
688
689  std::string resolved_assembler_cmd_;
690  std::string resolved_objdump_cmd_;
691  std::string resolved_disassemble_cmd_;
692
693  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
694};
695
696}  // namespace art
697
698#endif  // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
699