assembler_test.h revision f0d30ed8fcffc839ceb91359a547c0efb7641d53
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 18#define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 19 20#include "assembler.h" 21 22#include "common_runtime_test.h" // For ScratchFile 23 24#include <cstdio> 25#include <cstdlib> 26#include <fstream> 27#include <iostream> 28#include <iterator> 29#include <sys/stat.h> 30 31namespace art { 32 33// Use a glocal static variable to keep the same name for all test data. Else we'll just spam the 34// temp directory. 35static std::string tmpnam_; 36 37template<typename Ass, typename Reg, typename Imm> 38class AssemblerTest : public testing::Test { 39 public: 40 Ass* GetAssembler() { 41 return assembler_.get(); 42 } 43 44 typedef std::string (*TestFn)(Ass* assembler); 45 46 void DriverFn(TestFn f, std::string test_name) { 47 Driver(f(assembler_.get()), test_name); 48 } 49 50 // This driver assumes the assembler has already been called. 51 void DriverStr(std::string assembly_string, std::string test_name) { 52 Driver(assembly_string, test_name); 53 } 54 55 std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) { 56 const std::vector<Reg*> registers = GetRegisters(); 57 std::string str; 58 for (auto reg : registers) { 59 (assembler_.get()->*f)(*reg); 60 std::string base = fmt; 61 62 size_t reg_index = base.find("{reg}"); 63 if (reg_index != std::string::npos) { 64 std::ostringstream sreg; 65 sreg << *reg; 66 std::string reg_string = sreg.str(); 67 base.replace(reg_index, 5, reg_string); 68 } 69 70 if (str.size() > 0) { 71 str += "\n"; 72 } 73 str += base; 74 } 75 // Add a newline at the end. 76 str += "\n"; 77 return str; 78 } 79 80 std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) { 81 const std::vector<Reg*> registers = GetRegisters(); 82 std::string str; 83 for (auto reg1 : registers) { 84 for (auto reg2 : registers) { 85 (assembler_.get()->*f)(*reg1, *reg2); 86 std::string base = fmt; 87 88 size_t reg1_index = base.find("{reg1}"); 89 if (reg1_index != std::string::npos) { 90 std::ostringstream sreg; 91 sreg << *reg1; 92 std::string reg_string = sreg.str(); 93 base.replace(reg1_index, 6, reg_string); 94 } 95 96 size_t reg2_index = base.find("{reg2}"); 97 if (reg2_index != std::string::npos) { 98 std::ostringstream sreg; 99 sreg << *reg2; 100 std::string reg_string = sreg.str(); 101 base.replace(reg2_index, 6, reg_string); 102 } 103 104 if (str.size() > 0) { 105 str += "\n"; 106 } 107 str += base; 108 } 109 } 110 // Add a newline at the end. 111 str += "\n"; 112 return str; 113 } 114 115 std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) { 116 const std::vector<Reg*> registers = GetRegisters(); 117 std::string str; 118 std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); 119 for (auto reg : registers) { 120 for (int64_t imm : imms) { 121 Imm* new_imm = CreateImmediate(imm); 122 (assembler_.get()->*f)(*reg, *new_imm); 123 delete new_imm; 124 std::string base = fmt; 125 126 size_t reg_index = base.find("{reg}"); 127 if (reg_index != std::string::npos) { 128 std::ostringstream sreg; 129 sreg << *reg; 130 std::string reg_string = sreg.str(); 131 base.replace(reg_index, 5, reg_string); 132 } 133 134 size_t imm_index = base.find("{imm}"); 135 if (imm_index != std::string::npos) { 136 std::ostringstream sreg; 137 sreg << imm; 138 std::string imm_string = sreg.str(); 139 base.replace(imm_index, 5, imm_string); 140 } 141 142 if (str.size() > 0) { 143 str += "\n"; 144 } 145 str += base; 146 } 147 } 148 // Add a newline at the end. 149 str += "\n"; 150 return str; 151 } 152 153 std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) { 154 std::string str; 155 std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); 156 for (int64_t imm : imms) { 157 Imm* new_imm = CreateImmediate(imm); 158 (assembler_.get()->*f)(*new_imm); 159 delete new_imm; 160 std::string base = fmt; 161 162 size_t imm_index = base.find("{imm}"); 163 if (imm_index != std::string::npos) { 164 std::ostringstream sreg; 165 sreg << imm; 166 std::string imm_string = sreg.str(); 167 base.replace(imm_index, 5, imm_string); 168 } 169 170 if (str.size() > 0) { 171 str += "\n"; 172 } 173 str += base; 174 } 175 // Add a newline at the end. 176 str += "\n"; 177 return str; 178 } 179 180 // This is intended to be run as a test. 181 bool CheckTools() { 182 if (!FileExists(GetAssemblerCommand())) { 183 return false; 184 } 185 LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand(); 186 187 if (!FileExists(GetObjdumpCommand())) { 188 return false; 189 } 190 LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand(); 191 192 // Disassembly is optional. 193 std::string disassembler = GetDisassembleCommand(); 194 if (disassembler.length() != 0) { 195 if (!FileExists(disassembler)) { 196 return false; 197 } 198 LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand(); 199 } else { 200 LOG(INFO) << "No disassembler given."; 201 } 202 203 return true; 204 } 205 206 protected: 207 void SetUp() OVERRIDE { 208 assembler_.reset(new Ass()); 209 210 // Fake a runtime test for ScratchFile 211 std::string android_data; 212 CommonRuntimeTest::SetEnvironmentVariables(android_data); 213 214 SetUpHelpers(); 215 } 216 217 // Override this to set up any architecture-specific things, e.g., register vectors. 218 virtual void SetUpHelpers() {} 219 220 virtual std::vector<Reg*> GetRegisters() = 0; 221 222 // Get the typically used name for this architecture, e.g., aarch64, x86_64, ... 223 virtual std::string GetArchitectureString() = 0; 224 225 // Get the name of the assembler, e.g., "as" by default. 226 virtual std::string GetAssemblerCmdName() { 227 return "as"; 228 } 229 230 // Switches to the assembler command. Default none. 231 virtual std::string GetAssemblerParameters() { 232 return ""; 233 } 234 235 // Return the host assembler command for this test. 236 virtual std::string GetAssemblerCommand() { 237 // Already resolved it once? 238 if (resolved_assembler_cmd_.length() != 0) { 239 return resolved_assembler_cmd_; 240 } 241 242 std::string line = FindTool(GetAssemblerCmdName()); 243 if (line.length() == 0) { 244 return line; 245 } 246 247 resolved_assembler_cmd_ = line + GetAssemblerParameters(); 248 249 return line; 250 } 251 252 // Get the name of the objdump, e.g., "objdump" by default. 253 virtual std::string GetObjdumpCmdName() { 254 return "objdump"; 255 } 256 257 // Switches to the objdump command. Default is " -h". 258 virtual std::string GetObjdumpParameters() { 259 return " -h"; 260 } 261 262 // Return the host objdump command for this test. 263 virtual std::string GetObjdumpCommand() { 264 // Already resolved it once? 265 if (resolved_objdump_cmd_.length() != 0) { 266 return resolved_objdump_cmd_; 267 } 268 269 std::string line = FindTool(GetObjdumpCmdName()); 270 if (line.length() == 0) { 271 return line; 272 } 273 274 resolved_objdump_cmd_ = line + GetObjdumpParameters(); 275 276 return line; 277 } 278 279 // Get the name of the objdump, e.g., "objdump" by default. 280 virtual std::string GetDisassembleCmdName() { 281 return "objdump"; 282 } 283 284 // Switches to the objdump command. As it's a binary, one needs to push the architecture and 285 // such to objdump, so it's architecture-specific and there is no default. 286 virtual std::string GetDisassembleParameters() = 0; 287 288 // Return the host disassembler command for this test. 289 virtual std::string GetDisassembleCommand() { 290 // Already resolved it once? 291 if (resolved_disassemble_cmd_.length() != 0) { 292 return resolved_disassemble_cmd_; 293 } 294 295 std::string line = FindTool(GetDisassembleCmdName()); 296 if (line.length() == 0) { 297 return line; 298 } 299 300 resolved_disassemble_cmd_ = line + GetDisassembleParameters(); 301 302 return line; 303 } 304 305 // Create a couple of immediate values up to the number of bytes given. 306 virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) { 307 std::vector<int64_t> res; 308 res.push_back(0); 309 res.push_back(-1); 310 res.push_back(0x12); 311 if (imm_bytes >= 2) { 312 res.push_back(0x1234); 313 res.push_back(-0x1234); 314 if (imm_bytes >= 4) { 315 res.push_back(0x12345678); 316 res.push_back(-0x12345678); 317 if (imm_bytes >= 6) { 318 res.push_back(0x123456789ABC); 319 res.push_back(-0x123456789ABC); 320 if (imm_bytes >= 8) { 321 res.push_back(0x123456789ABCDEF0); 322 res.push_back(-0x123456789ABCDEF0); 323 } 324 } 325 } 326 } 327 return res; 328 } 329 330 // Create an immediate from the specific value. 331 virtual Imm* CreateImmediate(int64_t imm_value) = 0; 332 333 private: 334 // Driver() assembles and compares the results. If the results are not equal and we have a 335 // disassembler, disassemble both and check whether they have the same mnemonics (in which case 336 // we just warn). 337 void Driver(std::string assembly_text, std::string test_name) { 338 EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly"; 339 340 NativeAssemblerResult res; 341 Compile(assembly_text, &res, test_name); 342 343 EXPECT_TRUE(res.ok) << res.error_msg; 344 if (!res.ok) { 345 // No way of continuing. 346 return; 347 } 348 349 size_t cs = assembler_->CodeSize(); 350 std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); 351 MemoryRegion code(&(*data)[0], data->size()); 352 assembler_->FinalizeInstructions(code); 353 354 if (*data == *res.code) { 355 Clean(&res); 356 } else { 357 if (DisassembleBinaries(*data, *res.code, test_name)) { 358 if (data->size() > res.code->size()) { 359 // Fail this test with a fancy colored warning being printed. 360 EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code " 361 "is equal: this implies sub-optimal encoding! Our code size=" << data->size() << 362 ", gcc size=" << res.code->size(); 363 } else { 364 // Otherwise just print an info message and clean up. 365 LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the " 366 "same."; 367 Clean(&res); 368 } 369 } else { 370 // This will output the assembly. 371 EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical."; 372 } 373 } 374 } 375 376 // Structure to store intermediates and results. 377 struct NativeAssemblerResult { 378 bool ok; 379 std::string error_msg; 380 std::string base_name; 381 std::unique_ptr<std::vector<uint8_t>> code; 382 uintptr_t length; 383 }; 384 385 // Compile the assembly file from_file to a binary file to_file. Returns true on success. 386 bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) { 387 bool have_assembler = FileExists(GetAssemblerCommand()); 388 EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand(); 389 if (!have_assembler) { 390 return false; 391 } 392 393 std::vector<std::string> args; 394 395 args.push_back(GetAssemblerCommand()); 396 args.push_back("-o"); 397 args.push_back(to_file); 398 args.push_back(from_file); 399 400 return Exec(args, error_msg); 401 } 402 403 // Runs objdump -h on the binary file and extracts the first line with .text. 404 // Returns "" on failure. 405 std::string Objdump(std::string file) { 406 bool have_objdump = FileExists(GetObjdumpCommand()); 407 EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand(); 408 if (!have_objdump) { 409 return ""; 410 } 411 412 std::string error_msg; 413 std::vector<std::string> args; 414 415 args.push_back(GetObjdumpCommand()); 416 args.push_back(file); 417 args.push_back(">"); 418 args.push_back(file+".dump"); 419 std::string cmd = Join(args, ' '); 420 421 args.clear(); 422 args.push_back("/bin/sh"); 423 args.push_back("-c"); 424 args.push_back(cmd); 425 426 if (!Exec(args, &error_msg)) { 427 EXPECT_TRUE(false) << error_msg; 428 } 429 430 std::ifstream dump(file+".dump"); 431 432 std::string line; 433 bool found = false; 434 while (std::getline(dump, line)) { 435 if (line.find(".text") != line.npos) { 436 found = true; 437 break; 438 } 439 } 440 441 dump.close(); 442 443 if (found) { 444 return line; 445 } else { 446 return ""; 447 } 448 } 449 450 // Disassemble both binaries and compare the text. 451 bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as, 452 std::string test_name) { 453 std::string disassembler = GetDisassembleCommand(); 454 if (disassembler.length() == 0) { 455 LOG(WARNING) << "No dissassembler command."; 456 return false; 457 } 458 459 std::string data_name = WriteToFile(data, test_name + ".ass"); 460 std::string error_msg; 461 if (!DisassembleBinary(data_name, &error_msg)) { 462 LOG(INFO) << "Error disassembling: " << error_msg; 463 std::remove(data_name.c_str()); 464 return false; 465 } 466 467 std::string as_name = WriteToFile(as, test_name + ".gcc"); 468 if (!DisassembleBinary(as_name, &error_msg)) { 469 LOG(INFO) << "Error disassembling: " << error_msg; 470 std::remove(data_name.c_str()); 471 std::remove((data_name + ".dis").c_str()); 472 std::remove(as_name.c_str()); 473 return false; 474 } 475 476 bool result = CompareFiles(data_name + ".dis", as_name + ".dis"); 477 478 if (result) { 479 std::remove(data_name.c_str()); 480 std::remove(as_name.c_str()); 481 std::remove((data_name + ".dis").c_str()); 482 std::remove((as_name + ".dis").c_str()); 483 } 484 485 return result; 486 } 487 488 bool DisassembleBinary(std::string file, std::string* error_msg) { 489 std::vector<std::string> args; 490 491 args.push_back(GetDisassembleCommand()); 492 args.push_back(file); 493 args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'"); 494 args.push_back(">"); 495 args.push_back(file+".dis"); 496 std::string cmd = Join(args, ' '); 497 498 args.clear(); 499 args.push_back("/bin/sh"); 500 args.push_back("-c"); 501 args.push_back(cmd); 502 503 return Exec(args, error_msg); 504 } 505 506 std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) { 507 std::string file_name = GetTmpnam() + std::string("---") + test_name; 508 const char* data = reinterpret_cast<char*>(buffer.data()); 509 std::ofstream s_out(file_name + ".o"); 510 s_out.write(data, buffer.size()); 511 s_out.close(); 512 return file_name + ".o"; 513 } 514 515 bool CompareFiles(std::string f1, std::string f2) { 516 std::ifstream f1_in(f1); 517 std::ifstream f2_in(f2); 518 519 bool result = std::equal(std::istreambuf_iterator<char>(f1_in), 520 std::istreambuf_iterator<char>(), 521 std::istreambuf_iterator<char>(f2_in)); 522 523 f1_in.close(); 524 f2_in.close(); 525 526 return result; 527 } 528 529 // Compile the given assembly code and extract the binary, if possible. Put result into res. 530 bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) { 531 res->ok = false; 532 res->code.reset(nullptr); 533 534 res->base_name = GetTmpnam() + std::string("---") + test_name; 535 536 // TODO: Lots of error checking. 537 538 std::ofstream s_out(res->base_name + ".S"); 539 s_out << assembly_code; 540 s_out.close(); 541 542 if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(), 543 &res->error_msg)) { 544 res->error_msg = "Could not compile."; 545 return false; 546 } 547 548 std::string odump = Objdump(res->base_name + ".o"); 549 if (odump.length() == 0) { 550 res->error_msg = "Objdump failed."; 551 return false; 552 } 553 554 std::istringstream iss(odump); 555 std::istream_iterator<std::string> start(iss); 556 std::istream_iterator<std::string> end; 557 std::vector<std::string> tokens(start, end); 558 559 if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) { 560 res->error_msg = "Objdump output not recognized: too few tokens."; 561 return false; 562 } 563 564 if (tokens[1] != ".text") { 565 res->error_msg = "Objdump output not recognized: .text not second token."; 566 return false; 567 } 568 569 std::string lengthToken = "0x" + tokens[2]; 570 std::istringstream(lengthToken) >> std::hex >> res->length; 571 572 std::string offsetToken = "0x" + tokens[5]; 573 uintptr_t offset; 574 std::istringstream(offsetToken) >> std::hex >> offset; 575 576 std::ifstream obj(res->base_name + ".o"); 577 obj.seekg(offset); 578 res->code.reset(new std::vector<uint8_t>(res->length)); 579 obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length); 580 obj.close(); 581 582 res->ok = true; 583 return true; 584 } 585 586 // Remove temporary files. 587 void Clean(const NativeAssemblerResult* res) { 588 std::remove((res->base_name + ".S").c_str()); 589 std::remove((res->base_name + ".o").c_str()); 590 std::remove((res->base_name + ".o.dump").c_str()); 591 } 592 593 // Check whether file exists. Is used for commands, so strips off any parameters: anything after 594 // the first space. We skip to the last slash for this, so it should work with directories with 595 // spaces. 596 static bool FileExists(std::string file) { 597 if (file.length() == 0) { 598 return false; 599 } 600 601 // Need to strip any options. 602 size_t last_slash = file.find_last_of('/'); 603 if (last_slash == std::string::npos) { 604 // No slash, start looking at the start. 605 last_slash = 0; 606 } 607 size_t space_index = file.find(' ', last_slash); 608 609 if (space_index == std::string::npos) { 610 std::ifstream infile(file.c_str()); 611 return infile.good(); 612 } else { 613 std::string copy = file.substr(0, space_index - 1); 614 615 struct stat buf; 616 return stat(copy.c_str(), &buf) == 0; 617 } 618 } 619 620 static std::string GetGCCRootPath() { 621 return "prebuilts/gcc/linux-x86"; 622 } 623 624 static std::string GetRootPath() { 625 // 1) Check ANDROID_BUILD_TOP 626 char* build_top = getenv("ANDROID_BUILD_TOP"); 627 if (build_top != nullptr) { 628 return std::string(build_top) + "/"; 629 } 630 631 // 2) Do cwd 632 char temp[1024]; 633 return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string(""); 634 } 635 636 std::string FindTool(std::string tool_name) { 637 // Find the current tool. Wild-card pattern is "arch-string*tool-name". 638 std::string gcc_path = GetRootPath() + GetGCCRootPath(); 639 std::vector<std::string> args; 640 args.push_back("find"); 641 args.push_back(gcc_path); 642 args.push_back("-name"); 643 args.push_back(GetArchitectureString() + "*" + tool_name); 644 args.push_back("|"); 645 args.push_back("sort"); 646 args.push_back("|"); 647 args.push_back("tail"); 648 args.push_back("-n"); 649 args.push_back("1"); 650 std::string tmp_file = GetTmpnam(); 651 args.push_back(">"); 652 args.push_back(tmp_file); 653 std::string sh_args = Join(args, ' '); 654 655 args.clear(); 656 args.push_back("/bin/sh"); 657 args.push_back("-c"); 658 args.push_back(sh_args); 659 660 std::string error_msg; 661 if (!Exec(args, &error_msg)) { 662 EXPECT_TRUE(false) << error_msg; 663 return ""; 664 } 665 666 std::ifstream in(tmp_file.c_str()); 667 std::string line; 668 if (!std::getline(in, line)) { 669 in.close(); 670 std::remove(tmp_file.c_str()); 671 return ""; 672 } 673 in.close(); 674 std::remove(tmp_file.c_str()); 675 return line; 676 } 677 678 // Use a consistent tmpnam, so store it. 679 std::string GetTmpnam() { 680 if (tmpnam_.length() == 0) { 681 ScratchFile tmp; 682 tmpnam_ = tmp.GetFilename() + "asm"; 683 } 684 return tmpnam_; 685 } 686 687 std::unique_ptr<Ass> assembler_; 688 689 std::string resolved_assembler_cmd_; 690 std::string resolved_objdump_cmd_; 691 std::string resolved_disassemble_cmd_; 692 693 static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6; 694}; 695 696} // namespace art 697 698#endif // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 699