// assembler_test.h revision 700a402244a1a423da4f3ba8032459f4b65fa18f
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 18#define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 19 20#include "assembler.h" 21 22#include "common_runtime_test.h" // For ScratchFile 23 24#include <cstdio> 25#include <cstdlib> 26#include <fstream> 27#include <iostream> 28#include <iterator> 29#include <sys/stat.h> 30 31namespace art { 32 33// Use a glocal static variable to keep the same name for all test data. Else we'll just spam the 34// temp directory. 35static std::string tmpnam_; 36 37template<typename Ass, typename Reg, typename Imm> 38class AssemblerTest : public testing::Test { 39 public: 40 Ass* GetAssembler() { 41 return assembler_.get(); 42 } 43 44 typedef std::string (*TestFn)(Ass* assembler); 45 46 void DriverFn(TestFn f, std::string test_name) { 47 Driver(f(assembler_.get()), test_name); 48 } 49 50 // This driver assumes the assembler has already been called. 
51 void DriverStr(std::string assembly_string, std::string test_name) { 52 Driver(assembly_string, test_name); 53 } 54 55 std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) { 56 const std::vector<Reg*> registers = GetRegisters(); 57 std::string str; 58 for (auto reg : registers) { 59 (assembler_.get()->*f)(*reg); 60 std::string base = fmt; 61 62 size_t reg_index = base.find("{reg}"); 63 if (reg_index != std::string::npos) { 64 std::ostringstream sreg; 65 sreg << *reg; 66 std::string reg_string = sreg.str(); 67 base.replace(reg_index, 5, reg_string); 68 } 69 70 if (str.size() > 0) { 71 str += "\n"; 72 } 73 str += base; 74 } 75 // Add a newline at the end. 76 str += "\n"; 77 return str; 78 } 79 80 std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) { 81 const std::vector<Reg*> registers = GetRegisters(); 82 std::string str; 83 for (auto reg1 : registers) { 84 for (auto reg2 : registers) { 85 (assembler_.get()->*f)(*reg1, *reg2); 86 std::string base = fmt; 87 88 size_t reg1_index = base.find("{reg1}"); 89 if (reg1_index != std::string::npos) { 90 std::ostringstream sreg; 91 sreg << *reg1; 92 std::string reg_string = sreg.str(); 93 base.replace(reg1_index, 6, reg_string); 94 } 95 96 size_t reg2_index = base.find("{reg2}"); 97 if (reg2_index != std::string::npos) { 98 std::ostringstream sreg; 99 sreg << *reg2; 100 std::string reg_string = sreg.str(); 101 base.replace(reg2_index, 6, reg_string); 102 } 103 104 if (str.size() > 0) { 105 str += "\n"; 106 } 107 str += base; 108 } 109 } 110 // Add a newline at the end. 
111 str += "\n"; 112 return str; 113 } 114 115 std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) { 116 const std::vector<Reg*> registers = GetRegisters(); 117 std::string str; 118 std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); 119 for (auto reg : registers) { 120 for (int64_t imm : imms) { 121 Imm* new_imm = CreateImmediate(imm); 122 (assembler_.get()->*f)(*reg, *new_imm); 123 delete new_imm; 124 std::string base = fmt; 125 126 size_t reg_index = base.find("{reg}"); 127 if (reg_index != std::string::npos) { 128 std::ostringstream sreg; 129 sreg << *reg; 130 std::string reg_string = sreg.str(); 131 base.replace(reg_index, 5, reg_string); 132 } 133 134 size_t imm_index = base.find("{imm}"); 135 if (imm_index != std::string::npos) { 136 std::ostringstream sreg; 137 sreg << imm; 138 std::string imm_string = sreg.str(); 139 base.replace(imm_index, 5, imm_string); 140 } 141 142 if (str.size() > 0) { 143 str += "\n"; 144 } 145 str += base; 146 } 147 } 148 // Add a newline at the end. 149 str += "\n"; 150 return str; 151 } 152 153 std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) { 154 std::string str; 155 std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); 156 for (int64_t imm : imms) { 157 Imm* new_imm = CreateImmediate(imm); 158 (assembler_.get()->*f)(*new_imm); 159 delete new_imm; 160 std::string base = fmt; 161 162 size_t imm_index = base.find("{imm}"); 163 if (imm_index != std::string::npos) { 164 std::ostringstream sreg; 165 sreg << imm; 166 std::string imm_string = sreg.str(); 167 base.replace(imm_index, 5, imm_string); 168 } 169 170 if (str.size() > 0) { 171 str += "\n"; 172 } 173 str += base; 174 } 175 // Add a newline at the end. 176 str += "\n"; 177 return str; 178 } 179 180 // This is intended to be run as a test. 
181 bool CheckTools() { 182 if (!FileExists(GetAssemblerCommand())) { 183 return false; 184 } 185 LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand(); 186 187 if (!FileExists(GetObjdumpCommand())) { 188 return false; 189 } 190 LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand(); 191 192 // Disassembly is optional. 193 std::string disassembler = GetDisassembleCommand(); 194 if (disassembler.length() != 0) { 195 if (!FileExists(disassembler)) { 196 return false; 197 } 198 LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand(); 199 } else { 200 LOG(INFO) << "No disassembler given."; 201 } 202 203 return true; 204 } 205 206 protected: 207 void SetUp() OVERRIDE { 208 assembler_.reset(new Ass()); 209 210 // Fake a runtime test for ScratchFile 211 std::string android_data; 212 CommonRuntimeTest::SetEnvironmentVariables(android_data); 213 214 SetUpHelpers(); 215 } 216 217 // Override this to set up any architecture-specific things, e.g., register vectors. 218 virtual void SetUpHelpers() {} 219 220 virtual std::vector<Reg*> GetRegisters() = 0; 221 222 // Get the typically used name for this architecture, e.g., aarch64, x86_64, ... 223 virtual std::string GetArchitectureString() = 0; 224 225 // Get the name of the assembler, e.g., "as" by default. 226 virtual std::string GetAssemblerCmdName() { 227 return "as"; 228 } 229 230 // Switches to the assembler command. Default none. 231 virtual std::string GetAssemblerParameters() { 232 return ""; 233 } 234 235 // Return the host assembler command for this test. 236 virtual std::string GetAssemblerCommand() { 237 // Already resolved it once? 
238 if (resolved_assembler_cmd_.length() != 0) { 239 return resolved_assembler_cmd_; 240 } 241 242 std::string line = FindTool(GetAssemblerCmdName()); 243 if (line.length() == 0) { 244 return line; 245 } 246 247 resolved_assembler_cmd_ = line + GetAssemblerParameters(); 248 249 return line; 250 } 251 252 // Get the name of the objdump, e.g., "objdump" by default. 253 virtual std::string GetObjdumpCmdName() { 254 return "objdump"; 255 } 256 257 // Switches to the objdump command. Default is " -h". 258 virtual std::string GetObjdumpParameters() { 259 return " -h"; 260 } 261 262 // Return the host objdump command for this test. 263 virtual std::string GetObjdumpCommand() { 264 // Already resolved it once? 265 if (resolved_objdump_cmd_.length() != 0) { 266 return resolved_objdump_cmd_; 267 } 268 269 std::string line = FindTool(GetObjdumpCmdName()); 270 if (line.length() == 0) { 271 return line; 272 } 273 274 resolved_objdump_cmd_ = line + GetObjdumpParameters(); 275 276 return line; 277 } 278 279 // Get the name of the objdump, e.g., "objdump" by default. 280 virtual std::string GetDisassembleCmdName() { 281 return "objdump"; 282 } 283 284 // Switches to the objdump command. As it's a binary, one needs to push the architecture and 285 // such to objdump, so it's architecture-specific and there is no default. 286 virtual std::string GetDisassembleParameters() = 0; 287 288 // Return the host disassembler command for this test. 289 virtual std::string GetDisassembleCommand() { 290 // Already resolved it once? 291 if (resolved_disassemble_cmd_.length() != 0) { 292 return resolved_disassemble_cmd_; 293 } 294 295 std::string line = FindTool(GetDisassembleCmdName()); 296 if (line.length() == 0) { 297 return line; 298 } 299 300 resolved_disassemble_cmd_ = line + GetDisassembleParameters(); 301 302 return line; 303 } 304 305 // Create a couple of immediate values up to the number of bytes given. 
306 virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) { 307 std::vector<int64_t> res; 308 res.push_back(0); 309 res.push_back(-1); 310 res.push_back(0x12); 311 if (imm_bytes >= 2) { 312 res.push_back(0x1234); 313 res.push_back(-0x1234); 314 if (imm_bytes >= 4) { 315 res.push_back(0x12345678); 316 res.push_back(-0x12345678); 317 if (imm_bytes >= 6) { 318 res.push_back(0x123456789ABC); 319 res.push_back(-0x123456789ABC); 320 if (imm_bytes >= 8) { 321 res.push_back(0x123456789ABCDEF0); 322 res.push_back(-0x123456789ABCDEF0); 323 } 324 } 325 } 326 } 327 return res; 328 } 329 330 // Create an immediate from the specific value. 331 virtual Imm* CreateImmediate(int64_t imm_value) = 0; 332 333 private: 334 // Driver() assembles and compares the results. If the results are not equal and we have a 335 // disassembler, disassemble both and check whether they have the same mnemonics (in which case 336 // we just warn). 337 void Driver(std::string assembly_text, std::string test_name) { 338 EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly"; 339 340 NativeAssemblerResult res; 341 Compile(assembly_text, &res, test_name); 342 343 EXPECT_TRUE(res.ok) << res.error_msg; 344 if (!res.ok) { 345 // No way of continuing. 346 return; 347 } 348 349 size_t cs = assembler_->CodeSize(); 350 std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); 351 MemoryRegion code(&(*data)[0], data->size()); 352 assembler_->FinalizeInstructions(code); 353 354 if (*data == *res.code) { 355 Clean(&res); 356 } else { 357 if (DisassembleBinaries(*data, *res.code, test_name)) { 358 if (data->size() > res.code->size()) { 359 LOG(WARNING) << "Assembly code is not identical, but disassembly of machine code is " 360 "equal: this implies sub-optimal encoding! 
Our code size=" << data->size() << 361 ", gcc size=" << res.code->size(); 362 } else { 363 LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the " 364 "same."; 365 } 366 } else { 367 // This will output the assembly. 368 EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical."; 369 } 370 } 371 } 372 373 // Structure to store intermediates and results. 374 struct NativeAssemblerResult { 375 bool ok; 376 std::string error_msg; 377 std::string base_name; 378 std::unique_ptr<std::vector<uint8_t>> code; 379 uintptr_t length; 380 }; 381 382 // Compile the assembly file from_file to a binary file to_file. Returns true on success. 383 bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) { 384 bool have_assembler = FileExists(GetAssemblerCommand()); 385 EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand(); 386 if (!have_assembler) { 387 return false; 388 } 389 390 std::vector<std::string> args; 391 392 args.push_back(GetAssemblerCommand()); 393 args.push_back("-o"); 394 args.push_back(to_file); 395 args.push_back(from_file); 396 397 return Exec(args, error_msg); 398 } 399 400 // Runs objdump -h on the binary file and extracts the first line with .text. 401 // Returns "" on failure. 
402 std::string Objdump(std::string file) { 403 bool have_objdump = FileExists(GetObjdumpCommand()); 404 EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand(); 405 if (!have_objdump) { 406 return ""; 407 } 408 409 std::string error_msg; 410 std::vector<std::string> args; 411 412 args.push_back(GetObjdumpCommand()); 413 args.push_back(file); 414 args.push_back(">"); 415 args.push_back(file+".dump"); 416 std::string cmd = Join(args, ' '); 417 418 args.clear(); 419 args.push_back("/bin/sh"); 420 args.push_back("-c"); 421 args.push_back(cmd); 422 423 if (!Exec(args, &error_msg)) { 424 EXPECT_TRUE(false) << error_msg; 425 } 426 427 std::ifstream dump(file+".dump"); 428 429 std::string line; 430 bool found = false; 431 while (std::getline(dump, line)) { 432 if (line.find(".text") != line.npos) { 433 found = true; 434 break; 435 } 436 } 437 438 dump.close(); 439 440 if (found) { 441 return line; 442 } else { 443 return ""; 444 } 445 } 446 447 // Disassemble both binaries and compare the text. 
448 bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as, 449 std::string test_name) { 450 std::string disassembler = GetDisassembleCommand(); 451 if (disassembler.length() == 0) { 452 LOG(WARNING) << "No dissassembler command."; 453 return false; 454 } 455 456 std::string data_name = WriteToFile(data, test_name + ".ass"); 457 std::string error_msg; 458 if (!DisassembleBinary(data_name, &error_msg)) { 459 LOG(INFO) << "Error disassembling: " << error_msg; 460 std::remove(data_name.c_str()); 461 return false; 462 } 463 464 std::string as_name = WriteToFile(as, test_name + ".gcc"); 465 if (!DisassembleBinary(as_name, &error_msg)) { 466 LOG(INFO) << "Error disassembling: " << error_msg; 467 std::remove(data_name.c_str()); 468 std::remove((data_name + ".dis").c_str()); 469 std::remove(as_name.c_str()); 470 return false; 471 } 472 473 bool result = CompareFiles(data_name + ".dis", as_name + ".dis"); 474 475 if (result) { 476 std::remove(data_name.c_str()); 477 std::remove(as_name.c_str()); 478 std::remove((data_name + ".dis").c_str()); 479 std::remove((as_name + ".dis").c_str()); 480 } 481 482 return result; 483 } 484 485 bool DisassembleBinary(std::string file, std::string* error_msg) { 486 std::vector<std::string> args; 487 488 args.push_back(GetDisassembleCommand()); 489 args.push_back(file); 490 args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'"); 491 args.push_back(">"); 492 args.push_back(file+".dis"); 493 std::string cmd = Join(args, ' '); 494 495 args.clear(); 496 args.push_back("/bin/sh"); 497 args.push_back("-c"); 498 args.push_back(cmd); 499 500 return Exec(args, error_msg); 501 } 502 503 std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) { 504 std::string file_name = GetTmpnam() + std::string("---") + test_name; 505 const char* data = reinterpret_cast<char*>(buffer.data()); 506 std::ofstream s_out(file_name + ".o"); 507 s_out.write(data, buffer.size()); 508 s_out.close(); 509 return file_name + 
".o"; 510 } 511 512 bool CompareFiles(std::string f1, std::string f2) { 513 std::ifstream f1_in(f1); 514 std::ifstream f2_in(f2); 515 516 bool result = std::equal(std::istreambuf_iterator<char>(f1_in), 517 std::istreambuf_iterator<char>(), 518 std::istreambuf_iterator<char>(f2_in)); 519 520 f1_in.close(); 521 f2_in.close(); 522 523 return result; 524 } 525 526 // Compile the given assembly code and extract the binary, if possible. Put result into res. 527 bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) { 528 res->ok = false; 529 res->code.reset(nullptr); 530 531 res->base_name = GetTmpnam() + std::string("---") + test_name; 532 533 // TODO: Lots of error checking. 534 535 std::ofstream s_out(res->base_name + ".S"); 536 s_out << assembly_code; 537 s_out.close(); 538 539 if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(), 540 &res->error_msg)) { 541 res->error_msg = "Could not compile."; 542 return false; 543 } 544 545 std::string odump = Objdump(res->base_name + ".o"); 546 if (odump.length() == 0) { 547 res->error_msg = "Objdump failed."; 548 return false; 549 } 550 551 std::istringstream iss(odump); 552 std::istream_iterator<std::string> start(iss); 553 std::istream_iterator<std::string> end; 554 std::vector<std::string> tokens(start, end); 555 556 if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) { 557 res->error_msg = "Objdump output not recognized: too few tokens."; 558 return false; 559 } 560 561 if (tokens[1] != ".text") { 562 res->error_msg = "Objdump output not recognized: .text not second token."; 563 return false; 564 } 565 566 std::string lengthToken = "0x" + tokens[2]; 567 std::istringstream(lengthToken) >> std::hex >> res->length; 568 569 std::string offsetToken = "0x" + tokens[5]; 570 uintptr_t offset; 571 std::istringstream(offsetToken) >> std::hex >> offset; 572 573 std::ifstream obj(res->base_name + ".o"); 574 obj.seekg(offset); 575 res->code.reset(new 
std::vector<uint8_t>(res->length)); 576 obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length); 577 obj.close(); 578 579 res->ok = true; 580 return true; 581 } 582 583 // Remove temporary files. 584 void Clean(const NativeAssemblerResult* res) { 585 std::remove((res->base_name + ".S").c_str()); 586 std::remove((res->base_name + ".o").c_str()); 587 std::remove((res->base_name + ".o.dump").c_str()); 588 } 589 590 // Check whether file exists. Is used for commands, so strips off any parameters: anything after 591 // the first space. We skip to the last slash for this, so it should work with directories with 592 // spaces. 593 static bool FileExists(std::string file) { 594 if (file.length() == 0) { 595 return false; 596 } 597 598 // Need to strip any options. 599 size_t last_slash = file.find_last_of('/'); 600 if (last_slash == std::string::npos) { 601 // No slash, start looking at the start. 602 last_slash = 0; 603 } 604 size_t space_index = file.find(' ', last_slash); 605 606 if (space_index == std::string::npos) { 607 std::ifstream infile(file.c_str()); 608 return infile.good(); 609 } else { 610 std::string copy = file.substr(0, space_index - 1); 611 612 struct stat buf; 613 return stat(copy.c_str(), &buf) == 0; 614 } 615 } 616 617 static std::string GetGCCRootPath() { 618 return "prebuilts/gcc/linux-x86"; 619 } 620 621 static std::string GetRootPath() { 622 // 1) Check ANDROID_BUILD_TOP 623 char* build_top = getenv("ANDROID_BUILD_TOP"); 624 if (build_top != nullptr) { 625 return std::string(build_top) + "/"; 626 } 627 628 // 2) Do cwd 629 char temp[1024]; 630 return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string(""); 631 } 632 633 std::string FindTool(std::string tool_name) { 634 // Find the current tool. Wild-card pattern is "arch-string*tool-name". 
635 std::string gcc_path = GetRootPath() + GetGCCRootPath(); 636 std::vector<std::string> args; 637 args.push_back("find"); 638 args.push_back(gcc_path); 639 args.push_back("-name"); 640 args.push_back(GetArchitectureString() + "*" + tool_name); 641 args.push_back("|"); 642 args.push_back("sort"); 643 args.push_back("|"); 644 args.push_back("tail"); 645 args.push_back("-n"); 646 args.push_back("1"); 647 std::string tmp_file = GetTmpnam(); 648 args.push_back(">"); 649 args.push_back(tmp_file); 650 std::string sh_args = Join(args, ' '); 651 652 args.clear(); 653 args.push_back("/bin/sh"); 654 args.push_back("-c"); 655 args.push_back(sh_args); 656 657 std::string error_msg; 658 if (!Exec(args, &error_msg)) { 659 EXPECT_TRUE(false) << error_msg; 660 return ""; 661 } 662 663 std::ifstream in(tmp_file.c_str()); 664 std::string line; 665 if (!std::getline(in, line)) { 666 in.close(); 667 std::remove(tmp_file.c_str()); 668 return ""; 669 } 670 in.close(); 671 std::remove(tmp_file.c_str()); 672 return line; 673 } 674 675 // Use a consistent tmpnam, so store it. 676 std::string GetTmpnam() { 677 if (tmpnam_.length() == 0) { 678 ScratchFile tmp; 679 tmpnam_ = tmp.GetFilename() + "asm"; 680 } 681 return tmpnam_; 682 } 683 684 std::unique_ptr<Ass> assembler_; 685 686 std::string resolved_assembler_cmd_; 687 std::string resolved_objdump_cmd_; 688 std::string resolved_disassemble_cmd_; 689 690 static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6; 691}; 692 693} // namespace art 694 695#endif // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ 696