Miscompilation.cpp revision 3cbacc90475509a842edb91cc56fe73a5bb5ed00
1//===- Miscompilation.cpp - Debug program miscompilations -----------------===//
2//
3// This file implements program miscompilation debugging support.
4//
5//===----------------------------------------------------------------------===//
6
7#include "BugDriver.h"
8#include "SystemUtils.h"
9#include "ListReducer.h"
10#include "llvm/Pass.h"
11#include "llvm/Module.h"
12#include "llvm/Transforms/Utils/Cloning.h"
13#include "llvm/Transforms/Utils/Linker.h"
14#include "Support/CommandLine.h"
15
16// Anonymous namespace to define command line options for miscompilation
17// debugging.
18//
19namespace {
20  // Output - The user can specify a file containing the expected output of the
21  // program.  If this filename is set, it is used as the reference diff source,
22  // otherwise the raw input run through an interpreter is used as the reference
23  // source.
24  //
25  cl::opt<std::string>
26  Output("output", cl::desc("Specify a reference program output "
27			    "(for miscompilation detection)"));
28}
29
30class ReduceMiscompilingPasses : public ListReducer<const PassInfo*> {
31  BugDriver &BD;
32public:
33  ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
34
35  virtual TestResult doTest(std::vector<const PassInfo*> &Prefix,
36                            std::vector<const PassInfo*> &Kept);
37};
38
39ReduceMiscompilingPasses::TestResult
40ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
41                                 std::vector<const PassInfo*> &Suffix) {
42  // First, run the program with just the Suffix passes.  If it is still broken
43  // with JUST the kept passes, discard the prefix passes.
44  std::cout << "Checking to see if '" << getPassesString(Suffix)
45            << "' compile correctly: ";
46
47  std::string BytecodeResult;
48  if (BD.runPasses(Suffix, BytecodeResult, false/*delete*/, true/*quiet*/)) {
49    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
50              << " on the input program!\n";
51    exit(1);
52  }
53
54  // Check to see if the finished program matches the reference output...
55  if (BD.diffProgram(Output, BytecodeResult, true /*delete bytecode*/)) {
56    std::cout << "nope.\n";
57    return KeepSuffix;        // Miscompilation detected!
58  }
59  std::cout << "yup.\n";      // No miscompilation!
60
61  if (Prefix.empty()) return NoFailure;
62
63  // Next, see if the program is broken if we run the "prefix" passes first,
64  // then seperately run the "kept" passes.
65  std::cout << "Checking to see if '" << getPassesString(Prefix)
66            << "' compile correctly: ";
67
68  // If it is not broken with the kept passes, it's possible that the prefix
69  // passes must be run before the kept passes to break it.  If the program
70  // WORKS after the prefix passes, but then fails if running the prefix AND
71  // kept passes, we can update our bytecode file to include the result of the
72  // prefix passes, then discard the prefix passes.
73  //
74  if (BD.runPasses(Prefix, BytecodeResult, false/*delete*/, true/*quiet*/)) {
75    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
76              << " on the input program!\n";
77    exit(1);
78  }
79
80  // If the prefix maintains the predicate by itself, only keep the prefix!
81  if (BD.diffProgram(Output, BytecodeResult)) {
82    std::cout << "nope.\n";
83    removeFile(BytecodeResult);
84    return KeepPrefix;
85  }
86  std::cout << "yup.\n";      // No miscompilation!
87
88  // Ok, so now we know that the prefix passes work, try running the suffix
89  // passes on the result of the prefix passes.
90  //
91  Module *PrefixOutput = BD.ParseInputFile(BytecodeResult);
92  if (PrefixOutput == 0) {
93    std::cerr << BD.getToolName() << ": Error reading bytecode file '"
94              << BytecodeResult << "'!\n";
95    exit(1);
96  }
97  removeFile(BytecodeResult);  // No longer need the file on disk
98
99  std::cout << "Checking to see if '" << getPassesString(Suffix)
100            << "' passes compile correctly after the '"
101            << getPassesString(Prefix) << "' passes: ";
102
103  Module *OriginalInput = BD.Program;
104  BD.Program = PrefixOutput;
105  if (BD.runPasses(Suffix, BytecodeResult, false/*delete*/, true/*quiet*/)) {
106    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
107              << " on the input program!\n";
108    exit(1);
109  }
110
111  // Run the result...
112  if (BD.diffProgram(Output, BytecodeResult, true/*delete bytecode*/)) {
113    std::cout << "nope.\n";
114    delete OriginalInput;     // We pruned down the original input...
115    return KeepSuffix;
116  }
117
118  // Otherwise, we must not be running the bad pass anymore.
119  std::cout << "yup.\n";      // No miscompilation!
120  BD.Program = OriginalInput; // Restore original program
121  delete PrefixOutput;        // Free experiment
122  return NoFailure;
123}
124
125static void PrintFunctionList(const std::vector<Function*> &Funcs) {
126  for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
127    if (i) std::cout << ", ";
128    std::cout << Funcs[i]->getName();
129  }
130}
131
132
133class ReduceMiscompilingFunctions : public ListReducer<Function*> {
134  BugDriver &BD;
135public:
136  ReduceMiscompilingFunctions(BugDriver &bd) : BD(bd) {}
137
138  virtual TestResult doTest(std::vector<Function*> &Prefix,
139                            std::vector<Function*> &Suffix) {
140    if (TestFuncs(Suffix, false))
141      return KeepSuffix;
142    if (!Prefix.empty() && TestFuncs(Prefix, false))
143      return KeepPrefix;
144    return NoFailure;
145  }
146
147  bool TestFuncs(const std::vector<Function*> &Prefix, bool EmitBytecode);
148};
149
150bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
151                                            bool EmitBytecode) {
152  // Test to see if the function is misoptimized if we ONLY run it on the
153  // functions listed in Funcs.
154  if (!EmitBytecode) {
155    std::cout << "Checking to see if the program is misoptimized when these "
156              << "functions are run\nthrough the passes: ";
157    PrintFunctionList(Funcs);
158    std::cout << "\n";
159  } else {
160    std::cout <<"Outputting reduced bytecode files which expose the problem:\n";
161  }
162
163  // First step: clone the module for the two halves of the program we want.
164  Module *ToOptimize = CloneModule(BD.Program);
165
166  // Second step: Make sure functions & globals are all external so that linkage
167  // between the two modules will work.
168  for (Module::iterator I = ToOptimize->begin(), E = ToOptimize->end();I!=E;++I)
169    I->setLinkage(GlobalValue::ExternalLinkage);
170  for (Module::giterator I = ToOptimize->gbegin(), E = ToOptimize->gend();
171       I != E; ++I)
172    I->setLinkage(GlobalValue::ExternalLinkage);
173
174  // Third step: make a clone of the externalized program for the non-optimized
175  // part.
176  Module *ToNotOptimize = CloneModule(ToOptimize);
177
178  // Fourth step: Remove the test functions from the ToNotOptimize module, and
179  // all of the global variables.
180  for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
181    Function *TNOF = ToNotOptimize->getFunction(Funcs[i]->getName(),
182                                                Funcs[i]->getFunctionType());
183    assert(TNOF && "Function doesn't exist in module!");
184    DeleteFunctionBody(TNOF);       // Function is now external in this module!
185  }
186  for (Module::giterator I = ToNotOptimize->gbegin(), E = ToNotOptimize->gend();
187       I != E; ++I)
188    I->setInitializer(0);  // Delete the initializer to make it external
189
190  if (EmitBytecode) {
191    std::cout << "  Non-optimized portion: ";
192    std::swap(BD.Program, ToNotOptimize);
193    BD.EmitProgressBytecode("tonotoptimize", true);
194    std::swap(BD.Program, ToNotOptimize);
195  }
196
197  // Fifth step: Remove all functions from the ToOptimize module EXCEPT for the
198  // ones specified in Funcs.  We know which ones these are because they are
199  // non-external in ToOptimize, but external in ToNotOptimize.
200  //
201  for (Module::iterator I = ToOptimize->begin(), E = ToOptimize->end();I!=E;++I)
202    if (!I->isExternal()) {
203      Function *TNOF = ToNotOptimize->getFunction(I->getName(),
204                                                  I->getFunctionType());
205      assert(TNOF && "Function doesn't exist in ToNotOptimize module??");
206      if (!TNOF->isExternal())
207        DeleteFunctionBody(I);
208    }
209
210  if (EmitBytecode) {
211    std::cout << "  Portion that is input to optimizer: ";
212    std::swap(BD.Program, ToOptimize);
213    BD.EmitProgressBytecode("tooptimize");
214    std::swap(BD.Program, ToOptimize);
215  }
216
217  // Sixth step: Run the optimization passes on ToOptimize, producing a
218  // transformed version of the functions being tested.
219  Module *OldProgram = BD.Program;
220  BD.Program = ToOptimize;
221
222  if (!EmitBytecode)
223    std::cout << "  Optimizing functions being tested: ";
224  std::string BytecodeResult;
225  if (BD.runPasses(BD.PassesToRun, BytecodeResult, false/*delete*/,
226                   true/*quiet*/)) {
227    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
228              << " on the input program!\n";
229    exit(1);
230  }
231
232  if (!EmitBytecode)
233    std::cout << "done.\n";
234
235  delete BD.Program;   // Delete the old "ToOptimize" module
236  BD.Program = BD.ParseInputFile(BytecodeResult);
237
238  if (EmitBytecode) {
239    std::cout << "  'tooptimize' after being optimized: ";
240    BD.EmitProgressBytecode("optimized", true);
241  }
242
243  if (BD.Program == 0) {
244    std::cerr << BD.getToolName() << ": Error reading bytecode file '"
245              << BytecodeResult << "'!\n";
246    exit(1);
247  }
248  removeFile(BytecodeResult);  // No longer need the file on disk
249
250  // Seventh step: Link the optimized part of the program back to the
251  // unoptimized part of the program.
252  //
253  if (LinkModules(BD.Program, ToNotOptimize, &BytecodeResult)) {
254    std::cerr << BD.getToolName() << ": Error linking modules together:"
255              << BytecodeResult << "\n";
256    exit(1);
257  }
258  delete ToNotOptimize;  // We are done with this module...
259
260  if (EmitBytecode) {
261    std::cout << "  Program as tested: ";
262    BD.EmitProgressBytecode("linked", true);
263    delete BD.Program;
264    BD.Program = OldProgram;
265    return false;   // We don't need to actually execute the program here.
266  }
267
268  std::cout << "  Checking to see if the merged program executes correctly: ";
269
270  // Eighth step: Execute the program.  If it does not match the expected
271  // output, then 'Funcs' are being misoptimized!
272  bool Broken = BD.diffProgram(Output);
273
274  delete BD.Program;  // Delete the hacked up program
275  BD.Program = OldProgram;   // Restore the original
276
277  std::cout << (Broken ? "nope.\n" : "yup.\n");
278  return Broken;
279}
280
281
282/// debugMiscompilation - This method is used when the passes selected are not
283/// crashing, but the generated output is semantically different from the
284/// input.
285///
286bool BugDriver::debugMiscompilation() {
287  std::cout << "*** Debugging miscompilation!\n";
288
289  // Set up the execution environment, selecting a method to run LLVM bytecode.
290  if (initializeExecutionEnvironment()) return true;
291
292  // Run the raw input to see where we are coming from.  If a reference output
293  // was specified, make sure that the raw output matches it.  If not, it's a
294  // problem in the front-end or whatever produced the input code.
295  //
296  bool CreatedOutput = false;
297  if (Output.empty()) {
298    std::cout << "Generating reference output from raw program...";
299    Output = executeProgram("bugpoint.reference.out");
300    CreatedOutput = true;
301    std::cout << " done! Reference output is: " << Output << "\n";
302  } else if (diffProgram(Output)) {
303    std::cout << "\n*** Input program does not match reference diff!\n"
304	      << "    Must be problem with input source!\n";
305    return false;  // Problem found
306  }
307
308  // Figure out which transformations miscompile the input program.
309  unsigned OldSize = PassesToRun.size();
310  ReduceMiscompilingPasses(*this).reduceList(PassesToRun);
311
312  // Make sure something was miscompiled...
313  if (PassesToRun.size() == OldSize) {
314    std::cerr << "*** Optimized program matches reference output!  No problem "
315	      << "detected...\nbugpoint can't help you with your problem!\n";
316    return false;
317  }
318
319  std::cout << "\n*** Found miscompiling pass"
320            << (PassesToRun.size() == 1 ? "" : "es") << ": "
321            << getPassesString(PassesToRun) << "\n";
322  EmitProgressBytecode("passinput");
323
324
325  // Okay, now that we have reduced the list of passes which are causing the
326  // failure, see if we can pin down which functions are being
327  // miscompiled... first build a list of all of the non-external functions in
328  // the program.
329  std::vector<Function*> MiscompiledFunctions;
330  for (Module::iterator I = Program->begin(), E = Program->end(); I != E; ++I)
331    if (!I->isExternal())
332      MiscompiledFunctions.push_back(I);
333
334  // Do the reduction...
335  ReduceMiscompilingFunctions(*this).reduceList(MiscompiledFunctions);
336
337  std::cout << "\n*** The following functions are being miscompiled: ";
338  PrintFunctionList(MiscompiledFunctions);
339  std::cout << "\n";
340
341  // Output a bunch of bytecode files for the user...
342  ReduceMiscompilingFunctions(*this).TestFuncs(MiscompiledFunctions, true);
343
344  if (CreatedOutput) removeFile(Output);
345  return false;
346}
347