Miscompilation.cpp revision 640f22e66d90439857a97a83896ee68c4f7128c9
1//===- Miscompilation.cpp - Debug program miscompilations -----------------===//
2//
3// This file implements program miscompilation debugging support.
4//
5//===----------------------------------------------------------------------===//
6
7#include "BugDriver.h"
8#include "SystemUtils.h"
9#include "llvm/Pass.h"
10#include "llvm/Module.h"
11#include "llvm/Transforms/Utils/Cloning.h"
12#include "llvm/Transforms/Utils/Linker.h"
13#include "Support/CommandLine.h"
14
15// Anonymous namespace to define command line options for miscompilation
16// debugging.
17//
18namespace {
19  // Output - The user can specify a file containing the expected output of the
20  // program.  If this filename is set, it is used as the reference diff source,
21  // otherwise the raw input run through an interpreter is used as the reference
22  // source.
23  //
24  cl::opt<std::string>
25  Output("output", cl::desc("Specify a reference program output "
26			    "(for miscompilation detection)"));
27}
28
29template<typename ElTy>
30struct ListReducer {
31  enum TestResult {
32    NoFailure,         // No failure of the predicate was detected
33    KeepSuffix,        // The suffix alone satisfies the predicate
34    KeepPrefix,        // The prefix alone satisfies the predicate
35  };
36
37  // doTest - This virtual function should be overriden by subclasses to
38  // implement the test desired.  The testcase is only required to test to see
39  // if the Kept list still satisfies the property, but if it is going to check
40  // the prefix anyway, it can.
41  //
42  virtual TestResult doTest(const std::vector<ElTy> &Prefix,
43                            const std::vector<ElTy> &Kept) = 0;
44
45  // reduceList - This function attempts to reduce the length of the specified
46  // list while still maintaining the "test" property.  This is the core of the
47  // "work" that bugpoint does.
48  //
49  void reduceList(std::vector<ElTy> &TheList) {
50    unsigned MidTop = TheList.size();
51    while (MidTop > 1) {
52      unsigned Mid = MidTop / 2;
53      std::vector<ElTy> Prefix(TheList.begin()+Mid, TheList.end());
54      std::vector<ElTy> Kept  (TheList.begin(), TheList.begin()+Mid);
55
56      switch (doTest(Prefix, Kept)) {
57      case KeepSuffix:
58        // The property still holds.  We can just drop the prefix elements, and
59        // shorten the list to the "kept" elements.
60        TheList.swap(Kept);
61        MidTop = TheList.size();
62        break;
63      case KeepPrefix:
64        // The predicate still holds, shorten the list to the prefix elements.
65        TheList.swap(Prefix);
66        MidTop = TheList.size();
67        break;
68      case NoFailure:
69        // Otherwise the property doesn't hold.  Some of the elements we removed
70        // must be neccesary to maintain the property.
71        MidTop = Mid;
72        break;
73      }
74    }
75  }
76};
77
78class ReduceMiscompilingPasses : public ListReducer<const PassInfo*> {
79  BugDriver &BD;
80public:
81  ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
82
83  virtual TestResult doTest(const std::vector<const PassInfo*> &Prefix,
84                            const std::vector<const PassInfo*> &Kept);
85};
86
87ReduceMiscompilingPasses::TestResult
88ReduceMiscompilingPasses::doTest(const std::vector<const PassInfo*> &Prefix,
89                                 const std::vector<const PassInfo*> &Kept) {
90  // First, run the program with just the Kept passes.  If it is still broken
91  // with JUST the kept passes, discard the prefix passes.
92  std::cout << "Checking to see if '" << getPassesString(Kept)
93            << "' compile correctly: ";
94
95  std::string BytecodeResult;
96  if (BD.runPasses(Kept, BytecodeResult, false/*delete*/, true/*quiet*/)) {
97    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
98              << " on the input program!\n";
99    exit(1);
100  }
101
102  // Check to see if the finished program matches the reference output...
103  if (BD.diffProgram(Output, BytecodeResult, true /*delete bytecode*/)) {
104    std::cout << "nope.\n";
105    return KeepSuffix;        // Miscompilation detected!
106  }
107  std::cout << "yup.\n";      // No miscompilation!
108
109  if (Prefix.empty()) return NoFailure;
110
111  // First, run the program with just the Kept passes.  If it is still broken
112  // with JUST the kept passes, discard the prefix passes.
113  std::cout << "Checking to see if '" << getPassesString(Prefix)
114            << "' compile correctly: ";
115
116  // If it is not broken with the kept passes, it's possible that the prefix
117  // passes must be run before the kept passes to break it.  If the program
118  // WORKS after the prefix passes, but then fails if running the prefix AND
119  // kept passes, we can update our bytecode file to include the result of the
120  // prefix passes, then discard the prefix passes.
121  //
122  if (BD.runPasses(Prefix, BytecodeResult, false/*delete*/, true/*quiet*/)) {
123    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
124              << " on the input program!\n";
125    exit(1);
126  }
127
128  // If the prefix maintains the predicate by itself, only keep the prefix!
129  if (BD.diffProgram(Output, BytecodeResult)) {
130    std::cout << "nope.\n";
131    removeFile(BytecodeResult);
132    return KeepPrefix;
133  }
134  std::cout << "yup.\n";      // No miscompilation!
135
136  // Ok, so now we know that the prefix passes work, try running the suffix
137  // passes on the result of the prefix passes.
138  //
139  Module *PrefixOutput = BD.ParseInputFile(BytecodeResult);
140  if (PrefixOutput == 0) {
141    std::cerr << BD.getToolName() << ": Error reading bytecode file '"
142              << BytecodeResult << "'!\n";
143    exit(1);
144  }
145  removeFile(BytecodeResult);  // No longer need the file on disk
146
147  std::cout << "Checking to see if '" << getPassesString(Kept)
148            << "' passes compile correctly after the '"
149            << getPassesString(Prefix) << "' passes: ";
150
151  Module *OriginalInput = BD.Program;
152  BD.Program = PrefixOutput;
153  if (BD.runPasses(Kept, BytecodeResult, false/*delete*/, true/*quiet*/)) {
154    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
155              << " on the input program!\n";
156    exit(1);
157  }
158
159  // Run the result...
160  if (BD.diffProgram(Output, BytecodeResult, true/*delete bytecode*/)) {
161    std::cout << "nope.\n";
162    delete OriginalInput;     // We pruned down the original input...
163    return KeepPrefix;
164  }
165
166  // Otherwise, we must not be running the bad pass anymore.
167  std::cout << "yup.\n";      // No miscompilation!
168  BD.Program = OriginalInput; // Restore original program
169  delete PrefixOutput;        // Free experiment
170  return NoFailure;
171}
172
173static void PrintFunctionList(const std::vector<Function*> &Funcs) {
174  for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
175    if (i) std::cout << ", ";
176    std::cout << Funcs[i]->getName();
177  }
178}
179
180
181class ReduceMiscompilingFunctions : public ListReducer<Function*> {
182  BugDriver &BD;
183public:
184  ReduceMiscompilingFunctions(BugDriver &bd) : BD(bd) {}
185
186  virtual TestResult doTest(const std::vector<Function*> &Prefix,
187                            const std::vector<Function*> &Kept) {
188    if (TestFuncs(Kept, false))
189      return KeepSuffix;
190    if (TestFuncs(Prefix, false))
191      return KeepPrefix;
192    return NoFailure;
193  }
194
195  bool TestFuncs(const std::vector<Function*> &Prefix, bool EmitBytecode);
196};
197
198// DeleteFunctionBody - "Remove" the function by deleting all of it's basic
199// blocks, making it external.
200//
201static void DeleteFunctionBody(Function *F) {
202  // First, break circular use/def chain references...
203  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
204    I->dropAllReferences();
205
206  // Next, delete all of the basic blocks.
207  F->getBasicBlockList().clear();
208
209  assert(F->isExternal() && "This didn't make the function external!");
210}
211
212
213bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
214                                            bool EmitBytecode) {
215  // Test to see if the function is misoptimized if we ONLY run it on the
216  // functions listed in Funcs.
217  if (!EmitBytecode) {
218    std::cout << "Checking to see if the program is misoptimized when these "
219              << "functions are run\nthrough the passes: ";
220    PrintFunctionList(Funcs);
221    std::cout << "\n";
222  } else {
223    std::cout <<"Outputting reduced bytecode files which expose the problem:\n";
224  }
225
226  // First step: clone the module for the two halves of the program we want.
227  Module *ToOptimize = CloneModule(BD.Program);
228
229  // Second step: Make sure functions & globals are all external so that linkage
230  // between the two modules will work.
231  for (Module::iterator I = ToOptimize->begin(), E = ToOptimize->end();I!=E;++I)
232    I->setLinkage(GlobalValue::ExternalLinkage);
233  for (Module::giterator I = ToOptimize->gbegin(), E = ToOptimize->gend();
234       I != E; ++I)
235    I->setLinkage(GlobalValue::ExternalLinkage);
236
237  // Third step: make a clone of the externalized program for the non-optimized
238  // part.
239  Module *ToNotOptimize = CloneModule(ToOptimize);
240
241  // Fourth step: Remove the test functions from the ToNotOptimize module, and
242  // all of the global variables.
243  for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
244    Function *TNOF = ToNotOptimize->getFunction(Funcs[i]->getName(),
245                                                Funcs[i]->getFunctionType());
246    assert(TNOF && "Function doesn't exist in module!");
247    DeleteFunctionBody(TNOF);       // Function is now external in this module!
248  }
249  for (Module::giterator I = ToNotOptimize->gbegin(), E = ToNotOptimize->gend();
250       I != E; ++I)
251    I->setInitializer(0);  // Delete the initializer to make it external
252
253  if (EmitBytecode) {
254    std::cout << "  Non-optimized portion: ";
255    std::swap(BD.Program, ToNotOptimize);
256    BD.EmitProgressBytecode("tonotoptimize", true);
257    std::swap(BD.Program, ToNotOptimize);
258  }
259
260  // Fifth step: Remove all functions from the ToOptimize module EXCEPT for the
261  // ones specified in Funcs.  We know which ones these are because they are
262  // non-external in ToOptimize, but external in ToNotOptimize.
263  //
264  for (Module::iterator I = ToOptimize->begin(), E = ToOptimize->end();I!=E;++I)
265    if (!I->isExternal()) {
266      Function *TNOF = ToNotOptimize->getFunction(I->getName(),
267                                                  I->getFunctionType());
268      assert(TNOF && "Function doesn't exist in ToNotOptimize module??");
269      if (!TNOF->isExternal())
270        DeleteFunctionBody(I);
271    }
272
273  if (EmitBytecode) {
274    std::cout << "  Portion that is input to optimizer: ";
275    std::swap(BD.Program, ToOptimize);
276    BD.EmitProgressBytecode("tooptimize");
277    std::swap(BD.Program, ToOptimize);
278  }
279
280  // Sixth step: Run the optimization passes on ToOptimize, producing a
281  // transformed version of the functions being tested.
282  Module *OldProgram = BD.Program;
283  BD.Program = ToOptimize;
284
285  if (!EmitBytecode)
286    std::cout << "  Optimizing functions being tested: ";
287  std::string BytecodeResult;
288  if (BD.runPasses(BD.PassesToRun, BytecodeResult, false/*delete*/,
289                   true/*quiet*/)) {
290    std::cerr << BD.getToolName() << ": Error running this sequence of passes"
291              << " on the input program!\n";
292    exit(1);
293  }
294
295  if (!EmitBytecode)
296    std::cout << "done.\n";
297
298  delete BD.Program;   // Delete the old "ToOptimize" module
299  BD.Program = BD.ParseInputFile(BytecodeResult);
300
301  if (EmitBytecode) {
302    std::cout << "  'tooptimize' after being optimized: ";
303    BD.EmitProgressBytecode("optimized", true);
304  }
305
306  if (BD.Program == 0) {
307    std::cerr << BD.getToolName() << ": Error reading bytecode file '"
308              << BytecodeResult << "'!\n";
309    exit(1);
310  }
311  removeFile(BytecodeResult);  // No longer need the file on disk
312
313  // Seventh step: Link the optimized part of the program back to the
314  // unoptimized part of the program.
315  //
316  if (LinkModules(BD.Program, ToNotOptimize, &BytecodeResult)) {
317    std::cerr << BD.getToolName() << ": Error linking modules together:"
318              << BytecodeResult << "\n";
319    exit(1);
320  }
321  delete ToNotOptimize;  // We are done with this module...
322
323  if (EmitBytecode) {
324    std::cout << "  Program as tested: ";
325    BD.EmitProgressBytecode("linked", true);
326    delete BD.Program;
327    BD.Program = OldProgram;
328    return false;   // We don't need to actually execute the program here.
329  }
330
331  std::cout << "  Checking to see if the merged program executes correctly: ";
332
333  // Eighth step: Execute the program.  If it does not match the expected
334  // output, then 'Funcs' are being misoptimized!
335  bool Broken = BD.diffProgram(Output);
336
337  delete BD.Program;  // Delete the hacked up program
338  BD.Program = OldProgram;   // Restore the original
339
340  std::cout << (Broken ? "nope.\n" : "yup.\n");
341  return Broken;
342}
343
344
345/// debugMiscompilation - This method is used when the passes selected are not
346/// crashing, but the generated output is semantically different from the
347/// input.
348///
349bool BugDriver::debugMiscompilation() {
350  std::cout << "*** Debugging miscompilation!\n";
351
352  // Set up the execution environment, selecting a method to run LLVM bytecode.
353  if (initializeExecutionEnvironment()) return true;
354
355  // Run the raw input to see where we are coming from.  If a reference output
356  // was specified, make sure that the raw output matches it.  If not, it's a
357  // problem in the front-end or whatever produced the input code.
358  //
359  bool CreatedOutput = false;
360  if (Output.empty()) {
361    std::cout << "Generating reference output from raw program...";
362    Output = executeProgram("bugpoint.reference.out");
363    CreatedOutput = true;
364    std::cout << " done! Reference output is: bugpoint.reference.out.\n";
365  } else if (diffProgram(Output)) {
366    std::cout << "\n*** Input program does not match reference diff!\n"
367	      << "    Must be problem with input source!\n";
368    return false;  // Problem found
369  }
370
371  // Figure out which transformations miscompile the input program.
372  unsigned OldSize = PassesToRun.size();
373  ReduceMiscompilingPasses(*this).reduceList(PassesToRun);
374
375  // Make sure something was miscompiled...
376  if (PassesToRun.size() == OldSize) {
377    std::cerr << "*** Optimized program matches reference output!  No problem "
378	      << "detected...\nbugpoint can't help you with your problem!\n";
379    return false;
380  }
381
382  std::cout << "\n*** Found miscompiling pass"
383            << (PassesToRun.size() == 1 ? "" : "es") << ": "
384            << getPassesString(PassesToRun) << "\n";
385  EmitProgressBytecode("passinput");
386
387
388  // Okay, now that we have reduced the list of passes which are causing the
389  // failure, see if we can pin down which functions are being
390  // miscompiled... first build a list of all of the non-external functions in
391  // the program.
392  std::vector<Function*> MiscompiledFunctions;
393  for (Module::iterator I = Program->begin(), E = Program->end(); I != E; ++I)
394    if (!I->isExternal())
395      MiscompiledFunctions.push_back(I);
396
397  // Do the reduction...
398  ReduceMiscompilingFunctions(*this).reduceList(MiscompiledFunctions);
399
400  std::cout << "\n*** The following functions are being miscompiled: ";
401  PrintFunctionList(MiscompiledFunctions);
402  std::cout << "\n";
403
404  // Output a bunch of bytecode files for the user...
405  ReduceMiscompilingFunctions(*this).TestFuncs(MiscompiledFunctions, true);
406
407  if (CreatedOutput) removeFile(Output);
408  return false;
409}
410