1//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// DataFlowSanitizer (DFSan) is a tool for 10// generalised dynamic data flow (taint) analysis: 11// http://clang.llvm.org/docs/DataFlowSanitizer.html . 12// 13// This file implements a mutation algorithm based on taint 14// analysis feedback from DFSan. 15// 16// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing" 17// by Vijay Ganesh & Tim Leek & Martin Rinard: 18// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320, 19// but it uses a full blown LLVM IR taint analysis and separate instrumentation 20// to analyze all of the "attack points" at once. 21// 22// Workflow: 23// * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation. 24// * The code under test is compiled with DFSan *and* with special extra hooks 25// that are inserted before dfsan. Currently supported hooks: 26// - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction, 27// receives the type, size and arguments of ICMP. 28// * Every call to HOOK(a,b) is replaced by DFSan with 29// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK 30// gets all the taint labels for the arguments. 31// * At the Fuzzer startup we assign a unique DFSan label 32// to every byte of the input string (Fuzzer::CurrentUnit) so that for any 33// chunk of data we know which input bytes it has derived from. 34// * The __dfsw_* functions (implemented in this file) record the 35// parameters (i.e. the application data and the corresponding taint labels) 36// in a global state. 37// * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_* 38// hooks to guide the fuzzing towards new application states. 39// For example if 4 bytes of data that derive from input bytes {4,5,6,7} 40// are compared with a constant 12345 and the comparison always yields 41// the same result, we try to insert 12345, 12344, 12346 into bytes 42// {4,5,6,7} of the next fuzzed inputs. 43// 44// This code does not function when DFSan is not linked in. 45// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer 46// we redeclare the dfsan_* interface functions as weak and check if they 47// are nullptr before calling. 48// If this approach proves to be useful we may add attribute(weak) to the 49// dfsan declarations in dfsan_interface.h 50// 51// This module is in the "proof of concept" stage. 52// It is capable of solving only the simplest puzzles 53// like test/dfsan/DFSanSimpleCmpTest.cpp. 54//===----------------------------------------------------------------------===// 55 56/* Example of manual usage: 57( 58 cd $LLVM/lib/Fuzzer/ 59 clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp 60 clang++ -O0 -std=c++11 -fsanitize-coverage=3 \ 61 -mllvm -sanitizer-coverage-experimental-trace-compares=1 \ 62 -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list \ 63 test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o 64 ./a.out 65) 66*/ 67 68#include "FuzzerInternal.h" 69#include <sanitizer/dfsan_interface.h> 70 71#include <cstring> 72#include <iostream> 73#include <unordered_map> 74 75extern "C" { 76__attribute__((weak)) 77dfsan_label dfsan_create_label(const char *desc, void *userdata); 78__attribute__((weak)) 79void dfsan_set_label(dfsan_label label, void *addr, size_t size); 80__attribute__((weak)) 81void dfsan_add_label(dfsan_label label, void *addr, size_t size); 82__attribute__((weak)) 83const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); 84} // extern "C" 85 86namespace { 87 88// These values are copied from include/llvm/IR/InstrTypes.h. 89// We do not include the LLVM headers here to remain independent. 90// If these values ever change, an assertion in ComputeCmp will fail. 91enum Predicate { 92 ICMP_EQ = 32, ///< equal 93 ICMP_NE = 33, ///< not equal 94 ICMP_UGT = 34, ///< unsigned greater than 95 ICMP_UGE = 35, ///< unsigned greater or equal 96 ICMP_ULT = 36, ///< unsigned less than 97 ICMP_ULE = 37, ///< unsigned less or equal 98 ICMP_SGT = 38, ///< signed greater than 99 ICMP_SGE = 39, ///< signed greater or equal 100 ICMP_SLT = 40, ///< signed less than 101 ICMP_SLE = 41, ///< signed less or equal 102}; 103 104template <class U, class S> 105bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) { 106 switch(CmpType) { 107 case ICMP_EQ : return Arg1 == Arg2; 108 case ICMP_NE : return Arg1 != Arg2; 109 case ICMP_UGT: return Arg1 > Arg2; 110 case ICMP_UGE: return Arg1 >= Arg2; 111 case ICMP_ULT: return Arg1 < Arg2; 112 case ICMP_ULE: return Arg1 <= Arg2; 113 case ICMP_SGT: return (S)Arg1 > (S)Arg2; 114 case ICMP_SGE: return (S)Arg1 >= (S)Arg2; 115 case ICMP_SLT: return (S)Arg1 < (S)Arg2; 116 case ICMP_SLE: return (S)Arg1 <= (S)Arg2; 117 default: assert(0 && "unsupported CmpType"); 118 } 119 return false; 120} 121 122static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, 123 uint64_t Arg2) { 124 if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2); 125 if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2); 126 if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2); 127 if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2); 128 assert(0 && "unsupported type size"); 129 return true; 130} 131 132// As a simplification we use the range of input bytes instead of a set of input 133// bytes. 134struct LabelRange { 135 uint16_t Beg, End; // Range is [Beg, End), thus Beg==End is an empty range. 136 137 LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {} 138 139 static LabelRange Join(LabelRange LR1, LabelRange LR2) { 140 if (LR1.Beg == LR1.End) return LR2; 141 if (LR2.Beg == LR2.End) return LR1; 142 return {std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End)}; 143 } 144 LabelRange &Join(LabelRange LR) { 145 return *this = Join(*this, LR); 146 } 147 static LabelRange Singleton(const dfsan_label_info *LI) { 148 uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata; 149 assert(Idx > 0); 150 return {(uint16_t)(Idx - 1), Idx}; 151 } 152}; 153 154std::ostream &operator<<(std::ostream &os, const LabelRange &LR) { 155 return os << "[" << LR.Beg << "," << LR.End << ")"; 156} 157 158class DFSanState { 159 public: 160 DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options) 161 : Options(Options) {} 162 163 struct CmpSiteInfo { 164 size_t ResCounters[2] = {0, 0}; 165 size_t CmpSize = 0; 166 LabelRange LR; 167 std::unordered_map<uint64_t, size_t> CountedConstants; 168 }; 169 170 LabelRange GetLabelRange(dfsan_label L); 171 void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, 172 uint64_t Arg1, uint64_t Arg2, dfsan_label L1, 173 dfsan_label L2); 174 bool Mutate(fuzzer::Unit *U); 175 176 private: 177 std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap; 178 LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {}; 179 const fuzzer::Fuzzer::FuzzingOptions &Options; 180}; 181 182LabelRange DFSanState::GetLabelRange(dfsan_label L) { 183 LabelRange &LR = LabelRanges[L]; 184 if (LR.Beg < LR.End || L == 0) 185 return LR; 186 const dfsan_label_info *LI = dfsan_get_label_info(L); 187 if (LI->l1 || LI->l2) 188 return LR = LabelRange::Join(GetLabelRange(LI->l1), GetLabelRange(LI->l2)); 189 return LR = LabelRange::Singleton(LI); 190} 191 192void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, 193 uint64_t Arg1, uint64_t Arg2, dfsan_label L1, 194 dfsan_label L2) { 195 if (L1 == 0 && L2 == 0) 196 return; // Not actionable. 197 if (L1 != 0 && L2 != 0) 198 return; // Probably still actionable. 199 bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2); 200 CmpSiteInfo &CSI = PcToCmpSiteInfoMap[PC]; 201 CSI.CmpSize = CmpSize; 202 CSI.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2)); 203 if (!L1) CSI.CountedConstants[Arg1]++; 204 if (!L2) CSI.CountedConstants[Arg2]++; 205 size_t Counter = CSI.ResCounters[Res]++; 206 207 if (Options.Verbosity >= 2 && 208 (Counter & (Counter - 1)) == 0 && 209 CSI.ResCounters[!Res] == 0) 210 std::cerr << "DFSAN:" 211 << " PC " << std::hex << PC << std::dec 212 << " S " << CmpSize 213 << " T " << CmpType 214 << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res 215 << " L" << L1 << GetLabelRange(L1) 216 << " L" << L2 << GetLabelRange(L2) 217 << " LR " << CSI.LR 218 << "\n"; 219} 220 221bool DFSanState::Mutate(fuzzer::Unit *U) { 222 for (auto &PCToCmp : PcToCmpSiteInfoMap) { 223 auto &CSI = PCToCmp.second; 224 if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue; 225 if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue; 226 if (CSI.CountedConstants.size() != 1) continue; 227 uintptr_t C = CSI.CountedConstants.begin()->first; 228 if (U->size() >= CSI.CmpSize) { 229 size_t RangeSize = CSI.LR.End - CSI.LR.Beg; 230 size_t Idx = CSI.LR.Beg + rand() % RangeSize; 231 if (Idx + CSI.CmpSize > U->size()) continue; 232 C += rand() % 5 - 2; 233 memcpy(U->data() + Idx, &C, CSI.CmpSize); 234 return true; 235 } 236 } 237 return false; 238} 239 240static DFSanState *DFSan; 241 242} // namespace 243 244namespace fuzzer { 245 246bool Fuzzer::MutateWithDFSan(Unit *U) { 247 if (!&dfsan_create_label || !DFSan) return false; 248 return DFSan->Mutate(U); 249} 250 251void Fuzzer::InitializeDFSan() { 252 if (!&dfsan_create_label || !Options.UseDFSan) return; 253 DFSan = new DFSanState(Options); 254 CurrentUnit.resize(Options.MaxLen); 255 for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) { 256 dfsan_label L = dfsan_create_label("input", (void*)(i + 1)); 257 // We assume that no one else has called dfsan_create_label before. 258 assert(L == i + 1); 259 dfsan_set_label(L, &CurrentUnit[i], 1); 260 } 261} 262 263} // namespace fuzzer 264 265extern "C" { 266void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, 267 uint64_t Arg2, dfsan_label L0, 268 dfsan_label L1, dfsan_label L2) { 269 assert(L0 == 0); 270 uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); 271 uint64_t CmpSize = (SizeAndType >> 32) / 8; 272 uint64_t Type = (SizeAndType << 32) >> 32; 273 DFSan->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2); 274} 275} // extern "C" 276