1//===-- msandr.cc ---------------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of MemorySanitizer.
11//
// DynamoRIO client for MemorySanitizer.
13//
14// MemorySanitizer requires that all program code is instrumented. Any memory
15// store that can turn an uninitialized value into an initialized value must be
16// observed by the tool, otherwise we risk reporting a false UMR.
17//
18// This also includes any libraries that the program depends on.
19//
// In the case when rebuilding all program dependencies with MemorySanitizer is
// problematic, an experimental MSanDR tool (the code you are currently looking
// at) can be used. It is a DynamoRIO-based tool that uses dynamic
// instrumentation to
24// * Unpoison all memory stores.
25// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
26//   return value shadow on anything that looks like a function call or a return
27//   from a function.
28//
29// This tool does not detect the use of uninitialized values in uninstrumented
30// libraries. It merely gets rid of false positives by marking all data that
31// passes through uninstrumented code as fully initialized.
32//===----------------------------------------------------------------------===//
33
34#include <dr_api.h>
35#include <drutil.h>
36#include <drmgr.h>
37#include <drsyscall.h>
38
39#include <sys/mman.h>
40#include <sys/syscall.h>  /* for SYS_mmap */
41
42#include <algorithm>
43#include <string>
44#include <set>
45#include <vector>
46#include <string.h>
47
48using std::string;
49
// Bitmask helpers.
//   TESTALL: true iff every bit set in `mask` is also set in `var`.
//   TESTANY: true iff at least one bit set in `mask` is also set in `var`.
#define TESTALL(mask, var) ((mask) == ((var) & (mask)))
#define TESTANY(mask, var) (0 != ((var) & (mask)))
52
// Assertion that stays active in every build: when `condition` is false, the
// stringified condition and the given source location are printed with
// dr_printf() and the process is terminated through dr_abort().  The
// do/while(0) wrapper makes the macro usable as a single statement.
// TODO: stacktrace
#define CHECK_IMPL(condition, file, line)                                      \
  do {                                                                         \
    if (!(condition)) {                                                        \
      dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, (file), (line)); \
      dr_abort();                                                              \
    }                                                                          \
  } while (0)

// CHECK_IMPL bound to the location of the CHECK() call site.
#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)

// Compile-time verbosity of the diagnostic output: 0 is silent, values > 0 log
// module load/unload and instrumented basic blocks, values > 1 additionally
// dump disassembly and eflags spill/restore tracing.
#define VERBOSITY 0
64
65namespace {
66
67class ModuleData {
68public:
69  ModuleData();
70  ModuleData(const module_data_t *info);
71  // Yes, we want default copy, assign, and dtor semantics.
72
73public:
74  app_pc start_;
75  app_pc end_;
76  // Full path to the module.
77  string path_;
78  module_handle_t handle_;
79  bool should_instrument_;
80  bool executed_;
81};
82
83string g_app_path;
84
85int msan_retval_tls_offset;
86int msan_param_tls_offset;
87
88// A vector of loaded modules sorted by module bounds.  We lookup the current PC
89// in here from the bb event.  This is better than an rb tree because the lookup
90// is faster and the bb event occurs far more than the module load event.
91std::vector<ModuleData> g_module_list;
92
93ModuleData::ModuleData()
94    : start_(NULL), end_(NULL), path_(""), handle_(NULL),
95      should_instrument_(false), executed_(false) {
96}
97
98ModuleData::ModuleData(const module_data_t *info)
99    : start_(info->start), end_(info->end), path_(info->full_path),
100      handle_(info->handle),
101      // We'll check the black/white lists later and adjust this.
102      should_instrument_(true), executed_(false) {
103}
104
// Entry points looked up in the application module by
// InitializeMSanCallbacks(); they stay null until that function has run.
int (*__msan_get_retval_tls_offset)() = NULL;
int (*__msan_get_param_tls_offset)() = NULL;
void (*__msan_unpoison)(void *base, size_t size) = NULL;
bool (*__msan_is_in_loader)() = NULL;
109
110static generic_func_t LookupCallback(module_data_t *app, const char *name) {
111  generic_func_t callback = dr_get_proc_address(app->handle, name);
112  if (callback == NULL) {
113    dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
114    CHECK(callback);
115  }
116  return callback;
117}
118
119void InitializeMSanCallbacks() {
120  module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
121  if (!app) {
122    dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
123              dr_get_application_name());
124    CHECK(app);
125  }
126  g_app_path = app->full_path;
127
128  __msan_get_retval_tls_offset = (int (*)())
129      LookupCallback(app, "__msan_get_retval_tls_offset");
130  __msan_get_param_tls_offset = (int (*)())
131      LookupCallback(app, "__msan_get_param_tls_offset");
132  __msan_unpoison = (void(*)(void *, size_t))
133      LookupCallback(app, "__msan_unpoison");
134  __msan_is_in_loader = (bool (*)())
135      LookupCallback(app, "__msan_is_in_loader");
136
137  dr_free_module_data(app);
138}
139
140// FIXME: Handle absolute addresses and PC-relative addresses.
141// FIXME: Handle TLS accesses via FS or GS.  DR assumes all other segments have
142// a zero base anyway.
143bool OperandIsInteresting(opnd_t opnd) {
144  return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS &&
145          opnd_get_segment(opnd) != DR_SEG_GS);
146}
147
148bool WantToInstrument(instr_t *instr) {
149  // TODO: skip push instructions?
150  switch (instr_get_opcode(instr)) {
151    // FIXME: support the instructions excluded below:
152  case OP_rep_cmps:
153    // f3 a6    rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
154    return false;
155  }
156
157  // Labels appear due to drutil_expand_rep_string()
158  if (instr_is_label(instr))
159    return false;
160
161  CHECK(instr_ok_to_mangle(instr) == true);
162
163  if (instr_writes_memory(instr)) {
164    for (int d = 0; d < instr_num_dsts(instr); d++) {
165      opnd_t op = instr_get_dst(instr, d);
166      if (OperandIsInteresting(op))
167        return true;
168    }
169  }
170
171  return false;
172}
173
// Shorthands for inserting a meta instruction in front of `at`.  Both expect a
// variable named `bb` (the instruction list) in the enclosing scope.
//   PRE:  `what` is the suffix of an INSTR_CREATE_* macro plus its arguments.
//   PREF: `what` is an already created instruction.
// The expansions deliberately carry no trailing semicolon (the call site
// supplies it), so the macros stay safe inside un-braced if/else statements.
#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what)
#define PREF(at, what) instrlist_meta_preinsert(bb, at, what)
176
177void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
178                    bool is_write) {
179  bool need_to_restore_eflags = false;
180  uint flags = instr_get_arith_flags(instr);
181  // TODO: do something smarter with flags and spills in general?
182  // For example, spill them only once for a sequence of instrumented
183  // instructions that don't change/read flags.
184
185  if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
186    if (VERBOSITY > 1)
187      dr_printf("Spilling eflags...\n");
188    need_to_restore_eflags = true;
189    // TODO: Maybe sometimes don't need to 'seto'.
190    // TODO: Maybe sometimes don't want to spill XAX here?
191    // TODO: No need to spill XAX here if XAX is not used in the BB.
192    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
193    dr_save_arith_flags_to_xax(drcontext, bb, instr);
194    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
195    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
196  }
197
198#if 0
199  dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
200            opnd_is_memory_reference(op), opnd_is_base_disp(op),
201            opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
202            opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
203            opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
204#endif
205
206  reg_id_t R1;
207  bool address_in_R1 = false;
208  if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
209      opnd_get_disp(op) == 0) {
210    // If this is a simple access with no offset or index, we can just use the
211    // base for R1.
212    address_in_R1 = true;
213    R1 = opnd_get_base(op);
214  } else {
215    // Otherwise, we need to compute the addr into R1.
216    // TODO: reuse some spare register? e.g. r15 on x64
217    // TODO: might be used as a non-mem-ref register?
218    R1 = DR_REG_XAX;
219  }
220  CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
221
222  // Pick R2 that's not R1 or used by the operand.  It's OK if the instr uses
223  // R2 elsewhere, since we'll restore it before instr.
224  reg_id_t GPR_TO_USE_FOR_R2[] = {
225    DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX
226    // Don't forget to update the +4 below if you add anything else!
227  };
228  std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4);
229  unused_registers.erase(R1);
230  for (int j = 0; j < opnd_num_regs_used(op); j++) {
231    unused_registers.erase(opnd_get_reg_used(op, j));
232  }
233
234  CHECK(unused_registers.size() > 0);
235  reg_id_t R2 = *unused_registers.begin();
236  CHECK(R1 != R2);
237
238  // Save the current values of R1 and R2.
239  dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
240  // TODO: Something smarter than spilling a "fixed" register R2?
241  dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
242
243  if (!address_in_R1)
244    CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
245  PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
246                     OPND_CREATE_INT64(0xffffbfffffffffff)));
247  PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
248  // There is no mov_st of a 64-bit immediate, so...
249  opnd_size_t op_size = opnd_get_size(op);
250  CHECK(op_size != OPSZ_NA);
251  uint access_size = opnd_size_in_bytes(op_size);
252  if (access_size <= 4) {
253    PRE(instr,
254        mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
255               opnd_create_immed_int((ptr_int_t) 0, op_size)));
256  } else {
257    // FIXME: tail?
258    for (uint ofs = 0; ofs < access_size; ofs += 4) {
259      PRE(instr,
260          mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0)));
261    }
262  }
263
264  // Restore the registers and flags.
265  dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
266  dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
267
268  if (need_to_restore_eflags) {
269    if (VERBOSITY > 1)
270      dr_printf("Restoring eflags\n");
271    // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
272    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
273    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
274    dr_restore_arith_flags_from_xax(drcontext, bb, instr);
275    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
276  }
277
278  // The original instruction is left untouched. The above instrumentation is just
279  // a prefix.
280}
281
282void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
283  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
284
285  // Clobbers nothing except xax.
286  bool res =
287      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
288  CHECK(res);
289
290  // TODO: unpoison more bytes?
291  PRE(instr,
292      mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
293             OPND_CREATE_INT32(0)));
294
295  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
296
297  // The original instruction is left untouched. The above instrumentation is just
298  // a prefix.
299}
300
301void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
302                              instr_t *instr) {
303  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
304
305  // Clobbers nothing except xax.
306  bool res =
307      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
308  CHECK(res);
309
310  // TODO: unpoison more bytes?
311  for (int i = 0; i < 6; ++i) {
312    PRE(instr,
313        mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset +
314                                                         i * sizeof(void *)),
315               OPND_CREATE_INT32(0)));
316  }
317
318  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
319
320  // The original instruction is left untouched. The above instrumentation is just
321  // a prefix.
322}
323
324// For use with binary search.  Modules shouldn't overlap, so we shouldn't have
325// to look at end_.  If that can happen, we won't support such an application.
326bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
327  return left.start_ < right.start_;
328}
329
330// Look up the module containing PC.  Should be relatively fast, as its called
331// for each bb instrumentation.
332ModuleData *LookupModuleByPC(app_pc pc) {
333  ModuleData fake_mod_data;
334  fake_mod_data.start_ = pc;
335  std::vector<ModuleData>::iterator it =
336      lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data,
337                  ModuleDataCompareStart);
338  // if (it == g_module_list.end())
339  //   return NULL;
340  if (it == g_module_list.end() || pc < it->start_)
341    --it;
342  CHECK(it->start_ <= pc);
343  if (pc >= it->end_) {
344    // We're past the end of this module.  We shouldn't be in the next module,
345    // or lower_bound lied to us.
346    ++it;
347    CHECK(it == g_module_list.end() || pc < it->start_);
348    return NULL;
349  }
350
351  // OK, we found the module.
352  return &*it;
353}
354
// Policy for code that belongs to no known module: always instrument it.
bool ShouldInstrumentNonModuleCode() {
  return true;
}
356
357bool ShouldInstrumentModule(ModuleData *mod_data) {
358  // TODO(rnk): Flags for blacklist would get wired in here.
359  generic_func_t p =
360      dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
361  return !p;
362}
363
364bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
365  ModuleData *mod_data = LookupModuleByPC(pc);
366  if (pmod_data)
367    *pmod_data = mod_data;
368  if (mod_data != NULL) {
369    // This module is on a blacklist.
370    if (!mod_data->should_instrument_) {
371      return false;
372    }
373  } else if (!ShouldInstrumentNonModuleCode()) {
374    return false;
375  }
376  return true;
377}
378
379// TODO(rnk): Make sure we instrument after __msan_init.
380dr_emit_flags_t
381event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
382                          bool for_trace, bool translating) {
383  app_pc pc = dr_fragment_app_pc(tag);
384
385  if (ShouldInstrumentPc(pc, NULL))
386    CHECK(drutil_expand_rep_string(drcontext, bb));
387
388  return DR_EMIT_PERSISTABLE;
389}
390
391dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
392                                  bool for_trace, bool translating) {
393  app_pc pc = dr_fragment_app_pc(tag);
394  ModuleData *mod_data;
395
396  if (!ShouldInstrumentPc(pc, &mod_data))
397    return DR_EMIT_PERSISTABLE;
398
399  if (VERBOSITY > 1)
400    dr_printf("============================================================\n");
401  if (VERBOSITY > 0) {
402    string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>");
403    if (mod_data && !mod_data->executed_) {
404      mod_data->executed_ = true; // Nevermind this race.
405      dr_printf("Executing from new module: %s\n", mod_path.c_str());
406    }
407    dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
408        mod_path.c_str(), translating ? "true" : "false");
409    if (mod_data) {
410      // Match standard sanitizer trace format for free symbols.
411      // #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
412      dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
413          pc - mod_data->start_);
414    }
415  }
416  if (VERBOSITY > 1) {
417    instrlist_disassemble(drcontext, pc, bb, STDOUT);
418    instr_t *instr;
419    for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) {
420      dr_printf("opcode: %d\n", instr_get_opcode(instr));
421    }
422  }
423
424  for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
425    int opcode = instr_get_opcode(i);
426    if (opcode == OP_ret || opcode == OP_ret_far) {
427      InstrumentReturn(drcontext, bb, i);
428      continue;
429    }
430
431    // These instructions hopefully cover all cases where control is transferred
432    // to a function in a different module (we only care about calls into
433    // compiler-instrumented modules).
434    // * call_ind is used for normal indirect calls.
435    // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
436    //   stub includes a jump to an address from GOT).
437    if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
438        opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
439      InstrumentIndirectBranch(drcontext, bb, i);
440      continue;
441    }
442
443    if (!WantToInstrument(i))
444      continue;
445
446    if (VERBOSITY > 1) {
447      app_pc orig_pc = dr_fragment_app_pc(tag);
448      uint flags = instr_get_arith_flags(i);
449      dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
450          instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
451    }
452
453    if (instr_writes_memory(i)) {
454      // Instrument memory writes
455      // bool instrumented_anything = false;
456      for (int d = 0; d < instr_num_dsts(i); d++) {
457        opnd_t op = instr_get_dst(i, d);
458        if (!OperandIsInteresting(op))
459          continue;
460
461        // CHECK(!instrumented_anything);
462        // instrumented_anything = true;
463        InstrumentMops(drcontext, bb, i, op, true);
464        break; // only instrumenting the first dst
465      }
466    }
467  }
468
469// TODO: optimize away redundant restore-spill pairs?
470
471  if (VERBOSITY > 1) {
472    pc = dr_fragment_app_pc(tag);
473    dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
474    instrlist_disassemble(drcontext, pc, bb, STDOUT);
475  }
476  return DR_EMIT_PERSISTABLE;
477}
478
479void event_module_load(void *drcontext, const module_data_t *info,
480                       bool loaded) {
481  // Insert the module into the list while maintaining the ordering.
482  ModuleData mod_data(info);
483  std::vector<ModuleData>::iterator it =
484      upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
485                  ModuleDataCompareStart);
486  it = g_module_list.insert(it, mod_data);
487  // Check if we should instrument this module.
488  it->should_instrument_ = ShouldInstrumentModule(&*it);
489  dr_module_set_should_instrument(info->handle, it->should_instrument_);
490
491  if (VERBOSITY > 0)
492    dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
493        info->full_path, info->start, info->end,
494        it->should_instrument_ ? "on" : "off");
495}
496
497void event_module_unload(void *drcontext, const module_data_t *info) {
498  if (VERBOSITY > 0)
499    dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
500        info->start, info->end);
501
502  // Remove the module from the list.
503  ModuleData mod_data(info);
504  std::vector<ModuleData>::iterator it =
505      lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
506                  ModuleDataCompareStart);
507  // It's a bug if we didn't actually find the module.
508  CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
509        it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
510  g_module_list.erase(it);
511}
512
513void event_exit() {
514  // Clean up so DR doesn't tell us we're leaking memory.
515  drsys_exit();
516  drutil_exit();
517  drmgr_exit();
518
519  if (VERBOSITY > 0)
520    dr_printf("==DRMSAN== DONE\n");
521}
522
// Syscall filter: asks for every system call to be intercepted, regardless of
// `sysnum`.
// FIXME: only intercept syscalls with memory effects.
bool event_filter_syscall(void *drcontext, int sysnum) {
  const bool intercept_everything = true;
  return intercept_everything;
}
527
528bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
529  CHECK(arg->valid);
530
531  if (arg->pre)
532    return true;
533  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
534    return true;
535
536  size_t sz = arg->size;
537
538  if (sz > 0xFFFFFFFF) {
539    drmf_status_t res;
540    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
541    const char *name;
542    res = drsys_syscall_name(syscall, &name);
543    CHECK(res == DRMF_SUCCESS);
544
545    dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
546              " Clipping to %llu.\n",
547              name, arg->ordinal, (unsigned long long) sz,
548              (unsigned long long)(sz & 0xFFFFFFFF));
549  }
550
551  if (VERBOSITY > 0) {
552    drmf_status_t res;
553    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
554    const char *name;
555    res = drsys_syscall_name(syscall, &name);
556    dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
557              name, arg->ordinal, arg->start_addr,
558              (char *)arg->start_addr + sz);
559  }
560
561  // We don't switch to the app context because __msan_unpoison() doesn't need
562  // TLS segments.
563  __msan_unpoison(arg->start_addr, sz);
564
565  return true; /* keep going */
566}
567
568bool event_pre_syscall(void *drcontext, int sysnum) {
569  drsys_syscall_t *syscall;
570  drsys_sysnum_t sysnum_full;
571  bool known;
572  drsys_param_type_t ret_type;
573  drmf_status_t res;
574  const char *name;
575
576  res = drsys_cur_syscall(drcontext, &syscall);
577  CHECK(res == DRMF_SUCCESS);
578
579  res = drsys_syscall_number(syscall, &sysnum_full);
580  CHECK(res == DRMF_SUCCESS);
581  CHECK(sysnum == sysnum_full.number);
582
583  res = drsys_syscall_is_known(syscall, &known);
584  CHECK(res == DRMF_SUCCESS);
585
586  res = drsys_syscall_name(syscall, &name);
587  CHECK(res == DRMF_SUCCESS);
588
589  res = drsys_syscall_return_type(syscall, &ret_type);
590  CHECK(res == DRMF_SUCCESS);
591  CHECK(ret_type != DRSYS_TYPE_INVALID);
592  CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
593
594  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
595  CHECK(res == DRMF_SUCCESS);
596
597  return true;
598}
599
600static bool IsInLoader(void *drcontext) {
601  // TODO: This segment swap is inefficient.  DR should just let us query the
602  // app segment base, which it has.  Alternatively, if we disable
603  // -mangle_app_seg, then we won't need the swap.
604  bool need_swap = !dr_using_app_state(drcontext);
605  if (need_swap)
606    dr_switch_to_app_state(drcontext);
607  bool is_in_loader = __msan_is_in_loader();
608  if (need_swap)
609    dr_switch_to_dr_state(drcontext);
610  return is_in_loader;
611}
612
613void event_post_syscall(void *drcontext, int sysnum) {
614  drsys_syscall_t *syscall;
615  drsys_sysnum_t sysnum_full;
616  bool success = false;
617  drmf_status_t res;
618
619  res = drsys_cur_syscall(drcontext, &syscall);
620  CHECK(res == DRMF_SUCCESS);
621
622  res = drsys_syscall_number(syscall, &sysnum_full);
623  CHECK(res == DRMF_SUCCESS);
624  CHECK(sysnum == sysnum_full.number);
625
626  res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
627                                &success);
628  CHECK(res == DRMF_SUCCESS);
629
630  if (success) {
631    res =
632        drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
633    CHECK(res == DRMF_SUCCESS);
634  }
635
636  // Our normal mmap interceptor can't intercept calls from the loader itself.
637  // This means we don't clear the shadow for calls to dlopen.  For now, we
638  // solve this by intercepting mmap from ld.so here, but ideally we'd have a
639  // solution that doesn't rely on msandr.
640  //
641  // Be careful not to intercept maps done by the msan rtl.  Otherwise we end up
642  // unpoisoning vast regions of memory and OOMing.
643  // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
644  // does instead of doing a large memset.  However, we need the memory to be
645  // zeroed, where as tsan does not, so plain madvise is not enough.
646  if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
647    if (IsInLoader(drcontext)) {
648      app_pc base = (app_pc)dr_syscall_get_result(drcontext);
649      ptr_uint_t size;
650      drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
651      CHECK(res == DRMF_SUCCESS);
652      if (VERBOSITY > 0)
653        dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
654      // We don't switch to the app context because __msan_unpoison() doesn't
655      // need TLS segments.
656      __msan_unpoison(base, size);
657    }
658  }
659}
660
661} // namespace
662
663DR_EXPORT void dr_init(client_id_t id) {
664  drmf_status_t res;
665
666  drmgr_init();
667  drutil_init();
668
669  string app_name = dr_get_application_name();
670  // This blacklist will still run these apps through DR's code cache.  On the
671  // other hand, we are able to follow children of these apps.
672  // FIXME: Once DR has detach, we could just detach here.  Alternatively,
673  // if DR had a fork or exec hook to let us decide there, that would be nice.
674  // FIXME: make the blacklist cmd-adjustable.
675  if (app_name == "python" || app_name == "python2.7" || app_name == "bash" ||
676      app_name == "sh" || app_name == "true" || app_name == "exit" ||
677      app_name == "yes" || app_name == "echo")
678    return;
679
680  drsys_options_t ops;
681  memset(&ops, 0, sizeof(ops));
682  ops.struct_size = sizeof(ops);
683  ops.analyze_unknown_syscalls = false;
684
685  res = drsys_init(id, &ops);
686  CHECK(res == DRMF_SUCCESS);
687
688  dr_register_filter_syscall_event(event_filter_syscall);
689  drmgr_register_pre_syscall_event(event_pre_syscall);
690  drmgr_register_post_syscall_event(event_post_syscall);
691  res = drsys_filter_all_syscalls();
692  CHECK(res == DRMF_SUCCESS);
693
694  InitializeMSanCallbacks();
695
696  // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
697  // functions. This may change one day.
698  // TODO: make this more robust.
699
700  void *drcontext = dr_get_current_drcontext();
701
702  dr_switch_to_app_state(drcontext);
703  msan_retval_tls_offset = __msan_get_retval_tls_offset();
704  msan_param_tls_offset = __msan_get_param_tls_offset();
705  dr_switch_to_dr_state(drcontext);
706  if (VERBOSITY > 0) {
707    dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
708    dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
709  }
710
711  // Standard DR events.
712  dr_register_exit_event(event_exit);
713
714  drmgr_priority_t priority = {
715    sizeof(priority), /* size of struct */
716    "msandr",         /* name of our operation */
717    NULL,             /* optional name of operation we should precede */
718    NULL,             /* optional name of operation we should follow */
719    0
720  };                  /* numeric priority */
721
722  drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
723  drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
724  drmgr_register_module_load_event(event_module_load);
725  drmgr_register_module_unload_event(event_module_unload);
726  if (VERBOSITY > 0)
727    dr_printf("==MSANDR== Starting!\n");
728}
729