1/* Copyright (c) 2008-2010, Google Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Neither the name of Google Inc. nor the names of its
11 * contributors may be used to endorse or promote products derived from
12 * this software without specific prior written permission.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27// This file is part of ThreadSanitizer, a dynamic data race detector.
28// Author: Konstantin Serebryany.
29// Author: Timur Iskhodzhanov.
30
31#define __STDC_LIMIT_MACROS
32#include "pin.H"
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <map>
38#include <assert.h>
39
40#include "thread_sanitizer.h"
41#include "ts_lock.h"
42#include "ts_trace_info.h"
43#include "ts_race_verifier.h"
44#include "common_util.h"
45
46
// Compiler-specific glue. Both branches provide ATOMIC_READ(a); the MSVC
// branch additionally stubs out a few POSIX names used below.
#if defined(__GNUC__)
# include <cxxabi.h>  // __cxa_demangle
// Atomically adds 0 to *a and yields the resulting (i.e. current) value.
# define ATOMIC_READ(a) __sync_add_and_fetch(a, 0)

#elif defined(_MSC_VER)
namespace WINDOWS
{
// This is the way of including windows.h recommended by PIN docs.
#include<Windows.h>
}

#include <intrin.h>
// popen is stubbed out here: every call expands to NULL.
# define popen(x,y) (NULL)
// Compare-exchange of 0 with 0 leaves *a unchanged and yields its value.
# define ATOMIC_READ(a)         _InterlockedCompareExchange(a, 0, 0)
// usleep takes microseconds, Sleep takes milliseconds, hence the division.
# define usleep(x) WINDOWS::Sleep((x)/1000)
# define UINTPTR_MAX ((uintptr_t)-1)
#endif

// Building with NDEBUG defined is rejected at compile time.
#ifdef NDEBUG
# error "Please don't define NDEBUG"
#endif
68
// Forward declaration: ReportAccesRange below calls DumpEvent before its
// definition later in this file.
static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
                      uintptr_t a, uintptr_t info);
71//------ Global PIN lock ------- {{{1
72class ScopedReentrantClientLock {
73 public:
74  ScopedReentrantClientLock(int line)
75    : line_(line) {
76    // if (line && G_flags->debug_level >= 5)  Printf("??Try  at line %d\n", line);
77    PIN_LockClient();
78    if (line && G_flags->debug_level >= 5)  Printf("++Lock at line %d\n", line);
79  }
80  ~ScopedReentrantClientLock() {
81    if (line_ && G_flags->debug_level >= 5) Printf("--Unlock at line %d\n", line_);
82    PIN_UnlockClient();
83  }
84 private:
85  int line_;
86};
87
88//--------------- Globals ----------------- {{{1
// Output stream defined elsewhere; TLEBFlushUnlocked writes raw event
// buffers to it when running serialized in offline mode.
extern FILE *G_out;

// Number of threads created by pthread_create (i.e. not counting main thread).
static int n_created_threads = 0;
// Number of started threads, i.e. the number of CallbackForThreadStart calls.
static int n_started_threads = 0;

// Thread-count limit, taken directly from PIN's PIN_MAX_THREADS.
const uint32_t kMaxThreads = PIN_MAX_THREADS;

// Serializes the ThreadSanitizer callbacks if TS_SERIALIZED==1
static TSLock g_main_ts_lock;

// Serializes calls to pthread_create and CreateThread.
static TSLock g_thread_create_lock;
// Under g_thread_create_lock.
// NOTE(review): initialized to -1, presumably meaning "no thread is inside a
// create call right now" -- confirm against the code that uses it.
static THREADID g_tid_of_thread_which_called_create_thread = -1;
105
#ifdef _MSC_VER
// On Windows, we need to create a happens-before arc between
// RtlQueueWorkItem(callback, x, y) and the call to callback.
// Same for RegisterWaitForSingleObject.
static unordered_set<uintptr_t> *g_windows_thread_pool_calback_set;
// Similarly, we need happens-before arcs between the returns from callbacks
// and the related UnregisterWaitEx. Damn, what a stupid interface!
static unordered_map<uintptr_t, uintptr_t> *g_windows_thread_pool_wait_object_map;
#endif
115
116//--------------- StackFrame ----------------- {{{1
// One shadow-stack entry: a program counter paired with a stack pointer.
struct StackFrame {
  uintptr_t pc;
  uintptr_t sp;

  StackFrame(uintptr_t pc_value, uintptr_t sp_value)
      : pc(pc_value),
        sp(sp_value) {
  }
};
122//--------------- InstrumentedCallFrame ----- {{{1
123// Machinery to implement the fast interceptors in PIN
124// (i.e. the ones that don't use PIN_CallApplicationFunction).
125// We instrument the entry of the interesting function (e.g. malloc)
126// and all RET instructions in this function's module (e.g. libc).
127// At entry, we push an InstrumentedCallFrame object onto InstrumentedCallStack.
128// At every RET instruction we check if the stack is not empty (fast path)
129// and if the top contains the current SP. If yes -- this is the function return
130// and we pop the stack.
// One pending fast-interceptor call, stored on an InstrumentedCallStack.
struct InstrumentedCallFrame {
  // Signature of the "after" callback associated with a frame; 'ret' is
  // presumably the intercepted function's return value -- TODO confirm at the
  // site that invokes the callback.
  typedef void (*callback_t)(THREADID tid, InstrumentedCallFrame &frame,
                             ADDRINT ret);
  callback_t callback;
  uintptr_t pc;      // pc recorded by InstrumentedCallStack::Push.
  uintptr_t sp;      // sp recorded by Push; compared against SP at RET.
  uintptr_t arg[4];  // saved arguments; Push fills only arg[0] and arg[1].
};
139
140struct InstrumentedCallStack {
141 public:
142  InstrumentedCallStack() : size_(0) { }
143
144  size_t size() { return size_; }
145
146  void Push(InstrumentedCallFrame::callback_t callback,
147            uintptr_t pc,
148            uintptr_t sp,
149            uintptr_t a0, uintptr_t a1) {
150    CHECK(size() < TS_ARRAY_SIZE(frames_));
151    size_++;
152    Top()->callback = callback;
153    Top()->pc = pc;
154    Top()->sp = sp;
155    Top()->arg[0] = a0;
156    Top()->arg[1] = a1;
157  }
158
159  void Pop() {
160    CHECK(size() > 0);
161    size_--;
162  }
163
164  InstrumentedCallFrame *Top() {
165    CHECK(size() > 0);
166    return &frames_[size_-1];
167  }
168
169  void Print() {
170    for (size_t i = 0; i < size(); i++) {
171      Printf( " %p\n", frames_[i].sp);
172      if (i > 0) CHECK(frames_[i].sp <= frames_[i-1].sp);
173    }
174  }
175
176 private:
177  InstrumentedCallFrame frames_[20];
178  size_t size_;
179};
180
181//--------------- PinThread ----------------- {{{1
// Capacity, in uintptr_t slots, of ThreadLocalEventBuffer::events.
// NOTE(review): the "- 2" presumably leaves room for the two header fields of
// ThreadLocalEventBuffer so the whole struct is 2048 words -- confirm.
const size_t kThreadLocalEventBufferSize = 2048 - 2;
// The number of mops should be at least 2 less than the size of TLEB
// so that we have space to put SBLOCK_ENTER token and the trace_info ptr.
const size_t kMaxMopsPerTrace = kThreadLocalEventBufferSize - 2;

// NOTE(review): not referenced in the visible part of this file; presumably
// the PIN register used for per-thread data -- confirm at its point of use.
REG tls_reg;
188
struct PinThread;

// Per-thread buffer of encoded events awaiting a flush to ThreadSanitizer.
struct ThreadLocalEventBuffer {
  PinThread *t;   // owning thread; dereferenced by TLEBFlushUnlocked.
  size_t size;    // number of slots of 'events' currently in use.
  uintptr_t events[kThreadLocalEventBufferSize];  // encoded event stream.
};
196
// Per-thread state of the PIN front-end; instances live in g_pin_threads,
// which is indexed by PIN's THREADID.
struct PinThread {
  ThreadLocalEventBuffer tleb;  // events buffered by the TLEB* functions.
  // Thread id reported to ThreadSanitizer; PIN's own tid wraps, this one is
  // meant to be unique (see DumpEventInternal).
  int          uniq_tid;
  uint32_t     literace_sampling;  // cache of a flag.
  volatile long last_child_tid;
  InstrumentedCallStack ic_stack;  // frames pushed by PUSH_AFTER_CALLBACK1.
  THREADID     tid;
  THREADID     parent_tid;  // (THREADID)-1 when there is no parent thread.
  pthread_t    my_ptid;
  size_t       thread_stack_size_if_known;
  size_t       last_child_stack_size_if_known;
  vector<StackFrame> shadow_stack;  // printed by PrintShadowStack.
  TraceInfo    *trace_info;  // trace whose mops TLEBAddTrace records next.
  int ignore_accesses;  // if > 0, ignore all memory accesses.
  // Nesting depth of TLEB_IGNORE_ALL_BEGIN/END (see HandleInnerEvent).
  int ignore_accesses_depth;
  int ignore_sync;      // if > 0, ignore all sync events.
  int spin_lock_recursion_depth;
  bool         thread_finished;
  bool         thread_done;  // set when THR_END is processed during a flush.
  bool         holding_lock;
  int          n_consumed_events;
#ifdef _MSC_VER
  enum StartupState {
    STARTING,
    CHILD_READY,
    MAY_CONTINUE,
  };
  volatile long startup_state;  // used to handle the CREATE_SUSPENDED flag.
#endif
  char         padding[64];  // avoid any chance of ping-pong.
};
228
// Array of pin threads, indexed by pin's THREADID.
static PinThread *g_pin_threads;

// If true, ignore all accesses in all threads.
// Defined elsewhere; toggled here by the TLEB_GLOBAL_IGNORE_ON/OFF events.
extern bool global_ignore;

#ifdef _MSC_VER
// NOTE(review): judging by the name, the set of Windows handles known to refer
// to threads -- confirm at the sites that insert into it.
static unordered_set<pthread_t> *g_win_handles_which_are_threads;
#endif
238
239//-------------------- ts_replace ------------------- {{{1
240static void ReportAccesRange(THREADID tid, uintptr_t pc, EventType type, uintptr_t x, size_t size) {
241  if (size && !g_pin_threads[tid].ignore_accesses) {
242    uintptr_t end = x + size;
243    for(uintptr_t a = x; a < end; a += 8) {
244      size_t cur_size = min((uintptr_t)8, end - a);
245      DumpEvent(0, type, tid, pc, a, cur_size);
246    }
247  }
248}
249
250#define REPORT_READ_RANGE(x, size) ReportAccesRange(tid, pc, READ, (uintptr_t)x, size)
251#define REPORT_WRITE_RANGE(x, size) ReportAccesRange(tid, pc, WRITE, (uintptr_t)x, size)
252
253#define EXTRA_REPLACE_PARAMS THREADID tid, uintptr_t pc,
254#define EXTRA_REPLACE_ARGS tid, pc,
255#include "ts_replace.h"
256
257//------------- ThreadSanitizer exports ------------ {{{1
258string Demangle(const char *str) {
259#if defined(__GNUC__)
260  int status;
261  char *demangled = __cxxabiv1::__cxa_demangle(str, 0, 0, &status);
262  if (demangled) {
263    string res = demangled;
264    free(demangled);
265    return res;
266  }
267#endif
268  return str;
269}
270
271void PcToStrings(uintptr_t pc, bool demangle,
272                string *img_name, string *rtn_name,
273                string *file_name, int *line_no) {
274  if (G_flags->symbolize) {
275    RTN rtn;
276    ScopedReentrantClientLock lock(__LINE__);
277    // ClientLock must be held.
278    PIN_GetSourceLocation(pc, NULL, line_no, file_name);
279    *file_name = ConvertToPlatformIndependentPath(*file_name);
280    rtn = RTN_FindByAddress(pc);
281    string name;
282    if (RTN_Valid(rtn)) {
283      *rtn_name = demangle
284          ? Demangle(RTN_Name(rtn).c_str())
285          : RTN_Name(rtn);
286      *img_name = IMG_Name(SEC_Img(RTN_Sec(rtn)));
287    }
288  }
289}
290
291string PcToRtnName(uintptr_t pc, bool demangle) {
292  string res;
293  if (G_flags->symbolize) {
294    {
295      ScopedReentrantClientLock lock(__LINE__);
296      RTN rtn = RTN_FindByAddress(pc);
297      if (RTN_Valid(rtn)) {
298        res = demangle
299            ? Demangle(RTN_Name(rtn).c_str())
300            : RTN_Name(rtn);
301      }
302    }
303  }
304  return res;
305}
306
307//--------------- ThreadLocalEventBuffer ----------------- {{{1
// The thread-local event buffer is an array of uintptr_t.
// The events are encoded like this:
// { RTN_CALL, call_pc, target_pc, ignore_below }
// { RTN_EXIT }
// { SBLOCK_ENTER, trace_info_of_size_n, addr1, addr2, ... addr_n}
313
// Event codes that exist only inside the thread-local event buffer. They are
// numbered above LAST_EVENT and are interpreted by HandleInnerEvent.
enum TLEBSpecificEvents {
  TLEB_IGNORE_ALL_BEGIN = LAST_EVENT + 1,  // ignore_accesses_depth++
  TLEB_IGNORE_ALL_END,                     // ignore_accesses_depth--
  TLEB_IGNORE_SYNC_BEGIN,                  // ignore_sync++
  TLEB_IGNORE_SYNC_END,                    // ignore_sync--
  TLEB_GLOBAL_IGNORE_ON,                   // global_ignore = true
  TLEB_GLOBAL_IGNORE_OFF,                  // global_ignore = false
};
322
323static bool DumpEventPlainText(EventType type, int32_t tid, uintptr_t pc,
324                        uintptr_t a, uintptr_t info) {
325#if DEBUG == 0 || defined(_MSC_VER)
326  return false;
327#else
328  if (G_flags->dump_events.empty()) return false;
329
330  static unordered_set<uintptr_t> *pc_set;
331  if (pc_set == NULL) {
332    pc_set = new unordered_set<uintptr_t>;
333  }
334  static FILE *log_file = NULL;
335  if (log_file == NULL) {
336    log_file = popen(("gzip > " + G_flags->dump_events).c_str(), "w");
337  }
338  if (G_flags->symbolize && pc_set->insert(pc).second) {
339    string img_name, rtn_name, file_name;
340    int line = 0;
341    PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line);
342    if (file_name.empty()) file_name = "unknown";
343    if (img_name.empty()) img_name = "unknown";
344    if (rtn_name.empty()) rtn_name = "unknown";
345    if (line == 0) line = 1;
346    fprintf(log_file, "#PC %lx %s %s %s %d\n",
347            (long)pc, img_name.c_str(), rtn_name.c_str(),
348            file_name.c_str(), line);
349  }
350  fprintf(log_file, "%s %x %lx %lx %lx\n", kEventNames[type], tid,
351          (long)pc, (long)a, (long)info);
352  return true;
353#endif
354}
355
356static void DumpEventInternal(EventType type, int32_t uniq_tid, uintptr_t pc,
357                              uintptr_t a, uintptr_t info) {
358  if (DumpEventPlainText(type, uniq_tid, pc, a, info)) return;
359  // PIN wraps the tid (after 2048), but we need a uniq tid.
360  Event event(type, uniq_tid, pc, a, info);
361  ThreadSanitizerHandleOneEvent(&event);
362}
363
364void ComputeIgnoreAccesses(PinThread &t) {
365  t.ignore_accesses = (t.ignore_accesses_depth != 0) || (global_ignore != 0);
366}
367
368static void HandleInnerEvent(PinThread &t, uintptr_t event) {
369  DCHECK(event > LAST_EVENT);
370  if (event == TLEB_IGNORE_ALL_BEGIN){
371    t.ignore_accesses_depth++;
372    ComputeIgnoreAccesses(t);
373  } else if (event == TLEB_IGNORE_ALL_END){
374    t.ignore_accesses_depth--;
375    CHECK(t.ignore_accesses_depth >= 0);
376    ComputeIgnoreAccesses(t);
377  } else if (event == TLEB_IGNORE_SYNC_BEGIN){
378    t.ignore_sync++;
379  } else if (event == TLEB_IGNORE_SYNC_END){
380    t.ignore_sync--;
381    CHECK(t.ignore_sync >= 0);
382  } else if (event == TLEB_GLOBAL_IGNORE_ON){
383    Report("INFO: GLOBAL IGNORE ON\n");
384    global_ignore = true;
385    ComputeIgnoreAccesses(t);
386  } else if (event == TLEB_GLOBAL_IGNORE_OFF){
387    Report("INFO: GLOBAL IGNORE OFF\n");
388    global_ignore = false;
389    ComputeIgnoreAccesses(t);
390  } else {
391    Printf("Event: %ld (last: %ld)\n", event, LAST_EVENT);
392    CHECK(0);
393  }
394}
395
396static INLINE bool WantToIgnoreEvent(PinThread &t, uintptr_t event) {
397  if (t.ignore_sync &&
398      (event == WRITER_LOCK || event == READER_LOCK || event == UNLOCK ||
399       event == SIGNAL || event == WAIT)) {
400    // do nothing, we are ignoring locks.
401    return true;
402  } else if (t.ignore_accesses && (event == READ || event == WRITE)) {
403    // do nothing, we are ignoring mops.
404    return true;
405  }
406  return false;
407}
408
409static INLINE void TLEBFlushUnlocked(ThreadLocalEventBuffer &tleb) {
410  if (tleb.size == 0) return;
411  PinThread &t = *tleb.t;
412  // global_ignore should be always on with race verifier
413  DCHECK(!g_race_verifier_active || global_ignore);
414  DCHECK(tleb.size <= kThreadLocalEventBufferSize);
415  if (DEBUG_MODE && t.thread_done) {
416    Printf("ACHTUNG!!! an event from a dead thread T%d\n", t.tid);
417  }
418  DCHECK(!t.thread_done);
419
420  if (TS_SERIALIZED == 1 || DEBUG_MODE) {
421    size_t max_idx = TS_ARRAY_SIZE(G_stats->tleb_flush);
422    size_t idx = min((size_t)u32_log2(tleb.size), max_idx - 1);
423    CHECK(idx < max_idx);
424    G_stats->tleb_flush[idx]++;
425  }
426
427  if (TS_SERIALIZED == 1 && G_flags->offline) {
428    fwrite(tleb.events, sizeof(uintptr_t), tleb.size, G_out);
429    tleb.size = 0;
430    return;
431  }
432
433  size_t i;
434  for (i = 0; i < tleb.size; ) {
435    uintptr_t event = tleb.events[i++];
436    DCHECK(!g_race_verifier_active ||
437        event == SBLOCK_ENTER || event == EXPECT_RACE || event == THR_START);
438    if (event == RTN_EXIT) {
439      if (DumpEventPlainText(RTN_EXIT, t.uniq_tid, 0, 0, 0)) continue;
440      ThreadSanitizerHandleRtnExit(t.uniq_tid);
441    } else if (event == RTN_CALL) {
442      uintptr_t call_pc = tleb.events[i++];
443      uintptr_t target_pc = tleb.events[i++];
444      IGNORE_BELOW_RTN ignore_below = (IGNORE_BELOW_RTN)tleb.events[i++];
445      if (DumpEventPlainText(RTN_CALL, t.uniq_tid, call_pc,
446                             target_pc, ignore_below)) continue;
447      ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
448                                   ignore_below);
449    } else if (event == SBLOCK_ENTER){
450      TraceInfo *trace_info = (TraceInfo*) tleb.events[i++];
451      DCHECK(trace_info);
452      bool do_this_trace = true;
453      if (t.ignore_accesses) {
454        do_this_trace = false;
455      } else if (t.literace_sampling) {
456        do_this_trace = !trace_info->LiteRaceSkipTraceRealTid(
457            t.uniq_tid, t.literace_sampling);
458      }
459
460      size_t n = trace_info->n_mops();
461      if (do_this_trace) {
462        if (DEBUG_MODE && !G_flags->dump_events.empty()) {
463          DumpEventPlainText(SBLOCK_ENTER, t.uniq_tid, trace_info->pc(), 0, 0);
464          for (size_t j = 0; j < n; j++) {
465            MopInfo *mop = trace_info->GetMop(j);
466            DCHECK(mop->size());
467            DCHECK(mop);
468            uintptr_t addr = tleb.events[i + j];
469            if (addr) {
470              DumpEventPlainText(mop->is_write() ? WRITE : READ, t.uniq_tid,
471                                     mop->pc(), addr, mop->size());
472            }
473          }
474        } else {
475          ThreadSanitizerHandleTrace(t.uniq_tid, trace_info, tleb.events+i);
476        }
477      }
478      i += n;
479    } else if (event == THR_START) {
480      uintptr_t parent = -1;
481      if (t.parent_tid != (THREADID)-1) {
482        parent = g_pin_threads[t.parent_tid].uniq_tid;
483      }
484      DumpEventInternal(THR_START, t.uniq_tid, 0, 0, parent);
485    } else if (event == THR_END) {
486      DumpEventInternal(THR_END, t.uniq_tid, 0, 0, 0);
487      DCHECK(t.thread_finished == true);
488      DCHECK(t.thread_done == false);
489      t.thread_done = true;
490      i += 3;  // consume the unneeded data.
491      DCHECK(i == tleb.size);  // should be last event in this tleb.
492    } else if (event > LAST_EVENT) {
493      HandleInnerEvent(t, event);
494    } else {
495      // all other events.
496      CHECK(event > NOOP && event < LAST_EVENT);
497      uintptr_t pc    = tleb.events[i++];
498      uintptr_t a     = tleb.events[i++];
499      uintptr_t info  = tleb.events[i++];
500      if (!WantToIgnoreEvent(t, event)) {
501        DumpEventInternal((EventType)event, t.uniq_tid, pc, a, info);
502      }
503    }
504  }
505  DCHECK(i == tleb.size);
506  tleb.size = 0;
507  if (DEBUG_MODE) { // for sanity checking.
508    memset(tleb.events, 0xf0, sizeof(tleb.events));
509  }
510}
511
512static INLINE void TLEBFlushLocked(PinThread &t) {
513#if TS_SERIALIZED==1
514  if (G_flags->dry_run) {
515    t.tleb.size = 0;
516    return;
517  }
518  CHECK(t.tleb.size <= kThreadLocalEventBufferSize);
519  G_stats->lock_sites[0]++;
520  ScopedLock lock(&g_main_ts_lock);
521  TLEBFlushUnlocked(t.tleb);
522#else
523  TLEBFlushUnlocked(t.tleb);
524#endif
525}
526
527static void TLEBAddRtnCall(PinThread &t, uintptr_t call_pc,
528                           uintptr_t target_pc, IGNORE_BELOW_RTN ignore_below) {
529  if (TS_SERIALIZED == 0) {
530    TLEBFlushLocked(t);
531    ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
532                                 ignore_below);
533    return;
534  }
535  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
536  if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
537    TLEBFlushLocked(t);
538    DCHECK(t.tleb.size == 0);
539  }
540  t.tleb.events[t.tleb.size++] = RTN_CALL;
541  t.tleb.events[t.tleb.size++] = call_pc;
542  t.tleb.events[t.tleb.size++] = target_pc;
543  t.tleb.events[t.tleb.size++] = ignore_below;
544  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
545}
546
547static void TLEBAddRtnExit(PinThread &t) {
548  if (TS_SERIALIZED == 0) {
549    TLEBFlushLocked(t);
550    ThreadSanitizerHandleRtnExit(t.uniq_tid);
551    return;
552  }
553  if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
554    TLEBFlushLocked(t);
555  }
556  t.tleb.events[t.tleb.size++] = RTN_EXIT;
557  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
558}
559
560static INLINE uintptr_t *TLEBAddTrace(PinThread &t) {
561  size_t n = t.trace_info->n_mops();
562  DCHECK(n > 0);
563  if (TS_SERIALIZED == 0) {
564    TLEBFlushLocked(t);
565  } else if (t.tleb.size + 2 + n > kThreadLocalEventBufferSize) {
566    TLEBFlushLocked(t);
567  }
568  if (TS_SERIALIZED == 1) {
569    t.tleb.events[t.tleb.size++] = SBLOCK_ENTER;
570    t.tleb.events[t.tleb.size++] = (uintptr_t)t.trace_info;
571  } else {
572    DCHECK(t.tleb.size == 0);
573    t.tleb.events[0] = SBLOCK_ENTER;
574    t.tleb.events[1] = (uintptr_t)t.trace_info;
575    t.tleb.size += 2;
576  }
577  uintptr_t *mop_addresses = &t.tleb.events[t.tleb.size];
578  // not every address will be written to. so they will stay 0.
579  for (size_t i = 0; i < n; i++) {
580    mop_addresses[i] = 0;
581  }
582  t.tleb.size += n;
583  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
584  return mop_addresses;
585}
586
587static void TLEBStartThread(PinThread &t) {
588  CHECK(t.tleb.size == 0);
589  t.tleb.events[t.tleb.size++] = THR_START;
590}
591
592static void TLEBSimpleEvent(PinThread &t, uintptr_t event) {
593  if (g_race_verifier_active)
594    return;
595  if (TS_SERIALIZED == 0) {
596    TLEBFlushLocked(t);
597    if (event < LAST_EVENT) {
598      Event e((EventType)event, t.uniq_tid, 0, 0, 0);
599      ThreadSanitizerHandleOneEvent(&e);
600    } else {
601      HandleInnerEvent(t, event);
602    }
603    return;
604  }
605  if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
606    TLEBFlushLocked(t);
607  }
608  t.tleb.events[t.tleb.size++] = event;
609  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
610}
611
612static void TLEBAddGenericEventAndFlush(PinThread &t,
613                                        EventType type, uintptr_t pc,
614                                        uintptr_t a, uintptr_t info) {
615  if (TS_SERIALIZED == 0) {
616    if (WantToIgnoreEvent(t, type)) return;
617    TLEBFlushLocked(t);
618    Event e(type, t.uniq_tid, pc, a, info);
619    ThreadSanitizerHandleOneEvent(&e);
620    return;
621  }
622  if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
623    TLEBFlushLocked(t);
624  }
625  DCHECK(type > NOOP && type < LAST_EVENT);
626  t.tleb.events[t.tleb.size++] = type;
627  t.tleb.events[t.tleb.size++] = pc;
628  t.tleb.events[t.tleb.size++] = a;
629  t.tleb.events[t.tleb.size++] = info;
630  TLEBFlushLocked(t);
631  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
632}
633
634static void UpdateCallStack(PinThread &t, ADDRINT sp);
635
636// Must be called from its thread (except for THR_END case)!
637static void DumpEventWithSp(uintptr_t sp, EventType type, int32_t tid, uintptr_t pc,
638                            uintptr_t a, uintptr_t info) {
639  if (!g_race_verifier_active || type == EXPECT_RACE) {
640    PinThread &t = g_pin_threads[tid];
641    if (sp) {
642      UpdateCallStack(t, sp);
643    }
644    TLEBAddGenericEventAndFlush(t, type, pc, a, info);
645  }
646}
647static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
648                      uintptr_t a, uintptr_t info) {
649  DumpEventWithSp(ctx ? PIN_GetContextReg(ctx, REG_STACK_PTR) : 0,
650            type, tid, pc, a, info);
651}
652
//--------- Wrapping and replacing --------------- {{{1
654static set<string> g_wrapped_functions;
655static void InformAboutFunctionWrap(RTN rtn, string name) {
656  g_wrapped_functions.insert(name);
657  if (!debug_wrap) return;
658  Printf("Function wrapped: %s (%s %s)\n", name.c_str(),
659         RTN_Name(rtn).c_str(), IMG_Name(SEC_Img(RTN_Sec(rtn))).c_str());
660}
661
662static bool RtnMatchesName(const string &rtn_name, const string &name) {
663  CHECK(name.size() > 0);
664  size_t pos = rtn_name.find(name);
665  if (pos == string::npos) {
666    return false;
667  }
668  if (pos == 0 && name.size() == rtn_name.size()) {
669  //  Printf("Full match: %s %s\n", rtn_name.c_str(), name.c_str());
670    return true;
671  }
672  // match MyFuncName@123
673  if (pos == 0 && name.size() < rtn_name.size()
674      && rtn_name[name.size()] == '@') {
675  //  Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
676    return true;
677  }
678  // match _MyFuncName@123
679  if (pos == 1 && rtn_name[0] == '_' && name.size() < rtn_name.size()
680      && rtn_name[name.size() + 1] == '@') {
681    // Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
682    return true;
683  }
684
685  return false;
686}
687
// Parameter lists for the "before" half of a fast interceptor, taking zero to
// three of the intercepted function's arguments.
#define FAST_WRAP_PARAM0 THREADID tid, ADDRINT pc, ADDRINT sp
#define FAST_WRAP_PARAM1 FAST_WRAP_PARAM0, ADDRINT arg0
#define FAST_WRAP_PARAM2 FAST_WRAP_PARAM1, ADDRINT arg1
#define FAST_WRAP_PARAM3 FAST_WRAP_PARAM2, ADDRINT arg2

// Parameter list of the "after" callback; it matches
// InstrumentedCallFrame::callback_t.
#define FAST_WRAP_PARAM_AFTER \
  THREADID tid, InstrumentedCallFrame &frame, ADDRINT ret


// Set to a non-zero expression to trace every PUSH_AFTER_CALLBACK1.
#define DEBUG_FAST_INTERCEPTORS 0
//#define DEBUG_FAST_INTERCEPTORS (tid == 1)

// Pushes a frame with 'callback' and a0 onto the calling thread's ic_stack.
// Relies on 'tid', 'pc' and 'sp' (and, for the debug print, 'arg0') being in
// scope at the expansion site.
// NOTE(review): the debug print uses 'arg0' rather than the macro parameter
// 'a0'. The macro also expands to two statements without a do/while(0)
// wrapper, so it must not be used as the unbraced body of an if/else.
#define PUSH_AFTER_CALLBACK1(callback, a0) \
  g_pin_threads[tid].ic_stack.Push(callback, pc, sp, a0, 0); \
  if (DEBUG_FAST_INTERCEPTORS) \
    Printf("T%d %s pc=%p sp=%p *sp=(%p) arg0=%p stack_size=%ld\n",\
         tid, __FUNCTION__, pc, sp,\
         ((void**)sp)[0],\
         arg0,\
         g_pin_threads[tid].ic_stack.size()\
         );\
709
710
// Name of the wrapper function for 'name'.
#define WRAP_NAME(name) Wrap_##name
// Registration shortcuts: each passes the stringified name and the matching
// Wrap_<name> function to the corresponding Wrap* helper. They rely on 'rtn'
// (and 'img' for WRAP4) being in scope at the expansion site.
// Note that no WRAPSTD9 is defined.
#define WRAP4(name) WrapFunc4(img, rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD1(name) WrapStdCallFunc1(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD2(name) WrapStdCallFunc2(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD3(name) WrapStdCallFunc3(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD4(name) WrapStdCallFunc4(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD5(name) WrapStdCallFunc5(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD6(name) WrapStdCallFunc6(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD7(name) WrapStdCallFunc7(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD8(name) WrapStdCallFunc8(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD10(name) WrapStdCallFunc10(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD11(name) WrapStdCallFunc11(rtn, #name, (AFUNPTR)Wrap_##name)
// Parameter list of a wrapper: thread id, pc, context, the original function
// pointer, then the wrapped function's arguments. The order matches the IARG
// list that WrapFunc4 passes to RTN_ReplaceSignature.
#define WRAP_PARAM4  THREADID tid, ADDRINT pc, CONTEXT *ctx, \
                                AFUNPTR f,\
                                uintptr_t arg0, uintptr_t arg1, \
                                uintptr_t arg2, uintptr_t arg3

// Longer parameter lists, each adding two more arguments to the previous one.
#define WRAP_PARAM6 WRAP_PARAM4, uintptr_t arg4, uintptr_t arg5
#define WRAP_PARAM8 WRAP_PARAM6, uintptr_t arg6, uintptr_t arg7
#define WRAP_PARAM10 WRAP_PARAM8, uintptr_t arg8, uintptr_t arg9
#define WRAP_PARAM12 WRAP_PARAM10, uintptr_t arg10, uintptr_t arg11
732
733static uintptr_t CallFun4(CONTEXT *ctx, THREADID tid,
734                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
735                         uintptr_t arg2, uintptr_t arg3) {
736  uintptr_t ret = 0xdeadbee1;
737  PIN_CallApplicationFunction(ctx, tid,
738                              CALLINGSTD_DEFAULT, (AFUNPTR)(f),
739                              PIN_PARG(uintptr_t), &ret,
740                              PIN_PARG(uintptr_t), arg0,
741                              PIN_PARG(uintptr_t), arg1,
742                              PIN_PARG(uintptr_t), arg2,
743                              PIN_PARG(uintptr_t), arg3,
744                              PIN_PARG_END());
745  return ret;
746}
747
748static uintptr_t CallFun6(CONTEXT *ctx, THREADID tid,
749                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
750                         uintptr_t arg2, uintptr_t arg3,
751                         uintptr_t arg4, uintptr_t arg5) {
752  uintptr_t ret = 0xdeadbee1;
753  PIN_CallApplicationFunction(ctx, tid,
754                              CALLINGSTD_DEFAULT, (AFUNPTR)(f),
755                              PIN_PARG(uintptr_t), &ret,
756                              PIN_PARG(uintptr_t), arg0,
757                              PIN_PARG(uintptr_t), arg1,
758                              PIN_PARG(uintptr_t), arg2,
759                              PIN_PARG(uintptr_t), arg3,
760                              PIN_PARG(uintptr_t), arg4,
761                              PIN_PARG(uintptr_t), arg5,
762                              PIN_PARG_END());
763  return ret;
764}
765
// Call the original function from inside a wrapper. These expand to
// CallFun4/CallFun6 using the names declared by WRAP_PARAM4/WRAP_PARAM6
// (ctx, tid, f, arg0...), so they only work where those names are in scope.
#define CALL_ME_INSIDE_WRAPPER_4() CallFun4(ctx, tid, f, arg0, arg1, arg2, arg3)
#define CALL_ME_INSIDE_WRAPPER_6() CallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5)
768
769// Completely replace (i.e. not wrap) a function with 3 (or less) parameters.
770// The original function will not be called.
771void ReplaceFunc3(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
772  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
773    InformAboutFunctionWrap(rtn, name);
774    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
775                                 CALLINGSTD_DEFAULT,
776                                 "proto",
777                                 PIN_PARG(uintptr_t),
778                                 PIN_PARG(uintptr_t),
779                                 PIN_PARG(uintptr_t),
780                                 PIN_PARG_END());
781    RTN_ReplaceSignature(rtn,
782                         AFUNPTR(replacement_func),
783                         IARG_PROTOTYPE, proto,
784                         IARG_THREAD_ID,
785                         IARG_INST_PTR,
786                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
787                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
788                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
789                         IARG_END);
790    PROTO_Free(proto);
791  }
792}
793
794// Wrap a function with up to 4 parameters.
795void WrapFunc4(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
796  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
797    InformAboutFunctionWrap(rtn, name);
798    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
799                                 CALLINGSTD_DEFAULT,
800                                 "proto",
801                                 PIN_PARG(uintptr_t),
802                                 PIN_PARG(uintptr_t),
803                                 PIN_PARG(uintptr_t),
804                                 PIN_PARG(uintptr_t),
805                                 PIN_PARG_END());
806    RTN_ReplaceSignature(rtn,
807                         AFUNPTR(replacement_func),
808                         IARG_PROTOTYPE, proto,
809                         IARG_THREAD_ID,
810                         IARG_INST_PTR,
811                         IARG_CONTEXT,
812                         IARG_ORIG_FUNCPTR,
813                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
814                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
815                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
816                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
817                         IARG_END);
818    PROTO_Free(proto);
819  }
820}
821
822// Wrap a function with up to 6 parameters.
823void WrapFunc6(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
824  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
825    InformAboutFunctionWrap(rtn, name);
826    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
827                                 CALLINGSTD_DEFAULT,
828                                 "proto",
829                                 PIN_PARG(uintptr_t),
830                                 PIN_PARG(uintptr_t),
831                                 PIN_PARG(uintptr_t),
832                                 PIN_PARG(uintptr_t),
833                                 PIN_PARG(uintptr_t),
834                                 PIN_PARG(uintptr_t),
835                                 PIN_PARG_END());
836    RTN_ReplaceSignature(rtn,
837                         AFUNPTR(replacement_func),
838                         IARG_PROTOTYPE, proto,
839                         IARG_THREAD_ID,
840                         IARG_INST_PTR,
841                         IARG_CONTEXT,
842                         IARG_ORIG_FUNCPTR,
843                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
844                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
845                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
846                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
847                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
848                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
849                         IARG_END);
850    PROTO_Free(proto);
851  }
852}
853
854
855//--------- Instrumentation callbacks --------------- {{{1
856//---------- Debug -----------------------------------{{{2
857#define DEB_PR (0)
858
859static void ShowPcAndSp(const char *where, THREADID tid,
860                        ADDRINT pc, ADDRINT sp) {
861    Printf("%s T%d sp=%ld pc=%p %s\n", where, tid, sp, pc,
862           PcToRtnName(pc, true).c_str());
863}
864
865static void PrintShadowStack(PinThread &t) {
866  Printf("T%d Shadow stack (%d)\n", t.tid, (int)t.shadow_stack.size());
867  for (int i = t.shadow_stack.size() - 1; i >= 0; i--) {
868    uintptr_t pc = t.shadow_stack[i].pc;
869    uintptr_t sp = t.shadow_stack[i].sp;
870    Printf("  sp=%ld pc=%lx %s\n", sp, pc, PcToRtnName(pc, true).c_str());
871  }
872}
873
874static void DebugOnlyShowPcAndSp(const char *where, THREADID tid,
875                                 ADDRINT pc, ADDRINT sp) {
876  if (DEB_PR) {
877    ShowPcAndSp(where, tid, pc, sp);
878  }
879}
880
881static uintptr_t WRAP_NAME(ThreadSanitizerQuery)(WRAP_PARAM4) {
882  const char *query = (const char*)arg0;
883  return (uintptr_t)ThreadSanitizerQuery(query);
884}
885
886//--------- Ignores -------------------------------- {{{2
887static void IgnoreMopsBegin(THREADID tid) {
888  // if (tid != 0) Printf("T%d IgnoreMops++\n", tid);
889  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_BEGIN);
890}
891static void IgnoreMopsEnd(THREADID tid) {
892  // if (tid != 0) Printf("T%d IgnoreMops--\n", tid);
893  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_END);
894}
895
896static void IgnoreSyncAndMopsBegin(THREADID tid) {
897  // if (tid != 0) Printf("T%d IgnoreSync++\n", tid);
898  IgnoreMopsBegin(tid);
899  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_BEGIN);
900}
901static void IgnoreSyncAndMopsEnd(THREADID tid) {
902  // if (tid != 0) Printf("T%d IgnoreSync--\n", tid);
903  IgnoreMopsEnd(tid);
904  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_END);
905}
906
907//--------- __cxa_guard_* -------------------------- {{{2
908// From gcc/cp/decl.c:
909// --------------------------------------------------------------
910//      Emit code to perform this initialization but once.  This code
911//      looks like:
912//
913//      static <type> guard;
914//      if (!guard.first_byte) {
915//        if (__cxa_guard_acquire (&guard)) {
916//          bool flag = false;
917//          try {
918//            // Do initialization.
919//            flag = true; __cxa_guard_release (&guard);
920//            // Register variable for destruction at end of program.
921//           } catch {
922//          if (!flag) __cxa_guard_abort (&guard);
923//         }
924//      }
925// --------------------------------------------------------------
926// So, when __cxa_guard_acquire returns true, we start ignoring all accesses
927// and in __cxa_guard_release we stop ignoring them.
928// We also need to ignore all accesses inside these two functions.
929
930static void Before_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT guard) {
931  IgnoreMopsBegin(tid);
932}
933
934static void After_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT ret) {
935  if (ret) {
936    // Continue ignoring, it will end in __cxa_guard_release.
937  } else {
938    // Stop ignoring, there will be no matching call to __cxa_guard_release.
939    IgnoreMopsEnd(tid);
940  }
941}
942
943static void After_cxa_guard_release(THREADID tid, ADDRINT pc) {
944  IgnoreMopsEnd(tid);
945}
946
947static uintptr_t WRAP_NAME(pthread_once)(WRAP_PARAM4) {
948  uintptr_t ret;
949  IgnoreMopsBegin(tid);
950  ret = CALL_ME_INSIDE_WRAPPER_4();
951  IgnoreMopsEnd(tid);
952  return ret;
953}
954
955void TmpCallback1(THREADID tid, ADDRINT pc) {
956  Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
957}
958void TmpCallback2(THREADID tid, ADDRINT pc) {
959  Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
960}
961
962//--------- Threads --------------------------------- {{{2
// Called in the creating thread |tid| right before the real thread-creation
// routine runs. Emits THR_CREATE_BEFORE, takes g_thread_create_lock (released
// later by HandleThreadCreateAbort or HandleThreadCreateAfter), starts
// ignoring accesses, and records |tid| as the thread that is currently
// creating a child.
static void HandleThreadCreateBefore(THREADID tid, ADDRINT pc) {
  DumpEvent(0, THR_CREATE_BEFORE, tid, pc, 0, 0);
  g_thread_create_lock.Lock();
  IgnoreMopsBegin(tid);
  // With the lock held, no other creation may be in flight.
  CHECK(g_tid_of_thread_which_called_create_thread == (THREADID)-1);
  g_tid_of_thread_which_called_create_thread = tid;
  n_created_threads++;
}
971
// Undoes HandleThreadCreateBefore when the creation routine failed: clears the
// "creating thread" marker, takes back the n_created_threads increment, stops
// ignoring accesses and releases g_thread_create_lock.
static void HandleThreadCreateAbort(THREADID tid) {
  CHECK(g_tid_of_thread_which_called_create_thread == tid);
  g_tid_of_thread_which_called_create_thread = (THREADID)-1;
  n_created_threads--;
  IgnoreMopsEnd(tid);
  g_thread_create_lock.Unlock();
}
979
// Called in the creating thread |tid| after the creation routine succeeded.
// Waits until the child has registered itself (CallbackForThreadStart stores
// the child's tid into the parent's last_child_tid), stores |child_ptid| in
// the child's PinThread, optionally suspends the child (Windows only), then
// stops ignoring accesses, releases g_thread_create_lock and emits
// THR_CREATE_AFTER.
//
// tid           - PIN thread id of the creating thread.
// child_ptid    - handle/pthread_t of the new thread as seen by the app.
// suspend_child - if true, the child must end up suspended before it runs
//                 client code; only implemented under _MSC_VER.
// Returns the PIN thread id of the child.
static THREADID HandleThreadCreateAfter(THREADID tid, pthread_t child_ptid,
                                        bool suspend_child) {
  // Spin, waiting for last_child_tid to appear (i.e. wait for the thread to
  // actually start) so that we know the child's tid. No locks.
  while (!ATOMIC_READ(&g_pin_threads[tid].last_child_tid)) {
    YIELD();
  }

  // The child has read the marker by now; reset it for the next creation.
  CHECK(g_tid_of_thread_which_called_create_thread == tid);
  g_tid_of_thread_which_called_create_thread = -1;

  THREADID last_child_tid = g_pin_threads[tid].last_child_tid;
  CHECK(last_child_tid);

  PinThread &child_t = g_pin_threads[last_child_tid];
  child_t.my_ptid = child_ptid;

#ifdef _MSC_VER
  if (suspend_child) {
    // Wait for the child to park itself in Before_BaseThreadInitThunk.
    while (ATOMIC_READ(&child_t.startup_state) != PinThread::CHILD_READY) {
      YIELD();
    }
    // Strictly speaking, PIN forbids calling system functions like this.
    // This may violate application library isolation but
    // a) YIELD == WINDOWS::Sleep, so we violate it anyways
    // b) SuspendThread probably calls NtSuspendThread right away
    WINDOWS::DWORD old_count = WINDOWS::SuspendThread((WINDOWS::HANDLE)child_ptid);  // TODO handle?
    CHECK(old_count == 0);
  }
  // Lets the child leave its wait loop in Before_BaseThreadInitThunk.
  child_t.startup_state = PinThread::MAY_CONTINUE;
#else
  CHECK(!suspend_child);  // Not implemented - do we need to?
#endif

  // Read uniq_tid before dropping the lock; it is reported in the event below.
  int uniq_tid_of_child = child_t.uniq_tid;
  g_pin_threads[tid].last_child_tid = 0;

  IgnoreMopsEnd(tid);
  g_thread_create_lock.Unlock();

  DumpEvent(0, THR_CREATE_AFTER, tid, 0, 0, uniq_tid_of_child);
  return last_child_tid;
}
1023
1024static uintptr_t WRAP_NAME(pthread_create)(WRAP_PARAM4) {
1025  HandleThreadCreateBefore(tid, pc);
1026
1027  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
1028  if (ret != 0) {
1029    HandleThreadCreateAbort(tid);
1030    return ret;
1031  }
1032
1033  pthread_t child_ptid = *(pthread_t*)arg0;
1034  HandleThreadCreateAfter(tid, child_ptid, false);
1035
1036  return ret;
1037}
1038
// Thread-start callback. Initializes the PinThread slot for |tid|, points the
// tls_reg register in |ctxt| into the thread's event buffer, determines the
// parent thread, and tells that parent (via its last_child_tid field) that
// the child is alive. |flags| and |v| are unused.
void CallbackForThreadStart(THREADID tid, CONTEXT *ctxt,
                            INT32 flags, void *v) {
  // We can not rely on PIN_GetParentTid() since it is broken on Windows.

  // Lazily allocate the per-thread table on the first thread start.
  if (g_pin_threads == NULL) {
    g_pin_threads = new PinThread[kMaxThreads];
  }

  bool has_parent = true;
  if (tid == 0) {
    // Main thread or we have attached to a running process.
    has_parent = false;
  } else {
    CHECK(tid > 0);
  }

  CHECK(tid < kMaxThreads);
  PinThread &t = g_pin_threads[tid];
  // Start from an all-zero slot, then fill in the fields.
  memset(&t, 0, sizeof(PinThread));
  t.uniq_tid = n_started_threads++;
  t.literace_sampling = G_flags->literace_sampling;
  t.tid = tid;
  t.tleb.t = &t;
#if defined(_MSC_VER)
  t.startup_state = PinThread::STARTING;
#endif
  ComputeIgnoreAccesses(t);


  PIN_SetContextReg(ctxt, tls_reg, (ADDRINT)&t.tleb.events[2]);

  t.parent_tid = -1;
  if (has_parent) {
    // The parent is whoever is currently inside HandleThreadCreateBefore/After.
    t.parent_tid = g_tid_of_thread_which_called_create_thread;
#if !defined(_MSC_VER)  // On Windows, threads may appear out of thin air.
    CHECK(t.parent_tid != (THREADID)-1);
#endif  // _MSC_VER
  }

  if (debug_thread) {
    Printf("T%d ThreadStart parent=%d child=%d\n", tid, t.parent_tid, tid);
  }

  if (has_parent && t.parent_tid != (THREADID)-1) {
    // Storing our tid here is what releases the parent's spin loop in
    // HandleThreadCreateAfter.
    g_pin_threads[t.parent_tid].last_child_tid = tid;
    t.thread_stack_size_if_known =
        g_pin_threads[t.parent_tid].last_child_stack_size_if_known;
  } else {
#if defined(_MSC_VER)
    // No parent will ever set MAY_CONTINUE for us, so do it ourselves.
    t.startup_state = PinThread::MAY_CONTINUE;
#endif
  }

  // This is a lock-free (thread local) operation.
  TLEBStartThread(t);
/* TODO(timurrrr): investigate and un-comment
#ifdef _MSC_VER
  // Ignore all mops & sync before the real thread code.
  // See the corresponding IgnoreSyncAndMopsEnd in Before_BaseThreadInitThunk.
  IgnoreSyncAndMopsBegin(tid);
#endif
*/
}
1102
1103static void Before_start_thread(THREADID tid, ADDRINT pc, ADDRINT sp) {
1104  PinThread &t = g_pin_threads[tid];
1105  if (debug_thread) {
1106    Printf("T%d Before_start_thread: sp=%p my_ptid=%p diff=%p\n",
1107         tid, sp, t.my_ptid, t.my_ptid - sp);
1108  }
1109  // This is a rather scary hack, but I see no easy way to avoid it.
1110  // On linux NPTL, the pthread_t structure is the same block of memory
1111  // as the stack (and the tls?). Somewhere inside the pthread_t
1112  // object lives the address of stackblock followed by its size
1113  // (see nptl/descr.h).
1114  // At the current point we may not know the value of pthread_t (my_ptid),
1115  // but we do know the current sp, which is a bit less than my_ptid.
1116  //
1117  // address                        value
1118  // ------------------------------------------------
1119  // 0xffffffffffffffff:
1120  //
1121  // stackblock + stackblock_size:
1122  // my_ptid:
1123  //
1124  //                                stackblock_size
1125  //                                stackblock
1126  //
1127  // current_sp:
1128  //
1129  //
1130  // stackblock:
1131  //
1132  // 0x0000000000000000:
1133  // -------------------------------------------------
1134  //
1135  // So, we itrate from sp to the higher addresses (but just in case, not more
1136  // than a few pages) trying to find a pair of values which looks like
1137  // stackblock and stackblock_size. Oh well.
1138  // Note that in valgrind we are able to get this info from
1139  //  pthread_getattr_np (linux) or pthread_get_stackaddr_np (mac),
1140  // but in PIN we can't call those (can we?).
1141  uintptr_t prev = 0;
1142  for (uintptr_t sp1 = sp; sp1 - sp < 0x2000;
1143       sp1 += sizeof(uintptr_t)) {
1144    uintptr_t val = *(uintptr_t*)sp1;
1145    if (val == 0) continue;
1146    if (prev &&
1147        (prev & 0xfff) == 0 && // stack is page aligned
1148        prev < sp &&           // min stack is < sp
1149        prev + val > sp &&     // max stack is > sp
1150        val >= (1 << 15) &&    // stack size is >= 32k
1151        val <= 128 * (1 << 20) // stack size is hardly > 128M
1152        ) {
1153      if (debug_thread) {
1154        Printf("T%d found stack: %p size=%p\n", tid, prev, val);
1155      }
1156      DumpEvent(0, THR_STACK_TOP, tid, pc, prev + val, val);
1157      return;
1158    }
1159    prev = val;
1160  }
1161  // The hack above does not always works. (TODO(kcc)). Do something.
1162  Printf("WARNING: ThreadSanitizerPin is guessing stack size for T%d\n", tid);
1163  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, t.thread_stack_size_if_known);
1164}
1165
1166#ifdef _MSC_VER
1167static uintptr_t WRAP_NAME(CreateThread)(WRAP_PARAM6) {
1168  PinThread &t = g_pin_threads[tid];
1169  t.last_child_stack_size_if_known = arg1 ? arg1 : 1024 * 1024;
1170
1171  HandleThreadCreateBefore(tid, pc);
1172
1173  // We can't start the thread suspended because we want to get its
1174  // PIN thread ID before leaving CreateThread.
1175  // So, we reset the CREATE_SUSPENDED flag and SuspendThread before any client
1176  // code is executed in the HandleThreadCreateAfter if needed.
1177  bool should_be_suspended = arg4 & CREATE_SUSPENDED;
1178  arg4 &= ~CREATE_SUSPENDED;
1179
1180  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
1181  if (ret == NULL) {
1182    HandleThreadCreateAbort(tid);
1183    return ret;
1184  }
1185  pthread_t child_ptid = ret;
1186  THREADID child_tid = HandleThreadCreateAfter(tid, child_ptid,
1187                                               should_be_suspended);
1188  {
1189    ScopedReentrantClientLock lock(__LINE__);
1190    if (g_win_handles_which_are_threads == NULL) {
1191      g_win_handles_which_are_threads = new unordered_set<pthread_t>;
1192    }
1193    g_win_handles_which_are_threads->insert(child_ptid);
1194  }
1195  return ret;
1196}
1197
// Runs before BaseThreadInitThunk in a new thread. Reports the stack top
// (|sp| and the stack size recorded for this thread) and, unless the thread
// is already allowed to continue, announces CHILD_READY and waits until
// startup_state becomes MAY_CONTINUE (set by HandleThreadCreateAfter).
static void Before_BaseThreadInitThunk(THREADID tid, ADDRINT pc, ADDRINT sp) {
  PinThread &t = g_pin_threads[tid];
  size_t stack_size = t.thread_stack_size_if_known;
  // Printf("T%d %s %p %p\n", tid, __FUNCTION__, sp, stack_size);
  /* TODO(timurrrr): investigate and uncomment
  if (tid != 0) {
    // Ignore all mops & sync before the real thread code.
    // See the corresponding IgnoreSyncAndMopsBegin in CallbackForThreadStart.
    IgnoreSyncAndMopsEnd(tid);
    TLEBFlushLocked(t);
    CHECK(t.ignore_sync == 0);
    CHECK(t.ignore_accesses == 0);
  }
  */
  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, stack_size);

#ifdef _MSC_VER
  if (t.startup_state != PinThread::MAY_CONTINUE) {
    // The only other state expected here is STARTING.
    CHECK(t.startup_state == PinThread::STARTING);
    // Tell the parent we are parked, then wait for its go-ahead.
    t.startup_state = PinThread::CHILD_READY;
    while (ATOMIC_READ(&t.startup_state) != PinThread::MAY_CONTINUE) {
      YIELD();
    }
    // Corresponds to SIGNAL from ResumeThread if the thread was suspended on
    // start.
    DumpEvent(0, WAIT, tid, pc, t.my_ptid, 0);
  }
#endif
}
1227
1228static void Before_RtlExitUserThread(THREADID tid, ADDRINT pc) {
1229  PinThread &t = g_pin_threads[tid];
1230  if (t.tid != 0) {
1231    // Once we started exiting the thread, ignore the locking events.
1232    // This way we will avoid h-b arcs between unrelated threads.
1233    // We also start ignoring all mops, otherwise we will get tons of race
1234    // reports from the windows guts.
1235    IgnoreSyncAndMopsBegin(tid);
1236  }
1237}
1238#endif  // _MSC_VER
1239
1240void CallbackForThreadFini(THREADID tid, const CONTEXT *ctxt,
1241                          INT32 code, void *v) {
1242  PinThread &t = g_pin_threads[tid];
1243  t.thread_finished = true;
1244  // We can not DumpEvent here,
1245  // due to possible deadlock with PIN's internal lock.
1246  if (debug_thread) {
1247    Printf("T%d Thread finished (ptid=%d)\n", tid, t.my_ptid);
1248  }
1249}
1250
1251static bool HandleThreadJoinAfter(THREADID tid, pthread_t joined_ptid) {
1252  THREADID joined_tid = kMaxThreads;
1253  int max_uniq_tid_found = -1;
1254
1255  // TODO(timurrrr): walking through g_pin_threads may be slow.
1256  // Do we need to/Can we optimize it?
1257  for (THREADID j = 1; j < kMaxThreads; j++) {
1258    if (g_pin_threads[j].thread_finished == false)
1259      continue;
1260    if (g_pin_threads[j].my_ptid == joined_ptid) {
1261      // We search for the thread with the maximum uniq_tid to work around
1262      // thread HANDLE reuse issues.
1263      if (max_uniq_tid_found < g_pin_threads[j].uniq_tid) {
1264        max_uniq_tid_found = g_pin_threads[j].uniq_tid;
1265        joined_tid = j;
1266      }
1267    }
1268  }
1269  if (joined_tid == kMaxThreads) {
1270    // This may happen in the following case:
1271    //  - A non-joinable thread is created and a handle is assigned to it.
1272    //  - Since the thread is non-joinable, the handle is then reused
1273    //  for some other purpose, e.g. for a WaitableEvent.
1274    //  - We did not yet register the thread fini event.
1275    //  - We observe WaitForSingleObjectEx(ptid) and think that this is thread
1276    //  join event, while it is not.
1277    if (debug_thread)
1278      Printf("T%d JoinAfter returns false! ptid=%d\n", tid, joined_ptid);
1279    return false;
1280  }
1281  CHECK(joined_tid < kMaxThreads);
1282  CHECK(joined_tid > 0);
1283  g_pin_threads[joined_tid].my_ptid = 0;
1284  int joined_uniq_tid = g_pin_threads[joined_tid].uniq_tid;
1285
1286  if (debug_thread) {
1287    Printf("T%d JoinAfter   parent=%d child=%d (uniq=%d)\n", tid, tid,
1288           joined_tid, joined_uniq_tid);
1289  }
1290
1291  // Here we send an event for a different thread (joined_tid), which is already
1292  // dead.
1293  DumpEvent(0, THR_END, joined_tid, 0, 0, 0);
1294
1295
1296  DumpEvent(0, THR_JOIN_AFTER, tid, 0, joined_uniq_tid, 0);
1297  return true;
1298}
1299
1300static uintptr_t WRAP_NAME(pthread_join)(WRAP_PARAM4) {
1301  if (G_flags->debug_level >= 2)
1302    Printf("T%d in  pthread_join %p\n", tid, arg0);
1303  pthread_t joined_ptid = (pthread_t)arg0;
1304  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
1305  HandleThreadJoinAfter(tid, joined_ptid);
1306  if (G_flags->debug_level >= 2)
1307    Printf("T%d out pthread_join %p\n", tid, arg0);
1308  return ret;
1309}
1310
1311static size_t WRAP_NAME(fwrite)(WRAP_PARAM4) {
1312  void* p = (void*)arg0;
1313  size_t size = (size_t)arg1 * (size_t)arg2;
1314  REPORT_READ_RANGE(p, size);
1315  IgnoreMopsBegin(tid);
1316  size_t ret = CALL_ME_INSIDE_WRAPPER_4();
1317  IgnoreMopsEnd(tid);
1318  return ret;
1319}
1320
1321#ifdef _MSC_VER
1322
1323
1324uintptr_t CallStdCallFun1(CONTEXT *ctx, THREADID tid,
1325                         AFUNPTR f, uintptr_t arg0) {
1326  uintptr_t ret = 0xdeadbee1;
1327  PIN_CallApplicationFunction(ctx, tid,
1328                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1329                              PIN_PARG(uintptr_t), &ret,
1330                              PIN_PARG(uintptr_t), arg0,
1331                              PIN_PARG_END());
1332  return ret;
1333}
1334
1335uintptr_t CallStdCallFun2(CONTEXT *ctx, THREADID tid,
1336                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1) {
1337  uintptr_t ret = 0xdeadbee2;
1338  PIN_CallApplicationFunction(ctx, tid,
1339                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1340                              PIN_PARG(uintptr_t), &ret,
1341                              PIN_PARG(uintptr_t), arg0,
1342                              PIN_PARG(uintptr_t), arg1,
1343                              PIN_PARG_END());
1344  return ret;
1345}
1346
1347uintptr_t CallStdCallFun3(CONTEXT *ctx, THREADID tid,
1348                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
1349                         uintptr_t arg2) {
1350  uintptr_t ret = 0xdeadbee3;
1351  PIN_CallApplicationFunction(ctx, tid,
1352                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1353                              PIN_PARG(uintptr_t), &ret,
1354                              PIN_PARG(uintptr_t), arg0,
1355                              PIN_PARG(uintptr_t), arg1,
1356                              PIN_PARG(uintptr_t), arg2,
1357                              PIN_PARG_END());
1358  return ret;
1359}
1360
1361uintptr_t CallStdCallFun4(CONTEXT *ctx, THREADID tid,
1362                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
1363                         uintptr_t arg2, uintptr_t arg3) {
1364  uintptr_t ret = 0xdeadbee4;
1365  PIN_CallApplicationFunction(ctx, tid,
1366                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1367                              PIN_PARG(uintptr_t), &ret,
1368                              PIN_PARG(uintptr_t), arg0,
1369                              PIN_PARG(uintptr_t), arg1,
1370                              PIN_PARG(uintptr_t), arg2,
1371                              PIN_PARG(uintptr_t), arg3,
1372                              PIN_PARG_END());
1373  return ret;
1374}
1375
1376uintptr_t CallStdCallFun5(CONTEXT *ctx, THREADID tid,
1377                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
1378                         uintptr_t arg2, uintptr_t arg3,
1379                         uintptr_t arg4) {
1380  uintptr_t ret = 0xdeadbee5;
1381  PIN_CallApplicationFunction(ctx, tid,
1382                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1383                              PIN_PARG(uintptr_t), &ret,
1384                              PIN_PARG(uintptr_t), arg0,
1385                              PIN_PARG(uintptr_t), arg1,
1386                              PIN_PARG(uintptr_t), arg2,
1387                              PIN_PARG(uintptr_t), arg3,
1388                              PIN_PARG(uintptr_t), arg4,
1389                              PIN_PARG_END());
1390  return ret;
1391}
1392
1393uintptr_t CallStdCallFun6(CONTEXT *ctx, THREADID tid,
1394                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
1395                         uintptr_t arg2, uintptr_t arg3,
1396                         uintptr_t arg4, uintptr_t arg5) {
1397  uintptr_t ret = 0xdeadbee6;
1398  PIN_CallApplicationFunction(ctx, tid,
1399                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1400                              PIN_PARG(uintptr_t), &ret,
1401                              PIN_PARG(uintptr_t), arg0,
1402                              PIN_PARG(uintptr_t), arg1,
1403                              PIN_PARG(uintptr_t), arg2,
1404                              PIN_PARG(uintptr_t), arg3,
1405                              PIN_PARG(uintptr_t), arg4,
1406                              PIN_PARG(uintptr_t), arg5,
1407                              PIN_PARG_END());
1408  return ret;
1409}
1410
1411uintptr_t CallStdCallFun7(CONTEXT *ctx, THREADID tid,
1412                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
1413                         uintptr_t arg2, uintptr_t arg3,
1414                         uintptr_t arg4, uintptr_t arg5,
1415                         uintptr_t arg6) {
1416  uintptr_t ret = 0xdeadbee7;
1417  PIN_CallApplicationFunction(ctx, tid,
1418                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
1419                              PIN_PARG(uintptr_t), &ret,
1420                              PIN_PARG(uintptr_t), arg0,
1421                              PIN_PARG(uintptr_t), arg1,
1422                              PIN_PARG(uintptr_t), arg2,
1423                              PIN_PARG(uintptr_t), arg3,
1424                              PIN_PARG(uintptr_t), arg4,
1425                              PIN_PARG(uintptr_t), arg5,
1426                              PIN_PARG(uintptr_t), arg6,
1427                              PIN_PARG_END());
1428  return ret;
1429}
1430
1431uintptr_t WRAP_NAME(ResumeThread)(WRAP_PARAM4) {
1432//  Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
1433  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1434  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1435  return ret;
1436}
1437uintptr_t WRAP_NAME(RtlInitializeCriticalSection)(WRAP_PARAM4) {
1438//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1439  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
1440  IgnoreSyncAndMopsBegin(tid);
1441  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1442  IgnoreSyncAndMopsEnd(tid);
1443  return ret;
1444}
1445uintptr_t WRAP_NAME(RtlInitializeCriticalSectionAndSpinCount)(WRAP_PARAM4) {
1446//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1447  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
1448  IgnoreSyncAndMopsBegin(tid);
1449  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
1450  IgnoreSyncAndMopsEnd(tid);
1451  return ret;
1452}
1453uintptr_t WRAP_NAME(RtlInitializeCriticalSectionEx)(WRAP_PARAM4) {
1454//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1455  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
1456  IgnoreSyncAndMopsBegin(tid);
1457  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1458  IgnoreSyncAndMopsEnd(tid);
1459  return ret;
1460}
1461uintptr_t WRAP_NAME(RtlDeleteCriticalSection)(WRAP_PARAM4) {
1462//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1463  DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
1464  IgnoreSyncAndMopsBegin(tid);
1465  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1466  IgnoreSyncAndMopsEnd(tid);
1467  return ret;
1468}
1469uintptr_t WRAP_NAME(RtlEnterCriticalSection)(WRAP_PARAM4) {
1470//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1471  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1472  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
1473  return ret;
1474}
1475uintptr_t WRAP_NAME(RtlTryEnterCriticalSection)(WRAP_PARAM4) {
1476  // Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+5, arg0);
1477  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1478  if (ret) {
1479    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
1480  }
1481  return ret;
1482}
1483uintptr_t WRAP_NAME(RtlLeaveCriticalSection)(WRAP_PARAM4) {
1484//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1485  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
1486  return CallStdCallFun1(ctx, tid, f, arg0);
1487}
1488
1489uintptr_t WRAP_NAME(DuplicateHandle)(WRAP_PARAM8) {
1490  Printf("WARNING: DuplicateHandle called for handle 0x%X.\n", arg1);
1491  Printf("Future events on this handle may be processed incorrectly.\n");
1492  return CallStdCallFun7(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
1493}
1494
1495uintptr_t WRAP_NAME(SetEvent)(WRAP_PARAM4) {
1496  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1497  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1498  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1499  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
1500  return ret;
1501}
1502
1503uintptr_t InternalWrapCreateSemaphore(WRAP_PARAM4) {
1504  if (arg3 != NULL) {
1505    Printf("WARNING: CreateSemaphore called with lpName='%s'.\n", arg3);
1506    Printf("Future events on this semaphore may be processed incorrectly "
1507           "if it is reused.\n");
1508  }
1509  return CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
1510}
1511
1512uintptr_t WRAP_NAME(CreateSemaphoreA)(WRAP_PARAM4) {
1513  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
1514}
1515
1516uintptr_t WRAP_NAME(CreateSemaphoreW)(WRAP_PARAM4) {
1517  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
1518}
1519
1520uintptr_t WRAP_NAME(ReleaseSemaphore)(WRAP_PARAM4) {
1521  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1522  return CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1523}
1524
1525uintptr_t WRAP_NAME(RtlInterlockedPushEntrySList)(WRAP_PARAM4) {
1526  DumpEvent(ctx, SIGNAL, tid, pc, arg1, 0);
1527  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
1528  // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, arg1);
1529  return ret;
1530}
1531
1532uintptr_t WRAP_NAME(RtlInterlockedPopEntrySList)(WRAP_PARAM4) {
1533  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1534  // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, ret);
1535  if (ret) {
1536    DumpEvent(ctx, WAIT, tid, pc, ret, 0);
1537  }
1538  return ret;
1539}
1540
1541uintptr_t WRAP_NAME(RtlAcquireSRWLockExclusive)(WRAP_PARAM4) {
1542  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1543  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
1544  return ret;
1545}
1546uintptr_t WRAP_NAME(RtlAcquireSRWLockShared)(WRAP_PARAM4) {
1547  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1548  DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
1549  return ret;
1550}
1551uintptr_t WRAP_NAME(RtlTryAcquireSRWLockExclusive)(WRAP_PARAM4) {
1552  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
1553  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1554  if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
1555    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
1556  }
1557  return ret;
1558}
1559uintptr_t WRAP_NAME(RtlTryAcquireSRWLockShared)(WRAP_PARAM4) {
1560  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
1561  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1562  if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
1563    DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
1564  }
1565  return ret;
1566}
1567uintptr_t WRAP_NAME(RtlReleaseSRWLockExclusive)(WRAP_PARAM4) {
1568  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
1569  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
1570  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1571  return ret;
1572}
1573uintptr_t WRAP_NAME(RtlReleaseSRWLockShared)(WRAP_PARAM4) {
1574  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
1575  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
1576  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1577  return ret;
1578}
1579uintptr_t WRAP_NAME(RtlInitializeSRWLock)(WRAP_PARAM4) {
1580  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
1581  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
1582  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1583  return ret;
1584}
1585
1586uintptr_t WRAP_NAME(RtlWakeConditionVariable)(WRAP_PARAM4) {
1587  // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
1588  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1589  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1590  return ret;
1591}
1592uintptr_t WRAP_NAME(RtlWakeAllConditionVariable)(WRAP_PARAM4) {
1593  // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
1594  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1595  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
1596  return ret;
1597}
1598uintptr_t WRAP_NAME(RtlSleepConditionVariableSRW)(WRAP_PARAM4) {
1599  // No need to unlock/lock - looks like RtlSleepConditionVariableSRW performs
1600  // Rtl{Acquire,Release}SRW... calls itself!
1601  uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
1602  if ((ret & 0xFF) == 0)
1603    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
1604  // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
1605  return ret;
1606}
1607uintptr_t WRAP_NAME(RtlSleepConditionVariableCS)(WRAP_PARAM4) {
1608  // TODO(timurrrr): do we need unlock/lock?
1609  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1610  if ((ret & 0xFF) == 0)
1611    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
1612  // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
1613  return ret;
1614}
1615
1616uintptr_t WRAP_NAME(RtlQueueWorkItem)(WRAP_PARAM4) {
1617  // Printf("T%d %s arg0=%p arg1=%p; arg2=%d\n", tid, __FUNCTION__, arg0, arg1, arg2);
1618  g_windows_thread_pool_calback_set->insert(arg0);
1619  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
1620  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1621  return ret;
1622}
1623
1624uintptr_t WRAP_NAME(RegisterWaitForSingleObject)(WRAP_PARAM6) {
1625  // Printf("T%d %s arg0=%p arg2=%p\n", tid, __FUNCTION__, arg0, arg2);
1626  g_windows_thread_pool_calback_set->insert(arg2);
1627  DumpEvent(ctx, SIGNAL, tid, pc, arg2, 0);
1628  uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
1629  if (ret) {
1630    uintptr_t wait_object = *(uintptr_t*)arg0;
1631    (*g_windows_thread_pool_wait_object_map)[wait_object] = arg2;
1632    // Printf("T%d %s *arg0=%p\n", tid, __FUNCTION__, wait_object);
1633  }
1634  return ret;
1635}
1636
1637uintptr_t WRAP_NAME(UnregisterWaitEx)(WRAP_PARAM4) {
1638  CHECK(g_windows_thread_pool_wait_object_map);
1639  uintptr_t obj = (*g_windows_thread_pool_wait_object_map)[arg0];
1640  // Printf("T%d %s arg0=%p obj=%p\n", tid, __FUNCTION__, arg0, obj);
1641  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
1642  if (ret) {
1643    DumpEvent(ctx, WAIT, tid, pc, obj, 0);
1644  }
1645  return ret;
1646}
1647
1648uintptr_t WRAP_NAME(VirtualAlloc)(WRAP_PARAM4) {
1649  // Printf("T%d VirtualAlloc: %p %p %p %p\n", tid, arg0, arg1, arg2, arg3);
1650  uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
1651  return ret;
1652}
1653
1654uintptr_t WRAP_NAME(GlobalAlloc)(WRAP_PARAM4) {
1655  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
1656  // Printf("T%d %s(%p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, ret);
1657  if (ret != 0) {
1658    DumpEvent(ctx, MALLOC, tid, pc, ret, arg1);
1659  }
1660  return ret;
1661}
1662
1663uintptr_t WRAP_NAME(ZwAllocateVirtualMemory)(WRAP_PARAM6) {
1664  // Printf("T%d >>%s(%p %p %p %p %p %p)\n", tid, __FUNCTION__, arg0, arg1, arg2, arg3, arg4, arg5);
1665  uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
1666  // Printf("T%d <<%s(%p %p) = %p\n", tid, __FUNCTION__, *(void**)arg1, *(void**)arg3, ret);
1667  if (ret == 0) {
1668    DumpEvent(ctx, MALLOC, tid, pc, *(uintptr_t*)arg1, *(uintptr_t*)arg3);
1669  }
1670  return ret;
1671}
1672
1673uintptr_t WRAP_NAME(AllocateHeap)(WRAP_PARAM4) {
1674  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1675  // Printf("T%d RtlAllocateHeap(%p %p %p)=%p\n", tid, arg0, arg1, arg2, ret);
1676  if (ret != 0) {
1677    DumpEvent(ctx, MALLOC, tid, pc, ret, arg3);
1678  }
1679  return ret;
1680}
1681
1682uintptr_t WRAP_NAME(HeapCreate)(WRAP_PARAM4) {
1683  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1684  Printf("T%d %s(%p %p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, arg2, ret);
1685  return ret;
1686}
1687
// Local stand-in for WAIT_OBJECT_0. The wait wrappers below compare the
// return value of the real wait function against it.
// We don't use the definition of WAIT_OBJECT_0 from winbase.h because
// it can't be compiled here for some reason.
#define WAIT_OBJECT_0_ 0
1691
1692uintptr_t WRAP_NAME(WaitForSingleObjectEx)(WRAP_PARAM4) {
1693  if (G_flags->verbosity >= 1) {
1694    ShowPcAndSp(__FUNCTION__, tid, pc, 0);
1695    Printf("arg0=%lx arg1=%lx\n", arg0, arg1);
1696  }
1697
1698  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
1699  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
1700  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
1701
1702  if (ret == WAIT_OBJECT_0_) {
1703    bool is_thread_handle = false;
1704    {
1705      ScopedReentrantClientLock lock(__LINE__);
1706      if (g_win_handles_which_are_threads) {
1707        is_thread_handle = g_win_handles_which_are_threads->count(arg0) > 0;
1708        g_win_handles_which_are_threads->erase(arg0);
1709      }
1710    }
1711    if (is_thread_handle)
1712      HandleThreadJoinAfter(tid, arg0);
1713    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
1714  }
1715
1716  return ret;
1717}
1718
1719uintptr_t WRAP_NAME(WaitForMultipleObjectsEx)(WRAP_PARAM6) {
1720  if (G_flags->verbosity >= 1) {
1721    ShowPcAndSp(__FUNCTION__, tid, pc, 0);
1722    Printf("arg0=%lx arg1=%lx arg2=%lx arg3=%lx\n", arg0, arg1, arg2, arg3);
1723  }
1724
1725  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
1726  uintptr_t ret = CallStdCallFun5(ctx, tid, f, arg0, arg1, arg2, arg3, arg4);
1727  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
1728
1729  if (ret >= WAIT_OBJECT_0_ && ret < WAIT_OBJECT_0_ + arg0) {
1730    // TODO(timurrrr): add support for WAIT_ABANDONED_0
1731
1732    int start_id, count;
1733    if (arg2 /* wait_for_all */ == 1) {
1734      start_id = 0;
1735      count = arg0;
1736    } else {
1737      start_id = ret - WAIT_OBJECT_0_;
1738      count = 1;
1739    }
1740
1741    for (int i = start_id; i < start_id + count; i++) {
1742      uintptr_t handle = ((uintptr_t*)arg1)[i];
1743      bool is_thread_handle = false;
1744      {
1745        ScopedReentrantClientLock lock(__LINE__);
1746        if (g_win_handles_which_are_threads) {
1747          is_thread_handle = g_win_handles_which_are_threads->count(handle) > 0;
1748          g_win_handles_which_are_threads->erase(handle);
1749        }
1750      }
1751      if (is_thread_handle)
1752        HandleThreadJoinAfter(tid, handle);
1753      DumpEvent(ctx, WAIT, tid, pc, handle, 0);
1754    }
1755  }
1756
1757  return ret;
1758}
1759
1760#endif  // _MSC_VER
1761
1762//--------- memory allocation ---------------------- {{{2
1763uintptr_t WRAP_NAME(mmap)(WRAP_PARAM6) {
1764  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
1765
1766  if (ret != (ADDRINT)-1L) {
1767    DumpEvent(ctx, MMAP, tid, pc, ret, arg1);
1768  }
1769
1770  return ret;
1771}
1772
1773uintptr_t WRAP_NAME(munmap)(WRAP_PARAM4) {
1774  PinThread &t = g_pin_threads[tid];
1775  TLEBFlushLocked(t);
1776  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
1777  if (ret != (uintptr_t)-1L) {
1778    DumpEvent(ctx, MUNMAP, tid, pc, arg0, arg1);
1779  }
1780  return ret;
1781}
1782
1783
1784void After_malloc(FAST_WRAP_PARAM_AFTER) {
1785  size_t size = frame.arg[0];
1786  if (DEBUG_FAST_INTERCEPTORS)
1787    Printf("T%d %s %ld %p\n", tid, __FUNCTION__, size, ret);
1788  IgnoreSyncAndMopsEnd(tid);
1789  DumpEventWithSp(frame.sp, MALLOC, tid, frame.pc, ret, size);
1790}
1791
1792void Before_malloc(FAST_WRAP_PARAM1) {
1793  IgnoreSyncAndMopsBegin(tid);
1794  PUSH_AFTER_CALLBACK1(After_malloc, arg0);
1795}
1796
1797void After_free(FAST_WRAP_PARAM_AFTER) {
1798  if (DEBUG_FAST_INTERCEPTORS)
1799    Printf("T%d %s %p\n", tid, __FUNCTION__, frame.arg[0]);
1800  IgnoreSyncAndMopsEnd(tid);
1801}
1802
1803void Before_free(FAST_WRAP_PARAM1) {
1804  PinThread &t = g_pin_threads[tid];
1805  TLEBFlushLocked(t);
1806  DumpEvent(0, FREE, tid, pc, arg0, 0);
1807  IgnoreSyncAndMopsBegin(tid);
1808  PUSH_AFTER_CALLBACK1(After_free, arg0);
1809}
1810
1811void Before_calloc(FAST_WRAP_PARAM2) {
1812  IgnoreSyncAndMopsBegin(tid);
1813  PUSH_AFTER_CALLBACK1(After_malloc, arg0 * arg1);
1814}
1815
1816void Before_realloc(FAST_WRAP_PARAM2) {
1817  PinThread &t = g_pin_threads[tid];
1818  TLEBFlushLocked(t);
1819  IgnoreSyncAndMopsBegin(tid);
1820  // TODO: handle FREE? We don't do it in Valgrind right now.
1821  PUSH_AFTER_CALLBACK1(After_malloc, arg1);
1822}
1823
1824// Fast path for INS_InsertIfCall.
1825ADDRINT Before_RET_IF(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
1826  PinThread &t = g_pin_threads[tid];
1827  return t.ic_stack.size();
1828}
1829
1830void Before_RET_THEN(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
1831  PinThread &t = g_pin_threads[tid];
1832  if (t.ic_stack.size() == 0) return;
1833  DCHECK(t.ic_stack.size());
1834  InstrumentedCallFrame *frame = t.ic_stack.Top();
1835  if (DEBUG_FAST_INTERCEPTORS) {
1836    Printf("T%d RET  pc=%p sp=%p *sp=%p frame.sp=%p stack_size %ld\n",
1837           tid, pc, sp, *(uintptr_t*)sp, frame->sp, t.ic_stack.size());
1838    t.ic_stack.Print();
1839  }
1840  while (frame->sp <= sp) {
1841    if (DEBUG_FAST_INTERCEPTORS)
1842      Printf("pop\n");
1843    frame->callback(tid, *frame, ret);
1844    t.ic_stack.Pop();
1845    if (t.ic_stack.size()) {
1846      frame = t.ic_stack.Top();
1847    } else {
1848      break;
1849    }
1850  }
1851}
1852
1853// These are no longer used in favor of "fast" wrappers (e.g. Before_malloc)
1854// TODO(timurrrr): Check on the buildbot and remove.
1855uintptr_t WRAP_NAME(malloc)(WRAP_PARAM4) { CHECK(0); }
1856uintptr_t WRAP_NAME(realloc)(WRAP_PARAM4) { CHECK(0); }
1857uintptr_t WRAP_NAME(calloc)(WRAP_PARAM4) { CHECK(0); }
1858uintptr_t WRAP_NAME(free)(WRAP_PARAM4) { CHECK(0); }
1859
1860
1861//-------- Routines and stack ---------------------- {{{2
// Unwinds the shadow call stack of thread 't' to match stack pointer 'sp'.
// Every frame at the top of t.shadow_stack whose saved sp is <= 'sp' is
// treated as returned: a routine-exit record is added to the TLEB and the
// frame is popped. On Windows builds, popping a frame whose pc is in the
// thread pool callback set also emits a SIGNAL on that pc.
static INLINE void UpdateCallStack(PinThread &t, ADDRINT sp) {
  while (t.shadow_stack.size() > 0 && sp >= t.shadow_stack.back().sp) {
    // The exit record is added before the frame is actually removed.
    TLEBAddRtnExit(t);
    size_t size = t.shadow_stack.size();
    CHECK(size < 1000000);  // stay sane.
    // Read the pc now: the frame is popped further down.
    uintptr_t popped_pc = t.shadow_stack.back().pc;
#ifdef _MSC_VER
    // h-b edge from here to UnregisterWaitEx.
    CHECK(g_windows_thread_pool_calback_set);
    if (g_windows_thread_pool_calback_set->count(popped_pc)) {
      DumpEvent(0, SIGNAL, t.tid, 0, popped_pc, 0);
      // Printf("T%d ret %p\n", t.tid, popped_pc);
    }
#endif

    if (debug_rtn) {
      ShowPcAndSp("RET : ", t.tid, popped_pc, sp);
    }
    t.shadow_stack.pop_back();
    // Exactly one frame must have been removed in this iteration.
    CHECK(size - 1 == t.shadow_stack.size());
    if (DEB_PR) {
      Printf("POP SHADOW STACK\n");
      PrintShadowStack(t);
    }
  }
}
1888
1889void InsertBeforeEvent_SysCall(THREADID tid, ADDRINT sp) {
1890  PinThread &t = g_pin_threads[tid];
1891  UpdateCallStack(t, sp);
1892  TLEBFlushLocked(t);
1893}
1894
1895void InsertBeforeEvent_Call(THREADID tid, ADDRINT pc, ADDRINT target,
1896                            ADDRINT sp, IGNORE_BELOW_RTN ignore_below) {
1897  PinThread &t = g_pin_threads[tid];
1898  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, pc, sp);
1899  UpdateCallStack(t, sp);
1900  TLEBAddRtnCall(t, pc, target, ignore_below);
1901  t.shadow_stack.push_back(StackFrame(target, sp));
1902  if (DEB_PR) {
1903    PrintShadowStack(t);
1904  }
1905  if (DEBUG_MODE && debug_rtn) {
1906    ShowPcAndSp("CALL: ", t.tid, target, sp);
1907  }
1908
1909#ifdef _MSC_VER
1910  // h-b edge from RtlQueueWorkItem to here.
1911  CHECK(g_windows_thread_pool_calback_set);
1912  if (g_windows_thread_pool_calback_set->count(target)) {
1913    DumpEvent(0, WAIT, tid, pc, target, 0);
1914  }
1915#endif
1916}
1917
1918static void OnTraceSerial(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
1919    uintptr_t **tls_reg_p) {
1920  PinThread &t = g_pin_threads[tid];
1921
1922  DCHECK(trace_info);
1923  DCHECK(trace_info->n_mops() > 0);
1924  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);
1925
1926  UpdateCallStack(t, sp);
1927
1928  t.trace_info = trace_info;
1929  trace_info->counter()++;
1930  *tls_reg_p = TLEBAddTrace(t);
1931}
1932
1933static void OnTraceParallel(uintptr_t *tls_reg, ADDRINT sp, TraceInfo *trace_info) {
1934  // Get the thread handler directly from tls_reg.
1935  PinThread &t = *(PinThread*)(tls_reg - 4);
1936  t.trace_info = trace_info;
1937  if (t.ignore_accesses) return;
1938
1939  DCHECK(trace_info);
1940  DCHECK(trace_info->n_mops() > 0);
1941  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);
1942
1943  UpdateCallStack(t, sp);
1944
1945
1946  if (DEBUG_MODE && G_flags->show_stats)  // this stat may be racey; avoid ping-pong.
1947    trace_info->counter()++;
1948  TLEBAddTrace(t);
1949}
1950
1951/* Verify all mop accesses in the last trace of the given thread by registering
1952   them with RaceVerifier and sleeping a bit. */
1953static void OnTraceVerifyInternal(PinThread &t, uintptr_t **tls_reg_p) {
1954  DCHECK(g_race_verifier_active);
1955  if (t.trace_info) {
1956    int need_sleep = 0;
1957    for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
1958      uintptr_t addr = (*tls_reg_p)[i];
1959      if (addr) {
1960        MopInfo *mop = t.trace_info->GetMop(i);
1961        need_sleep += RaceVerifierStartAccess(t.uniq_tid, addr, mop->pc(),
1962            mop->is_write());
1963      }
1964    }
1965
1966    if (!need_sleep)
1967      return;
1968
1969    usleep(G_flags->race_verifier_sleep_ms * 1000);
1970
1971    for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
1972      uintptr_t addr = (*tls_reg_p)[i];
1973      if (addr) {
1974        MopInfo *mop = t.trace_info->GetMop(i);
1975        RaceVerifierEndAccess(t.uniq_tid, addr, mop->pc(), mop->is_write());
1976      }
1977    }
1978  }
1979}
1980
1981static void OnTraceNoMopsVerify(THREADID tid, ADDRINT sp,
1982    uintptr_t **tls_reg_p) {
1983  PinThread &t = g_pin_threads[tid];
1984  DCHECK(g_race_verifier_active);
1985  OnTraceVerifyInternal(t, tls_reg_p);
1986  t.trace_info = NULL;
1987}
1988
1989static void OnTraceVerify(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
1990    uintptr_t **tls_reg_p) {
1991  DCHECK(g_race_verifier_active);
1992  PinThread &t = g_pin_threads[tid];
1993  OnTraceVerifyInternal(t, tls_reg_p);
1994
1995  DCHECK(trace_info->n_mops() > 0);
1996
1997  t.trace_info = trace_info;
1998  trace_info->counter()++;
1999  *tls_reg_p = TLEBAddTrace(t);
2000}
2001
2002
2003//---------- Memory accesses -------------------------- {{{2
2004// 'addr' is the section of t.tleb.events which is set in OnTrace.
2005// 'idx' is the number of this mop in its trace.
// 'a' is the actual address.
2007// 'tid' is thread ID, used only in debug mode.
2008//
2009// In opt mode this is just one instruction! Something like this:
2010// mov %rcx,(%rdi,%rdx,8)
2011static void OnMop(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
2012  if (DEBUG_MODE) {
2013    PinThread &t= g_pin_threads[tid];
2014    CHECK(idx < kMaxMopsPerTrace);
2015    CHECK(idx < t.trace_info->n_mops());
2016    uintptr_t *ptr = addr + idx;
2017    CHECK(ptr >= t.tleb.events);
2018    CHECK(ptr < t.tleb.events + kThreadLocalEventBufferSize);
2019    if (a == G_flags->trace_addr) {
2020      Printf("T%d %s %lx\n", t.tid, __FUNCTION__, a);
2021    }
2022  }
2023  addr[idx] = a;
2024}
2025
2026static void On_PredicatedMop(BOOL is_running, uintptr_t *addr,
2027                             THREADID tid, ADDRINT idx, ADDRINT a) {
2028  if (is_running) {
2029    OnMop(addr, tid, idx, a);
2030  }
2031}
2032
2033static void OnMopCheckIdentStoreBefore(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
2034  // Write the value of *a to tleb.
2035  addr[idx] = *(uintptr_t*)a;
2036}
2037static void OnMopCheckIdentStoreAfter(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
2038  // Check if the previous value of *a is equal to the new one.
2039  // If not, we have a regular memory access. If yes, we have an ident operation,
2040  // which we want to ignore.
2041  uintptr_t previous_value_of_a = addr[idx];
2042  uintptr_t new_value_of_a = *(uintptr_t*)a;
2043  // 111...111 if the values are different, 0 otherwise.
2044  uintptr_t ne_mask = -(uintptr_t)(new_value_of_a != previous_value_of_a);
2045  addr[idx] = ne_mask & a;
2046}
2047
2048//---------- I/O; exit------------------------------- {{{2
2049static const uintptr_t kIOMagic = 0x1234c678;
2050
2051static void Before_SignallingIOCall(THREADID tid, ADDRINT pc) {
2052  DumpEvent(0, SIGNAL, tid, pc, kIOMagic, 0);
2053}
2054
2055static void After_WaitingIOCall(THREADID tid, ADDRINT pc) {
2056  DumpEvent(0, WAIT, tid, pc, kIOMagic, 0);
2057}
2058
2059static const uintptr_t kAtexitMagic = 0x9876f432;
2060
2061static void On_atexit(THREADID tid, ADDRINT pc) {
2062  DumpEvent(0, SIGNAL, tid, pc, kAtexitMagic, 0);
2063}
2064
2065static void On_exit(THREADID tid, ADDRINT pc) {
2066  DumpEvent(0, WAIT, tid, pc, kAtexitMagic, 0);
2067}
2068
2069//---------- Synchronization -------------------------- {{{2
2070// locks
2071static void Before_pthread_unlock(THREADID tid, ADDRINT pc, ADDRINT mu) {
2072  DumpEvent(0, UNLOCK, tid, pc, mu, 0);
2073}
2074
2075static void After_pthread_mutex_lock(FAST_WRAP_PARAM_AFTER) {
2076  DumpEventWithSp(frame.sp, WRITER_LOCK, tid, frame.pc, frame.arg[0], 0);
2077}
2078
2079static void Before_pthread_mutex_lock(FAST_WRAP_PARAM1) {
2080  PUSH_AFTER_CALLBACK1(After_pthread_mutex_lock, arg0);
2081}
2082
2083// In some versions of libpthread, pthread_spin_lock is effectively
2084// a recursive function. It jumps to its first insn:
2085//    beb0:       f0 ff 0f                lock decl (%rdi)
2086//    beb3:       75 0b                   jne    bec0 <pthread_spin_lock+0x10>
2087//    beb5:       31 c0                   xor    %eax,%eax
2088//    beb7:       c3                      retq
2089//    beb8:       0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
2090//    bebf:       00
2091//    bec0:       f3 90                   pause
2092//    bec2:       83 3f 00                cmpl   $0x0,(%rdi)
2093//    bec5:       7f e9  >>>>>>>>>>>>>    jg     beb0 <pthread_spin_lock>
2094//    bec7:       eb f7                   jmp    bec0 <pthread_spin_lock+0x10>
2095//
2096// So, we need to act only when we return from the last (depth=0) invocation.
2097static uintptr_t WRAP_NAME(pthread_spin_lock)(WRAP_PARAM4) {
2098  PinThread &t= g_pin_threads[tid];
2099  t.spin_lock_recursion_depth++;
2100  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2101  t.spin_lock_recursion_depth--;
2102  if (t.spin_lock_recursion_depth == 0) {
2103    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
2104  }
2105  return ret;
2106}
2107
2108static uintptr_t WRAP_NAME(pthread_rwlock_wrlock)(WRAP_PARAM4) {
2109  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2110  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
2111  return ret;
2112}
2113
2114static uintptr_t WRAP_NAME(pthread_rwlock_rdlock)(WRAP_PARAM4) {
2115  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2116  DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
2117  return ret;
2118}
2119
2120static uintptr_t WRAP_NAME(pthread_mutex_trylock)(WRAP_PARAM4) {
2121  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2122  if (ret == 0)
2123    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
2124  return ret;
2125}
2126
2127static uintptr_t WRAP_NAME(pthread_spin_trylock)(WRAP_PARAM4) {
2128  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2129  if (ret == 0)
2130    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
2131  return ret;
2132}
2133
2134static uintptr_t WRAP_NAME(pthread_spin_init)(WRAP_PARAM4) {
2135  DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
2136  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2137  return ret;
2138}
2139static uintptr_t WRAP_NAME(pthread_spin_destroy)(WRAP_PARAM4) {
2140  DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
2141  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2142  return ret;
2143}
2144static uintptr_t WRAP_NAME(pthread_spin_unlock)(WRAP_PARAM4) {
2145  DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
2146  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2147  return ret;
2148}
2149
2150static uintptr_t WRAP_NAME(pthread_rwlock_trywrlock)(WRAP_PARAM4) {
2151  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2152  if (ret == 0)
2153    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
2154  return ret;
2155}
2156
2157static uintptr_t WRAP_NAME(pthread_rwlock_tryrdlock)(WRAP_PARAM4) {
2158  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2159  if (ret == 0)
2160    DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
2161  return ret;
2162}
2163
2164
2165static void Before_pthread_mutex_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
2166  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
2167}
2168static void Before_pthread_rwlock_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
2169  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
2170}
2171
2172static void Before_pthread_mutex_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
2173  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
2174}
2175static void Before_pthread_rwlock_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
2176  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
2177}
2178
2179// barrier
2180static uintptr_t WRAP_NAME(pthread_barrier_init)(WRAP_PARAM4) {
2181  DumpEvent(ctx, CYCLIC_BARRIER_INIT, tid, pc, arg0, arg2);
2182  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2183  return ret;
2184}
2185static uintptr_t WRAP_NAME(pthread_barrier_wait)(WRAP_PARAM4) {
2186  DumpEvent(ctx, CYCLIC_BARRIER_WAIT_BEFORE, tid, pc, arg0, 0);
2187  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2188  DumpEvent(ctx, CYCLIC_BARRIER_WAIT_AFTER, tid, pc, arg0, 0);
2189  return ret;
2190}
2191
2192
2193// condvar
2194static void Before_pthread_cond_signal(THREADID tid, ADDRINT pc, ADDRINT cv) {
2195  DumpEvent(0, SIGNAL, tid, pc, cv, 0);
2196}
2197
2198static uintptr_t WRAP_NAME(pthread_cond_wait)(WRAP_PARAM4) {
2199  DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
2200  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2201  DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
2202  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
2203  return ret;
2204}
2205static uintptr_t WRAP_NAME(pthread_cond_timedwait)(WRAP_PARAM4) {
2206  DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
2207  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2208  if (ret == 0) {
2209    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
2210  }
2211  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
2212  return ret;
2213}
2214
2215// epoll
2216static const uintptr_t kSocketMagic = 0xDEADFBAD;
2217
2218static void Before_epoll_ctl(THREADID tid, ADDRINT pc) {
2219  DumpEvent(0, SIGNAL, tid, pc, kSocketMagic, 0);
2220}
2221
2222static void After_epoll_wait(THREADID tid, ADDRINT pc) {
2223  DumpEvent(0, WAIT, tid, pc, kSocketMagic, 0);
2224}
2225
2226// sem
2227static void After_sem_open(THREADID tid, ADDRINT pc, ADDRINT ret) {
2228  // TODO(kcc): need to handle it more precise?
2229  DumpEvent(0, SIGNAL, tid, pc, ret, 0);
2230}
2231static void Before_sem_post(THREADID tid, ADDRINT pc, ADDRINT sem) {
2232  DumpEvent(0, SIGNAL, tid, pc, sem, 0);
2233}
2234
2235static uintptr_t WRAP_NAME(sem_wait)(WRAP_PARAM4) {
2236  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2237  DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
2238  return ret;
2239}
2240static uintptr_t WRAP_NAME(sem_trywait)(WRAP_PARAM4) {
2241  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2242  if (ret == 0) {
2243    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
2244  }
2245  return ret;
2246}
2247
2248// etc
2249#if defined(__GNUC__)
2250uintptr_t WRAP_NAME(lockf)(WRAP_PARAM4) {
2251  if (arg1 == F_ULOCK) {
2252    DumpEvent(0, SIGNAL, tid, pc, kSocketMagic, 0);
2253  }
2254
2255  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
2256
2257  if (arg1 == F_LOCK && ret == 0) {
2258    DumpEvent(0, WAIT, tid, pc, kSocketMagic, 0);
2259  }
2260
2261  return ret;
2262}
2263#endif
2264
2265//--------- Annotations -------------------------- {{{2
2266static void On_AnnotateBenignRace(THREADID tid, ADDRINT pc,
2267                                  ADDRINT file, ADDRINT line,
2268                                  ADDRINT a, ADDRINT descr) {
2269  DumpEvent(0, BENIGN_RACE, tid, descr, a, 1);
2270}
2271
2272static void On_AnnotateBenignRaceSized(THREADID tid, ADDRINT pc,
2273                                       ADDRINT file, ADDRINT line,
2274                                       ADDRINT a, ADDRINT size, ADDRINT descr) {
2275  DumpEvent(0, BENIGN_RACE, tid, descr, a, size);
2276}
2277
2278static void On_AnnotateExpectRace(THREADID tid, ADDRINT pc,
2279                                  ADDRINT file, ADDRINT line,
2280                                  ADDRINT a, ADDRINT descr) {
2281  DumpEvent(0, EXPECT_RACE, tid, descr, a, 0);
2282}
2283
2284static void On_AnnotateFlushExpectedRaces(THREADID tid, ADDRINT pc,
2285                                  ADDRINT file, ADDRINT line) {
2286  DumpEvent(0, FLUSH_EXPECTED_RACES, 0, 0, 0, 0);
2287}
2288
2289
2290static void On_AnnotateTraceMemory(THREADID tid, ADDRINT pc,
2291                                   ADDRINT file, ADDRINT line,
2292                                   ADDRINT a) {
2293  DumpEvent(0, TRACE_MEM, tid, pc, a, 0);
2294}
2295
2296static void On_AnnotateNewMemory(THREADID tid, ADDRINT pc,
2297                                   ADDRINT file, ADDRINT line,
2298                                   ADDRINT a, ADDRINT size) {
2299  DumpEvent(0, MALLOC, tid, pc, a, size);
2300}
2301
2302static void On_AnnotateNoOp(THREADID tid, ADDRINT pc,
2303                            ADDRINT file, ADDRINT line, ADDRINT a) {
2304  Printf("%s T%d: %s:%d %p\n", __FUNCTION__, tid, (char*)file, (int)line, a);
2305  //DumpEvent(0, STACK_TRACE, tid, pc, 0, 0);
2306//  PrintShadowStack(tid);
2307}
2308
2309static void On_AnnotateFlushState(THREADID tid, ADDRINT pc,
2310                                  ADDRINT file, ADDRINT line) {
2311  DumpEvent(0, FLUSH_STATE, tid, pc, 0, 0);
2312}
2313
2314static void On_AnnotateCondVarSignal(THREADID tid, ADDRINT pc,
2315                                     ADDRINT file, ADDRINT line, ADDRINT obj) {
2316  DumpEvent(0, SIGNAL, tid, pc, obj, 0);
2317}
2318
2319static void On_AnnotateCondVarWait(THREADID tid, ADDRINT pc,
2320                                   ADDRINT file, ADDRINT line, ADDRINT obj) {
2321  DumpEvent(0, WAIT, tid, pc, obj, 0);
2322}
2323
2324static void On_AnnotateHappensBefore(THREADID tid, ADDRINT pc,
2325                                     ADDRINT file, ADDRINT line, ADDRINT obj) {
2326  DumpEvent(0, SIGNAL, tid, pc, obj, 0);
2327}
2328
2329static void On_AnnotateHappensAfter(THREADID tid, ADDRINT pc,
2330                                    ADDRINT file, ADDRINT line, ADDRINT obj) {
2331  DumpEvent(0, WAIT, tid, pc, obj, 0);
2332}
2333
2334static void On_AnnotateEnableRaceDetection(THREADID tid, ADDRINT pc,
2335                                        ADDRINT file, ADDRINT line,
2336                                        ADDRINT enable) {
2337  if (!g_race_verifier_active)
2338    TLEBSimpleEvent(g_pin_threads[tid],
2339        enable ? TLEB_GLOBAL_IGNORE_OFF : TLEB_GLOBAL_IGNORE_ON);
2340}
2341
2342static void On_AnnotateIgnoreReadsBegin(THREADID tid, ADDRINT pc,
2343                                        ADDRINT file, ADDRINT line) {
2344  DumpEvent(0, IGNORE_READS_BEG, tid, pc, 0, 0);
2345}
2346static void On_AnnotateIgnoreReadsEnd(THREADID tid, ADDRINT pc,
2347                                      ADDRINT file, ADDRINT line) {
2348  DumpEvent(0, IGNORE_READS_END, tid, pc, 0, 0);
2349}
2350static void On_AnnotateIgnoreWritesBegin(THREADID tid, ADDRINT pc,
2351                                         ADDRINT file, ADDRINT line) {
2352  DumpEvent(0, IGNORE_WRITES_BEG, tid, pc, 0, 0);
2353}
2354static void On_AnnotateIgnoreWritesEnd(THREADID tid, ADDRINT pc,
2355                                       ADDRINT file, ADDRINT line) {
2356  DumpEvent(0, IGNORE_WRITES_END, tid, pc, 0, 0);
2357}
2358static void On_AnnotateThreadName(THREADID tid, ADDRINT pc,
2359                                  ADDRINT file, ADDRINT line,
2360                                  ADDRINT name) {
2361  DumpEvent(0, SET_THREAD_NAME, tid, pc, name, 0);
2362}
2363static void On_AnnotatePublishMemoryRange(THREADID tid, ADDRINT pc,
2364                                          ADDRINT file, ADDRINT line,
2365                                          ADDRINT a, ADDRINT size) {
2366  DumpEvent(0, PUBLISH_RANGE, tid, pc, a, size);
2367}
2368
2369static void On_AnnotateUnpublishMemoryRange(THREADID tid, ADDRINT pc,
2370                                          ADDRINT file, ADDRINT line,
2371                                          ADDRINT a, ADDRINT size) {
2372//  Printf("T%d %s %lx %lx\n", tid, __FUNCTION__, a, size);
2373  DumpEvent(0, UNPUBLISH_RANGE, tid, pc, a, size);
2374}
2375
2376
2377static void On_AnnotateMutexIsUsedAsCondVar(THREADID tid, ADDRINT pc,
2378                                            ADDRINT file, ADDRINT line,
2379                                            ADDRINT mu) {
2380  DumpEvent(0, HB_LOCK, tid, pc, mu, 0);
2381}
2382
2383static void On_AnnotateMutexIsNotPhb(THREADID tid, ADDRINT pc,
2384                                     ADDRINT file, ADDRINT line,
2385                                     ADDRINT mu) {
2386  DumpEvent(0, NON_HB_LOCK, tid, pc, mu, 0);
2387}
2388
2389static void On_AnnotatePCQCreate(THREADID tid, ADDRINT pc,
2390                                 ADDRINT file, ADDRINT line,
2391                                 ADDRINT pcq) {
2392  DumpEvent(0, PCQ_CREATE, tid, pc, pcq, 0);
2393}
2394
2395static void On_AnnotatePCQDestroy(THREADID tid, ADDRINT pc,
2396                                  ADDRINT file, ADDRINT line,
2397                                  ADDRINT pcq) {
2398  DumpEvent(0, PCQ_DESTROY, tid, pc, pcq, 0);
2399}
2400
2401static void On_AnnotatePCQPut(THREADID tid, ADDRINT pc,
2402                              ADDRINT file, ADDRINT line,
2403                              ADDRINT pcq) {
2404  DumpEvent(0, PCQ_PUT, tid, pc, pcq, 0);
2405}
2406
2407static void On_AnnotatePCQGet(THREADID tid, ADDRINT pc,
2408                              ADDRINT file, ADDRINT line,
2409                              ADDRINT pcq) {
2410  DumpEvent(0, PCQ_GET, tid, pc, pcq, 0);
2411}
2412
2413static void On_AnnotateRWLockCreate(THREADID tid, ADDRINT pc,
2414                                    ADDRINT file, ADDRINT line,
2415                                    ADDRINT lock) {
2416  DumpEvent(0, LOCK_CREATE, tid, pc, lock, 0);
2417}
2418
2419static void On_AnnotateRWLockDestroy(THREADID tid, ADDRINT pc,
2420                                    ADDRINT file, ADDRINT line,
2421                                    ADDRINT lock) {
2422  DumpEvent(0, LOCK_DESTROY, tid, pc, lock, 0);
2423}
2424
2425static void On_AnnotateRWLockAcquired(THREADID tid, ADDRINT pc,
2426                                     ADDRINT file, ADDRINT line,
2427                                     ADDRINT lock, ADDRINT is_w) {
2428  DumpEvent(0, is_w ? WRITER_LOCK : READER_LOCK, tid, pc, lock, 0);
2429}
2430
2431static void On_AnnotateRWLockReleased(THREADID tid, ADDRINT pc,
2432                                     ADDRINT file, ADDRINT line,
2433                                     ADDRINT lock, ADDRINT is_w) {
2434  DumpEvent(0, UNLOCK, tid, pc, lock, 0);
2435}
2436
2437
2438int WRAP_NAME(RunningOnValgrind)(WRAP_PARAM4) {
2439  return 1;
2440}
2441
2442//--------- Instrumentation ----------------------- {{{1
2443static bool IgnoreImage(IMG img) {
2444  string name = IMG_Name(img);
2445  if (name.find("/ld-") != string::npos)
2446    return true;
2447  return false;
2448}
2449
2450static bool IgnoreRtn(RTN rtn) {
2451  CHECK(rtn != RTN_Invalid());
2452  ADDRINT rtn_address = RTN_Address(rtn);
2453  if (ThreadSanitizerWantToInstrumentSblock(rtn_address) == false)
2454    return true;
2455  return false;
2456}
2457
2458static bool InstrumentCall(INS ins) {
2459  // Call.
2460  if (INS_IsProcedureCall(ins) && !INS_IsSyscall(ins)) {
2461    IGNORE_BELOW_RTN ignore_below = IGNORE_BELOW_RTN_UNKNOWN;
2462    if (INS_IsDirectBranchOrCall(ins)) {
2463      ADDRINT target = INS_DirectBranchOrCallTargetAddress(ins);
2464      bool ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target);
2465      ignore_below = ignore ? IGNORE_BELOW_RTN_YES : IGNORE_BELOW_RTN_NO;
2466    }
2467    INS_InsertCall(ins, IPOINT_BEFORE,
2468                   (AFUNPTR)InsertBeforeEvent_Call,
2469                   IARG_THREAD_ID,
2470                   IARG_INST_PTR,
2471                   IARG_BRANCH_TARGET_ADDR,
2472                   IARG_REG_VALUE, REG_STACK_PTR,
2473                   IARG_ADDRINT, ignore_below,
2474                   IARG_END);
2475    return true;
2476  }
2477  if (INS_IsSyscall(ins)) {
2478    INS_InsertCall(ins, IPOINT_BEFORE,
2479                   (AFUNPTR)InsertBeforeEvent_SysCall,
2480                   IARG_THREAD_ID,
2481                   IARG_REG_VALUE, REG_STACK_PTR,
2482                   IARG_END);
2483  }
2484  return false;
2485}
2486
2487
2488// return the number of inserted instrumentations.
2489static void InstrumentMopsInBBl(BBL bbl, RTN rtn, TraceInfo *trace_info, uintptr_t instrument_pc, size_t *mop_idx) {
2490  // compute 'dtor_head', see
2491  // http://code.google.com/p/data-race-test/wiki/PopularDataRaces#Data_race_on_vptr
2492  // On x86_64 only the first BB of DTOR is treated as dtor_head.
2493  // On x86, we have to treat more BBs as dtor_head due to -fPIC.
2494  // See http://code.google.com/p/chromium/issues/detail?id=61199
2495  bool dtor_head = false;
2496#ifdef TARGET_IA32
2497  size_t max_offset_for_dtor_head = 32;
2498#else
2499  size_t max_offset_for_dtor_head = 0;
2500#endif
2501
2502  if (BBL_Address(bbl) - RTN_Address(rtn) <= max_offset_for_dtor_head) {
2503    string demangled_rtn_name = Demangle(RTN_Name(rtn).c_str());
2504    if (demangled_rtn_name.find("::~") != string::npos)
2505      dtor_head = true;
2506  }
2507
2508  INS tail = BBL_InsTail(bbl);
2509  // All memory reads/writes
2510  for( INS ins = BBL_InsHead(bbl);
2511       INS_Valid(ins);
2512       ins = INS_Next(ins) ) {
2513    if (ins != tail) {
2514      CHECK(!INS_IsRet(ins));
2515      CHECK(!INS_IsProcedureCall(ins));
2516    }
2517    // bool is_stack = INS_IsStackRead(ins) || INS_IsStackWrite(ins);
2518    if (INS_IsAtomicUpdate(ins)) continue;
2519
2520    int n_mops = INS_MemoryOperandCount(ins);
2521    if (n_mops == 0) continue;
2522
2523    string opcode_str = OPCODE_StringShort(INS_Opcode(ins));
2524    if (trace_info && debug_ins) {
2525      Printf("  INS: opcode=%s n_mops=%d dis=\"%s\"\n",
2526             opcode_str.c_str(),  n_mops,
2527             INS_Disassemble(ins).c_str());
2528    }
2529
2530    bool ins_ignore_writes = false;
2531    bool ins_ignore_reads = false;
2532
2533    // CALL writes to stack and (if the call is indirect) reads the target
2534    // address. We don't want to handle the stack write.
2535    if (INS_IsCall(ins)) {
2536      CHECK(n_mops == 1 || n_mops == 2);
2537      ins_ignore_writes = true;
2538    }
2539
2540    // PUSH: we ignore the write to stack but we don't ignore the read (if any).
2541    if (opcode_str == "PUSH") {
2542      CHECK(n_mops == 1 || n_mops == 2);
2543      ins_ignore_writes = true;
2544    }
2545
2546    // POP: we are reading from stack, Ignore it.
2547    if (opcode_str == "POP") {
2548      CHECK(n_mops == 1 || n_mops == 2);
2549      ins_ignore_reads = true;
2550      continue;
2551    }
2552
2553    // RET/LEAVE -- ignore it, it just reads the return address and stack.
2554    if (INS_IsRet(ins) || opcode_str == "LEAVE") {
2555      CHECK(n_mops == 1);
2556      continue;
2557    }
2558
2559    bool is_predicated = INS_IsPredicated(ins);
2560    for (int i = 0; i < n_mops; i++) {
2561      if (*mop_idx >= kMaxMopsPerTrace) {
2562        Report("INFO: too many mops in trace: %d %s\n",
2563            INS_Address(ins), PcToRtnName(INS_Address(ins), true).c_str());
2564        return;
2565      }
2566      size_t size = INS_MemoryOperandSize(ins, i);
2567      CHECK(size);
2568      bool is_write = INS_MemoryOperandIsWritten(ins, i);
2569
2570      if (ins_ignore_writes && is_write) continue;
2571      if (ins_ignore_reads && !is_write) continue;
2572      if (instrument_pc && instrument_pc != INS_Address(ins)) continue;
2573
2574      bool check_ident_store = false;
2575      if (dtor_head && is_write && INS_IsMov(ins) && size == sizeof(void*)) {
2576        // This is a special case for '*addr = value', where we want to ignore the
2577        // access if *addr == value before the store.
2578        CHECK(!is_predicated);
2579        check_ident_store = true;
2580      }
2581
2582      if (trace_info) {
2583        if (debug_ins) {
2584          Printf("    size=%ld is_w=%d\n", size, (int)is_write);
2585        }
2586        IPOINT point = IPOINT_BEFORE;
2587        AFUNPTR on_mop_callback = (AFUNPTR)OnMop;
2588        if (check_ident_store) {
2589          INS_InsertCall(ins, IPOINT_BEFORE,
2590            (AFUNPTR)OnMopCheckIdentStoreBefore,
2591            IARG_REG_VALUE, tls_reg,
2592            IARG_THREAD_ID,
2593            IARG_ADDRINT, *mop_idx,
2594            IARG_MEMORYOP_EA, i,
2595            IARG_END);
2596          // This is just a MOV, so we can insert the instrumentation code
2597          // after the insn.
2598          point = IPOINT_AFTER;
2599          on_mop_callback = (AFUNPTR)OnMopCheckIdentStoreAfter;
2600        }
2601
2602        MopInfo *mop = trace_info->GetMop(*mop_idx);
2603        new (mop) MopInfo(INS_Address(ins), size, is_write, false);
2604        if (is_predicated) {
2605          INS_InsertPredicatedCall(ins, point,
2606              (AFUNPTR)On_PredicatedMop,
2607              IARG_EXECUTING,
2608              IARG_REG_VALUE, tls_reg,
2609              IARG_THREAD_ID,
2610              IARG_ADDRINT, *mop_idx,
2611              IARG_MEMORYOP_EA, i,
2612              IARG_END);
2613        } else {
2614          INS_InsertCall(ins, point,
2615              on_mop_callback,
2616              IARG_REG_VALUE, tls_reg,
2617              IARG_THREAD_ID,
2618              IARG_ADDRINT, *mop_idx,
2619              IARG_MEMORYOP_EA, i,
2620              IARG_END);
2621        }
2622      }
2623      (*mop_idx)++;
2624    }
2625  }
2626}
2627
// Instruments one trace (presumably registered with Pin as the per-trace
// instrumentation callback -- the registration is not part of this function).
//
// Steps, in this order:
//   1. Decide whether memory accesses of the trace are ignored.
//   2. Count the memory operations (mops) and instrument RET instructions.
//   3. Instrument the first instruction of the trace.
//   4. Instrument the mops themselves.
//   5. Instrument the calls (skipped when the race verifier is active).
//
// 'trace' is the trace to instrument; 'v' is not used by this function.
void CallbackForTRACE(TRACE trace, void *v) {
  CHECK(n_started_threads > 0);

  RTN rtn = TRACE_Rtn(trace);
  bool ignore_memory = false;
  // NOTE: img_name and rtn_name are assigned below but not read anywhere else
  // in this function.
  string img_name = "<>";
  string rtn_name = "<>";
  if (RTN_Valid(rtn)) {
    SEC sec = RTN_Sec(rtn);
    IMG img = SEC_Img(sec);
    rtn_name = RTN_Name(rtn);
    img_name = IMG_Name(img);

    // Memory accesses are not instrumented if either the image or the routine
    // is on the ignore list.
    if (IgnoreImage(img)) {
      // Printf("Ignoring memory accesses in %s\n", IMG_Name(img).c_str());
      ignore_memory = true;
    } else if (IgnoreRtn(rtn)) {
      ignore_memory = true;
    }
  }

  // Non-zero only if RaceVerifierGetAddresses() stores an address here; it is
  // passed to InstrumentMopsInBBl() in both passes below.
  uintptr_t instrument_pc = 0;
  if (g_race_verifier_active) {
    // Check if this trace looks like part of a possible race report.
    // [min_pc, max_pc] spans from the lowest head address to the highest tail
    // address over all basic blocks of the trace.
    uintptr_t min_pc = UINTPTR_MAX;
    uintptr_t max_pc = 0;
    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
      min_pc = MIN(min_pc, INS_Address(BBL_InsHead(bbl)));
      max_pc = MAX(max_pc, INS_Address(BBL_InsTail(bbl)));
    }

    bool verify_trace = RaceVerifierGetAddresses(min_pc, max_pc, &instrument_pc);
    if (!verify_trace)
      ignore_memory = true;
  }

  size_t n_mops = 0;
  // count the mops.
  // (trace_info is NULL in this pass, so InstrumentMopsInBBl only advances
  // n_mops.) The same loop also instruments the RET at the end of a basic
  // block, regardless of ignore_memory.
  for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
    if (!ignore_memory) {
      InstrumentMopsInBBl(bbl, rtn, NULL, instrument_pc, &n_mops);
    }
    INS tail = BBL_InsTail(bbl);
    if (INS_IsRet(tail)) {
      // The '#if 0' branch (an If/Then pair) is compiled out; the active code
      // is a plain INS_InsertCall of Before_RET_THEN. The argument list after
      // '#endif' is shared by both variants.
#if 0
      INS_InsertIfCall(tail, IPOINT_BEFORE,
                       (AFUNPTR)Before_RET_IF,
                       IARG_THREAD_ID,
                       IARG_END);

      INS_InsertThenCall(
#else
        INS_InsertCall(
#endif
          tail, IPOINT_BEFORE,
          (AFUNPTR)Before_RET_THEN,
          IARG_THREAD_ID,
          IARG_INST_PTR,
          IARG_REG_VALUE, REG_STACK_PTR,
          IARG_FUNCRET_EXITPOINT_VALUE,
          IARG_END);
    }
  }

  // Handle the head of the trace
  INS head = BBL_InsHead(TRACE_BblHead(trace));
  CHECK(n_mops <= kMaxMopsPerTrace);

  // A TraceInfo is created only for traces that have at least one mop.
  TraceInfo *trace_info = NULL;
  if (n_mops) {
    trace_info = TraceInfo::NewTraceInfo(n_mops, INS_Address(head));
    if (TS_SERIALIZED == 0) {
      // TODO(kcc): implement race verifier here.
      INS_InsertCall(head, IPOINT_BEFORE,
                     (AFUNPTR)OnTraceParallel,
                     IARG_REG_VALUE, tls_reg,
                     IARG_REG_VALUE, REG_STACK_PTR,
                     IARG_PTR, trace_info,
                     IARG_END);
    } else {
      // Serialized mode: the handler depends on whether the race verifier is
      // active. Here tls_reg is passed by reference rather than by value.
      AFUNPTR handler = (AFUNPTR)(g_race_verifier_active ?
                                  OnTraceVerify : OnTraceSerial);
      INS_InsertCall(head, IPOINT_BEFORE,
                     handler,
                     IARG_THREAD_ID,
                     IARG_REG_VALUE, REG_STACK_PTR,
                     IARG_PTR, trace_info,
                     IARG_REG_REFERENCE, tls_reg,
                     IARG_END);
    }
  } else {
    // No mops: the head is instrumented only when the race verifier is
    // active.
    if (g_race_verifier_active) {
      INS_InsertCall(head, IPOINT_BEFORE,
                     (AFUNPTR)OnTraceNoMopsVerify,
                     IARG_THREAD_ID,
                     IARG_REG_VALUE, REG_STACK_PTR,
                     IARG_REG_REFERENCE, tls_reg,
                     IARG_END);
    }
  }

  // instrument the mops. We want to do it after we instrumented the head
  // to maintain the right order of instrumentation callbacks (head first, then
  // mops).
  // 'i' counts the mops seen by this second pass; it must end up equal to
  // n_mops (checked at the end of the function).
  size_t i = 0;
  if (n_mops) {
    if (debug_ins) {
      Printf("TRACE %p (%p); n_mops=%ld %s\n", trace_info,
             TRACE_Address(trace),
             trace_info->n_mops(),
             PcToRtnName(trace_info->pc(), false).c_str());
    }
    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
      InstrumentMopsInBBl(bbl, rtn, trace_info, instrument_pc, &i);
    }
  }

  // instrument the calls, do it after all other instrumentation.
  if (!g_race_verifier_active) {
    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
      InstrumentCall(BBL_InsTail(bbl));
    }
  }

  // Both passes over the trace must have seen the same number of mops.
  CHECK(n_mops == i);
}
2754
2755
// Inserts a call to 'to_insert' into routine 'rtn' at 'point'.
// The inserted function receives IARG_THREAD_ID and IARG_INST_PTR followed by
// the IARG_* list given as variadic arguments; IARG_END is appended by the
// macro. When G_flags->verbosity >= 2 the insertion is logged.
// Notes for users of the macro:
//  - it expands to several statements that are NOT wrapped in a block;
//  - it refers to 'img' and 'G_flags', which must be visible where the macro
//    is expanded;
//  - the variadic part must not be empty (a comma precedes __VA_ARGS__).
#define INSERT_FN_HELPER(point, name, rtn, to_insert, ...) \
    RTN_Open(rtn); \
    if (G_flags->verbosity >= 2) Printf("RTN: Inserting %-50s (%s) %s (%s) img: %s\n", \
    #to_insert, #point, RTN_Name(rtn).c_str(), name, IMG_Name(img).c_str());\
    RTN_InsertCall(rtn, point, (AFUNPTR)to_insert, IARG_THREAD_ID, \
                   IARG_INST_PTR, __VA_ARGS__, IARG_END);\
    RTN_Close(rtn); \

// Runs INSERT_FN_HELPER if RtnMatchesName(rtn_name, name) holds.
// 'rtn_name' and 'rtn' are taken from the scope where the macro is expanded.
// The 'while (...) { ...; break; }' construct executes its body at most once,
// i.e. it behaves like an 'if' with a braced body.
#define INSERT_FN(point, name, to_insert, ...) \
  while (RtnMatchesName(rtn_name, name)) {\
    INSERT_FN_HELPER(point, name, rtn, to_insert, __VA_ARGS__); \
    break;\
  }\
2769
2770
// INSERT_FN with the insertion point fixed to IPOINT_BEFORE.
#define INSERT_BEFORE_FN(name, to_insert, ...) \
    INSERT_FN(IPOINT_BEFORE, name, to_insert, __VA_ARGS__)

// The INSERT_BEFORE_<N>_SP variants pass the value of the stack pointer
// register followed by the first N entry-point arguments of the function.
#define INSERT_BEFORE_1_SP(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_REG_VALUE, REG_STACK_PTR, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0)

#define INSERT_BEFORE_2_SP(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_REG_VALUE, REG_STACK_PTR, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1)

// No function arguments are passed. IARG_END is used as the (non-empty)
// variadic argument, so the resulting argument list contains IARG_END twice.
// Unlike its siblings, this macro already ends with a semicolon.
#define INSERT_BEFORE_0(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, IARG_END);

// The INSERT_BEFORE_<N> variants pass the first N entry-point arguments
// (indices 0 .. N-1) of the function.
#define INSERT_BEFORE_1(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0)

#define INSERT_BEFORE_2(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1)

#define INSERT_BEFORE_3(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2)

#define INSERT_BEFORE_4(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3)

#define INSERT_BEFORE_5(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 4)

#define INSERT_BEFORE_6(name, to_insert) \
    INSERT_BEFORE_FN(name, to_insert, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 4, \
                     IARG_FUNCARG_ENTRYPOINT_VALUE, 5)
2826
// INSERT_FN with the insertion point fixed to IPOINT_AFTER.
#define INSERT_AFTER_FN(name, to_insert, ...) \
    INSERT_FN(IPOINT_AFTER, name, to_insert, __VA_ARGS__)

// No extra values are passed. IARG_END is used as the (non-empty) variadic
// argument, so the resulting argument list contains IARG_END twice.
#define INSERT_AFTER_0(name, to_insert) \
    INSERT_AFTER_FN(name, to_insert, IARG_END)

// Passes the function's return value (IARG_FUNCRET_EXITPOINT_VALUE).
#define INSERT_AFTER_1(name, to_insert) \
    INSERT_AFTER_FN(name, to_insert, IARG_FUNCRET_EXITPOINT_VALUE)
2835
2836
2837#ifdef _MSC_VER
2838void WrapStdCallFunc1(RTN rtn, char *name, AFUNPTR replacement_func) {
2839  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2840    InformAboutFunctionWrap(rtn, name);
2841    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2842                                 CALLINGSTD_STDCALL,
2843                                 "proto",
2844                                 PIN_PARG(uintptr_t),
2845                                 PIN_PARG_END());
2846    RTN_ReplaceSignature(rtn,
2847                         AFUNPTR(replacement_func),
2848                         IARG_PROTOTYPE, proto,
2849                         IARG_THREAD_ID,
2850                         IARG_INST_PTR,
2851                         IARG_CONTEXT,
2852                         IARG_ORIG_FUNCPTR,
2853                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2854                         IARG_END);
2855    PROTO_Free(proto);
2856  }
2857}
2858
2859void WrapStdCallFunc2(RTN rtn, char *name, AFUNPTR replacement_func) {
2860  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2861    InformAboutFunctionWrap(rtn, name);
2862    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2863                                 CALLINGSTD_STDCALL,
2864                                 "proto",
2865                                 PIN_PARG(uintptr_t),
2866                                 PIN_PARG(uintptr_t),
2867                                 PIN_PARG_END());
2868    RTN_ReplaceSignature(rtn,
2869                         AFUNPTR(replacement_func),
2870                         IARG_PROTOTYPE, proto,
2871                         IARG_THREAD_ID,
2872                         IARG_INST_PTR,
2873                         IARG_CONTEXT,
2874                         IARG_ORIG_FUNCPTR,
2875                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2876                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
2877                         IARG_END);
2878    PROTO_Free(proto);
2879  }
2880}
2881
2882void WrapStdCallFunc3(RTN rtn, char *name, AFUNPTR replacement_func) {
2883  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2884    InformAboutFunctionWrap(rtn, name);
2885    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2886                                 CALLINGSTD_STDCALL,
2887                                 "proto",
2888                                 PIN_PARG(uintptr_t),
2889                                 PIN_PARG(uintptr_t),
2890                                 PIN_PARG(uintptr_t),
2891                                 PIN_PARG_END());
2892    RTN_ReplaceSignature(rtn,
2893                         AFUNPTR(replacement_func),
2894                         IARG_PROTOTYPE, proto,
2895                         IARG_THREAD_ID,
2896                         IARG_INST_PTR,
2897                         IARG_CONTEXT,
2898                         IARG_ORIG_FUNCPTR,
2899                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2900                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
2901                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
2902                         IARG_END);
2903    PROTO_Free(proto);
2904  }
2905}
2906
2907void WrapStdCallFunc4(RTN rtn, char *name, AFUNPTR replacement_func) {
2908  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2909    InformAboutFunctionWrap(rtn, name);
2910    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2911                                 CALLINGSTD_STDCALL,
2912                                 "proto",
2913                                 PIN_PARG(uintptr_t),
2914                                 PIN_PARG(uintptr_t),
2915                                 PIN_PARG(uintptr_t),
2916                                 PIN_PARG(uintptr_t),
2917                                 PIN_PARG_END());
2918    RTN_ReplaceSignature(rtn,
2919                         AFUNPTR(replacement_func),
2920                         IARG_PROTOTYPE, proto,
2921                         IARG_THREAD_ID,
2922                         IARG_INST_PTR,
2923                         IARG_CONTEXT,
2924                         IARG_ORIG_FUNCPTR,
2925                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2926                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
2927                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
2928                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
2929                         IARG_END);
2930    PROTO_Free(proto);
2931  }
2932}
2933
2934void WrapStdCallFunc5(RTN rtn, char *name, AFUNPTR replacement_func) {
2935  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2936    InformAboutFunctionWrap(rtn, name);
2937    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2938                                 CALLINGSTD_STDCALL,
2939                                 "proto",
2940                                 PIN_PARG(uintptr_t),
2941                                 PIN_PARG(uintptr_t),
2942                                 PIN_PARG(uintptr_t),
2943                                 PIN_PARG(uintptr_t),
2944                                 PIN_PARG(uintptr_t),
2945                                 PIN_PARG_END());
2946    RTN_ReplaceSignature(rtn,
2947                         AFUNPTR(replacement_func),
2948                         IARG_PROTOTYPE, proto,
2949                         IARG_THREAD_ID,
2950                         IARG_INST_PTR,
2951                         IARG_CONTEXT,
2952                         IARG_ORIG_FUNCPTR,
2953                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2954                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
2955                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
2956                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
2957                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
2958                         IARG_END);
2959    PROTO_Free(proto);
2960  }
2961}
2962
2963void WrapStdCallFunc6(RTN rtn, char *name, AFUNPTR replacement_func) {
2964  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2965    InformAboutFunctionWrap(rtn, name);
2966    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2967                                 CALLINGSTD_STDCALL,
2968                                 "proto",
2969                                 PIN_PARG(uintptr_t),
2970                                 PIN_PARG(uintptr_t),
2971                                 PIN_PARG(uintptr_t),
2972                                 PIN_PARG(uintptr_t),
2973                                 PIN_PARG(uintptr_t),
2974                                 PIN_PARG(uintptr_t),
2975                                 PIN_PARG_END());
2976    RTN_ReplaceSignature(rtn,
2977                         AFUNPTR(replacement_func),
2978                         IARG_PROTOTYPE, proto,
2979                         IARG_THREAD_ID,
2980                         IARG_INST_PTR,
2981                         IARG_CONTEXT,
2982                         IARG_ORIG_FUNCPTR,
2983                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
2984                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
2985                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
2986                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
2987                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
2988                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
2989                         IARG_END);
2990    PROTO_Free(proto);
2991  }
2992}
2993
2994void WrapStdCallFunc7(RTN rtn, char *name, AFUNPTR replacement_func) {
2995  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
2996    InformAboutFunctionWrap(rtn, name);
2997    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
2998                                 CALLINGSTD_STDCALL,
2999                                 "proto",
3000                                 PIN_PARG(uintptr_t),
3001                                 PIN_PARG(uintptr_t),
3002                                 PIN_PARG(uintptr_t),
3003                                 PIN_PARG(uintptr_t),
3004                                 PIN_PARG(uintptr_t),
3005                                 PIN_PARG(uintptr_t),
3006                                 PIN_PARG(uintptr_t),
3007                                 PIN_PARG_END());
3008    RTN_ReplaceSignature(rtn,
3009                         AFUNPTR(replacement_func),
3010                         IARG_PROTOTYPE, proto,
3011                         IARG_THREAD_ID,
3012                         IARG_INST_PTR,
3013                         IARG_CONTEXT,
3014                         IARG_ORIG_FUNCPTR,
3015                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
3016                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
3017                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
3018                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
3019                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
3020                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
3021                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
3022                         IARG_END);
3023    PROTO_Free(proto);
3024  }
3025}
3026
3027void WrapStdCallFunc8(RTN rtn, char *name, AFUNPTR replacement_func) {
3028  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
3029    InformAboutFunctionWrap(rtn, name);
3030    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
3031                                 CALLINGSTD_STDCALL,
3032                                 "proto",
3033                                 PIN_PARG(uintptr_t),
3034                                 PIN_PARG(uintptr_t),
3035                                 PIN_PARG(uintptr_t),
3036                                 PIN_PARG(uintptr_t),
3037                                 PIN_PARG(uintptr_t),
3038                                 PIN_PARG(uintptr_t),
3039                                 PIN_PARG(uintptr_t),
3040                                 PIN_PARG(uintptr_t),
3041                                 PIN_PARG_END());
3042    RTN_ReplaceSignature(rtn,
3043                         AFUNPTR(replacement_func),
3044                         IARG_PROTOTYPE, proto,
3045                         IARG_THREAD_ID,
3046                         IARG_INST_PTR,
3047                         IARG_CONTEXT,
3048                         IARG_ORIG_FUNCPTR,
3049                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
3050                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
3051                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
3052                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
3053                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
3054                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
3055                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
3056                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
3057                         IARG_END);
3058    PROTO_Free(proto);
3059  }
3060}
3061
3062void WrapStdCallFunc10(RTN rtn, char *name, AFUNPTR replacement_func) {
3063  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
3064    InformAboutFunctionWrap(rtn, name);
3065    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
3066                                 CALLINGSTD_STDCALL,
3067                                 "proto",
3068                                 PIN_PARG(uintptr_t),
3069                                 PIN_PARG(uintptr_t),
3070                                 PIN_PARG(uintptr_t),
3071                                 PIN_PARG(uintptr_t),
3072                                 PIN_PARG(uintptr_t),
3073                                 PIN_PARG(uintptr_t),
3074                                 PIN_PARG(uintptr_t),
3075                                 PIN_PARG(uintptr_t),
3076                                 PIN_PARG(uintptr_t),
3077                                 PIN_PARG(uintptr_t),
3078                                 PIN_PARG_END());
3079    RTN_ReplaceSignature(rtn,
3080                         AFUNPTR(replacement_func),
3081                         IARG_PROTOTYPE, proto,
3082                         IARG_THREAD_ID,
3083                         IARG_INST_PTR,
3084                         IARG_CONTEXT,
3085                         IARG_ORIG_FUNCPTR,
3086                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
3087                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
3088                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
3089                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
3090                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
3091                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
3092                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
3093                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
3094                         IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
3095                         IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
3096                         IARG_END);
3097    PROTO_Free(proto);
3098  }
3099}
3100
3101void WrapStdCallFunc11(RTN rtn, char *name, AFUNPTR replacement_func) {
3102  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
3103    InformAboutFunctionWrap(rtn, name);
3104    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
3105                                 CALLINGSTD_STDCALL,
3106                                 "proto",
3107                                 PIN_PARG(uintptr_t),
3108                                 PIN_PARG(uintptr_t),
3109                                 PIN_PARG(uintptr_t),
3110                                 PIN_PARG(uintptr_t),
3111                                 PIN_PARG(uintptr_t),
3112                                 PIN_PARG(uintptr_t),
3113                                 PIN_PARG(uintptr_t),
3114                                 PIN_PARG(uintptr_t),
3115                                 PIN_PARG(uintptr_t),
3116                                 PIN_PARG(uintptr_t),
3117                                 PIN_PARG(uintptr_t),
3118                                 PIN_PARG_END());
3119    RTN_ReplaceSignature(rtn,
3120                         AFUNPTR(replacement_func),
3121                         IARG_PROTOTYPE, proto,
3122                         IARG_THREAD_ID,
3123                         IARG_INST_PTR,
3124                         IARG_CONTEXT,
3125                         IARG_ORIG_FUNCPTR,
3126                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
3127                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
3128                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
3129                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
3130                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
3131                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
3132                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
3133                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
3134                         IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
3135                         IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
3136                         IARG_FUNCARG_ENTRYPOINT_VALUE, 10,
3137                         IARG_END);
3138    PROTO_Free(proto);
3139  }
3140}
3141
3142#endif
3143
3144static void MaybeInstrumentOneRoutine(IMG img, RTN rtn) {
3145  if (IgnoreImage(img)) {
3146    return;
3147  }
3148  string rtn_name = RTN_Name(rtn);
3149  string img_name = IMG_Name(img);
3150  if (debug_wrap) {
3151    Printf("%s: %s %s pc=%p\n", __FUNCTION__, rtn_name.c_str(),
3152           img_name.c_str(), RTN_Address(rtn));
3153  }
3154
3155  // malloc/free/etc
3156  const char *malloc_names[] = {
3157    "malloc",
3158#if defined(__GNUC__)
3159    "_Znwm",
3160    "_Znam",
3161    "_Znwj",
3162    "_Znaj",
3163    "_ZnwmRKSt9nothrow_t",
3164    "_ZnamRKSt9nothrow_t",
3165    "_ZnwjRKSt9nothrow_t",
3166    "_ZnajRKSt9nothrow_t",
3167#endif
3168#if defined(_MSC_VER)
3169    "operator new",
3170    "operator new[]",
3171#endif  // _MSC_VER
3172  };
3173
3174  const char *free_names[] = {
3175    "free",
3176#if defined(__GNUC__)
3177    "_ZdaPv",
3178    "_ZdlPv",
3179    "_ZdlPvRKSt9nothrow_t",
3180    "_ZdaPvRKSt9nothrow_t",
3181#endif  // __GNUC__
3182#if defined(_MSC_VER)
3183    "operator delete",
3184    "operator delete[]",
3185#endif  // _MSC_VER
3186  };
3187
3188  for (size_t i = 0; i < TS_ARRAY_SIZE(malloc_names); i++) {
3189    const char *name = malloc_names[i];
3190    INSERT_BEFORE_1_SP(name, Before_malloc);
3191  }
3192
3193  for (size_t i = 0; i < TS_ARRAY_SIZE(free_names); i++) {
3194    const char *name = free_names[i];
3195    INSERT_BEFORE_1_SP(name, Before_free);
3196  }
3197
3198  INSERT_BEFORE_2_SP("calloc", Before_calloc);
3199  INSERT_BEFORE_2_SP("realloc", Before_realloc);
3200
3201#if defined(__GNUC__)
3202  WrapFunc6(img, rtn, "mmap", (AFUNPTR)WRAP_NAME(mmap));
3203  WrapFunc4(img, rtn, "munmap", (AFUNPTR)WRAP_NAME(munmap));
3204
3205  WrapFunc4(img, rtn, "lockf", (AFUNPTR)WRAP_NAME(lockf));
3206  // pthread create/join
3207  WrapFunc4(img, rtn, "pthread_create", (AFUNPTR)WRAP_NAME(pthread_create));
3208  WrapFunc4(img, rtn, "pthread_join", (AFUNPTR)WRAP_NAME(pthread_join));
3209  WrapFunc4(img, rtn, "fwrite", (AFUNPTR)WRAP_NAME(fwrite));
3210
3211  INSERT_FN(IPOINT_BEFORE, "start_thread",
3212            Before_start_thread,
3213            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);
3214
3215   // pthread_cond_*
3216  INSERT_BEFORE_1("pthread_cond_signal", Before_pthread_cond_signal);
3217  WRAP4(pthread_cond_wait);
3218  WRAP4(pthread_cond_timedwait);
3219
3220  // pthread_mutex_*
3221  INSERT_BEFORE_1("pthread_mutex_init", Before_pthread_mutex_init);
3222  INSERT_BEFORE_1("pthread_mutex_destroy", Before_pthread_mutex_destroy);
3223  INSERT_BEFORE_1("pthread_mutex_unlock", Before_pthread_unlock);
3224
3225
3226  INSERT_BEFORE_1_SP("pthread_mutex_lock", Before_pthread_mutex_lock);
3227  WRAP4(pthread_mutex_trylock);
3228  WRAP4(pthread_spin_lock);
3229  WRAP4(pthread_spin_trylock);
3230  WRAP4(pthread_spin_init);
3231  WRAP4(pthread_spin_destroy);
3232  WRAP4(pthread_spin_unlock);
3233  WRAP4(pthread_rwlock_wrlock);
3234  WRAP4(pthread_rwlock_rdlock);
3235  WRAP4(pthread_rwlock_trywrlock);
3236  WRAP4(pthread_rwlock_tryrdlock);
3237
3238  // pthread_rwlock_*
3239  INSERT_BEFORE_1("pthread_rwlock_init", Before_pthread_rwlock_init);
3240  INSERT_BEFORE_1("pthread_rwlock_destroy", Before_pthread_rwlock_destroy);
3241  INSERT_BEFORE_1("pthread_rwlock_unlock", Before_pthread_unlock);
3242
3243  // pthread_barrier_*
3244  WrapFunc4(img, rtn, "pthread_barrier_init",
3245            (AFUNPTR)WRAP_NAME(pthread_barrier_init));
3246  WrapFunc4(img, rtn, "pthread_barrier_wait",
3247            (AFUNPTR)WRAP_NAME(pthread_barrier_wait));
3248
3249  // pthread_once
3250  WrapFunc4(img, rtn, "pthread_once", (AFUNPTR)WRAP_NAME(pthread_once));
3251
3252  // sem_*
3253  INSERT_AFTER_1("sem_open", After_sem_open);
3254  INSERT_BEFORE_1("sem_post", Before_sem_post);
3255  WRAP4(sem_wait);
3256  WRAP4(sem_trywait);
3257
3258  INSERT_BEFORE_0("epoll_ctl", Before_epoll_ctl);
3259  INSERT_AFTER_0("epoll_wait", After_epoll_wait);
3260#endif  // __GNUC__
3261
3262#ifdef _MSC_VER
3263  WrapStdCallFunc6(rtn, "CreateThread", (AFUNPTR)WRAP_NAME(CreateThread));
3264  WRAPSTD1(ResumeThread);
3265
3266  INSERT_FN(IPOINT_BEFORE, "BaseThreadInitThunk",
3267            Before_BaseThreadInitThunk,
3268            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);
3269
3270  INSERT_BEFORE_0("RtlExitUserThread", Before_RtlExitUserThread);
3271  INSERT_BEFORE_0("ExitThread", Before_RtlExitUserThread);
3272
3273  WRAPSTD1(RtlInitializeCriticalSection);
3274  WRAPSTD2(RtlInitializeCriticalSectionAndSpinCount);
3275  WRAPSTD3(RtlInitializeCriticalSectionEx);
3276  WRAPSTD1(RtlDeleteCriticalSection);
3277  WRAPSTD1(RtlEnterCriticalSection);
3278  WRAPSTD1(RtlTryEnterCriticalSection);
3279  WRAPSTD1(RtlLeaveCriticalSection);
3280  WRAPSTD7(DuplicateHandle);
3281  WRAPSTD1(SetEvent);
3282  WRAPSTD4(CreateSemaphoreA);
3283  WRAPSTD4(CreateSemaphoreW);
3284  WRAPSTD3(ReleaseSemaphore);
3285
3286  WRAPSTD1(RtlInterlockedPopEntrySList);
3287  WRAPSTD2(RtlInterlockedPushEntrySList);
3288
3289#if 1
3290  WRAPSTD1(RtlAcquireSRWLockExclusive);
3291  WRAPSTD1(RtlAcquireSRWLockShared);
3292  WRAPSTD1(RtlTryAcquireSRWLockExclusive);
3293  WRAPSTD1(RtlTryAcquireSRWLockShared);
3294  WRAPSTD1(RtlReleaseSRWLockExclusive);
3295  WRAPSTD1(RtlReleaseSRWLockShared);
3296  WRAPSTD1(RtlInitializeSRWLock);
3297  // For some reason, RtlInitializeSRWLock is aliased to RtlInitializeSRWLock..
3298  WrapStdCallFunc1(rtn, "RtlRunOnceInitialize",
3299                   (AFUNPTR)Wrap_RtlInitializeSRWLock);
3300
3301  /* We haven't seen these syscalls used in the wild yet.
3302  WRAPSTD2(RtlUpdateClonedSRWLock);
3303  WRAPSTD1(RtlAcquireReleaseSRWLockExclusive);
3304  WRAPSTD1(RtlUpdateClonedCriticalSection);
3305  */
3306
3307  WRAPSTD1(RtlWakeConditionVariable);
3308  WRAPSTD1(RtlWakeAllConditionVariable);
3309  WRAPSTD4(RtlSleepConditionVariableSRW);
3310  WRAPSTD3(RtlSleepConditionVariableCS);
3311#endif  // if 1
3312
3313  WRAPSTD3(RtlQueueWorkItem);
3314  WRAPSTD6(RegisterWaitForSingleObject);
3315  WRAPSTD2(UnregisterWaitEx);
3316
3317  WRAPSTD3(WaitForSingleObjectEx);
3318  WRAPSTD5(WaitForMultipleObjectsEx);
3319
3320  WrapStdCallFunc4(rtn, "VirtualAlloc", (AFUNPTR)(WRAP_NAME(VirtualAlloc)));
3321  WrapStdCallFunc6(rtn, "ZwAllocateVirtualMemory", (AFUNPTR)(WRAP_NAME(ZwAllocateVirtualMemory)));
3322  WrapStdCallFunc2(rtn, "GlobalAlloc", (AFUNPTR)WRAP_NAME(GlobalAlloc));
3323//  WrapStdCallFunc3(rtn, "RtlAllocateHeap", (AFUNPTR) WRAP_NAME(AllocateHeap));
3324//  WrapStdCallFunc3(rtn, "HeapCreate", (AFUNPTR) WRAP_NAME(HeapCreate));
3325#endif  // _MSC_VER
3326
3327  // Annotations.
3328  INSERT_BEFORE_4("AnnotateBenignRace", On_AnnotateBenignRace);
3329  INSERT_BEFORE_5("AnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
3330  INSERT_BEFORE_5("WTFAnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
3331  INSERT_BEFORE_4("AnnotateExpectRace", On_AnnotateExpectRace);
3332  INSERT_BEFORE_2("AnnotateFlushExpectedRaces", On_AnnotateFlushExpectedRaces);
3333  INSERT_BEFORE_3("AnnotateTraceMemory", On_AnnotateTraceMemory);
3334  INSERT_BEFORE_4("AnnotateNewMemory", On_AnnotateNewMemory);
3335  INSERT_BEFORE_3("AnnotateNoOp", On_AnnotateNoOp);
3336  INSERT_BEFORE_2("AnnotateFlushState", On_AnnotateFlushState);
3337
3338  INSERT_BEFORE_3("AnnotateCondVarWait", On_AnnotateCondVarWait);
3339  INSERT_BEFORE_3("AnnotateCondVarSignal", On_AnnotateCondVarSignal);
3340  INSERT_BEFORE_3("AnnotateCondVarSignalAll", On_AnnotateCondVarSignal);
3341  INSERT_BEFORE_3("AnnotateHappensBefore", On_AnnotateHappensBefore);
3342  INSERT_BEFORE_3("WTFAnnotateHappensBefore", On_AnnotateHappensBefore);
3343  INSERT_BEFORE_3("AnnotateHappensAfter", On_AnnotateHappensAfter);
3344  INSERT_BEFORE_3("WTFAnnotateHappensAfter", On_AnnotateHappensAfter);
3345
3346  INSERT_BEFORE_3("AnnotateEnableRaceDetection", On_AnnotateEnableRaceDetection);
3347  INSERT_BEFORE_0("AnnotateIgnoreReadsBegin", On_AnnotateIgnoreReadsBegin);
3348  INSERT_BEFORE_0("AnnotateIgnoreReadsEnd", On_AnnotateIgnoreReadsEnd);
3349  INSERT_BEFORE_0("AnnotateIgnoreWritesBegin", On_AnnotateIgnoreWritesBegin);
3350  INSERT_BEFORE_0("AnnotateIgnoreWritesEnd", On_AnnotateIgnoreWritesEnd);
3351  INSERT_BEFORE_3("AnnotateThreadName", On_AnnotateThreadName);
3352  INSERT_BEFORE_4("AnnotatePublishMemoryRange", On_AnnotatePublishMemoryRange);
3353  INSERT_BEFORE_4("AnnotateUnpublishMemoryRange", On_AnnotateUnpublishMemoryRange);
3354  INSERT_BEFORE_3("AnnotateMutexIsUsedAsCondVar", On_AnnotateMutexIsUsedAsCondVar);
3355  INSERT_BEFORE_3("AnnotateMutexIsNotPHB", On_AnnotateMutexIsNotPhb);
3356
3357  INSERT_BEFORE_3("AnnotatePCQCreate", On_AnnotatePCQCreate);
3358  INSERT_BEFORE_3("AnnotatePCQDestroy", On_AnnotatePCQDestroy);
3359  INSERT_BEFORE_3("AnnotatePCQPut", On_AnnotatePCQPut);
3360  INSERT_BEFORE_3("AnnotatePCQGet", On_AnnotatePCQGet);
3361
3362  INSERT_BEFORE_3("AnnotateRWLockCreate", On_AnnotateRWLockCreate);
3363  INSERT_BEFORE_3("AnnotateRWLockDestroy", On_AnnotateRWLockDestroy);
3364  INSERT_BEFORE_4("AnnotateRWLockAcquired", On_AnnotateRWLockAcquired);
3365  INSERT_BEFORE_4("AnnotateRWLockReleased", On_AnnotateRWLockReleased);
3366
3367  // ThreadSanitizerQuery
3368  WrapFunc4(img, rtn, "ThreadSanitizerQuery",
3369            (AFUNPTR)WRAP_NAME(ThreadSanitizerQuery));
3370  WrapFunc4(img, rtn, "RunningOnValgrind",
3371            (AFUNPTR)WRAP_NAME(RunningOnValgrind));
3372
3373  // I/O
3374  INSERT_BEFORE_0("write", Before_SignallingIOCall);
3375  INSERT_BEFORE_0("unlink", Before_SignallingIOCall);
3376  INSERT_BEFORE_0("rmdir", Before_SignallingIOCall);
3377//  INSERT_BEFORE_0("send", Before_SignallingIOCall);
3378  INSERT_AFTER_0("__read_nocancel", After_WaitingIOCall);
3379  INSERT_AFTER_0("fopen", After_WaitingIOCall);
3380  INSERT_AFTER_0("__fopen_internal", After_WaitingIOCall);
3381  INSERT_AFTER_0("open", After_WaitingIOCall);
3382  INSERT_AFTER_0("opendir", After_WaitingIOCall);
3383//  INSERT_AFTER_0("recv", After_WaitingIOCall);
3384
3385  // strlen and friends.
3386  // These wrappers will generate memory access events.
3387  // So, if we don't want to get those events (e.g. memcpy inside
3388  // ld.so or ntdll.dll) we don't wrap them and the regular
3389  // ignore machinery will make sure we don't get the events.
3390  if (ThreadSanitizerWantToInstrumentSblock(RTN_Address(rtn))) {
3391    ReplaceFunc3(img, rtn, "memchr", (AFUNPTR)Replace_memchr);
3392    ReplaceFunc3(img, rtn, "strchr", (AFUNPTR)Replace_strchr);
3393    ReplaceFunc3(img, rtn, "index", (AFUNPTR)Replace_strchr);
3394    ReplaceFunc3(img, rtn, "strchrnul", (AFUNPTR)Replace_strchrnul);
3395    ReplaceFunc3(img, rtn, "strrchr", (AFUNPTR)Replace_strrchr);
3396    ReplaceFunc3(img, rtn, "rindex", (AFUNPTR)Replace_strrchr);
3397    ReplaceFunc3(img, rtn, "strlen", (AFUNPTR)Replace_strlen);
3398    ReplaceFunc3(img, rtn, "strcmp", (AFUNPTR)Replace_strcmp);
3399    ReplaceFunc3(img, rtn, "strncmp", (AFUNPTR)Replace_strncmp);
3400    ReplaceFunc3(img, rtn, "memcpy", (AFUNPTR)Replace_memcpy);
3401    ReplaceFunc3(img, rtn, "memcmp", (AFUNPTR)Replace_memcmp);
3402    ReplaceFunc3(img, rtn, "memmove", (AFUNPTR)Replace_memmove);
3403    ReplaceFunc3(img, rtn, "strcpy", (AFUNPTR)Replace_strcpy);
3404    ReplaceFunc3(img, rtn, "strncpy", (AFUNPTR)Replace_strncpy);
3405    ReplaceFunc3(img, rtn, "strcat", (AFUNPTR)Replace_strcat);
3406    ReplaceFunc3(img, rtn, "stpcpy", (AFUNPTR)Replace_stpcpy);
3407  }
3408
3409  // __cxa_guard_acquire / __cxa_guard_release
3410  INSERT_BEFORE_1("__cxa_guard_acquire", Before_cxa_guard_acquire);
3411  INSERT_AFTER_1("__cxa_guard_acquire", After_cxa_guard_acquire);
3412  INSERT_AFTER_0("__cxa_guard_release", After_cxa_guard_release);
3413
3414  INSERT_BEFORE_0("atexit", On_atexit);
3415  INSERT_BEFORE_0("exit", On_exit);
3416}
3417
3418// Pin calls this function every time a new img is loaded.
3419static void CallbackForIMG(IMG img, void *v) {
3420  if (debug_wrap) {
3421    Printf("Started CallbackForIMG %s\n", IMG_Name(img).c_str());
3422  }
3423
3424  string img_name = IMG_Name(img);
3425  for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) {
3426    for (RTN rtn = SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn)) {
3427      MaybeInstrumentOneRoutine(img, rtn);
3428    }
3429  }
3430  // In DEBUG_MODE check that we have the debug symbols in the Windows guts.
3431  // We should work w/o them too.
3432  // TODO(timurrrr): I doubt the problem is the missing symbols.
3433  // I have a strong gut feeling that this syscall was added
3434  // in Vista but only used since Windows 7. We had its wrapper wrong
3435  // (found on W7) but the Vista build was fine for months.
3436  // Also, we wrap RtlReleaseSRWLock*, so our TSan assertions would have been
3437  // broken if RtlTryAcquireSRWLock* wasn't wrapped - and we haven't see this.
3438  if (DEBUG_MODE && img_name.find("ntdll.dll") != string::npos) {
3439    if (g_wrapped_functions.count("RtlTryAcquireSRWLockExclusive") == 0) {
3440      Printf("WARNING: Debug symbols for ntdll.dll not found.\n");
3441    }
3442  }
3443}
3444
3445// Returns:
3446// TRUE
3447// If user is interested to inject Pin (and tool) into child/exec-ed process
3448// FALSE
3449// If user is not interested to inject Pin (and tool) into child/exec-ed process
3450static BOOL CallbackForExec(CHILD_PROCESS childProcess, VOID *val) {
3451  int argc = 0;
3452  const CHAR *const * argv = NULL;
3453  CHILD_PROCESS_GetCommandLine(childProcess, &argc, &argv);
3454  CHECK(argc > 0);
3455  CHECK(argv);
3456  bool follow = G_flags->trace_children;
3457  if (DEBUG_MODE) {
3458    Printf("CallbackForExec: follow=%d: ", follow);
3459    for (int i = 0; i < argc; i++) {
3460      Printf("%s ", argv[i]);
3461    }
3462  }
3463  Printf("\n");
3464  return follow;
3465}
3466
3467//--------- Fini ---------- {{{1
3468static void CallbackForFini(INT32 code, void *v) {
3469  DumpEvent(0, THR_END, 0, 0, 0, 0);
3470  ThreadSanitizerFini();
3471  if (g_race_verifier_active) {
3472    RaceVerifierFini();
3473  }
3474  if (G_flags->show_stats) {
3475    TraceInfo::PrintTraceProfile();
3476  }
3477  if (G_flags->error_exitcode && GetNumberOfFoundErrors() > 0) {
3478    exit(G_flags->error_exitcode);
3479  }
3480}
3481
3482//--------- Call Coverage ----------------- {{{1
3483// A simplistic call coverage tool.
3484// Outputs all pairs <call_site,call_target>.
3485
// All <call_site, call_target> pairs registered so far.
// The set is allocated lazily by CallCoverageCallbackForTRACE.
typedef set<pair<uintptr_t, uintptr_t> > CallCoverageSet;
static CallCoverageSet *call_coverage_set;

// Maps a pc to the name returned by PcToRtnName; filled in by symbolize_pc
// and allocated together with call_coverage_set.
static map<uintptr_t, string> *function_names_map;
// Small fixed-size caches, indexed by a value derived from the pc(s) modulo
// the array size. They let symbolize_pc and CallCoverageRegisterCall return
// early, before taking the lock, for entries that were just processed.
// They are read without synchronization (the users call this "racey").
static uintptr_t symbolized_functions_cache[1023];
static pair<uintptr_t, uintptr_t> registered_pairs_cache[1023];
3492
3493static void symbolize_pc(uintptr_t pc) {
3494  // Check a simple cache if we already symbolized this pc (racey).
3495  size_t idx = pc % TS_ARRAY_SIZE(symbolized_functions_cache);
3496  if (symbolized_functions_cache[idx] == pc) return;
3497
3498  ScopedReentrantClientLock lock(__LINE__);
3499  CHECK(function_names_map);
3500  if (function_names_map->count(pc) == 0) {
3501    (*function_names_map)[pc] = PcToRtnName(pc, false);
3502  }
3503  symbolized_functions_cache[idx] = pc;
3504}
3505
3506static void CallCoverageRegisterCall(uintptr_t from, uintptr_t to) {
3507  symbolize_pc(from);
3508  symbolize_pc(to);
3509
3510  // Check if we already registered this pair (racey).
3511  size_t idx = (from ^ to) % TS_ARRAY_SIZE(registered_pairs_cache);
3512  if (registered_pairs_cache[idx] == make_pair(from,to)) return;
3513
3514  ScopedReentrantClientLock lock(__LINE__);
3515  call_coverage_set->insert(make_pair(from, to));
3516  registered_pairs_cache[idx] = make_pair(from,to);
3517}
3518
3519static void CallCoverageCallbackForTRACE(TRACE trace, void *v) {
3520  RTN rtn = TRACE_Rtn(trace);
3521  if (RTN_Valid(rtn)) {
3522    SEC sec = RTN_Sec(rtn);
3523    IMG img = SEC_Img(sec);
3524    string img_name = IMG_Name(img);
3525    // Don't instrument system libraries.
3526    if (img_name.find("/usr/") == 0) return;
3527  }
3528
3529  if (call_coverage_set == NULL) {
3530    call_coverage_set = new CallCoverageSet;
3531    function_names_map = new map<uintptr_t, string>;
3532  }
3533  for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
3534    INS ins = BBL_InsTail(bbl);
3535    if (!INS_IsProcedureCall(ins) || INS_IsSyscall(ins)) continue;
3536    if (INS_IsDirectBranchOrCall(ins)) {
3537      // If <from, to> is know at instrumentation time, don't instrument.
3538      ADDRINT to = INS_DirectBranchOrCallTargetAddress(ins);
3539      ADDRINT from = INS_Address(ins);
3540      CallCoverageRegisterCall(from, to);
3541    } else {
3542      // target is dynamic. Need to instrument.
3543      INS_InsertCall(ins, IPOINT_BEFORE,
3544                     (AFUNPTR)CallCoverageRegisterCall,
3545                     IARG_INST_PTR,
3546                     IARG_BRANCH_TARGET_ADDR,
3547                     IARG_END);
3548    }
3549  }
3550}
3551
3552// Output all <from,to> pairs.
3553static void CallCoverageCallbackForFini(INT32 code, void *v) {
3554  CHECK(call_coverage_set);
3555  CHECK(function_names_map);
3556  for (CallCoverageSet::iterator it = call_coverage_set->begin();
3557       it != call_coverage_set->end(); ++it) {
3558    string from_name = (*function_names_map)[it->first];
3559    string to_name   = (*function_names_map)[it->second];
3560    if (to_name == ".plt" || to_name == "") continue;
3561    Printf("CallCoverage: %s => %s\n", from_name.c_str(), to_name.c_str());
3562  }
3563}
3564
3565//--------- Main -------------------------- {{{1
3566int main(INT32 argc, CHAR **argv) {
3567  PIN_Init(argc, argv);
3568  PIN_InitSymbols();
3569  G_out = stderr;
3570
3571  // Init ThreadSanitizer.
3572  int first_param = 1;
3573  // skip until '-t something.so'.
3574  for (; first_param < argc && argv[first_param] != string("-t");
3575       first_param++) {
3576  }
3577  first_param += 2;
3578  vector<string> args;
3579  for (; first_param < argc; first_param++) {
3580    string param = argv[first_param];
3581    if (param == "--") break;
3582    if (param == "-short_name") continue;
3583    if (param == "-slow_asserts") continue;
3584    if (param == "1") continue;
3585    args.push_back(param);
3586  }
3587
3588  G_flags = new FLAGS;
3589  ThreadSanitizerParseFlags(&args);
3590
3591  if (G_flags->dry_run >= 2) {
3592    PIN_StartProgram();
3593    return 0;
3594  }
3595
3596  FILE *socket_output = OpenSocketForWriting(G_flags->log_file);
3597  if (socket_output) {
3598    G_out = socket_output;
3599  } else if (!G_flags->log_file.empty()) {
3600    // Replace %p with tool PID
3601    string fname = G_flags->log_file;
3602    char pid_str[100] = "";
3603    sprintf(pid_str, "%u", getpid());
3604    while (fname.find("%p") != fname.npos)
3605      fname.replace(fname.find("%p"), 2, pid_str);
3606
3607    G_out = fopen(fname.c_str(), "w");
3608    CHECK(G_out);
3609  }
3610
3611  ThreadSanitizerInit();
3612
3613  if (G_flags->call_coverage) {
3614    PIN_AddFiniFunction(CallCoverageCallbackForFini, 0);
3615    TRACE_AddInstrumentFunction(CallCoverageCallbackForTRACE, 0);
3616    PIN_StartProgram();
3617    return 0;
3618  }
3619
3620  tls_reg = PIN_ClaimToolRegister();
3621  CHECK(REG_valid(tls_reg));
3622#if _MSC_VER
3623  g_windows_thread_pool_calback_set = new unordered_set<uintptr_t>;
3624  g_windows_thread_pool_wait_object_map = new unordered_map<uintptr_t, uintptr_t>;
3625#endif
3626
3627  // Set up PIN callbacks.
3628  PIN_AddThreadStartFunction(CallbackForThreadStart, 0);
3629  PIN_AddThreadFiniFunction(CallbackForThreadFini, 0);
3630  PIN_AddFiniFunction(CallbackForFini, 0);
3631  IMG_AddInstrumentFunction(CallbackForIMG, 0);
3632  TRACE_AddInstrumentFunction(CallbackForTRACE, 0);
3633  PIN_AddFollowChildProcessFunction(CallbackForExec, NULL);
3634
3635  Report("ThreadSanitizerPin r%s pin %d: %s\n",
3636         TS_VERSION, PIN_BUILD_NUMBER,
3637         G_flags->pure_happens_before ? "hybrid=no" : "hybrid=yes");
3638  if (DEBUG_MODE) {
3639    Report("INFO: Debug build\n");
3640  }
3641
3642  if (g_race_verifier_active) {
3643    RaceVerifierInit(G_flags->race_verifier, G_flags->race_verifier_extra);
3644    global_ignore = true;
3645  }
3646
3647  // Fire!
3648  PIN_StartProgram();
3649  return 0;
3650}
3651
3652//--------- Questions about PIN -------------------------- {{{1
3653/* Questions about PIN:
3654
3655  - Names (e.g. pthread_create@... __pthread_mutex_unlock)
  - How to get the name of a global var by its address?
3657  - How to get stack pointer at thread creation?
3658  - How to get a stack trace (other than intercepting calls, entries, exits)
3659  - assert with full stack trace?
3660  */
3661// end. {{{1
3662// vim:shiftwidth=2:softtabstop=2:expandtab
3663