dfsan.cc revision e4c3c84e9d470ebe5bae3a28358f28c7f652e5a6
1//===-- dfsan.cc ----------------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of DataFlowSanitizer.
11//
12// DataFlowSanitizer runtime.  This file defines the public interface to
13// DataFlowSanitizer as well as the definition of certain runtime functions
14// called automatically by the compiler (specifically the instrumentation pass
15// in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
16//
17// The public interface is defined in include/sanitizer/dfsan_interface.h whose
18// functions are prefixed dfsan_ while the compiler interface functions are
19// prefixed __dfsan_.
20//===----------------------------------------------------------------------===//
21
22#include "sanitizer/dfsan_interface.h"
23#include "sanitizer_common/sanitizer_atomic.h"
24#include "sanitizer_common/sanitizer_common.h"
25#include "sanitizer_common/sanitizer_libc.h"
26
27#include "dfsan/dfsan.h"
28
29using namespace __dfsan;
30
31typedef atomic_uint16_t atomic_dfsan_label;
32static const dfsan_label kInitializingLabel = -1;
33
34static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
35
36static atomic_dfsan_label __dfsan_last_label;
37static dfsan_label_info __dfsan_label_info[kNumLabels];
38
39SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
40SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];
41
42// On Linux/x86_64, memory is laid out as follows:
43//
44// +--------------------+ 0x800000000000 (top of memory)
45// | application memory |
46// +--------------------+ 0x700000008000 (kAppAddr)
47// |                    |
48// |       unused       |
49// |                    |
50// +--------------------+ 0x200200000000 (kUnusedAddr)
51// |    union table     |
52// +--------------------+ 0x200000000000 (kUnionTableAddr)
53// |   shadow memory    |
54// +--------------------+ 0x000000010000 (kShadowAddr)
55// | reserved by kernel |
56// +--------------------+ 0x000000000000
57//
// To derive a shadow memory address from an application memory address,
// bits 44-46 are cleared to bring the address into the range
// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
// account for the double byte representation of shadow labels and move the
// address into the shadow memory range.  See the function shadow_for, which is
// used below but defined outside this file (presumably in dfsan/dfsan.h).
63
64typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
65
66static const uptr kShadowAddr = 0x10000;
67static const uptr kUnionTableAddr = 0x200000000000;
68static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t);
69static const uptr kAppAddr = 0x700000008000;
70
71static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
72  return &(*(dfsan_union_table_t *) kUnionTableAddr)[l1][l2];
73}
74
75// Resolves the union of two unequal labels.  Nonequality is a precondition for
76// this function (the instrumentation pass inlines the equality test).
77extern "C" SANITIZER_INTERFACE_ATTRIBUTE
78dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
79  DCHECK_NE(l1, l2);
80
81  if (l1 == 0)
82    return l2;
83  if (l2 == 0)
84    return l1;
85
86  if (l1 > l2)
87    Swap(l1, l2);
88
89  atomic_dfsan_label *table_ent = union_table(l1, l2);
90  // We need to deal with the case where two threads concurrently request
91  // a union of the same pair of labels.  If the table entry is uninitialized,
92  // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
93  // (i.e. -1) to mark that we are initializing it.
94  dfsan_label label = 0;
95  if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
96                                     memory_order_acquire)) {
97    // Check whether l2 subsumes l1.  We don't need to check whether l1
98    // subsumes l2 because we are guaranteed here that l1 < l2, and (at least
99    // in the cases we are interested in) a label may only subsume labels
100    // created earlier (i.e. with a lower numerical value).
101    if (__dfsan_label_info[l2].l1 == l1 ||
102        __dfsan_label_info[l2].l2 == l1) {
103      label = l2;
104    } else {
105      label =
106        atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
107      CHECK_NE(label, kInitializingLabel);
108      __dfsan_label_info[label].l1 = l1;
109      __dfsan_label_info[label].l2 = l2;
110    }
111    atomic_store(table_ent, label, memory_order_release);
112  } else if (label == kInitializingLabel) {
113    // Another thread is initializing the entry.  Wait until it is finished.
114    do {
115      internal_sched_yield();
116      label = atomic_load(table_ent, memory_order_acquire);
117    } while (label == kInitializingLabel);
118  }
119  return label;
120}
121
122extern "C" SANITIZER_INTERFACE_ATTRIBUTE
123dfsan_label __dfsan_union_load(dfsan_label *ls, size_t n) {
124  dfsan_label label = ls[0];
125  for (size_t i = 1; i != n; ++i) {
126    dfsan_label next_label = ls[i];
127    if (label != next_label)
128      label = __dfsan_union(label, next_label);
129  }
130  return label;
131}
132
133// Like __dfsan_union, but for use from the client or custom functions.  Hence
134// the equality comparison is done here before calling __dfsan_union.
135SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
136dfsan_union(dfsan_label l1, dfsan_label l2) {
137  if (l1 == l2)
138    return l1;
139  return __dfsan_union(l1, l2);
140}
141
142SANITIZER_INTERFACE_ATTRIBUTE
143dfsan_label dfsan_create_label(const char *desc, void *userdata) {
144  dfsan_label label =
145    atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
146  CHECK_NE(label, kInitializingLabel);
147  __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;
148  __dfsan_label_info[label].desc = desc;
149  __dfsan_label_info[label].userdata = userdata;
150  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
151  return label;
152}
153
154SANITIZER_INTERFACE_ATTRIBUTE
155void dfsan_set_label(dfsan_label label, void *addr, size_t size) {
156  for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
157    *labelp = label;
158}
159
160SANITIZER_INTERFACE_ATTRIBUTE
161void dfsan_add_label(dfsan_label label, void *addr, size_t size) {
162  for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
163    if (*labelp != label)
164      *labelp = __dfsan_union(*labelp, label);
165}
166
167SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_get_label(long data) {
168  // The label for 'data' is implicitly passed by the instrumentation pass in
169  // the first element of __dfsan_arg_tls.  So we can just return it.
170  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
171  return __dfsan_arg_tls[0];
172}
173
174SANITIZER_INTERFACE_ATTRIBUTE
175const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
176  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
177  return &__dfsan_label_info[label];
178}
179
180int dfsan_has_label(dfsan_label label, dfsan_label elem) {
181  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
182  if (label == elem)
183    return true;
184  const dfsan_label_info *info = dfsan_get_label_info(label);
185  if (info->l1 != 0) {
186    return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
187  } else {
188    return false;
189  }
190}
191
192dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
193  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
194  const dfsan_label_info *info = dfsan_get_label_info(label);
195  if (info->l1 != 0) {
196    return dfsan_has_label_with_desc(info->l1, desc) ||
197           dfsan_has_label_with_desc(info->l2, desc);
198  } else {
199    return internal_strcmp(desc, info->desc) == 0;
200  }
201}
202
203#ifdef DFSAN_NOLIBC
204extern "C" void dfsan_init() {
205#else
206static void dfsan_init(int argc, char **argv, char **envp) {
207#endif
208  MmapFixedNoReserve(kShadowAddr, kUnusedAddr - kShadowAddr);
209
210  // Protect the region of memory we don't use, to preserve the one-to-one
211  // mapping from application to shadow memory. But if ASLR is disabled, Linux
212  // will load our executable in the middle of our unused region. This mostly
213  // works so long as the program doesn't use too much memory. We support this
214  // case by disabling memory protection when ASLR is disabled.
215  uptr init_addr = (uptr)&dfsan_init;
216  if (!(init_addr >= kUnusedAddr && init_addr < kAppAddr))
217    Mprotect(kUnusedAddr, kAppAddr - kUnusedAddr);
218}
219
220#ifndef DFSAN_NOLIBC
221__attribute__((section(".preinit_array"), used))
222static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init;
223#endif
224