fault_handler.cc revision fabe91e0d558936ac26b98d2b4ee1af08f58831d
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "fault_handler.h"
18
19#include <setjmp.h>
20#include <sys/mman.h>
21#include <sys/ucontext.h>
22#include "mirror/art_method.h"
23#include "mirror/class.h"
24#include "sigchain.h"
25#include "thread-inl.h"
26#include "verify_object-inl.h"
27
28// Note on nested signal support
29// -----------------------------
30//
31// Typically a signal handler should not need to deal with signals that occur within it.
32// However, when a SIGSEGV occurs that is in generated code and is not one of the
33// handled signals (implicit checks), we call a function to try to dump the stack
34// to the log.  This enhances the debugging experience but may have the side effect
35// that it may not work.  If the cause of the original SIGSEGV is a corrupted stack or other
36// memory region, the stack backtrace code may run into trouble and may either crash
37// or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
38// mask the original signal and thus prevent useful debug output from being presented.
39//
40// In order to handle this situation, before we call the stack tracer we do the following:
41//
42// 1. shutdown the fault manager so that we are talking to the real signal management
43//    functions rather than those in sigchain.
44// 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
45//    thread running the signal handler.
46// 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
47// 4. save the thread's state to the TLS of the current thread using 'setjmp'
48//
49// We then call the stack tracer and one of two things may happen:
50// a. it completes successfully
51// b. it crashes and a signal is raised.
52//
53// In the former case, we fall through and everything is fine.  In the latter case
54// our secondary signal handler gets called in a signal context.  This results in
// a call to FaultManager::HandleNestedSignal(), an architecture-specific function
56// whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
57// thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
58// and write something to the log to tell the user that it happened.
59//
60// Regardless of how we got there, we reach the code after the stack tracer and we
61// restore the signal states to their original values, reinstate the fault manager (thus
62// reestablishing the signal chain) and continue.
63
64// This is difficult to test with a runtime test.  To invoke the nested signal code
65// on any signal, uncomment the following line and run something that throws a
66// NullPointerException.
67// #define TEST_NESTED_SIGNAL
68
69namespace art {
// Static fault manager object accessed by the signal handlers.
71FaultManager fault_manager;
72
73extern "C" {
74void art_sigsegv_fault() {
75  // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
76  VLOG(signals)<< "Caught unknown SIGSEGV in ART fault handler - chaining to next handler.";
77}
78}
79
80// Signal handler called on SIGSEGV.
81static void art_fault_handler(int sig, siginfo_t* info, void* context) {
82  fault_manager.HandleFault(sig, info, context);
83}
84
85// Signal handler for dealing with a nested signal.
86static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
87  fault_manager.HandleNestedSignal(sig, info, context);
88}
89
90FaultManager::FaultManager() : initialized_(false) {
91  sigaction(SIGSEGV, nullptr, &oldaction_);
92}
93
94FaultManager::~FaultManager() {
95}
96
97
98void FaultManager::Init() {
99  CHECK(!initialized_);
100  struct sigaction action;
101  action.sa_sigaction = art_fault_handler;
102  sigemptyset(&action.sa_mask);
103  action.sa_flags = SA_SIGINFO | SA_ONSTACK;
104#if !defined(__APPLE__) && !defined(__mips__)
105  action.sa_restorer = nullptr;
106#endif
107
108  // Set our signal handler now.
109  int e = sigaction(SIGSEGV, &action, &oldaction_);
110  if (e != 0) {
111    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
112  }
113  // Make sure our signal handler is called before any user handlers.
114  ClaimSignalChain(SIGSEGV, &oldaction_);
115  initialized_ = true;
116}
117
118void FaultManager::Shutdown() {
119  if (initialized_) {
120    UnclaimSignalChain(SIGSEGV);
121    initialized_ = false;
122  }
123}
124
125void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
126  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
127  //
128  // If malloc calls abort, it will be holding its lock.
129  // If the handler tries to call malloc, it will deadlock.
130
131  VLOG(signals) << "Handling fault";
132  if (IsInGeneratedCode(info, context, true)) {
133    VLOG(signals) << "in generated code, looking for handler";
134    for (const auto& handler : generated_code_handlers_) {
135      VLOG(signals) << "invoking Action on handler " << handler;
136      if (handler->Action(sig, info, context)) {
137#ifdef TEST_NESTED_SIGNAL
138        // In test mode we want to fall through to stack trace handler
139        // on every signal (in reality this will cause a crash on the first
140        // signal).
141        break;
142#else
143        // We have handled a signal so it's time to return from the
144        // signal handler to the appropriate place.
145        return;
146#endif
147      }
148    }
149  }
150
151  // We hit a signal we didn't handle.  This might be something for which
152  // we can give more information about so call all registered handlers to see
153  // if it is.
154  for (const auto& handler : other_handlers_) {
155    if (handler->Action(sig, info, context)) {
156      return;
157    }
158  }
159
160  // Set a breakpoint in this function to catch unhandled signals.
161  art_sigsegv_fault();
162
163  // Pass this on to the next handler in the chain, or the default if none.
164  InvokeUserSignalHandler(sig, info, context);
165}
166
167void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
168  if (generated_code) {
169    generated_code_handlers_.push_back(handler);
170  } else {
171    other_handlers_.push_back(handler);
172  }
173}
174
175void FaultManager::RemoveHandler(FaultHandler* handler) {
176  auto it = std::find(generated_code_handlers_.begin(), generated_code_handlers_.end(), handler);
177  if (it != generated_code_handlers_.end()) {
178    generated_code_handlers_.erase(it);
179    return;
180  }
181  auto it2 = std::find(other_handlers_.begin(), other_handlers_.end(), handler);
182  if (it2 != other_handlers_.end()) {
183    other_handlers_.erase(it);
184    return;
185  }
186  LOG(FATAL) << "Attempted to remove non existent handler " << handler;
187}
188
189// This function is called within the signal handler.  It checks that
190// the mutator_lock is held (shared).  No annotalysis is done.
191bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
192  // We can only be running Java code in the current thread if it
193  // is in Runnable state.
194  VLOG(signals) << "Checking for generated code";
195  Thread* thread = Thread::Current();
196  if (thread == nullptr) {
197    VLOG(signals) << "no current thread";
198    return false;
199  }
200
201  ThreadState state = thread->GetState();
202  if (state != kRunnable) {
203    VLOG(signals) << "not runnable";
204    return false;
205  }
206
207  // Current thread is runnable.
208  // Make sure it has the mutator lock.
209  if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
210    VLOG(signals) << "no lock";
211    return false;
212  }
213
214  mirror::ArtMethod* method_obj = 0;
215  uintptr_t return_pc = 0;
216  uintptr_t sp = 0;
217
218  // Get the architecture specific method address and return address.  These
219  // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
220  GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp);
221
222  // If we don't have a potential method, we're outta here.
223  VLOG(signals) << "potential method: " << method_obj;
224  if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
225    VLOG(signals) << "no method";
226    return false;
227  }
228
229  // Verify that the potential method is indeed a method.
230  // TODO: check the GC maps to make sure it's an object.
231  // Check that the class pointer inside the object is not null and is aligned.
232  // TODO: Method might be not a heap address, and GetClass could fault.
233  mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
234  if (cls == nullptr) {
235    VLOG(signals) << "not a class";
236    return false;
237  }
238  if (!IsAligned<kObjectAlignment>(cls)) {
239    VLOG(signals) << "not aligned";
240    return false;
241  }
242
243
244  if (!VerifyClassClass(cls)) {
245    VLOG(signals) << "not a class class";
246    return false;
247  }
248
249  // Now make sure the class is a mirror::ArtMethod.
250  if (!cls->IsArtMethodClass()) {
251    VLOG(signals) << "not a method";
252    return false;
253  }
254
255  // We can be certain that this is a method now.  Check if we have a GC map
256  // at the return PC address.
257  if (true || kIsDebugBuild) {
258    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
259    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
260    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
261    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
262  }
263  uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
264  VLOG(signals) << "dexpc: " << dexpc;
265  return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
266}
267
268FaultHandler::FaultHandler(FaultManager* manager) : manager_(manager) {
269}
270
271//
272// Null pointer fault handler
273//
274NullPointerHandler::NullPointerHandler(FaultManager* manager) : FaultHandler(manager) {
275  manager_->AddHandler(this, true);
276}
277
278//
279// Suspension fault handler
280//
281SuspensionHandler::SuspensionHandler(FaultManager* manager) : FaultHandler(manager) {
282  manager_->AddHandler(this, true);
283}
284
285//
286// Stack overflow fault handler
287//
288StackOverflowHandler::StackOverflowHandler(FaultManager* manager) : FaultHandler(manager) {
289  manager_->AddHandler(this, true);
290}
291
292//
293// Stack trace handler, used to help get a stack trace from SIGSEGV inside of compiled code.
294//
295JavaStackTraceHandler::JavaStackTraceHandler(FaultManager* manager) : FaultHandler(manager) {
296  manager_->AddHandler(this, false);
297}
298
299bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
300  // Make sure that we are in the generated code, but we may not have a dex pc.
301
302#ifdef TEST_NESTED_SIGNAL
303  bool in_generated_code = true;
304#else
305  bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
306#endif
307  if (in_generated_code) {
308    LOG(ERROR) << "Dumping java stack trace for crash in generated code";
309    mirror::ArtMethod* method = nullptr;
310    uintptr_t return_pc = 0;
311    uintptr_t sp = 0;
312    Thread* self = Thread::Current();
313
314    // Shutdown the fault manager so that it will remove the signal chain for
315    // SIGSEGV and we call the real sigaction.
316    fault_manager.Shutdown();
317
318    // The action for SIGSEGV should be the default handler now.
319
320    // Unblock the signals we allow so that they can be delivered in the signal handler.
321    sigset_t sigset;
322    sigemptyset(&sigset);
323    sigaddset(&sigset, SIGSEGV);
324    sigaddset(&sigset, SIGABRT);
325    pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
326
327    // If we get a signal in this code we want to invoke our nested signal
328    // handler.
329    struct sigaction action, oldsegvaction, oldabortaction;
330    action.sa_sigaction = art_nested_signal_handler;
331
332    // Explictly mask out SIGSEGV and SIGABRT from the nested signal handler.  This
333    // should be the default but we definitely don't want these happening in our
334    // nested signal handler.
335    sigemptyset(&action.sa_mask);
336    sigaddset(&action.sa_mask, SIGSEGV);
337    sigaddset(&action.sa_mask, SIGABRT);
338
339    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
340#if !defined(__APPLE__) && !defined(__mips__)
341    action.sa_restorer = nullptr;
342#endif
343
344    // Catch SIGSEGV and SIGABRT to invoke our nested handler
345    int e1 = sigaction(SIGSEGV, &action, &oldsegvaction);
346    int e2 = sigaction(SIGABRT, &action, &oldabortaction);
347    if (e1 != 0 || e2 != 0) {
348      LOG(ERROR) << "Unable to register nested signal handler - no stack trace possible";
349      // If sigaction failed we have a serious problem.  We cannot catch
350      // any failures in the stack tracer and it's likely to occur since
351      // the program state is bad.  Therefore we don't even try to give
352      // a stack trace.
353    } else {
354      // Save the current state and try to dump the stack.  If this causes a signal
355      // our nested signal handler will be invoked and this will longjmp to the saved
356      // state.
357      if (setjmp(*self->GetNestedSignalState()) == 0) {
358        manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
359        // Inside of generated code, sp[0] is the method, so sp is the frame.
360        StackReference<mirror::ArtMethod>* frame =
361            reinterpret_cast<StackReference<mirror::ArtMethod>*>(sp);
362        self->SetTopOfStack(frame, 0);  // Since we don't necessarily have a dex pc, pass in 0.
363#ifdef TEST_NESTED_SIGNAL
364        // To test the nested signal handler we raise a signal here.  This will cause the
365        // nested signal handler to be called and perform a longjmp back to the setjmp
366        // above.
367        abort();
368#endif
369        self->DumpJavaStack(LOG(ERROR));
370      } else {
371        LOG(ERROR) << "Stack trace aborted due to nested signal - original signal being reported";
372      }
373
374      // Restore the signal handlers.
375      sigaction(SIGSEGV, &oldsegvaction, nullptr);
376      sigaction(SIGABRT, &oldabortaction, nullptr);
377    }
378
379    // Now put the fault manager back in place.
380    fault_manager.Init();
381
382    // And we're done.
383  }
384
385  return false;  // Return false since we want to propagate the fault to the main signal handler.
386}
387
388}   // namespace art
389
390