1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "fault_handler.h"
18
19#include <setjmp.h>
20#include <sys/mman.h>
21#include <sys/ucontext.h>
22
23#include "art_method-inl.h"
24#include "base/stl_util.h"
25#include "mirror/class.h"
26#include "sigchain.h"
27#include "thread-inl.h"
28#include "verify_object-inl.h"
29
30// Note on nested signal support
31// -----------------------------
32//
33// Typically a signal handler should not need to deal with signals that occur within it.
34// However, when a SIGSEGV occurs that is in generated code and is not one of the
35// handled signals (implicit checks), we call a function to try to dump the stack
// to the log.  This enhances the debugging experience, but the stack dump itself
// may fail.  If the cause of the original SIGSEGV is a corrupted stack or other
38// memory region, the stack backtrace code may run into trouble and may either crash
39// or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
40// mask the original signal and thus prevent useful debug output from being presented.
41//
42// In order to handle this situation, before we call the stack tracer we do the following:
43//
44// 1. shutdown the fault manager so that we are talking to the real signal management
45//    functions rather than those in sigchain.
46// 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
47//    thread running the signal handler.
48// 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
49// 4. save the thread's state to the TLS of the current thread using 'setjmp'
50//
51// We then call the stack tracer and one of two things may happen:
52// a. it completes successfully
53// b. it crashes and a signal is raised.
54//
55// In the former case, we fall through and everything is fine.  In the latter case
56// our secondary signal handler gets called in a signal context.  This results in
// a call to FaultManager::HandleNestedSignal(), an architecture-specific function
58// whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
59// thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
60// and write something to the log to tell the user that it happened.
61//
62// Regardless of how we got there, we reach the code after the stack tracer and we
63// restore the signal states to their original values, reinstate the fault manager (thus
64// reestablishing the signal chain) and continue.
65
66// This is difficult to test with a runtime test.  To invoke the nested signal code
67// on any signal, uncomment the following line and run something that throws a
68// NullPointerException.
69// #define TEST_NESTED_SIGNAL
70
71namespace art {
// Static fault manager object accessed by the signal handlers below.
FaultManager fault_manager;
74
// Debugging hook with default visibility and C linkage.  FaultManager::HandleFault
// calls it just before forwarding a fault to the next handler in the chain; its
// only effect is a verbose log line.
extern "C" __attribute__((visibility("default"))) void art_sigsegv_fault() {
  // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
  VLOG(signals)<< "Caught unknown SIGSEGV in ART fault handler - chaining to next handler.";
}
79
// Signal handler called on SIGSEGV.  It is the sa_sigaction callback installed by
// SetUpArtAction() and simply forwards all three arguments to the global fault
// manager.
static void art_fault_handler(int sig, siginfo_t* info, void* context) {
  fault_manager.HandleFault(sig, info, context);
}
84
// Signal handler for dealing with a nested signal, i.e. one raised while
// FaultManager::HandleFault is running its non-generated-code handlers.  It is
// installed temporarily by HandleFault and forwards to the global fault manager.
static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
  fault_manager.HandleNestedSignal(sig, info, context);
}
89
// Constructs an uninitialized fault manager.  The sigaction() call passes a null
// new action, so it only queries the current SIGSEGV disposition and stores it in
// oldaction_; no handler is installed until Init() is called.
FaultManager::FaultManager() : initialized_(false) {
  sigaction(SIGSEGV, nullptr, &oldaction_);
}
93
// Nothing is released here; Shutdown() is what unclaims the signal chain and
// deletes the registered handlers.
FaultManager::~FaultManager() {
}
96
97static void SetUpArtAction(struct sigaction* action) {
98  action->sa_sigaction = art_fault_handler;
99  sigemptyset(&action->sa_mask);
100  action->sa_flags = SA_SIGINFO | SA_ONSTACK;
101#if !defined(__APPLE__) && !defined(__mips__)
102  action->sa_restorer = nullptr;
103#endif
104}
105
106void FaultManager::EnsureArtActionInFrontOfSignalChain() {
107  if (initialized_) {
108    struct sigaction action;
109    SetUpArtAction(&action);
110    EnsureFrontOfChain(SIGSEGV, &action);
111  } else {
112    LOG(WARNING) << "Can't call " << __FUNCTION__ << " due to unitialized fault manager";
113  }
114}
115
116void FaultManager::Init() {
117  CHECK(!initialized_);
118  struct sigaction action;
119  SetUpArtAction(&action);
120
121  // Set our signal handler now.
122  int e = sigaction(SIGSEGV, &action, &oldaction_);
123  if (e != 0) {
124    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
125  }
126  // Make sure our signal handler is called before any user handlers.
127  ClaimSignalChain(SIGSEGV, &oldaction_);
128  initialized_ = true;
129}
130
131void FaultManager::Release() {
132  if (initialized_) {
133    UnclaimSignalChain(SIGSEGV);
134    initialized_ = false;
135  }
136}
137
138void FaultManager::Shutdown() {
139  if (initialized_) {
140    Release();
141
142    // Free all handlers.
143    STLDeleteElements(&generated_code_handlers_);
144    STLDeleteElements(&other_handlers_);
145  }
146}
147
// Main fault entry point, reached from art_fault_handler.
//
// Phase 1: if IsInGeneratedCode() accepts the fault, offer it to each handler in
// generated_code_handlers_; the first one whose Action() returns true ends the
// handling (unless TEST_NESTED_SIGNAL is defined).
//
// Phase 2: otherwise, provided the thread is attached and the runtime is started,
// run the handlers in other_handlers_ with a temporary nested-signal handler
// installed and a setjmp() recovery point saved in the thread's nested signal
// state, so that a failure inside those handlers does not hide the original fault.
//
// Finally the fault manager is re-initialized and the signal is forwarded to the
// next handler in the chain.
//
// sig, info, context: the arguments the kernel passed to the signal handler.
void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
  //
  // If malloc calls abort, it will be holding its lock.
  // If the handler tries to call malloc, it will deadlock.
  VLOG(signals) << "Handling fault";
  if (IsInGeneratedCode(info, context, true)) {
    VLOG(signals) << "in generated code, looking for handler";
    for (const auto& handler : generated_code_handlers_) {
      VLOG(signals) << "invoking Action on handler " << handler;
      if (handler->Action(sig, info, context)) {
#ifdef TEST_NESTED_SIGNAL
        // In test mode we want to fall through to stack trace handler
        // on every signal (in reality this will cause a crash on the first
        // signal).
        break;
#else
        // We have handled a signal so it's time to return from the
        // signal handler to the appropriate place.
        return;
#endif
      }
    }
  }

  // We hit a signal we didn't handle.  This might be something for which
  // we can give more information about so call all registered handlers to see
  // if it is.

  Thread* self = Thread::Current();

  // If ART is not running, or the thread is not attached to ART pass the
  // signal on to the next handler in the chain.
  if (self == nullptr || Runtime::Current() == nullptr || !Runtime::Current()->IsStarted()) {
    InvokeUserSignalHandler(sig, info, context);
    return;
  }
  // Now set up the nested signal handler.

  // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
  static const int handled_nested_signals[] = {SIGABRT};
  constexpr size_t num_handled_nested_signals = arraysize(handled_nested_signals);

  // Release the fault manager so that it will remove the signal chain for
  // SIGSEGV and we call the real sigaction.
  fault_manager.Release();

  // The action for SIGSEGV should be the default handler now.

  // Unblock the signals we allow so that they can be delivered in the signal handler.
  // NOTE(review): this function never re-blocks them explicitly; presumably the
  // mask is restored when the signal handler returns - confirm.
  sigset_t sigset;
  sigemptyset(&sigset);
  for (int signal : handled_nested_signals) {
    sigaddset(&sigset, signal);
  }
  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);

  // If we get a signal in this code we want to invoke our nested signal
  // handler.
  struct sigaction action;
  struct sigaction oldactions[num_handled_nested_signals];
  action.sa_sigaction = art_nested_signal_handler;

  // Explicitly block the handled nested signals (currently only SIGABRT, see the
  // TODO above) while the nested signal handler itself runs.  We definitely
  // don't want these happening inside our nested signal handler.
  sigemptyset(&action.sa_mask);
  for (int signal : handled_nested_signals) {
    sigaddset(&action.sa_mask, signal);
  }

  action.sa_flags = SA_SIGINFO | SA_ONSTACK;
#if !defined(__APPLE__) && !defined(__mips__)
  action.sa_restorer = nullptr;
#endif

  // Catch handled signals to invoke our nested handler.  The previous action for
  // each signal is saved in oldactions[] so that it can be put back afterwards.
  bool success = true;
  for (size_t i = 0; i < num_handled_nested_signals; ++i) {
    success = sigaction(handled_nested_signals[i], &action, &oldactions[i]) == 0;
    if (!success) {
      PLOG(ERROR) << "Unable to set up nested signal handler";
      break;
    }
  }
  if (success) {
    // Save the current state and call the handlers.  If anything causes a signal
    // our nested signal handler will be invoked and this will longjmp to the saved
    // state.
    if (setjmp(*self->GetNestedSignalState()) == 0) {
      for (const auto& handler : other_handlers_) {
        if (handler->Action(sig, info, context)) {
          // Restore the signal handlers, reinit the fault manager and return.  Signal was
          // handled.
          for (size_t i = 0; i < num_handled_nested_signals; ++i) {
            success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
            if (!success) {
              PLOG(ERROR) << "Unable to restore signal handler";
            }
          }
          fault_manager.Init();
          return;
        }
      }
    } else {
      // setjmp() returned non-zero: we got here via longjmp from the nested
      // signal handler, so one of the handlers above raised a signal.
      LOG(ERROR) << "Nested signal detected - original signal being reported";
    }

    // Restore the signal handlers.
    for (size_t i = 0; i < num_handled_nested_signals; ++i) {
      success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
      if (!success) {
        PLOG(ERROR) << "Unable to restore signal handler";
      }
    }
  }

  // Now put the fault manager back in place.
  fault_manager.Init();

  // Set a breakpoint in this function to catch unhandled signals.
  art_sigsegv_fault();

  // Pass this on to the next handler in the chain, or the default if none.
  InvokeUserSignalHandler(sig, info, context);
}
274
275void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
276  DCHECK(initialized_);
277  if (generated_code) {
278    generated_code_handlers_.push_back(handler);
279  } else {
280    other_handlers_.push_back(handler);
281  }
282}
283
284void FaultManager::RemoveHandler(FaultHandler* handler) {
285  auto it = std::find(generated_code_handlers_.begin(), generated_code_handlers_.end(), handler);
286  if (it != generated_code_handlers_.end()) {
287    generated_code_handlers_.erase(it);
288    return;
289  }
290  auto it2 = std::find(other_handlers_.begin(), other_handlers_.end(), handler);
291  if (it2 != other_handlers_.end()) {
292    other_handlers_.erase(it);
293    return;
294  }
295  LOG(FATAL) << "Attempted to remove non existent handler " << handler;
296}
297
298// This function is called within the signal handler.  It checks that
299// the mutator_lock is held (shared).  No annotalysis is done.
300bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
301  // We can only be running Java code in the current thread if it
302  // is in Runnable state.
303  VLOG(signals) << "Checking for generated code";
304  Thread* thread = Thread::Current();
305  if (thread == nullptr) {
306    VLOG(signals) << "no current thread";
307    return false;
308  }
309
310  ThreadState state = thread->GetState();
311  if (state != kRunnable) {
312    VLOG(signals) << "not runnable";
313    return false;
314  }
315
316  // Current thread is runnable.
317  // Make sure it has the mutator lock.
318  if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
319    VLOG(signals) << "no lock";
320    return false;
321  }
322
323  ArtMethod* method_obj = 0;
324  uintptr_t return_pc = 0;
325  uintptr_t sp = 0;
326
327  // Get the architecture specific method address and return address.  These
328  // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
329  GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp);
330
331  // If we don't have a potential method, we're outta here.
332  VLOG(signals) << "potential method: " << method_obj;
333  // TODO: Check linear alloc and image.
334  if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
335    VLOG(signals) << "no method";
336    return false;
337  }
338
339  // Verify that the potential method is indeed a method.
340  // TODO: check the GC maps to make sure it's an object.
341  // Check that the class pointer inside the object is not null and is aligned.
342  // TODO: Method might be not a heap address, and GetClass could fault.
343  // No read barrier because method_obj may not be a real object.
344  mirror::Class* cls = method_obj->GetDeclaringClassNoBarrier();
345  if (cls == nullptr) {
346    VLOG(signals) << "not a class";
347    return false;
348  }
349  if (!IsAligned<kObjectAlignment>(cls)) {
350    VLOG(signals) << "not aligned";
351    return false;
352  }
353
354
355  if (!VerifyClassClass(cls)) {
356    VLOG(signals) << "not a class class";
357    return false;
358  }
359
360  // We can be certain that this is a method now.  Check if we have a GC map
361  // at the return PC address.
362  if (true || kIsDebugBuild) {
363    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
364    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj,
365                                                                                 sizeof(void*));
366    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
367    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
368  }
369  uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
370  VLOG(signals) << "dexpc: " << dexpc;
371  return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
372}
373
// Base-class constructor: remembers the owning manager in manager_.  It does not
// register the handler; each subclass constructor calls AddHandler() itself.
FaultHandler::FaultHandler(FaultManager* manager) : manager_(manager) {
}
376
//
// Null pointer fault handler
//
// Registers itself with |manager| as a generated-code handler.
NullPointerHandler::NullPointerHandler(FaultManager* manager) : FaultHandler(manager) {
  manager_->AddHandler(this, true);
}
383
//
// Suspension fault handler
//
// Registers itself with |manager| as a generated-code handler.
SuspensionHandler::SuspensionHandler(FaultManager* manager) : FaultHandler(manager) {
  manager_->AddHandler(this, true);
}
390
//
// Stack overflow fault handler
//
// Registers itself with |manager| as a generated-code handler.
StackOverflowHandler::StackOverflowHandler(FaultManager* manager) : FaultHandler(manager) {
  manager_->AddHandler(this, true);
}
397
//
// Stack trace handler, used to help get a stack trace from SIGSEGV inside of compiled code.
//
// Unlike the handlers above, this one registers itself in the manager's list of
// other (non generated-code) handlers.
JavaStackTraceHandler::JavaStackTraceHandler(FaultManager* manager) : FaultHandler(manager) {
  manager_->AddHandler(this, false);
}
404
405bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
406  // Make sure that we are in the generated code, but we may not have a dex pc.
407  UNUSED(sig);
408#ifdef TEST_NESTED_SIGNAL
409  bool in_generated_code = true;
410#else
411  bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
412#endif
413  if (in_generated_code) {
414    LOG(ERROR) << "Dumping java stack trace for crash in generated code";
415    ArtMethod* method = nullptr;
416    uintptr_t return_pc = 0;
417    uintptr_t sp = 0;
418    Thread* self = Thread::Current();
419
420    manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
421    // Inside of generated code, sp[0] is the method, so sp is the frame.
422    self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp));
423#ifdef TEST_NESTED_SIGNAL
424    // To test the nested signal handler we raise a signal here.  This will cause the
425    // nested signal handler to be called and perform a longjmp back to the setjmp
426    // above.
427    abort();
428#endif
429    self->DumpJavaStack(LOG(ERROR));
430  }
431
432  return false;  // Return false since we want to propagate the fault to the main signal handler.
433}
434
435}   // namespace art
436