1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/process/kill.h"
6
7#include <signal.h>
8#include <sys/event.h>
9#include <sys/types.h>
10#include <sys/wait.h>
11
12#include "base/files/file_util.h"
13#include "base/files/scoped_file.h"
14#include "base/logging.h"
15#include "base/posix/eintr_wrapper.h"
16
17namespace base {
18
19namespace {
20
// Timeout, in seconds, that EnsureProcessTerminated() hands to
// WaitForChildToDie(): how long a child is given to exit before it is killed.
const int kWaitBeforeKillSeconds = 2;
22
23// Reap |child| process. This call blocks until completion.
24void BlockingReap(pid_t child) {
25  const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
26  if (result == -1) {
27    DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
28  }
29}
30
// Gives |child| up to |timeout| seconds to exit and reaps it once it has. If
// the child is still around when the time is up, it is sent SIGKILL and then
// reaped.
//
// Strategy:
//   1. Register an EVFILT_PROC/NOTE_EXIT filter for |child| on a fresh kqueue
//      and wait on it with kevent, which accepts a timeout. This is the
//      preferred way to wait for a bounded period.
//   2. When the exit notification arrives, waitpid reaps the child.
//   3. If no notification arrives in time, or the kqueue cannot be created,
//      fails, or reports something unexpected, the child is killed and
//      reaped without further ceremony.
//
// The child handed to this function may be running, terminated but not yet
// reaped, or (apparently, and unfortunately) terminated and already reaped.
// Usually it has already been asked to exit, but nothing here relies on that.
//
// A terminating, unreaped process can pass through a window in which kqueue
// no longer knows about it but it is not yet a zombie that a non-blocking
// (WNOHANG) waitpid can collect. That window is recognized when kqueue says
// the process is not running while the non-blocking waitpid reports that it
// is. A blocking reap then collects the known-dying child so that no zombie
// is left behind.
//
// When the kqueue is unusable altogether, as can happen under EMFILE ("too
// many open files"), the child is forcibly killed and reaped immediately,
// which closes off another way of leaking zombies even when the process is
// already in trouble for other reasons.
//
// Known hazards: if the child was already reaped, its pid may have been
// recycled and may now name an unrelated process. Likewise, if the pid is not
// a child of this process at all, kqueue will still work but waitpid will
// not, and killing a non-child might not be the best approach.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // INVARIANT: do not introduce a return path that leaves |child| unreaped.
  // Whatever goes wrong with kqueue or kevent, control must reach the
  // kill-and-reap fallback at the bottom of this function; otherwise zombies
  // will accumulate. The two returns below happen only once the child has
  // been reaped (or was already reaped by someone else).

  ScopedFD queue(HANDLE_EINTR(kqueue()));
  if (queue.is_valid()) {
    struct kevent registration = {0};
    EV_SET(&registration, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    const int setup_status =
        HANDLE_EINTR(kevent(queue.get(), &registration, 1, NULL, 0, NULL));

    if (setup_status != -1) {
      // The filter is installed. Wait for the exit note, tracking a deadline
      // so that an interrupted kevent can be restarted with whatever time is
      // left rather than the full timeout.
      TimeDelta time_left = TimeDelta::FromSeconds(timeout);
      const TimeTicks deadline = TimeTicks::Now() + time_left;
      struct kevent fired = {0};
      int wait_status = -1;
      while (time_left.InMilliseconds() > 0) {
        const struct timespec wait_spec = time_left.ToTimeSpec();
        wait_status = kevent(queue.get(), NULL, 0, &fired, 1, &wait_spec);
        if (wait_status != -1 || errno != EINTR) {
          break;
        }
        // Interrupted: recompute the remaining time and treat the attempt as
        // "no event" in case the deadline has now passed.
        time_left = deadline - TimeTicks::Now();
        wait_status = 0;
      }

      if (wait_status == 1) {
        const bool is_exit_note = (fired.fflags & NOTE_EXIT) != 0;
        const bool is_our_child =
            fired.ident == static_cast<uintptr_t>(child);
        if (is_exit_note && is_our_child) {
          // The child is dead or dying, so this blocks briefly at most.
          BlockingReap(child);
          return;
        }
        DLOG(ERROR) << "kevent (wait " << child
                    << "): unexpected event: fflags=" << fired.fflags
                    << ", ident=" << fired.ident;
      } else if (wait_status == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (wait_status > 1) {
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << wait_status;
      }
      // A status of 0 means the wait timed out; fall through to the kill.
    } else if (errno == ESRCH) {
      // kqueue does not know the process. One of these holds:
      //   1. It has died but has not been reaped yet.
      //   2. It has died and has already been reaped.
      //   3. It is in the middle of dying: no longer kqueueable, but possibly
      //      not waitable yet either (the "zombie death race").
      const pid_t reaped = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));
      if (reaped != 0) {
        // Positive: case 1, the child was just reaped. -1: case 2, there is
        // nothing left to reap. Either way the job is done.
        return;
      }
      // Zero: case 3. The child will become waitable shortly; fall through
      // to kill it (for good measure) and reap it.
    } else {
      DPLOG(ERROR) << "kevent (setup " << child << ")";
    }
  } else {
    DPLOG(ERROR) << "kqueue()";
  }

  // Last resort. The child is either still alive or only just dead. Sending
  // SIGKILL settles the matter, and is harmless for a freshly dead child:
  // it is a zombie (or about to be one) and kill still returns 0 even though
  // no live process receives the signal.
  if (kill(child, SIGKILL) != -1) {
    // The child is certainly on its way out, so the blocking reap will not
    // have to wait long, if at all.
    BlockingReap(child);
  } else {
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  }
}
165
166}  // namespace
167
168void EnsureProcessTerminated(ProcessHandle process) {
169  WaitForChildToDie(process, kWaitBeforeKillSeconds);
170}
171
172}  // namespace base
173