// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/process/kill.h"

#include <errno.h>
#include <signal.h>
#include <sys/event.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "base/files/file_util.h"
#include "base/files/scoped_file.h"
#include "base/logging.h"
#include "base/posix/eintr_wrapper.h"

namespace base {

namespace {

// Grace period, in seconds, that EnsureProcessTerminated() gives a process to
// exit on its own before it is forcibly killed.
const int kWaitBeforeKillSeconds = 2;

// Reap |child| process. This call blocks until completion. A waitpid failure
// is only logged (in debug builds); it is not reported to the caller.
void BlockingReap(pid_t child) {
  const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
  if (result == -1) {
    DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
  }
}

// Waits for |timeout| seconds for the given |child| to exit and reap it. If
// the child doesn't exit within the time specified, kills it.
//
// This function takes two approaches: first, it tries to use kqueue to
// observe when the process exits. kevent can monitor a kqueue with a
// timeout, so this method is preferred to wait for a specified period of
// time. Once the kqueue indicates the process has exited, waitpid will reap
// the exited child. If the kqueue doesn't provide an exit event notification,
// before the timeout expires, or if the kqueue fails or misbehaves, the
// process will be mercilessly killed and reaped.
//
// A child process passed to this function may be in one of several states:
// running, terminated and not yet reaped, and (apparently, and unfortunately)
// terminated and already reaped. Normally, a process will at least have been
// asked to exit before this function is called, but this is not required.
// If a process is terminating and unreaped, there may be a window between the
// time that kqueue will no longer recognize it and when it becomes an actual
// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
// detected when kqueue indicates that the process is not running and a
// non-blocking waitpid fails to reap the process but indicates that it is
// still running. In this event, a blocking attempt to reap the process
// collects the known-dying child, preventing zombies from congregating.
//
// In the event that the kqueue misbehaves entirely, as it might under a
// EMFILE condition ("too many open files", or out of file descriptors), this
// function will forcibly kill and reap the child without delay. This
// eliminates another potential zombie vector. (If you're out of file
// descriptors, you're probably deep into something else, but that doesn't
// mean that zombies should be allowed to kick you while you're down.)
//
// The fact that this function seemingly can be called to wait on a child
// that's not only already terminated but already reaped is a bit of a
// problem: a reaped child's pid can be reclaimed and may refer to a distinct
// process in that case. The fact that this function can seemingly be called
// to wait on a process that's not even a child is also a problem: kqueue will
// work in that case, but waitpid won't, and killing a non-child might not be
// the best approach.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
  // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
  // call fails, this function should fall back to the last resort of trying
  // to kill and reap the process. Not observing this rule will resurrect
  // zombies.

  int result;

  ScopedFD kq(HANDLE_EINTR(kqueue()));
  if (!kq.is_valid()) {
    DPLOG(ERROR) << "kqueue()";
  } else {
    // Register interest in |child|'s exit on the kqueue.
    struct kevent change = {0};
    EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    result = HANDLE_EINTR(kevent(kq.get(), &change, 1, NULL, 0, NULL));

    if (result == -1) {
      if (errno != ESRCH) {
        DPLOG(ERROR) << "kevent (setup " << child << ")";
      } else {
        // At this point, one of the following has occurred:
        // 1. The process has died but has not yet been reaped.
        // 2. The process has died and has already been reaped.
        // 3. The process is in the process of dying. It's no longer
        //    kqueueable, but it may not be waitable yet either. Mark calls
        //    this case the "zombie death race".

        result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

        if (result != 0) {
          // A positive result indicates case 1. waitpid succeeded and reaped
          // the child. A result of -1 indicates case 2. The child has already
          // been reaped. In both of these cases, no further action is
          // necessary.
          return;
        }

        // |result| is 0, indicating case 3. The process will be waitable in
        // short order. Fall back out of the kqueue code to kill it (for good
        // measure) and reap it.
      }
    } else {
      // Keep track of the elapsed time to be able to restart kevent if it's
      // interrupted.
      TimeDelta remaining_delta = TimeDelta::FromSeconds(timeout);
      TimeTicks deadline = TimeTicks::Now() + remaining_delta;
      result = -1;
      struct kevent event = {0};
      while (remaining_delta.InMilliseconds() > 0) {
        const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
        // HANDLE_EINTR is deliberately not used here: after an interruption
        // the timeout has to be recomputed from |deadline| before retrying.
        result = kevent(kq.get(), NULL, 0, &event, 1, &remaining_timespec);
        if (result == -1 && errno == EINTR) {
          remaining_delta = deadline - TimeTicks::Now();
          // If the deadline has now passed, the loop ends with |result| == 0,
          // which is handled below exactly like a kevent timeout.
          result = 0;
        } else {
          break;
        }
      }

      if (result == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (result > 1) {
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << result;
      } else if (result == 1) {
        if ((event.fflags & NOTE_EXIT) &&
            (event.ident == static_cast<uintptr_t>(child))) {
          // The process is dead or dying. This won't block for long, if at
          // all.
          BlockingReap(child);
          return;
        } else {
          DLOG(ERROR) << "kevent (wait " << child
                      << "): unexpected event: fflags=" << event.fflags
                      << ", ident=" << event.ident;
        }
      }
    }
  }

  // The child is still alive, or is very freshly dead. Be sure by sending it
  // a signal. This is safe even if it's freshly dead, because it will be a
  // zombie (or on the way to zombiedom) and kill will return 0 even if the
  // signal is not delivered to a live process.
  result = kill(child, SIGKILL);
  if (result == -1) {
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  } else {
    // The child is definitely on the way out now. BlockingReap won't need to
    // wait for long, if at all.
    BlockingReap(child);
  }
}

}  // namespace

// Ensures that |process| is gone: waits up to kWaitBeforeKillSeconds for it
// to exit, and otherwise sends it SIGKILL, reaping it in either case. This
// blocks the calling thread for the duration of the wait.
void EnsureProcessTerminated(ProcessHandle process) {
  WaitForChildToDie(process, kWaitBeforeKillSeconds);
}

}  // namespace base