1// Copyright (C) 2015 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// makeparallel communicates with the GNU make jobserver
16// (http://make.mad-scientist.net/papers/jobserver-implementation/)
// in order to claim all available jobs, and then passes the number of jobs
18// claimed to a subprocess with -j<jobs>.
19
20#include <errno.h>
21#include <fcntl.h>
22#include <getopt.h>
23#include <poll.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <sys/resource.h>
30#include <sys/time.h>
31#include <sys/types.h>
32#include <sys/wait.h>
33
34#include <string>
35#include <vector>
36
37#ifdef __linux__
38#include <error.h>
39#endif
40
#ifdef __APPLE__
#include <err.h>
// Darwin has no glibc error(3), so emulate error(status, errnum, fmt, ...)
// with the BSD err(3) family.  The BSD functions take the exit status first
// and the errno value second, i.e. the same order as error(3):
//   errc(eval, code, fmt, ...)  prints the message followed by strerror(code)
//   errx(eval, fmt, ...)        prints the message only
// errnum == 0 means "no errno text", which is what errx provides.
// Unlike glibc's error(), this always exits, even when status is 0; every
// caller in this file passes a non-zero status.
#define error(status, errnum, fmt, ...)                            \
  do {                                                             \
    const int error_status_ = (status);                            \
    const int error_errnum_ = (errnum);                            \
    if (error_errnum_ != 0) {                                      \
      errc(error_status_, error_errnum_, fmt, ##__VA_ARGS__);      \
    } else {                                                       \
      errx(error_status_, fmt, ##__VA_ARGS__);                     \
    }                                                              \
  } while (0)
// Darwin does not interrupt syscalls by default.
#define TEMP_FAILURE_RETRY(exp) (exp)
#endif
47
// Verify that fd refers to an open file descriptor.
//
// Returns normally when fcntl(F_GETFD) succeeds.  Otherwise reports the
// problem through error() with errno as the exit status:
//  - EBADF: the jobserver pipe was not inherited, which happens when the make
//    recipe line was not prefixed with '+'; only the hint is printed.
//  - anything else: the failing call is reported together with the errno text.
static void CheckFd(int fd) {
  if (fcntl(fd, F_GETFD) >= 0) {
    return;
  }

  if (errno == EBADF) {
    error(errno, 0, "no jobserver pipe, prefix recipe command with '+'");
  } else {
    error(errno, errno, "fcntl failed");
  }
}
59
// Extract flags from MAKEFLAGS that need to be propagated to the subprocess.
//
// Returns the space-separated words of $MAKEFLAGS, normalised so that they can
// be fed to getopt: empty words (leading, trailing or repeated spaces) are
// dropped, and the first word gets a leading '-' unless it already has one or
// is a variable definition (contains '=').  Returns an empty vector when
// MAKEFLAGS is unset or contains no words.
static std::vector<std::string> ReadMakeflags() {
  std::vector<std::string> args;

  const char* makeflags_env = getenv("MAKEFLAGS");
  if (makeflags_env == nullptr) {
    return args;
  }

  // The MAKEFLAGS format is pretty useless.  The first argument might be empty
  // (starts with a leading space), or it might be a set of one-character flags
  // merged together with no leading space, or it might be a variable
  // definition.
  const std::string makeflags = makeflags_env;

  // Split makeflags into individual args on spaces, skipping the empty words
  // produced by leading, trailing or consecutive spaces.
  std::string::size_type base = 0;
  while (base < makeflags.size()) {
    std::string::size_type found = makeflags.find(' ', base);
    if (found == std::string::npos) {
      found = makeflags.size();
    }
    if (found > base) {
      args.push_back(makeflags.substr(base, found - base));
    }
    base = found + 1;
  }

  // Prepend a - to the first argument if it does not have one and is not a
  // variable definition
  if (!args.empty() && args[0][0] != '-' &&
      args[0].find('=') == std::string::npos) {
    args[0].insert(args[0].begin(), '-');
  }

  return args;
}
101
// Scan the MAKEFLAGS words in args for the options makeparallel cares about.
//
//   args        words as produced by ReadMakeflags(); not modified
//   in_fd,      set to the read/write ends of the jobserver pipe when a
//   out_fd        --jobserver-fds=R,W (GNU make < 4.2) or --jobserver-auth=R,W
//                 (GNU make >= 4.2) option is present; untouched otherwise
//   parallel    set to true when -j is present
//   keep_going  set to true when -k is present
//
// Unknown options are ignored.  A --jobserver-auth=fifo:PATH value (named
// pipe jobserver) is not supported and is ignored as well, so the caller
// simply sees no jobserver.  Any other malformed jobserver value terminates
// the process through error().  Always returns true.
//
// Uses the global getopt state (optind, opterr, optarg).
static bool ParseMakeflags(std::vector<std::string>& args,
    int* in_fd, int* out_fd, bool* parallel, bool* keep_going) {

  // getopt_long wants a mutable argv-style array, so work on copies.
  std::vector<char*> getopt_argv;
  getopt_argv.reserve(args.size() + 2);
  // getopt starts reading at argv[1]
  getopt_argv.push_back(strdup(""));
  for (const std::string& v : args) {
    getopt_argv.push_back(strdup(v.c_str()));
  }
  const int getopt_argc = static_cast<int>(getopt_argv.size());
  // Conventional argv[argc] == NULL terminator; not counted in getopt_argc.
  getopt_argv.push_back(nullptr);

  static const option longopts[] = {
      {"jobserver-fds", required_argument, nullptr, 0},
      {"jobserver-auth", required_argument, nullptr, 0},
      {nullptr, 0, nullptr, 0},
  };

  opterr = 0;
  optind = 1;
  while (true) {
    int longopt_index = 0;

    const int c = getopt_long(getopt_argc, getopt_argv.data(), "kj",
        longopts, &longopt_index);

    if (c == -1) {
      break;
    }

    switch (c) {
    case 0:
      switch (longopt_index) {
      case 0:
      case 1:
      {
        // jobserver-fds / jobserver-auth
        if (longopt_index == 1 && strncmp(optarg, "fifo:", 5) == 0) {
          // Named-pipe jobserver: there are no inherited fds to use.
          break;
        }
        if (sscanf(optarg, "%d,%d", in_fd, out_fd) != 2) {
          error(EXIT_FAILURE, 0, "incorrect format for --%s: %s",
              longopts[longopt_index].name, optarg);
        }
        // TODO: propagate in_fd, out_fd
        break;
      }
      default:
        abort();
      }
      break;
    case 'j':
      *parallel = true;
      break;
    case 'k':
      *keep_going = true;
      break;
    case '?':
      // ignore unknown arguments
      break;
    default:
      abort();
    }
  }

  // free(nullptr) is a no-op, so the terminator needs no special casing.
  for (char* v : getopt_argv) {
    free(v);
  }

  return true;
}
165
// Read a single byte from fd, with timeout in milliseconds.  Returns true if
// a byte was read, false on timeout.  Throws away the read value.
// EOF and unexpected read errors terminate the process through error().
// Non-reentrant, uses timer and signal handler global state, plus static
// variable to communicate with signal handler.
//
// Uses a SIGALRM timer to fire a signal after timeout_ms that will interrupt
// the read syscall if it hasn't yet completed.  If the timer fires before the
// read the read could block forever, so read from a dup'd fd and close it from
// the signal handler, which will cause the read to return EBADF if it occurs
// after the signal.
// The dup/read/close combo is very similar to the system described to avoid
// a deadlock between SIGCHLD and read at
// http://make.mad-scientist.net/papers/jobserver-implementation/
//
// Note: a timeout_ms of 0 disarms the timer (setitimer semantics), so the
// read then blocks until data arrives.
static bool ReadByteTimeout(int fd, int timeout_ms) {
  // global variable to communicate with the signal handler
  static int dup_fd = -1;

  // dup the fd so the signal handler can close it without losing the real one
  dup_fd = dup(fd);
  if (dup_fd < 0) {
    error(errno, errno, "dup failed");
  }

  // set up a signal handler that closes dup_fd on SIGALRM.  SA_RESTART is
  // deliberately not set so that the blocked read returns EINTR.
  struct sigaction action = {};
  action.sa_flags = SA_SIGINFO;
  action.sa_sigaction = [](int, siginfo_t*, void*) {
    close(dup_fd);
  };
  struct sigaction oldaction = {};
  int ret = sigaction(SIGALRM, &action, &oldaction);
  if (ret < 0) {
    error(errno, errno, "sigaction failed");
  }

  // queue a one-shot SIGALRM after timeout_ms.  tv_usec must stay below one
  // second or setitimer rejects it with EINVAL, so split off whole seconds.
  struct itimerval timeout = {};
  timeout.it_value.tv_sec = timeout_ms / 1000;
  timeout.it_value.tv_usec = (timeout_ms % 1000) * 1000;
  ret = setitimer(ITIMER_REAL, &timeout, nullptr);
  if (ret < 0) {
    error(errno, errno, "setitimer failed");
  }

  // start the blocking read
  char buf;
  int read_ret = read(dup_fd, &buf, 1);
  // keep errno from the read; the cleanup calls below may overwrite it
  int read_errno = errno;

  // cancel the alarm in case it hasn't fired yet
  const struct itimerval cancel = {};
  ret = setitimer(ITIMER_REAL, &cancel, nullptr);
  if (ret < 0) {
    error(errno, errno, "reset setitimer failed");
  }

  // remove the signal handler
  ret = sigaction(SIGALRM, &oldaction, nullptr);
  if (ret < 0) {
    error(errno, errno, "reset sigaction failed");
  }

  // clean up the dup'd fd in case the signal never fired
  close(dup_fd);
  dup_fd = -1;

  if (read_ret == 0) {
    error(EXIT_FAILURE, 0, "EOF on jobserver pipe");
  } else if (read_ret > 0) {
    return true;
  } else if (read_errno == EINTR || read_errno == EBADF) {
    // interrupted by, or started after, the SIGALRM handler: timed out
    return false;
  } else {
    error(read_errno, read_errno, "read failed");
  }
  // not reached: every error() call above uses a non-zero exit status
  abort();
}
241
242// Measure the size of the jobserver pool by reading from in_fd until it blocks
243static int GetJobserverTokens(int in_fd) {
244  int tokens = 0;
245  pollfd pollfds[] = {{in_fd, POLLIN, 0}};
246  int ret;
247  while ((ret = TEMP_FAILURE_RETRY(poll(pollfds, 1, 0))) != 0) {
248    if (ret < 0) {
249      error(errno, errno, "poll failed");
250    } else if (pollfds[0].revents != POLLIN) {
251      error(EXIT_FAILURE, 0, "unexpected event %d\n", pollfds[0].revents);
252    }
253
254    // There is probably a job token in the jobserver pipe.  There is a chance
255    // another process reads it first, which would cause a blocking read to
256    // block forever (or until another process put a token back in the pipe).
257    // The file descriptor can't be set to O_NONBLOCK as that would affect
258    // all users of the pipe, including the parent make process.
259    // ReadByteTimeout emulates a non-blocking read on a !O_NONBLOCK socket
260    // using a SIGALRM that fires after a short timeout.
261    bool got_token = ReadByteTimeout(in_fd, 10);
262    if (!got_token) {
263      // No more tokens
264      break;
265    } else {
266      tokens++;
267    }
268  }
269
270  // This process implicitly gets a token, so pool size is measured size + 1
271  return tokens;
272}
273
// Give `tokens` job tokens back to the jobserver by writing one '+' byte per
// token to out_fd.  A failed or zero-length write terminates the process
// through error().  Does nothing when tokens is zero or negative.
static void PutJobserverTokens(int out_fd, int tokens) {
  const char token = '+';
  for (int remaining = tokens; remaining > 0; --remaining) {
    const int written = TEMP_FAILURE_RETRY(write(out_fd, &token, 1));
    if (written == 0) {
      error(EXIT_FAILURE, 0, "EOF on jobserver pipe");
    } else if (written < 0) {
      error(errno, errno, "write failed");
    }
  }
}
287
288int main(int argc, char* argv[]) {
289  int in_fd = -1;
290  int out_fd = -1;
291  bool parallel = false;
292  bool keep_going = false;
293  bool ninja = false;
294  int tokens = 0;
295
296  if (argc > 1 && strcmp(argv[1], "--ninja") == 0) {
297    ninja = true;
298    argv++;
299    argc--;
300  }
301
302  if (argc < 2) {
303    error(EXIT_FAILURE, 0, "expected command to run");
304  }
305
306  const char* path = argv[1];
307  std::vector<char*> args({argv[1]});
308
309  std::vector<std::string> makeflags = ReadMakeflags();
310  if (ParseMakeflags(makeflags, &in_fd, &out_fd, &parallel, &keep_going)) {
311    if (in_fd >= 0 && out_fd >= 0) {
312      CheckFd(in_fd);
313      CheckFd(out_fd);
314      fcntl(in_fd, F_SETFD, FD_CLOEXEC);
315      fcntl(out_fd, F_SETFD, FD_CLOEXEC);
316      tokens = GetJobserverTokens(in_fd);
317    }
318  }
319
320  std::string jarg;
321  if (parallel) {
322    if (tokens == 0) {
323      if (ninja) {
324        // ninja is parallel by default
325        jarg = "";
326      } else {
327        // make -j with no argument, guess a reasonable parallelism like ninja does
328        jarg = "-j" + std::to_string(sysconf(_SC_NPROCESSORS_ONLN) + 2);
329      }
330    } else {
331      jarg = "-j" + std::to_string(tokens + 1);
332    }
333  }
334
335
336  if (ninja) {
337    if (!parallel) {
338      // ninja is parallel by default, pass -j1 to disable parallelism if make wasn't parallel
339      args.push_back(strdup("-j1"));
340    } else {
341      if (jarg != "") {
342        args.push_back(strdup(jarg.c_str()));
343      }
344    }
345    if (keep_going) {
346      args.push_back(strdup("-k0"));
347    }
348  } else {
349    if (jarg != "") {
350      args.push_back(strdup(jarg.c_str()));
351    }
352  }
353
354  args.insert(args.end(), &argv[2], &argv[argc]);
355
356  args.push_back(nullptr);
357
358  static pid_t pid;
359
360  // Set up signal handlers to forward SIGTERM to child.
361  // Assume that all other signals are sent to the entire process group,
362  // and that we'll wait for our child to exit instead of handling them.
363  struct sigaction action = {};
364  action.sa_flags = SA_RESTART;
365  action.sa_handler = [](int signal) {
366    if (signal == SIGTERM && pid > 0) {
367      kill(pid, signal);
368    }
369  };
370
371  int ret = 0;
372  if (!ret) ret = sigaction(SIGHUP, &action, NULL);
373  if (!ret) ret = sigaction(SIGINT, &action, NULL);
374  if (!ret) ret = sigaction(SIGQUIT, &action, NULL);
375  if (!ret) ret = sigaction(SIGTERM, &action, NULL);
376  if (!ret) ret = sigaction(SIGALRM, &action, NULL);
377  if (ret < 0) {
378    error(errno, errno, "sigaction failed");
379  }
380
381  pid = fork();
382  if (pid < 0) {
383    error(errno, errno, "fork failed");
384  } else if (pid == 0) {
385    // child
386    unsetenv("MAKEFLAGS");
387    unsetenv("MAKELEVEL");
388
389    // make 3.81 sets the stack ulimit to unlimited, which may cause problems
390    // for child processes
391    struct rlimit rlim{};
392    if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur == RLIM_INFINITY) {
393      rlim.rlim_cur = 8*1024*1024;
394      setrlimit(RLIMIT_STACK, &rlim);
395    }
396
397    int ret = execvp(path, args.data());
398    if (ret < 0) {
399      error(errno, errno, "exec %s failed", path);
400    }
401    abort();
402  }
403
404  // parent
405
406  siginfo_t status = {};
407  int exit_status = 0;
408  ret = waitid(P_PID, pid, &status, WEXITED);
409  if (ret < 0) {
410    error(errno, errno, "waitpid failed");
411  } else if (status.si_code == CLD_EXITED) {
412    exit_status = status.si_status;
413  } else {
414    exit_status = -(status.si_status);
415  }
416
417  if (tokens > 0) {
418    PutJobserverTokens(out_fd, tokens);
419  }
420  exit(exit_status);
421}
422