1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "IptablesRestoreController.h"
18
19#include <poll.h>
20#include <signal.h>
21#include <sys/wait.h>
22#include <unistd.h>
23
24#define LOG_TAG "IptablesRestoreController"
25#include <android-base/logging.h>
26#include <android-base/file.h>
27
28#include "Controllers.h"
29
30constexpr char IPTABLES_RESTORE_PATH[] = "/system/bin/iptables-restore";
31constexpr char IP6TABLES_RESTORE_PATH[] = "/system/bin/ip6tables-restore";
32
33constexpr char PING[] = "#PING\n";
34
35constexpr size_t PING_SIZE = sizeof(PING) - 1;
36
37// Not compile-time constants because they are changed by the unit tests.
38int IptablesRestoreController::MAX_RETRIES = 50;
39int IptablesRestoreController::POLL_TIMEOUT_MS = 100;
40
41class IptablesProcess {
42public:
43    IptablesProcess(pid_t pid, int stdIn, int stdOut, int stdErr) :
44        pid(pid),
45        stdIn(stdIn),
46        processTerminated(false) {
47
48        pollFds[STDOUT_IDX] = { .fd = stdOut, .events = POLLIN };
49        pollFds[STDERR_IDX] = { .fd = stdErr, .events = POLLIN };
50    }
51
52    ~IptablesProcess() {
53        close(stdIn);
54        close(pollFds[STDOUT_IDX].fd);
55        close(pollFds[STDERR_IDX].fd);
56    }
57
58    bool outputReady() {
59        struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
60        int ret = poll(&pollfd, 1, 0);
61        if (ret == -1) {
62            ALOGE("outputReady poll failed: %s", strerror(errno));
63            return false;
64        }
65        return (ret == 1) && !(pollfd.revents & POLLERR);
66    }
67
68    void stop() {
69        if (processTerminated) return;
70
71        // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
72        // process was killed by something else on the system). In both cases, it's safe to send the
73        // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
74        // waitpid on it, so there's no risk that the PID is reused.
75        int err = kill(pid, SIGTERM);
76        if (err) {
77            err = errno;
78        }
79
80        if (err == ESRCH) {
81            // This means that someone else inside netd but outside this class called waitpid(),
82            // which is a programming error. There's no point in calling waitpid() here since we
83            // know that the process is gone.
84            ALOGE("iptables child process %d unexpectedly disappeared", pid);
85            processTerminated = true;
86            return;
87        }
88
89        if (err) {
90            ALOGE("Error killing iptables child process %d: %s", pid, strerror(err));
91        }
92
93        int status;
94        if (waitpid(pid, &status, 0) == -1) {
95            ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno));
96        } else {
97            ALOGW("iptables-restore process %d terminated status=%d", pid, status);
98        }
99
100        processTerminated = true;
101    }
102
103    const pid_t pid;
104    const int stdIn;
105
106    struct pollfd pollFds[2];
107    std::string errBuf;
108
109    std::atomic_bool processTerminated;
110
111    static constexpr size_t STDOUT_IDX = 0;
112    static constexpr size_t STDERR_IDX = 1;
113};
114
115IptablesRestoreController::IptablesRestoreController() :
116    mIpRestore(nullptr),
117    mIp6Restore(nullptr) {
118}
119
120IptablesRestoreController::~IptablesRestoreController() {
121}
122
123/* static */
124IptablesProcess* IptablesRestoreController::forkAndExec(const IptablesProcessType type) {
125    const char* const cmd = (type == IPTABLES_PROCESS) ?
126        IPTABLES_RESTORE_PATH : IP6TABLES_RESTORE_PATH;
127
128    // Create the pipes we'll use for communication with the child
129    // process. One each for the child's in, out and err files.
130    int stdin_pipe[2];
131    int stdout_pipe[2];
132    int stderr_pipe[2];
133
134    if (pipe2(stdin_pipe, 0) == -1 ||
135        pipe2(stdout_pipe, O_NONBLOCK) == -1 ||
136        pipe2(stderr_pipe, O_NONBLOCK) == -1) {
137
138        ALOGE("pipe2() failed: %s", strerror(errno));
139        return nullptr;
140    }
141
142    pid_t child_pid = fork();
143    if (child_pid == 0) {
144        // The child process. Reads from stdin, writes to stderr and stdout.
145
146        // stdin_pipe[1] : The write end of the stdin pipe.
147        // stdout_pipe[0] : The read end of the stdout pipe.
148        // stderr_pipe[0] : The read end of the stderr pipe.
149        if (close(stdin_pipe[1]) == -1 ||
150            close(stdout_pipe[0]) == -1 ||
151            close(stderr_pipe[0]) == -1) {
152
153            ALOGW("close() failed: %s", strerror(errno));
154        }
155
156        // stdin_pipe[0] : The read end of the stdin pipe.
157        // stdout_pipe[1] : The write end of the stdout pipe.
158        // stderr_pipe[1] : The write end of the stderr pipe.
159        if (dup2(stdin_pipe[0], 0) == -1 ||
160            dup2(stdout_pipe[1], 1) == -1 ||
161            dup2(stderr_pipe[1], 2) == -1) {
162            ALOGE("dup2() failed: %s", strerror(errno));
163            abort();
164        }
165
166        if (execl(cmd,
167                  cmd,
168                  "--noflush",  // Don't flush the whole table.
169                  "-w",         // Wait instead of failing if the lock is held.
170                  "-v",         // Verbose mode, to make sure our ping is echoed
171                                // back to us.
172                  nullptr) == -1) {
173            ALOGE("execl(%s, ...) failed: %s", cmd, strerror(errno));
174            abort();
175        }
176
177        // This statement is unreachable. We abort() upon error, and execl
178        // if everything goes well.
179        return nullptr;
180    }
181
182    // The parent process. Writes to stdout and stderr and reads from stdin.
183    if (child_pid == -1) {
184        ALOGE("fork() failed: %s", strerror(errno));
185        return nullptr;
186    }
187
188    // stdin_pipe[0] : The read end of the stdin pipe.
189    // stdout_pipe[1] : The write end of the stdout pipe.
190    // stderr_pipe[1] : The write end of the stderr pipe.
191    if (close(stdin_pipe[0]) == -1 ||
192        close(stdout_pipe[1]) == -1 ||
193        close(stderr_pipe[1]) == -1) {
194        ALOGW("close() failed: %s", strerror(errno));
195    }
196
197    return new IptablesProcess(child_pid, stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
198}
199
200// TODO: Return -errno on failure instead of -1.
201// TODO: Maybe we should keep a rotating buffer of the last N commands
202// so that they can be dumped on dumpsys.
203int IptablesRestoreController::sendCommand(const IptablesProcessType type,
204                                           const std::string& command,
205                                           std::string *output) {
206   std::unique_ptr<IptablesProcess> *process =
207           (type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
208
209
210    // We might need to fork a new process if we haven't forked one yet, or
211    // if the forked process terminated.
212    //
213    // NOTE: For a given command, this is the last point at which we try to
214    // recover from a child death. If the child dies at some later point during
215    // the execution of this method, we will receive an EPIPE and return an
216    // error. The command will then need to be retried at a higher level.
217    IptablesProcess *existingProcess = process->get();
218    if (existingProcess != nullptr && !existingProcess->outputReady()) {
219        existingProcess->stop();
220        existingProcess = nullptr;
221    }
222
223    if (existingProcess == nullptr) {
224        // Fork a new iptables[6]-restore process.
225        IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
226        if (newProcess == nullptr) {
227            LOG(ERROR) << "Unable to fork ip[6]tables-restore, type: " << type;
228            return -1;
229        }
230
231        process->reset(newProcess);
232    }
233
234    if (!android::base::WriteFully((*process)->stdIn, command.data(), command.length())) {
235        ALOGE("Unable to send command: %s", strerror(errno));
236        return -1;
237    }
238
239    if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
240        ALOGE("Unable to send ping command: %s", strerror(errno));
241        return -1;
242    }
243
244    if (!drainAndWaitForAck(*process, command, output)) {
245        // drainAndWaitForAck has already logged an error.
246        return -1;
247    }
248
249    return 0;
250}
251
252void IptablesRestoreController::maybeLogStderr(const std::unique_ptr<IptablesProcess> &process,
253                                               const std::string& command) {
254    if (process->errBuf.empty()) {
255        return;
256    }
257
258    ALOGE("iptables error:\n"
259          "------- COMMAND -------\n"
260          "%s\n"
261          "-------  ERROR -------\n"
262          "%s"
263          "----------------------\n",
264          command.c_str(), process->errBuf.c_str());
265    process->errBuf.clear();
266}
267
268/* static */
269bool IptablesRestoreController::drainAndWaitForAck(const std::unique_ptr<IptablesProcess> &process,
270                                                   const std::string& command,
271                                                   std::string *output) {
272    bool receivedAck = false;
273    int timeout = 0;
274    while (!receivedAck && (timeout++ < MAX_RETRIES)) {
275        int numEvents = TEMP_FAILURE_RETRY(
276            poll(process->pollFds, ARRAY_SIZE(process->pollFds), POLL_TIMEOUT_MS));
277        if (numEvents == -1) {
278            ALOGE("Poll failed: %s", strerror(errno));
279            return false;
280        }
281
282        // We've timed out, which means something has gone wrong - we know that stdout should have
283        // become available to read with the ACK message, or that stderr should have been available
284        // to read with an error message.
285        if (numEvents == 0) {
286            continue;
287        }
288
289        char buffer[PIPE_BUF];
290        for (size_t i = 0; i < ARRAY_SIZE(process->pollFds); ++i) {
291            const struct pollfd &pollfd = process->pollFds[i];
292            if (pollfd.revents & POLLIN) {
293                ssize_t size;
294                do {
295                    size = TEMP_FAILURE_RETRY(read(pollfd.fd, buffer, sizeof(buffer)));
296
297                    if (size == -1) {
298                        if (errno != EAGAIN) {
299                            ALOGE("Unable to read from descriptor: %s", strerror(errno));
300                        }
301                        break;
302                    }
303
304                    if (i == IptablesProcess::STDOUT_IDX) {
305                        // i == STDOUT_IDX: accumulate stdout into *output, and look
306                        // for the ping response.
307                        output->append(buffer, size);
308                        size_t pos = output->find(PING);
309                        if (pos != std::string::npos) {
310                            if (output->size() > pos + PING_SIZE) {
311                                size_t extra = output->size() - (pos + PING_SIZE);
312                                ALOGW("%zd extra characters after iptables response: '%s...'",
313                                      extra, output->substr(pos + PING_SIZE, 128).c_str());
314                            }
315                            output->resize(pos);
316                            receivedAck = true;
317                        }
318                    } else {
319                        // i == STDERR_IDX: accumulate stderr into errBuf.
320                        process->errBuf.append(buffer, size);
321                    }
322                } while (size > 0);
323            }
324            if (pollfd.revents & POLLHUP) {
325                // The pipe was closed. This likely means the subprocess is exiting, since
326                // iptables-restore only closes stdin on error.
327                process->stop();
328                break;
329            }
330        }
331    }
332
333    if (!receivedAck && !process->processTerminated) {
334        ALOGE("Timed out waiting for response from iptables process %d", process->pid);
335        // Kill the process so that if it eventually recovers, we don't misinterpret the ping
336        // response (or any output) of the command we just sent as coming from future commands.
337        process->stop();
338    }
339
340    maybeLogStderr(process, command);
341
342    return receivedAck;
343}
344
345int IptablesRestoreController::execute(const IptablesTarget target, const std::string& command,
346                                       std::string *output) {
347    std::lock_guard<std::mutex> lock(mLock);
348
349    std::string buffer;
350    if (output == nullptr) {
351        output = &buffer;
352    } else {
353        output->clear();
354    }
355
356    int res = 0;
357    if (target == V4 || target == V4V6) {
358        res |= sendCommand(IPTABLES_PROCESS, command, output);
359    }
360    if (target == V6 || target == V4V6) {
361        res |= sendCommand(IP6TABLES_PROCESS, command, output);
362    }
363    return res;
364}
365
366int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
367    return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
368}
369