1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "UidRanges.h"
21
22#define LOG_TAG "Netd"
23#include "log/log.h"
24#include "logwrap/logwrap.h"
25#include "resolv_netid.h"
26
27#include <arpa/inet.h>
28#include <fcntl.h>
29#include <linux/fib_rules.h>
30#include <map>
31#include <net/if.h>
32#include <sys/stat.h>
33
34namespace {
35
36// BEGIN CONSTANTS --------------------------------------------------------------------------------
37
// Priorities of the routing policy rules managed by this file.
constexpr uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
constexpr uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
constexpr uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
constexpr uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
constexpr uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
constexpr uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
constexpr uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
constexpr uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
constexpr uint32_t RULE_PRIORITY_TETHERING           = 18000;
constexpr uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
constexpr uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
constexpr uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
constexpr uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
constexpr uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
constexpr uint32_t RULE_PRIORITY_UNREACHABLE         = 32000;

// Numbers of the routing tables that are not derived from an interface index.
constexpr uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
constexpr uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
constexpr uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Human-readable names for the tables above.
constexpr const char* ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

// Human-readable names for the "local" and "main" tables.
constexpr const char* ROUTE_TABLE_NAME_LOCAL = "local";
constexpr const char* ROUTE_TABLE_NAME_MAIN  = "main";
64
65// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
66// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
67// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
68// that will warn us if upstream has given these values some other meaning.
69const uint16_t FRA_UID_START = 18;
70const uint16_t FRA_UID_END   = 19;
71static_assert(FRA_UID_START > FRA_MAX,
72             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
73             "Check that these values match what the kernel does and then update this assertion.");
74
// Flags for netlink requests: every request asks for an ack; requests that create an object
// additionally ask for creation and for failure if the object already exists.
constexpr uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
constexpr uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;

// Address that netlink sockets are connected to.
constexpr sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};

// Address families to iterate over when something must be done for both IPv4 and IPv6.
constexpr uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};

// Command-line switches corresponding to the two IP versions.
constexpr const char* IP_VERSIONS[] = {"-4", "-6"};

// Readable names for commonly passed argument values.
constexpr uid_t UID_ROOT = 0;
constexpr const char* IIF_NONE = nullptr;
constexpr const char* OIF_NONE = nullptr;
constexpr bool ACTION_ADD = true;
constexpr bool ACTION_DEL = false;
constexpr bool MODIFY_NON_UID_BASED_RULES = true;

// Location, open(2) flags and permission bits of the table names file.
constexpr const char* RT_TABLES_PATH = "/data/misc/net/rt_tables";
constexpr int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
constexpr mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--

constexpr unsigned ROUTE_FLUSH_ATTEMPTS = 2;
96
// Wraps RTA_LENGTH(x) so that its result has type uint16_t. Without this, static initializers
// trigger "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
// warnings (even when x is already uint16_t).
constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
    return static_cast<uint16_t>(RTA_LENGTH(x));
}
102
103// These are practically const, but can't be declared so, because they are used to initialize
104// non-const pointers ("void* iov_base") in iovec arrays.
105rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
106rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
107rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
108rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
109rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
110rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };
111
112rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
113rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };
114
115uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
116
117// END CONSTANTS ----------------------------------------------------------------------------------
118
119// No locks needed because RouteController is accessed only from one thread (in CommandListener).
120std::map<std::string, uint32_t> interfaceToTable;
121
122uint32_t getRouteTableForInterface(const char* interface) {
123    uint32_t index = if_nametoindex(interface);
124    if (index) {
125        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
126        interfaceToTable[interface] = index;
127        return index;
128    }
129    // If the interface goes away if_nametoindex() will return 0 but we still need to know
130    // the index so we can remove the rules and routes.
131    auto iter = interfaceToTable.find(interface);
132    if (iter == interfaceToTable.end()) {
133        ALOGE("cannot find interface %s", interface);
134        return RT_TABLE_UNSPEC;
135    }
136    return iter->second;
137}
138
139void addTableName(uint32_t table, const std::string& name, std::string* contents) {
140    char tableString[UINT32_STRLEN];
141    snprintf(tableString, sizeof(tableString), "%u", table);
142    *contents += tableString;
143    *contents += " ";
144    *contents += name;
145    *contents += "\n";
146}
147
148// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
149void updateTableNamesFile() {
150    std::string contents;
151
152    addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
153    addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
154
155    addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
156    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
157    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
158
159    for (const auto& entry : interfaceToTable) {
160        addTableName(entry.second, entry.first, &contents);
161    }
162
163    int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
164    if (fd == -1) {
165        ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
166        return;
167    }
168    // File creation is affected by umask, so make sure the right mode bits are set.
169    if (fchmod(fd, RT_TABLES_MODE) == -1) {
170        ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
171    }
172    ssize_t bytesWritten = write(fd, contents.data(), contents.size());
173    if (bytesWritten != static_cast<ssize_t>(contents.size())) {
174        ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
175              contents.size(), strerror(errno));
176    }
177    close(fd);
178}
179
180// Sends a netlink request and expects an ack.
181// |iov| is an array of struct iovec that contains the netlink message payload.
182// The netlink header is generated by this function based on |action| and |flags|.
183// Returns -errno if there was an error or if the kernel reported an error.
184WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
185    nlmsghdr nlmsg = {
186        .nlmsg_type = action,
187        .nlmsg_flags = flags,
188    };
189    iov[0].iov_base = &nlmsg;
190    iov[0].iov_len = sizeof(nlmsg);
191    for (int i = 0; i < iovlen; ++i) {
192        nlmsg.nlmsg_len += iov[i].iov_len;
193    }
194
195    int ret;
196    struct {
197        nlmsghdr msg;
198        nlmsgerr err;
199    } response;
200
201    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
202    if (sock != -1 &&
203            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
204                    sizeof(NETLINK_ADDRESS)) != -1 &&
205            writev(sock, iov, iovlen) != -1 &&
206            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
207        if (ret == sizeof(response)) {
208            ret = response.err.error;  // Netlink errors are negative errno.
209            if (ret) {
210                ALOGE("netlink response contains error (%s)", strerror(-ret));
211            }
212        } else {
213            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
214            ret = -EBADMSG;
215        }
216    } else {
217        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
218        ret = -errno;
219    }
220
221    if (sock != -1) {
222        close(sock);
223    }
224
225    return ret;
226}
227
228// Returns 0 on success or negative errno on failure.
229int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
230    if (!input) {
231        *length = 0;
232        *padding = 0;
233        return 0;
234    }
235    *length = strlcpy(name, input, IFNAMSIZ) + 1;
236    if (*length > IFNAMSIZ) {
237        ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
238        return -ENAMETOOLONG;
239    }
240    *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
241    return 0;
242}
243
244// Adds or removes a routing rule for IPv4 and IPv6.
245//
246// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
247//   returns ENETUNREACH.
248// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
249//   ignored.
250// + If |iif| is non-NULL, the rule matches the specified incoming interface.
251// + If |oif| is non-NULL, the rule matches the specified outgoing interface.
252// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
253//   range (inclusive). Otherwise, the rule matches packets from all UIDs.
254//
255// Returns 0 on success or negative errno on failure.
256WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
257                                    uint32_t fwmark, uint32_t mask, const char* iif,
258                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
259    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
260    if (fwmark & ~mask) {
261        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
262        return -ERANGE;
263    }
264
265    // Interface names must include exactly one terminating NULL and be properly padded, or older
266    // kernels will refuse to delete rules.
267    char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
268    size_t iifLength, oifLength;
269    uint16_t iifPadding, oifPadding;
270    if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
271        return ret;
272    }
273    if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
274        return ret;
275    }
276
277    // Either both start and end UID must be specified, or neither.
278    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
279        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
280        return -EUSERS;
281    }
282    bool isUidRule = (uidStart != INVALID_UID);
283
284    // Assemble a rule request and put it in an array of iovec structures.
285    fib_rule_hdr rule = {
286        .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
287                                                                  FR_ACT_UNREACHABLE),
288    };
289
290    rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
291    rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
292
293    iovec iov[] = {
294        { NULL,              0 },
295        { &rule,             sizeof(rule) },
296        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
297        { &priority,         sizeof(priority) },
298        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
299        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
300        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
301        { &fwmark,           mask ? sizeof(fwmark) : 0 },
302        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
303        { &mask,             mask ? sizeof(mask) : 0 },
304        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
305        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
306        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
307        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
308        { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
309        { iifName,           iifLength },
310        { PADDING_BUFFER,    iifPadding },
311        { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
312        { oifName,           oifLength },
313        { PADDING_BUFFER,    oifPadding },
314    };
315
316    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
317    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
318        rule.family = AF_FAMILIES[i];
319        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
320            return ret;
321        }
322    }
323
324    return 0;
325}
326
327WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
328                                    uint32_t fwmark, uint32_t mask) {
329    return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
330                        INVALID_UID);
331}
332
333// Adds or deletes an IPv4 or IPv6 route.
334// Returns 0 on success or negative errno on failure.
335WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
336                                     const char* destination, const char* nexthop) {
337    // At least the destination must be non-null.
338    if (!destination) {
339        ALOGE("null destination");
340        return -EFAULT;
341    }
342
343    // Parse the prefix.
344    uint8_t rawAddress[sizeof(in6_addr)];
345    uint8_t family;
346    uint8_t prefixLength;
347    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
348                                &prefixLength);
349    if (rawLength < 0) {
350        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
351        return rawLength;
352    }
353
354    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
355        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
356        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
357    }
358
359    uint8_t type = RTN_UNICAST;
360    uint32_t ifindex;
361    uint8_t rawNexthop[sizeof(in6_addr)];
362
363    if (nexthop && !strcmp(nexthop, "unreachable")) {
364        type = RTN_UNREACHABLE;
365        // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
366        // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
367        // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
368        interface = OIF_NONE;
369        nexthop = NULL;
370    } else if (nexthop && !strcmp(nexthop, "throw")) {
371        type = RTN_THROW;
372        interface = OIF_NONE;
373        nexthop = NULL;
374    } else {
375        // If an interface was specified, find the ifindex.
376        if (interface != OIF_NONE) {
377            ifindex = if_nametoindex(interface);
378            if (!ifindex) {
379                ALOGE("cannot find interface %s", interface);
380                return -ENODEV;
381            }
382        }
383
384        // If a nexthop was specified, parse it as the same family as the prefix.
385        if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
386            ALOGE("inet_pton failed for nexthop %s", nexthop);
387            return -EINVAL;
388        }
389    }
390
391    // Assemble a rtmsg and put it in an array of iovec structures.
392    rtmsg route = {
393        .rtm_protocol = RTPROT_STATIC,
394        .rtm_type = type,
395        .rtm_family = family,
396        .rtm_dst_len = prefixLength,
397        .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
398    };
399
400    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
401    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
402
403    iovec iov[] = {
404        { NULL,          0 },
405        { &route,        sizeof(route) },
406        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
407        { &table,        sizeof(table) },
408        { &rtaDst,       sizeof(rtaDst) },
409        { rawAddress,    static_cast<size_t>(rawLength) },
410        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
411        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
412        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
413        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
414    };
415
416    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
417                                                NETLINK_REQUEST_FLAGS;
418    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
419}
420
421// An iptables rule to mark incoming packets on a network with the netId of the network.
422//
423// This is so that the kernel can:
424// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
425//   replies, SYN-ACKs, etc).
426// + Mark sockets that accept connections from this interface so that the connection stays on the
427//   same interface.
428WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
429                                                Permission permission, bool add) {
430    Fwmark fwmark;
431
432    fwmark.netId = netId;
433    fwmark.explicitlySelected = true;
434    fwmark.protectedFromVpn = true;
435    fwmark.permission = permission;
436
437    char markString[UINT32_HEX_STRLEN];
438    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
439
440    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
441                     "MARK", "--set-mark", markString, NULL)) {
442        ALOGE("failed to change iptables rule that sets incoming packet mark");
443        return -EREMOTEIO;
444    }
445
446    return 0;
447}
448
449// A rule to route responses to the local network forwarded via the VPN.
450//
451// When a VPN is in effect, packets from the local network to upstream networks are forwarded into
452// the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
453WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
454    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
455                        ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
456                        INVALID_UID, INVALID_UID);
457}
458
459// A rule to route all traffic from a given set of UIDs to go over the VPN.
460//
461// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
462// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
463// bypass the VPN if the protectedFromVpn bit is set.
464WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
465                                             bool secure, bool add) {
466    Fwmark fwmark;
467    Fwmark mask;
468
469    fwmark.protectedFromVpn = false;
470    mask.protectedFromVpn = true;
471
472    uint32_t priority;
473
474    if (secure) {
475        priority = RULE_PRIORITY_SECURE_VPN;
476    } else {
477        priority = RULE_PRIORITY_BYPASSABLE_VPN;
478
479        fwmark.explicitlySelected = false;
480        mask.explicitlySelected = true;
481    }
482
483    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
484                        mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
485}
486
487// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
488// set of UIDs.
489//
490// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
491// target set, but where the DnsProxyListener itself is not.
492WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
493                                                     bool add) {
494    Fwmark fwmark;
495    Fwmark mask;
496
497    fwmark.netId = netId;
498    mask.netId = FWMARK_NET_ID_MASK;
499
500    fwmark.permission = PERMISSION_SYSTEM;
501    mask.permission = PERMISSION_SYSTEM;
502
503    uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
504
505    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
506                        mask.intValue);
507}
508
509// A rule to route traffic based on an explicitly chosen network.
510//
511// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
512//
513// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
514// to check it again in the rules here, because a network's permissions may have been updated via
515// modifyNetworkPermission().
516WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
517                                                 Permission permission, uid_t uidStart,
518                                                 uid_t uidEnd, bool add) {
519    Fwmark fwmark;
520    Fwmark mask;
521
522    fwmark.netId = netId;
523    mask.netId = FWMARK_NET_ID_MASK;
524
525    fwmark.explicitlySelected = true;
526    mask.explicitlySelected = true;
527
528    fwmark.permission = permission;
529    mask.permission = permission;
530
531    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
532                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
533}
534
535// A rule to route traffic based on a chosen outgoing interface.
536//
537// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
538// the outgoing interface (typically for link-local communications).
539WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
540                                                 Permission permission, uid_t uidStart,
541                                                 uid_t uidEnd, bool add) {
542    Fwmark fwmark;
543    Fwmark mask;
544
545    fwmark.permission = permission;
546    mask.permission = permission;
547
548    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
549                        fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
550}
551
552// A rule to route traffic based on the chosen network.
553//
554// This is for sockets that have not explicitly requested a particular network, but have been
555// bound to one when they called connect(). This ensures that sockets connected on a particular
556// network stay on that network even if the default network changes.
557WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
558                                                 Permission permission, bool add) {
559    Fwmark fwmark;
560    Fwmark mask;
561
562    fwmark.netId = netId;
563    mask.netId = FWMARK_NET_ID_MASK;
564
565    fwmark.explicitlySelected = false;
566    mask.explicitlySelected = true;
567
568    fwmark.permission = permission;
569    mask.permission = permission;
570
571    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
572                        fwmark.intValue, mask.intValue);
573}
574
575// A rule to enable split tunnel VPNs.
576//
577// If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to
578// go over the default network, provided it wasn't explicitly restricted to the VPN and has the
579// permissions required by the default network.
580WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
581                                                const char* physicalInterface,
582                                                Permission permission) {
583    uint32_t table = getRouteTableForInterface(physicalInterface);
584    if (table == RT_TABLE_UNSPEC) {
585        return -ESRCH;
586    }
587
588    Fwmark fwmark;
589    Fwmark mask;
590
591    fwmark.netId = vpnNetId;
592    mask.netId = FWMARK_NET_ID_MASK;
593
594    fwmark.explicitlySelected = false;
595    mask.explicitlySelected = true;
596
597    fwmark.permission = permission;
598    mask.permission = permission;
599
600    return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue,
601                        mask.intValue);
602}
603
604// Add rules to allow legacy routes added through the requestRouteToHost() API.
605WARN_UNUSED_RESULT int addLegacyRouteRules() {
606    Fwmark fwmark;
607    Fwmark mask;
608
609    fwmark.explicitlySelected = false;
610    mask.explicitlySelected = true;
611
612    // Rules to allow legacy routes to override the default network.
613    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
614                               fwmark.intValue, mask.intValue)) {
615        return ret;
616    }
617    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
618                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
619        return ret;
620    }
621
622    fwmark.permission = PERMISSION_SYSTEM;
623    mask.permission = PERMISSION_SYSTEM;
624
625    // A rule to allow legacy routes from system apps to override VPNs.
626    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
627                        fwmark.intValue, mask.intValue);
628}
629
630// Add rules to lookup the local network when specified explicitly or otherwise.
631WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
632    if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
633                                            INVALID_UID, INVALID_UID, ACTION_ADD)) {
634        return ret;
635    }
636
637    Fwmark fwmark;
638    Fwmark mask;
639
640    fwmark.explicitlySelected = false;
641    mask.explicitlySelected = true;
642
643    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
644                        fwmark.intValue, mask.intValue);
645}
646
647// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
648// rule, but with a lower priority. We will never create routes in the main table; it should only be
649// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
650// This is necessary, for example, when adding a route through a directly-connected gateway: in
651// order to add the route, there must already be a directly-connected route that covers the gateway.
652WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
653    Fwmark fwmark;
654    Fwmark mask;
655
656    fwmark.netId = NETID_UNSET;
657    mask.netId = FWMARK_NET_ID_MASK;
658
659    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
660                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
661}
662
663// Add an explicit unreachable rule close to the end of the prioriy list to make it clear that
664// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
665// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
666// rule will hopefully make things even clearer.
667WARN_UNUSED_RESULT int addUnreachableRule() {
668    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
669                        MARK_UNSET);
670}
671
672WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
673    if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
674        return ret;
675    }
676    return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
677                                     INVALID_UID, INVALID_UID, add);
678}
679
680WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
681                                             Permission permission, bool add) {
682    uint32_t table = getRouteTableForInterface(interface);
683    if (table == RT_TABLE_UNSPEC) {
684        return -ESRCH;
685    }
686
687    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
688        return ret;
689    }
690    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
691                                            add)) {
692        return ret;
693    }
694    if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
695                                            add)) {
696        return ret;
697    }
698    return modifyImplicitNetworkRule(netId, table, permission, add);
699}
700
701WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
702                                            const UidRanges& uidRanges, bool secure, bool add,
703                                            bool modifyNonUidBasedRules) {
704    uint32_t table = getRouteTableForInterface(interface);
705    if (table == RT_TABLE_UNSPEC) {
706        return -ESRCH;
707    }
708
709    for (const UidRanges::Range& range : uidRanges.getRanges()) {
710        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
711            return ret;
712        }
713        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
714                                                range.second, add)) {
715            return ret;
716        }
717        if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
718                                                range.second, add)) {
719            return ret;
720        }
721    }
722
723    if (modifyNonUidBasedRules) {
724        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
725            return ret;
726        }
727        if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
728            return ret;
729        }
730        if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
731            return ret;
732        }
733        return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
734    }
735
736    return 0;
737}
738
739WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
740                                            Permission permission) {
741    uint32_t table = getRouteTableForInterface(interface);
742    if (table == RT_TABLE_UNSPEC) {
743        return -ESRCH;
744    }
745
746    Fwmark fwmark;
747    Fwmark mask;
748
749    fwmark.netId = NETID_UNSET;
750    mask.netId = FWMARK_NET_ID_MASK;
751
752    fwmark.permission = permission;
753    mask.permission = permission;
754
755    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
756                        mask.intValue);
757}
758
759WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
760                                             const char* outputInterface) {
761    uint32_t table = getRouteTableForInterface(outputInterface);
762    if (table == RT_TABLE_UNSPEC) {
763        return -ESRCH;
764    }
765
766    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
767                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
768}
769
770// Returns 0 on success or negative errno on failure.
771WARN_UNUSED_RESULT int flushRules() {
772    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
773        const char* argv[] = {
774            IP_PATH,
775            IP_VERSIONS[i],
776            "rule",
777            "flush",
778        };
779        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
780            ALOGE("failed to flush rules");
781            return -EREMOTEIO;
782        }
783    }
784    return 0;
785}
786
787// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
788// route, to the main table as well.
789// Returns 0 on success or negative errno on failure.
790WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
791                                   const char* nexthop, RouteController::TableType tableType) {
792    uint32_t table;
793    switch (tableType) {
794        case RouteController::INTERFACE: {
795            table = getRouteTableForInterface(interface);
796            if (table == RT_TABLE_UNSPEC) {
797                return -ESRCH;
798            }
799            break;
800        }
801        case RouteController::LOCAL_NETWORK: {
802            table = ROUTE_TABLE_LOCAL_NETWORK;
803            break;
804        }
805        case RouteController::LEGACY_NETWORK: {
806            table = ROUTE_TABLE_LEGACY_NETWORK;
807            break;
808        }
809        case RouteController::LEGACY_SYSTEM: {
810            table = ROUTE_TABLE_LEGACY_SYSTEM;
811            break;
812        }
813    }
814
815    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
816    // Trying to add a route that already exists shouldn't cause an error.
817    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
818        return ret;
819    }
820
821    return 0;
822}
823
824// Returns 0 on success or negative errno on failure.
825WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
826    uint32_t table = getRouteTableForInterface(interface);
827    if (table == RT_TABLE_UNSPEC) {
828        return -ESRCH;
829    }
830
831    char tableString[UINT32_STRLEN];
832    snprintf(tableString, sizeof(tableString), "%u", table);
833
834    int ret = 0;
835    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
836        const char* argv[] = {
837            IP_PATH,
838            IP_VERSIONS[i],
839            "route",
840            "flush",
841            "table",
842            tableString,
843        };
844
845        // A flush works by dumping routes and deleting each route as it's returned, and it can
846        // fail if something else deletes the route between the dump and the delete. This can
847        // happen, for example, if an interface goes down while we're trying to flush its routes.
848        // So try multiple times and only return an error if the last attempt fails.
849        //
850        // TODO: replace this with our own netlink code.
851        unsigned attempts = 0;
852        int err;
853        do {
854            err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
855                                      NULL, false, false);
856            ++attempts;
857        } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
858        if (err) {
859            ALOGE("failed to flush %s routes in table %s after %d attempts",
860                  IP_VERSIONS[i], tableString, attempts);
861            ret = -EREMOTEIO;
862        }
863    }
864
865    // If we failed to flush routes, the caller may elect to keep this interface around, so keep
866    // track of its name.
867    if (!ret) {
868        interfaceToTable.erase(interface);
869    }
870
871    return ret;
872}
873
874}  // namespace
875
876int RouteController::Init(unsigned localNetId) {
877    if (int ret = flushRules()) {
878        return ret;
879    }
880    if (int ret = addLegacyRouteRules()) {
881        return ret;
882    }
883    if (int ret = addLocalNetworkRules(localNetId)) {
884        return ret;
885    }
886    if (int ret = addDirectlyConnectedRule()) {
887        return ret;
888    }
889    if (int ret = addUnreachableRule()) {
890        return ret;
891    }
892    updateTableNamesFile();
893    return 0;
894}
895
896int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
897    return modifyLocalNetwork(netId, interface, ACTION_ADD);
898}
899
900int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
901    return modifyLocalNetwork(netId, interface, ACTION_DEL);
902}
903
904int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
905                                                   Permission permission) {
906    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
907        return ret;
908    }
909    updateTableNamesFile();
910    return 0;
911}
912
913int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
914                                                        Permission permission) {
915    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
916        return ret;
917    }
918    if (int ret = flushRoutes(interface)) {
919        return ret;
920    }
921    updateTableNamesFile();
922    return 0;
923}
924
925int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
926                                                  bool secure, const UidRanges& uidRanges) {
927    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
928                                       MODIFY_NON_UID_BASED_RULES)) {
929        return ret;
930    }
931    updateTableNamesFile();
932    return 0;
933}
934
935int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
936                                                       bool secure, const UidRanges& uidRanges) {
937    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
938                                       MODIFY_NON_UID_BASED_RULES)) {
939        return ret;
940    }
941    if (int ret = flushRoutes(interface)) {
942        return ret;
943    }
944    updateTableNamesFile();
945    return 0;
946}
947
948int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
949                                                     Permission oldPermission,
950                                                     Permission newPermission) {
951    // Add the new rules before deleting the old ones, to avoid race conditions.
952    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
953        return ret;
954    }
955    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
956}
957
958int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
959                                              const UidRanges& uidRanges) {
960    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
961                                !MODIFY_NON_UID_BASED_RULES);
962}
963
964int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
965                                                   bool secure, const UidRanges& uidRanges) {
966    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
967                                !MODIFY_NON_UID_BASED_RULES);
968}
969
970int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
971    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
972}
973
974int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
975                                                       Permission permission) {
976    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
977}
978
979int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
980                              TableType tableType) {
981    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
982}
983
984int RouteController::removeRoute(const char* interface, const char* destination,
985                                 const char* nexthop, TableType tableType) {
986    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
987}
988
989int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
990    return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
991}
992
993int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
994    return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
995}
996
997int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
998                                                  Permission permission) {
999    return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission);
1000}
1001
1002int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
1003                                                     const char* physicalInterface,
1004                                                     Permission permission) {
1005    return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission);
1006}
1007