RouteController.cpp revision 5ad4e98f7b566ffde39491ee4e80d4a15507f053
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "UidRanges.h"
21#include "DummyNetwork.h"
22
23#define LOG_TAG "Netd"
24#include "log/log.h"
25#include "logwrap/logwrap.h"
26#include "netutils/ifc.h"
27#include "resolv_netid.h"
28
29#include <arpa/inet.h>
30#include <fcntl.h>
31#include <linux/fib_rules.h>
32#include <map>
33#include <net/if.h>
34#include <sys/stat.h>
35
36namespace {
37
38// BEGIN CONSTANTS --------------------------------------------------------------------------------
39
40const uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
41const uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
42const uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
43const uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
44const uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
45const uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
46const uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
47const uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
48const uint32_t RULE_PRIORITY_TETHERING           = 18000;
49const uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
50const uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
51const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
52const uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
53const uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
54const uint32_t RULE_PRIORITY_UNREACHABLE         = 32000;
55
56const uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
57const uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
58const uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;
59
60const char* const ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
61const char* const ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
62const char* const ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";
63
64const char* const ROUTE_TABLE_NAME_LOCAL = "local";
65const char* const ROUTE_TABLE_NAME_MAIN  = "main";
66
67// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
68// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
69// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
70// that will warn us if upstream has given these values some other meaning.
71const uint16_t FRA_UID_START = 18;
72const uint16_t FRA_UID_END   = 19;
73static_assert(FRA_UID_START > FRA_MAX,
74             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
75             "Check that these values match what the kernel does and then update this assertion.");
76
77const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
78const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;
79
80const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};
81
82const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};
83
84const char* const IP_VERSIONS[] = {"-4", "-6"};
85
86const uid_t UID_ROOT = 0;
87const char* const IIF_LOOPBACK = "lo";
88const char* const IIF_NONE = NULL;
89const char* const OIF_NONE = NULL;
90const bool ACTION_ADD = true;
91const bool ACTION_DEL = false;
92const bool MODIFY_NON_UID_BASED_RULES = true;
93
94const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
95const int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
96const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--
97
98const unsigned ROUTE_FLUSH_ATTEMPTS = 2;
99
// Wraps RTA_LENGTH(x) so that the result is already a uint16_t. Using RTA_LENGTH(x) directly
// inside static initializers triggers "non-constant-expression cannot be narrowed from type
// 'unsigned int' to 'unsigned short'" warnings, even when x itself is a uint16_t.
constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
    return static_cast<uint16_t>(RTA_LENGTH(x));
}
105
106// These are practically const, but can't be declared so, because they are used to initialize
107// non-const pointers ("void* iov_base") in iovec arrays.
108rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
109rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
110rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
111rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
112rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
113rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };
114
115rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
116rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };
117
118uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
119
120// END CONSTANTS ----------------------------------------------------------------------------------
121
122// No locks needed because RouteController is accessed only from one thread (in CommandListener).
123std::map<std::string, uint32_t> interfaceToTable;
124
125uint32_t getRouteTableForInterface(const char* interface) {
126    uint32_t index = if_nametoindex(interface);
127    if (index) {
128        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
129        interfaceToTable[interface] = index;
130        return index;
131    }
132    // If the interface goes away if_nametoindex() will return 0 but we still need to know
133    // the index so we can remove the rules and routes.
134    auto iter = interfaceToTable.find(interface);
135    if (iter == interfaceToTable.end()) {
136        ALOGE("cannot find interface %s", interface);
137        return RT_TABLE_UNSPEC;
138    }
139    return iter->second;
140}
141
142void addTableName(uint32_t table, const std::string& name, std::string* contents) {
143    char tableString[UINT32_STRLEN];
144    snprintf(tableString, sizeof(tableString), "%u", table);
145    *contents += tableString;
146    *contents += " ";
147    *contents += name;
148    *contents += "\n";
149}
150
151// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
152void updateTableNamesFile() {
153    std::string contents;
154
155    addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
156    addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
157
158    addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
159    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
160    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
161
162    for (const auto& entry : interfaceToTable) {
163        addTableName(entry.second, entry.first, &contents);
164    }
165
166    int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
167    if (fd == -1) {
168        ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
169        return;
170    }
171    // File creation is affected by umask, so make sure the right mode bits are set.
172    if (fchmod(fd, RT_TABLES_MODE) == -1) {
173        ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
174    }
175    ssize_t bytesWritten = write(fd, contents.data(), contents.size());
176    if (bytesWritten != static_cast<ssize_t>(contents.size())) {
177        ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
178              contents.size(), strerror(errno));
179    }
180    close(fd);
181}
182
183// Sends a netlink request and expects an ack.
184// |iov| is an array of struct iovec that contains the netlink message payload.
185// The netlink header is generated by this function based on |action| and |flags|.
186// Returns -errno if there was an error or if the kernel reported an error.
187WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
188    nlmsghdr nlmsg = {
189        .nlmsg_type = action,
190        .nlmsg_flags = flags,
191    };
192    iov[0].iov_base = &nlmsg;
193    iov[0].iov_len = sizeof(nlmsg);
194    for (int i = 0; i < iovlen; ++i) {
195        nlmsg.nlmsg_len += iov[i].iov_len;
196    }
197
198    int ret;
199    struct {
200        nlmsghdr msg;
201        nlmsgerr err;
202    } response;
203
204    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
205    if (sock != -1 &&
206            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
207                    sizeof(NETLINK_ADDRESS)) != -1 &&
208            writev(sock, iov, iovlen) != -1 &&
209            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
210        if (ret == sizeof(response)) {
211            ret = response.err.error;  // Netlink errors are negative errno.
212            if (ret) {
213                ALOGE("netlink response contains error (%s)", strerror(-ret));
214            }
215        } else {
216            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
217            ret = -EBADMSG;
218        }
219    } else {
220        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
221        ret = -errno;
222    }
223
224    if (sock != -1) {
225        close(sock);
226    }
227
228    return ret;
229}
230
231// Returns 0 on success or negative errno on failure.
232int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
233    if (!input) {
234        *length = 0;
235        *padding = 0;
236        return 0;
237    }
238    *length = strlcpy(name, input, IFNAMSIZ) + 1;
239    if (*length > IFNAMSIZ) {
240        ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
241        return -ENAMETOOLONG;
242    }
243    *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
244    return 0;
245}
246
247// Adds or removes a routing rule for IPv4 and IPv6.
248//
249// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
250//   returns ENETUNREACH.
251// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
252//   ignored.
253// + If |iif| is non-NULL, the rule matches the specified incoming interface.
254// + If |oif| is non-NULL, the rule matches the specified outgoing interface.
255// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
256//   range (inclusive). Otherwise, the rule matches packets from all UIDs.
257//
258// Returns 0 on success or negative errno on failure.
259WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
260                                    uint32_t fwmark, uint32_t mask, const char* iif,
261                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
262    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
263    if (fwmark & ~mask) {
264        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
265        return -ERANGE;
266    }
267
268    // Interface names must include exactly one terminating NULL and be properly padded, or older
269    // kernels will refuse to delete rules.
270    char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
271    size_t iifLength, oifLength;
272    uint16_t iifPadding, oifPadding;
273    if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
274        return ret;
275    }
276    if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
277        return ret;
278    }
279
280    // Either both start and end UID must be specified, or neither.
281    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
282        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
283        return -EUSERS;
284    }
285    bool isUidRule = (uidStart != INVALID_UID);
286
287    // Assemble a rule request and put it in an array of iovec structures.
288    fib_rule_hdr rule = {
289        .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
290                                                                  FR_ACT_UNREACHABLE),
291    };
292
293    rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
294    rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
295
296    iovec iov[] = {
297        { NULL,              0 },
298        { &rule,             sizeof(rule) },
299        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
300        { &priority,         sizeof(priority) },
301        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
302        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
303        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
304        { &fwmark,           mask ? sizeof(fwmark) : 0 },
305        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
306        { &mask,             mask ? sizeof(mask) : 0 },
307        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
308        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
309        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
310        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
311        { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
312        { iifName,           iifLength },
313        { PADDING_BUFFER,    iifPadding },
314        { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
315        { oifName,           oifLength },
316        { PADDING_BUFFER,    oifPadding },
317    };
318
319    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
320    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
321        rule.family = AF_FAMILIES[i];
322        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
323            return ret;
324        }
325    }
326
327    return 0;
328}
329
330WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
331                                    uint32_t fwmark, uint32_t mask) {
332    return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
333                        INVALID_UID);
334}
335
336// Adds or deletes an IPv4 or IPv6 route.
337// Returns 0 on success or negative errno on failure.
338WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
339                                     const char* destination, const char* nexthop) {
340    // At least the destination must be non-null.
341    if (!destination) {
342        ALOGE("null destination");
343        return -EFAULT;
344    }
345
346    // Parse the prefix.
347    uint8_t rawAddress[sizeof(in6_addr)];
348    uint8_t family;
349    uint8_t prefixLength;
350    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
351                                &prefixLength);
352    if (rawLength < 0) {
353        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
354        return rawLength;
355    }
356
357    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
358        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
359        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
360    }
361
362    uint8_t type = RTN_UNICAST;
363    uint32_t ifindex;
364    uint8_t rawNexthop[sizeof(in6_addr)];
365
366    if (nexthop && !strcmp(nexthop, "unreachable")) {
367        type = RTN_UNREACHABLE;
368        // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
369        // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
370        // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
371        interface = OIF_NONE;
372        nexthop = NULL;
373    } else if (nexthop && !strcmp(nexthop, "throw")) {
374        type = RTN_THROW;
375        interface = OIF_NONE;
376        nexthop = NULL;
377    } else {
378        // If an interface was specified, find the ifindex.
379        if (interface != OIF_NONE) {
380            ifindex = if_nametoindex(interface);
381            if (!ifindex) {
382                ALOGE("cannot find interface %s", interface);
383                return -ENODEV;
384            }
385        }
386
387        // If a nexthop was specified, parse it as the same family as the prefix.
388        if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
389            ALOGE("inet_pton failed for nexthop %s", nexthop);
390            return -EINVAL;
391        }
392    }
393
394    // Assemble a rtmsg and put it in an array of iovec structures.
395    rtmsg route = {
396        .rtm_protocol = RTPROT_STATIC,
397        .rtm_type = type,
398        .rtm_family = family,
399        .rtm_dst_len = prefixLength,
400        .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
401    };
402
403    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
404    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
405
406    iovec iov[] = {
407        { NULL,          0 },
408        { &route,        sizeof(route) },
409        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
410        { &table,        sizeof(table) },
411        { &rtaDst,       sizeof(rtaDst) },
412        { rawAddress,    static_cast<size_t>(rawLength) },
413        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
414        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
415        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
416        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
417    };
418
419    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
420                                                NETLINK_REQUEST_FLAGS;
421    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
422}
423
424// An iptables rule to mark incoming packets on a network with the netId of the network.
425//
426// This is so that the kernel can:
427// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
428//   replies, SYN-ACKs, etc).
429// + Mark sockets that accept connections from this interface so that the connection stays on the
430//   same interface.
431WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
432                                                Permission permission, bool add) {
433    Fwmark fwmark;
434
435    fwmark.netId = netId;
436    fwmark.explicitlySelected = true;
437    fwmark.protectedFromVpn = true;
438    fwmark.permission = permission;
439
440    char markString[UINT32_HEX_STRLEN];
441    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
442
443    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
444                     "MARK", "--set-mark", markString, NULL)) {
445        ALOGE("failed to change iptables rule that sets incoming packet mark");
446        return -EREMOTEIO;
447    }
448
449    return 0;
450}
451
452// A rule to route responses to the local network forwarded via the VPN.
453//
454// When a VPN is in effect, packets from the local network to upstream networks are forwarded into
455// the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
456WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
457    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
458                        ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
459                        INVALID_UID, INVALID_UID);
460}
461
462// A rule to route all traffic from a given set of UIDs to go over the VPN.
463//
464// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
465// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
466// bypass the VPN if the protectedFromVpn bit is set.
467WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
468                                             bool secure, bool add) {
469    Fwmark fwmark;
470    Fwmark mask;
471
472    fwmark.protectedFromVpn = false;
473    mask.protectedFromVpn = true;
474
475    uint32_t priority;
476
477    if (secure) {
478        priority = RULE_PRIORITY_SECURE_VPN;
479    } else {
480        priority = RULE_PRIORITY_BYPASSABLE_VPN;
481
482        fwmark.explicitlySelected = false;
483        mask.explicitlySelected = true;
484    }
485
486    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
487                        mask.intValue, IIF_LOOPBACK, OIF_NONE, uidStart, uidEnd);
488}
489
490// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
491// set of UIDs.
492//
493// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
494// target set, but where the DnsProxyListener itself is not.
495WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
496                                                     bool add) {
497    Fwmark fwmark;
498    Fwmark mask;
499
500    fwmark.netId = netId;
501    mask.netId = FWMARK_NET_ID_MASK;
502
503    fwmark.permission = PERMISSION_SYSTEM;
504    mask.permission = PERMISSION_SYSTEM;
505
506    uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
507
508    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
509                        mask.intValue);
510}
511
512// A rule to route traffic based on an explicitly chosen network.
513//
514// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
515//
516// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
517// to check it again in the rules here, because a network's permissions may have been updated via
518// modifyNetworkPermission().
519WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
520                                                 Permission permission, uid_t uidStart,
521                                                 uid_t uidEnd, bool add) {
522    Fwmark fwmark;
523    Fwmark mask;
524
525    fwmark.netId = netId;
526    mask.netId = FWMARK_NET_ID_MASK;
527
528    fwmark.explicitlySelected = true;
529    mask.explicitlySelected = true;
530
531    fwmark.permission = permission;
532    mask.permission = permission;
533
534    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
535                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
536}
537
538// A rule to route traffic based on a chosen outgoing interface.
539//
540// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
541// the outgoing interface (typically for link-local communications).
542WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
543                                                 Permission permission, uid_t uidStart,
544                                                 uid_t uidEnd, bool add) {
545    Fwmark fwmark;
546    Fwmark mask;
547
548    fwmark.permission = permission;
549    mask.permission = permission;
550
551    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
552                        fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
553}
554
555// A rule to route traffic based on the chosen network.
556//
557// This is for sockets that have not explicitly requested a particular network, but have been
558// bound to one when they called connect(). This ensures that sockets connected on a particular
559// network stay on that network even if the default network changes.
560WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
561                                                 Permission permission, bool add) {
562    Fwmark fwmark;
563    Fwmark mask;
564
565    fwmark.netId = netId;
566    mask.netId = FWMARK_NET_ID_MASK;
567
568    fwmark.explicitlySelected = false;
569    mask.explicitlySelected = true;
570
571    fwmark.permission = permission;
572    mask.permission = permission;
573
574    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
575                        fwmark.intValue, mask.intValue);
576}
577
578// A rule to enable split tunnel VPNs.
579//
580// If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to
581// go over the default network, provided it wasn't explicitly restricted to the VPN and has the
582// permissions required by the default network.
583WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
584                                                const char* physicalInterface,
585                                                Permission permission) {
586    uint32_t table = getRouteTableForInterface(physicalInterface);
587    if (table == RT_TABLE_UNSPEC) {
588        return -ESRCH;
589    }
590
591    Fwmark fwmark;
592    Fwmark mask;
593
594    fwmark.netId = vpnNetId;
595    mask.netId = FWMARK_NET_ID_MASK;
596
597    fwmark.explicitlySelected = false;
598    mask.explicitlySelected = true;
599
600    fwmark.permission = permission;
601    mask.permission = permission;
602
603    return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue,
604                        mask.intValue);
605}
606
607// Add rules to allow legacy routes added through the requestRouteToHost() API.
608WARN_UNUSED_RESULT int addLegacyRouteRules() {
609    Fwmark fwmark;
610    Fwmark mask;
611
612    fwmark.explicitlySelected = false;
613    mask.explicitlySelected = true;
614
615    // Rules to allow legacy routes to override the default network.
616    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
617                               fwmark.intValue, mask.intValue)) {
618        return ret;
619    }
620    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
621                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
622        return ret;
623    }
624
625    fwmark.permission = PERMISSION_SYSTEM;
626    mask.permission = PERMISSION_SYSTEM;
627
628    // A rule to allow legacy routes from system apps to override VPNs.
629    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
630                        fwmark.intValue, mask.intValue);
631}
632
633// Add rules to lookup the local network when specified explicitly or otherwise.
634WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
635    if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
636                                            INVALID_UID, INVALID_UID, ACTION_ADD)) {
637        return ret;
638    }
639
640    Fwmark fwmark;
641    Fwmark mask;
642
643    fwmark.explicitlySelected = false;
644    mask.explicitlySelected = true;
645
646    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
647                        fwmark.intValue, mask.intValue);
648}
649
650int configureDummyNetwork() {
651    const char *interface = DummyNetwork::INTERFACE_NAME;
652    uint32_t table = getRouteTableForInterface(interface);
653    if (table == RT_TABLE_UNSPEC) {
654        // getRouteTableForInterface has already looged an error.
655        return -ESRCH;
656    }
657
658    ifc_init();
659    int ret = ifc_up(interface);
660    ifc_close();
661    if (ret) {
662        ALOGE("Can't bring up %s: %s", interface, strerror(errno));
663        return -errno;
664    }
665
666    if ((ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE,
667                                         INVALID_UID, INVALID_UID, ACTION_ADD))) {
668        ALOGE("Can't create oif rule for %s: %s", interface, strerror(-ret));
669        return ret;
670    }
671
672    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "0.0.0.0/0", NULL))) {
673        ALOGE("Can't add IPv4 default route to %s: %s", interface, strerror(-ret));
674        return ret;
675    }
676
677    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "::/0", NULL))) {
678        ALOGE("Can't add IPv6 default route to %s: %s", interface, strerror(-ret));
679        return ret;
680    }
681
682    return 0;
683}
684
685// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
686// rule, but with a lower priority. We will never create routes in the main table; it should only be
687// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
688// This is necessary, for example, when adding a route through a directly-connected gateway: in
689// order to add the route, there must already be a directly-connected route that covers the gateway.
690WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
691    Fwmark fwmark;
692    Fwmark mask;
693
694    fwmark.netId = NETID_UNSET;
695    mask.netId = FWMARK_NET_ID_MASK;
696
697    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
698                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
699}
700
701// Add an explicit unreachable rule close to the end of the prioriy list to make it clear that
702// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
703// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
704// rule will hopefully make things even clearer.
705WARN_UNUSED_RESULT int addUnreachableRule() {
706    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
707                        MARK_UNSET);
708}
709
710WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
711    if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
712        return ret;
713    }
714    return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
715                                     INVALID_UID, INVALID_UID, add);
716}
717
718WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
719                                             Permission permission, bool add) {
720    uint32_t table = getRouteTableForInterface(interface);
721    if (table == RT_TABLE_UNSPEC) {
722        return -ESRCH;
723    }
724
725    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
726        return ret;
727    }
728    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
729                                            add)) {
730        return ret;
731    }
732    if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
733                                            add)) {
734        return ret;
735    }
736    return modifyImplicitNetworkRule(netId, table, permission, add);
737}
738
739WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
740                                            const UidRanges& uidRanges, bool secure, bool add,
741                                            bool modifyNonUidBasedRules) {
742    uint32_t table = getRouteTableForInterface(interface);
743    if (table == RT_TABLE_UNSPEC) {
744        return -ESRCH;
745    }
746
747    for (const UidRanges::Range& range : uidRanges.getRanges()) {
748        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
749            return ret;
750        }
751        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
752                                                range.second, add)) {
753            return ret;
754        }
755        if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
756                                                range.second, add)) {
757            return ret;
758        }
759    }
760
761    if (modifyNonUidBasedRules) {
762        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
763            return ret;
764        }
765        if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
766            return ret;
767        }
768        if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
769            return ret;
770        }
771        return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
772    }
773
774    return 0;
775}
776
777WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
778                                            Permission permission) {
779    uint32_t table = getRouteTableForInterface(interface);
780    if (table == RT_TABLE_UNSPEC) {
781        return -ESRCH;
782    }
783
784    Fwmark fwmark;
785    Fwmark mask;
786
787    fwmark.netId = NETID_UNSET;
788    mask.netId = FWMARK_NET_ID_MASK;
789
790    fwmark.permission = permission;
791    mask.permission = permission;
792
793    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
794                        mask.intValue);
795}
796
797WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
798                                             const char* outputInterface) {
799    uint32_t table = getRouteTableForInterface(outputInterface);
800    if (table == RT_TABLE_UNSPEC) {
801        return -ESRCH;
802    }
803
804    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
805                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
806}
807
808// Returns 0 on success or negative errno on failure.
809WARN_UNUSED_RESULT int flushRules() {
810    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
811        const char* argv[] = {
812            IP_PATH,
813            IP_VERSIONS[i],
814            "rule",
815            "flush",
816        };
817        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
818            ALOGE("failed to flush rules");
819            return -EREMOTEIO;
820        }
821    }
822    return 0;
823}
824
825// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
826// route, to the main table as well.
827// Returns 0 on success or negative errno on failure.
828WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
829                                   const char* nexthop, RouteController::TableType tableType) {
830    uint32_t table;
831    switch (tableType) {
832        case RouteController::INTERFACE: {
833            table = getRouteTableForInterface(interface);
834            if (table == RT_TABLE_UNSPEC) {
835                return -ESRCH;
836            }
837            break;
838        }
839        case RouteController::LOCAL_NETWORK: {
840            table = ROUTE_TABLE_LOCAL_NETWORK;
841            break;
842        }
843        case RouteController::LEGACY_NETWORK: {
844            table = ROUTE_TABLE_LEGACY_NETWORK;
845            break;
846        }
847        case RouteController::LEGACY_SYSTEM: {
848            table = ROUTE_TABLE_LEGACY_SYSTEM;
849            break;
850        }
851    }
852
853    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
854    // Trying to add a route that already exists shouldn't cause an error.
855    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
856        return ret;
857    }
858
859    return 0;
860}
861
862// Returns 0 on success or negative errno on failure.
863WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
864    uint32_t table = getRouteTableForInterface(interface);
865    if (table == RT_TABLE_UNSPEC) {
866        return -ESRCH;
867    }
868
869    char tableString[UINT32_STRLEN];
870    snprintf(tableString, sizeof(tableString), "%u", table);
871
872    int ret = 0;
873    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
874        const char* argv[] = {
875            IP_PATH,
876            IP_VERSIONS[i],
877            "route",
878            "flush",
879            "table",
880            tableString,
881        };
882
883        // A flush works by dumping routes and deleting each route as it's returned, and it can
884        // fail if something else deletes the route between the dump and the delete. This can
885        // happen, for example, if an interface goes down while we're trying to flush its routes.
886        // So try multiple times and only return an error if the last attempt fails.
887        //
888        // TODO: replace this with our own netlink code.
889        unsigned attempts = 0;
890        int err;
891        do {
892            err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
893                                      NULL, false, false);
894            ++attempts;
895        } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
896        if (err) {
897            ALOGE("failed to flush %s routes in table %s after %d attempts",
898                  IP_VERSIONS[i], tableString, attempts);
899            ret = -EREMOTEIO;
900        }
901    }
902
903    // If we failed to flush routes, the caller may elect to keep this interface around, so keep
904    // track of its name.
905    if (!ret) {
906        interfaceToTable.erase(interface);
907    }
908
909    return ret;
910}
911
912}  // namespace
913
914int RouteController::Init(unsigned localNetId) {
915    if (int ret = flushRules()) {
916        return ret;
917    }
918    if (int ret = addLegacyRouteRules()) {
919        return ret;
920    }
921    if (int ret = addLocalNetworkRules(localNetId)) {
922        return ret;
923    }
924    if (int ret = addDirectlyConnectedRule()) {
925        return ret;
926    }
927    if (int ret = addUnreachableRule()) {
928        return ret;
929    }
930    // Don't complain if we can't add the dummy network, since not all devices support it.
931    configureDummyNetwork();
932
933    updateTableNamesFile();
934    return 0;
935}
936
937int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
938    return modifyLocalNetwork(netId, interface, ACTION_ADD);
939}
940
941int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
942    return modifyLocalNetwork(netId, interface, ACTION_DEL);
943}
944
945int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
946                                                   Permission permission) {
947    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
948        return ret;
949    }
950    updateTableNamesFile();
951    return 0;
952}
953
954int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
955                                                        Permission permission) {
956    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
957        return ret;
958    }
959    if (int ret = flushRoutes(interface)) {
960        return ret;
961    }
962    updateTableNamesFile();
963    return 0;
964}
965
966int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
967                                                  bool secure, const UidRanges& uidRanges) {
968    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
969                                       MODIFY_NON_UID_BASED_RULES)) {
970        return ret;
971    }
972    updateTableNamesFile();
973    return 0;
974}
975
976int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
977                                                       bool secure, const UidRanges& uidRanges) {
978    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
979                                       MODIFY_NON_UID_BASED_RULES)) {
980        return ret;
981    }
982    if (int ret = flushRoutes(interface)) {
983        return ret;
984    }
985    updateTableNamesFile();
986    return 0;
987}
988
989int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
990                                                     Permission oldPermission,
991                                                     Permission newPermission) {
992    // Add the new rules before deleting the old ones, to avoid race conditions.
993    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
994        return ret;
995    }
996    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
997}
998
999int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
1000                                              const UidRanges& uidRanges) {
1001    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
1002                                !MODIFY_NON_UID_BASED_RULES);
1003}
1004
1005int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
1006                                                   bool secure, const UidRanges& uidRanges) {
1007    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
1008                                !MODIFY_NON_UID_BASED_RULES);
1009}
1010
1011int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
1012    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
1013}
1014
1015int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
1016                                                       Permission permission) {
1017    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
1018}
1019
1020int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
1021                              TableType tableType) {
1022    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
1023}
1024
1025int RouteController::removeRoute(const char* interface, const char* destination,
1026                                 const char* nexthop, TableType tableType) {
1027    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
1028}
1029
1030int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
1031    return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
1032}
1033
1034int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
1035    return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
1036}
1037
1038int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
1039                                                  Permission permission) {
1040    return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission);
1041}
1042
1043int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
1044                                                     const char* physicalInterface,
1045                                                     Permission permission) {
1046    return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission);
1047}
1048