RouteController.cpp revision de5d5df753dd35d852ac47a6174b06eacd0d5523
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "UidRanges.h"
21
22#define LOG_TAG "Netd"
23#include "log/log.h"
24#include "logwrap/logwrap.h"
25#include "resolv_netid.h"
26
27#include <arpa/inet.h>
28#include <fcntl.h>
29#include <linux/fib_rules.h>
30#include <map>
31#include <net/if.h>
32#include <sys/stat.h>
33
34namespace {
35
36// BEGIN CONSTANTS --------------------------------------------------------------------------------
37
// Priorities of the IP rules installed by this file, in increasing numeric order. A rule with a
// larger number here is the "lower priority" one (see the directly-connected rule, which sits just
// after the default network rule).
constexpr uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
constexpr uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
constexpr uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
constexpr uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
constexpr uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
constexpr uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
constexpr uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
constexpr uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
constexpr uint32_t RULE_PRIORITY_TETHERING           = 18000;
constexpr uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
constexpr uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
// constexpr uint32_t RULE_PRIORITY_VPN_FALLTHROUGH  = 21000;
constexpr uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
constexpr uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
constexpr uint32_t RULE_PRIORITY_UNREACHABLE         = 24000;

// Fixed routing table numbers that are not derived from an interface index.
constexpr uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
constexpr uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
constexpr uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Names written to the rt_tables file for the fixed tables above.
constexpr const char* ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

// Names written to the rt_tables file for RT_TABLE_LOCAL and RT_TABLE_MAIN.
constexpr const char* ROUTE_TABLE_NAME_LOCAL = "local";
constexpr const char* ROUTE_TABLE_NAME_MAIN  = "main";
64
65// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
66// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
67// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
68// that will warn us if upstream has given these values some other meaning.
69const uint16_t FRA_UID_START = 18;
70const uint16_t FRA_UID_END   = 19;
71static_assert(FRA_UID_START > FRA_MAX,
72             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
73             "Check that these values match what the kernel does and then update this assertion.");
74
75const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
76const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;
77
78const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};
79
80const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};
81
82const char* const IP_VERSIONS[] = {"-4", "-6"};
83
84const uid_t UID_ROOT = 0;
85const char* const IIF_NONE = NULL;
86const char* const OIF_NONE = NULL;
87const bool ACTION_ADD = true;
88const bool ACTION_DEL = false;
89const bool MODIFY_NON_UID_BASED_RULES = true;
90
91const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
92const int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
93const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--
94
// Wraps RTA_LENGTH(x) so that the result has type uint16_t. Using RTA_LENGTH(x) directly inside
// static initializers triggers "non-constant-expression cannot be narrowed from type 'unsigned
// int' to 'unsigned short'" warnings, even when x is already a uint16_t.
constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
    return static_cast<uint16_t>(RTA_LENGTH(x));
}
100
101// These are practically const, but can't be declared so, because they are used to initialize
102// non-const pointers ("void* iov_base") in iovec arrays.
103rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
104rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
105rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
106rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
107rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
108rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };
109
110rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
111rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };
112
113uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
114
115// END CONSTANTS ----------------------------------------------------------------------------------
116
117// No locks needed because RouteController is accessed only from one thread (in CommandListener).
118std::map<std::string, uint32_t> interfaceToTable;
119
120uint32_t getRouteTableForInterface(const char* interface) {
121    uint32_t index = if_nametoindex(interface);
122    if (index) {
123        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
124        interfaceToTable[interface] = index;
125        return index;
126    }
127    // If the interface goes away if_nametoindex() will return 0 but we still need to know
128    // the index so we can remove the rules and routes.
129    auto iter = interfaceToTable.find(interface);
130    if (iter == interfaceToTable.end()) {
131        ALOGE("cannot find interface %s", interface);
132        return RT_TABLE_UNSPEC;
133    }
134    return iter->second;
135}
136
137void addTableName(uint32_t table, const std::string& name, std::string* contents) {
138    char tableString[UINT32_STRLEN];
139    snprintf(tableString, sizeof(tableString), "%u", table);
140    *contents += tableString;
141    *contents += " ";
142    *contents += name;
143    *contents += "\n";
144}
145
146// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
147void updateTableNamesFile() {
148    std::string contents;
149
150    addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
151    addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
152
153    addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
154    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
155    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
156
157    for (const auto& entry : interfaceToTable) {
158        addTableName(entry.second, entry.first, &contents);
159    }
160
161    int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
162    if (fd == -1) {
163        ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
164        return;
165    }
166    // File creation is affected by umask, so make sure the right mode bits are set.
167    if (fchmod(fd, RT_TABLES_MODE) == -1) {
168        ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
169    }
170    ssize_t bytesWritten = write(fd, contents.data(), contents.size());
171    if (bytesWritten != static_cast<ssize_t>(contents.size())) {
172        ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
173              contents.size(), strerror(errno));
174    }
175    close(fd);
176}
177
178// Sends a netlink request and expects an ack.
179// |iov| is an array of struct iovec that contains the netlink message payload.
180// The netlink header is generated by this function based on |action| and |flags|.
181// Returns -errno if there was an error or if the kernel reported an error.
182WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
183    nlmsghdr nlmsg = {
184        .nlmsg_type = action,
185        .nlmsg_flags = flags,
186    };
187    iov[0].iov_base = &nlmsg;
188    iov[0].iov_len = sizeof(nlmsg);
189    for (int i = 0; i < iovlen; ++i) {
190        nlmsg.nlmsg_len += iov[i].iov_len;
191    }
192
193    int ret;
194    struct {
195        nlmsghdr msg;
196        nlmsgerr err;
197    } response;
198
199    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
200    if (sock != -1 &&
201            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
202                    sizeof(NETLINK_ADDRESS)) != -1 &&
203            writev(sock, iov, iovlen) != -1 &&
204            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
205        if (ret == sizeof(response)) {
206            ret = response.err.error;  // Netlink errors are negative errno.
207            if (ret) {
208                ALOGE("netlink response contains error (%s)", strerror(-ret));
209            }
210        } else {
211            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
212            ret = -EBADMSG;
213        }
214    } else {
215        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
216        ret = -errno;
217    }
218
219    if (sock != -1) {
220        close(sock);
221    }
222
223    return ret;
224}
225
226// Returns 0 on success or negative errno on failure.
227int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
228    if (!input) {
229        *length = 0;
230        *padding = 0;
231        return 0;
232    }
233    *length = strlcpy(name, input, IFNAMSIZ) + 1;
234    if (*length > IFNAMSIZ) {
235        ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
236        return -ENAMETOOLONG;
237    }
238    *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
239    return 0;
240}
241
242// Adds or removes a routing rule for IPv4 and IPv6.
243//
244// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
245//   returns ENETUNREACH.
246// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
247//   ignored.
248// + If |iif| is non-NULL, the rule matches the specified incoming interface.
249// + If |oif| is non-NULL, the rule matches the specified outgoing interface.
250// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
251//   range (inclusive). Otherwise, the rule matches packets from all UIDs.
252//
253// Returns 0 on success or negative errno on failure.
254WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
255                                    uint32_t fwmark, uint32_t mask, const char* iif,
256                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
257    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
258    if (fwmark & ~mask) {
259        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
260        return -ERANGE;
261    }
262
263    // Interface names must include exactly one terminating NULL and be properly padded, or older
264    // kernels will refuse to delete rules.
265    char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
266    size_t iifLength, oifLength;
267    uint16_t iifPadding, oifPadding;
268    if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
269        return ret;
270    }
271    if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
272        return ret;
273    }
274
275    // Either both start and end UID must be specified, or neither.
276    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
277        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
278        return -EUSERS;
279    }
280    bool isUidRule = (uidStart != INVALID_UID);
281
282    // Assemble a rule request and put it in an array of iovec structures.
283    fib_rule_hdr rule = {
284        .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
285                                                                  FR_ACT_UNREACHABLE),
286    };
287
288    rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
289    rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
290
291    iovec iov[] = {
292        { NULL,              0 },
293        { &rule,             sizeof(rule) },
294        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
295        { &priority,         sizeof(priority) },
296        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
297        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
298        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
299        { &fwmark,           mask ? sizeof(fwmark) : 0 },
300        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
301        { &mask,             mask ? sizeof(mask) : 0 },
302        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
303        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
304        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
305        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
306        { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
307        { iifName,           iifLength },
308        { PADDING_BUFFER,    iifPadding },
309        { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
310        { oifName,           oifLength },
311        { PADDING_BUFFER,    oifPadding },
312    };
313
314    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
315    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
316        rule.family = AF_FAMILIES[i];
317        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
318            return ret;
319        }
320    }
321
322    return 0;
323}
324
325WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
326                                    uint32_t fwmark, uint32_t mask) {
327    return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
328                        INVALID_UID);
329}
330
331// Adds or deletes an IPv4 or IPv6 route.
332// Returns 0 on success or negative errno on failure.
333WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
334                                     const char* destination, const char* nexthop) {
335    // At least the destination must be non-null.
336    if (!destination) {
337        ALOGE("null destination");
338        return -EFAULT;
339    }
340
341    // Parse the prefix.
342    uint8_t rawAddress[sizeof(in6_addr)];
343    uint8_t family;
344    uint8_t prefixLength;
345    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
346                                &prefixLength);
347    if (rawLength < 0) {
348        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
349        return rawLength;
350    }
351
352    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
353        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
354        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
355    }
356
357    uint8_t type = RTN_UNICAST;
358    uint32_t ifindex;
359    uint8_t rawNexthop[sizeof(in6_addr)];
360
361    if (nexthop && !strcmp(nexthop, "unreachable")) {
362        type = RTN_UNREACHABLE;
363        // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
364        // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
365        // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
366        interface = OIF_NONE;
367        nexthop = NULL;
368    } else {
369        // If an interface was specified, find the ifindex.
370        if (interface != OIF_NONE) {
371            ifindex = if_nametoindex(interface);
372            if (!ifindex) {
373                ALOGE("cannot find interface %s", interface);
374                return -ENODEV;
375            }
376        }
377
378        // If a nexthop was specified, parse it as the same family as the prefix.
379        if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
380            ALOGE("inet_pton failed for nexthop %s", nexthop);
381            return -EINVAL;
382        }
383    }
384
385    // Assemble a rtmsg and put it in an array of iovec structures.
386    rtmsg route = {
387        .rtm_protocol = RTPROT_STATIC,
388        .rtm_type = type,
389        .rtm_family = family,
390        .rtm_dst_len = prefixLength,
391    };
392
393    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
394    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
395
396    iovec iov[] = {
397        { NULL,          0 },
398        { &route,        sizeof(route) },
399        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
400        { &table,        sizeof(table) },
401        { &rtaDst,       sizeof(rtaDst) },
402        { rawAddress,    static_cast<size_t>(rawLength) },
403        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
404        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
405        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
406        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
407    };
408
409    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
410                                                NETLINK_REQUEST_FLAGS;
411    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
412}
413
414// An iptables rule to mark incoming packets on a network with the netId of the network.
415//
416// This is so that the kernel can:
417// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
418//   replies, SYN-ACKs, etc).
419// + Mark sockets that accept connections from this interface so that the connection stays on the
420//   same interface.
421WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
422                                                Permission permission, bool add) {
423    Fwmark fwmark;
424
425    fwmark.netId = netId;
426    fwmark.explicitlySelected = true;
427    fwmark.protectedFromVpn = true;
428    fwmark.permission = permission;
429
430    char markString[UINT32_HEX_STRLEN];
431    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
432
433    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
434                     "MARK", "--set-mark", markString, NULL)) {
435        ALOGE("failed to change iptables rule that sets incoming packet mark");
436        return -EREMOTEIO;
437    }
438
439    return 0;
440}
441
442// A rule to route responses to the local network forwarded via the VPN.
443//
444// When a VPN is in effect, packets from the local network to upstream networks are forwarded into
445// the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
446WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
447    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
448                        ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
449                        INVALID_UID, INVALID_UID);
450}
451
452// A rule to route all traffic from a given set of UIDs to go over the VPN.
453//
454// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
455// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
456// bypass the VPN if the protectedFromVpn bit is set.
457WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
458                                             bool secure, bool add) {
459    Fwmark fwmark;
460    Fwmark mask;
461
462    fwmark.protectedFromVpn = false;
463    mask.protectedFromVpn = true;
464
465    uint32_t priority;
466
467    if (secure) {
468        priority = RULE_PRIORITY_SECURE_VPN;
469    } else {
470        priority = RULE_PRIORITY_BYPASSABLE_VPN;
471
472        fwmark.explicitlySelected = false;
473        mask.explicitlySelected = true;
474    }
475
476    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
477                        mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
478}
479
480// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
481// set of UIDs.
482//
483// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
484// target set, but where the DnsProxyListener itself is not.
485WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
486                                                     bool add) {
487    Fwmark fwmark;
488    Fwmark mask;
489
490    fwmark.netId = netId;
491    mask.netId = FWMARK_NET_ID_MASK;
492
493    fwmark.permission = PERMISSION_SYSTEM;
494    mask.permission = PERMISSION_SYSTEM;
495
496    uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
497
498    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
499                        mask.intValue);
500}
501
502// A rule to route traffic based on an explicitly chosen network.
503//
504// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
505//
506// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
507// to check it again in the rules here, because a network's permissions may have been updated via
508// modifyNetworkPermission().
509WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
510                                                 Permission permission, uid_t uidStart,
511                                                 uid_t uidEnd, bool add) {
512    Fwmark fwmark;
513    Fwmark mask;
514
515    fwmark.netId = netId;
516    mask.netId = FWMARK_NET_ID_MASK;
517
518    fwmark.explicitlySelected = true;
519    mask.explicitlySelected = true;
520
521    fwmark.permission = permission;
522    mask.permission = permission;
523
524    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
525                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
526}
527
528// A rule to route traffic based on a chosen outgoing interface.
529//
530// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
531// the outgoing interface (typically for link-local communications).
532WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
533                                                 Permission permission, uid_t uidStart,
534                                                 uid_t uidEnd, bool add) {
535    Fwmark fwmark;
536    Fwmark mask;
537
538    fwmark.permission = permission;
539    mask.permission = permission;
540
541    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
542                        fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
543}
544
545// A rule to route traffic based on the chosen network.
546//
547// This is for sockets that have not explicitly requested a particular network, but have been
548// bound to one when they called connect(). This ensures that sockets connected on a particular
549// network stay on that network even if the default network changes.
550WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
551                                                 Permission permission, bool add) {
552    Fwmark fwmark;
553    Fwmark mask;
554
555    fwmark.netId = netId;
556    mask.netId = FWMARK_NET_ID_MASK;
557
558    fwmark.explicitlySelected = false;
559    mask.explicitlySelected = true;
560
561    fwmark.permission = permission;
562    mask.permission = permission;
563
564    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
565                        fwmark.intValue, mask.intValue);
566}
567
568// Add rules to allow legacy routes added through the requestRouteToHost() API.
569WARN_UNUSED_RESULT int addLegacyRouteRules() {
570    Fwmark fwmark;
571    Fwmark mask;
572
573    fwmark.explicitlySelected = false;
574    mask.explicitlySelected = true;
575
576    // Rules to allow legacy routes to override the default network.
577    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
578                               fwmark.intValue, mask.intValue)) {
579        return ret;
580    }
581    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
582                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
583        return ret;
584    }
585
586    fwmark.permission = PERMISSION_SYSTEM;
587    mask.permission = PERMISSION_SYSTEM;
588
589    // A rule to allow legacy routes from system apps to override VPNs.
590    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
591                        fwmark.intValue, mask.intValue);
592}
593
594// Add rules to lookup the local network when specified explicitly or otherwise.
595WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
596    if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
597                                            INVALID_UID, INVALID_UID, ACTION_ADD)) {
598        return ret;
599    }
600
601    Fwmark fwmark;
602    Fwmark mask;
603
604    fwmark.explicitlySelected = false;
605    mask.explicitlySelected = true;
606
607    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
608                        fwmark.intValue, mask.intValue);
609}
610
611// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
612// rule, but with a lower priority. Since the default network rule points to a table with a default
613// route, the rule we're adding will never be used for normal routing lookups. However, the kernel
614// may fall-through to it to find directly-connected routes when it validates that a nexthop (in a
615// route being added) is reachable.
616WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
617    Fwmark fwmark;
618    Fwmark mask;
619
620    fwmark.netId = NETID_UNSET;
621    mask.netId = FWMARK_NET_ID_MASK;
622
623    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
624                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
625}
626
627// Add a rule to preempt the pre-defined "from all lookup main" rule. Packets that reach this rule
628// will be null-routed, and won't fall-through to the main table.
629WARN_UNUSED_RESULT int addUnreachableRule() {
630    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
631                        MARK_UNSET);
632}
633
634WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
635    if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
636        return ret;
637    }
638    return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
639                                     INVALID_UID, INVALID_UID, add);
640}
641
642WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
643                                             Permission permission, bool add) {
644    uint32_t table = getRouteTableForInterface(interface);
645    if (table == RT_TABLE_UNSPEC) {
646        return -ESRCH;
647    }
648
649    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
650        return ret;
651    }
652    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
653                                            add)) {
654        return ret;
655    }
656    if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
657                                            add)) {
658        return ret;
659    }
660    return modifyImplicitNetworkRule(netId, table, permission, add);
661}
662
663WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
664                                            const UidRanges& uidRanges, bool secure, bool add,
665                                            bool modifyNonUidBasedRules) {
666    uint32_t table = getRouteTableForInterface(interface);
667    if (table == RT_TABLE_UNSPEC) {
668        return -ESRCH;
669    }
670
671    for (const UidRanges::Range& range : uidRanges.getRanges()) {
672        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
673            return ret;
674        }
675        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
676                                                range.second, add)) {
677            return ret;
678        }
679        if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
680                                                range.second, add)) {
681            return ret;
682        }
683    }
684
685    if (modifyNonUidBasedRules) {
686        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
687            return ret;
688        }
689        if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
690            return ret;
691        }
692        if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
693            return ret;
694        }
695        return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
696    }
697
698    return 0;
699}
700
701WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
702                                            Permission permission) {
703    uint32_t table = getRouteTableForInterface(interface);
704    if (table == RT_TABLE_UNSPEC) {
705        return -ESRCH;
706    }
707
708    Fwmark fwmark;
709    Fwmark mask;
710
711    fwmark.netId = NETID_UNSET;
712    mask.netId = FWMARK_NET_ID_MASK;
713
714    fwmark.permission = permission;
715    mask.permission = permission;
716
717    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
718                        mask.intValue);
719}
720
721WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
722                                             const char* outputInterface) {
723    uint32_t table = getRouteTableForInterface(outputInterface);
724    if (table == RT_TABLE_UNSPEC) {
725        return -ESRCH;
726    }
727
728    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
729                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
730}
731
732// Returns 0 on success or negative errno on failure.
733WARN_UNUSED_RESULT int flushRules() {
734    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
735        const char* argv[] = {
736            IP_PATH,
737            IP_VERSIONS[i],
738            "rule",
739            "flush",
740        };
741        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
742            ALOGE("failed to flush rules");
743            return -EREMOTEIO;
744        }
745    }
746    return 0;
747}
748
749// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
750// route, to the main table as well.
751// Returns 0 on success or negative errno on failure.
752WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
753                                   const char* nexthop, RouteController::TableType tableType) {
754    uint32_t table;
755    switch (tableType) {
756        case RouteController::INTERFACE: {
757            table = getRouteTableForInterface(interface);
758            if (table == RT_TABLE_UNSPEC) {
759                return -ESRCH;
760            }
761            break;
762        }
763        case RouteController::LOCAL_NETWORK: {
764            table = ROUTE_TABLE_LOCAL_NETWORK;
765            break;
766        }
767        case RouteController::LEGACY_NETWORK: {
768            table = ROUTE_TABLE_LEGACY_NETWORK;
769            break;
770        }
771        case RouteController::LEGACY_SYSTEM: {
772            table = ROUTE_TABLE_LEGACY_SYSTEM;
773            break;
774        }
775    }
776
777    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
778    // We allow apps to call requestRouteToHost() multiple times with the same route, so ignore
779    // EEXIST failures when adding routes to legacy tables.
780    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST &&
781                 (tableType == RouteController::LEGACY_NETWORK ||
782                  tableType == RouteController::LEGACY_SYSTEM))) {
783        return ret;
784    }
785
786    // If there's no nexthop, this is a directly connected route. Add it to the main table also, to
787    // let the kernel find it when validating nexthops when global routes are added.
788    if (!nexthop) {
789        ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL);
790        // A failure with action == ADD && errno == EEXIST means that the route already exists in
791        // the main table, perhaps because the kernel added it automatically as part of adding the
792        // IP address to the interface. Ignore this, but complain about everything else.
793        if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
794            return ret;
795        }
796    }
797
798    return 0;
799}
800
801// Returns 0 on success or negative errno on failure.
802WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
803    uint32_t table = getRouteTableForInterface(interface);
804    if (table == RT_TABLE_UNSPEC) {
805        return -ESRCH;
806    }
807
808    char tableString[UINT32_STRLEN];
809    snprintf(tableString, sizeof(tableString), "%u", table);
810
811    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
812        const char* argv[] = {
813            IP_PATH,
814            IP_VERSIONS[i],
815            "route",
816            "flush",
817            "table",
818            tableString,
819        };
820        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
821            ALOGE("failed to flush routes");
822            return -EREMOTEIO;
823        }
824    }
825
826    interfaceToTable.erase(interface);
827    return 0;
828}
829
830}  // namespace
831
832int RouteController::Init(unsigned localNetId) {
833    if (int ret = flushRules()) {
834        return ret;
835    }
836    if (int ret = addLegacyRouteRules()) {
837        return ret;
838    }
839    if (int ret = addLocalNetworkRules(localNetId)) {
840        return ret;
841    }
842    if (int ret = addDirectlyConnectedRule()) {
843        return ret;
844    }
845    if (int ret = addUnreachableRule()) {
846        return ret;
847    }
848    updateTableNamesFile();
849    return 0;
850}
851
852int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
853    return modifyLocalNetwork(netId, interface, ACTION_ADD);
854}
855
856int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
857    return modifyLocalNetwork(netId, interface, ACTION_DEL);
858}
859
860int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
861                                                   Permission permission) {
862    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
863        return ret;
864    }
865    updateTableNamesFile();
866    return 0;
867}
868
869int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
870                                                        Permission permission) {
871    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
872        return ret;
873    }
874    if (int ret = flushRoutes(interface)) {
875        return ret;
876    }
877    updateTableNamesFile();
878    return 0;
879}
880
881int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
882                                                  bool secure, const UidRanges& uidRanges) {
883    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
884                                       MODIFY_NON_UID_BASED_RULES)) {
885        return ret;
886    }
887    updateTableNamesFile();
888    return 0;
889}
890
891int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
892                                                       bool secure, const UidRanges& uidRanges) {
893    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
894                                       MODIFY_NON_UID_BASED_RULES)) {
895        return ret;
896    }
897    if (int ret = flushRoutes(interface)) {
898        return ret;
899    }
900    updateTableNamesFile();
901    return 0;
902}
903
904int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
905                                                     Permission oldPermission,
906                                                     Permission newPermission) {
907    // Add the new rules before deleting the old ones, to avoid race conditions.
908    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
909        return ret;
910    }
911    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
912}
913
914int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
915                                              const UidRanges& uidRanges) {
916    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
917                                !MODIFY_NON_UID_BASED_RULES);
918}
919
920int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
921                                                   bool secure, const UidRanges& uidRanges) {
922    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
923                                !MODIFY_NON_UID_BASED_RULES);
924}
925
926int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
927    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
928}
929
930int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
931                                                       Permission permission) {
932    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
933}
934
935int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
936                              TableType tableType) {
937    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
938}
939
940int RouteController::removeRoute(const char* interface, const char* destination,
941                                 const char* nexthop, TableType tableType) {
942    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
943}
944
945int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
946    return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
947}
948
949int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
950    return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
951}
952