RouteController.cpp revision 4acd34a8e95b1191318216ebad409ec5e1b3d5f0
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "UidRanges.h"
21
22#define LOG_TAG "Netd"
23#include "log/log.h"
24#include "logwrap/logwrap.h"
25#include "resolv_netid.h"
26
27#include <arpa/inet.h>
28#include <fcntl.h>
29#include <linux/fib_rules.h>
30#include <map>
31#include <net/if.h>
32#include <sys/stat.h>
33
34namespace {
35
36// BEGIN CONSTANTS --------------------------------------------------------------------------------
37
38const uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
39// const uint32_t RULE_PRIORITY_VPN_OVERRIDE_LOCAL  = 11000;
40const uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
41const uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
42const uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
43const uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
44const uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
45// const uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
46// const uint32_t RULE_PRIORITY_TETHERING           = 18000;
47const uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
48// const uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
49// const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
50const uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
51const uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
52const uint32_t RULE_PRIORITY_UNREACHABLE         = 24000;
53
54const uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
55const uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;
56
57const char* const ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
58const char* const ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";
59
60// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
61// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
62// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
63// that will warn us if upstream has given these values some other meaning.
64const uint16_t FRA_UID_START = 18;
65const uint16_t FRA_UID_END   = 19;
66static_assert(FRA_UID_START > FRA_MAX,
67             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
68             "Check that these values match what the kernel does and then update this assertion.");
69
70const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
71const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;
72
73const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};
74
75const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};
76
77const char* const IP_VERSIONS[] = {"-4", "-6"};
78
79const uid_t UID_ROOT = 0;
80const char* const OIF_NONE = NULL;
81const bool ACTION_ADD = true;
82const bool ACTION_DEL = false;
83const bool MODIFY_NON_UID_BASED_RULES = true;
84
85const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
86
87// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
88// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
89constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
90    return RTA_LENGTH(x);
91}
92
93// These are practically const, but can't be declared so, because they are used to initialize
94// non-const pointers ("void* iov_base") in iovec arrays.
95rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
96rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
97rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
98rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
99rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
100rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };
101
102rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
103rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };
104
105uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
106
107// END CONSTANTS ----------------------------------------------------------------------------------
108
109// No locks needed because RouteController is accessed only from one thread (in CommandListener).
110std::map<std::string, uint32_t> interfaceToTable;
111
112uint32_t getRouteTableForInterface(const char* interface) {
113    uint32_t index = if_nametoindex(interface);
114    if (index) {
115        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
116        interfaceToTable[interface] = index;
117        return index;
118    }
119    // If the interface goes away if_nametoindex() will return 0 but we still need to know
120    // the index so we can remove the rules and routes.
121    auto iter = interfaceToTable.find(interface);
122    if (iter == interfaceToTable.end()) {
123        ALOGE("cannot find interface %s", interface);
124        return RT_TABLE_UNSPEC;
125    }
126    return iter->second;
127}
128
129void addTableName(uint32_t table, const std::string& name, std::string* contents) {
130    char tableString[UINT32_STRLEN];
131    snprintf(tableString, sizeof(tableString), "%u", table);
132    *contents += tableString;
133    *contents += " ";
134    *contents += name;
135    *contents += "\n";
136}
137
138// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
139void updateTableNamesFile() {
140    std::string contents;
141    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
142    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
143    for (const auto& entry : interfaceToTable) {
144        addTableName(entry.second, entry.first, &contents);
145    }
146
147    mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--
148    int fd = open(RT_TABLES_PATH, O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC, mode);
149    if (fd == -1) {
150        ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
151        return;
152    }
153    // File creation is affected by umask, so make sure the right mode bits are set.
154    if (fchmod(fd, mode) == -1) {
155        ALOGE("failed to chmod %s to mode 0%o (%s)", RT_TABLES_PATH, mode, strerror(errno));
156    }
157    ssize_t bytesWritten = write(fd, contents.data(), contents.size());
158    if (bytesWritten != static_cast<ssize_t>(contents.size())) {
159        ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
160              contents.size(), strerror(errno));
161    }
162    close(fd);
163}
164
165// Sends a netlink request and expects an ack.
166// |iov| is an array of struct iovec that contains the netlink message payload.
167// The netlink header is generated by this function based on |action| and |flags|.
168// Returns -errno if there was an error or if the kernel reported an error.
169WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
170    nlmsghdr nlmsg = {
171        .nlmsg_type = action,
172        .nlmsg_flags = flags,
173    };
174    iov[0].iov_base = &nlmsg;
175    iov[0].iov_len = sizeof(nlmsg);
176    for (int i = 0; i < iovlen; ++i) {
177        nlmsg.nlmsg_len += iov[i].iov_len;
178    }
179
180    int ret;
181    struct {
182        nlmsghdr msg;
183        nlmsgerr err;
184    } response;
185
186    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
187    if (sock != -1 &&
188            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
189                    sizeof(NETLINK_ADDRESS)) != -1 &&
190            writev(sock, iov, iovlen) != -1 &&
191            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
192        if (ret == sizeof(response)) {
193            ret = response.err.error;  // Netlink errors are negative errno.
194            if (ret) {
195                ALOGE("netlink response contains error (%s)", strerror(-ret));
196            }
197        } else {
198            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
199            ret = -EBADMSG;
200        }
201    } else {
202        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
203        ret = -errno;
204    }
205
206    if (sock != -1) {
207        close(sock);
208    }
209
210    return ret;
211}
212
213// Adds or removes a routing rule for IPv4 and IPv6.
214//
215// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
216//   returns ENETUNREACH.
217// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
218//   ignored.
219// + If |interface| is non-NULL, the rule matches the specified outgoing interface.
220//
221// Returns 0 on success or negative errno on failure.
222WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
223                                    uint32_t fwmark, uint32_t mask, const char* interface,
224                                    uid_t uidStart, uid_t uidEnd) {
225    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
226    if (fwmark & ~mask) {
227        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
228        return -ERANGE;
229    }
230
231    // The interface name must include exactly one terminating NULL and be properly padded, or older
232    // kernels will refuse to delete rules.
233    uint16_t paddingLength = 0;
234    size_t interfaceLength = 0;
235    char oifname[IFNAMSIZ];
236    if (interface != OIF_NONE) {
237        interfaceLength = strlcpy(oifname, interface, IFNAMSIZ) + 1;
238        if (interfaceLength > IFNAMSIZ) {
239            ALOGE("interface name too long (%zu > %u)", interfaceLength, IFNAMSIZ);
240            return -ENAMETOOLONG;
241        }
242        paddingLength = RTA_SPACE(interfaceLength) - RTA_LENGTH(interfaceLength);
243    }
244
245    // Either both start and end UID must be specified, or neither.
246    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
247        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
248        return -EUSERS;
249    }
250    bool isUidRule = (uidStart != INVALID_UID);
251
252    // Assemble a rule request and put it in an array of iovec structures.
253    fib_rule_hdr rule = {
254        .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
255                                                                  FR_ACT_UNREACHABLE),
256    };
257
258    rtattr fraOifname = { U16_RTA_LENGTH(interfaceLength), FRA_OIFNAME };
259
260    iovec iov[] = {
261        { NULL,              0 },
262        { &rule,             sizeof(rule) },
263        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
264        { &priority,         sizeof(priority) },
265        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
266        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
267        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
268        { &fwmark,           mask ? sizeof(fwmark) : 0 },
269        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
270        { &mask,             mask ? sizeof(mask) : 0 },
271        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
272        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
273        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
274        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
275        { &fraOifname,       interface != OIF_NONE ? sizeof(fraOifname) : 0 },
276        { oifname,           interfaceLength },
277        { PADDING_BUFFER,    paddingLength },
278    };
279
280    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
281    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
282        rule.family = AF_FAMILIES[i];
283        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
284            return ret;
285        }
286    }
287
288    return 0;
289}
290
291// Adds or deletes an IPv4 or IPv6 route.
292// Returns 0 on success or negative errno on failure.
293WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
294                                     const char* destination, const char* nexthop) {
295    // At least the destination must be non-null.
296    if (!destination) {
297        ALOGE("null destination");
298        return -EFAULT;
299    }
300
301    // Parse the prefix.
302    uint8_t rawAddress[sizeof(in6_addr)];
303    uint8_t family;
304    uint8_t prefixLength;
305    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
306                                &prefixLength);
307    if (rawLength < 0) {
308        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
309        return rawLength;
310    }
311
312    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
313        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
314        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
315    }
316
317    // If an interface was specified, find the ifindex.
318    uint32_t ifindex;
319    if (interface != OIF_NONE) {
320        ifindex = if_nametoindex(interface);
321        if (!ifindex) {
322            ALOGE("cannot find interface %s", interface);
323            return -ENODEV;
324        }
325    }
326
327    // If a nexthop was specified, parse it as the same family as the prefix.
328    uint8_t rawNexthop[sizeof(in6_addr)];
329    if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
330        ALOGE("inet_pton failed for nexthop %s", nexthop);
331        return -EINVAL;
332    }
333
334    // Assemble a rtmsg and put it in an array of iovec structures.
335    rtmsg route = {
336        .rtm_protocol = RTPROT_STATIC,
337        .rtm_type = RTN_UNICAST,
338        .rtm_family = family,
339        .rtm_dst_len = prefixLength,
340    };
341
342    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
343    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
344
345    iovec iov[] = {
346        { NULL,          0 },
347        { &route,        sizeof(route) },
348        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
349        { &table,        sizeof(table) },
350        { &rtaDst,       sizeof(rtaDst) },
351        { rawAddress,    static_cast<size_t>(rawLength) },
352        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
353        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
354        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
355        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
356    };
357
358    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
359                                                NETLINK_REQUEST_FLAGS;
360    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
361}
362
363// Add rules to allow legacy routes added through the requestRouteToHost() API.
364WARN_UNUSED_RESULT int AddLegacyRouteRules() {
365    Fwmark fwmark;
366    Fwmark mask;
367
368    fwmark.explicitlySelected = false;
369    mask.explicitlySelected = true;
370
371    // Rules to allow legacy routes to override the default network.
372    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
373                               fwmark.intValue, mask.intValue, OIF_NONE, INVALID_UID,
374                               INVALID_UID)) {
375        return ret;
376    }
377    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
378                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue,
379                               mask.intValue, OIF_NONE, INVALID_UID, INVALID_UID)) {
380        return ret;
381    }
382
383    fwmark.permission = PERMISSION_SYSTEM;
384    mask.permission = PERMISSION_SYSTEM;
385
386    // A rule to allow legacy routes from system apps to override VPNs.
387    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
388                        fwmark.intValue, mask.intValue, OIF_NONE, INVALID_UID, INVALID_UID);
389}
390
391// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
392// rule, but with a lower priority. Since the default network rule points to a table with a default
393// route, the rule we're adding will never be used for normal routing lookups. However, the kernel
394// may fall-through to it to find directly-connected routes when it validates that a nexthop (in a
395// route being added) is reachable.
396WARN_UNUSED_RESULT int AddDirectlyConnectedRule() {
397    Fwmark fwmark;
398    Fwmark mask;
399
400    fwmark.netId = NETID_UNSET;
401    mask.netId = FWMARK_NET_ID_MASK;
402
403    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
404                        fwmark.intValue, mask.intValue, OIF_NONE, UID_ROOT, UID_ROOT);
405}
406
407// Add a rule to preempt the pre-defined "from all lookup main" rule. Packets that reach this rule
408// will be null-routed, and won't fall-through to the main table.
409WARN_UNUSED_RESULT int AddUnreachableRule() {
410    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
411                        MARK_UNSET, OIF_NONE, INVALID_UID, INVALID_UID);
412}
413
414// An iptables rule to mark incoming packets on a network with the netId of the network.
415//
416// This is so that the kernel can:
417// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
418//   replies, SYN-ACKs, etc).
419// + Mark sockets that accept connections from this interface so that the connection stays on the
420//   same interface.
421WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
422                                                Permission permission, bool add) {
423    Fwmark fwmark;
424
425    fwmark.netId = netId;
426    fwmark.explicitlySelected = true;
427    fwmark.protectedFromVpn = true;
428    fwmark.permission = permission;
429
430    char markString[UINT32_HEX_STRLEN];
431    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
432
433    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
434                     "MARK", "--set-mark", markString, NULL)) {
435        ALOGE("failed to change iptables rule that sets incoming packet mark");
436        return -EREMOTEIO;
437    }
438
439    return 0;
440}
441
442// A rule to route traffic based on an explicitly chosen network.
443//
444// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
445//
446// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
447// to check it again in the rules here, because a network's permissions may have been updated via
448// modifyNetworkPermission().
449WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
450                                                 Permission permission, uid_t uidStart,
451                                                 uid_t uidEnd, bool add) {
452    Fwmark fwmark;
453    Fwmark mask;
454
455    fwmark.netId = netId;
456    mask.netId = FWMARK_NET_ID_MASK;
457
458    fwmark.explicitlySelected = true;
459    mask.explicitlySelected = true;
460
461    fwmark.permission = permission;
462    mask.permission = permission;
463
464    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
465                        fwmark.intValue, mask.intValue, OIF_NONE, uidStart, uidEnd);
466}
467
468// A rule to route traffic based on a chosen outgoing interface.
469//
470// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
471// the outgoing interface (typically for link-local communications).
472WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
473                                                 Permission permission, uid_t uidStart,
474                                                 uid_t uidEnd, bool add) {
475    Fwmark fwmark;
476    Fwmark mask;
477
478    fwmark.permission = permission;
479    mask.permission = permission;
480
481    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
482                        fwmark.intValue, mask.intValue, interface, uidStart, uidEnd);
483}
484
485// A rule to route traffic based on the chosen network.
486//
487// This is for sockets that have not explicitly requested a particular network, but have been
488// bound to one when they called connect(). This ensures that sockets connected on a particular
489// network stay on that network even if the default network changes.
490WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
491                                                 Permission permission, bool add) {
492    Fwmark fwmark;
493    Fwmark mask;
494
495    fwmark.netId = netId;
496    mask.netId = FWMARK_NET_ID_MASK;
497
498    fwmark.explicitlySelected = false;
499    mask.explicitlySelected = true;
500
501    fwmark.permission = permission;
502    mask.permission = permission;
503
504    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
505                        fwmark.intValue, mask.intValue, OIF_NONE, INVALID_UID, INVALID_UID);
506}
507
508// A rule to route all traffic from a given set of UIDs to go over the VPN.
509//
510// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
511// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
512// bypass the VPN if the protectedFromVpn bit is set.
513WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
514                                             bool add) {
515    Fwmark fwmark;
516    Fwmark mask;
517
518    fwmark.protectedFromVpn = false;
519    mask.protectedFromVpn = true;
520
521    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_SECURE_VPN, table,
522                        fwmark.intValue, mask.intValue, OIF_NONE, uidStart, uidEnd);
523}
524
525// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
526// set of UIDs.
527//
528// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
529// target set, but where the DnsProxyListener itself is not.
530WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool add) {
531    Fwmark fwmark;
532    Fwmark mask;
533
534    fwmark.netId = netId;
535    mask.netId = FWMARK_NET_ID_MASK;
536
537    fwmark.permission = PERMISSION_SYSTEM;
538    mask.permission = PERMISSION_SYSTEM;
539
540    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_SECURE_VPN, table,
541                        fwmark.intValue, mask.intValue, OIF_NONE, INVALID_UID, INVALID_UID);
542}
543
544WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
545                                             Permission permission, bool add) {
546    uint32_t table = getRouteTableForInterface(interface);
547    if (table == RT_TABLE_UNSPEC) {
548        return -ESRCH;
549    }
550
551    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
552        return ret;
553    }
554    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
555                                            add)) {
556        return ret;
557    }
558    if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
559                                            add)) {
560        return ret;
561    }
562    return modifyImplicitNetworkRule(netId, table, permission, add);
563}
564
565WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
566                                            const UidRanges& uidRanges, bool add,
567                                            bool modifyNonUidBasedRules) {
568    uint32_t table = getRouteTableForInterface(interface);
569    if (table == RT_TABLE_UNSPEC) {
570        return -ESRCH;
571    }
572
573    for (const UidRanges::Range& range : uidRanges.getRanges()) {
574        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
575                                                range.second, add)) {
576            return ret;
577        }
578        if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
579                                                range.second, add)) {
580            return ret;
581        }
582        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, add)) {
583            return ret;
584        }
585    }
586
587    if (modifyNonUidBasedRules) {
588        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
589            return ret;
590        }
591        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT,
592                                                add)) {
593            return ret;
594        }
595        return modifyVpnSystemPermissionRule(netId, table, add);
596    }
597
598    return 0;
599}
600
601WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
602                                            Permission permission) {
603    uint32_t table = getRouteTableForInterface(interface);
604    if (table == RT_TABLE_UNSPEC) {
605        return -ESRCH;
606    }
607
608    Fwmark fwmark;
609    Fwmark mask;
610
611    fwmark.netId = NETID_UNSET;
612    mask.netId = FWMARK_NET_ID_MASK;
613
614    fwmark.permission = permission;
615    mask.permission = permission;
616
617    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
618                        mask.intValue, OIF_NONE, INVALID_UID, INVALID_UID);
619}
620
621// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
622// route, to the main table as well.
623// Returns 0 on success or negative errno on failure.
624WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
625                                   const char* nexthop, RouteController::TableType tableType) {
626    uint32_t table;
627    switch (tableType) {
628        case RouteController::INTERFACE: {
629            table = getRouteTableForInterface(interface);
630            if (table == RT_TABLE_UNSPEC) {
631                return -ESRCH;
632            }
633            break;
634        }
635        case RouteController::LEGACY_NETWORK: {
636            table = ROUTE_TABLE_LEGACY_NETWORK;
637            break;
638        }
639        case RouteController::LEGACY_SYSTEM: {
640            table = ROUTE_TABLE_LEGACY_SYSTEM;
641            break;
642        }
643    }
644
645    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
646    // We allow apps to call requestRouteToHost() multiple times with the same route, so ignore
647    // EEXIST failures when adding routes to legacy tables.
648    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST &&
649                 tableType != RouteController::INTERFACE)) {
650        return ret;
651    }
652
653    // If there's no nexthop, this is a directly connected route. Add it to the main table also, to
654    // let the kernel find it when validating nexthops when global routes are added.
655    if (!nexthop) {
656        ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL);
657        // A failure with action == ADD && errno == EEXIST means that the route already exists in
658        // the main table, perhaps because the kernel added it automatically as part of adding the
659        // IP address to the interface. Ignore this, but complain about everything else.
660        if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
661            return ret;
662        }
663    }
664
665    return 0;
666}
667
668// Returns 0 on success or negative errno on failure.
669WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
670    uint32_t table = getRouteTableForInterface(interface);
671    if (table == RT_TABLE_UNSPEC) {
672        return -ESRCH;
673    }
674
675    char tableString[UINT32_STRLEN];
676    snprintf(tableString, sizeof(tableString), "%u", table);
677
678    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
679        const char* argv[] = {
680            IP_PATH,
681            IP_VERSIONS[i],
682            "route",
683            "flush",
684            "table",
685            tableString,
686        };
687        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
688            ALOGE("failed to flush routes");
689            return -EREMOTEIO;
690        }
691    }
692
693    interfaceToTable.erase(interface);
694    return 0;
695}
696
697}  // namespace
698
699int RouteController::Init() {
700    if (int ret = AddDirectlyConnectedRule()) {
701        return ret;
702    }
703    if (int ret = AddLegacyRouteRules()) {
704        return ret;
705    }
706    // TODO: Enable once we are sure everything works.
707    if (false) {
708        if (int ret = AddUnreachableRule()) {
709            return ret;
710        }
711    }
712    updateTableNamesFile();
713    return 0;
714}
715
716int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
717                                                   Permission permission) {
718    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
719        return ret;
720    }
721    updateTableNamesFile();
722    return 0;
723}
724
725int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
726                                                        Permission permission) {
727    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
728        return ret;
729    }
730    if (int ret = flushRoutes(interface)) {
731        return ret;
732    }
733    updateTableNamesFile();
734    return 0;
735}
736
737int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
738                                                  const UidRanges& uidRanges) {
739    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, ACTION_ADD,
740                                       MODIFY_NON_UID_BASED_RULES)) {
741        return ret;
742    }
743    updateTableNamesFile();
744    return 0;
745}
746
747int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
748                                                       const UidRanges& uidRanges) {
749    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, ACTION_DEL,
750                                       MODIFY_NON_UID_BASED_RULES)) {
751        return ret;
752    }
753    if (int ret = flushRoutes(interface)) {
754        return ret;
755    }
756    updateTableNamesFile();
757    return 0;
758}
759
760int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
761                                                     Permission oldPermission,
762                                                     Permission newPermission) {
763    // Add the new rules before deleting the old ones, to avoid race conditions.
764    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
765        return ret;
766    }
767    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
768}
769
770int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface,
771                                              const UidRanges& uidRanges) {
772    return modifyVirtualNetwork(netId, interface, uidRanges, ACTION_ADD,
773                                !MODIFY_NON_UID_BASED_RULES);
774}
775
776int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
777                                                   const UidRanges& uidRanges) {
778    return modifyVirtualNetwork(netId, interface, uidRanges, ACTION_DEL,
779                                !MODIFY_NON_UID_BASED_RULES);
780}
781
782int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
783    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
784}
785
786int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
787                                                       Permission permission) {
788    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
789}
790
791int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
792                              TableType tableType) {
793    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
794}
795
796int RouteController::removeRoute(const char* interface, const char* destination,
797                                 const char* nexthop, TableType tableType) {
798    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
799}
800