RouteController.cpp revision 57947f02c00bb03651e3f9427c880211c689db7f
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "UidRanges.h"
21#include "DummyNetwork.h"
22
23#define LOG_TAG "Netd"
24#include "log/log.h"
25#include "logwrap/logwrap.h"
26#include "netutils/ifc.h"
27#include "resolv_netid.h"
28
29#include <arpa/inet.h>
30#include <fcntl.h>
31#include <linux/fib_rules.h>
32#include <map>
33#include <net/if.h>
34#include <sys/stat.h>
35
36namespace {
37
38// BEGIN CONSTANTS --------------------------------------------------------------------------------
39
// Priorities passed to modifyIpRule() for the policy routing rules managed by this file, one
// constant per kind of rule. RULE_PRIORITY_UNREACHABLE is deliberately placed close to the end of
// the priority list (see addUnreachableRule()).
const uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
const uint32_t RULE_PRIORITY_VPN_OVERRIDE_OIF    = 10500;
const uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
const uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
const uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
const uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
const uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
const uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
const uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
const uint32_t RULE_PRIORITY_TETHERING           = 18000;
const uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
const uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
const uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
const uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
const uint32_t RULE_PRIORITY_UNREACHABLE         = 32000;
56
// Numbers of the fixed (not per-interface) routing tables used by this file.
const uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
const uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
const uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Names that updateTableNamesFile() writes next to the fixed table numbers above.
const char* const ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
const char* const ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
const char* const ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

// Names that updateTableNamesFile() writes for RT_TABLE_LOCAL and RT_TABLE_MAIN.
const char* const ROUTE_TABLE_NAME_LOCAL = "local";
const char* const ROUTE_TABLE_NAME_MAIN  = "main";
67
// Attribute types used by FRATTR_UID_START / FRATTR_UID_END to describe the UID range of a rule.
//
// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
// that will warn us if upstream has given these values some other meaning.
const uint16_t FRA_UID_START = 18;
const uint16_t FRA_UID_END   = 19;
// Fails to compile once the kernel headers' FRA_MAX reaches FRA_UID_START.
static_assert(FRA_UID_START > FRA_MAX,
             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
             "Check that these values match what the kernel does and then update this assertion.");
77
// Flags for the netlink requests sent by this file. Every request asks for an ack; the "create"
// variant is what modifyIpRule() and modifyIpRoute() use for RTM_NEWRULE / RTM_NEWROUTE.
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;

// Address that sendNetlinkRequest() connects its netlink socket to.
const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};

// Address families for which modifyIpRule() sends each rule request.
const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};

// NOTE(review): not referenced in the part of the file reviewed here; presumably the IPv4/IPv6
// switches for an "ip" command line -- confirm against the users further down.
const char* const IP_VERSIONS[] = {"-4", "-6"};

const uid_t UID_ROOT = 0;
// Interface selectors for modifyIpRule(): "lo" as incoming interface, or NULL for "don't match".
const char* const IIF_LOOPBACK = "lo";
const char* const IIF_NONE = NULL;
const char* const OIF_NONE = NULL;
// Readable values for the boolean |add| parameter of the modify*() helpers.
const bool ACTION_ADD = true;
const bool ACTION_DEL = false;
// NOTE(review): presumably the value for modifyVirtualNetwork()'s |modifyNonUidBasedRules|
// parameter -- confirm against its callers.
const bool MODIFY_NON_UID_BASED_RULES = true;

// Path, open() flags and file mode used by updateTableNamesFile().
const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
const int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--

// NOTE(review): not referenced in the part of the file reviewed here; the name suggests a retry
// count for flushing routes -- confirm against its user.
const unsigned ROUTE_FLUSH_ATTEMPTS = 2;
100
101// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
102// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
103constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
104    return RTA_LENGTH(x);
105}
106
// Pre-built attribute headers for the netlink messages assembled in modifyIpRule() (FRATTR_*) and
// modifyIpRoute() (RTATTR_*). Each header is placed in the iovec array immediately before the
// value it describes.
//
// These are practically const, but can't be declared so, because they are used to initialize
// non-const pointers ("void* iov_base") in iovec arrays.
rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };

rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };

// Zero bytes that modifyIpRule() appends after an interface name to pad the attribute.
uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
120
121// END CONSTANTS ----------------------------------------------------------------------------------
122
// Interface name -> routing table number, filled in by getRouteTableForInterface() so that the
// table can still be looked up after the interface has gone away. Its entries are also written
// out by updateTableNamesFile().
//
// No locks needed because RouteController is accessed only from one thread (in CommandListener).
std::map<std::string, uint32_t> interfaceToTable;
125
126uint32_t getRouteTableForInterface(const char* interface) {
127    uint32_t index = if_nametoindex(interface);
128    if (index) {
129        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
130        interfaceToTable[interface] = index;
131        return index;
132    }
133    // If the interface goes away if_nametoindex() will return 0 but we still need to know
134    // the index so we can remove the rules and routes.
135    auto iter = interfaceToTable.find(interface);
136    if (iter == interfaceToTable.end()) {
137        ALOGE("cannot find interface %s", interface);
138        return RT_TABLE_UNSPEC;
139    }
140    return iter->second;
141}
142
143void addTableName(uint32_t table, const std::string& name, std::string* contents) {
144    char tableString[UINT32_STRLEN];
145    snprintf(tableString, sizeof(tableString), "%u", table);
146    *contents += tableString;
147    *contents += " ";
148    *contents += name;
149    *contents += "\n";
150}
151
152// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
153void updateTableNamesFile() {
154    std::string contents;
155
156    addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
157    addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
158
159    addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
160    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
161    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
162
163    for (const auto& entry : interfaceToTable) {
164        addTableName(entry.second, entry.first, &contents);
165    }
166
167    int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
168    if (fd == -1) {
169        ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
170        return;
171    }
172    // File creation is affected by umask, so make sure the right mode bits are set.
173    if (fchmod(fd, RT_TABLES_MODE) == -1) {
174        ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
175    }
176    ssize_t bytesWritten = write(fd, contents.data(), contents.size());
177    if (bytesWritten != static_cast<ssize_t>(contents.size())) {
178        ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
179              contents.size(), strerror(errno));
180    }
181    close(fd);
182}
183
184// Sends a netlink request and expects an ack.
185// |iov| is an array of struct iovec that contains the netlink message payload.
186// The netlink header is generated by this function based on |action| and |flags|.
187// Returns -errno if there was an error or if the kernel reported an error.
188WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
189    nlmsghdr nlmsg = {
190        .nlmsg_type = action,
191        .nlmsg_flags = flags,
192    };
193    iov[0].iov_base = &nlmsg;
194    iov[0].iov_len = sizeof(nlmsg);
195    for (int i = 0; i < iovlen; ++i) {
196        nlmsg.nlmsg_len += iov[i].iov_len;
197    }
198
199    int ret;
200    struct {
201        nlmsghdr msg;
202        nlmsgerr err;
203    } response;
204
205    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
206    if (sock != -1 &&
207            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
208                    sizeof(NETLINK_ADDRESS)) != -1 &&
209            writev(sock, iov, iovlen) != -1 &&
210            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
211        if (ret == sizeof(response)) {
212            ret = response.err.error;  // Netlink errors are negative errno.
213            if (ret) {
214                ALOGE("netlink response contains error (%s)", strerror(-ret));
215            }
216        } else {
217            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
218            ret = -EBADMSG;
219        }
220    } else {
221        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
222        ret = -errno;
223    }
224
225    if (sock != -1) {
226        close(sock);
227    }
228
229    return ret;
230}
231
232// Returns 0 on success or negative errno on failure.
233int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
234    if (!input) {
235        *length = 0;
236        *padding = 0;
237        return 0;
238    }
239    *length = strlcpy(name, input, IFNAMSIZ) + 1;
240    if (*length > IFNAMSIZ) {
241        ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
242        return -ENAMETOOLONG;
243    }
244    *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
245    return 0;
246}
247
248// Adds or removes a routing rule for IPv4 and IPv6.
249//
250// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
251//   returns ENETUNREACH.
252// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
253//   ignored.
254// + If |iif| is non-NULL, the rule matches the specified incoming interface.
255// + If |oif| is non-NULL, the rule matches the specified outgoing interface.
256// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
257//   range (inclusive). Otherwise, the rule matches packets from all UIDs.
258//
259// Returns 0 on success or negative errno on failure.
260WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
261                                    uint32_t fwmark, uint32_t mask, const char* iif,
262                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
263    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
264    if (fwmark & ~mask) {
265        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
266        return -ERANGE;
267    }
268
269    // Interface names must include exactly one terminating NULL and be properly padded, or older
270    // kernels will refuse to delete rules.
271    char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
272    size_t iifLength, oifLength;
273    uint16_t iifPadding, oifPadding;
274    if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
275        return ret;
276    }
277    if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
278        return ret;
279    }
280
281    // Either both start and end UID must be specified, or neither.
282    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
283        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
284        return -EUSERS;
285    }
286    bool isUidRule = (uidStart != INVALID_UID);
287
288    // Assemble a rule request and put it in an array of iovec structures.
289    fib_rule_hdr rule = {
290        .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
291                                                                  FR_ACT_UNREACHABLE),
292    };
293
294    rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
295    rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
296
297    iovec iov[] = {
298        { NULL,              0 },
299        { &rule,             sizeof(rule) },
300        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
301        { &priority,         sizeof(priority) },
302        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
303        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
304        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
305        { &fwmark,           mask ? sizeof(fwmark) : 0 },
306        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
307        { &mask,             mask ? sizeof(mask) : 0 },
308        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
309        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
310        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
311        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
312        { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
313        { iifName,           iifLength },
314        { PADDING_BUFFER,    iifPadding },
315        { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
316        { oifName,           oifLength },
317        { PADDING_BUFFER,    oifPadding },
318    };
319
320    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
321    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
322        rule.family = AF_FAMILIES[i];
323        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
324            return ret;
325        }
326    }
327
328    return 0;
329}
330
331WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
332                                    uint32_t fwmark, uint32_t mask) {
333    return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
334                        INVALID_UID);
335}
336
337// Adds or deletes an IPv4 or IPv6 route.
338// Returns 0 on success or negative errno on failure.
339WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
340                                     const char* destination, const char* nexthop) {
341    // At least the destination must be non-null.
342    if (!destination) {
343        ALOGE("null destination");
344        return -EFAULT;
345    }
346
347    // Parse the prefix.
348    uint8_t rawAddress[sizeof(in6_addr)];
349    uint8_t family;
350    uint8_t prefixLength;
351    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
352                                &prefixLength);
353    if (rawLength < 0) {
354        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
355        return rawLength;
356    }
357
358    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
359        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
360        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
361    }
362
363    uint8_t type = RTN_UNICAST;
364    uint32_t ifindex;
365    uint8_t rawNexthop[sizeof(in6_addr)];
366
367    if (nexthop && !strcmp(nexthop, "unreachable")) {
368        type = RTN_UNREACHABLE;
369        // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
370        // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
371        // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
372        interface = OIF_NONE;
373        nexthop = NULL;
374    } else if (nexthop && !strcmp(nexthop, "throw")) {
375        type = RTN_THROW;
376        interface = OIF_NONE;
377        nexthop = NULL;
378    } else {
379        // If an interface was specified, find the ifindex.
380        if (interface != OIF_NONE) {
381            ifindex = if_nametoindex(interface);
382            if (!ifindex) {
383                ALOGE("cannot find interface %s", interface);
384                return -ENODEV;
385            }
386        }
387
388        // If a nexthop was specified, parse it as the same family as the prefix.
389        if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
390            ALOGE("inet_pton failed for nexthop %s", nexthop);
391            return -EINVAL;
392        }
393    }
394
395    // Assemble a rtmsg and put it in an array of iovec structures.
396    rtmsg route = {
397        .rtm_protocol = RTPROT_STATIC,
398        .rtm_type = type,
399        .rtm_family = family,
400        .rtm_dst_len = prefixLength,
401        .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
402    };
403
404    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
405    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
406
407    iovec iov[] = {
408        { NULL,          0 },
409        { &route,        sizeof(route) },
410        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
411        { &table,        sizeof(table) },
412        { &rtaDst,       sizeof(rtaDst) },
413        { rawAddress,    static_cast<size_t>(rawLength) },
414        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
415        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
416        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
417        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
418    };
419
420    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
421                                                NETLINK_REQUEST_FLAGS;
422    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
423}
424
425// An iptables rule to mark incoming packets on a network with the netId of the network.
426//
427// This is so that the kernel can:
428// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
429//   replies, SYN-ACKs, etc).
430// + Mark sockets that accept connections from this interface so that the connection stays on the
431//   same interface.
432WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
433                                                Permission permission, bool add) {
434    Fwmark fwmark;
435
436    fwmark.netId = netId;
437    fwmark.explicitlySelected = true;
438    fwmark.protectedFromVpn = true;
439    fwmark.permission = permission;
440
441    char markString[UINT32_HEX_STRLEN];
442    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
443
444    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
445                     "MARK", "--set-mark", markString, NULL)) {
446        ALOGE("failed to change iptables rule that sets incoming packet mark");
447        return -EREMOTEIO;
448    }
449
450    return 0;
451}
452
453// A rule to route responses to the local network forwarded via the VPN.
454//
455// When a VPN is in effect, packets from the local network to upstream networks are forwarded into
456// the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
457WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
458    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
459                        ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
460                        INVALID_UID, INVALID_UID);
461}
462
463// A rule to route all traffic from a given set of UIDs to go over the VPN.
464//
465// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
466// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
467// bypass the VPN if the protectedFromVpn bit is set.
468WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
469                                             bool secure, bool add) {
470    Fwmark fwmark;
471    Fwmark mask;
472
473    fwmark.protectedFromVpn = false;
474    mask.protectedFromVpn = true;
475
476    uint32_t priority;
477
478    if (secure) {
479        priority = RULE_PRIORITY_SECURE_VPN;
480    } else {
481        priority = RULE_PRIORITY_BYPASSABLE_VPN;
482
483        fwmark.explicitlySelected = false;
484        mask.explicitlySelected = true;
485    }
486
487    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
488                        mask.intValue, IIF_LOOPBACK, OIF_NONE, uidStart, uidEnd);
489}
490
491// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
492// set of UIDs.
493//
494// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
495// target set, but where the DnsProxyListener itself is not.
496WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
497                                                     bool add) {
498    Fwmark fwmark;
499    Fwmark mask;
500
501    fwmark.netId = netId;
502    mask.netId = FWMARK_NET_ID_MASK;
503
504    fwmark.permission = PERMISSION_SYSTEM;
505    mask.permission = PERMISSION_SYSTEM;
506
507    uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
508
509    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
510                        mask.intValue);
511}
512
513// A rule to route traffic based on an explicitly chosen network.
514//
515// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
516//
517// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
518// to check it again in the rules here, because a network's permissions may have been updated via
519// modifyNetworkPermission().
520WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
521                                                 Permission permission, uid_t uidStart,
522                                                 uid_t uidEnd, bool add) {
523    Fwmark fwmark;
524    Fwmark mask;
525
526    fwmark.netId = netId;
527    mask.netId = FWMARK_NET_ID_MASK;
528
529    fwmark.explicitlySelected = true;
530    mask.explicitlySelected = true;
531
532    fwmark.permission = permission;
533    mask.permission = permission;
534
535    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
536                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
537}
538
539// A rule to route traffic based on a chosen outgoing interface.
540//
541// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
542// the outgoing interface (typically for link-local communications).
543WARN_UNUSED_RESULT int modifyOutputInterfaceRules(const char* interface, uint32_t table,
544                                                  Permission permission, uid_t uidStart,
545                                                  uid_t uidEnd, bool add) {
546    Fwmark fwmark;
547    Fwmark mask;
548
549    fwmark.permission = permission;
550    mask.permission = permission;
551
552    // If this rule does not specify a UID range, then also add a corresponding high-priority rule
553    // for UID. This covers forwarded packets and system daemons such as the tethering DHCP server.
554    if (uidStart == INVALID_UID && uidEnd == INVALID_UID) {
555        if (int ret = modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OVERRIDE_OIF,
556                                   table, fwmark.intValue, mask.intValue, IIF_NONE, interface,
557                                   UID_ROOT, UID_ROOT)) {
558            return ret;
559        }
560    }
561
562    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
563                        fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
564}
565
566// A rule to route traffic based on the chosen network.
567//
568// This is for sockets that have not explicitly requested a particular network, but have been
569// bound to one when they called connect(). This ensures that sockets connected on a particular
570// network stay on that network even if the default network changes.
571WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
572                                                 Permission permission, bool add) {
573    Fwmark fwmark;
574    Fwmark mask;
575
576    fwmark.netId = netId;
577    mask.netId = FWMARK_NET_ID_MASK;
578
579    fwmark.explicitlySelected = false;
580    mask.explicitlySelected = true;
581
582    fwmark.permission = permission;
583    mask.permission = permission;
584
585    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
586                        fwmark.intValue, mask.intValue);
587}
588
589// A rule to enable split tunnel VPNs.
590//
591// If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to
592// go over the default network, provided it wasn't explicitly restricted to the VPN and has the
593// permissions required by the default network.
594WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
595                                                const char* physicalInterface,
596                                                Permission permission) {
597    uint32_t table = getRouteTableForInterface(physicalInterface);
598    if (table == RT_TABLE_UNSPEC) {
599        return -ESRCH;
600    }
601
602    Fwmark fwmark;
603    Fwmark mask;
604
605    fwmark.netId = vpnNetId;
606    mask.netId = FWMARK_NET_ID_MASK;
607
608    fwmark.explicitlySelected = false;
609    mask.explicitlySelected = true;
610
611    fwmark.permission = permission;
612    mask.permission = permission;
613
614    return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue,
615                        mask.intValue);
616}
617
618// Add rules to allow legacy routes added through the requestRouteToHost() API.
619WARN_UNUSED_RESULT int addLegacyRouteRules() {
620    Fwmark fwmark;
621    Fwmark mask;
622
623    fwmark.explicitlySelected = false;
624    mask.explicitlySelected = true;
625
626    // Rules to allow legacy routes to override the default network.
627    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
628                               fwmark.intValue, mask.intValue)) {
629        return ret;
630    }
631    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
632                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
633        return ret;
634    }
635
636    fwmark.permission = PERMISSION_SYSTEM;
637    mask.permission = PERMISSION_SYSTEM;
638
639    // A rule to allow legacy routes from system apps to override VPNs.
640    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
641                        fwmark.intValue, mask.intValue);
642}
643
644// Add rules to lookup the local network when specified explicitly or otherwise.
645WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
646    if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
647                                            INVALID_UID, INVALID_UID, ACTION_ADD)) {
648        return ret;
649    }
650
651    Fwmark fwmark;
652    Fwmark mask;
653
654    fwmark.explicitlySelected = false;
655    mask.explicitlySelected = true;
656
657    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
658                        fwmark.intValue, mask.intValue);
659}
660
661int configureDummyNetwork() {
662    const char *interface = DummyNetwork::INTERFACE_NAME;
663    uint32_t table = getRouteTableForInterface(interface);
664    if (table == RT_TABLE_UNSPEC) {
665        // getRouteTableForInterface has already looged an error.
666        return -ESRCH;
667    }
668
669    ifc_init();
670    int ret = ifc_up(interface);
671    ifc_close();
672    if (ret) {
673        ALOGE("Can't bring up %s: %s", interface, strerror(errno));
674        return -errno;
675    }
676
677    if ((ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE,
678                                          INVALID_UID, INVALID_UID, ACTION_ADD))) {
679        ALOGE("Can't create oif rules for %s: %s", interface, strerror(-ret));
680        return ret;
681    }
682
683    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "0.0.0.0/0", NULL))) {
684        ALOGE("Can't add IPv4 default route to %s: %s", interface, strerror(-ret));
685        return ret;
686    }
687
688    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "::/0", NULL))) {
689        ALOGE("Can't add IPv6 default route to %s: %s", interface, strerror(-ret));
690        return ret;
691    }
692
693    return 0;
694}
695
696// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
697// rule, but with a lower priority. We will never create routes in the main table; it should only be
698// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
699// This is necessary, for example, when adding a route through a directly-connected gateway: in
700// order to add the route, there must already be a directly-connected route that covers the gateway.
701WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
702    Fwmark fwmark;
703    Fwmark mask;
704
705    fwmark.netId = NETID_UNSET;
706    mask.netId = FWMARK_NET_ID_MASK;
707
708    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
709                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
710}
711
712// Add an explicit unreachable rule close to the end of the prioriy list to make it clear that
713// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
714// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
715// rule will hopefully make things even clearer.
716WARN_UNUSED_RESULT int addUnreachableRule() {
717    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
718                        MARK_UNSET);
719}
720
721WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
722    if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
723        return ret;
724    }
725    return modifyOutputInterfaceRules(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
726                                      INVALID_UID, INVALID_UID, add);
727}
728
729WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
730                                             Permission permission, bool add) {
731    uint32_t table = getRouteTableForInterface(interface);
732    if (table == RT_TABLE_UNSPEC) {
733        return -ESRCH;
734    }
735
736    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
737        return ret;
738    }
739    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
740                                            add)) {
741        return ret;
742    }
743    if (int ret = modifyOutputInterfaceRules(interface, table, permission, INVALID_UID, INVALID_UID,
744                                            add)) {
745        return ret;
746    }
747    return modifyImplicitNetworkRule(netId, table, permission, add);
748}
749
750WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
751                                            const UidRanges& uidRanges, bool secure, bool add,
752                                            bool modifyNonUidBasedRules) {
753    uint32_t table = getRouteTableForInterface(interface);
754    if (table == RT_TABLE_UNSPEC) {
755        return -ESRCH;
756    }
757
758    for (const UidRanges::Range& range : uidRanges.getRanges()) {
759        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
760            return ret;
761        }
762        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
763                                                range.second, add)) {
764            return ret;
765        }
766        if (int ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE, range.first,
767                                                 range.second, add)) {
768            return ret;
769        }
770    }
771
772    if (modifyNonUidBasedRules) {
773        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
774            return ret;
775        }
776        if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
777            return ret;
778        }
779        if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
780            return ret;
781        }
782        return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
783    }
784
785    return 0;
786}
787
788WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
789                                            Permission permission) {
790    uint32_t table = getRouteTableForInterface(interface);
791    if (table == RT_TABLE_UNSPEC) {
792        return -ESRCH;
793    }
794
795    Fwmark fwmark;
796    Fwmark mask;
797
798    fwmark.netId = NETID_UNSET;
799    mask.netId = FWMARK_NET_ID_MASK;
800
801    fwmark.permission = permission;
802    mask.permission = permission;
803
804    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
805                        mask.intValue);
806}
807
808WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
809                                             const char* outputInterface) {
810    uint32_t table = getRouteTableForInterface(outputInterface);
811    if (table == RT_TABLE_UNSPEC) {
812        return -ESRCH;
813    }
814
815    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
816                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
817}
818
819// Returns 0 on success or negative errno on failure.
820WARN_UNUSED_RESULT int flushRules() {
821    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
822        const char* argv[] = {
823            IP_PATH,
824            IP_VERSIONS[i],
825            "rule",
826            "flush",
827        };
828        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
829            ALOGE("failed to flush rules");
830            return -EREMOTEIO;
831        }
832    }
833    return 0;
834}
835
// Adds or removes an IPv4 or IPv6 route to the specified table.
838// Returns 0 on success or negative errno on failure.
839WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
840                                   const char* nexthop, RouteController::TableType tableType) {
841    uint32_t table;
842    switch (tableType) {
843        case RouteController::INTERFACE: {
844            table = getRouteTableForInterface(interface);
845            if (table == RT_TABLE_UNSPEC) {
846                return -ESRCH;
847            }
848            break;
849        }
850        case RouteController::LOCAL_NETWORK: {
851            table = ROUTE_TABLE_LOCAL_NETWORK;
852            break;
853        }
854        case RouteController::LEGACY_NETWORK: {
855            table = ROUTE_TABLE_LEGACY_NETWORK;
856            break;
857        }
858        case RouteController::LEGACY_SYSTEM: {
859            table = ROUTE_TABLE_LEGACY_SYSTEM;
860            break;
861        }
862    }
863
864    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
865    // Trying to add a route that already exists shouldn't cause an error.
866    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
867        return ret;
868    }
869
870    return 0;
871}
872
873// Returns 0 on success or negative errno on failure.
874WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
875    uint32_t table = getRouteTableForInterface(interface);
876    if (table == RT_TABLE_UNSPEC) {
877        return -ESRCH;
878    }
879
880    char tableString[UINT32_STRLEN];
881    snprintf(tableString, sizeof(tableString), "%u", table);
882
883    int ret = 0;
884    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
885        const char* argv[] = {
886            IP_PATH,
887            IP_VERSIONS[i],
888            "route",
889            "flush",
890            "table",
891            tableString,
892        };
893
894        // A flush works by dumping routes and deleting each route as it's returned, and it can
895        // fail if something else deletes the route between the dump and the delete. This can
896        // happen, for example, if an interface goes down while we're trying to flush its routes.
897        // So try multiple times and only return an error if the last attempt fails.
898        //
899        // TODO: replace this with our own netlink code.
900        unsigned attempts = 0;
901        int err;
902        do {
903            err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
904                                      NULL, false, false);
905            ++attempts;
906        } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
907        if (err) {
908            ALOGE("failed to flush %s routes in table %s after %d attempts",
909                  IP_VERSIONS[i], tableString, attempts);
910            ret = -EREMOTEIO;
911        }
912    }
913
914    // If we failed to flush routes, the caller may elect to keep this interface around, so keep
915    // track of its name.
916    if (!ret) {
917        interfaceToTable.erase(interface);
918    }
919
920    return ret;
921}
922
923}  // namespace
924
925int RouteController::Init(unsigned localNetId) {
926    if (int ret = flushRules()) {
927        return ret;
928    }
929    if (int ret = addLegacyRouteRules()) {
930        return ret;
931    }
932    if (int ret = addLocalNetworkRules(localNetId)) {
933        return ret;
934    }
935    if (int ret = addDirectlyConnectedRule()) {
936        return ret;
937    }
938    if (int ret = addUnreachableRule()) {
939        return ret;
940    }
941    // Don't complain if we can't add the dummy network, since not all devices support it.
942    configureDummyNetwork();
943
944    updateTableNamesFile();
945    return 0;
946}
947
948int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
949    return modifyLocalNetwork(netId, interface, ACTION_ADD);
950}
951
952int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
953    return modifyLocalNetwork(netId, interface, ACTION_DEL);
954}
955
956int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
957                                                   Permission permission) {
958    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
959        return ret;
960    }
961    updateTableNamesFile();
962    return 0;
963}
964
965int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
966                                                        Permission permission) {
967    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
968        return ret;
969    }
970    if (int ret = flushRoutes(interface)) {
971        return ret;
972    }
973    updateTableNamesFile();
974    return 0;
975}
976
977int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
978                                                  bool secure, const UidRanges& uidRanges) {
979    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
980                                       MODIFY_NON_UID_BASED_RULES)) {
981        return ret;
982    }
983    updateTableNamesFile();
984    return 0;
985}
986
987int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
988                                                       bool secure, const UidRanges& uidRanges) {
989    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
990                                       MODIFY_NON_UID_BASED_RULES)) {
991        return ret;
992    }
993    if (int ret = flushRoutes(interface)) {
994        return ret;
995    }
996    updateTableNamesFile();
997    return 0;
998}
999
1000int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
1001                                                     Permission oldPermission,
1002                                                     Permission newPermission) {
1003    // Add the new rules before deleting the old ones, to avoid race conditions.
1004    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
1005        return ret;
1006    }
1007    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
1008}
1009
1010int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
1011                                              const UidRanges& uidRanges) {
1012    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
1013                                !MODIFY_NON_UID_BASED_RULES);
1014}
1015
1016int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
1017                                                   bool secure, const UidRanges& uidRanges) {
1018    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
1019                                !MODIFY_NON_UID_BASED_RULES);
1020}
1021
1022int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
1023    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
1024}
1025
1026int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
1027                                                       Permission permission) {
1028    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
1029}
1030
1031int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
1032                              TableType tableType) {
1033    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
1034}
1035
1036int RouteController::removeRoute(const char* interface, const char* destination,
1037                                 const char* nexthop, TableType tableType) {
1038    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
1039}
1040
1041int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
1042    return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
1043}
1044
1045int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
1046    return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
1047}
1048
1049int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
1050                                                  Permission permission) {
1051    return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission);
1052}
1053
1054int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
1055                                                     const char* physicalInterface,
1056                                                     Permission permission) {
1057    return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission);
1058}
1059