1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include <arpa/inet.h>
20#include <errno.h>
21#include <fcntl.h>
22#include <linux/fib_rules.h>
23#include <net/if.h>
24#include <sys/stat.h>
25
26#include <private/android_filesystem_config.h>
27
28#include <map>
29
30#include "Fwmark.h"
31#include "UidRanges.h"
32#include "DummyNetwork.h"
33
34#include "base/file.h"
35#define LOG_TAG "Netd"
36#include "log/log.h"
37#include "logwrap/logwrap.h"
38#include "netutils/ifc.h"
39#include "resolv_netid.h"
40
41using android::base::WriteStringToFile;
42
43namespace {
44
45// BEGIN CONSTANTS --------------------------------------------------------------------------------
46
// Priorities of the routing policy rules installed by this file, one constant per kind of rule.
// The kernel consults rules in order of increasing priority value, so smaller numbers win.
constexpr uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
constexpr uint32_t RULE_PRIORITY_VPN_OVERRIDE_OIF    = 10500;
constexpr uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
constexpr uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
constexpr uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
constexpr uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
constexpr uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
constexpr uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
constexpr uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
constexpr uint32_t RULE_PRIORITY_TETHERING           = 18000;
constexpr uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
constexpr uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
constexpr uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
constexpr uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
constexpr uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
// A rule with this priority gets the "unreachable" action instead of a table lookup (see
// modifyIpRule()).
constexpr uint32_t RULE_PRIORITY_UNREACHABLE         = 32000;
63
// Numbers of the routing tables that are not tied to a particular interface.
constexpr uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
constexpr uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
constexpr uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Names written next to the table numbers above by updateTableNamesFile().
constexpr const char* ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
constexpr const char* ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

// Names written for RT_TABLE_LOCAL and RT_TABLE_MAIN by updateTableNamesFile().
constexpr const char* ROUTE_TABLE_NAME_LOCAL = "local";
constexpr const char* ROUTE_TABLE_NAME_MAIN  = "main";
74
75// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
76// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
77// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
78// that will warn us if upstream has given these values some other meaning.
79const uint16_t FRA_UID_START = 18;
80const uint16_t FRA_UID_END   = 19;
81static_assert(FRA_UID_START > FRA_MAX,
82             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
83             "Check that these values match what the kernel does and then update this assertion.");
84
85const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
86const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;
87
88const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};
89
90const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};
91
92const char* const IP_VERSIONS[] = {"-4", "-6"};
93
94const uid_t UID_ROOT = 0;
95const char* const IIF_LOOPBACK = "lo";
96const char* const IIF_NONE = NULL;
97const char* const OIF_NONE = NULL;
98const bool ACTION_ADD = true;
99const bool ACTION_DEL = false;
100const bool MODIFY_NON_UID_BASED_RULES = true;
101
102const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
103const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--
104
105const unsigned ROUTE_FLUSH_ATTEMPTS = 2;
106
107// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
108// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
109constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
110    return RTA_LENGTH(x);
111}
112
113// These are practically const, but can't be declared so, because they are used to initialize
114// non-const pointers ("void* iov_base") in iovec arrays.
115rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
116rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
117rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
118rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
119rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
120rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };
121
122rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
123rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };
124
125uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};
126
127// END CONSTANTS ----------------------------------------------------------------------------------
128
// Remembers the routing table number computed for each interface name, so that
// getRouteTableForInterface() can still answer after the interface has gone away.
// No locks needed because RouteController is accessed only from one thread (in CommandListener).
std::map<std::string, uint32_t> interfaceToTable;
131
132uint32_t getRouteTableForInterface(const char* interface) {
133    uint32_t index = if_nametoindex(interface);
134    if (index) {
135        index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
136        interfaceToTable[interface] = index;
137        return index;
138    }
139    // If the interface goes away if_nametoindex() will return 0 but we still need to know
140    // the index so we can remove the rules and routes.
141    auto iter = interfaceToTable.find(interface);
142    if (iter == interfaceToTable.end()) {
143        ALOGE("cannot find interface %s", interface);
144        return RT_TABLE_UNSPEC;
145    }
146    return iter->second;
147}
148
149void addTableName(uint32_t table, const std::string& name, std::string* contents) {
150    char tableString[UINT32_STRLEN];
151    snprintf(tableString, sizeof(tableString), "%u", table);
152    *contents += tableString;
153    *contents += " ";
154    *contents += name;
155    *contents += "\n";
156}
157
158// Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
159void updateTableNamesFile() {
160    std::string contents;
161
162    addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
163    addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
164
165    addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
166    addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
167    addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
168
169    for (const auto& entry : interfaceToTable) {
170        addTableName(entry.second, entry.first, &contents);
171    }
172
173    if (!WriteStringToFile(contents, RT_TABLES_PATH, RT_TABLES_MODE, AID_SYSTEM, AID_WIFI)) {
174        ALOGE("failed to write to %s (%s)", RT_TABLES_PATH, strerror(errno));
175        return;
176    }
177}
178
179// Sends a netlink request and expects an ack.
180// |iov| is an array of struct iovec that contains the netlink message payload.
181// The netlink header is generated by this function based on |action| and |flags|.
182// Returns -errno if there was an error or if the kernel reported an error.
183WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
184    nlmsghdr nlmsg = {
185        .nlmsg_type = action,
186        .nlmsg_flags = flags,
187    };
188    iov[0].iov_base = &nlmsg;
189    iov[0].iov_len = sizeof(nlmsg);
190    for (int i = 0; i < iovlen; ++i) {
191        nlmsg.nlmsg_len += iov[i].iov_len;
192    }
193
194    int ret;
195    struct {
196        nlmsghdr msg;
197        nlmsgerr err;
198    } response;
199
200    int sock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
201    if (sock != -1 &&
202            connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
203                    sizeof(NETLINK_ADDRESS)) != -1 &&
204            writev(sock, iov, iovlen) != -1 &&
205            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
206        if (ret == sizeof(response)) {
207            ret = response.err.error;  // Netlink errors are negative errno.
208            if (ret) {
209                ALOGE("netlink response contains error (%s)", strerror(-ret));
210            }
211        } else {
212            ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
213            ret = -EBADMSG;
214        }
215    } else {
216        ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
217        ret = -errno;
218    }
219
220    if (sock != -1) {
221        close(sock);
222    }
223
224    return ret;
225}
226
227// Returns 0 on success or negative errno on failure.
228int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
229    if (!input) {
230        *length = 0;
231        *padding = 0;
232        return 0;
233    }
234    *length = strlcpy(name, input, IFNAMSIZ) + 1;
235    if (*length > IFNAMSIZ) {
236        ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
237        return -ENAMETOOLONG;
238    }
239    *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
240    return 0;
241}
242
243// Adds or removes a routing rule for IPv4 and IPv6.
244//
245// + If |priority| is RULE_PRIORITY_UNREACHABLE, the rule returns ENETUNREACH (i.e., specifies an
246//   action of FR_ACT_UNREACHABLE). Otherwise, the rule specifies an action of FR_ACT_TO_TBL.
247// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the table is
248//   unspecified. An unspecified table is only allowed when deleting a rule.
249// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
250//   ignored.
251// + If |iif| is non-NULL, the rule matches the specified incoming interface.
252// + If |oif| is non-NULL, the rule matches the specified outgoing interface.
253// + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
254//   range (inclusive). Otherwise, the rule matches packets from all UIDs.
255//
256// Returns 0 on success or negative errno on failure.
257WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
258                                    uint32_t fwmark, uint32_t mask, const char* iif,
259                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
260    // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
261    if (fwmark & ~mask) {
262        ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
263        return -ERANGE;
264    }
265
266    // Interface names must include exactly one terminating NULL and be properly padded, or older
267    // kernels will refuse to delete rules.
268    char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
269    size_t iifLength, oifLength;
270    uint16_t iifPadding, oifPadding;
271    if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
272        return ret;
273    }
274    if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
275        return ret;
276    }
277
278    // Either both start and end UID must be specified, or neither.
279    if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
280        ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
281        return -EUSERS;
282    }
283    bool isUidRule = (uidStart != INVALID_UID);
284
285    // Assemble a rule request and put it in an array of iovec structures.
286    fib_rule_hdr rule = {
287        .action = static_cast<uint8_t>(priority != RULE_PRIORITY_UNREACHABLE ? FR_ACT_TO_TBL :
288                                                                               FR_ACT_UNREACHABLE),
289        // Note that here we're implicitly setting rule.table to 0. When we want to specify a
290        // non-zero table, we do this via the FRATTR_TABLE attribute.
291    };
292
293    // Don't ever create a rule that looks up table 0, because table 0 is the local table.
294    // It's OK to specify a table ID of 0 when deleting a rule, because that doesn't actually select
295    // table 0, it's a wildcard that matches anything.
296    if (table == RT_TABLE_UNSPEC && rule.action == FR_ACT_TO_TBL && action != RTM_DELRULE) {
297        ALOGE("RT_TABLE_UNSPEC only allowed when deleting rules");
298        return -ENOTUNIQ;
299    }
300
301    rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
302    rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
303
304    iovec iov[] = {
305        { NULL,              0 },
306        { &rule,             sizeof(rule) },
307        { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
308        { &priority,         sizeof(priority) },
309        { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
310        { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
311        { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
312        { &fwmark,           mask ? sizeof(fwmark) : 0 },
313        { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
314        { &mask,             mask ? sizeof(mask) : 0 },
315        { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
316        { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
317        { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
318        { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
319        { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
320        { iifName,           iifLength },
321        { PADDING_BUFFER,    iifPadding },
322        { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
323        { oifName,           oifLength },
324        { PADDING_BUFFER,    oifPadding },
325    };
326
327    uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
328    for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
329        rule.family = AF_FAMILIES[i];
330        if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
331            return ret;
332        }
333    }
334
335    return 0;
336}
337
338WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
339                                    uint32_t fwmark, uint32_t mask) {
340    return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
341                        INVALID_UID);
342}
343
344// Adds or deletes an IPv4 or IPv6 route.
345// Returns 0 on success or negative errno on failure.
346WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
347                                     const char* destination, const char* nexthop) {
348    // At least the destination must be non-null.
349    if (!destination) {
350        ALOGE("null destination");
351        return -EFAULT;
352    }
353
354    // Parse the prefix.
355    uint8_t rawAddress[sizeof(in6_addr)];
356    uint8_t family;
357    uint8_t prefixLength;
358    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
359                                &prefixLength);
360    if (rawLength < 0) {
361        ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
362        return rawLength;
363    }
364
365    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
366        ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
367        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
368    }
369
370    uint8_t type = RTN_UNICAST;
371    uint32_t ifindex;
372    uint8_t rawNexthop[sizeof(in6_addr)];
373
374    if (nexthop && !strcmp(nexthop, "unreachable")) {
375        type = RTN_UNREACHABLE;
376        // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
377        // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
378        // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
379        interface = OIF_NONE;
380        nexthop = NULL;
381    } else if (nexthop && !strcmp(nexthop, "throw")) {
382        type = RTN_THROW;
383        interface = OIF_NONE;
384        nexthop = NULL;
385    } else {
386        // If an interface was specified, find the ifindex.
387        if (interface != OIF_NONE) {
388            ifindex = if_nametoindex(interface);
389            if (!ifindex) {
390                ALOGE("cannot find interface %s", interface);
391                return -ENODEV;
392            }
393        }
394
395        // If a nexthop was specified, parse it as the same family as the prefix.
396        if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
397            ALOGE("inet_pton failed for nexthop %s", nexthop);
398            return -EINVAL;
399        }
400    }
401
402    // Assemble a rtmsg and put it in an array of iovec structures.
403    rtmsg route = {
404        .rtm_protocol = RTPROT_STATIC,
405        .rtm_type = type,
406        .rtm_family = family,
407        .rtm_dst_len = prefixLength,
408        .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
409    };
410
411    rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
412    rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
413
414    iovec iov[] = {
415        { NULL,          0 },
416        { &route,        sizeof(route) },
417        { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
418        { &table,        sizeof(table) },
419        { &rtaDst,       sizeof(rtaDst) },
420        { rawAddress,    static_cast<size_t>(rawLength) },
421        { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
422        { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
423        { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
424        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
425    };
426
427    uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
428                                                NETLINK_REQUEST_FLAGS;
429    return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
430}
431
432// An iptables rule to mark incoming packets on a network with the netId of the network.
433//
434// This is so that the kernel can:
435// + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
436//   replies, SYN-ACKs, etc).
437// + Mark sockets that accept connections from this interface so that the connection stays on the
438//   same interface.
439WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
440                                                Permission permission, bool add) {
441    Fwmark fwmark;
442
443    fwmark.netId = netId;
444    fwmark.explicitlySelected = true;
445    fwmark.protectedFromVpn = true;
446    fwmark.permission = permission;
447
448    char markString[UINT32_HEX_STRLEN];
449    snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
450
451    if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
452                     "MARK", "--set-mark", markString, NULL)) {
453        ALOGE("failed to change iptables rule that sets incoming packet mark");
454        return -EREMOTEIO;
455    }
456
457    return 0;
458}
459
460// A rule to route responses to the local network forwarded via the VPN.
461//
462// When a VPN is in effect, packets from the local network to upstream networks are forwarded into
463// the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
464WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
465    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
466                        ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
467                        INVALID_UID, INVALID_UID);
468}
469
470// A rule to route all traffic from a given set of UIDs to go over the VPN.
471//
472// Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
473// have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
474// bypass the VPN if the protectedFromVpn bit is set.
475WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
476                                             bool secure, bool add) {
477    Fwmark fwmark;
478    Fwmark mask;
479
480    fwmark.protectedFromVpn = false;
481    mask.protectedFromVpn = true;
482
483    uint32_t priority;
484
485    if (secure) {
486        priority = RULE_PRIORITY_SECURE_VPN;
487    } else {
488        priority = RULE_PRIORITY_BYPASSABLE_VPN;
489
490        fwmark.explicitlySelected = false;
491        mask.explicitlySelected = true;
492    }
493
494    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
495                        mask.intValue, IIF_LOOPBACK, OIF_NONE, uidStart, uidEnd);
496}
497
498// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
499// set of UIDs.
500//
501// This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
502// target set, but where the DnsProxyListener itself is not.
503WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
504                                                     bool add) {
505    Fwmark fwmark;
506    Fwmark mask;
507
508    fwmark.netId = netId;
509    mask.netId = FWMARK_NET_ID_MASK;
510
511    fwmark.permission = PERMISSION_SYSTEM;
512    mask.permission = PERMISSION_SYSTEM;
513
514    uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
515
516    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
517                        mask.intValue);
518}
519
520// A rule to route traffic based on an explicitly chosen network.
521//
522// Supports apps that use the multinetwork APIs to restrict their traffic to a network.
523//
524// Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
525// to check it again in the rules here, because a network's permissions may have been updated via
526// modifyNetworkPermission().
527WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
528                                                 Permission permission, uid_t uidStart,
529                                                 uid_t uidEnd, bool add) {
530    Fwmark fwmark;
531    Fwmark mask;
532
533    fwmark.netId = netId;
534    mask.netId = FWMARK_NET_ID_MASK;
535
536    fwmark.explicitlySelected = true;
537    mask.explicitlySelected = true;
538
539    fwmark.permission = permission;
540    mask.permission = permission;
541
542    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
543                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
544}
545
546// A rule to route traffic based on a chosen outgoing interface.
547//
548// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
549// the outgoing interface (typically for link-local communications).
550WARN_UNUSED_RESULT int modifyOutputInterfaceRules(const char* interface, uint32_t table,
551                                                  Permission permission, uid_t uidStart,
552                                                  uid_t uidEnd, bool add) {
553    Fwmark fwmark;
554    Fwmark mask;
555
556    fwmark.permission = permission;
557    mask.permission = permission;
558
559    // If this rule does not specify a UID range, then also add a corresponding high-priority rule
560    // for UID. This covers forwarded packets and system daemons such as the tethering DHCP server.
561    if (uidStart == INVALID_UID && uidEnd == INVALID_UID) {
562        if (int ret = modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OVERRIDE_OIF,
563                                   table, fwmark.intValue, mask.intValue, IIF_NONE, interface,
564                                   UID_ROOT, UID_ROOT)) {
565            return ret;
566        }
567    }
568
569    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
570                        fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
571}
572
573// A rule to route traffic based on the chosen network.
574//
575// This is for sockets that have not explicitly requested a particular network, but have been
576// bound to one when they called connect(). This ensures that sockets connected on a particular
577// network stay on that network even if the default network changes.
578WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
579                                                 Permission permission, bool add) {
580    Fwmark fwmark;
581    Fwmark mask;
582
583    fwmark.netId = netId;
584    mask.netId = FWMARK_NET_ID_MASK;
585
586    fwmark.explicitlySelected = false;
587    mask.explicitlySelected = true;
588
589    fwmark.permission = permission;
590    mask.permission = permission;
591
592    return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
593                        fwmark.intValue, mask.intValue);
594}
595
596// A rule to enable split tunnel VPNs.
597//
598// If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to
599// go over the default network, provided it wasn't explicitly restricted to the VPN and has the
600// permissions required by the default network.
601WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
602                                                const char* physicalInterface,
603                                                Permission permission) {
604    uint32_t table = getRouteTableForInterface(physicalInterface);
605    if (table == RT_TABLE_UNSPEC) {
606        return -ESRCH;
607    }
608
609    Fwmark fwmark;
610    Fwmark mask;
611
612    fwmark.netId = vpnNetId;
613    mask.netId = FWMARK_NET_ID_MASK;
614
615    fwmark.explicitlySelected = false;
616    mask.explicitlySelected = true;
617
618    fwmark.permission = permission;
619    mask.permission = permission;
620
621    return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue,
622                        mask.intValue);
623}
624
625// Add rules to allow legacy routes added through the requestRouteToHost() API.
626WARN_UNUSED_RESULT int addLegacyRouteRules() {
627    Fwmark fwmark;
628    Fwmark mask;
629
630    fwmark.explicitlySelected = false;
631    mask.explicitlySelected = true;
632
633    // Rules to allow legacy routes to override the default network.
634    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
635                               fwmark.intValue, mask.intValue)) {
636        return ret;
637    }
638    if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
639                               ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
640        return ret;
641    }
642
643    fwmark.permission = PERMISSION_SYSTEM;
644    mask.permission = PERMISSION_SYSTEM;
645
646    // A rule to allow legacy routes from system apps to override VPNs.
647    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
648                        fwmark.intValue, mask.intValue);
649}
650
651// Add rules to lookup the local network when specified explicitly or otherwise.
652WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
653    if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
654                                            INVALID_UID, INVALID_UID, ACTION_ADD)) {
655        return ret;
656    }
657
658    Fwmark fwmark;
659    Fwmark mask;
660
661    fwmark.explicitlySelected = false;
662    mask.explicitlySelected = true;
663
664    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
665                        fwmark.intValue, mask.intValue);
666}
667
668int configureDummyNetwork() {
669    const char *interface = DummyNetwork::INTERFACE_NAME;
670    uint32_t table = getRouteTableForInterface(interface);
671    if (table == RT_TABLE_UNSPEC) {
672        // getRouteTableForInterface has already looged an error.
673        return -ESRCH;
674    }
675
676    ifc_init();
677    int ret = ifc_up(interface);
678    ifc_close();
679    if (ret) {
680        ALOGE("Can't bring up %s: %s", interface, strerror(errno));
681        return -errno;
682    }
683
684    if ((ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE,
685                                          INVALID_UID, INVALID_UID, ACTION_ADD))) {
686        ALOGE("Can't create oif rules for %s: %s", interface, strerror(-ret));
687        return ret;
688    }
689
690    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "0.0.0.0/0", NULL))) {
691        ALOGE("Can't add IPv4 default route to %s: %s", interface, strerror(-ret));
692        return ret;
693    }
694
695    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "::/0", NULL))) {
696        ALOGE("Can't add IPv6 default route to %s: %s", interface, strerror(-ret));
697        return ret;
698    }
699
700    return 0;
701}
702
703// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
704// rule, but with a lower priority. We will never create routes in the main table; it should only be
705// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
706// This is necessary, for example, when adding a route through a directly-connected gateway: in
707// order to add the route, there must already be a directly-connected route that covers the gateway.
708WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
709    Fwmark fwmark;
710    Fwmark mask;
711
712    fwmark.netId = NETID_UNSET;
713    mask.netId = FWMARK_NET_ID_MASK;
714
715    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
716                        fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
717}
718
719// Add an explicit unreachable rule close to the end of the prioriy list to make it clear that
720// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
721// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
722// rule will hopefully make things even clearer.
723WARN_UNUSED_RESULT int addUnreachableRule() {
724    return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
725                        MARK_UNSET);
726}
727
728WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
729    if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
730        return ret;
731    }
732    return modifyOutputInterfaceRules(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
733                                      INVALID_UID, INVALID_UID, add);
734}
735
736WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
737                                             Permission permission, bool add) {
738    uint32_t table = getRouteTableForInterface(interface);
739    if (table == RT_TABLE_UNSPEC) {
740        return -ESRCH;
741    }
742
743    if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
744        return ret;
745    }
746    if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
747                                            add)) {
748        return ret;
749    }
750    if (int ret = modifyOutputInterfaceRules(interface, table, permission, INVALID_UID, INVALID_UID,
751                                            add)) {
752        return ret;
753    }
754    return modifyImplicitNetworkRule(netId, table, permission, add);
755}
756
757WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
758                                            const UidRanges& uidRanges, bool secure, bool add,
759                                            bool modifyNonUidBasedRules) {
760    uint32_t table = getRouteTableForInterface(interface);
761    if (table == RT_TABLE_UNSPEC) {
762        return -ESRCH;
763    }
764
765    for (const UidRanges::Range& range : uidRanges.getRanges()) {
766        if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
767            return ret;
768        }
769        if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
770                                                range.second, add)) {
771            return ret;
772        }
773        if (int ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE, range.first,
774                                                 range.second, add)) {
775            return ret;
776        }
777    }
778
779    if (modifyNonUidBasedRules) {
780        if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
781            return ret;
782        }
783        if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
784            return ret;
785        }
786        if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
787            return ret;
788        }
789        return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
790    }
791
792    return 0;
793}
794
795WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
796                                            Permission permission) {
797    uint32_t table = getRouteTableForInterface(interface);
798    if (table == RT_TABLE_UNSPEC) {
799        return -ESRCH;
800    }
801
802    Fwmark fwmark;
803    Fwmark mask;
804
805    fwmark.netId = NETID_UNSET;
806    mask.netId = FWMARK_NET_ID_MASK;
807
808    fwmark.permission = permission;
809    mask.permission = permission;
810
811    return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
812                        mask.intValue);
813}
814
815WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
816                                             const char* outputInterface) {
817    uint32_t table = getRouteTableForInterface(outputInterface);
818    if (table == RT_TABLE_UNSPEC) {
819        return -ESRCH;
820    }
821
822    return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
823                        inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
824}
825
826// Returns 0 on success or negative errno on failure.
827WARN_UNUSED_RESULT int flushRules() {
828    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
829        const char* argv[] = {
830            IP_PATH,
831            IP_VERSIONS[i],
832            "rule",
833            "flush",
834        };
835        if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
836            ALOGE("failed to flush rules");
837            return -EREMOTEIO;
838        }
839    }
840    return 0;
841}
842
843// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
844// route, to the main table as well.
845// Returns 0 on success or negative errno on failure.
846WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
847                                   const char* nexthop, RouteController::TableType tableType) {
848    uint32_t table;
849    switch (tableType) {
850        case RouteController::INTERFACE: {
851            table = getRouteTableForInterface(interface);
852            if (table == RT_TABLE_UNSPEC) {
853                return -ESRCH;
854            }
855            break;
856        }
857        case RouteController::LOCAL_NETWORK: {
858            table = ROUTE_TABLE_LOCAL_NETWORK;
859            break;
860        }
861        case RouteController::LEGACY_NETWORK: {
862            table = ROUTE_TABLE_LEGACY_NETWORK;
863            break;
864        }
865        case RouteController::LEGACY_SYSTEM: {
866            table = ROUTE_TABLE_LEGACY_SYSTEM;
867            break;
868        }
869    }
870
871    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
872    // Trying to add a route that already exists shouldn't cause an error.
873    if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
874        return ret;
875    }
876
877    return 0;
878}
879
880// Returns 0 on success or negative errno on failure.
881WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
882    uint32_t table = getRouteTableForInterface(interface);
883    if (table == RT_TABLE_UNSPEC) {
884        return -ESRCH;
885    }
886
887    char tableString[UINT32_STRLEN];
888    snprintf(tableString, sizeof(tableString), "%u", table);
889
890    int ret = 0;
891    for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
892        const char* argv[] = {
893            IP_PATH,
894            IP_VERSIONS[i],
895            "route",
896            "flush",
897            "table",
898            tableString,
899        };
900
901        // A flush works by dumping routes and deleting each route as it's returned, and it can
902        // fail if something else deletes the route between the dump and the delete. This can
903        // happen, for example, if an interface goes down while we're trying to flush its routes.
904        // So try multiple times and only return an error if the last attempt fails.
905        //
906        // TODO: replace this with our own netlink code.
907        unsigned attempts = 0;
908        int err;
909        do {
910            err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
911                                      NULL, false, false);
912            ++attempts;
913        } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
914        if (err) {
915            ALOGE("failed to flush %s routes in table %s after %d attempts",
916                  IP_VERSIONS[i], tableString, attempts);
917            ret = -EREMOTEIO;
918        }
919    }
920
921    // If we failed to flush routes, the caller may elect to keep this interface around, so keep
922    // track of its name.
923    if (!ret) {
924        interfaceToTable.erase(interface);
925    }
926
927    return ret;
928}
929
930WARN_UNUSED_RESULT int clearTetheringRules(const char* inputInterface) {
931    int ret = 0;
932    while (ret == 0) {
933        ret = modifyIpRule(RTM_DELRULE, RULE_PRIORITY_TETHERING, 0, MARK_UNSET, MARK_UNSET,
934                           inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
935    }
936
937    if (ret == -ENOENT) {
938        return 0;
939    } else {
940        return ret;
941    }
942}
943
944}  // namespace
945
946int RouteController::Init(unsigned localNetId) {
947    if (int ret = flushRules()) {
948        return ret;
949    }
950    if (int ret = addLegacyRouteRules()) {
951        return ret;
952    }
953    if (int ret = addLocalNetworkRules(localNetId)) {
954        return ret;
955    }
956    if (int ret = addDirectlyConnectedRule()) {
957        return ret;
958    }
959    if (int ret = addUnreachableRule()) {
960        return ret;
961    }
962    // Don't complain if we can't add the dummy network, since not all devices support it.
963    configureDummyNetwork();
964
965    updateTableNamesFile();
966    return 0;
967}
968
969int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
970    return modifyLocalNetwork(netId, interface, ACTION_ADD);
971}
972
973int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
974    return modifyLocalNetwork(netId, interface, ACTION_DEL);
975}
976
977int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
978                                                   Permission permission) {
979    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
980        return ret;
981    }
982    updateTableNamesFile();
983    return 0;
984}
985
986int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
987                                                        Permission permission) {
988    if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
989        return ret;
990    }
991    if (int ret = flushRoutes(interface)) {
992        return ret;
993    }
994    if (int ret = clearTetheringRules(interface)) {
995        return ret;
996    }
997    updateTableNamesFile();
998    return 0;
999}
1000
1001int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
1002                                                  bool secure, const UidRanges& uidRanges) {
1003    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
1004                                       MODIFY_NON_UID_BASED_RULES)) {
1005        return ret;
1006    }
1007    updateTableNamesFile();
1008    return 0;
1009}
1010
1011int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
1012                                                       bool secure, const UidRanges& uidRanges) {
1013    if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
1014                                       MODIFY_NON_UID_BASED_RULES)) {
1015        return ret;
1016    }
1017    if (int ret = flushRoutes(interface)) {
1018        return ret;
1019    }
1020    updateTableNamesFile();
1021    return 0;
1022}
1023
1024int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
1025                                                     Permission oldPermission,
1026                                                     Permission newPermission) {
1027    // Add the new rules before deleting the old ones, to avoid race conditions.
1028    if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
1029        return ret;
1030    }
1031    return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
1032}
1033
1034int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
1035                                              const UidRanges& uidRanges) {
1036    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
1037                                !MODIFY_NON_UID_BASED_RULES);
1038}
1039
1040int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
1041                                                   bool secure, const UidRanges& uidRanges) {
1042    return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
1043                                !MODIFY_NON_UID_BASED_RULES);
1044}
1045
1046int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
1047    return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
1048}
1049
1050int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
1051                                                       Permission permission) {
1052    return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
1053}
1054
1055int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
1056                              TableType tableType) {
1057    return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
1058}
1059
1060int RouteController::removeRoute(const char* interface, const char* destination,
1061                                 const char* nexthop, TableType tableType) {
1062    return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
1063}
1064
1065int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
1066    return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
1067}
1068
1069int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
1070    return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
1071}
1072
1073int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
1074                                                  Permission permission) {
1075    return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission);
1076}
1077
1078int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
1079                                                     const char* physicalInterface,
1080                                                     Permission permission) {
1081    return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission);
1082}
1083