RouteController.cpp revision 7f972fb1cd3c26af76779a7a3220b9cf5fb63a0a
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "RouteController.h"
18
19#include "Fwmark.h"
20#include "NetdConstants.h"
21
22#include <arpa/inet.h>
23#include <errno.h>
24#include <linux/netlink.h>
25#include <linux/rtnetlink.h>
26#include <logwrap/logwrap.h>
27#include <map>
28#include <netinet/in.h>
29#include <net/if.h>
30#include <sys/socket.h>
31#include <sys/uio.h>
32#include <unistd.h>
33
34// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
35// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
36#define U16_RTA_LENGTH(x) static_cast<uint16_t>(RTA_LENGTH((x)))
37
38namespace {
39
40const uint32_t RULE_PRIORITY_PRIVILEGED_LEGACY     = 11000;
41const uint32_t RULE_PRIORITY_PER_NETWORK_EXPLICIT  = 13000;
42const uint32_t RULE_PRIORITY_PER_NETWORK_INTERFACE = 14000;
43const uint32_t RULE_PRIORITY_LEGACY                = 16000;
44const uint32_t RULE_PRIORITY_PER_NETWORK_NORMAL    = 17000;
45const uint32_t RULE_PRIORITY_DEFAULT_NETWORK       = 19000;
46const uint32_t RULE_PRIORITY_MAIN                  = 20000;
47// TODO: Uncomment once we are sure everything works.
48#if 0
49const uint32_t RULE_PRIORITY_UNREACHABLE           = 21000;
50#endif
51
52// TODO: These should be turned into per-UID tables once the kernel supports UID-based routing.
53const int ROUTE_TABLE_PRIVILEGED_LEGACY = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 901;
54const int ROUTE_TABLE_LEGACY            = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 902;
55
56std::map<std::string, uint32_t> interfaceToIndex;
57
58uint32_t getRouteTableForInterface(const char* interface) {
59    uint32_t index = if_nametoindex(interface);
60    if (index) {
61        interfaceToIndex[interface] = index;
62    } else {
63        // If the interface goes away if_nametoindex() will return 0 but we still need to know
64        // the index so we can remove the rules and routes.
65        std::map<std::string, uint32_t>::iterator it = interfaceToIndex.find(interface);
66        if (it != interfaceToIndex.end())
67            index = it->second;
68    }
69    return index ? index + RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX : 0;
70}
71
72// Adds or removes a routing rule for IPv4 and IPv6.
73//
74// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
75//   returns ENETUNREACH.
76// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
77//   ignored.
78// + If |interface| is non-NULL, the rule matches the specified outgoing interface.
79bool runIpRuleCommand(const char* action, uint32_t priority, uint32_t table, uint32_t fwmark,
80                      uint32_t mask, const char* interface) {
81    char priorityString[UINT32_STRLEN];
82    snprintf(priorityString, sizeof(priorityString), "%u", priority);
83
84    char tableString[UINT32_STRLEN];
85    snprintf(tableString, sizeof(tableString), "%u", table);
86
87    char fwmarkString[sizeof("0x12345678/0x12345678")];
88    snprintf(fwmarkString, sizeof(fwmarkString), "0x%x/0x%x", fwmark, mask);
89
90    const char* version[] = {"-4", "-6"};
91    for (size_t i = 0; i < ARRAY_SIZE(version); ++i) {
92        int argc = 0;
93        const char* argv[16];
94
95        argv[argc++] = IP_PATH;
96        argv[argc++] = version[i];
97        argv[argc++] = "rule";
98        argv[argc++] = action;
99        argv[argc++] = "priority";
100        argv[argc++] = priorityString;
101        if (table) {
102            argv[argc++] = "table";
103            argv[argc++] = tableString;
104        } else {
105            argv[argc++] = "unreachable";
106        }
107        if (mask) {
108            argv[argc++] = "fwmark";
109            argv[argc++] = fwmarkString;
110        }
111        if (interface) {
112            argv[argc++] = "oif";
113            argv[argc++] = interface;
114        }
115        if (android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false)) {
116            return false;
117        }
118    }
119
120    return true;
121}
122
123// Adds or deletes an IPv4 or IPv6 route.
124// Returns 0 on success or negative errno on failure.
125int modifyIpRoute(uint16_t action, uint32_t table, const char* interface, const char* destination,
126                  const char* nexthop) {
127    // At least the destination must be non-null.
128    if (!destination) {
129        return -EFAULT;
130    }
131
132    // Parse the prefix.
133    uint8_t rawAddress[sizeof(in6_addr)];
134    uint8_t family, prefixLength;
135    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
136                                &prefixLength);
137    if (rawLength < 0) {
138        return rawLength;
139    }
140
141    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
142        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
143    }
144
145    // If an interface was specified, find the ifindex.
146    uint32_t ifindex;
147    if (interface) {
148        ifindex = if_nametoindex(interface);
149        if (!ifindex) {
150            return -ENODEV;
151        }
152    }
153
154    // If a nexthop was specified, parse it as the same family as the prefix.
155    uint8_t rawNexthop[sizeof(in6_addr)];
156    if (nexthop && !inet_pton(family, nexthop, rawNexthop)) {
157        return -EINVAL;
158    }
159
160    // Assemble a netlink request and put it in an array of iovec structures.
161    nlmsghdr nlmsg = {
162        .nlmsg_type = action,
163        .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
164    };
165    rtmsg rtmsg = {
166        .rtm_protocol = RTPROT_STATIC,
167        .rtm_type = RTN_UNICAST,
168        .rtm_family = family,
169        .rtm_dst_len = prefixLength,
170    };
171    rtattr rta_table = { U16_RTA_LENGTH(sizeof(table)), RTA_TABLE };
172    rtattr rta_oif = { U16_RTA_LENGTH(sizeof(ifindex)), RTA_OIF };
173    rtattr rta_dst = { U16_RTA_LENGTH(rawLength), RTA_DST };
174    rtattr rta_gateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
175    if (action == RTM_NEWROUTE) {
176        nlmsg.nlmsg_flags |= (NLM_F_CREATE | NLM_F_EXCL);
177    }
178
179    iovec iov[] = {
180        { &nlmsg,        sizeof(nlmsg) },
181        { &rtmsg,        sizeof(rtmsg) },
182        { &rta_table,    sizeof(rta_table) },
183        { &table,        sizeof(table) },
184        { &rta_dst,      sizeof(rta_dst) },
185        { rawAddress,    static_cast<size_t>(rawLength) },
186        { &rta_oif,      interface ? sizeof(rta_oif) : 0 },
187        { &ifindex,      interface ? sizeof(ifindex) : 0 },
188        { &rta_gateway,  nexthop ? sizeof(rta_gateway) : 0 },
189        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
190    };
191    int iovlen = ARRAY_SIZE(iov);
192
193    for (int i = 0; i < iovlen; ++i) {
194        nlmsg.nlmsg_len += iov[i].iov_len;
195    }
196
197    int ret;
198    struct {
199        nlmsghdr msg;
200        nlmsgerr err;
201    } response;
202
203    sockaddr_nl kernel = {AF_NETLINK, 0, 0, 0};
204    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
205    if (sock != -1 &&
206            connect(sock, reinterpret_cast<sockaddr *>(&kernel), sizeof(kernel)) != -1 &&
207            writev(sock, iov, iovlen) != -1 &&
208            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
209        if (ret == sizeof(response)) {
210            ret = response.err.error;  // Netlink errors are negative errno.
211        } else {
212            ret = -EBADMSG;
213        }
214    } else {
215        ret = -errno;
216    }
217
218    if (sock != -1) {
219        close(sock);
220    }
221
222    return ret;
223}
224
225bool modifyPerNetworkRules(unsigned netId, const char* interface, Permission permission, bool add,
226                           bool modifyIptables) {
227    uint32_t table = getRouteTableForInterface(interface);
228    if (!table) {
229        return false;
230    }
231
232    const char* action = add ? ADD : DEL;
233
234    Fwmark fwmark;
235    fwmark.permission = permission;
236
237    Fwmark mask;
238    mask.permission = permission;
239
240    // A rule to route traffic based on a chosen outgoing interface.
241    //
242    // Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already
243    // knows the outgoing interface (typically for link-local communications).
244    if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_INTERFACE, table, fwmark.intValue,
245                          mask.intValue, interface)) {
246        return false;
247    }
248
249    // A rule to route traffic based on the chosen network.
250    //
251    // This is for sockets that have not explicitly requested a particular network, but have been
252    // bound to one when they called connect(). This ensures that sockets connected on a particular
253    // network stay on that network even if the default network changes.
254    fwmark.netId = netId;
255    mask.netId = FWMARK_NET_ID_MASK;
256    if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_NORMAL, table, fwmark.intValue,
257                          mask.intValue, NULL)) {
258        return false;
259    }
260
261    // A rule to route traffic based on an explicitly chosen network.
262    //
263    // Supports apps that use the multinetwork APIs to restrict their traffic to a network.
264    //
265    // We don't really need to check the permission bits of the fwmark here, as they would've been
266    // checked at the time the netId was set into the fwmark, but we do so to be consistent.
267    fwmark.explicitlySelected = true;
268    mask.explicitlySelected = true;
269    if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_EXPLICIT, table, fwmark.intValue,
270                          mask.intValue, NULL)) {
271        return false;
272    }
273
274    // An iptables rule to mark incoming packets on a network with the netId of the network.
275    //
276    // This is so that the kernel can:
277    // + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors,
278    //   ping replies).
279    // + Mark sockets that accept connections from this interface so that the connection stays on
280    //   the same interface.
281    if (modifyIptables) {
282        action = add ? "-A" : "-D";
283        char markString[UINT32_HEX_STRLEN];
284        snprintf(markString, sizeof(markString), "0x%x", netId);
285        if (execIptables(V4V6, "-t", "mangle", action, "INPUT", "-i", interface, "-j", "MARK",
286                         "--set-mark", markString, NULL)) {
287            return false;
288        }
289    }
290
291    return true;
292}
293
294bool modifyDefaultNetworkRules(const char* interface, Permission permission, const char* action) {
295    uint32_t table = getRouteTableForInterface(interface);
296    if (!table) {
297        return false;
298    }
299
300    Fwmark fwmark;
301    fwmark.netId = 0;
302    fwmark.permission = permission;
303
304    Fwmark mask;
305    mask.netId = FWMARK_NET_ID_MASK;
306    mask.permission = permission;
307
308    return runIpRuleCommand(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
309                            mask.intValue, NULL);
310}
311
312// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's directly-connected
313// route, to the main table as well.
314// Returns 0 on success or negative errno on failure.
315int modifyRoute(const char* interface, const char* destination, const char* nexthop,
316                int action, RouteController::TableType tableType, unsigned /* uid */) {
317    uint32_t table = 0;
318    switch (tableType) {
319        case RouteController::INTERFACE: {
320            table = getRouteTableForInterface(interface);
321            break;
322        }
323        case RouteController::LEGACY: {
324            // TODO: Use the UID to assign a unique table per UID instead of this fixed table.
325            table = ROUTE_TABLE_LEGACY;
326            break;
327        }
328        case RouteController::PRIVILEGED_LEGACY: {
329            // TODO: Use the UID to assign a unique table per UID instead of this fixed table.
330            table = ROUTE_TABLE_PRIVILEGED_LEGACY;
331            break;
332        }
333    }
334    if (!table) {
335        return -ESRCH;
336    }
337
338    int ret = modifyIpRoute(action, table, interface, destination, nexthop);
339    if (ret != 0) {
340        return ret;
341    }
342
343    // If there's no nexthop, this is a directly connected route. Add it to the main table also, to
344    // let the kernel find it when validating nexthops when global routes are added.
345    if (!nexthop) {
346        ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL);
347        // A failure with action == ADD && errno == EEXIST means that the route already exists in
348        // the main table, perhaps because the kernel added it automatically as part of adding the
349        // IP address to the interface. Ignore this, but complain about everything else.
350        if (ret != 0 && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
351            return ret;
352        }
353    }
354
355    return 0;
356}
357
358bool flushRoutes(const char* interface) {
359    uint32_t table = getRouteTableForInterface(interface);
360    if (!table) {
361        return false;
362    }
363    interfaceToIndex.erase(interface);
364
365    char tableString[UINT32_STRLEN];
366    snprintf(tableString, sizeof(tableString), "%u", table);
367
368    const char* version[] = {"-4", "-6"};
369    for (size_t i = 0; i < ARRAY_SIZE(version); ++i) {
370        const char* argv[] = {
371            IP_PATH,
372            version[i],
373            "route"
374            "flush",
375            "table",
376            tableString,
377        };
378        int argc = ARRAY_SIZE(argv);
379
380        if (!android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false)) {
381            return false;
382        }
383    }
384
385    return true;
386}
387
388}  // namespace
389
390void RouteController::Init() {
391    // Add a new rule to look up the 'main' table, with the same selectors as the "default network"
392    // rule, but with a lower priority. Since the default network rule points to a table with a
393    // default route, the rule we're adding will never be used for normal routing lookups. However,
394    // the kernel may fall-through to it to find directly-connected routes when it validates that a
395    // nexthop (in a route being added) is reachable.
396    Fwmark fwmark;
397    fwmark.netId = 0;
398
399    Fwmark mask;
400    mask.netId = FWMARK_NET_ID_MASK;
401
402    runIpRuleCommand(ADD, RULE_PRIORITY_MAIN, RT_TABLE_MAIN, fwmark.intValue, mask.intValue, NULL);
403
404    // Add rules to allow lookup of legacy routes.
405    //
406    // TODO: Remove these once the kernel supports UID-based routing. Instead, add them on demand
407    // when routes are added.
408    fwmark.netId = 0;
409    mask.netId = 0;
410
411    fwmark.explicitlySelected = false;
412    mask.explicitlySelected = true;
413
414    runIpRuleCommand(ADD, RULE_PRIORITY_LEGACY, ROUTE_TABLE_LEGACY, fwmark.intValue, mask.intValue,
415                     NULL);
416
417    fwmark.permission = PERMISSION_CONNECTIVITY_INTERNAL;
418    mask.permission = PERMISSION_CONNECTIVITY_INTERNAL;
419
420    runIpRuleCommand(ADD, RULE_PRIORITY_PRIVILEGED_LEGACY, ROUTE_TABLE_PRIVILEGED_LEGACY,
421                     fwmark.intValue, mask.intValue, NULL);
422
423// TODO: Uncomment once we are sure everything works.
424#if 0
425    // Add a rule to preempt the pre-defined "from all lookup main" rule. This ensures that packets
426    // that are already marked with a specific NetId don't fall-through to the main table.
427    runIpRuleCommand(ADD, RULE_PRIORITY_UNREACHABLE, 0, 0, 0, NULL);
428#endif
429}
430
431bool RouteController::addInterfaceToNetwork(unsigned netId, const char* interface,
432                                            Permission permission) {
433    return modifyPerNetworkRules(netId, interface, permission, true, true);
434}
435
436bool RouteController::removeInterfaceFromNetwork(unsigned netId, const char* interface,
437                                                 Permission permission) {
438    return modifyPerNetworkRules(netId, interface, permission, false, true) &&
439           flushRoutes(interface);
440}
441
442bool RouteController::modifyNetworkPermission(unsigned netId, const char* interface,
443                                              Permission oldPermission, Permission newPermission) {
444    // Add the new rules before deleting the old ones, to avoid race conditions.
445    return modifyPerNetworkRules(netId, interface, newPermission, true, false) &&
446           modifyPerNetworkRules(netId, interface, oldPermission, false, false);
447}
448
449bool RouteController::addToDefaultNetwork(const char* interface, Permission permission) {
450    return modifyDefaultNetworkRules(interface, permission, ADD);
451}
452
453bool RouteController::removeFromDefaultNetwork(const char* interface, Permission permission) {
454    return modifyDefaultNetworkRules(interface, permission, DEL);
455}
456
457int RouteController::addRoute(const char* interface, const char* destination,
458                              const char* nexthop, TableType tableType, unsigned uid) {
459    return modifyRoute(interface, destination, nexthop, RTM_NEWROUTE, tableType, uid);
460}
461
462int RouteController::removeRoute(const char* interface, const char* destination,
463                                 const char* nexthop, TableType tableType, unsigned uid) {
464    return modifyRoute(interface, destination, nexthop, RTM_DELROUTE, tableType, uid);
465}
466