RouteController.cpp revision 96f261e8b28048b8cb48f5a4e81822c73bb813f4
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "RouteController.h"

#include "Fwmark.h"
#include "NetdConstants.h"

#include <arpa/inet.h>
#include <errno.h>
#include <linux/fib_rules.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <logwrap/logwrap.h>
#include <map>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
#define U16_RTA_LENGTH(x) static_cast<uint16_t>(RTA_LENGTH((x)))

namespace {

// Priorities of the policy routing rules installed by this file. Each value is sent to the kernel
// as the FRA_PRIORITY attribute of a rule (see modifyIpRule()). A larger number means a lower
// priority: RouteController::Init() describes the RULE_PRIORITY_MAIN rule as having a lower
// priority than the RULE_PRIORITY_DEFAULT_NETWORK rule.
const uint32_t RULE_PRIORITY_PRIVILEGED_LEGACY     = 11000;
const uint32_t RULE_PRIORITY_PER_NETWORK_EXPLICIT  = 13000;
const uint32_t RULE_PRIORITY_PER_NETWORK_INTERFACE = 14000;
const uint32_t RULE_PRIORITY_LEGACY                = 16000;
const uint32_t RULE_PRIORITY_PER_NETWORK_NORMAL    = 17000;
const uint32_t RULE_PRIORITY_DEFAULT_NETWORK       = 19000;
const uint32_t RULE_PRIORITY_MAIN                  = 20000;
// TODO: Uncomment once we are sure everything works.
#if 0
const uint32_t RULE_PRIORITY_UNREACHABLE           = 21000;
#endif

// TODO: These should be turned into per-UID tables once the kernel supports UID-based routing.
54const int ROUTE_TABLE_PRIVILEGED_LEGACY = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 901; 55const int ROUTE_TABLE_LEGACY = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 902; 56 57const uint16_t kNetlinkRequestFlags = NLM_F_REQUEST | NLM_F_ACK; 58const uint16_t kNetlinkCreateRequestFlags = kNetlinkRequestFlags | NLM_F_CREATE | NLM_F_EXCL; 59 60std::map<std::string, uint32_t> interfaceToIndex; 61 62uint32_t getRouteTableForInterface(const char* interface) { 63 uint32_t index = if_nametoindex(interface); 64 if (index) { 65 interfaceToIndex[interface] = index; 66 } else { 67 // If the interface goes away if_nametoindex() will return 0 but we still need to know 68 // the index so we can remove the rules and routes. 69 std::map<std::string, uint32_t>::iterator it = interfaceToIndex.find(interface); 70 if (it != interfaceToIndex.end()) 71 index = it->second; 72 } 73 return index ? index + RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX : 0; 74} 75 76// Sends a netlink request and expects an ack. 77// |iov| is an array of struct iovec that contains the netlink message payload. 78// The netlink header is generated by this function based on |action| and |flags|. 79// Returns -errno if there was an error or if the kernel reported an error. 
80int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) { 81 nlmsghdr nlmsg = { 82 .nlmsg_type = action, 83 .nlmsg_flags = flags, 84 }; 85 iov[0].iov_base = &nlmsg; 86 iov[0].iov_len = sizeof(nlmsg); 87 for (int i = 0; i < iovlen; ++i) { 88 nlmsg.nlmsg_len += iov[i].iov_len; 89 } 90 91 int ret; 92 struct { 93 nlmsghdr msg; 94 nlmsgerr err; 95 } response; 96 97 sockaddr_nl kernel = {AF_NETLINK, 0, 0, 0}; 98 int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); 99 if (sock != -1 && 100 connect(sock, reinterpret_cast<sockaddr*>(&kernel), sizeof(kernel)) != -1 && 101 writev(sock, iov, iovlen) != -1 && 102 (ret = recv(sock, &response, sizeof(response), 0)) != -1) { 103 if (ret == sizeof(response)) { 104 ret = response.err.error; // Netlink errors are negative errno. 105 } else { 106 ret = -EBADMSG; 107 } 108 } else { 109 ret = -errno; 110 } 111 112 if (sock != -1) { 113 close(sock); 114 } 115 116 return ret; 117} 118 119// Adds or removes a routing rule for IPv4 and IPv6. 120// 121// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule 122// returns ENETUNREACH. 123// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is 124// ignored. 125// + If |interface| is non-NULL, the rule matches the specified outgoing interface. 126// 127// Returns 0 on success or negative errno on failure. 128int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table, uint32_t fwmark, uint32_t mask, 129 const char* interface) { 130 // The interface name must include exactly one terminating NULL and be properly padded, or older 131 // kernels will refuse to delete rules. 
132 uint8_t padding[RTA_ALIGNTO] = {0, 0, 0, 0}; 133 uint16_t paddingLength = 0; 134 size_t interfaceLength = 0; 135 char oifname[IFNAMSIZ]; 136 if (interface) { 137 interfaceLength = strlcpy(oifname, interface, IFNAMSIZ) + 1; 138 if (interfaceLength > IFNAMSIZ) { 139 return -ENAMETOOLONG; 140 } 141 paddingLength = RTA_SPACE(interfaceLength) - RTA_LENGTH(interfaceLength); 142 } 143 144 // Assemble a rule request and put it in an array of iovec structures. 145 fib_rule_hdr rule = { 146 .action = static_cast<uint8_t>(table ? FR_ACT_TO_TBL : FR_ACT_UNREACHABLE), 147 }; 148 149 rtattr fra_priority = { U16_RTA_LENGTH(sizeof(priority)), FRA_PRIORITY }; 150 rtattr fra_table = { U16_RTA_LENGTH(sizeof(table)), FRA_TABLE }; 151 rtattr fra_fwmark = { U16_RTA_LENGTH(sizeof(fwmark)), FRA_FWMARK }; 152 rtattr fra_fwmask = { U16_RTA_LENGTH(sizeof(mask)), FRA_FWMASK }; 153 rtattr fra_oifname = { U16_RTA_LENGTH(interfaceLength), FRA_OIFNAME }; 154 155 iovec iov[] = { 156 { NULL, 0 }, 157 { &rule, sizeof(rule) }, 158 { &fra_priority, sizeof(fra_priority) }, 159 { &priority, sizeof(priority) }, 160 { &fra_table, table ? sizeof(fra_table) : 0 }, 161 { &table, table ? sizeof(table) : 0 }, 162 { &fra_fwmark, mask ? sizeof(fra_fwmark) : 0 }, 163 { &fwmark, mask ? sizeof(fwmark) : 0 }, 164 { &fra_fwmask, mask ? sizeof(fra_fwmask) : 0 }, 165 { &mask, mask ? sizeof(mask) : 0 }, 166 { &fra_oifname, interface ? sizeof(fra_oifname) : 0 }, 167 { oifname, interfaceLength }, 168 { padding, paddingLength }, 169 }; 170 171 uint16_t flags = (action == RTM_NEWRULE) ? kNetlinkCreateRequestFlags : kNetlinkRequestFlags; 172 uint8_t family[] = {AF_INET, AF_INET6}; 173 for (size_t i = 0; i < ARRAY_SIZE(family); ++i) { 174 rule.family = family[i]; 175 int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov)); 176 if (ret) { 177 return ret; 178 } 179 } 180 181 return 0; 182} 183 184// Adds or deletes an IPv4 or IPv6 route. 185// Returns 0 on success or negative errno on failure. 
186int modifyIpRoute(uint16_t action, uint32_t table, const char* interface, const char* destination, 187 const char* nexthop) { 188 // At least the destination must be non-null. 189 if (!destination) { 190 return -EFAULT; 191 } 192 193 // Parse the prefix. 194 uint8_t rawAddress[sizeof(in6_addr)]; 195 uint8_t family, prefixLength; 196 int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress), 197 &prefixLength); 198 if (rawLength < 0) { 199 return rawLength; 200 } 201 202 if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) { 203 return -ENOBUFS; // Cannot happen; parsePrefix only supports IPv4 and IPv6. 204 } 205 206 // If an interface was specified, find the ifindex. 207 uint32_t ifindex; 208 if (interface) { 209 ifindex = if_nametoindex(interface); 210 if (!ifindex) { 211 return -ENODEV; 212 } 213 } 214 215 // If a nexthop was specified, parse it as the same family as the prefix. 216 uint8_t rawNexthop[sizeof(in6_addr)]; 217 if (nexthop && !inet_pton(family, nexthop, rawNexthop)) { 218 return -EINVAL; 219 } 220 221 // Assemble a rtmsg and put it in an array of iovec structures. 222 rtmsg rtmsg = { 223 .rtm_protocol = RTPROT_STATIC, 224 .rtm_type = RTN_UNICAST, 225 .rtm_family = family, 226 .rtm_dst_len = prefixLength, 227 }; 228 229 rtattr rta_table = { U16_RTA_LENGTH(sizeof(table)), RTA_TABLE }; 230 rtattr rta_oif = { U16_RTA_LENGTH(sizeof(ifindex)), RTA_OIF }; 231 rtattr rta_dst = { U16_RTA_LENGTH(rawLength), RTA_DST }; 232 rtattr rta_gateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY }; 233 234 iovec iov[] = { 235 { NULL, 0 }, 236 { &rtmsg, sizeof(rtmsg) }, 237 { &rta_table, sizeof(rta_table) }, 238 { &table, sizeof(table) }, 239 { &rta_dst, sizeof(rta_dst) }, 240 { rawAddress, static_cast<size_t>(rawLength) }, 241 { &rta_oif, interface ? sizeof(rta_oif) : 0 }, 242 { &ifindex, interface ? sizeof(ifindex) : 0 }, 243 { &rta_gateway, nexthop ? sizeof(rta_gateway) : 0 }, 244 { rawNexthop, nexthop ? 
static_cast<size_t>(rawLength) : 0 }, 245 }; 246 247 uint16_t flags = (action == RTM_NEWROUTE) ? kNetlinkCreateRequestFlags : kNetlinkRequestFlags; 248 return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov)); 249} 250 251int modifyPerNetworkRules(unsigned netId, const char* interface, Permission permission, bool add, 252 bool modifyIptables) { 253 uint32_t table = getRouteTableForInterface(interface); 254 if (!table) { 255 return -ESRCH; 256 } 257 258 uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE; 259 int ret; 260 261 Fwmark fwmark; 262 fwmark.permission = permission; 263 264 Fwmark mask; 265 mask.permission = permission; 266 267 // A rule to route traffic based on a chosen outgoing interface. 268 // 269 // Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already 270 // knows the outgoing interface (typically for link-local communications). 271 if ((ret = modifyIpRule(action, RULE_PRIORITY_PER_NETWORK_INTERFACE, table, fwmark.intValue, 272 mask.intValue, interface)) != 0) { 273 return ret; 274 } 275 276 // A rule to route traffic based on the chosen network. 277 // 278 // This is for sockets that have not explicitly requested a particular network, but have been 279 // bound to one when they called connect(). This ensures that sockets connected on a particular 280 // network stay on that network even if the default network changes. 281 fwmark.netId = netId; 282 mask.netId = FWMARK_NET_ID_MASK; 283 if ((ret = modifyIpRule(action, RULE_PRIORITY_PER_NETWORK_NORMAL, table, fwmark.intValue, 284 mask.intValue, NULL)) != 0) { 285 return ret; 286 } 287 288 // A rule to route traffic based on an explicitly chosen network. 289 // 290 // Supports apps that use the multinetwork APIs to restrict their traffic to a network. 291 // 292 // We don't really need to check the permission bits of the fwmark here, as they would've been 293 // checked at the time the netId was set into the fwmark, but we do so to be consistent. 
294 fwmark.explicitlySelected = true; 295 mask.explicitlySelected = true; 296 if ((ret = modifyIpRule(action, RULE_PRIORITY_PER_NETWORK_EXPLICIT, table, fwmark.intValue, 297 mask.intValue, NULL)) != 0) { 298 return ret; 299 } 300 301 // An iptables rule to mark incoming packets on a network with the netId of the network. 302 // 303 // This is so that the kernel can: 304 // + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, 305 // ping replies). 306 // + Mark sockets that accept connections from this interface so that the connection stays on 307 // the same interface. 308 if (modifyIptables) { 309 const char* iptablesAction = add ? "-A" : "-D"; 310 char markString[UINT32_HEX_STRLEN]; 311 snprintf(markString, sizeof(markString), "0x%x", netId); 312 if (execIptables(V4V6, "-t", "mangle", iptablesAction, "INPUT", "-i", interface, 313 "-j", "MARK", "--set-mark", markString, NULL)) { 314 return -EREMOTEIO; 315 } 316 } 317 318 return 0; 319} 320 321int modifyDefaultNetworkRules(const char* interface, Permission permission, uint16_t action) { 322 uint32_t table = getRouteTableForInterface(interface); 323 if (!table) { 324 return -ESRCH; 325 } 326 327 Fwmark fwmark; 328 fwmark.netId = 0; 329 fwmark.permission = permission; 330 331 Fwmark mask; 332 mask.netId = FWMARK_NET_ID_MASK; 333 mask.permission = permission; 334 335 return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue, 336 mask.intValue, NULL); 337} 338 339// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's directly-connected 340// route, to the main table as well. 341// Returns 0 on success or negative errno on failure. 
342int modifyRoute(const char* interface, const char* destination, const char* nexthop, 343 uint16_t action, RouteController::TableType tableType, unsigned /* uid */) { 344 uint32_t table = 0; 345 switch (tableType) { 346 case RouteController::INTERFACE: { 347 table = getRouteTableForInterface(interface); 348 break; 349 } 350 case RouteController::LEGACY: { 351 // TODO: Use the UID to assign a unique table per UID instead of this fixed table. 352 table = ROUTE_TABLE_LEGACY; 353 break; 354 } 355 case RouteController::PRIVILEGED_LEGACY: { 356 // TODO: Use the UID to assign a unique table per UID instead of this fixed table. 357 table = ROUTE_TABLE_PRIVILEGED_LEGACY; 358 break; 359 } 360 } 361 if (!table) { 362 return -ESRCH; 363 } 364 365 int ret = modifyIpRoute(action, table, interface, destination, nexthop); 366 if (ret != 0) { 367 return ret; 368 } 369 370 // If there's no nexthop, this is a directly connected route. Add it to the main table also, to 371 // let the kernel find it when validating nexthops when global routes are added. 372 if (!nexthop) { 373 ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL); 374 // A failure with action == ADD && errno == EEXIST means that the route already exists in 375 // the main table, perhaps because the kernel added it automatically as part of adding the 376 // IP address to the interface. Ignore this, but complain about everything else. 
377 if (ret != 0 && !(action == RTM_NEWROUTE && ret == -EEXIST)) { 378 return ret; 379 } 380 } 381 382 return 0; 383} 384 385bool flushRoutes(const char* interface) { 386 uint32_t table = getRouteTableForInterface(interface); 387 if (!table) { 388 return false; 389 } 390 interfaceToIndex.erase(interface); 391 392 char tableString[UINT32_STRLEN]; 393 snprintf(tableString, sizeof(tableString), "%u", table); 394 395 const char* version[] = {"-4", "-6"}; 396 for (size_t i = 0; i < ARRAY_SIZE(version); ++i) { 397 const char* argv[] = { 398 IP_PATH, 399 version[i], 400 "route" 401 "flush", 402 "table", 403 tableString, 404 }; 405 int argc = ARRAY_SIZE(argv); 406 407 if (!android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false)) { 408 return false; 409 } 410 } 411 412 return true; 413} 414 415} // namespace 416 417void RouteController::Init() { 418 // Add a new rule to look up the 'main' table, with the same selectors as the "default network" 419 // rule, but with a lower priority. Since the default network rule points to a table with a 420 // default route, the rule we're adding will never be used for normal routing lookups. However, 421 // the kernel may fall-through to it to find directly-connected routes when it validates that a 422 // nexthop (in a route being added) is reachable. 423 Fwmark fwmark; 424 fwmark.netId = 0; 425 426 Fwmark mask; 427 mask.netId = FWMARK_NET_ID_MASK; 428 429 modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_MAIN, RT_TABLE_MAIN, fwmark.intValue, mask.intValue, 430 NULL); 431 432 // Add rules to allow lookup of legacy routes. 433 // 434 // TODO: Remove these once the kernel supports UID-based routing. Instead, add them on demand 435 // when routes are added. 
436 fwmark.netId = 0; 437 mask.netId = 0; 438 439 fwmark.explicitlySelected = false; 440 mask.explicitlySelected = true; 441 442 modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY, ROUTE_TABLE_LEGACY, fwmark.intValue, 443 mask.intValue, NULL); 444 445 fwmark.permission = PERMISSION_CONNECTIVITY_INTERNAL; 446 mask.permission = PERMISSION_CONNECTIVITY_INTERNAL; 447 448 modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_PRIVILEGED_LEGACY, ROUTE_TABLE_PRIVILEGED_LEGACY, 449 fwmark.intValue, mask.intValue, NULL); 450 451// TODO: Uncomment once we are sure everything works. 452#if 0 453 // Add a rule to preempt the pre-defined "from all lookup main" rule. This ensures that packets 454 // that are already marked with a specific NetId don't fall-through to the main table. 455 modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, 0, 0, 0, NULL); 456#endif 457} 458 459int RouteController::addInterfaceToNetwork(unsigned netId, const char* interface, 460 Permission permission) { 461 return modifyPerNetworkRules(netId, interface, permission, true, true); 462} 463 464int RouteController::removeInterfaceFromNetwork(unsigned netId, const char* interface, 465 Permission permission) { 466 return modifyPerNetworkRules(netId, interface, permission, false, true) && 467 flushRoutes(interface); 468} 469 470int RouteController::modifyNetworkPermission(unsigned netId, const char* interface, 471 Permission oldPermission, Permission newPermission) { 472 // Add the new rules before deleting the old ones, to avoid race conditions. 
473 return modifyPerNetworkRules(netId, interface, newPermission, true, false) && 474 modifyPerNetworkRules(netId, interface, oldPermission, false, false); 475} 476 477int RouteController::addToDefaultNetwork(const char* interface, Permission permission) { 478 return modifyDefaultNetworkRules(interface, permission, RTM_NEWRULE); 479} 480 481int RouteController::removeFromDefaultNetwork(const char* interface, Permission permission) { 482 return modifyDefaultNetworkRules(interface, permission, RTM_DELRULE); 483} 484 485int RouteController::addRoute(const char* interface, const char* destination, 486 const char* nexthop, TableType tableType, unsigned uid) { 487 return modifyRoute(interface, destination, nexthop, RTM_NEWROUTE, tableType, uid); 488} 489 490int RouteController::removeRoute(const char* interface, const char* destination, 491 const char* nexthop, TableType tableType, unsigned uid) { 492 return modifyRoute(interface, destination, nexthop, RTM_DELROUTE, tableType, uid); 493} 494