RouteController.cpp revision 7f972fb1cd3c26af76779a7a3220b9cf5fb63a0a
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "RouteController.h"

#include "Fwmark.h"
#include "NetdConstants.h"

#include <arpa/inet.h>
#include <errno.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <logwrap/logwrap.h>
#include <map>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
#define U16_RTA_LENGTH(x) static_cast<uint16_t>(RTA_LENGTH((x)))

namespace {

// Priorities passed to "ip rule ... priority <N>" for each kind of rule installed by this file.
// A numerically larger value is a lower priority: RouteController::Init() describes the
// RULE_PRIORITY_MAIN rule as having a lower priority than the RULE_PRIORITY_DEFAULT_NETWORK rule.
const uint32_t RULE_PRIORITY_PRIVILEGED_LEGACY     = 11000;
const uint32_t RULE_PRIORITY_PER_NETWORK_EXPLICIT  = 13000;
const uint32_t RULE_PRIORITY_PER_NETWORK_INTERFACE = 14000;
const uint32_t RULE_PRIORITY_LEGACY                = 16000;
const uint32_t RULE_PRIORITY_PER_NETWORK_NORMAL    = 17000;
const uint32_t RULE_PRIORITY_DEFAULT_NETWORK       = 19000;
const uint32_t RULE_PRIORITY_MAIN                  = 20000;
// TODO: Uncomment once we are sure everything works.
#if 0
const uint32_t RULE_PRIORITY_UNREACHABLE           = 21000;
#endif

// TODO: These should be turned into per-UID tables once the kernel supports UID-based routing.
53const int ROUTE_TABLE_PRIVILEGED_LEGACY = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 901; 54const int ROUTE_TABLE_LEGACY = RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX - 902; 55 56std::map<std::string, uint32_t> interfaceToIndex; 57 58uint32_t getRouteTableForInterface(const char* interface) { 59 uint32_t index = if_nametoindex(interface); 60 if (index) { 61 interfaceToIndex[interface] = index; 62 } else { 63 // If the interface goes away if_nametoindex() will return 0 but we still need to know 64 // the index so we can remove the rules and routes. 65 std::map<std::string, uint32_t>::iterator it = interfaceToIndex.find(interface); 66 if (it != interfaceToIndex.end()) 67 index = it->second; 68 } 69 return index ? index + RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX : 0; 70} 71 72// Adds or removes a routing rule for IPv4 and IPv6. 73// 74// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule 75// returns ENETUNREACH. 76// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is 77// ignored. 78// + If |interface| is non-NULL, the rule matches the specified outgoing interface. 
79bool runIpRuleCommand(const char* action, uint32_t priority, uint32_t table, uint32_t fwmark, 80 uint32_t mask, const char* interface) { 81 char priorityString[UINT32_STRLEN]; 82 snprintf(priorityString, sizeof(priorityString), "%u", priority); 83 84 char tableString[UINT32_STRLEN]; 85 snprintf(tableString, sizeof(tableString), "%u", table); 86 87 char fwmarkString[sizeof("0x12345678/0x12345678")]; 88 snprintf(fwmarkString, sizeof(fwmarkString), "0x%x/0x%x", fwmark, mask); 89 90 const char* version[] = {"-4", "-6"}; 91 for (size_t i = 0; i < ARRAY_SIZE(version); ++i) { 92 int argc = 0; 93 const char* argv[16]; 94 95 argv[argc++] = IP_PATH; 96 argv[argc++] = version[i]; 97 argv[argc++] = "rule"; 98 argv[argc++] = action; 99 argv[argc++] = "priority"; 100 argv[argc++] = priorityString; 101 if (table) { 102 argv[argc++] = "table"; 103 argv[argc++] = tableString; 104 } else { 105 argv[argc++] = "unreachable"; 106 } 107 if (mask) { 108 argv[argc++] = "fwmark"; 109 argv[argc++] = fwmarkString; 110 } 111 if (interface) { 112 argv[argc++] = "oif"; 113 argv[argc++] = interface; 114 } 115 if (android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false)) { 116 return false; 117 } 118 } 119 120 return true; 121} 122 123// Adds or deletes an IPv4 or IPv6 route. 124// Returns 0 on success or negative errno on failure. 125int modifyIpRoute(uint16_t action, uint32_t table, const char* interface, const char* destination, 126 const char* nexthop) { 127 // At least the destination must be non-null. 128 if (!destination) { 129 return -EFAULT; 130 } 131 132 // Parse the prefix. 133 uint8_t rawAddress[sizeof(in6_addr)]; 134 uint8_t family, prefixLength; 135 int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress), 136 &prefixLength); 137 if (rawLength < 0) { 138 return rawLength; 139 } 140 141 if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) { 142 return -ENOBUFS; // Cannot happen; parsePrefix only supports IPv4 and IPv6. 
143 } 144 145 // If an interface was specified, find the ifindex. 146 uint32_t ifindex; 147 if (interface) { 148 ifindex = if_nametoindex(interface); 149 if (!ifindex) { 150 return -ENODEV; 151 } 152 } 153 154 // If a nexthop was specified, parse it as the same family as the prefix. 155 uint8_t rawNexthop[sizeof(in6_addr)]; 156 if (nexthop && !inet_pton(family, nexthop, rawNexthop)) { 157 return -EINVAL; 158 } 159 160 // Assemble a netlink request and put it in an array of iovec structures. 161 nlmsghdr nlmsg = { 162 .nlmsg_type = action, 163 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 164 }; 165 rtmsg rtmsg = { 166 .rtm_protocol = RTPROT_STATIC, 167 .rtm_type = RTN_UNICAST, 168 .rtm_family = family, 169 .rtm_dst_len = prefixLength, 170 }; 171 rtattr rta_table = { U16_RTA_LENGTH(sizeof(table)), RTA_TABLE }; 172 rtattr rta_oif = { U16_RTA_LENGTH(sizeof(ifindex)), RTA_OIF }; 173 rtattr rta_dst = { U16_RTA_LENGTH(rawLength), RTA_DST }; 174 rtattr rta_gateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY }; 175 if (action == RTM_NEWROUTE) { 176 nlmsg.nlmsg_flags |= (NLM_F_CREATE | NLM_F_EXCL); 177 } 178 179 iovec iov[] = { 180 { &nlmsg, sizeof(nlmsg) }, 181 { &rtmsg, sizeof(rtmsg) }, 182 { &rta_table, sizeof(rta_table) }, 183 { &table, sizeof(table) }, 184 { &rta_dst, sizeof(rta_dst) }, 185 { rawAddress, static_cast<size_t>(rawLength) }, 186 { &rta_oif, interface ? sizeof(rta_oif) : 0 }, 187 { &ifindex, interface ? sizeof(ifindex) : 0 }, 188 { &rta_gateway, nexthop ? sizeof(rta_gateway) : 0 }, 189 { rawNexthop, nexthop ? 
static_cast<size_t>(rawLength) : 0 }, 190 }; 191 int iovlen = ARRAY_SIZE(iov); 192 193 for (int i = 0; i < iovlen; ++i) { 194 nlmsg.nlmsg_len += iov[i].iov_len; 195 } 196 197 int ret; 198 struct { 199 nlmsghdr msg; 200 nlmsgerr err; 201 } response; 202 203 sockaddr_nl kernel = {AF_NETLINK, 0, 0, 0}; 204 int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); 205 if (sock != -1 && 206 connect(sock, reinterpret_cast<sockaddr *>(&kernel), sizeof(kernel)) != -1 && 207 writev(sock, iov, iovlen) != -1 && 208 (ret = recv(sock, &response, sizeof(response), 0)) != -1) { 209 if (ret == sizeof(response)) { 210 ret = response.err.error; // Netlink errors are negative errno. 211 } else { 212 ret = -EBADMSG; 213 } 214 } else { 215 ret = -errno; 216 } 217 218 if (sock != -1) { 219 close(sock); 220 } 221 222 return ret; 223} 224 225bool modifyPerNetworkRules(unsigned netId, const char* interface, Permission permission, bool add, 226 bool modifyIptables) { 227 uint32_t table = getRouteTableForInterface(interface); 228 if (!table) { 229 return false; 230 } 231 232 const char* action = add ? ADD : DEL; 233 234 Fwmark fwmark; 235 fwmark.permission = permission; 236 237 Fwmark mask; 238 mask.permission = permission; 239 240 // A rule to route traffic based on a chosen outgoing interface. 241 // 242 // Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already 243 // knows the outgoing interface (typically for link-local communications). 244 if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_INTERFACE, table, fwmark.intValue, 245 mask.intValue, interface)) { 246 return false; 247 } 248 249 // A rule to route traffic based on the chosen network. 250 // 251 // This is for sockets that have not explicitly requested a particular network, but have been 252 // bound to one when they called connect(). This ensures that sockets connected on a particular 253 // network stay on that network even if the default network changes. 
254 fwmark.netId = netId; 255 mask.netId = FWMARK_NET_ID_MASK; 256 if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_NORMAL, table, fwmark.intValue, 257 mask.intValue, NULL)) { 258 return false; 259 } 260 261 // A rule to route traffic based on an explicitly chosen network. 262 // 263 // Supports apps that use the multinetwork APIs to restrict their traffic to a network. 264 // 265 // We don't really need to check the permission bits of the fwmark here, as they would've been 266 // checked at the time the netId was set into the fwmark, but we do so to be consistent. 267 fwmark.explicitlySelected = true; 268 mask.explicitlySelected = true; 269 if (!runIpRuleCommand(action, RULE_PRIORITY_PER_NETWORK_EXPLICIT, table, fwmark.intValue, 270 mask.intValue, NULL)) { 271 return false; 272 } 273 274 // An iptables rule to mark incoming packets on a network with the netId of the network. 275 // 276 // This is so that the kernel can: 277 // + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, 278 // ping replies). 279 // + Mark sockets that accept connections from this interface so that the connection stays on 280 // the same interface. 281 if (modifyIptables) { 282 action = add ? 
"-A" : "-D"; 283 char markString[UINT32_HEX_STRLEN]; 284 snprintf(markString, sizeof(markString), "0x%x", netId); 285 if (execIptables(V4V6, "-t", "mangle", action, "INPUT", "-i", interface, "-j", "MARK", 286 "--set-mark", markString, NULL)) { 287 return false; 288 } 289 } 290 291 return true; 292} 293 294bool modifyDefaultNetworkRules(const char* interface, Permission permission, const char* action) { 295 uint32_t table = getRouteTableForInterface(interface); 296 if (!table) { 297 return false; 298 } 299 300 Fwmark fwmark; 301 fwmark.netId = 0; 302 fwmark.permission = permission; 303 304 Fwmark mask; 305 mask.netId = FWMARK_NET_ID_MASK; 306 mask.permission = permission; 307 308 return runIpRuleCommand(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue, 309 mask.intValue, NULL); 310} 311 312// Adds or removes an IPv4 or IPv6 route to the specified table and, if it's directly-connected 313// route, to the main table as well. 314// Returns 0 on success or negative errno on failure. 315int modifyRoute(const char* interface, const char* destination, const char* nexthop, 316 int action, RouteController::TableType tableType, unsigned /* uid */) { 317 uint32_t table = 0; 318 switch (tableType) { 319 case RouteController::INTERFACE: { 320 table = getRouteTableForInterface(interface); 321 break; 322 } 323 case RouteController::LEGACY: { 324 // TODO: Use the UID to assign a unique table per UID instead of this fixed table. 325 table = ROUTE_TABLE_LEGACY; 326 break; 327 } 328 case RouteController::PRIVILEGED_LEGACY: { 329 // TODO: Use the UID to assign a unique table per UID instead of this fixed table. 330 table = ROUTE_TABLE_PRIVILEGED_LEGACY; 331 break; 332 } 333 } 334 if (!table) { 335 return -ESRCH; 336 } 337 338 int ret = modifyIpRoute(action, table, interface, destination, nexthop); 339 if (ret != 0) { 340 return ret; 341 } 342 343 // If there's no nexthop, this is a directly connected route. 
Add it to the main table also, to 344 // let the kernel find it when validating nexthops when global routes are added. 345 if (!nexthop) { 346 ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL); 347 // A failure with action == ADD && errno == EEXIST means that the route already exists in 348 // the main table, perhaps because the kernel added it automatically as part of adding the 349 // IP address to the interface. Ignore this, but complain about everything else. 350 if (ret != 0 && !(action == RTM_NEWROUTE && ret == -EEXIST)) { 351 return ret; 352 } 353 } 354 355 return 0; 356} 357 358bool flushRoutes(const char* interface) { 359 uint32_t table = getRouteTableForInterface(interface); 360 if (!table) { 361 return false; 362 } 363 interfaceToIndex.erase(interface); 364 365 char tableString[UINT32_STRLEN]; 366 snprintf(tableString, sizeof(tableString), "%u", table); 367 368 const char* version[] = {"-4", "-6"}; 369 for (size_t i = 0; i < ARRAY_SIZE(version); ++i) { 370 const char* argv[] = { 371 IP_PATH, 372 version[i], 373 "route" 374 "flush", 375 "table", 376 tableString, 377 }; 378 int argc = ARRAY_SIZE(argv); 379 380 if (!android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false)) { 381 return false; 382 } 383 } 384 385 return true; 386} 387 388} // namespace 389 390void RouteController::Init() { 391 // Add a new rule to look up the 'main' table, with the same selectors as the "default network" 392 // rule, but with a lower priority. Since the default network rule points to a table with a 393 // default route, the rule we're adding will never be used for normal routing lookups. However, 394 // the kernel may fall-through to it to find directly-connected routes when it validates that a 395 // nexthop (in a route being added) is reachable. 
396 Fwmark fwmark; 397 fwmark.netId = 0; 398 399 Fwmark mask; 400 mask.netId = FWMARK_NET_ID_MASK; 401 402 runIpRuleCommand(ADD, RULE_PRIORITY_MAIN, RT_TABLE_MAIN, fwmark.intValue, mask.intValue, NULL); 403 404 // Add rules to allow lookup of legacy routes. 405 // 406 // TODO: Remove these once the kernel supports UID-based routing. Instead, add them on demand 407 // when routes are added. 408 fwmark.netId = 0; 409 mask.netId = 0; 410 411 fwmark.explicitlySelected = false; 412 mask.explicitlySelected = true; 413 414 runIpRuleCommand(ADD, RULE_PRIORITY_LEGACY, ROUTE_TABLE_LEGACY, fwmark.intValue, mask.intValue, 415 NULL); 416 417 fwmark.permission = PERMISSION_CONNECTIVITY_INTERNAL; 418 mask.permission = PERMISSION_CONNECTIVITY_INTERNAL; 419 420 runIpRuleCommand(ADD, RULE_PRIORITY_PRIVILEGED_LEGACY, ROUTE_TABLE_PRIVILEGED_LEGACY, 421 fwmark.intValue, mask.intValue, NULL); 422 423// TODO: Uncomment once we are sure everything works. 424#if 0 425 // Add a rule to preempt the pre-defined "from all lookup main" rule. This ensures that packets 426 // that are already marked with a specific NetId don't fall-through to the main table. 427 runIpRuleCommand(ADD, RULE_PRIORITY_UNREACHABLE, 0, 0, 0, NULL); 428#endif 429} 430 431bool RouteController::addInterfaceToNetwork(unsigned netId, const char* interface, 432 Permission permission) { 433 return modifyPerNetworkRules(netId, interface, permission, true, true); 434} 435 436bool RouteController::removeInterfaceFromNetwork(unsigned netId, const char* interface, 437 Permission permission) { 438 return modifyPerNetworkRules(netId, interface, permission, false, true) && 439 flushRoutes(interface); 440} 441 442bool RouteController::modifyNetworkPermission(unsigned netId, const char* interface, 443 Permission oldPermission, Permission newPermission) { 444 // Add the new rules before deleting the old ones, to avoid race conditions. 
445 return modifyPerNetworkRules(netId, interface, newPermission, true, false) && 446 modifyPerNetworkRules(netId, interface, oldPermission, false, false); 447} 448 449bool RouteController::addToDefaultNetwork(const char* interface, Permission permission) { 450 return modifyDefaultNetworkRules(interface, permission, ADD); 451} 452 453bool RouteController::removeFromDefaultNetwork(const char* interface, Permission permission) { 454 return modifyDefaultNetworkRules(interface, permission, DEL); 455} 456 457int RouteController::addRoute(const char* interface, const char* destination, 458 const char* nexthop, TableType tableType, unsigned uid) { 459 return modifyRoute(interface, destination, nexthop, RTM_NEWROUTE, tableType, uid); 460} 461 462int RouteController::removeRoute(const char* interface, const char* destination, 463 const char* nexthop, TableType tableType, unsigned uid) { 464 return modifyRoute(interface, destination, nexthop, RTM_DELROUTE, tableType, uid); 465} 466