/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.net.ip;

import com.android.internal.annotations.GuardedBy;

import android.content.Context;
import android.net.LinkAddress;
import android.net.LinkProperties;
import android.net.LinkProperties.ProvisioningChange;
import android.net.ProxyInfo;
import android.net.RouteInfo;
import android.net.metrics.IpConnectivityLog;
import android.net.metrics.IpReachabilityEvent;
import android.net.netlink.NetlinkConstants;
import android.net.netlink.NetlinkErrorMessage;
import android.net.netlink.NetlinkMessage;
import android.net.netlink.NetlinkSocket;
import android.net.netlink.RtNetlinkNeighborMessage;
import android.net.netlink.StructNdaCacheInfo;
import android.net.netlink.StructNdMsg;
import android.net.netlink.StructNlMsgHdr;
import android.net.util.AvoidBadWifiTracker;
import android.os.PowerManager;
import android.os.SystemClock;
import android.system.ErrnoException;
import android.system.NetlinkSocketAddress;
import android.system.OsConstants;
import android.util.Log;

import java.io.InterruptedIOException;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.SocketAddress;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 * IpReachabilityMonitor.
 *
 * Monitors on-link IP reachability and notifies callers whenever any on-link
 * addresses of interest appear to have become unresponsive.
 *
 * This code does not concern itself with "why" a neighbour might have become
 * unreachable. Instead, it primarily reacts to the kernel's notion of IP
 * reachability for each of the neighbours we know to be critically important
 * to normal network connectivity. As such, it is often "just the messenger":
 * the neighbours about which it warns are already deemed by the kernel to have
 * become unreachable.
 *
 *
 * How it works:
 *
 *   1. The "on-link neighbours of interest" found in a given LinkProperties
 *      instance are added to a "watch list" via #updateLinkProperties().
 *      This usually means all default gateways and any on-link DNS servers.
 *
 *   2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
 *      RTM_DELNEIGH), watching only for neighbours in the watch list.
 *
 *        - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
 *          even NUD_PROBE is perfectly normal; we merely record the new state.
 *
 *        - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
 *          to garbage collection.  This is not necessarily of immediate
 *          concern; we record the neighbour as moving to NUD_NONE.
 *
 *        - A neighbour transitioning to NUD_FAILED (for any reason) is
 *          critically important and is handled as described below in #4.
 *
 *   3. All on-link neighbours in the watch list can be forcibly "probed" by
 *      calling #probeAll(). This should be called whenever it is important to
 *      verify that critical neighbours on the link are still reachable, e.g.
 *      when roaming between BSSIDs.
 *
 *        - The kernel will send unicast ARP requests for IPv4 neighbours and
 *          unicast NS packets for IPv6 neighbours.  The expected replies will
 *          likely be unicast.
 *
 *        - The forced probing is done holding a wakelock. The kernel may,
 *          however, initiate probing of a neighbor on its own, i.e. whenever
 *          a neighbour has expired from NUD_DELAY.
 *
 *        - The kernel sends:
 *
 *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
 *
 *          number of probes (usually 3) every:
 *
 *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
 *
 *          number of milliseconds (usually 1000ms). This normally results in
 *          3 unicast packets, 1 per second.
 *
 *        - If no response is received to any of the probe packets, the kernel
 *          marks the neighbour as being in state NUD_FAILED, and the listening
 *          process in #2 will learn of it.
 *
 *   4. We call the supplied Callback#notifyLost() function if the loss of a
 *      neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
 *      become incomplete (a loss of provisioning).
 *
 *        - For example, losing all our IPv4 on-link DNS servers (or losing
 *          our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
 *          provisioning; Callback#notifyLost() would be called.
 *
 *        - Since it can be non-trivial to reacquire certain IP provisioning
 *          state it may be best for the link to disconnect completely and
 *          reconnect afresh.
 *
 * @hide
 */
public class IpReachabilityMonitor {
    private static final String TAG = "IpReachabilityMonitor";
    private static final boolean DBG = false;
    private static final boolean VDBG = false;

    // Send/receive timeout, in milliseconds, for the one-shot netlink socket
    // used to ask the kernel to probe a single neighbor.
    private static final long PROBE_IO_TIMEOUT_MS = 300L;

    /** Receives notification when the loss of a neighbor causes a loss of provisioning. */
    public interface Callback {
        // This callback function must execute as quickly as possible as it is
        // run on the same thread that listens to kernel neighbor updates.
        //
        // TODO: refactor to something like notifyProvisioningLost(String msg).
        public void notifyLost(InetAddress ip, String logMsg);
    }

    private final Object mLock = new Object();
    private final PowerManager.WakeLock mWakeLock;
    private final String mInterfaceName;
    private final int mInterfaceIndex;
    private final Callback mCallback;
    private final AvoidBadWifiTracker mAvoidBadWifiTracker;
    private final NetlinkSocketObserver mNetlinkSocketObserver;
    private final Thread mObserverThread;
    private final IpConnectivityLog mMetricsLog = new IpConnectivityLog();
    @GuardedBy("mLock")
    private LinkProperties mLinkProperties = new LinkProperties();
    // TODO: consider a map to a private NeighborState class holding more
    // information than a single NUD state entry.
    @GuardedBy("mLock")
    private Map<InetAddress, Short> mIpWatchList = new HashMap<>();
    @GuardedBy("mLock")
    private int mIpWatchListVersion;
    @GuardedBy("mLock")
    private boolean mRunning;
    // Time in milliseconds of the last forced probe request.
    private volatile long mLastProbeTimeMs;

    /**
     * Make the kernel perform neighbor reachability detection (IPv4 ARP or IPv6 ND)
     * for the given IP address on the specified interface index.
     *
     * @param ifIndex index of the interface on which to probe
     * @param ip address of the neighbor to probe
     * @return 0 if the request was successfully passed to the kernel; otherwise return
     *         a non-zero error code.
     */
    private static int probeNeighbor(int ifIndex, InetAddress ip) {
        final String msgSnippet = "probing ip=" + ip.getHostAddress() + "%" + ifIndex;
        if (DBG) { Log.d(TAG, msgSnippet); }

        final byte[] msg = RtNetlinkNeighborMessage.newNewNeighborMessage(
                1, ip, StructNdMsg.NUD_PROBE, ifIndex, null);

        // Reported unless the kernel answers with a well-formed netlink error message.
        int errno = -OsConstants.EPROTO;
        try (NetlinkSocket nlSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE)) {
            nlSocket.connectToKernel();
            nlSocket.sendMessage(msg, 0, msg.length, PROBE_IO_TIMEOUT_MS);
            final ByteBuffer bytes = nlSocket.recvMessage(PROBE_IO_TIMEOUT_MS);
            // recvMessage() guaranteed to not return null if it did not throw.
            final NetlinkMessage response = NetlinkMessage.parse(bytes);
            // instanceof is false for null, so no separate null check is needed here.
            if (response instanceof NetlinkErrorMessage
                    && ((NetlinkErrorMessage) response).getNlMsgError() != null) {
                errno = ((NetlinkErrorMessage) response).getNlMsgError().error;
                if (errno != 0) {
                    // TODO: consider ignoring EINVAL (-22), which appears to be
                    // normal when probing a neighbor for which the kernel does
                    // not already have / no longer has a link layer address.
                    Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + response.toString());
                }
            } else {
                final String errmsg;
                if (response == null) {
                    // Unparsable reply: rewind and dump the raw bytes instead.
                    bytes.position(0);
                    errmsg = "raw bytes: " + NetlinkConstants.hexify(bytes);
                } else {
                    errmsg = response.toString();
                }
                Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + errmsg);
            }
        } catch (ErrnoException e) {
            Log.e(TAG, "Error " + msgSnippet, e);
            errno = -e.errno;
        } catch (InterruptedIOException e) {
            Log.e(TAG, "Error " + msgSnippet, e);
            errno = -OsConstants.ETIMEDOUT;
        } catch (SocketException e) {
            Log.e(TAG, "Error " + msgSnippet, e);
            errno = -OsConstants.EIO;
        }
        return errno;
    }

    /**
     * Creates a monitor for the named interface without an AvoidBadWifiTracker.
     *
     * @throws IllegalArgumentException if the interface cannot be looked up
     */
    public IpReachabilityMonitor(Context context, String ifName, Callback callback) {
        this(context, ifName, callback, null);
    }

    /**
     * Creates a monitor for the named interface and starts its observer thread.
     *
     * @param context used to obtain the PowerManager for the probe wakelock
     * @param ifName name of the interface to monitor
     * @param callback notified on loss of provisioning; may be null
     * @param tracker consulted when deciding whether to drop failed IPv6 DNS
     *        servers; may be null, in which case they are always dropped
     * @throws IllegalArgumentException if the interface cannot be looked up
     */
    public IpReachabilityMonitor(Context context, String ifName, Callback callback,
            AvoidBadWifiTracker tracker) throws IllegalArgumentException {
        mInterfaceName = ifName;

        final NetworkInterface netIf;
        try {
            // getByName() rejects a null name with a NullPointerException and
            // returns null for an unknown interface; test for both explicitly
            // rather than catching NullPointerException.
            netIf = (ifName != null) ? NetworkInterface.getByName(ifName) : null;
        } catch (SocketException e) {
            throw new IllegalArgumentException("invalid interface '" + ifName + "': ", e);
        }
        if (netIf == null) {
            throw new IllegalArgumentException("invalid interface '" + ifName + "': not found");
        }
        mInterfaceIndex = netIf.getIndex();

        mWakeLock = ((PowerManager) context.getSystemService(Context.POWER_SERVICE)).newWakeLock(
                PowerManager.PARTIAL_WAKE_LOCK, TAG + "." + mInterfaceName);
        mCallback = callback;
        mAvoidBadWifiTracker = tracker;
        mNetlinkSocketObserver = new NetlinkSocketObserver();
        mObserverThread = new Thread(mNetlinkSocketObserver);

        // Mark the monitor as running *before* the observer thread starts. If
        // the thread itself set this flag, a stop() racing with thread start-up
        // could be overwritten and the thread would then never terminate.
        synchronized (mLock) { mRunning = true; }
        mObserverThread.start();
    }

    /**
     * Stops monitoring: clears the running flag and the watch list, then closes
     * the observer's netlink socket.
     */
    public void stop() {
        synchronized (mLock) { mRunning = false; }
        clearLinkProperties();
        mNetlinkSocketObserver.clearNetlinkSocket();
    }

    // TODO: add a public dump() method that can be called during a bug report.

    /** Returns a one-line description of the interface, watch list version and entries. */
    private String describeWatchList() {
        final String delimiter = ", ";
        final StringBuilder sb = new StringBuilder();
        synchronized (mLock) {
            sb.append("iface{").append(mInterfaceName).append("/")
                    .append(mInterfaceIndex).append("}, ");
            sb.append("v{").append(mIpWatchListVersion).append("}, ");
            sb.append("ntable=[");
            boolean firstTime = true;
            for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
                if (firstTime) {
                    firstTime = false;
                } else {
                    sb.append(delimiter);
                }
                sb.append(entry.getKey().getHostAddress()).append("/")
                        .append(StructNdMsg.stringForNudState(entry.getValue()));
            }
            sb.append("]");
        }
        return sb.toString();
    }

    /** Returns true if the monitor is running and {@code ip} is in the watch list. */
    private boolean isWatching(InetAddress ip) {
        synchronized (mLock) {
            return mRunning && mIpWatchList.containsKey(ip);
        }
    }

    /** Returns the current value of the running flag. */
    private boolean stillRunning() {
        synchronized (mLock) {
            return mRunning;
        }
    }

    /** Returns true if some gateway-less route in {@code routes} matches {@code ip}. */
    private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
        for (RouteInfo route : routes) {
            if (!route.hasGateway() && route.matches(ip)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the recorded NUD state for {@code ip}, or NUD_NONE if it is not
     * in the watch list. Callers must hold mLock.
     */
    private short getNeighborStateLocked(InetAddress ip) {
        final Short state = mIpWatchList.get(ip);
        return (state != null) ? state : StructNdMsg.NUD_NONE;
    }

    /**
     * Rebuilds the watch list from {@code lp}: every on-link gateway and every
     * on-link DNS server is watched. States already recorded for addresses
     * that remain in the list are carried over.
     *
     * A request for a different interface is logged and ignored.
     */
    public void updateLinkProperties(LinkProperties lp) {
        if (!mInterfaceName.equals(lp.getInterfaceName())) {
            // TODO: figure out whether / how to cope with interface changes.
            Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
                    "' does not match: " + mInterfaceName);
            return;
        }

        synchronized (mLock) {
            mLinkProperties = new LinkProperties(lp);
            final Map<InetAddress, Short> newIpWatchList = new HashMap<>();

            final List<RouteInfo> routes = mLinkProperties.getRoutes();
            for (RouteInfo route : routes) {
                if (route.hasGateway()) {
                    final InetAddress gw = route.getGateway();
                    if (isOnLink(routes, gw)) {
                        newIpWatchList.put(gw, getNeighborStateLocked(gw));
                    }
                }
            }

            for (InetAddress nameserver : lp.getDnsServers()) {
                if (isOnLink(routes, nameserver)) {
                    newIpWatchList.put(nameserver, getNeighborStateLocked(nameserver));
                }
            }

            mIpWatchList = newIpWatchList;
            mIpWatchListVersion++;
        }
        if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
    }

    /** Clears the stored LinkProperties and empties the watch list. */
    public void clearLinkProperties() {
        synchronized (mLock) {
            mLinkProperties.clear();
            mIpWatchList.clear();
            mIpWatchListVersion++;
        }
        if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
    }

    /**
     * Evaluates whether removing every watched neighbor currently recorded as
     * NUD_FAILED from the link configuration would constitute a loss of
     * provisioning, invokes the callback if so, and logs a metrics event.
     *
     * @param msg description of the triggering neighbor event, used for logging
     */
    private void handleNeighborLost(String msg) {
        InetAddress ip = null;
        final ProvisioningChange delta;
        synchronized (mLock) {
            // Hypothetical configuration with all failed neighbors removed.
            final LinkProperties whatIfLp = new LinkProperties(mLinkProperties);

            for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
                if (entry.getValue() != StructNdMsg.NUD_FAILED) {
                    continue;
                }

                ip = entry.getKey();
                for (RouteInfo route : mLinkProperties.getRoutes()) {
                    if (ip.equals(route.getGateway())) {
                        whatIfLp.removeRoute(route);
                    }
                }

                if (avoidingBadLinks() || !(ip instanceof Inet6Address)) {
                    // We should do this unconditionally, but alas we cannot: b/31827713.
                    whatIfLp.removeDnsServer(ip);
                }
            }

            delta = LinkProperties.compareProvisioning(mLinkProperties, whatIfLp);
        }

        if (delta == ProvisioningChange.LOST_PROVISIONING) {
            final String logMsg = "FAILURE: LOST_PROVISIONING, " + msg;
            Log.w(TAG, logMsg);
            if (mCallback != null) {
                // TODO: remove |ip| when the callback signature no longer has
                // an InetAddress argument.
                mCallback.notifyLost(ip, logMsg);
            }
        }
        logNudFailed(delta);
    }

    /** Returns the tracker's current value, or true when no tracker was supplied. */
    private boolean avoidingBadLinks() {
        return (mAvoidBadWifiTracker != null) ? mAvoidBadWifiTracker.currentValue() : true;
    }

    /**
     * Asks the kernel to probe every neighbor in the watch list, holding a
     * timed wakelock while the probes are outstanding, and records the time of
     * the request.
     */
    public void probeAll() {
        // Snapshot the targets so that probing happens without holding mLock.
        final Set<InetAddress> ipProbeList = new HashSet<InetAddress>();
        synchronized (mLock) {
            ipProbeList.addAll(mIpWatchList.keySet());
        }

        if (!ipProbeList.isEmpty() && stillRunning()) {
            // Keep the CPU awake long enough to allow all ARP/ND
            // probes a reasonable chance at success. See b/23197666.
            //
            // The wakelock we use is (by default) refcounted, and this version
            // of acquire(timeout) queues a release message to keep acquisitions
            // and releases balanced.
            mWakeLock.acquire(getProbeWakeLockDuration());
        }

        for (InetAddress target : ipProbeList) {
            if (!stillRunning()) {
                break;
            }
            final int returnValue = probeNeighbor(mInterfaceIndex, target);
            logEvent(IpReachabilityEvent.PROBE, returnValue);
        }
        mLastProbeTimeMs = SystemClock.elapsedRealtime();
    }

    /** Returns how long, in milliseconds, the wakelock is held for a round of probes. */
    private static long getProbeWakeLockDuration() {
        // Ideally, this would be computed by examining the values of:
        //
        //     /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit
        //
        // and:
        //
        //     /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms
        //
        // For now, just make some assumptions.
        final long numUnicastProbes = 3;
        final long retransTimeMs = 1000;
        final long gracePeriodMs = 500;
        return (numUnicastProbes * retransTimeMs) + gracePeriodMs;
    }

    /** Logs a metrics event combining {@code probeType} with the low byte of {@code errorCode}. */
    private void logEvent(int probeType, int errorCode) {
        final int eventType = probeType | (errorCode & 0xff);
        mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
    }

    /**
     * Logs a NUD failure metrics event. The failure is attributed to a forced
     * probe when it occurs within the probe wakelock duration of the last
     * probeAll() request.
     */
    private void logNudFailed(ProvisioningChange delta) {
        final long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs;
        final boolean isFromProbe = (duration < getProbeWakeLockDuration());
        final boolean isProvisioningLost = (delta == ProvisioningChange.LOST_PROVISIONING);
        final int eventType =
                IpReachabilityEvent.nudFailureEventType(isFromProbe, isProvisioningLost);
        mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
    }

    // TODO: simplify the number of objects by making this extend Thread.
    /**
     * Listens on a netlink socket for neighbor messages and updates the watch
     * list accordingly. Runs on its own thread until the running flag is
     * cleared or receiving fails.
     */
    private final class NetlinkSocketObserver implements Runnable {
        // Written by the observer thread, read by stop() on the caller's thread.
        private volatile NetlinkSocket mSocket;

        @Override
        public void run() {
            if (VDBG) { Log.d(TAG, "Starting observing thread."); }
            // mRunning is set by the enclosing constructor before this thread
            // is started; it must not be set here, or a stop() issued before
            // this point would be undone.

            try {
                setupNetlinkSocket();
            } catch (ErrnoException | SocketException e) {
                Log.e(TAG, "Failed to suitably initialize a netlink socket", e);
                synchronized (mLock) { mRunning = false; }
            }

            while (stillRunning()) {
                final ByteBuffer byteBuffer;
                try {
                    byteBuffer = recvKernelReply();
                } catch (ErrnoException e) {
                    // An error after stop() is expected (the socket was closed).
                    if (stillRunning()) { Log.w(TAG, "ErrnoException: ", e); }
                    break;
                }
                final long whenMs = SystemClock.elapsedRealtime();
                if (byteBuffer == null) {
                    continue;
                }
                parseNetlinkMessageBuffer(byteBuffer, whenMs);
            }

            clearNetlinkSocket();

            synchronized (mLock) { mRunning = false; }
            if (VDBG) { Log.d(TAG, "Finishing observing thread."); }
        }

        /** Closes the current netlink socket, if one has been created. */
        private void clearNetlinkSocket() {
            final NetlinkSocket socket = mSocket;
            if (socket != null) {
                socket.close();
            }
        }

        // TODO: Refactor the main loop to recreate the socket upon recoverable errors.
        /** Creates the netlink socket and binds it to the neighbor multicast group. */
        private void setupNetlinkSocket() throws ErrnoException, SocketException {
            clearNetlinkSocket();
            mSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE);

            final NetlinkSocketAddress listenAddr = new NetlinkSocketAddress(
                    0, OsConstants.RTMGRP_NEIGH);
            mSocket.bind(listenAddr);

            if (VDBG) {
                final NetlinkSocketAddress nlAddr = mSocket.getLocalAddress();
                // The mask must be a long: an int mask of 0xffffffff is a no-op
                // and the port id would be sign-extended instead of being
                // printed as an unsigned 32-bit value.
                Log.d(TAG, "bound to sockaddr_nl{"
                        + (nlAddr.getPortId() & 0xffffffffL) + ", "
                        + nlAddr.getGroupsMask()
                        + "}");
            }
        }

        /**
         * Receives the next message from the kernel.
         *
         * @return the received bytes, or null if the receive was interrupted or
         *         failed with EAGAIN
         * @throws ErrnoException for any errno other than EAGAIN
         */
        private ByteBuffer recvKernelReply() throws ErrnoException {
            try {
                return mSocket.recvMessage(0);
            } catch (InterruptedIOException e) {
                // Interruption or other error, e.g. another thread closed our file descriptor.
            } catch (ErrnoException e) {
                if (e.errno != OsConstants.EAGAIN) {
                    throw e;
                }
            }
            return null;
        }

        /**
         * Parses every netlink message in {@code byteBuffer} and evaluates the
         * neighbor messages among them. Parsing stops at the first unparsable
         * message or the first message not originating from the kernel.
         */
        private void parseNetlinkMessageBuffer(ByteBuffer byteBuffer, long whenMs) {
            while (byteBuffer.remaining() > 0) {
                final int position = byteBuffer.position();
                final NetlinkMessage nlMsg = NetlinkMessage.parse(byteBuffer);
                if (nlMsg == null || nlMsg.getHeader() == null) {
                    byteBuffer.position(position);
                    Log.e(TAG, "unparsable netlink msg: " + NetlinkConstants.hexify(byteBuffer));
                    break;
                }

                final int srcPortId = nlMsg.getHeader().nlmsg_pid;
                if (srcPortId != 0) {
                    // Long mask so the port id is logged as an unsigned 32-bit value.
                    Log.e(TAG, "non-kernel source portId: " + (srcPortId & 0xffffffffL));
                    break;
                }

                if (nlMsg instanceof NetlinkErrorMessage) {
                    Log.e(TAG, "netlink error: " + nlMsg);
                    continue;
                } else if (!(nlMsg instanceof RtNetlinkNeighborMessage)) {
                    if (DBG) {
                        Log.d(TAG, "non-rtnetlink neighbor msg: " + nlMsg);
                    }
                    continue;
                }

                evaluateRtNetlinkNeighborMessage((RtNetlinkNeighborMessage) nlMsg, whenMs);
            }
        }

        /**
         * Records the new state of a watched neighbor on our interface and, if
         * the message reports NUD_FAILED, evaluates the loss of that neighbor.
         */
        private void evaluateRtNetlinkNeighborMessage(
                RtNetlinkNeighborMessage neighMsg, long whenMs) {
            final StructNdMsg ndMsg = neighMsg.getNdHeader();
            if (ndMsg == null || ndMsg.ndm_ifindex != mInterfaceIndex) {
                return;
            }

            final InetAddress destination = neighMsg.getDestination();
            if (!isWatching(destination)) {
                return;
            }

            final short msgType = neighMsg.getHeader().nlmsg_type;
            final short nudState = ndMsg.ndm_state;
            final String eventMsg = "NeighborEvent{"
                    + "elapsedMs=" + whenMs + ", "
                    + destination.getHostAddress() + ", "
                    + "[" + NetlinkConstants.hexify(neighMsg.getLinkLayerAddress()) + "], "
                    + NetlinkConstants.stringForNlMsgType(msgType) + ", "
                    + StructNdMsg.stringForNudState(nudState)
                    + "}";

            if (VDBG) {
                Log.d(TAG, neighMsg.toString());
            } else if (DBG) {
                Log.d(TAG, eventMsg);
            }

            synchronized (mLock) {
                // Re-check under the lock: the watch list may have been
                // replaced since isWatching() was consulted above.
                if (mIpWatchList.containsKey(destination)) {
                    // A deleted entry is recorded as NUD_NONE regardless of
                    // the state carried in the message.
                    final short value =
                            (msgType == NetlinkConstants.RTM_DELNEIGH)
                            ? StructNdMsg.NUD_NONE
                            : nudState;
                    mIpWatchList.put(destination, value);
                }
            }

            if (nudState == StructNdMsg.NUD_FAILED) {
                Log.w(TAG, "ALERT: " + eventMsg);
                handleNeighborLost(eventMsg);
            }
        }
    }
}