IpReachabilityMonitor.java revision 5b25a0f7960048cbf5929ba144e7a575eb4f7d32
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.net.ip;

import com.android.internal.annotations.GuardedBy;

import android.content.Context;
import android.net.LinkAddress;
import android.net.LinkProperties;
import android.net.LinkProperties.ProvisioningChange;
import android.net.ProxyInfo;
import android.net.RouteInfo;
import android.net.metrics.IpReachabilityMonitorMessageEvent;
import android.net.metrics.IpReachabilityMonitorProbeEvent;
import android.net.metrics.IpReachabilityMonitorLostEvent;
import android.net.netlink.NetlinkConstants;
import android.net.netlink.NetlinkErrorMessage;
import android.net.netlink.NetlinkMessage;
import android.net.netlink.NetlinkSocket;
import android.net.netlink.RtNetlinkNeighborMessage;
import android.net.netlink.StructNdaCacheInfo;
import android.net.netlink.StructNdMsg;
import android.net.netlink.StructNlMsgHdr;
import android.os.PowerManager;
import android.os.SystemClock;
import android.system.ErrnoException;
import android.system.NetlinkSocketAddress;
import android.system.OsConstants;
import android.util.Log;

import java.io.InterruptedIOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.SocketAddress;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 * IpReachabilityMonitor.
 *
 * Monitors on-link IP reachability and notifies callers whenever any on-link
 * addresses of interest appear to have become unresponsive.
 *
 * This code does not concern itself with "why" a neighbour might have become
 * unreachable. Instead, it primarily reacts to the kernel's notion of IP
 * reachability for each of the neighbours we know to be critically important
 * to normal network connectivity. As such, it is often "just the messenger":
 * the neighbours about which it warns are already deemed by the kernel to have
 * become unreachable.
 *
 *
 * How it works:
 *
 *   1. The "on-link neighbours of interest" found in a given LinkProperties
 *      instance are added to a "watch list" via #updateLinkProperties().
 *      This usually means all default gateways and any on-link DNS servers.
 *
 *   2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
 *      RTM_DELNEIGH), watching only for neighbours in the watch list.
 *
 *        - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
 *          even NUD_PROBE is perfectly normal; we merely record the new state.
 *
 *        - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
 *          to garbage collection.  This is not necessarily of immediate
 *          concern; we record the neighbour as moving to NUD_NONE.
 *
 *        - A neighbour transitioning to NUD_FAILED (for any reason) is
 *          critically important and is handled as described below in #4.
 *
 *   3. All on-link neighbours in the watch list can be forcibly "probed" by
 *      calling #probeAll(). This should be called whenever it is important to
 *      verify that critical neighbours on the link are still reachable, e.g.
 *      when roaming between BSSIDs.
 *
 *        - The kernel will send unicast ARP requests for IPv4 neighbours and
 *          unicast NS packets for IPv6 neighbours.  The expected replies will
 *          likely be unicast.
 *
 *        - The forced probing is done holding a wakelock. The kernel may,
 *          however, initiate probing of a neighbor on its own, i.e. whenever
 *          a neighbour has expired from NUD_DELAY.
 *
 *        - The kernel sends:
 *
 *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
 *
 *          number of probes (usually 3) every:
 *
 *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
 *
 *          number of milliseconds (usually 1000ms). This normally results in
 *          3 unicast packets, 1 per second.
 *
 *        - If no response is received to any of the probe packets, the kernel
 *          marks the neighbour as being in state NUD_FAILED, and the listening
 *          process in #2 will learn of it.
 *
 *   4. We call the supplied Callback#notifyLost() function if the loss of a
 *      neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
 *      become incomplete (a loss of provisioning).
 *
 *        - For example, losing all our IPv4 on-link DNS servers (or losing
 *          our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
 *          provisioning; Callback#notifyLost() would be called.
 *
 *        - Since it can be non-trivial to reacquire certain IP provisioning
 *          state it may be best for the link to disconnect completely and
 *          reconnect afresh.
 *
 *
 * @hide
 */
public class IpReachabilityMonitor {
    private static final String TAG = "IpReachabilityMonitor";
    private static final boolean DBG = false;
    private static final boolean VDBG = false;

    /** Receives notification when a neighbour failure causes a loss of provisioning. */
    public interface Callback {
        // This callback function must execute as quickly as possible as it is
        // run on the same thread that listens to kernel neighbor updates.
        //
        // TODO: refactor to something like notifyProvisioningLost(String msg).
        public void notifyLost(InetAddress ip, String logMsg);
    }

    private final Object mLock = new Object();
    private final PowerManager.WakeLock mWakeLock;
    private final String mInterfaceName;
    private final int mInterfaceIndex;
    private final Callback mCallback;
    private final NetlinkSocketObserver mNetlinkSocketObserver;
    private final Thread mObserverThread;
    @GuardedBy("mLock")
    private LinkProperties mLinkProperties = new LinkProperties();
    // TODO: consider a map to a private NeighborState class holding more
    // information than a single NUD state entry.
    @GuardedBy("mLock")
    private Map<InetAddress, Short> mIpWatchList = new HashMap<>();
    @GuardedBy("mLock")
    private int mIpWatchListVersion;
    @GuardedBy("mLock")
    private boolean mRunning;

    /**
     * Make the kernel perform neighbor reachability detection (IPv4 ARP or IPv6 ND)
     * for the given IP address on the specified interface index.
     *
     * The outcome is also recorded via IpReachabilityMonitorProbeEvent.
     *
     * @param ifIndex index of the interface on which to probe
     * @param ip address of the neighbour to probe
     * @return true, if the request was successfully passed to the kernel; false otherwise.
     */
    public static boolean probeNeighbor(int ifIndex, InetAddress ip) {
        final long IO_TIMEOUT = 300L;
        final String msgSnippet = "probing ip=" + ip.getHostAddress() + "%" + ifIndex;
        if (DBG) { Log.d(TAG, msgSnippet); }

        final byte[] msg = RtNetlinkNeighborMessage.newNewNeighborMessage(
                1, ip, StructNdMsg.NUD_PROBE, ifIndex, null);
        boolean returnValue = false;

        try (NetlinkSocket nlSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE)) {
            nlSocket.connectToKernel();
            nlSocket.sendMessage(msg, 0, msg.length, IO_TIMEOUT);
            final ByteBuffer bytes = nlSocket.recvMessage(IO_TIMEOUT);
            final NetlinkMessage response = NetlinkMessage.parse(bytes);
            // Success is an error message carrying error code 0.
            // (instanceof is already false for null, so no separate null check is needed.)
            if (response instanceof NetlinkErrorMessage &&
                    (((NetlinkErrorMessage) response).getNlMsgError() != null) &&
                    (((NetlinkErrorMessage) response).getNlMsgError().error == 0)) {
                returnValue = true;
            } else {
                String errmsg;
                if (bytes == null) {
                    errmsg = "null recvMessage";
                } else if (response == null) {
                    bytes.position(0);
                    errmsg = "raw bytes: " + NetlinkConstants.hexify(bytes);
                } else {
                    // TODO: consider ignoring EINVAL (-22), which appears to be
                    // normal when probing a neighbor for which the kernel does
                    // not already have / no longer has a link layer address.
                    errmsg = response.toString();
                }
                Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + errmsg);
            }
        } catch (ErrnoException | InterruptedIOException | SocketException e) {
            Log.d(TAG, "Error " + msgSnippet, e);
        }
        IpReachabilityMonitorProbeEvent.logEvent("ifindex-" + ifIndex, ip.getHostAddress(),
                returnValue);
        return returnValue;
    }

    /**
     * Creates a monitor for the named interface and starts its observer thread.
     *
     * @param context used to obtain the PowerManager from which the probe wakelock is created
     * @param ifName name of the network interface to monitor
     * @param callback notified on loss of provisioning; may be null, in which case
     *        losses are only logged
     * @throws IllegalArgumentException if {@code ifName} does not name a usable interface
     */
    public IpReachabilityMonitor(Context context, String ifName, Callback callback)
                throws IllegalArgumentException {
        mInterfaceName = ifName;
        final NetworkInterface netIf;
        try {
            // getByName() rejects a null name with an NPE and returns null for an
            // unknown interface; check for both explicitly rather than catching NPE.
            netIf = (ifName == null) ? null : NetworkInterface.getByName(ifName);
        } catch (SocketException e) {
            throw new IllegalArgumentException("invalid interface '" + ifName + "': ", e);
        }
        if (netIf == null) {
            throw new IllegalArgumentException("invalid interface '" + ifName + "'");
        }
        mInterfaceIndex = netIf.getIndex();
        mWakeLock = ((PowerManager) context.getSystemService(Context.POWER_SERVICE)).newWakeLock(
                PowerManager.PARTIAL_WAKE_LOCK, TAG + "." + mInterfaceName);
        mCallback = callback;
        mNetlinkSocketObserver = new NetlinkSocketObserver();
        mObserverThread = new Thread(mNetlinkSocketObserver);
        // Mark the monitor as running before the thread is started. If the
        // observer thread set this flag itself, a stop() issued before the thread
        // was scheduled would be overwritten and the thread would never exit.
        synchronized (mLock) { mRunning = true; }
        mObserverThread.start();
    }

    /**
     * Stops monitoring: clears the running flag and the watch list, then closes
     * the observer's netlink socket.
     */
    public void stop() {
        synchronized (mLock) { mRunning = false; }
        clearLinkProperties();
        mNetlinkSocketObserver.clearNetlinkSocket();
    }

    // TODO: add a public dump() method that can be called during a bug report.

    /** Returns a one-line description of the interface, watch list version and entries. */
    private String describeWatchList() {
        final String delimiter = ", ";
        StringBuilder sb = new StringBuilder();
        synchronized (mLock) {
            sb.append("iface{" + mInterfaceName + "/" + mInterfaceIndex + "}, ");
            sb.append("v{" + mIpWatchListVersion + "}, ");
            sb.append("ntable=[");
            boolean firstTime = true;
            for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
                if (firstTime) {
                    firstTime = false;
                } else {
                    sb.append(delimiter);
                }
                sb.append(entry.getKey().getHostAddress() + "/" +
                          StructNdMsg.stringForNudState(entry.getValue()));
            }
            sb.append("]");
        }
        return sb.toString();
    }

    /** Returns true if the monitor is running and {@code ip} is in the watch list. */
    private boolean isWatching(InetAddress ip) {
        synchronized (mLock) {
            return mRunning && mIpWatchList.containsKey(ip);
        }
    }

    /** Returns the current value of the running flag. */
    private boolean stillRunning() {
        synchronized (mLock) {
            return mRunning;
        }
    }

    /** Returns true if some gateway-less route in {@code routes} matches {@code ip}. */
    private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
        for (RouteInfo route : routes) {
            if (!route.hasGateway() && route.matches(ip)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the recorded NUD state for {@code ip}, or NUD_NONE if it is not in
     * the watch list. The caller must hold mLock.
     */
    private short getNeighborStateLocked(InetAddress ip) {
        if (mIpWatchList.containsKey(ip)) {
            return mIpWatchList.get(ip);
        }
        return StructNdMsg.NUD_NONE;
    }

    /**
     * Rebuilds the watch list from the given LinkProperties.
     *
     * The new watch list holds every on-link gateway and every on-link DNS
     * server; states already recorded for addresses that remain are carried
     * over. A LinkProperties for a different interface is rejected (and logged).
     *
     * @param lp the link configuration to watch
     */
    public void updateLinkProperties(LinkProperties lp) {
        if (!mInterfaceName.equals(lp.getInterfaceName())) {
            // TODO: figure out whether / how to cope with interface changes.
            Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
                    "' does not match: " + mInterfaceName);
            return;
        }

        synchronized (mLock) {
            mLinkProperties = new LinkProperties(lp);
            Map<InetAddress, Short> newIpWatchList = new HashMap<>();

            final List<RouteInfo> routes = mLinkProperties.getRoutes();
            for (RouteInfo route : routes) {
                if (route.hasGateway()) {
                    InetAddress gw = route.getGateway();
                    if (isOnLink(routes, gw)) {
                        newIpWatchList.put(gw, getNeighborStateLocked(gw));
                    }
                }
            }

            for (InetAddress nameserver : lp.getDnsServers()) {
                if (isOnLink(routes, nameserver)) {
                    newIpWatchList.put(nameserver, getNeighborStateLocked(nameserver));
                }
            }

            mIpWatchList = newIpWatchList;
            mIpWatchListVersion++;
        }
        if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
    }

    /** Clears the stored LinkProperties and empties the watch list. */
    public void clearLinkProperties() {
        synchronized (mLock) {
            mLinkProperties.clear();
            mIpWatchList.clear();
            mIpWatchListVersion++;
        }
        if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
    }

    /**
     * Determines whether the neighbours currently recorded as NUD_FAILED amount
     * to a loss of provisioning and, if so, logs it and notifies the callback.
     *
     * @param msg description of the triggering neighbour event, appended to the log message
     */
    private void handleNeighborLost(String msg) {
        InetAddress ip = null;
        ProvisioningChange delta;
        synchronized (mLock) {
            // Build the configuration we would have without the failed neighbours.
            LinkProperties whatIfLp = new LinkProperties(mLinkProperties);

            for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
                if (entry.getValue() != StructNdMsg.NUD_FAILED) {
                    continue;
                }

                ip = entry.getKey();
                for (RouteInfo route : mLinkProperties.getRoutes()) {
                    if (ip.equals(route.getGateway())) {
                        whatIfLp.removeRoute(route);
                    }
                }
                whatIfLp.removeDnsServer(ip);
            }

            delta = LinkProperties.compareProvisioning(mLinkProperties, whatIfLp);
        }

        // The callback is invoked outside of mLock.
        if (delta == ProvisioningChange.LOST_PROVISIONING) {
            IpReachabilityMonitorLostEvent.logEvent(mInterfaceName);
            final String logMsg = "FAILURE: LOST_PROVISIONING, " + msg;
            Log.w(TAG, logMsg);
            if (mCallback != null) {
                // TODO: remove |ip| when the callback signature no longer has
                // an InetAddress argument.
                mCallback.notifyLost(ip, logMsg);
            }
        }
    }

    /**
     * Asks the kernel to probe every neighbour currently in the watch list,
     * holding a timed wakelock while the probes are outstanding.
     */
    public void probeAll() {
        // Snapshot the targets so that probing happens outside of mLock.
        Set<InetAddress> ipProbeList = new HashSet<InetAddress>();
        synchronized (mLock) {
            ipProbeList.addAll(mIpWatchList.keySet());
        }

        if (!ipProbeList.isEmpty() && stillRunning()) {
            // Keep the CPU awake long enough to allow all ARP/ND
            // probes a reasonable chance at success. See b/23197666.
            //
            // The wakelock we use is (by default) refcounted, and this version
            // of acquire(timeout) queues a release message to keep acquisitions
            // and releases balanced.
            mWakeLock.acquire(getProbeWakeLockDuration());
        }

        for (InetAddress target : ipProbeList) {
            if (!stillRunning()) {
                break;
            }
            probeNeighbor(mInterfaceIndex, target);
        }
    }

    /** Returns how long, in milliseconds, to hold the wakelock for a round of probes. */
    private long getProbeWakeLockDuration() {
        // Ideally, this would be computed by examining the values of:
        //
        //     /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit
        //
        // and:
        //
        //     /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms
        //
        // For now, just make some assumptions.
        final long numUnicastProbes = 3;
        final long retransTimeMs = 1000;
        final long gracePeriodMs = 500;
        return (numUnicastProbes * retransTimeMs) + gracePeriodMs;
    }

    // TODO: simplify the number of objects by making this extend Thread.
    /** Listens for kernel neighbour messages and feeds them to the enclosing monitor. */
    private final class NetlinkSocketObserver implements Runnable {
        private NetlinkSocket mSocket;

        /**
         * Receives and processes netlink messages until the monitor is stopped
         * or an unrecoverable receive error occurs.
         */
        @Override
        public void run() {
            if (VDBG) { Log.d(TAG, "Starting observing thread."); }
            // mRunning is set by the enclosing constructor before this thread is
            // started, so a stop() that precedes this point is not overridden here.

            try {
                setupNetlinkSocket();
            } catch (ErrnoException | SocketException e) {
                Log.e(TAG, "Failed to suitably initialize a netlink socket", e);
                synchronized (mLock) { mRunning = false; }
            }

            ByteBuffer byteBuffer;
            while (stillRunning()) {
                try {
                    byteBuffer = recvKernelReply();
                } catch (ErrnoException e) {
                    // An error after stop() is expected (the socket was closed under us).
                    if (stillRunning()) { Log.w(TAG, "ErrnoException: ", e); }
                    break;
                }
                final long whenMs = SystemClock.elapsedRealtime();
                if (byteBuffer == null) {
                    continue;
                }
                parseNetlinkMessageBuffer(byteBuffer, whenMs);
            }

            clearNetlinkSocket();

            synchronized (mLock) { mRunning = false; }
            if (VDBG) { Log.d(TAG, "Finishing observing thread."); }
        }

        /** Closes the netlink socket, if one has been created. */
        private void clearNetlinkSocket() {
            if (mSocket != null) {
                mSocket.close();
            }
        }

        // TODO: Refactor the main loop to recreate the socket upon recoverable errors.
        /** Creates a NETLINK_ROUTE socket bound to the RTMGRP_NEIGH multicast group. */
        private void setupNetlinkSocket() throws ErrnoException, SocketException {
            clearNetlinkSocket();
            mSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE);

            final NetlinkSocketAddress listenAddr = new NetlinkSocketAddress(
                    0, OsConstants.RTMGRP_NEIGH);
            mSocket.bind(listenAddr);

            if (VDBG) {
                final NetlinkSocketAddress nlAddr = mSocket.getLocalAddress();
                // Mask with a long literal so the port ID is printed as unsigned;
                // an int mask followed by a cast would sign-extend.
                Log.d(TAG, "bound to sockaddr_nl{"
                        + (nlAddr.getPortId() & 0xffffffffL) + ", "
                        + nlAddr.getGroupsMask()
                        + "}");
            }
        }

        /**
         * Receives one message buffer from the socket (timeout argument of 0).
         *
         * @return the received bytes, or null on interruption or EAGAIN
         * @throws ErrnoException for any receive error other than EAGAIN
         */
        private ByteBuffer recvKernelReply() throws ErrnoException {
            try {
                return mSocket.recvMessage(0);
            } catch (InterruptedIOException e) {
                // Interruption or other error, e.g. another thread closed our file descriptor.
            } catch (ErrnoException e) {
                if (e.errno != OsConstants.EAGAIN) {
                    throw e;
                }
            }
            return null;
        }

        /**
         * Parses every netlink message in the buffer and evaluates the neighbour
         * messages among them. Parsing stops at the first unparsable message or
         * the first message whose source port ID is non-zero.
         */
        private void parseNetlinkMessageBuffer(ByteBuffer byteBuffer, long whenMs) {
            while (byteBuffer.remaining() > 0) {
                final int position = byteBuffer.position();
                final NetlinkMessage nlMsg = NetlinkMessage.parse(byteBuffer);
                if (nlMsg == null || nlMsg.getHeader() == null) {
                    // Rewind so that the log shows the bytes that failed to parse.
                    byteBuffer.position(position);
                    Log.e(TAG, "unparsable netlink msg: " + NetlinkConstants.hexify(byteBuffer));
                    break;
                }

                final int srcPortId = nlMsg.getHeader().nlmsg_pid;
                if (srcPortId != 0) {
                    // Long mask: print the 32-bit port ID as an unsigned value.
                    Log.e(TAG, "non-kernel source portId: " + (srcPortId & 0xffffffffL));
                    break;
                }

                if (nlMsg instanceof NetlinkErrorMessage) {
                    Log.e(TAG, "netlink error: " + nlMsg);
                    continue;
                } else if (!(nlMsg instanceof RtNetlinkNeighborMessage)) {
                    if (DBG) {
                        Log.d(TAG, "non-rtnetlink neighbor msg: " + nlMsg);
                    }
                    continue;
                }

                evaluateRtNetlinkNeighborMessage((RtNetlinkNeighborMessage) nlMsg, whenMs);
            }
        }

        /**
         * Records the state carried by a neighbour message for a watched address
         * on our interface, and triggers loss handling if that state is NUD_FAILED.
         */
        private void evaluateRtNetlinkNeighborMessage(
                RtNetlinkNeighborMessage neighMsg, long whenMs) {
            final StructNdMsg ndMsg = neighMsg.getNdHeader();
            if (ndMsg == null || ndMsg.ndm_ifindex != mInterfaceIndex) {
                return;
            }

            final InetAddress destination = neighMsg.getDestination();
            if (!isWatching(destination)) {
                return;
            }

            final short msgType = neighMsg.getHeader().nlmsg_type;
            final short nudState = ndMsg.ndm_state;
            IpReachabilityMonitorMessageEvent.logEvent(mInterfaceName,
                    destination.getHostAddress(), msgType, nudState);
            final String eventMsg = "NeighborEvent{"
                    + "elapsedMs=" + whenMs + ", "
                    + destination.getHostAddress() + ", "
                    + "[" + NetlinkConstants.hexify(neighMsg.getLinkLayerAddress()) + "], "
                    + NetlinkConstants.stringForNlMsgType(msgType) + ", "
                    + StructNdMsg.stringForNudState(nudState)
                    + "}";

            if (VDBG) {
                Log.d(TAG, neighMsg.toString());
            } else if (DBG) {
                Log.d(TAG, eventMsg);
            }

            synchronized (mLock) {
                // Re-check under the lock: the watch list may have been replaced
                // since the isWatching() test above.
                if (mIpWatchList.containsKey(destination)) {
                    final short value =
                            (msgType == NetlinkConstants.RTM_DELNEIGH)
                            ? StructNdMsg.NUD_NONE
                            : nudState;
                    mIpWatchList.put(destination, value);
                }
            }

            if (nudState == StructNdMsg.NUD_FAILED) {
                Log.w(TAG, "ALERT: " + eventMsg);
                handleNeighborLost(eventMsg);
            }
        }
    }
}