Watchdog.java revision 4de9936e85696208dfe91d1c40e3e5226e57634a
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import com.android.server.am.ActivityManagerService;
import com.android.server.power.PowerManagerService;

import android.app.AlarmManager;
import android.app.PendingIntent;
import android.content.BroadcastReceiver;
import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.os.Debug;
import android.os.Handler;
import android.os.Message;
import android.os.Process;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;

import java.io.File;
import java.util.ArrayList;
import java.util.Calendar;

/**
 * Thread that periodically asks its handler to call every registered {@link Monitor}.
 * If the monitors have not all returned within {@link #TIME_TO_RESTART} milliseconds,
 * stack traces are dumped and the current process is killed (unless a debugger is
 * attached). The class also schedules optional forced reboots via the alarm service.
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";
    static final boolean localLOGV = false || false;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    // Message code handled by HeartbeatHandler to run the monitors.
    static final int MONITOR = 2718;

    // Total time (ms) the monitors may take before the process is killed; run() waits
    // this long in two halves of TIME_TO_WAIT each.
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // The three values below are in seconds; they are multiplied by 1000 where used.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    // Reboot interval is in days; start time and window are in seconds.
    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour

    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";

    // Native processes passed to ActivityManagerService.dumpStackTraces().
    static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
        "/system/bin/mediaserver",
        "/system/bin/sdcard",
        "/system/bin/surfaceflinger"
    };

    static Watchdog sWatchdog;

    /* This handler will be used to post message back onto the main thread */
    final Handler mHandler;
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    boolean mCompleted;
    boolean mForceKillSystem;
    Monitor mCurrentMonitor;

    int mPhonePid;

    final Calendar mCalendar = Calendar.getInstance();
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    long mBootTime;
    int mRebootInterval;

    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested

    /**
     * Handles the {@link #MONITOR} message: re-evaluates the reboot schedule when the
     * effective reboot interval has changed, then calls every registered monitor and
     * finally marks the pass as completed.
     */
    final class HeartbeatHandler extends Handler {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MONITOR: {
                    // See if we should force a reboot.
                    int rebootInterval = mReqRebootInterval >= 0
                            ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL;
                    if (mRebootInterval != rebootInterval) {
                        mRebootInterval = rebootInterval;
                        // We have been running long enough that a reboot can
                        // be considered...
                        checkReboot(false);
                    }

                    // mCurrentMonitor records which monitor is in progress so that
                    // run() can name it if this loop never finishes.
                    final int size = mMonitors.size();
                    for (int i = 0 ; i < size ; i++) {
                        mCurrentMonitor = mMonitors.get(i);
                        mCurrentMonitor.monitor();
                    }

                    synchronized (Watchdog.this) {
                        mCompleted = true;
                        mCurrentMonitor = null;
                    }
                } break;
            }
        }
    }

    /** Receives the {@link #REBOOT_ACTION} alarm broadcast and re-runs the reboot check. */
    final class RebootReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
            checkReboot(true);
        }
    }

    /**
     * Receives an explicit reboot request, copies its integer extras into the
     * {@code mReq*} fields (-1 when an extra is absent), logs the request and
     * re-runs the reboot check.
     */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
            mReqRebootInterval = intent.getIntExtra("interval", -1);
            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
            mReqRebootWindow = intent.getIntExtra("window", -1);
            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
                            mReqRecheckInterval, mReqRebootStartTime,
                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
            checkReboot(true);
        }
    }

    /** Callback invoked by the watchdog's handler on every monitoring pass. */
    public interface Monitor {
        void monitor();
    }

    /**
     * Returns the shared Watchdog, creating it on first use.
     * NOTE(review): creation is not synchronized; presumably only called from one
     * thread during startup -- confirm against callers.
     */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }

    /**
     * Stores the services the watchdog consults, registers the reboot receivers and
     * records the current wall-clock time as the boot time.
     *
     * @param context used to obtain the content resolver, register receivers and
     *        build the reboot alarm intent
     * @param battery consulted by {@link #shouldWeBeBrutalLocked}
     * @param power consulted by {@link #shouldWeBeBrutalLocked}
     * @param alarm used to schedule and cancel the reboot alarm
     * @param activity used to add the watchdog report to the dropbox
     */
    public void init(Context context, BatteryService battery,
            PowerManagerService power, AlarmManagerService alarm,
            ActivityManagerService activity) {
        mResolver = context.getContentResolver();
        mBattery = battery;
        mPower = power;
        mAlarm = alarm;
        mActivity = activity;

        context.registerReceiver(new RebootReceiver(),
                new IntentFilter(REBOOT_ACTION));
        mRebootIntent = PendingIntent.getBroadcast(context,
                0, new Intent(REBOOT_ACTION), 0);

        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);

        mBootTime = System.currentTimeMillis();
    }

    /**
     * Records the pid of the "com.android.phone" process so that its stack can be
     * dumped together with this process's when the watchdog fires.
     */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /**
     * Registers a monitor to be called on every pass.
     *
     * @throws RuntimeException if the watchdog thread is already running
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
            }
            mMonitors.add(monitor);
        }
    }

    /**
     * Decides whether to reboot now and otherwise (re)schedules the reboot alarm.
     * With an effective interval of zero or less the alarm is removed and nothing
     * else happens.
     *
     * @param fromAlarm true when invoked from one of the broadcast receivers, false
     *        when invoked from the heartbeat handler
     */
    void checkReboot(boolean fromAlarm) {
        int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
                : REBOOT_DEFAULT_INTERVAL;
        mRebootInterval = rebootInterval;
        if (rebootInterval <= 0) {
            // No reboot interval requested.
            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
            mAlarm.remove(mRebootIntent);
            return;
        }

        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
                : REBOOT_DEFAULT_START_TIME;
        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
                : REBOOT_DEFAULT_WINDOW) * 1000;
        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
                : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000;

        retrieveBrutalityAmount();

        long realStartTime;
        long now;

        synchronized (this) {
            now = System.currentTimeMillis();
            realStartTime = computeCalendarTime(mCalendar, now,
                    rebootStartTime);

            // Multiply as long: one day is 86,400,000 ms, so the product no longer
            // fits in an int once the interval reaches 25 days.
            long rebootIntervalMillis = rebootInterval * 24L * 60 * 60 * 1000;
            if (DB || mReqRebootNoWait ||
                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
                if (fromAlarm && rebootWindowMillis <= 0) {
                    // No reboot window -- just immediately reboot.
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, "");
                    rebootSystem("Checkin scheduled forced");
                    return;
                }

                // Are we within the reboot window?
                if (now < realStartTime) {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now, rebootStartTime);
                } else if (now < (realStartTime+rebootWindowMillis)) {
                    String doit = shouldWeBeBrutalLocked(now);
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootInterval, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, doit != null ? doit : "");
                    if (doit == null) {
                        rebootSystem("Checked scheduled range");
                        return;
                    }

                    // Schedule next alarm either within the window or in the
                    // next interval.
                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
                        realStartTime = computeCalendarTime(mCalendar,
                                now + rebootIntervalMillis, rebootStartTime);
                    } else {
                        realStartTime = now + recheckInterval;
                    }
                } else {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now + rebootIntervalMillis, rebootStartTime);
                }
            }
        }

        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
                + ((realStartTime-now)/1000/60) + "m from now");
        mAlarm.remove(mRebootIntent);
        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    }

    /**
     * Perform a full reboot of the system.
     *
     * @param reason logged and passed on to the power manager's reboot call
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
        pms.reboot(reason);
    }

    /**
     * Refresh the thresholds that {@link #shouldWeBeBrutalLocked} compares against:
     * the requested value when one was given, otherwise the default, converted
     * from seconds to milliseconds.
     * Must not be called with the lock held.
     */
    void retrieveBrutalityAmount() {
        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
                : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000;
        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
                : MEMCHECK_DEFAULT_MIN_ALARM) * 1000;
    }

    /**
     * Determine whether it is a good time to kill, crash, or otherwise
     * plunder the current situation for the overall long-term benefit of
     * the world.
     *
     * @param curTime The current system time.
     * @return Returns null if this is a good time, else a String with the
     * text of why it is not a good time.
     */
    String shouldWeBeBrutalLocked(long curTime) {
        if (mBattery == null || !mBattery.isPowered()) {
            return "battery";
        }

        if (mMinScreenOff >= 0 && (mPower == null ||
                mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) {
            return "screen";
        }

        if (mMinAlarm >= 0 && (mAlarm == null ||
                mAlarm.timeToNextAlarm() < mMinAlarm)) {
            return "alarm";
        }

        return null;
    }

    /**
     * Returns the first time at or after {@code curTime} whose time of day equals
     * {@code secondsSinceMidnight}.
     *
     * @param c calendar used as scratch space; its time is overwritten
     * @param curTime reference time in milliseconds
     * @param secondsSinceMidnight desired time of day, in seconds
     * @return the computed time in milliseconds
     */
    static long computeCalendarTime(Calendar c, long curTime,
            long secondsSinceMidnight) {

        // start with now
        c.setTimeInMillis(curTime);

        int val = (int)secondsSinceMidnight / (60*60);
        c.set(Calendar.HOUR_OF_DAY, val);
        secondsSinceMidnight -= val * (60*60);
        val = (int)secondsSinceMidnight / 60;
        c.set(Calendar.MINUTE, val);
        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
        c.set(Calendar.MILLISECOND, 0);

        long newTime = c.getTimeInMillis();
        if (newTime < curTime) {
            // The given time (in seconds since midnight) has already passed for today, so advance
            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
            c.add(Calendar.DAY_OF_MONTH, 1);
            newTime = c.getTimeInMillis();
        }

        return newTime;
    }

    /**
     * Watchdog loop. Each iteration posts {@link #MONITOR} to the handler and waits
     * up to {@link #TIME_TO_WAIT} ms. If the monitors completed, the loop starts
     * over. After the first missed half a stack trace is dumped and the loop waits
     * once more; after the second the process is reported and killed.
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        while (true) {
            mCompleted = false;
            mHandler.sendEmptyMessage(MONITOR);

            synchronized (this) {
                long timeout = TIME_TO_WAIT;

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0 && !mForceKillSystem) {
                    try {
                        wait(timeout);  // notifyAll() is called when mForceKillSystem is set
                    } catch (InterruptedException e) {
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                }

                if (mCompleted && !mForceKillSystem) {
                    // The monitors have returned.
                    waitedHalf = false;
                    continue;
                }

                if (!waitedHalf) {
                    // We've waited half the deadlock-detection interval.  Pull a stack
                    // trace and wait another half.
                    ArrayList<Integer> pids = new ArrayList<Integer>();
                    pids.add(Process.myPid());
                    ActivityManagerService.dumpStackTraces(true, pids, null, null,
                            NATIVE_STACKS_OF_INTEREST);
                    waitedHalf = true;
                    continue;
                }
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.

            final String name = (mCurrentMonitor != null) ?
                    mCurrentMonitor.getClass().getName() : "null";
            EventLog.writeEvent(EventLogTags.WATCHDOG, name);

            ArrayList<Integer> pids = new ArrayList<Integer>();
            pids.add(Process.myPid());
            if (mPhonePid > 0) pids.add(mPhonePid);
            // Pass !waitedHalf so that just in case we somehow wind up here without having
            // dumped the halfway stacks, we properly re-initialize the trace file.
            final File stack = ActivityManagerService.dumpStackTraces(
                    !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, another second or two won't hurt much.
            SystemClock.sleep(2000);

            // Pull our own kernel thread stacks as well if we're configured for that
            if (RECORD_KERNEL_THREADS) {
                dumpKernelStackTraces();
            }

            // Try to add the error to the dropbox, but assuming that the ActivityManager
            // itself may be deadlocked.  (which has happened, causing this statement to
            // deadlock and the watchdog as a whole to be ineffective)
            Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
                    @Override
                    public void run() {
                        mActivity.addErrorToDropBox(
                                "watchdog", null, "system_server", null, null,
                                name, null, stack, null);
                    }
                };
            dropboxThread.start();
            try {
                dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
            } catch (InterruptedException ignored) {}

            // Only kill the process if the debugger is not attached.
            if (!Debug.isDebuggerConnected()) {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
                Process.killProcess(Process.myPid());
                System.exit(10);
            } else {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            }

            waitedHalf = false;
        }
    }

    /**
     * Asks native code to dump kernel stacks into the file named by the
     * "dalvik.vm.stack-trace-file" system property.
     *
     * @return the traces file, or null when the property is unset or empty
     */
    private File dumpKernelStackTraces() {
        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
        if (tracesPath == null || tracesPath.length() == 0) {
            return null;
        }

        native_dumpKernelStacks(tracesPath);
        return new File(tracesPath);
    }

    private native void native_dumpKernelStacks(String tracesPath);
}