Watchdog.java revision c27181c7f3e11170ec82807cfa416f0a906ff574
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import com.android.server.am.ActivityManagerService;

import android.app.AlarmManager;
import android.app.PendingIntent;
import android.content.BroadcastReceiver;
import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.os.Debug;
import android.os.Handler;
import android.os.Message;
import android.os.Process;
import android.os.ServiceManager;
import android.os.SystemClock;
import android.os.SystemProperties;
import android.provider.Settings;
import android.util.Config;
import android.util.EventLog;
import android.util.Log;
import android.util.Slog;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;

/**
 * Watchdog thread for the process it runs in.
 *
 * <p>The thread repeatedly posts a {@link #MONITOR} message to {@link #mHandler}, whose
 * handler invokes every registered {@link Monitor}. After each post the thread waits
 * {@link #TIME_TO_WAIT} milliseconds of uptime. If the handler has not finished all monitors
 * by then, stack traces are dumped and the thread waits one more interval; if the handler is
 * still not done (i.e. after roughly {@link #TIME_TO_RESTART} in total), the watchdog records
 * the failure and kills this process, unless a debugger is attached.
 *
 * <p>The class also schedules optional periodic reboots (see {@link #checkReboot}).
 */
public class Watchdog extends Thread {
    static final String TAG = "Watchdog";
    static final boolean localLOGV = false || Config.LOGV;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    /** Message code posted to {@link #mHandler} to run all monitors. */
    static final int MONITOR = 2718;

    /** Total time (ms) the monitors may stay blocked before the process is killed. */
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    /** Length (ms) of one wait in {@link #run()}; half of {@link #TIME_TO_RESTART}. */
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // The following defaults are expressed in seconds.
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour

    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";

    static Watchdog sWatchdog;

    /* This handler will be used to post message back onto the main thread */
    final Handler mHandler;
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    /** Set to true by the handler once every monitor has returned for the current round. */
    boolean mCompleted;
    boolean mForceKillSystem;
    /** The monitor currently being invoked by the handler, or null when none is running. */
    Monitor mCurrentMonitor;

    int mPhonePid;

    final Calendar mCalendar = Calendar.getInstance();
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    boolean mNeedScheduledCheck;
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    /** Wall-clock time (ms) captured in {@link #init}. */
    long mBootTime;
    /** Last reboot interval (days) that was acted upon; 0 or less means no forced reboot. */
    int mRebootInterval;

    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested

    /**
     * Used for scheduling monitor callbacks and checking memory usage.
     */
    final class HeartbeatHandler extends Handler {
        /**
         * On {@link #MONITOR}: re-evaluates the reboot schedule if the configured reboot
         * interval changed, then calls every registered monitor in order and finally marks
         * the round as completed.
         */
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MONITOR: {
                    // See if we should force a reboot.
                    int rebootInterval = mReqRebootInterval >= 0
                            ? mReqRebootInterval : Settings.Secure.getInt(
                            mResolver, Settings.Secure.REBOOT_INTERVAL,
                            REBOOT_DEFAULT_INTERVAL);
                    if (mRebootInterval != rebootInterval) {
                        mRebootInterval = rebootInterval;
                        // We have been running long enough that a reboot can
                        // be considered...
                        checkReboot(false);
                    }

                    // A monitor that never returns leaves mCompleted false and
                    // mCurrentMonitor pointing at itself, which is what run() reports.
                    final int size = mMonitors.size();
                    for (int i = 0 ; i < size ; i++) {
                        mCurrentMonitor = mMonitors.get(i);
                        mCurrentMonitor.monitor();
                    }

                    synchronized (Watchdog.this) {
                        mCompleted = true;
                        mCurrentMonitor = null;
                    }
                } break;
            }
        }
    }

    /** Receives {@link #REBOOT_ACTION} alarms and re-evaluates the reboot schedule. */
    final class RebootReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
            checkReboot(true);
        }
    }

    /**
     * Receives explicit reboot requests. Each integer extra overrides the corresponding
     * setting; a missing extra resets that override to "not requested" (-1, or false for
     * "nowait").
     */
    final class RebootRequestReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context c, Intent intent) {
            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
            mReqRebootInterval = intent.getIntExtra("interval", -1);
            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
            mReqRebootWindow = intent.getIntExtra("window", -1);
            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
                            mReqRecheckInterval, mReqRebootStartTime,
                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
            checkReboot(true);
        }
    }

    /** Callback invoked by the watchdog's handler on every heartbeat. */
    public interface Monitor {
        void monitor();
    }

    /**
     * Returns the singleton, creating it on first use.
     *
     * <p>NOTE(review): creation is not synchronized, and the handler is constructed here,
     * on the calling thread -- confirm the first call always comes from the intended thread.
     */
    public static Watchdog getInstance() {
        if (sWatchdog == null) {
            sWatchdog = new Watchdog();
        }

        return sWatchdog;
    }

    private Watchdog() {
        super("watchdog");
        mHandler = new HeartbeatHandler();
    }

    /**
     * Stores the services the watchdog consults, registers the reboot alarm and reboot
     * request receivers, and records the current wall-clock time as the boot time.
     */
    public void init(Context context, BatteryService battery,
            PowerManagerService power, AlarmManagerService alarm,
            ActivityManagerService activity) {
        mResolver = context.getContentResolver();
        mBattery = battery;
        mPower = power;
        mAlarm = alarm;
        mActivity = activity;

        context.registerReceiver(new RebootReceiver(),
                new IntentFilter(REBOOT_ACTION));
        mRebootIntent = PendingIntent.getBroadcast(context,
                0, new Intent(REBOOT_ACTION), 0);

        // Only senders holding the REBOOT permission may request a reboot.
        context.registerReceiver(new RebootRequestReceiver(),
                new IntentFilter(Intent.ACTION_REBOOT),
                android.Manifest.permission.REBOOT, null);

        mBootTime = System.currentTimeMillis();
    }

    /**
     * Records the pid of the "com.android.phone" process so that its stacks are included
     * when the watchdog fires; other process names are ignored.
     */
    public void processStarted(String name, int pid) {
        synchronized (this) {
            if ("com.android.phone".equals(name)) {
                mPhonePid = pid;
            }
        }
    }

    /**
     * Registers a monitor to be called on every heartbeat.
     *
     * @throws RuntimeException if the watchdog thread is already running
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            if (isAlive()) {
                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
            }
            mMonitors.add(monitor);
        }
    }

    /**
     * Re-evaluates the forced-reboot schedule: either reboots now, or (re)schedules the
     * reboot alarm for the next time the decision should be revisited. If no reboot interval
     * is configured the alarm is removed.
     *
     * @param fromAlarm true when called from one of the broadcast receivers, false when
     *        called from the heartbeat handler. An immediate reboot with an empty window is
     *        only performed when this is true.
     */
    void checkReboot(boolean fromAlarm) {
        int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.REBOOT_INTERVAL,
                REBOOT_DEFAULT_INTERVAL);
        mRebootInterval = rebootInterval;
        if (rebootInterval <= 0) {
            // No reboot interval requested.
            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
            mAlarm.remove(mRebootIntent);
            return;
        }

        // Start time is in seconds since midnight; window and recheck interval are
        // configured in seconds and converted to milliseconds here.
        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_START_TIME,
                REBOOT_DEFAULT_START_TIME);
        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.REBOOT_WINDOW,
                REBOOT_DEFAULT_WINDOW)) * 1000;
        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
                : Settings.Secure.getLong(
                mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
                MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;

        retrieveBrutalityAmount();

        long realStartTime;
        long now;

        synchronized (this) {
            now = System.currentTimeMillis();
            realStartTime = computeCalendarTime(mCalendar, now,
                    rebootStartTime);

            // The interval is in days. Multiply in long arithmetic: as an int product this
            // overflows for intervals of 25 days or more, which made the elapsed-time test
            // below succeed far too early.
            long rebootIntervalMillis = rebootInterval * 24L * 60 * 60 * 1000;
            if (DB || mReqRebootNoWait ||
                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
                if (fromAlarm && rebootWindowMillis <= 0) {
                    // No reboot window -- just immediately reboot.
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, "");
                    rebootSystem("Checkin scheduled forced");
                    return;
                }

                // Are we within the reboot window?
                if (now < realStartTime) {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now, rebootStartTime);
                } else if (now < (realStartTime+rebootWindowMillis)) {
                    String doit = shouldWeBeBrutalLocked(now);
                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
                            (int)rebootInterval, (int)rebootStartTime*1000,
                            (int)rebootWindowMillis, doit != null ? doit : "");
                    if (doit == null) {
                        rebootSystem("Checked scheduled range");
                        return;
                    }

                    // Schedule next alarm either within the window or in the
                    // next interval.
                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
                        realStartTime = computeCalendarTime(mCalendar,
                                now + rebootIntervalMillis, rebootStartTime);
                    } else {
                        realStartTime = now + recheckInterval;
                    }
                } else {
                    // Schedule alarm for next check interval.
                    realStartTime = computeCalendarTime(mCalendar,
                            now + rebootIntervalMillis, rebootStartTime);
                }
            }
        }

        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
                + ((realStartTime-now)/1000/60) + "m from now");
        mAlarm.remove(mRebootIntent);
        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
    }

    /**
     * Perform a full reboot of the system.
     *
     * @param reason text that is logged and passed on to the power manager service
     */
    void rebootSystem(String reason) {
        Slog.i(TAG, "Rebooting system because: " + reason);
        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
        pms.reboot(reason);
    }

    /**
     * Load the current Gservices settings for when
     * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
     * Must not be called with the lock held.
     *
     * <p>Both values are read in seconds and stored in milliseconds.
     */
    void retrieveBrutalityAmount() {
        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
                MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
                : Settings.Secure.getInt(
                mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
                MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
    }

    /**
     * Determine whether it is a good time to kill, crash, or otherwise
     * plunder the current situation for the overall long-term benefit of
     * the world.
     *
     * @param curTime The current system time.
     * @return Returns null if this is a good time, else a String with the
     * text of why it is not a good time.
     */
    String shouldWeBeBrutalLocked(long curTime) {
        if (mBattery == null || !mBattery.isPowered()) {
            return "battery";
        }

        if (mMinScreenOff >= 0 && (mPower == null ||
                mPower.timeSinceScreenOn() < mMinScreenOff)) {
            return "screen";
        }

        if (mMinAlarm >= 0 && (mAlarm == null ||
                mAlarm.timeToNextAlarm() < mMinAlarm)) {
            return "alarm";
        }

        return null;
    }

    /**
     * Computes the next wall-clock instant, at or after {@code curTime}, whose time of day
     * equals {@code secondsSinceMidnight}.
     *
     * @param c calendar used as scratch space; its time is overwritten
     * @param curTime reference time in milliseconds since the epoch
     * @param secondsSinceMidnight desired time of day, in seconds
     * @return the computed time in milliseconds since the epoch
     */
    static long computeCalendarTime(Calendar c, long curTime,
            long secondsSinceMidnight) {

        // start with now
        c.setTimeInMillis(curTime);

        int val = (int)secondsSinceMidnight / (60*60);
        c.set(Calendar.HOUR_OF_DAY, val);
        secondsSinceMidnight -= val * (60*60);
        val = (int)secondsSinceMidnight / 60;
        c.set(Calendar.MINUTE, val);
        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
        c.set(Calendar.MILLISECOND, 0);

        long newTime = c.getTimeInMillis();
        if (newTime < curTime) {
            // The given time (in seconds since midnight) has already passed for today, so advance
            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
            c.add(Calendar.DAY_OF_MONTH, 1);
            newTime = c.getTimeInMillis();
        }

        return newTime;
    }

    /**
     * Watchdog loop: posts a heartbeat, waits {@link #TIME_TO_WAIT}, and kills the process
     * if the heartbeat has still not completed after a second such wait.
     */
    @Override
    public void run() {
        boolean waitedHalf = false;
        while (true) {
            mCompleted = false;
            mHandler.sendEmptyMessage(MONITOR);

            synchronized (this) {
                long timeout = TIME_TO_WAIT;

                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
                // wait while asleep. If the device is asleep then the thing that we are waiting
                // to timeout on is asleep as well and won't have a chance to run, causing a false
                // positive on when to kill things.
                long start = SystemClock.uptimeMillis();
                while (timeout > 0 && !mForceKillSystem) {
                    try {
                        wait(timeout);  // notifyAll() is called when mForceKillSystem is set
                    } catch (InterruptedException e) {
                        // Deliberately not re-interrupting: with the flag set, wait() would
                        // throw again immediately and this loop would spin.
                        Log.wtf(TAG, e);
                    }
                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
                }

                if (mCompleted && !mForceKillSystem) {
                    // The monitors have returned.
                    waitedHalf = false;
                    continue;
                }

                if (!waitedHalf) {
                    // We've waited half the deadlock-detection interval.  Pull a stack
                    // trace and wait another half.
                    ArrayList<Integer> pids = new ArrayList<Integer>();
                    pids.add(Process.myPid());
                    ActivityManagerService.dumpStackTraces(true, pids);
                    waitedHalf = true;
                    continue;
                }
            }

            // If we got here, that means that the system is most likely hung.
            // First collect stack traces from all threads of the system process.
            // Then kill this process so that the system will restart.

            // Read the field once: the handler thread sets it back to null when it finishes,
            // so a separate null check and dereference could race and throw.
            final Monitor blockedMonitor = mCurrentMonitor;
            final String name = (blockedMonitor != null)
                    ? blockedMonitor.getClass().getName() : "null";
            EventLog.writeEvent(EventLogTags.WATCHDOG, name);

            ArrayList<Integer> pids = new ArrayList<Integer>();
            pids.add(Process.myPid());
            if (mPhonePid > 0) pids.add(mPhonePid);
            // Pass !waitedHalf so that just in case we somehow wind up here without having
            // dumped the halfway stacks, we properly re-initialize the trace file.
            File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids);

            // Give some extra time to make sure the stack traces get written.
            // The system's been hanging for a minute, another second or two won't hurt much.
            SystemClock.sleep(2000);

            // Pull our own kernel thread stacks as well if we're configured for that
            if (RECORD_KERNEL_THREADS) {
                dumpKernelStackTraces();
            }

            mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null);

            // Only kill the process if the debugger is not attached.
            if (!Debug.isDebuggerConnected()) {
                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
                Process.killProcess(Process.myPid());
                System.exit(10);
            } else {
                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
            }

            waitedHalf = false;
        }
    }

    /**
     * Dumps kernel thread stacks to the file named by the "dalvik.vm.stack-trace-file"
     * system property.
     *
     * @return the trace file, or null if the property is unset or empty
     */
    private File dumpKernelStackTraces() {
        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
        if (tracesPath == null || tracesPath.length() == 0) {
            return null;
        }

        native_dumpKernelStacks(tracesPath);
        return new File(tracesPath);
    }

    private native void native_dumpKernelStacks(String tracesPath);
}