1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.server; 18 19import com.android.server.am.ActivityManagerService; 20import com.android.server.power.PowerManagerService; 21 22import android.app.AlarmManager; 23import android.app.PendingIntent; 24import android.content.BroadcastReceiver; 25import android.content.ContentResolver; 26import android.content.Context; 27import android.content.Intent; 28import android.content.IntentFilter; 29import android.os.BatteryManager; 30import android.os.Debug; 31import android.os.Handler; 32import android.os.Message; 33import android.os.Process; 34import android.os.ServiceManager; 35import android.os.SystemClock; 36import android.os.SystemProperties; 37import android.util.EventLog; 38import android.util.Log; 39import android.util.Slog; 40 41import java.io.File; 42import java.util.ArrayList; 43import java.util.Calendar; 44 45/** This class calls its monitor every minute. Killing this process if they don't return **/ 46public class Watchdog extends Thread { 47 static final String TAG = "Watchdog"; 48 static final boolean localLOGV = false || false; 49 50 // Set this to true to use debug default values. 51 static final boolean DB = false; 52 53 // Set this to true to have the watchdog record kernel thread stacks when it fires 54 static final boolean RECORD_KERNEL_THREADS = true; 55 56 static final int MONITOR = 2718; 57 58 static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000; 59 static final int TIME_TO_WAIT = TIME_TO_RESTART / 2; 60 61 static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60; // 5 minutes 62 static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60; // 3 minutes 63 static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes 64 65 static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0; // never force reboot 66 static final int REBOOT_DEFAULT_START_TIME = 3*60*60; // 3:00am 67 static final int REBOOT_DEFAULT_WINDOW = 60*60; // within 1 hour 68 69 static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT"; 70 71 static final String[] NATIVE_STACKS_OF_INTEREST = new String[] { 72 "/system/bin/mediaserver", 73 "/system/bin/sdcard", 74 "/system/bin/surfaceflinger" 75 }; 76 77 static Watchdog sWatchdog; 78 79 /* This handler will be used to post message back onto the main thread */ 80 final Handler mHandler; 81 final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>(); 82 ContentResolver mResolver; 83 BatteryService mBattery; 84 PowerManagerService mPower; 85 AlarmManagerService mAlarm; 86 ActivityManagerService mActivity; 87 boolean mCompleted; 88 boolean mForceKillSystem; 89 Monitor mCurrentMonitor; 90 91 int mPhonePid; 92 93 final Calendar mCalendar = Calendar.getInstance(); 94 int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF; 95 int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM; 96 boolean mNeedScheduledCheck; 97 PendingIntent mCheckupIntent; 98 PendingIntent mRebootIntent; 99 100 long mBootTime; 101 int mRebootInterval; 102 103 boolean mReqRebootNoWait; // should wait for one interval before reboot? 104 int mReqRebootInterval = -1; // >= 0 if a reboot has been requested 105 int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested 106 int mReqRebootWindow = -1; // >= 0 if a specific window has been requested 107 int mReqMinScreenOff = -1; // >= 0 if a specific screen off time has been requested 108 int mReqMinNextAlarm = -1; // >= 0 if specific time to next alarm has been requested 109 int mReqRecheckInterval= -1; // >= 0 if a specific recheck interval has been requested 110 111 /** 112 * Used for scheduling monitor callbacks and checking memory usage. 113 */ 114 final class HeartbeatHandler extends Handler { 115 @Override 116 public void handleMessage(Message msg) { 117 switch (msg.what) { 118 case MONITOR: { 119 // See if we should force a reboot. 120 int rebootInterval = mReqRebootInterval >= 0 121 ? mReqRebootInterval : REBOOT_DEFAULT_INTERVAL; 122 if (mRebootInterval != rebootInterval) { 123 mRebootInterval = rebootInterval; 124 // We have been running long enough that a reboot can 125 // be considered... 126 checkReboot(false); 127 } 128 129 final int size = mMonitors.size(); 130 for (int i = 0 ; i < size ; i++) { 131 mCurrentMonitor = mMonitors.get(i); 132 mCurrentMonitor.monitor(); 133 } 134 135 synchronized (Watchdog.this) { 136 mCompleted = true; 137 mCurrentMonitor = null; 138 } 139 } break; 140 } 141 } 142 } 143 144 final class RebootReceiver extends BroadcastReceiver { 145 @Override 146 public void onReceive(Context c, Intent intent) { 147 if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot."); 148 checkReboot(true); 149 } 150 } 151 152 final class RebootRequestReceiver extends BroadcastReceiver { 153 @Override 154 public void onReceive(Context c, Intent intent) { 155 mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0; 156 mReqRebootInterval = intent.getIntExtra("interval", -1); 157 mReqRebootStartTime = intent.getIntExtra("startTime", -1); 158 mReqRebootWindow = intent.getIntExtra("window", -1); 159 mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1); 160 mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1); 161 mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1); 162 EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT, 163 mReqRebootNoWait ? 1 : 0, mReqRebootInterval, 164 mReqRecheckInterval, mReqRebootStartTime, 165 mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm); 166 checkReboot(true); 167 } 168 } 169 170 public interface Monitor { 171 void monitor(); 172 } 173 174 public static Watchdog getInstance() { 175 if (sWatchdog == null) { 176 sWatchdog = new Watchdog(); 177 } 178 179 return sWatchdog; 180 } 181 182 private Watchdog() { 183 super("watchdog"); 184 mHandler = new HeartbeatHandler(); 185 } 186 187 public void init(Context context, BatteryService battery, 188 PowerManagerService power, AlarmManagerService alarm, 189 ActivityManagerService activity) { 190 mResolver = context.getContentResolver(); 191 mBattery = battery; 192 mPower = power; 193 mAlarm = alarm; 194 mActivity = activity; 195 196 context.registerReceiver(new RebootReceiver(), 197 new IntentFilter(REBOOT_ACTION)); 198 mRebootIntent = PendingIntent.getBroadcast(context, 199 0, new Intent(REBOOT_ACTION), 0); 200 201 context.registerReceiver(new RebootRequestReceiver(), 202 new IntentFilter(Intent.ACTION_REBOOT), 203 android.Manifest.permission.REBOOT, null); 204 205 mBootTime = System.currentTimeMillis(); 206 } 207 208 public void processStarted(String name, int pid) { 209 synchronized (this) { 210 if ("com.android.phone".equals(name)) { 211 mPhonePid = pid; 212 } 213 } 214 } 215 216 public void addMonitor(Monitor monitor) { 217 synchronized (this) { 218 if (isAlive()) { 219 throw new RuntimeException("Monitors can't be added while the Watchdog is running"); 220 } 221 mMonitors.add(monitor); 222 } 223 } 224 225 void checkReboot(boolean fromAlarm) { 226 int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval 227 : REBOOT_DEFAULT_INTERVAL; 228 mRebootInterval = rebootInterval; 229 if (rebootInterval <= 0) { 230 // No reboot interval requested. 231 if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!"); 232 mAlarm.remove(mRebootIntent); 233 return; 234 } 235 236 long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime 237 : REBOOT_DEFAULT_START_TIME; 238 long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow 239 : REBOOT_DEFAULT_WINDOW) * 1000; 240 long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval 241 : MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000; 242 243 retrieveBrutalityAmount(); 244 245 long realStartTime; 246 long now; 247 248 synchronized (this) { 249 now = System.currentTimeMillis(); 250 realStartTime = computeCalendarTime(mCalendar, now, 251 rebootStartTime); 252 253 long rebootIntervalMillis = rebootInterval*24*60*60*1000; 254 if (DB || mReqRebootNoWait || 255 (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) { 256 if (fromAlarm && rebootWindowMillis <= 0) { 257 // No reboot window -- just immediately reboot. 258 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 259 (int)rebootIntervalMillis, (int)rebootStartTime*1000, 260 (int)rebootWindowMillis, ""); 261 rebootSystem("Checkin scheduled forced"); 262 return; 263 } 264 265 // Are we within the reboot window? 266 if (now < realStartTime) { 267 // Schedule alarm for next check interval. 268 realStartTime = computeCalendarTime(mCalendar, 269 now, rebootStartTime); 270 } else if (now < (realStartTime+rebootWindowMillis)) { 271 String doit = shouldWeBeBrutalLocked(now); 272 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now, 273 (int)rebootInterval, (int)rebootStartTime*1000, 274 (int)rebootWindowMillis, doit != null ? doit : ""); 275 if (doit == null) { 276 rebootSystem("Checked scheduled range"); 277 return; 278 } 279 280 // Schedule next alarm either within the window or in the 281 // next interval. 282 if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) { 283 realStartTime = computeCalendarTime(mCalendar, 284 now + rebootIntervalMillis, rebootStartTime); 285 } else { 286 realStartTime = now + recheckInterval; 287 } 288 } else { 289 // Schedule alarm for next check interval. 290 realStartTime = computeCalendarTime(mCalendar, 291 now + rebootIntervalMillis, rebootStartTime); 292 } 293 } 294 } 295 296 if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for " 297 + ((realStartTime-now)/1000/60) + "m from now"); 298 mAlarm.remove(mRebootIntent); 299 mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent); 300 } 301 302 /** 303 * Perform a full reboot of the system. 304 */ 305 void rebootSystem(String reason) { 306 Slog.i(TAG, "Rebooting system because: " + reason); 307 PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power"); 308 pms.reboot(false, reason, false); 309 } 310 311 /** 312 * Load the current Gservices settings for when 313 * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen. 314 * Must not be called with the lock held. 315 */ 316 void retrieveBrutalityAmount() { 317 mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff 318 : MEMCHECK_DEFAULT_MIN_SCREEN_OFF) * 1000; 319 mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm 320 : MEMCHECK_DEFAULT_MIN_ALARM) * 1000; 321 } 322 323 /** 324 * Determine whether it is a good time to kill, crash, or otherwise 325 * plunder the current situation for the overall long-term benefit of 326 * the world. 327 * 328 * @param curTime The current system time. 329 * @return Returns null if this is a good time, else a String with the 330 * text of why it is not a good time. 331 */ 332 String shouldWeBeBrutalLocked(long curTime) { 333 if (mBattery == null || !mBattery.isPowered(BatteryManager.BATTERY_PLUGGED_ANY)) { 334 return "battery"; 335 } 336 337 if (mMinScreenOff >= 0 && (mPower == null || 338 mPower.timeSinceScreenWasLastOn() < mMinScreenOff)) { 339 return "screen"; 340 } 341 342 if (mMinAlarm >= 0 && (mAlarm == null || 343 mAlarm.timeToNextAlarm() < mMinAlarm)) { 344 return "alarm"; 345 } 346 347 return null; 348 } 349 350 static long computeCalendarTime(Calendar c, long curTime, 351 long secondsSinceMidnight) { 352 353 // start with now 354 c.setTimeInMillis(curTime); 355 356 int val = (int)secondsSinceMidnight / (60*60); 357 c.set(Calendar.HOUR_OF_DAY, val); 358 secondsSinceMidnight -= val * (60*60); 359 val = (int)secondsSinceMidnight / 60; 360 c.set(Calendar.MINUTE, val); 361 c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60)); 362 c.set(Calendar.MILLISECOND, 0); 363 364 long newTime = c.getTimeInMillis(); 365 if (newTime < curTime) { 366 // The given time (in seconds since midnight) has already passed for today, so advance 367 // by one day (due to daylight savings, etc., the delta may differ from 24 hours). 368 c.add(Calendar.DAY_OF_MONTH, 1); 369 newTime = c.getTimeInMillis(); 370 } 371 372 return newTime; 373 } 374 375 @Override 376 public void run() { 377 boolean waitedHalf = false; 378 while (true) { 379 mCompleted = false; 380 mHandler.sendEmptyMessage(MONITOR); 381 382 synchronized (this) { 383 long timeout = TIME_TO_WAIT; 384 385 // NOTE: We use uptimeMillis() here because we do not want to increment the time we 386 // wait while asleep. If the device is asleep then the thing that we are waiting 387 // to timeout on is asleep as well and won't have a chance to run, causing a false 388 // positive on when to kill things. 389 long start = SystemClock.uptimeMillis(); 390 while (timeout > 0 && !mForceKillSystem) { 391 try { 392 wait(timeout); // notifyAll() is called when mForceKillSystem is set 393 } catch (InterruptedException e) { 394 Log.wtf(TAG, e); 395 } 396 timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start); 397 } 398 399 if (mCompleted && !mForceKillSystem) { 400 // The monitors have returned. 401 waitedHalf = false; 402 continue; 403 } 404 405 if (!waitedHalf) { 406 // We've waited half the deadlock-detection interval. Pull a stack 407 // trace and wait another half. 408 ArrayList<Integer> pids = new ArrayList<Integer>(); 409 pids.add(Process.myPid()); 410 ActivityManagerService.dumpStackTraces(true, pids, null, null, 411 NATIVE_STACKS_OF_INTEREST); 412 waitedHalf = true; 413 continue; 414 } 415 } 416 417 // If we got here, that means that the system is most likely hung. 418 // First collect stack traces from all threads of the system process. 419 // Then kill this process so that the system will restart. 420 421 final String name = (mCurrentMonitor != null) ? 422 mCurrentMonitor.getClass().getName() : "null"; 423 EventLog.writeEvent(EventLogTags.WATCHDOG, name); 424 425 ArrayList<Integer> pids = new ArrayList<Integer>(); 426 pids.add(Process.myPid()); 427 if (mPhonePid > 0) pids.add(mPhonePid); 428 // Pass !waitedHalf so that just in case we somehow wind up here without having 429 // dumped the halfway stacks, we properly re-initialize the trace file. 430 final File stack = ActivityManagerService.dumpStackTraces( 431 !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST); 432 433 // Give some extra time to make sure the stack traces get written. 434 // The system's been hanging for a minute, another second or two won't hurt much. 435 SystemClock.sleep(2000); 436 437 // Pull our own kernel thread stacks as well if we're configured for that 438 if (RECORD_KERNEL_THREADS) { 439 dumpKernelStackTraces(); 440 } 441 442 // Try to add the error to the dropbox, but assuming that the ActivityManager 443 // itself may be deadlocked. (which has happened, causing this statement to 444 // deadlock and the watchdog as a whole to be ineffective) 445 Thread dropboxThread = new Thread("watchdogWriteToDropbox") { 446 public void run() { 447 mActivity.addErrorToDropBox( 448 "watchdog", null, "system_server", null, null, 449 name, null, stack, null); 450 } 451 }; 452 dropboxThread.start(); 453 try { 454 dropboxThread.join(2000); // wait up to 2 seconds for it to return. 455 } catch (InterruptedException ignored) {} 456 457 // Only kill the process if the debugger is not attached. 458 if (!Debug.isDebuggerConnected()) { 459 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name); 460 Process.killProcess(Process.myPid()); 461 System.exit(10); 462 } else { 463 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process"); 464 } 465 466 waitedHalf = false; 467 } 468 } 469 470 private File dumpKernelStackTraces() { 471 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null); 472 if (tracesPath == null || tracesPath.length() == 0) { 473 return null; 474 } 475 476 native_dumpKernelStacks(tracesPath); 477 return new File(tracesPath); 478 } 479 480 private native void native_dumpKernelStacks(String tracesPath); 481} 482