1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
19import com.android.server.am.ActivityManagerService;
20
21import android.app.AlarmManager;
22import android.app.PendingIntent;
23import android.content.BroadcastReceiver;
24import android.content.ContentResolver;
25import android.content.Context;
26import android.content.Intent;
27import android.content.IntentFilter;
28import android.os.Debug;
29import android.os.Handler;
30import android.os.Message;
31import android.os.Process;
32import android.os.ServiceManager;
33import android.os.SystemClock;
34import android.os.SystemProperties;
35import android.provider.Settings;
36import android.util.Config;
37import android.util.EventLog;
38import android.util.Log;
39import android.util.Slog;
40
41import java.io.File;
42import java.io.FileInputStream;
43import java.io.FileOutputStream;
44import java.io.IOException;
45import java.util.ArrayList;
46import java.util.Calendar;
47
/** This class calls its monitors every minute, killing this process if they don't return. */
49public class Watchdog extends Thread {
    // Tag used for every log line written by this class.
    static final String TAG = "Watchdog";
    // Gates the verbose Slog.v() tracing found throughout this class.
    static final boolean localLOGV = false || Config.LOGV;

    // Set this to true to use debug default values.
    static final boolean DB = false;

    // Set this to true to have the watchdog record kernel thread stacks when it fires
    static final boolean RECORD_KERNEL_THREADS = true;

    // Message codes dispatched by HeartbeatHandler.handleMessage().
    static final int MONITOR = 2718;
    static final int GLOBAL_PSS = 2719;

    // NOTE(review): presumably milliseconds (15s in debug, 60s otherwise); the
    // code consuming these two values is outside this part of the file -- confirm.
    static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
    static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;

    // Default memcheck intervals, in seconds (readers multiply by 1000), and the
    // default soft/hard thresholds handed to the two MemMonitor instances.
    static final int MEMCHECK_DEFAULT_INTERVAL = DB ? 30 : 30*60; // 30 minutes
    static final int MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL = DB ? 60 : 2*60*60;      // 2 hours
    static final int MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD = (DB ? 10:16)*1024*1024; // 16MB
    static final int MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD = (DB ? 14:20)*1024*1024; // 20MB
    static final int MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD = (DB ? 4:8)*1024*1024;    // 8MB
    static final int MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD = (DB ? 8:12)*1024*1024;   // 12MB

    // Exec start/end are seconds since midnight (they are passed to
    // computeCalendarTime()); the remaining values are durations in seconds
    // that their readers multiply by 1000.
    static final int MEMCHECK_DEFAULT_EXEC_START_TIME = 1*60*60;           // 1:00am
    static final int MEMCHECK_DEFAULT_EXEC_END_TIME = 5*60*60;             // 5:00am
    static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60;   // 5 minutes
    static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60;        // 3 minutes
    static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes

    // Reboot interval is in days (checkReboot() scales it by 24*60*60*1000) and
    // a value <= 0 disables scheduled reboots; start time is seconds since
    // midnight; window is a duration in seconds.
    static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0;                 // never force reboot
    static final int REBOOT_DEFAULT_START_TIME = 3*60*60;                  // 3:00am
    static final int REBOOT_DEFAULT_WINDOW = 60*60;                        // within 1 hour

    // Broadcast actions that init() registers CheckupReceiver and
    // RebootReceiver for, and that back mCheckupIntent / mRebootIntent.
    static final String CHECKUP_ACTION = "com.android.service.Watchdog.CHECKUP";
    static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
84
    // Singleton instance, created by the first call to getInstance().
    static Watchdog sWatchdog;

    /* This handler will be used to post messages back onto the main thread */
    final Handler mHandler;
    // Runnable passed to ActivityManagerService.requestPss(); when run it posts
    // GLOBAL_PSS to mHandler.
    final Runnable mGlobalPssCollected;
    // Monitors called, in insertion order, on every MONITOR message.
    final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
    // Collaborators supplied through init().
    ContentResolver mResolver;
    BatteryService mBattery;
    PowerManagerService mPower;
    AlarmManagerService mAlarm;
    ActivityManagerService mActivity;
    // Set to true, under the Watchdog lock, once a MONITOR pass has called
    // every registered monitor.
    boolean mCompleted;
    // Set by checkMemory() when the system memory monitor asks for a kill;
    // waiters on this object are woken with notifyAll().
    boolean mForceKillSystem;
    // Monitor currently being called by the MONITOR pass; reset to null when
    // the pass finishes.
    Monitor mCurrentMonitor;

    // State of the "com.android.phone" process, written under
    // synchronized(this) by processStarted() and reportPss().
    PssRequestor mPhoneReq;
    int mPhonePid;
    int mPhonePss;

    // uptimeMillis() of the last per-process pss collection; starts one default
    // interval in the past.
    long mLastMemCheckTime = -(MEMCHECK_DEFAULT_INTERVAL*1000);
    // True when pss was requested on a MONITOR pass and checkMemory() should
    // run on the next one.
    boolean mHavePss;
    // elapsedRealtime() of the last global pss collection; starts one default
    // interval in the past.
    long mLastMemCheckRealtime = -(MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL*1000);
    // True while a global pss collection is outstanding and not yet logged.
    boolean mHaveGlobalPss;
    final MemMonitor mSystemMemMonitor = new MemMonitor("system",
            Settings.Secure.MEMCHECK_SYSTEM_ENABLED,
            Settings.Secure.MEMCHECK_SYSTEM_SOFT_THRESHOLD,
            MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD,
            Settings.Secure.MEMCHECK_SYSTEM_HARD_THRESHOLD,
            MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD);
    final MemMonitor mPhoneMemMonitor = new MemMonitor("com.android.phone",
            Settings.Secure.MEMCHECK_PHONE_ENABLED,
            Settings.Secure.MEMCHECK_PHONE_SOFT_THRESHOLD,
            MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD,
            Settings.Secure.MEMCHECK_PHONE_HARD_THRESHOLD,
            MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD);

    // Scratch calendar passed to every computeCalendarTime() call.
    final Calendar mCalendar = Calendar.getInstance();
    // curTime for which the exec window below was last computed (see
    // computeMemcheckTimesLocked()).
    long mMemcheckLastTime;
    // Window, as wall-clock milliseconds, inside which a soft-threshold reset
    // may be carried out.
    long mMemcheckExecStartTime;
    long mMemcheckExecEndTime;
    // NOTE(review): these start out holding the defaults in seconds, while
    // retrieveBrutalityAmount() stores milliseconds. checkMemory() and
    // checkReboot() both call retrieveBrutalityAmount() before
    // shouldWeBeBrutalLocked() reads them.
    int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
    int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
    // Set by MemMonitor.checkLocked() when a soft reset was skipped and a later
    // re-check should be scheduled.
    boolean mNeedScheduledCheck;
    // Alarm intents created in init() for CHECKUP_ACTION and REBOOT_ACTION.
    PendingIntent mCheckupIntent;
    PendingIntent mRebootIntent;

    // System.currentTimeMillis() captured when init() ran.
    long mBootTime;
    // Reboot interval most recently seen by the MONITOR pass or checkReboot().
    int mRebootInterval;

    // Overrides delivered through RebootRequestReceiver; -1 means "not requested".
    boolean mReqRebootNoWait;     // should wait for one interval before reboot?
    int mReqRebootInterval = -1;  // >= 0 if a reboot has been requested
    int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
    int mReqRebootWindow = -1;    // >= 0 if a specific window has been requested
    int mReqMinScreenOff = -1;    // >= 0 if a specific screen off time has been requested
    int mReqMinNextAlarm = -1;    // >= 0 if specific time to next alarm has been requested
    int mReqRecheckInterval= -1;  // >= 0 if a specific recheck interval has been requested
141
142    /**
143     * This class monitors the memory in a particular process.
144     */
145    final class MemMonitor {
146        final String mProcessName;
147        final String mEnabledSetting;
148        final String mSoftSetting;
149        final String mHardSetting;
150
151        int mSoftThreshold;
152        int mHardThreshold;
153        boolean mEnabled;
154        long mLastPss;
155
156        static final int STATE_OK = 0;
157        static final int STATE_SOFT = 1;
158        static final int STATE_HARD = 2;
159        int mState;
160
161        MemMonitor(String processName, String enabledSetting,
162                String softSetting, int defSoftThreshold,
163                String hardSetting, int defHardThreshold) {
164            mProcessName = processName;
165            mEnabledSetting = enabledSetting;
166            mSoftSetting = softSetting;
167            mHardSetting = hardSetting;
168            mSoftThreshold = defSoftThreshold;
169            mHardThreshold = defHardThreshold;
170        }
171
172        void retrieveSettings(ContentResolver resolver) {
173            mSoftThreshold = Settings.Secure.getInt(
174                    resolver, mSoftSetting, mSoftThreshold);
175            mHardThreshold = Settings.Secure.getInt(
176                    resolver, mHardSetting, mHardThreshold);
177            mEnabled = Settings.Secure.getInt(
178                    resolver, mEnabledSetting, 0) != 0;
179        }
180
181        boolean checkLocked(long curTime, int pid, int pss) {
182            mLastPss = pss;
183            if (mLastPss < mSoftThreshold) {
184                mState = STATE_OK;
185            } else if (mLastPss < mHardThreshold) {
186                mState = STATE_SOFT;
187            } else {
188                mState = STATE_HARD;
189            }
190            EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_PSS, mProcessName, pid, mLastPss);
191
192            if (mState == STATE_OK) {
193                // Memory is good, don't recover.
194                return false;
195            }
196
197            if (mState == STATE_HARD) {
198                // Memory is really bad, kill right now.
199                EventLog.writeEvent(EventLogTags.WATCHDOG_HARD_RESET, mProcessName, pid,
200                        mHardThreshold, mLastPss);
201                return mEnabled;
202            }
203
204            // It is time to schedule a reset...
205            // Check if we are currently within the time to kill processes due
206            // to memory use.
207            computeMemcheckTimesLocked(curTime);
208            String skipReason = null;
209            if (curTime < mMemcheckExecStartTime || curTime > mMemcheckExecEndTime) {
210                skipReason = "time";
211            } else {
212                skipReason = shouldWeBeBrutalLocked(curTime);
213            }
214            EventLog.writeEvent(EventLogTags.WATCHDOG_SOFT_RESET, mProcessName, pid,
215                    mSoftThreshold, mLastPss, skipReason != null ? skipReason : "");
216            if (skipReason != null) {
217                mNeedScheduledCheck = true;
218                return false;
219            }
220            return mEnabled;
221        }
222
223        void clear() {
224            mLastPss = 0;
225            mState = STATE_OK;
226        }
227    }
228
229    /**
230     * Used for scheduling monitor callbacks and checking memory usage.
231     */
232    final class HeartbeatHandler extends Handler {
233        @Override
234        public void handleMessage(Message msg) {
235            switch (msg.what) {
236                case GLOBAL_PSS: {
237                    if (mHaveGlobalPss) {
238                        // During the last pass we collected pss information, so
239                        // now it is time to report it.
240                        mHaveGlobalPss = false;
241                        if (localLOGV) Slog.v(TAG, "Received global pss, logging.");
242                        logGlobalMemory();
243                    }
244                } break;
245
246                case MONITOR: {
247                    if (mHavePss) {
248                        // During the last pass we collected pss information, so
249                        // now it is time to report it.
250                        mHavePss = false;
251                        if (localLOGV) Slog.v(TAG, "Have pss, checking memory.");
252                        checkMemory();
253                    }
254
255                    if (mHaveGlobalPss) {
256                        // During the last pass we collected pss information, so
257                        // now it is time to report it.
258                        mHaveGlobalPss = false;
259                        if (localLOGV) Slog.v(TAG, "Have global pss, logging.");
260                        logGlobalMemory();
261                    }
262
263                    long now = SystemClock.uptimeMillis();
264
265                    // See if we should force a reboot.
266                    int rebootInterval = mReqRebootInterval >= 0
267                            ? mReqRebootInterval : Settings.Secure.getInt(
268                            mResolver, Settings.Secure.REBOOT_INTERVAL,
269                            REBOOT_DEFAULT_INTERVAL);
270                    if (mRebootInterval != rebootInterval) {
271                        mRebootInterval = rebootInterval;
272                        // We have been running long enough that a reboot can
273                        // be considered...
274                        checkReboot(false);
275                    }
276
277                    // See if we should check memory conditions.
278                    long memCheckInterval = Settings.Secure.getLong(
279                            mResolver, Settings.Secure.MEMCHECK_INTERVAL,
280                            MEMCHECK_DEFAULT_INTERVAL) * 1000;
281                    if ((mLastMemCheckTime+memCheckInterval) < now) {
282                        // It is now time to collect pss information.  This
283                        // is async so we won't report it now.  And to keep
284                        // things simple, we will assume that everyone has
285                        // reported back by the next MONITOR message.
286                        mLastMemCheckTime = now;
287                        if (localLOGV) Slog.v(TAG, "Collecting memory usage.");
288                        collectMemory();
289                        mHavePss = true;
290
291                        long memCheckRealtimeInterval = Settings.Secure.getLong(
292                                mResolver, Settings.Secure.MEMCHECK_LOG_REALTIME_INTERVAL,
293                                MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL) * 1000;
294                        long realtimeNow = SystemClock.elapsedRealtime();
295                        if ((mLastMemCheckRealtime+memCheckRealtimeInterval) < realtimeNow) {
296                            mLastMemCheckRealtime = realtimeNow;
297                            if (localLOGV) Slog.v(TAG, "Collecting global memory usage.");
298                            collectGlobalMemory();
299                            mHaveGlobalPss = true;
300                        }
301                    }
302
303                    final int size = mMonitors.size();
304                    for (int i = 0 ; i < size ; i++) {
305                        mCurrentMonitor = mMonitors.get(i);
306                        mCurrentMonitor.monitor();
307                    }
308
309                    synchronized (Watchdog.this) {
310                        mCompleted = true;
311                        mCurrentMonitor = null;
312                    }
313                } break;
314            }
315        }
316    }
317
318    final class GlobalPssCollected implements Runnable {
319        public void run() {
320            mHandler.sendEmptyMessage(GLOBAL_PSS);
321        }
322    }
323
324    final class CheckupReceiver extends BroadcastReceiver {
325        @Override
326        public void onReceive(Context c, Intent intent) {
327            if (localLOGV) Slog.v(TAG, "Alarm went off, checking memory.");
328            checkMemory();
329        }
330    }
331
332    final class RebootReceiver extends BroadcastReceiver {
333        @Override
334        public void onReceive(Context c, Intent intent) {
335            if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
336            checkReboot(true);
337        }
338    }
339
340    final class RebootRequestReceiver extends BroadcastReceiver {
341        @Override
342        public void onReceive(Context c, Intent intent) {
343            mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
344            mReqRebootInterval = intent.getIntExtra("interval", -1);
345            mReqRebootStartTime = intent.getIntExtra("startTime", -1);
346            mReqRebootWindow = intent.getIntExtra("window", -1);
347            mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
348            mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
349            mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
350            EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
351                    mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
352                            mReqRecheckInterval, mReqRebootStartTime,
353                    mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
354            checkReboot(true);
355        }
356    }
357
    /**
     * Callback registered through {@link #addMonitor} and invoked by the
     * heartbeat handler on every MONITOR message.
     */
    public interface Monitor {
        /**
         * Called once per MONITOR pass, on the handler's thread.
         * NOTE(review): presumably expected to return promptly, since the
         * class comment says the process is killed when monitors don't
         * return -- confirm against the watchdog thread's run loop.
         */
        void monitor();
    }
361
    /**
     * Handle to a process from which pss can be requested. The requestor for
     * the phone process is stored by {@link #processStarted}.
     */
    public interface PssRequestor {
        /**
         * Called by collectMemory() while holding the Watchdog lock.
         * NOTE(review): the result presumably arrives later through
         * reportPss() -- confirm with the implementer.
         */
        void requestPss();
    }
365
366    public class PssStats {
367        public int mEmptyPss;
368        public int mEmptyCount;
369        public int mBackgroundPss;
370        public int mBackgroundCount;
371        public int mServicePss;
372        public int mServiceCount;
373        public int mVisiblePss;
374        public int mVisibleCount;
375        public int mForegroundPss;
376        public int mForegroundCount;
377
378        public int mNoPssCount;
379
380        public int mProcDeaths[] = new int[10];
381    }
382
383    public static Watchdog getInstance() {
384        if (sWatchdog == null) {
385            sWatchdog = new Watchdog();
386        }
387
388        return sWatchdog;
389    }
390
    /**
     * Creates the watchdog thread object, named "watchdog", together with its
     * heartbeat handler and the runnable used to signal that global pss has
     * been collected. Private: instances are obtained through getInstance().
     */
    private Watchdog() {
        super("watchdog");
        // HeartbeatHandler uses the no-argument Handler constructor.
        mHandler = new HeartbeatHandler();
        mGlobalPssCollected = new GlobalPssCollected();
    }
396
397    public void init(Context context, BatteryService battery,
398            PowerManagerService power, AlarmManagerService alarm,
399            ActivityManagerService activity) {
400        mResolver = context.getContentResolver();
401        mBattery = battery;
402        mPower = power;
403        mAlarm = alarm;
404        mActivity = activity;
405
406        context.registerReceiver(new CheckupReceiver(),
407                new IntentFilter(CHECKUP_ACTION));
408        mCheckupIntent = PendingIntent.getBroadcast(context,
409                0, new Intent(CHECKUP_ACTION), 0);
410
411        context.registerReceiver(new RebootReceiver(),
412                new IntentFilter(REBOOT_ACTION));
413        mRebootIntent = PendingIntent.getBroadcast(context,
414                0, new Intent(REBOOT_ACTION), 0);
415
416        context.registerReceiver(new RebootRequestReceiver(),
417                new IntentFilter(Intent.ACTION_REBOOT),
418                android.Manifest.permission.REBOOT, null);
419
420        mBootTime = System.currentTimeMillis();
421    }
422
423    public void processStarted(PssRequestor req, String name, int pid) {
424        synchronized (this) {
425            if ("com.android.phone".equals(name)) {
426                mPhoneReq = req;
427                mPhonePid = pid;
428                mPhonePss = 0;
429            }
430        }
431    }
432
433    public void reportPss(PssRequestor req, String name, int pss) {
434        synchronized (this) {
435            if (mPhoneReq == req) {
436                mPhonePss = pss;
437            }
438        }
439    }
440
441    public void addMonitor(Monitor monitor) {
442        synchronized (this) {
443            if (isAlive()) {
444                throw new RuntimeException("Monitors can't be added while the Watchdog is running");
445            }
446            mMonitors.add(monitor);
447        }
448    }
449
450    /**
451     * Retrieve memory usage information from specific processes being
452     * monitored.  This is an async operation, so must be done before doing
453     * memory checks.
454     */
455    void collectMemory() {
456        synchronized (this) {
457            if (mPhoneReq != null) {
458                mPhoneReq.requestPss();
459            }
460        }
461    }
462
    /**
     * Retrieve memory usage over all application processes.  This is an
     * async operation, so must be done before doing memory checks.
     * The activity manager is handed mGlobalPssCollected, which posts
     * GLOBAL_PSS to the heartbeat handler when it is run.
     */
    void collectGlobalMemory() {
        mActivity.requestPss(mGlobalPssCollected);
    }
470
471    /**
472     * Check memory usage in the system, scheduling kills/reboots as needed.
473     * This always runs on the mHandler thread.
474     */
475    void checkMemory() {
476        boolean needScheduledCheck;
477        long curTime;
478        long nextTime = 0;
479
480        long recheckInterval = Settings.Secure.getLong(
481                mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
482                MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000;
483
484        mSystemMemMonitor.retrieveSettings(mResolver);
485        mPhoneMemMonitor.retrieveSettings(mResolver);
486        retrieveBrutalityAmount();
487
488        synchronized (this) {
489            curTime = System.currentTimeMillis();
490            mNeedScheduledCheck = false;
491
492            // How is the system doing?
493            if (mSystemMemMonitor.checkLocked(curTime, Process.myPid(),
494                    (int)Process.getPss(Process.myPid()))) {
495                // Not good!  Time to suicide.
496                mForceKillSystem = true;
497                notifyAll();
498                return;
499            }
500
501            // How is the phone process doing?
502            if (mPhoneReq != null) {
503                if (mPhoneMemMonitor.checkLocked(curTime, mPhonePid,
504                        mPhonePss)) {
505                    // Just kill the phone process and let it restart.
506                    Slog.i(TAG, "Watchdog is killing the phone process");
507                    Process.killProcess(mPhonePid);
508                }
509            } else {
510                mPhoneMemMonitor.clear();
511            }
512
513            needScheduledCheck = mNeedScheduledCheck;
514            if (needScheduledCheck) {
515                // Something is going bad, but now is not a good time to
516                // tear things down...  schedule an alarm to check again soon.
517                nextTime = curTime + recheckInterval;
518                if (nextTime < mMemcheckExecStartTime) {
519                    nextTime = mMemcheckExecStartTime;
520                } else if (nextTime >= mMemcheckExecEndTime){
521                    // Need to check during next exec time...  so that needs
522                    // to be computed.
523                    if (localLOGV) Slog.v(TAG, "Computing next time range");
524                    computeMemcheckTimesLocked(nextTime);
525                    nextTime = mMemcheckExecStartTime;
526                }
527
528                if (localLOGV) {
529                    mCalendar.setTimeInMillis(nextTime);
530                    Slog.v(TAG, "Next Alarm Time: " + mCalendar);
531                }
532            }
533        }
534
535        if (needScheduledCheck) {
536            if (localLOGV) Slog.v(TAG, "Scheduling next memcheck alarm for "
537                    + ((nextTime-curTime)/1000/60) + "m from now");
538            mAlarm.remove(mCheckupIntent);
539            mAlarm.set(AlarmManager.RTC_WAKEUP, nextTime, mCheckupIntent);
540        } else {
541            if (localLOGV) Slog.v(TAG, "No need to schedule a memcheck alarm!");
542            mAlarm.remove(mCheckupIntent);
543        }
544    }
545
    // Reused holder that logGlobalMemory() passes to the activity manager.
    final PssStats mPssStats = new PssStats();
    // Labels handed to Process.readProcLines() for /proc/meminfo.
    // NOTE(review): presumably mMemInfoSizes is filled in the same order as
    // these labels -- confirm against readProcLines().
    final String[] mMemInfoFields = new String[] {
            "MemFree:", "Buffers:", "Cached:",
            "Active:", "Inactive:",
            "AnonPages:", "Mapped:", "Slab:",
            "SReclaimable:", "SUnreclaim:", "PageTables:" };
    final long[] mMemInfoSizes = new long[mMemInfoFields.length];
    // Labels handed to Process.readProcLines() for /proc/vmstat.
    final String[] mVMStatFields = new String[] {
            "pgfree ", "pgactivate ", "pgdeactivate ",
            "pgfault ", "pgmajfault " };
    // After logGlobalMemory() runs, mVMStatSizes holds the change since the
    // previous run and mPrevVMStatSizes holds the raw values just read.
    final long[] mVMStatSizes = new long[mVMStatFields.length];
    final long[] mPrevVMStatSizes = new long[mVMStatFields.length];
    // uptimeMillis() at the previous logGlobalMemory() call.
    long mLastLogGlobalMemoryTime;
559
560    void logGlobalMemory() {
561        PssStats stats = mPssStats;
562        mActivity.collectPss(stats);
563        EventLog.writeEvent(EventLogTags.WATCHDOG_PSS_STATS,
564                stats.mEmptyPss, stats.mEmptyCount,
565                stats.mBackgroundPss, stats.mBackgroundCount,
566                stats.mServicePss, stats.mServiceCount,
567                stats.mVisiblePss, stats.mVisibleCount,
568                stats.mForegroundPss, stats.mForegroundCount,
569                stats.mNoPssCount);
570        EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_STATS,
571                stats.mProcDeaths[0], stats.mProcDeaths[1], stats.mProcDeaths[2],
572                stats.mProcDeaths[3], stats.mProcDeaths[4]);
573        Process.readProcLines("/proc/meminfo", mMemInfoFields, mMemInfoSizes);
574        for (int i=0; i<mMemInfoSizes.length; i++) {
575            mMemInfoSizes[i] *= 1024;
576        }
577        EventLog.writeEvent(EventLogTags.WATCHDOG_MEMINFO,
578                (int)mMemInfoSizes[0], (int)mMemInfoSizes[1], (int)mMemInfoSizes[2],
579                (int)mMemInfoSizes[3], (int)mMemInfoSizes[4],
580                (int)mMemInfoSizes[5], (int)mMemInfoSizes[6], (int)mMemInfoSizes[7],
581                (int)mMemInfoSizes[8], (int)mMemInfoSizes[9], (int)mMemInfoSizes[10]);
582        long now = SystemClock.uptimeMillis();
583        long dur = now - mLastLogGlobalMemoryTime;
584        mLastLogGlobalMemoryTime = now;
585        Process.readProcLines("/proc/vmstat", mVMStatFields, mVMStatSizes);
586        for (int i=0; i<mVMStatSizes.length; i++) {
587            long v = mVMStatSizes[i];
588            mVMStatSizes[i] -= mPrevVMStatSizes[i];
589            mPrevVMStatSizes[i] = v;
590        }
591        EventLog.writeEvent(EventLogTags.WATCHDOG_VMSTAT, dur,
592                (int)mVMStatSizes[0], (int)mVMStatSizes[1], (int)mVMStatSizes[2],
593                (int)mVMStatSizes[3], (int)mVMStatSizes[4]);
594    }
595
596    void checkReboot(boolean fromAlarm) {
597        int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
598                : Settings.Secure.getInt(
599                mResolver, Settings.Secure.REBOOT_INTERVAL,
600                REBOOT_DEFAULT_INTERVAL);
601        mRebootInterval = rebootInterval;
602        if (rebootInterval <= 0) {
603            // No reboot interval requested.
604            if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
605            mAlarm.remove(mRebootIntent);
606            return;
607        }
608
609        long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
610                : Settings.Secure.getLong(
611                mResolver, Settings.Secure.REBOOT_START_TIME,
612                REBOOT_DEFAULT_START_TIME);
613        long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
614                : Settings.Secure.getLong(
615                mResolver, Settings.Secure.REBOOT_WINDOW,
616                REBOOT_DEFAULT_WINDOW)) * 1000;
617        long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
618                : Settings.Secure.getLong(
619                mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
620                MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
621
622        retrieveBrutalityAmount();
623
624        long realStartTime;
625        long now;
626
627        synchronized (this) {
628            now = System.currentTimeMillis();
629            realStartTime = computeCalendarTime(mCalendar, now,
630                    rebootStartTime);
631
632            long rebootIntervalMillis = rebootInterval*24*60*60*1000;
633            if (DB || mReqRebootNoWait ||
634                    (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
635                if (fromAlarm && rebootWindowMillis <= 0) {
636                    // No reboot window -- just immediately reboot.
637                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
638                            (int)rebootIntervalMillis, (int)rebootStartTime*1000,
639                            (int)rebootWindowMillis, "");
640                    rebootSystem("Checkin scheduled forced");
641                    return;
642                }
643
644                // Are we within the reboot window?
645                if (now < realStartTime) {
646                    // Schedule alarm for next check interval.
647                    realStartTime = computeCalendarTime(mCalendar,
648                            now, rebootStartTime);
649                } else if (now < (realStartTime+rebootWindowMillis)) {
650                    String doit = shouldWeBeBrutalLocked(now);
651                    EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
652                            (int)rebootInterval, (int)rebootStartTime*1000,
653                            (int)rebootWindowMillis, doit != null ? doit : "");
654                    if (doit == null) {
655                        rebootSystem("Checked scheduled range");
656                        return;
657                    }
658
659                    // Schedule next alarm either within the window or in the
660                    // next interval.
661                    if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
662                        realStartTime = computeCalendarTime(mCalendar,
663                                now + rebootIntervalMillis, rebootStartTime);
664                    } else {
665                        realStartTime = now + recheckInterval;
666                    }
667                } else {
668                    // Schedule alarm for next check interval.
669                    realStartTime = computeCalendarTime(mCalendar,
670                            now + rebootIntervalMillis, rebootStartTime);
671                }
672            }
673        }
674
675        if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
676                + ((realStartTime-now)/1000/60) + "m from now");
677        mAlarm.remove(mRebootIntent);
678        mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
679    }
680
681    /**
682     * Perform a full reboot of the system.
683     */
684    void rebootSystem(String reason) {
685        Slog.i(TAG, "Rebooting system because: " + reason);
686        PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
687        pms.reboot(reason);
688    }
689
690    /**
691     * Load the current Gservices settings for when
692     * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
693     * Must not be called with the lock held.
694     */
695    void retrieveBrutalityAmount() {
696        mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
697                : Settings.Secure.getInt(
698                mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
699                MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
700        mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
701                : Settings.Secure.getInt(
702                mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
703                MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
704    }
705
706    /**
707     * Determine whether it is a good time to kill, crash, or otherwise
708     * plunder the current situation for the overall long-term benefit of
709     * the world.
710     *
711     * @param curTime The current system time.
712     * @return Returns null if this is a good time, else a String with the
713     * text of why it is not a good time.
714     */
715    String shouldWeBeBrutalLocked(long curTime) {
716        if (mBattery == null || !mBattery.isPowered()) {
717            return "battery";
718        }
719
720        if (mMinScreenOff >= 0 && (mPower == null ||
721                mPower.timeSinceScreenOn() < mMinScreenOff)) {
722            return "screen";
723        }
724
725        if (mMinAlarm >= 0 && (mAlarm == null ||
726                mAlarm.timeToNextAlarm() < mMinAlarm)) {
727            return "alarm";
728        }
729
730        return null;
731    }
732
733    /**
734     * Compute the times during which we next would like to perform process
735     * restarts.
736     *
737     * @param curTime The current system time.
738     */
739    void computeMemcheckTimesLocked(long curTime) {
740        if (mMemcheckLastTime == curTime) {
741            return;
742        }
743
744        mMemcheckLastTime = curTime;
745
746        long memcheckExecStartTime = Settings.Secure.getLong(
747                mResolver, Settings.Secure.MEMCHECK_EXEC_START_TIME,
748                MEMCHECK_DEFAULT_EXEC_START_TIME);
749        long memcheckExecEndTime = Settings.Secure.getLong(
750                mResolver, Settings.Secure.MEMCHECK_EXEC_END_TIME,
751                MEMCHECK_DEFAULT_EXEC_END_TIME);
752
753        mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime,
754                memcheckExecEndTime);
755        if (mMemcheckExecEndTime < curTime) {
756            memcheckExecStartTime += 24*60*60;
757            memcheckExecEndTime += 24*60*60;
758            mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime,
759                    memcheckExecEndTime);
760        }
761        mMemcheckExecStartTime = computeCalendarTime(mCalendar, curTime,
762                memcheckExecStartTime);
763
764        if (localLOGV) {
765            mCalendar.setTimeInMillis(curTime);
766            Slog.v(TAG, "Current Time: " + mCalendar);
767            mCalendar.setTimeInMillis(mMemcheckExecStartTime);
768            Slog.v(TAG, "Start Check Time: " + mCalendar);
769            mCalendar.setTimeInMillis(mMemcheckExecEndTime);
770            Slog.v(TAG, "End Check Time: " + mCalendar);
771        }
772    }
773
774    static long computeCalendarTime(Calendar c, long curTime,
775            long secondsSinceMidnight) {
776
777        // start with now
778        c.setTimeInMillis(curTime);
779
780        int val = (int)secondsSinceMidnight / (60*60);
781        c.set(Calendar.HOUR_OF_DAY, val);
782        secondsSinceMidnight -= val * (60*60);
783        val = (int)secondsSinceMidnight / 60;
784        c.set(Calendar.MINUTE, val);
785        c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
786        c.set(Calendar.MILLISECOND, 0);
787
788        long newTime = c.getTimeInMillis();
789        if (newTime < curTime) {
790            // The given time (in seconds since midnight) has already passed for today, so advance
791            // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
792            c.add(Calendar.DAY_OF_MONTH, 1);
793            newTime = c.getTimeInMillis();
794        }
795
796        return newTime;
797    }
798
799    @Override
800    public void run() {
801        boolean waitedHalf = false;
802        while (true) {
803            mCompleted = false;
804            mHandler.sendEmptyMessage(MONITOR);
805
806            synchronized (this) {
807                long timeout = TIME_TO_WAIT;
808
809                // NOTE: We use uptimeMillis() here because we do not want to increment the time we
810                // wait while asleep. If the device is asleep then the thing that we are waiting
811                // to timeout on is asleep as well and won't have a chance to run, causing a false
812                // positive on when to kill things.
813                long start = SystemClock.uptimeMillis();
814                while (timeout > 0 && !mForceKillSystem) {
815                    try {
816                        wait(timeout);  // notifyAll() is called when mForceKillSystem is set
817                    } catch (InterruptedException e) {
818                        Log.wtf(TAG, e);
819                    }
820                    timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
821                }
822
823                if (mCompleted && !mForceKillSystem) {
824                    // The monitors have returned.
825                    waitedHalf = false;
826                    continue;
827                }
828
829                if (!waitedHalf) {
830                    // We've waited half the deadlock-detection interval.  Pull a stack
831                    // trace and wait another half.
832                    ArrayList pids = new ArrayList();
833                    pids.add(Process.myPid());
834                    File stack = ActivityManagerService.dumpStackTraces(true, pids);
835                    waitedHalf = true;
836                    continue;
837                }
838            }
839
840            // If we got here, that means that the system is most likely hung.
841            // First collect stack traces from all threads of the system process.
842            // Then kill this process so that the system will restart.
843
844            String name = (mCurrentMonitor != null) ? mCurrentMonitor.getClass().getName() : "null";
845            EventLog.writeEvent(EventLogTags.WATCHDOG, name);
846
847            ArrayList pids = new ArrayList();
848            pids.add(Process.myPid());
849            if (mPhonePid > 0) pids.add(mPhonePid);
850            // Pass !waitedHalf so that just in case we somehow wind up here without having
851            // dumped the halfway stacks, we properly re-initialize the trace file.
852            File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids);
853
854            // Give some extra time to make sure the stack traces get written.
855            // The system's been hanging for a minute, another second or two won't hurt much.
856            SystemClock.sleep(2000);
857
858            // Pull our own kernel thread stacks as well if we're configured for that
859            if (RECORD_KERNEL_THREADS) {
860                dumpKernelStackTraces();
861            }
862
863            mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null);
864
865            // Only kill the process if the debugger is not attached.
866            if (!Debug.isDebuggerConnected()) {
867                Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
868                Process.killProcess(Process.myPid());
869                System.exit(10);
870            } else {
871                Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
872            }
873
874            waitedHalf = false;
875        }
876    }
877
878    private File dumpKernelStackTraces() {
879        String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
880        if (tracesPath == null || tracesPath.length() == 0) {
881            return null;
882        }
883
884        native_dumpKernelStacks(tracesPath);
885        return new File(tracesPath);
886    }
887
888    private native void native_dumpKernelStacks(String tracesPath);
889}
890