1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
19import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo;
20
21import android.content.ContentResolver;
22import android.content.Context;
23import android.os.Build;
24import android.os.Environment;
25import android.os.FileUtils;
26import android.os.RecoverySystem;
27import android.os.SystemClock;
28import android.os.SystemProperties;
29import android.os.UserHandle;
30import android.provider.Settings;
31import android.text.format.DateUtils;
32import android.util.ExceptionUtils;
33import android.util.Log;
34import android.util.MathUtils;
35import android.util.Slog;
36import android.util.SparseArray;
37
38import com.android.internal.util.ArrayUtils;
39import com.android.server.pm.PackageManagerService;
40
41import java.io.File;
42
43/**
44 * Utilities to help rescue the system from crash loops. Callers are expected to
45 * report boot events and persistent app crashes, and if they happen frequently
46 * enough this class will slowly escalate through several rescue operations
47 * before finally rebooting and prompting the user if they want to wipe data as
48 * a last resort.
49 *
50 * @hide
51 */
52public class RescueParty {
53    private static final String TAG = "RescueParty";
54
55    private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
56    private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
57    private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
58    private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
59    private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
60    private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
61
62    private static final int LEVEL_NONE = 0;
63    private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
64    private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
65    private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
66    private static final int LEVEL_FACTORY_RESET = 4;
67
68    /** Threshold for boot loops */
69    private static final Threshold sBoot = new BootThreshold();
70    /** Threshold for app crash loops */
71    private static SparseArray<Threshold> sApps = new SparseArray<>();
72
73    private static boolean isDisabled() {
74        // Check if we're explicitly enabled for testing
75        if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
76            return false;
77        }
78
79        // We're disabled on all engineering devices
80        if (Build.IS_ENG) {
81            Slog.v(TAG, "Disabled because of eng build");
82            return true;
83        }
84
85        // We're disabled on userdebug devices connected over USB, since that's
86        // a decent signal that someone is actively trying to debug the device,
87        // or that it's in a lab environment.
88        if (Build.IS_USERDEBUG && isUsbActive()) {
89            Slog.v(TAG, "Disabled because of active USB connection");
90            return true;
91        }
92
93        // One last-ditch check
94        if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
95            Slog.v(TAG, "Disabled because of manual property");
96            return true;
97        }
98
99        return false;
100    }
101
102    /**
103     * Take note of a boot event. If we notice too many of these events
104     * happening in rapid succession, we'll send out a rescue party.
105     */
106    public static void noteBoot(Context context) {
107        if (isDisabled()) return;
108        if (sBoot.incrementAndTest()) {
109            sBoot.reset();
110            incrementRescueLevel(sBoot.uid);
111            executeRescueLevel(context);
112        }
113    }
114
115    /**
116     * Take note of a persistent app crash. If we notice too many of these
117     * events happening in rapid succession, we'll send out a rescue party.
118     */
119    public static void notePersistentAppCrash(Context context, int uid) {
120        if (isDisabled()) return;
121        Threshold t = sApps.get(uid);
122        if (t == null) {
123            t = new AppThreshold(uid);
124            sApps.put(uid, t);
125        }
126        if (t.incrementAndTest()) {
127            t.reset();
128            incrementRescueLevel(t.uid);
129            executeRescueLevel(context);
130        }
131    }
132
133    /**
134     * Check if we're currently attempting to reboot for a factory reset.
135     */
136    public static boolean isAttemptingFactoryReset() {
137        return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
138    }
139
140    /**
141     * Escalate to the next rescue level. After incrementing the level you'll
142     * probably want to call {@link #executeRescueLevel(Context)}.
143     */
144    private static void incrementRescueLevel(int triggerUid) {
145        final int level = MathUtils.constrain(
146                SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
147                LEVEL_NONE, LEVEL_FACTORY_RESET);
148        SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
149
150        EventLogTags.writeRescueLevel(level, triggerUid);
151        logCriticalInfo(Log.WARN, "Incremented rescue level to "
152                + levelToString(level) + " triggered by UID " + triggerUid);
153    }
154
155    /**
156     * Called when {@code SettingsProvider} has been published, which is a good
157     * opportunity to reset any settings depending on our rescue level.
158     */
159    public static void onSettingsProviderPublished(Context context) {
160        executeRescueLevel(context);
161    }
162
163    private static void executeRescueLevel(Context context) {
164        final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
165        if (level == LEVEL_NONE) return;
166
167        Slog.w(TAG, "Attempting rescue level " + levelToString(level));
168        try {
169            executeRescueLevelInternal(context, level);
170            EventLogTags.writeRescueSuccess(level);
171            logCriticalInfo(Log.DEBUG,
172                    "Finished rescue level " + levelToString(level));
173        } catch (Throwable t) {
174            final String msg = ExceptionUtils.getCompleteMessage(t);
175            EventLogTags.writeRescueFailure(level, msg);
176            logCriticalInfo(Log.ERROR,
177                    "Failed rescue level " + levelToString(level) + ": " + msg);
178        }
179    }
180
181    private static void executeRescueLevelInternal(Context context, int level) throws Exception {
182        switch (level) {
183            case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
184                resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
185                break;
186            case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
187                resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
188                break;
189            case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
190                resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
191                break;
192            case LEVEL_FACTORY_RESET:
193                RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
194                break;
195        }
196    }
197
198    private static void resetAllSettings(Context context, int mode) throws Exception {
199        // Try our best to reset all settings possible, and once finished
200        // rethrow any exception that we encountered
201        Exception res = null;
202        final ContentResolver resolver = context.getContentResolver();
203        try {
204            Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
205        } catch (Throwable t) {
206            res = new RuntimeException("Failed to reset global settings", t);
207        }
208        for (int userId : getAllUserIds()) {
209            try {
210                Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
211            } catch (Throwable t) {
212                res = new RuntimeException("Failed to reset secure settings for " + userId, t);
213            }
214        }
215        if (res != null) {
216            throw res;
217        }
218    }
219
220    /**
221     * Threshold that can be triggered if a number of events occur within a
222     * window of time.
223     */
224    private abstract static class Threshold {
225        public abstract int getCount();
226        public abstract void setCount(int count);
227        public abstract long getStart();
228        public abstract void setStart(long start);
229
230        private final int uid;
231        private final int triggerCount;
232        private final long triggerWindow;
233
234        public Threshold(int uid, int triggerCount, long triggerWindow) {
235            this.uid = uid;
236            this.triggerCount = triggerCount;
237            this.triggerWindow = triggerWindow;
238        }
239
240        public void reset() {
241            setCount(0);
242            setStart(0);
243        }
244
245        /**
246         * @return if this threshold has been triggered
247         */
248        public boolean incrementAndTest() {
249            final long now = SystemClock.elapsedRealtime();
250            final long window = now - getStart();
251            if (window > triggerWindow) {
252                setCount(1);
253                setStart(now);
254                return false;
255            } else {
256                int count = getCount() + 1;
257                setCount(count);
258                EventLogTags.writeRescueNote(uid, count, window);
259                Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
260                        + (window / 1000) + " sec");
261                return (count >= triggerCount);
262            }
263        }
264    }
265
266    /**
267     * Specialization of {@link Threshold} for monitoring boot events. It stores
268     * counters in system properties for robustness.
269     */
270    private static class BootThreshold extends Threshold {
271        public BootThreshold() {
272            // We're interested in 5 events in any 300 second period; this
273            // window is super relaxed because booting can take a long time if
274            // forced to dexopt things.
275            super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
276        }
277
278        @Override
279        public int getCount() {
280            return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
281        }
282
283        @Override
284        public void setCount(int count) {
285            SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
286        }
287
288        @Override
289        public long getStart() {
290            return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
291        }
292
293        @Override
294        public void setStart(long start) {
295            SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
296        }
297    }
298
299    /**
300     * Specialization of {@link Threshold} for monitoring app crashes. It stores
301     * counters in memory.
302     */
303    private static class AppThreshold extends Threshold {
304        private int count;
305        private long start;
306
307        public AppThreshold(int uid) {
308            // We're interested in 5 events in any 30 second period; apps crash
309            // pretty quickly so we can keep a tight leash on them.
310            super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
311        }
312
313        @Override public int getCount() { return count; }
314        @Override public void setCount(int count) { this.count = count; }
315        @Override public long getStart() { return start; }
316        @Override public void setStart(long start) { this.start = start; }
317    }
318
319    private static int[] getAllUserIds() {
320        int[] userIds = { UserHandle.USER_SYSTEM };
321        try {
322            for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
323                try {
324                    final int userId = Integer.parseInt(file.getName());
325                    if (userId != UserHandle.USER_SYSTEM) {
326                        userIds = ArrayUtils.appendInt(userIds, userId);
327                    }
328                } catch (NumberFormatException ignored) {
329                }
330            }
331        } catch (Throwable t) {
332            Slog.w(TAG, "Trouble discovering users", t);
333        }
334        return userIds;
335    }
336
337    /**
338     * Hacky test to check if the device has an active USB connection, which is
339     * a good proxy for someone doing local development work.
340     */
341    private static boolean isUsbActive() {
342        if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
343            Slog.v(TAG, "Assuming virtual device is connected over USB");
344            return true;
345        }
346        try {
347            final String state = FileUtils
348                    .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
349            return "CONFIGURED".equals(state.trim());
350        } catch (Throwable t) {
351            Slog.w(TAG, "Failed to determine if device was on USB", t);
352            return false;
353        }
354    }
355
356    private static String levelToString(int level) {
357        switch (level) {
358            case LEVEL_NONE: return "NONE";
359            case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
360            case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
361            case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
362            case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
363            default: return Integer.toString(level);
364        }
365    }
366}
367