/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
21import android.os.Build;
22import android.os.Environment;
23import android.os.FileUtils;
24import android.os.RecoverySystem;
25import android.os.SystemClock;
26import android.os.SystemProperties;
27import android.os.UserHandle;
28import android.provider.Settings;
29import android.text.format.DateUtils;
30import android.util.ExceptionUtils;
31import android.util.Log;
32import android.util.MathUtils;
33import android.util.Slog;
34import android.util.SparseArray;
35
36import com.android.internal.util.ArrayUtils;
37import com.android.server.pm.PackageManagerService;
38
39import java.io.File;
40
41/**
42 * Utilities to help rescue the system from crash loops. Callers are expected to
43 * report boot events and persistent app crashes, and if they happen frequently
44 * enough this class will slowly escalate through several rescue operations
45 * before finally rebooting and prompting the user if they want to wipe data as
46 * a last resort.
47 *
48 * @hide
49 */
50public class RescueParty {
51    private static final String TAG = "RescueParty";
52
53    private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
54    private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
55    private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
56    private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
57    private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
58    private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
59
60    private static final int LEVEL_NONE = 0;
61    private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
62    private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
63    private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
64    private static final int LEVEL_FACTORY_RESET = 4;
65
66    /** Threshold for boot loops */
67    private static final Threshold sBoot = new BootThreshold();
68    /** Threshold for app crash loops */
69    private static SparseArray<Threshold> sApps = new SparseArray<>();
70
71    private static boolean isDisabled() {
72        // Check if we're explicitly enabled for testing
73        if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
74            return false;
75        }
76
77        // We're disabled on all engineering devices
78        if (Build.IS_ENG) {
79            Slog.v(TAG, "Disabled because of eng build");
80            return true;
81        }
82
83        // We're disabled on userdebug devices connected over USB, since that's
84        // a decent signal that someone is actively trying to debug the device,
85        // or that it's in a lab environment.
86        if (Build.IS_USERDEBUG && isUsbActive()) {
87            Slog.v(TAG, "Disabled because of active USB connection");
88            return true;
89        }
90
91        // One last-ditch check
92        if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
93            Slog.v(TAG, "Disabled because of manual property");
94            return true;
95        }
96
97        return false;
98    }
99
100    /**
101     * Take note of a boot event. If we notice too many of these events
102     * happening in rapid succession, we'll send out a rescue party.
103     */
104    public static void noteBoot(Context context) {
105        if (isDisabled()) return;
106        if (sBoot.incrementAndTest()) {
107            sBoot.reset();
108            incrementRescueLevel(sBoot.uid);
109            executeRescueLevel(context);
110        }
111    }
112
113    /**
114     * Take note of a persistent app crash. If we notice too many of these
115     * events happening in rapid succession, we'll send out a rescue party.
116     */
117    public static void notePersistentAppCrash(Context context, int uid) {
118        if (isDisabled()) return;
119        Threshold t = sApps.get(uid);
120        if (t == null) {
121            t = new AppThreshold(uid);
122            sApps.put(uid, t);
123        }
124        if (t.incrementAndTest()) {
125            t.reset();
126            incrementRescueLevel(t.uid);
127            executeRescueLevel(context);
128        }
129    }
130
131    /**
132     * Check if we're currently attempting to reboot for a factory reset.
133     */
134    public static boolean isAttemptingFactoryReset() {
135        return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
136    }
137
138    /**
139     * Escalate to the next rescue level. After incrementing the level you'll
140     * probably want to call {@link #executeRescueLevel(Context)}.
141     */
142    private static void incrementRescueLevel(int triggerUid) {
143        final int level = MathUtils.constrain(
144                SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
145                LEVEL_NONE, LEVEL_FACTORY_RESET);
146        SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
147
148        EventLogTags.writeRescueLevel(level, triggerUid);
149        PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to "
150                + levelToString(level) + " triggered by UID " + triggerUid);
151    }
152
153    /**
154     * Called when {@code SettingsProvider} has been published, which is a good
155     * opportunity to reset any settings depending on our rescue level.
156     */
157    public static void onSettingsProviderPublished(Context context) {
158        executeRescueLevel(context);
159    }
160
161    private static void executeRescueLevel(Context context) {
162        final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
163        if (level == LEVEL_NONE) return;
164
165        Slog.w(TAG, "Attempting rescue level " + levelToString(level));
166        try {
167            executeRescueLevelInternal(context, level);
168            EventLogTags.writeRescueSuccess(level);
169            PackageManagerService.logCriticalInfo(Log.DEBUG,
170                    "Finished rescue level " + levelToString(level));
171        } catch (Throwable t) {
172            final String msg = ExceptionUtils.getCompleteMessage(t);
173            EventLogTags.writeRescueFailure(level, msg);
174            PackageManagerService.logCriticalInfo(Log.ERROR,
175                    "Failed rescue level " + levelToString(level) + ": " + msg);
176        }
177    }
178
179    private static void executeRescueLevelInternal(Context context, int level) throws Exception {
180        switch (level) {
181            case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
182                resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
183                break;
184            case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
185                resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
186                break;
187            case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
188                resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
189                break;
190            case LEVEL_FACTORY_RESET:
191                RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
192                break;
193        }
194    }
195
196    private static void resetAllSettings(Context context, int mode) throws Exception {
197        // Try our best to reset all settings possible, and once finished
198        // rethrow any exception that we encountered
199        Exception res = null;
200        final ContentResolver resolver = context.getContentResolver();
201        try {
202            Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
203        } catch (Throwable t) {
204            res = new RuntimeException("Failed to reset global settings", t);
205        }
206        for (int userId : getAllUserIds()) {
207            try {
208                Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
209            } catch (Throwable t) {
210                res = new RuntimeException("Failed to reset secure settings for " + userId, t);
211            }
212        }
213        if (res != null) {
214            throw res;
215        }
216    }
217
218    /**
219     * Threshold that can be triggered if a number of events occur within a
220     * window of time.
221     */
222    private abstract static class Threshold {
223        public abstract int getCount();
224        public abstract void setCount(int count);
225        public abstract long getStart();
226        public abstract void setStart(long start);
227
228        private final int uid;
229        private final int triggerCount;
230        private final long triggerWindow;
231
232        public Threshold(int uid, int triggerCount, long triggerWindow) {
233            this.uid = uid;
234            this.triggerCount = triggerCount;
235            this.triggerWindow = triggerWindow;
236        }
237
238        public void reset() {
239            setCount(0);
240            setStart(0);
241        }
242
243        /**
244         * @return if this threshold has been triggered
245         */
246        public boolean incrementAndTest() {
247            final long now = SystemClock.elapsedRealtime();
248            final long window = now - getStart();
249            if (window > triggerWindow) {
250                setCount(1);
251                setStart(now);
252                return false;
253            } else {
254                int count = getCount() + 1;
255                setCount(count);
256                EventLogTags.writeRescueNote(uid, count, window);
257                Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
258                        + (window / 1000) + " sec");
259                return (count >= triggerCount);
260            }
261        }
262    }
263
264    /**
265     * Specialization of {@link Threshold} for monitoring boot events. It stores
266     * counters in system properties for robustness.
267     */
268    private static class BootThreshold extends Threshold {
269        public BootThreshold() {
270            // We're interested in 5 events in any 300 second period; this
271            // window is super relaxed because booting can take a long time if
272            // forced to dexopt things.
273            super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
274        }
275
276        @Override
277        public int getCount() {
278            return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
279        }
280
281        @Override
282        public void setCount(int count) {
283            SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
284        }
285
286        @Override
287        public long getStart() {
288            return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
289        }
290
291        @Override
292        public void setStart(long start) {
293            SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
294        }
295    }
296
297    /**
298     * Specialization of {@link Threshold} for monitoring app crashes. It stores
299     * counters in memory.
300     */
301    private static class AppThreshold extends Threshold {
302        private int count;
303        private long start;
304
305        public AppThreshold(int uid) {
306            // We're interested in 5 events in any 30 second period; apps crash
307            // pretty quickly so we can keep a tight leash on them.
308            super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
309        }
310
311        @Override public int getCount() { return count; }
312        @Override public void setCount(int count) { this.count = count; }
313        @Override public long getStart() { return start; }
314        @Override public void setStart(long start) { this.start = start; }
315    }
316
317    private static int[] getAllUserIds() {
318        int[] userIds = { UserHandle.USER_SYSTEM };
319        try {
320            for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
321                try {
322                    final int userId = Integer.parseInt(file.getName());
323                    if (userId != UserHandle.USER_SYSTEM) {
324                        userIds = ArrayUtils.appendInt(userIds, userId);
325                    }
326                } catch (NumberFormatException ignored) {
327                }
328            }
329        } catch (Throwable t) {
330            Slog.w(TAG, "Trouble discovering users", t);
331        }
332        return userIds;
333    }
334
335    /**
336     * Hacky test to check if the device has an active USB connection, which is
337     * a good proxy for someone doing local development work.
338     */
339    private static boolean isUsbActive() {
340        if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
341            Slog.v(TAG, "Assuming virtual device is connected over USB");
342            return true;
343        }
344        try {
345            final String state = FileUtils
346                    .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
347            return "CONFIGURED".equals(state.trim());
348        } catch (Throwable t) {
349            Slog.w(TAG, "Failed to determine if device was on USB", t);
350            return false;
351        }
352    }
353
354    private static String levelToString(int level) {
355        switch (level) {
356            case LEVEL_NONE: return "NONE";
357            case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
358            case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
359            case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
360            case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
361            default: return Integer.toString(level);
362        }
363    }
364}
365