1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server.devicepolicy;
18
19import static java.util.concurrent.TimeUnit.MILLISECONDS;
20import static java.util.concurrent.TimeUnit.NANOSECONDS;
21
22import android.app.admin.DeviceAdminReceiver;
23import android.app.admin.SecurityLog;
24import android.app.admin.SecurityLog.SecurityEvent;
25import android.os.Process;
26import android.os.SystemClock;
27import android.util.Log;
28import android.util.Slog;
29
30import com.android.internal.annotations.GuardedBy;
31import com.android.internal.annotations.VisibleForTesting;
32
33import java.io.IOException;
34import java.util.ArrayList;
35import java.util.List;
36import java.util.concurrent.Semaphore;
37import java.util.concurrent.TimeUnit;
38import java.util.concurrent.locks.Lock;
39import java.util.concurrent.locks.ReentrantLock;
40
41/**
42 * A class managing access to the security logs. It maintains an internal buffer of pending
43 * logs to be retrieved by the device owner. The logs are retrieved from the logd daemon via
44 * JNI binding, and kept until device owner has retrieved to prevent loss of logs. Access to
45 * the logs from the device owner is rate-limited, and device owner is notified when the logs
46 * are ready to be retrieved. This happens every two hours, or when our internal buffer is
47 * larger than a certain threshold.
48 */
49class SecurityLogMonitor implements Runnable {
50    private final DevicePolicyManagerService mService;
51
52    private final Lock mLock = new ReentrantLock();
53
54    SecurityLogMonitor(DevicePolicyManagerService service) {
55        this(service, 0 /* id */);
56    }
57
58    @VisibleForTesting
59    SecurityLogMonitor(DevicePolicyManagerService service, long id) {
60        this.mService = service;
61        this.mId = id;
62        this.mLastForceNanos = System.nanoTime();
63    }
64
65    private static final boolean DEBUG = false;  // STOPSHIP if true.
66    private static final String TAG = "SecurityLogMonitor";
67    /**
68     * Each log entry can hold up to 4K bytes (but as of {@link android.os.Build.VERSION_CODES#N}
69     * it should be less than 100 bytes), setting 1024 entries as the threshold to notify Device
70     * Owner.
71     */
72    @VisibleForTesting static final int BUFFER_ENTRIES_NOTIFICATION_LEVEL = 1024;
73    /**
74     * The maximum number of entries we should store before dropping earlier logs, to limit the
75     * memory usage.
76     */
77    private static final int BUFFER_ENTRIES_MAXIMUM_LEVEL = BUFFER_ENTRIES_NOTIFICATION_LEVEL * 10;
78    /**
79     * Critical log buffer level, 90% of capacity.
80     */
81    private static final int BUFFER_ENTRIES_CRITICAL_LEVEL = BUFFER_ENTRIES_MAXIMUM_LEVEL * 9 / 10;
82    /**
83     * How often should Device Owner be notified under normal circumstances.
84     */
85    private static final long RATE_LIMIT_INTERVAL_MS = TimeUnit.HOURS.toMillis(2);
86    /**
87     * How often to retry the notification about available logs if it is ignored or missed by DO.
88     */
89    private static final long BROADCAST_RETRY_INTERVAL_MS = TimeUnit.MINUTES.toMillis(30);
90    /**
91     * Internally how often should the monitor poll the security logs from logd.
92     */
93    private static final long POLLING_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1);
94    /**
95     * Overlap between two subsequent log requests, required to avoid losing out of order events.
96     */
97    private static final long OVERLAP_NS = TimeUnit.SECONDS.toNanos(3);
98
99    /** Minimum time between forced fetch attempts. */
100    private static final long FORCE_FETCH_THROTTLE_NS = TimeUnit.SECONDS.toNanos(10);
101
102    @GuardedBy("mLock")
103    private Thread mMonitorThread = null;
104    @GuardedBy("mLock")
105    private ArrayList<SecurityEvent> mPendingLogs = new ArrayList<>();
106    @GuardedBy("mLock")
107    private long mId;
108    @GuardedBy("mLock")
109    private boolean mAllowedToRetrieve = false;
110
111    // Whether we have already logged the fact that log buffer reached 90%, to avoid dupes.
112    @GuardedBy("mLock")
113    private boolean mCriticalLevelLogged = false;
114
115    /**
116     * Last events fetched from log to check for overlap between batches. We can leave it empty if
117     * we are sure there will be no overlap anymore, e.g. when we get empty batch.
118     */
119    private final ArrayList<SecurityEvent> mLastEvents = new ArrayList<>();
120    /** Timestamp of the very last event, -1 means request from the beginning of time. */
121    private long mLastEventNanos = -1;
122
123    /**
124     * When DO will be allowed to retrieve the log, in milliseconds since boot (as per
125     * {@link SystemClock#elapsedRealtime()}). After that it will mark the time to retry broadcast.
126     */
127    @GuardedBy("mLock")
128    private long mNextAllowedRetrievalTimeMillis = -1;
129    @GuardedBy("mLock")
130    private boolean mPaused = false;
131
132    /** Semaphore used to force log fetching on request from adb. */
133    private final Semaphore mForceSemaphore = new Semaphore(0 /* permits */);
134
135    /** The last timestamp when force fetch was used, to prevent abuse. */
136    @GuardedBy("mForceSemaphore")
137    private long mLastForceNanos = 0;
138
139    void start() {
140        Slog.i(TAG, "Starting security logging.");
141        SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STARTED);
142        mLock.lock();
143        try {
144            if (mMonitorThread == null) {
145                mPendingLogs = new ArrayList<>();
146                mCriticalLevelLogged = false;
147                mId = 0;
148                mAllowedToRetrieve = false;
149                mNextAllowedRetrievalTimeMillis = -1;
150                mPaused = false;
151
152                mMonitorThread = new Thread(this);
153                mMonitorThread.start();
154            }
155        } finally {
156            mLock.unlock();
157        }
158    }
159
160    void stop() {
161        Slog.i(TAG, "Stopping security logging.");
162        SecurityLog.writeEvent(SecurityLog.TAG_LOGGING_STOPPED);
163        mLock.lock();
164        try {
165            if (mMonitorThread != null) {
166                mMonitorThread.interrupt();
167                try {
168                    mMonitorThread.join(TimeUnit.SECONDS.toMillis(5));
169                } catch (InterruptedException e) {
170                    Log.e(TAG, "Interrupted while waiting for thread to stop", e);
171                }
172                // Reset state and clear buffer
173                mPendingLogs = new ArrayList<>();
174                mId = 0;
175                mAllowedToRetrieve = false;
176                mNextAllowedRetrievalTimeMillis = -1;
177                mPaused = false;
178                mMonitorThread = null;
179            }
180        } finally {
181            mLock.unlock();
182        }
183    }
184
185    /**
186     * If logs are being collected, keep collecting them but stop notifying the device owner that
187     * new logs are available (since they cannot be retrieved).
188     */
189    void pause() {
190        Slog.i(TAG, "Paused.");
191
192        mLock.lock();
193        mPaused = true;
194        mAllowedToRetrieve = false;
195        mLock.unlock();
196    }
197
198    /**
199     * If logs are being collected, start notifying the device owner when logs are ready to be
200     * retrieved again (if it was paused).
201     * <p>If logging is enabled and there are logs ready to be retrieved, this method will attempt
202     * to notify the device owner. Therefore calling identity should be cleared before calling it
203     * (in case the method is called from a user other than the DO's user).
204     */
205    void resume() {
206        mLock.lock();
207        try {
208            if (!mPaused) {
209                Log.d(TAG, "Attempted to resume, but logging is not paused.");
210                return;
211            }
212            mPaused = false;
213            mAllowedToRetrieve = false;
214        } finally {
215            mLock.unlock();
216        }
217
218        Slog.i(TAG, "Resumed.");
219        try {
220            notifyDeviceOwnerIfNeeded(false /* force */);
221        } catch (InterruptedException e) {
222            Log.w(TAG, "Thread interrupted.", e);
223        }
224    }
225
226    /**
227     * Discard all collected logs.
228     */
229    void discardLogs() {
230        mLock.lock();
231        mAllowedToRetrieve = false;
232        mPendingLogs = new ArrayList<>();
233        mCriticalLevelLogged = false;
234        mLock.unlock();
235        Slog.i(TAG, "Discarded all logs.");
236    }
237
238    /**
239     * Returns the new batch of logs since the last call to this method. Returns null if
240     * rate limit is exceeded.
241     */
242    List<SecurityEvent> retrieveLogs() {
243        mLock.lock();
244        try {
245            if (mAllowedToRetrieve) {
246                mAllowedToRetrieve = false;
247                mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
248                        + RATE_LIMIT_INTERVAL_MS;
249                List<SecurityEvent> result = mPendingLogs;
250                mPendingLogs = new ArrayList<>();
251                mCriticalLevelLogged = false;
252                return result;
253            } else {
254                return null;
255            }
256        } finally {
257            mLock.unlock();
258        }
259    }
260
261    /**
262     * Requests the next (or the first) batch of events from the log with appropriate timestamp.
263     */
264    private void getNextBatch(ArrayList<SecurityEvent> newLogs) throws IOException {
265        if (mLastEventNanos < 0) {
266            // Non-blocking read that returns all logs immediately.
267            if (DEBUG) Slog.d(TAG, "SecurityLog.readEvents");
268            SecurityLog.readEvents(newLogs);
269        } else {
270            // If we have last events from the previous batch, request log events with time overlap
271            // with previously retrieved messages to avoid losing events due to reordering in logd.
272            final long startNanos = mLastEvents.isEmpty()
273                    ? mLastEventNanos : Math.max(0, mLastEventNanos - OVERLAP_NS);
274            if (DEBUG) Slog.d(TAG, "SecurityLog.readEventsSince: " + startNanos);
275            // Non-blocking read that returns all logs with timestamps >= startNanos immediately.
276            SecurityLog.readEventsSince(startNanos, newLogs);
277        }
278
279        // Sometimes events may be reordered in logd due to simultaneous readers and writers. In
280        // this case, we have to sort it to make overlap checking work. This is very unlikely.
281        for (int i = 0; i < newLogs.size() - 1; i++) {
282            if (newLogs.get(i).getTimeNanos() > newLogs.get(i+1).getTimeNanos()) {
283                if (DEBUG) Slog.d(TAG, "Got out of order events, sorting.");
284                // Sort using comparator that compares timestamps.
285                newLogs.sort((e1, e2) -> Long.signum(e1.getTimeNanos() - e2.getTimeNanos()));
286                break;
287            }
288        }
289
290        if (DEBUG) Slog.d(TAG, "Got " + newLogs.size() + " new events.");
291    }
292
293    /**
294     * Save the last events for overlap checking with the next batch.
295     */
296    private void saveLastEvents(ArrayList<SecurityEvent> newLogs) {
297        mLastEvents.clear();
298        if (newLogs.isEmpty()) {
299            // This can happen if no events were logged yet or the buffer got cleared. In this case
300            // we aren't going to have any overlap next time, leave mLastEvents events empty.
301            return;
302        }
303
304        // Save the last timestamp.
305        mLastEventNanos = newLogs.get(newLogs.size() - 1).getTimeNanos();
306        // Position of the earliest event that has to be saved. Start from the penultimate event,
307        // going backward.
308        int pos = newLogs.size() - 2;
309        while (pos >= 0 && mLastEventNanos - newLogs.get(pos).getTimeNanos() < OVERLAP_NS) {
310            pos--;
311        }
312        // We either run past the start of the list or encountered an event that is too old to keep.
313        pos++;
314        mLastEvents.addAll(newLogs.subList(pos, newLogs.size()));
315        if (DEBUG) Slog.d(TAG, mLastEvents.size() + " events saved for overlap check");
316    }
317
318    /**
319     * Merges a new batch into already fetched logs and deals with overlapping and out of order
320     * events.
321     */
322    @GuardedBy("mLock")
323    private void mergeBatchLocked(final ArrayList<SecurityEvent> newLogs) {
324        // Reserve capacity so that copying doesn't occur.
325        mPendingLogs.ensureCapacity(mPendingLogs.size() + newLogs.size());
326        // Run through the first events of the batch to check if there is an overlap with previous
327        // batch and if so, skip overlapping events. Events are sorted by timestamp, so we can
328        // compare it in linear time by advancing two pointers, one for each batch.
329        int curPos = 0;
330        int lastPos = 0;
331        // For the first batch mLastEvents will be empty, so no iterations will happen.
332        while (lastPos < mLastEvents.size() && curPos < newLogs.size()) {
333            final SecurityEvent curEvent = newLogs.get(curPos);
334            final long currentNanos = curEvent.getTimeNanos();
335            if (currentNanos > mLastEventNanos) {
336                // We got past the last event of the last batch, no overlap possible anymore.
337                break;
338            }
339            final SecurityEvent lastEvent = mLastEvents.get(lastPos);
340            final long lastNanos = lastEvent.getTimeNanos();
341            if (lastNanos > currentNanos) {
342                // New event older than the last we've seen so far, must be due to reordering.
343                if (DEBUG) Slog.d(TAG, "New event in the overlap: " + currentNanos);
344                assignLogId(curEvent);
345                mPendingLogs.add(curEvent);
346                curPos++;
347            } else if (lastNanos < currentNanos) {
348                if (DEBUG) Slog.d(TAG, "Event disappeared from the overlap: " + lastNanos);
349                lastPos++;
350            } else {
351                // Two events have the same timestamp, check if they are the same.
352                if (lastEvent.equals(curEvent)) {
353                    // Actual overlap, just skip the event.
354                    if (DEBUG) Slog.d(TAG, "Skipped dup event with timestamp: " + lastNanos);
355                } else {
356                    // Wow, what a coincidence, or probably the clock is too coarse.
357                    assignLogId(curEvent);
358                    mPendingLogs.add(curEvent);
359                    if (DEBUG) Slog.d(TAG, "Event timestamp collision: " + lastNanos);
360                }
361                lastPos++;
362                curPos++;
363            }
364        }
365        // Assign an id to the new logs, after the overlap with mLastEvents.
366        List<SecurityEvent> idLogs = newLogs.subList(curPos, newLogs.size());
367        for (SecurityEvent event : idLogs) {
368            assignLogId(event);
369        }
370        // Save the rest of the new batch.
371        mPendingLogs.addAll(idLogs);
372
373        checkCriticalLevel();
374
375        if (mPendingLogs.size() > BUFFER_ENTRIES_MAXIMUM_LEVEL) {
376            // Truncate buffer down to half of BUFFER_ENTRIES_MAXIMUM_LEVEL.
377            mPendingLogs = new ArrayList<>(mPendingLogs.subList(
378                    mPendingLogs.size() - (BUFFER_ENTRIES_MAXIMUM_LEVEL / 2),
379                    mPendingLogs.size()));
380            mCriticalLevelLogged = false;
381            Slog.i(TAG, "Pending logs buffer full. Discarding old logs.");
382        }
383        if (DEBUG) Slog.d(TAG, mPendingLogs.size() + " pending events in the buffer after merging,"
384                + " with ids " + mPendingLogs.get(0).getId()
385                + " to " + mPendingLogs.get(mPendingLogs.size() - 1).getId());
386    }
387
388    @GuardedBy("mLock")
389    private void checkCriticalLevel() {
390        if (!SecurityLog.isLoggingEnabled()) {
391            return;
392        }
393
394        if (mPendingLogs.size() >= BUFFER_ENTRIES_CRITICAL_LEVEL) {
395            if (!mCriticalLevelLogged) {
396                mCriticalLevelLogged = true;
397                SecurityLog.writeEvent(SecurityLog.TAG_LOG_BUFFER_SIZE_CRITICAL);
398            }
399        }
400    }
401
402    @GuardedBy("mLock")
403    private void assignLogId(SecurityEvent event) {
404        event.setId(mId);
405        if (mId == Long.MAX_VALUE) {
406            Slog.i(TAG, "Reached maximum id value; wrapping around.");
407            mId = 0;
408        } else {
409            mId++;
410        }
411    }
412
413    @Override
414    public void run() {
415        Process.setThreadPriority(Process.THREAD_PRIORITY_BACKGROUND);
416
417        final ArrayList<SecurityEvent> newLogs = new ArrayList<>();
418        while (!Thread.currentThread().isInterrupted()) {
419            try {
420                final boolean force = mForceSemaphore.tryAcquire(POLLING_INTERVAL_MS, MILLISECONDS);
421
422                getNextBatch(newLogs);
423
424                mLock.lockInterruptibly();
425                try {
426                    mergeBatchLocked(newLogs);
427                } finally {
428                    mLock.unlock();
429                }
430
431                saveLastEvents(newLogs);
432                newLogs.clear();
433                notifyDeviceOwnerIfNeeded(force);
434            } catch (IOException e) {
435                Log.e(TAG, "Failed to read security log", e);
436            } catch (InterruptedException e) {
437                Log.i(TAG, "Thread interrupted, exiting.", e);
438                // We are asked to stop.
439                break;
440            }
441        }
442
443        // Discard previous batch info.
444        mLastEvents.clear();
445        if (mLastEventNanos != -1) {
446            // Make sure we don't read old events if logging is re-enabled. Since mLastEvents is
447            // empty, the next request will be done without overlap, so it is enough to add 1 ns.
448            mLastEventNanos += 1;
449        }
450
451        Slog.i(TAG, "MonitorThread exit.");
452    }
453
454    private void notifyDeviceOwnerIfNeeded(boolean force) throws InterruptedException {
455        boolean allowRetrievalAndNotifyDO = false;
456        mLock.lockInterruptibly();
457        try {
458            if (mPaused) {
459                return;
460            }
461            final int logSize = mPendingLogs.size();
462            if (logSize >= BUFFER_ENTRIES_NOTIFICATION_LEVEL || (force && logSize > 0)) {
463                // Allow DO to retrieve logs if too many pending logs or if forced.
464                if (!mAllowedToRetrieve) {
465                    allowRetrievalAndNotifyDO = true;
466                }
467                if (DEBUG) Slog.d(TAG, "Number of log entries over threshold: " + logSize);
468            }
469            if (logSize > 0 && SystemClock.elapsedRealtime() >= mNextAllowedRetrievalTimeMillis) {
470                // Rate limit reset
471                allowRetrievalAndNotifyDO = true;
472                if (DEBUG) Slog.d(TAG, "Timeout reached");
473            }
474            if (allowRetrievalAndNotifyDO) {
475                mAllowedToRetrieve = true;
476                // Set the timeout to retry the notification if the DO misses it.
477                mNextAllowedRetrievalTimeMillis = SystemClock.elapsedRealtime()
478                        + BROADCAST_RETRY_INTERVAL_MS;
479            }
480        } finally {
481            mLock.unlock();
482        }
483        if (allowRetrievalAndNotifyDO) {
484            Slog.i(TAG, "notify DO");
485            mService.sendDeviceOwnerCommand(DeviceAdminReceiver.ACTION_SECURITY_LOGS_AVAILABLE,
486                    null);
487        }
488    }
489
490    /**
491     * Forces the logs to be fetched and made available. Returns 0 on success or timeout to wait
492     * before attempting in milliseconds.
493     */
494    public long forceLogs() {
495        final long nowNanos = System.nanoTime();
496        // We only synchronize with another calls to this function, not with the fetching thread.
497        synchronized (mForceSemaphore) {
498            final long toWaitNanos = mLastForceNanos + FORCE_FETCH_THROTTLE_NS - nowNanos;
499            if (toWaitNanos > 0) {
500                return NANOSECONDS.toMillis(toWaitNanos) + 1; // Round up.
501            }
502            mLastForceNanos = nowNanos;
503            // There is a race condition with the fetching thread below, but if the last permit is
504            // acquired just after we do the check, logs are forced anyway and that's what we need.
505            if (mForceSemaphore.availablePermits() == 0) {
506                mForceSemaphore.release();
507            }
508            return 0;
509        }
510    }
511}
512