FastMixer.cpp revision 63492411cf4abff452b47f59ec7ff6f3e9925c84
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// <IMPORTANT_WARNING>
18// Design rules for threadLoop() are given in the comments at section "Fast mixer thread" of
19// StateQueue.h.  In particular, avoid library and system calls except at well-known points.
20// The design rules are only for threadLoop(), and don't apply to FastMixerDumpState methods.
21// </IMPORTANT_WARNING>
22
23#define LOG_TAG "FastMixer"
24//#define LOG_NDEBUG 0
25
26#define ATRACE_TAG ATRACE_TAG_AUDIO
27
28#include "Configuration.h"
29#include <time.h>
30#include <utils/Log.h>
31#include <utils/Trace.h>
32#include <system/audio.h>
33#ifdef FAST_MIXER_STATISTICS
34#include <cpustats/CentralTendencyStatistics.h>
35#ifdef CPU_FREQUENCY_STATISTICS
36#include <cpustats/ThreadCpuUsage.h>
37#endif
38#endif
39#include "AudioMixer.h"
40#include "FastMixer.h"
41
42#define FAST_HOT_IDLE_NS     1000000L   // 1 ms: time to sleep while hot idling
43#define FAST_DEFAULT_NS    999999999L   // ~1 sec: default time to sleep
44#define MIN_WARMUP_CYCLES          2    // minimum number of loop cycles to wait for warmup
45#define MAX_WARMUP_CYCLES         10    // maximum number of loop cycles to wait for warmup
46
47#define FCC_2                       2   // fixed channel count assumption
48
49namespace android {
50
51// Fast mixer thread
52bool FastMixer::threadLoop()
53{
54    static const FastMixerState initial;
55    const FastMixerState *previous = &initial, *current = &initial;
56    FastMixerState preIdle; // copy of state before we went into idle
57    struct timespec oldTs = {0, 0};
58    bool oldTsValid = false;
59    long slopNs = 0;    // accumulated time we've woken up too early (> 0) or too late (< 0)
60    long sleepNs = -1;  // -1: busy wait, 0: sched_yield, > 0: nanosleep
61    int fastTrackNames[FastMixerState::kMaxFastTracks]; // handles used by mixer to identify tracks
62    int generations[FastMixerState::kMaxFastTracks];    // last observed mFastTracks[i].mGeneration
63    unsigned i;
64    for (i = 0; i < FastMixerState::kMaxFastTracks; ++i) {
65        fastTrackNames[i] = -1;
66        generations[i] = 0;
67    }
68    NBAIO_Sink *outputSink = NULL;
69    int outputSinkGen = 0;
70    AudioMixer* mixer = NULL;
71    short *mixBuffer = NULL;
72    enum {UNDEFINED, MIXED, ZEROED} mixBufferState = UNDEFINED;
73    NBAIO_Format format = Format_Invalid;
74    unsigned sampleRate = 0;
75    int fastTracksGen = 0;
76    long periodNs = 0;      // expected period; the time required to render one mix buffer
77    long underrunNs = 0;    // underrun likely when write cycle is greater than this value
78    long overrunNs = 0;     // overrun likely when write cycle is less than this value
79    long forceNs = 0;       // if overrun detected, force the write cycle to take this much time
80    long warmupNs = 0;      // warmup complete when write cycle is greater than to this value
81    FastMixerDumpState dummyDumpState, *dumpState = &dummyDumpState;
82    bool ignoreNextOverrun = true;  // used to ignore initial overrun and first after an underrun
83#ifdef FAST_MIXER_STATISTICS
84    struct timespec oldLoad = {0, 0};    // previous value of clock_gettime(CLOCK_THREAD_CPUTIME_ID)
85    bool oldLoadValid = false;  // whether oldLoad is valid
86    uint32_t bounds = 0;
87    bool full = false;      // whether we have collected at least mSamplingN samples
88#ifdef CPU_FREQUENCY_STATISTICS
89    ThreadCpuUsage tcu;     // for reading the current CPU clock frequency in kHz
90#endif
91#endif
92    unsigned coldGen = 0;   // last observed mColdGen
93    bool isWarm = false;    // true means ready to mix, false means wait for warmup before mixing
94    struct timespec measuredWarmupTs = {0, 0};  // how long did it take for warmup to complete
95    uint32_t warmupCycles = 0;  // counter of number of loop cycles required to warmup
96    NBAIO_Sink* teeSink = NULL; // if non-NULL, then duplicate write() to this non-blocking sink
97    NBLog::Writer dummyLogWriter, *logWriter = &dummyLogWriter;
98    uint32_t totalNativeFramesWritten = 0;  // copied to dumpState->mFramesWritten
99
100    // next 2 fields are valid only when timestampStatus == NO_ERROR
101    AudioTimestamp timestamp;
102    uint32_t nativeFramesWrittenButNotPresented = 0;    // the = 0 is to silence the compiler
103    status_t timestampStatus = INVALID_OPERATION;
104
105    for (;;) {
106
107        // either nanosleep, sched_yield, or busy wait
108        if (sleepNs >= 0) {
109            if (sleepNs > 0) {
110                ALOG_ASSERT(sleepNs < 1000000000);
111                const struct timespec req = {0, sleepNs};
112                nanosleep(&req, NULL);
113            } else {
114                sched_yield();
115            }
116        }
117        // default to long sleep for next cycle
118        sleepNs = FAST_DEFAULT_NS;
119
120        // poll for state change
121        const FastMixerState *next = mSQ.poll();
122        if (next == NULL) {
123            // continue to use the default initial state until a real state is available
124            ALOG_ASSERT(current == &initial && previous == &initial);
125            next = current;
126        }
127
128        FastMixerState::Command command = next->mCommand;
129        if (next != current) {
130
131            // As soon as possible of learning of a new dump area, start using it
132            dumpState = next->mDumpState != NULL ? next->mDumpState : &dummyDumpState;
133            teeSink = next->mTeeSink;
134            logWriter = next->mNBLogWriter != NULL ? next->mNBLogWriter : &dummyLogWriter;
135            if (mixer != NULL) {
136                mixer->setLog(logWriter);
137            }
138
139            // We want to always have a valid reference to the previous (non-idle) state.
140            // However, the state queue only guarantees access to current and previous states.
141            // So when there is a transition from a non-idle state into an idle state, we make a
142            // copy of the last known non-idle state so it is still available on return from idle.
143            // The possible transitions are:
144            //  non-idle -> non-idle    update previous from current in-place
145            //  non-idle -> idle        update previous from copy of current
146            //  idle     -> idle        don't update previous
147            //  idle     -> non-idle    don't update previous
148            if (!(current->mCommand & FastMixerState::IDLE)) {
149                if (command & FastMixerState::IDLE) {
150                    preIdle = *current;
151                    current = &preIdle;
152                    oldTsValid = false;
153#ifdef FAST_MIXER_STATISTICS
154                    oldLoadValid = false;
155#endif
156                    ignoreNextOverrun = true;
157                }
158                previous = current;
159            }
160            current = next;
161        }
162#if !LOG_NDEBUG
163        next = NULL;    // not referenced again
164#endif
165
166        dumpState->mCommand = command;
167
168        switch (command) {
169        case FastMixerState::INITIAL:
170        case FastMixerState::HOT_IDLE:
171            sleepNs = FAST_HOT_IDLE_NS;
172            continue;
173        case FastMixerState::COLD_IDLE:
174            // only perform a cold idle command once
175            // FIXME consider checking previous state and only perform if previous != COLD_IDLE
176            if (current->mColdGen != coldGen) {
177                int32_t *coldFutexAddr = current->mColdFutexAddr;
178                ALOG_ASSERT(coldFutexAddr != NULL);
179                int32_t old = android_atomic_dec(coldFutexAddr);
180                if (old <= 0) {
181                    (void) syscall(__NR_futex, coldFutexAddr, FUTEX_WAIT_PRIVATE, old - 1, NULL);
182                }
183                int policy = sched_getscheduler(0);
184                if (!(policy == SCHED_FIFO || policy == SCHED_RR)) {
185                    ALOGE("did not receive expected priority boost");
186                }
187                // This may be overly conservative; there could be times that the normal mixer
188                // requests such a brief cold idle that it doesn't require resetting this flag.
189                isWarm = false;
190                measuredWarmupTs.tv_sec = 0;
191                measuredWarmupTs.tv_nsec = 0;
192                warmupCycles = 0;
193                sleepNs = -1;
194                coldGen = current->mColdGen;
195#ifdef FAST_MIXER_STATISTICS
196                bounds = 0;
197                full = false;
198#endif
199                oldTsValid = !clock_gettime(CLOCK_MONOTONIC, &oldTs);
200                timestampStatus = INVALID_OPERATION;
201            } else {
202                sleepNs = FAST_HOT_IDLE_NS;
203            }
204            continue;
205        case FastMixerState::EXIT:
206            delete mixer;
207            delete[] mixBuffer;
208            return false;
209        case FastMixerState::MIX:
210        case FastMixerState::WRITE:
211        case FastMixerState::MIX_WRITE:
212            break;
213        default:
214            LOG_FATAL("bad command %d", command);
215        }
216
217        // there is a non-idle state available to us; did the state change?
218        size_t frameCount = current->mFrameCount;
219        if (current != previous) {
220
221            // handle state change here, but since we want to diff the state,
222            // we're prepared for previous == &initial the first time through
223            unsigned previousTrackMask;
224
225            // check for change in output HAL configuration
226            NBAIO_Format previousFormat = format;
227            if (current->mOutputSinkGen != outputSinkGen) {
228                outputSink = current->mOutputSink;
229                outputSinkGen = current->mOutputSinkGen;
230                if (outputSink == NULL) {
231                    format = Format_Invalid;
232                    sampleRate = 0;
233                } else {
234                    format = outputSink->format();
235                    sampleRate = Format_sampleRate(format);
236                    ALOG_ASSERT(Format_channelCount(format) == FCC_2);
237                }
238            }
239
240            if ((format != previousFormat) || (frameCount != previous->mFrameCount)) {
241                // FIXME to avoid priority inversion, don't delete here
242                delete mixer;
243                mixer = NULL;
244                delete[] mixBuffer;
245                mixBuffer = NULL;
246                if (frameCount > 0 && sampleRate > 0) {
247                    // FIXME new may block for unbounded time at internal mutex of the heap
248                    //       implementation; it would be better to have normal mixer allocate for us
249                    //       to avoid blocking here and to prevent possible priority inversion
250                    mixer = new AudioMixer(frameCount, sampleRate, FastMixerState::kMaxFastTracks);
251                    mixBuffer = new short[frameCount * FCC_2];
252                    periodNs = (frameCount * 1000000000LL) / sampleRate;    // 1.00
253                    underrunNs = (frameCount * 1750000000LL) / sampleRate;  // 1.75
254                    overrunNs = (frameCount * 500000000LL) / sampleRate;    // 0.50
255                    forceNs = (frameCount * 950000000LL) / sampleRate;      // 0.95
256                    warmupNs = (frameCount * 500000000LL) / sampleRate;     // 0.50
257                } else {
258                    periodNs = 0;
259                    underrunNs = 0;
260                    overrunNs = 0;
261                    forceNs = 0;
262                    warmupNs = 0;
263                }
264                mixBufferState = UNDEFINED;
265#if !LOG_NDEBUG
266                for (i = 0; i < FastMixerState::kMaxFastTracks; ++i) {
267                    fastTrackNames[i] = -1;
268                }
269#endif
270                // we need to reconfigure all active tracks
271                previousTrackMask = 0;
272                fastTracksGen = current->mFastTracksGen - 1;
273                dumpState->mFrameCount = frameCount;
274            } else {
275                previousTrackMask = previous->mTrackMask;
276            }
277
278            // check for change in active track set
279            unsigned currentTrackMask = current->mTrackMask;
280            dumpState->mTrackMask = currentTrackMask;
281            if (current->mFastTracksGen != fastTracksGen) {
282                ALOG_ASSERT(mixBuffer != NULL);
283                int name;
284
285                // process removed tracks first to avoid running out of track names
286                unsigned removedTracks = previousTrackMask & ~currentTrackMask;
287                while (removedTracks != 0) {
288                    i = __builtin_ctz(removedTracks);
289                    removedTracks &= ~(1 << i);
290                    const FastTrack* fastTrack = &current->mFastTracks[i];
291                    ALOG_ASSERT(fastTrack->mBufferProvider == NULL);
292                    if (mixer != NULL) {
293                        name = fastTrackNames[i];
294                        ALOG_ASSERT(name >= 0);
295                        mixer->deleteTrackName(name);
296                    }
297#if !LOG_NDEBUG
298                    fastTrackNames[i] = -1;
299#endif
300                    // don't reset track dump state, since other side is ignoring it
301                    generations[i] = fastTrack->mGeneration;
302                }
303
304                // now process added tracks
305                unsigned addedTracks = currentTrackMask & ~previousTrackMask;
306                while (addedTracks != 0) {
307                    i = __builtin_ctz(addedTracks);
308                    addedTracks &= ~(1 << i);
309                    const FastTrack* fastTrack = &current->mFastTracks[i];
310                    AudioBufferProvider *bufferProvider = fastTrack->mBufferProvider;
311                    ALOG_ASSERT(bufferProvider != NULL && fastTrackNames[i] == -1);
312                    if (mixer != NULL) {
313                        // calling getTrackName with default channel mask and a random invalid
314                        //   sessionId (no effects here)
315                        name = mixer->getTrackName(AUDIO_CHANNEL_OUT_STEREO, -555);
316                        ALOG_ASSERT(name >= 0);
317                        fastTrackNames[i] = name;
318                        mixer->setBufferProvider(name, bufferProvider);
319                        mixer->setParameter(name, AudioMixer::TRACK, AudioMixer::MAIN_BUFFER,
320                                (void *) mixBuffer);
321                        // newly allocated track names default to full scale volume
322                        mixer->setParameter(name, AudioMixer::TRACK, AudioMixer::CHANNEL_MASK,
323                                (void *)(uintptr_t)fastTrack->mChannelMask);
324                        mixer->enable(name);
325                    }
326                    generations[i] = fastTrack->mGeneration;
327                }
328
329                // finally process (potentially) modified tracks; these use the same slot
330                // but may have a different buffer provider or volume provider
331                unsigned modifiedTracks = currentTrackMask & previousTrackMask;
332                while (modifiedTracks != 0) {
333                    i = __builtin_ctz(modifiedTracks);
334                    modifiedTracks &= ~(1 << i);
335                    const FastTrack* fastTrack = &current->mFastTracks[i];
336                    if (fastTrack->mGeneration != generations[i]) {
337                        // this track was actually modified
338                        AudioBufferProvider *bufferProvider = fastTrack->mBufferProvider;
339                        ALOG_ASSERT(bufferProvider != NULL);
340                        if (mixer != NULL) {
341                            name = fastTrackNames[i];
342                            ALOG_ASSERT(name >= 0);
343                            mixer->setBufferProvider(name, bufferProvider);
344                            if (fastTrack->mVolumeProvider == NULL) {
345                                mixer->setParameter(name, AudioMixer::VOLUME, AudioMixer::VOLUME0,
346                                        (void *)0x1000);
347                                mixer->setParameter(name, AudioMixer::VOLUME, AudioMixer::VOLUME1,
348                                        (void *)0x1000);
349                            }
350                            mixer->setParameter(name, AudioMixer::RESAMPLE,
351                                    AudioMixer::REMOVE, NULL);
352                            mixer->setParameter(name, AudioMixer::TRACK, AudioMixer::CHANNEL_MASK,
353                                    (void *)(uintptr_t) fastTrack->mChannelMask);
354                            // already enabled
355                        }
356                        generations[i] = fastTrack->mGeneration;
357                    }
358                }
359
360                fastTracksGen = current->mFastTracksGen;
361
362                dumpState->mNumTracks = popcount(currentTrackMask);
363            }
364
365#if 1   // FIXME shouldn't need this
366            // only process state change once
367            previous = current;
368#endif
369        }
370
371        // do work using current state here
372        if ((command & FastMixerState::MIX) && (mixer != NULL) && isWarm) {
373            ALOG_ASSERT(mixBuffer != NULL);
374            // for each track, update volume and check for underrun
375            unsigned currentTrackMask = current->mTrackMask;
376            while (currentTrackMask != 0) {
377                i = __builtin_ctz(currentTrackMask);
378                currentTrackMask &= ~(1 << i);
379                const FastTrack* fastTrack = &current->mFastTracks[i];
380
381                // Refresh the per-track timestamp
382                if (timestampStatus == NO_ERROR) {
383                    uint32_t trackFramesWrittenButNotPresented =
384                        nativeFramesWrittenButNotPresented;
385                    uint32_t trackFramesWritten = fastTrack->mBufferProvider->framesReleased();
386                    // Can't provide an AudioTimestamp before first frame presented,
387                    // or during the brief 32-bit wraparound window
388                    if (trackFramesWritten >= trackFramesWrittenButNotPresented) {
389                        AudioTimestamp perTrackTimestamp;
390                        perTrackTimestamp.mPosition =
391                                trackFramesWritten - trackFramesWrittenButNotPresented;
392                        perTrackTimestamp.mTime = timestamp.mTime;
393                        fastTrack->mBufferProvider->onTimestamp(perTrackTimestamp);
394                    }
395                }
396
397                int name = fastTrackNames[i];
398                ALOG_ASSERT(name >= 0);
399                if (fastTrack->mVolumeProvider != NULL) {
400                    uint32_t vlr = fastTrack->mVolumeProvider->getVolumeLR();
401                    mixer->setParameter(name, AudioMixer::VOLUME, AudioMixer::VOLUME0,
402                            (void *)(uintptr_t)(vlr & 0xFFFF));
403                    mixer->setParameter(name, AudioMixer::VOLUME, AudioMixer::VOLUME1,
404                            (void *)(uintptr_t)(vlr >> 16));
405                }
406                // FIXME The current implementation of framesReady() for fast tracks
407                // takes a tryLock, which can block
408                // up to 1 ms.  If enough active tracks all blocked in sequence, this would result
409                // in the overall fast mix cycle being delayed.  Should use a non-blocking FIFO.
410                size_t framesReady = fastTrack->mBufferProvider->framesReady();
411                if (ATRACE_ENABLED()) {
412                    // I wish we had formatted trace names
413                    char traceName[16];
414                    strcpy(traceName, "fRdy");
415                    traceName[4] = i + (i < 10 ? '0' : 'A' - 10);
416                    traceName[5] = '\0';
417                    ATRACE_INT(traceName, framesReady);
418                }
419                FastTrackDump *ftDump = &dumpState->mTracks[i];
420                FastTrackUnderruns underruns = ftDump->mUnderruns;
421                if (framesReady < frameCount) {
422                    if (framesReady == 0) {
423                        underruns.mBitFields.mEmpty++;
424                        underruns.mBitFields.mMostRecent = UNDERRUN_EMPTY;
425                        mixer->disable(name);
426                    } else {
427                        // allow mixing partial buffer
428                        underruns.mBitFields.mPartial++;
429                        underruns.mBitFields.mMostRecent = UNDERRUN_PARTIAL;
430                        mixer->enable(name);
431                    }
432                } else {
433                    underruns.mBitFields.mFull++;
434                    underruns.mBitFields.mMostRecent = UNDERRUN_FULL;
435                    mixer->enable(name);
436                }
437                ftDump->mUnderruns = underruns;
438                ftDump->mFramesReady = framesReady;
439            }
440
441            int64_t pts;
442            if (outputSink == NULL || (OK != outputSink->getNextWriteTimestamp(&pts)))
443                pts = AudioBufferProvider::kInvalidPTS;
444
445            // process() is CPU-bound
446            mixer->process(pts);
447            mixBufferState = MIXED;
448        } else if (mixBufferState == MIXED) {
449            mixBufferState = UNDEFINED;
450        }
451        bool attemptedWrite = false;
452        //bool didFullWrite = false;    // dumpsys could display a count of partial writes
453        if ((command & FastMixerState::WRITE) && (outputSink != NULL) && (mixBuffer != NULL)) {
454            if (mixBufferState == UNDEFINED) {
455                memset(mixBuffer, 0, frameCount * FCC_2 * sizeof(short));
456                mixBufferState = ZEROED;
457            }
458            if (teeSink != NULL) {
459                (void) teeSink->write(mixBuffer, frameCount);
460            }
461            // FIXME write() is non-blocking and lock-free for a properly implemented NBAIO sink,
462            //       but this code should be modified to handle both non-blocking and blocking sinks
463            dumpState->mWriteSequence++;
464            ATRACE_BEGIN("write");
465            ssize_t framesWritten = outputSink->write(mixBuffer, frameCount);
466            ATRACE_END();
467            dumpState->mWriteSequence++;
468            if (framesWritten >= 0) {
469                ALOG_ASSERT((size_t) framesWritten <= frameCount);
470                totalNativeFramesWritten += framesWritten;
471                dumpState->mFramesWritten = totalNativeFramesWritten;
472                //if ((size_t) framesWritten == frameCount) {
473                //    didFullWrite = true;
474                //}
475            } else {
476                dumpState->mWriteErrors++;
477            }
478            attemptedWrite = true;
479            // FIXME count # of writes blocked excessively, CPU usage, etc. for dump
480
481            timestampStatus = outputSink->getTimestamp(timestamp);
482            if (timestampStatus == NO_ERROR) {
483                uint32_t totalNativeFramesPresented = timestamp.mPosition;
484                if (totalNativeFramesPresented <= totalNativeFramesWritten) {
485                    nativeFramesWrittenButNotPresented =
486                        totalNativeFramesWritten - totalNativeFramesPresented;
487                } else {
488                    // HAL reported that more frames were presented than were written
489                    timestampStatus = INVALID_OPERATION;
490                }
491            }
492        }
493
494        // To be exactly periodic, compute the next sleep time based on current time.
495        // This code doesn't have long-term stability when the sink is non-blocking.
496        // FIXME To avoid drift, use the local audio clock or watch the sink's fill status.
497        struct timespec newTs;
498        int rc = clock_gettime(CLOCK_MONOTONIC, &newTs);
499        if (rc == 0) {
500            //logWriter->logTimestamp(newTs);
501            if (oldTsValid) {
502                time_t sec = newTs.tv_sec - oldTs.tv_sec;
503                long nsec = newTs.tv_nsec - oldTs.tv_nsec;
504                ALOGE_IF(sec < 0 || (sec == 0 && nsec < 0),
505                        "clock_gettime(CLOCK_MONOTONIC) failed: was %ld.%09ld but now %ld.%09ld",
506                        oldTs.tv_sec, oldTs.tv_nsec, newTs.tv_sec, newTs.tv_nsec);
507                if (nsec < 0) {
508                    --sec;
509                    nsec += 1000000000;
510                }
511                // To avoid an initial underrun on fast tracks after exiting standby,
512                // do not start pulling data from tracks and mixing until warmup is complete.
513                // Warmup is considered complete after the earlier of:
514                //      MIN_WARMUP_CYCLES write() attempts and last one blocks for at least warmupNs
515                //      MAX_WARMUP_CYCLES write() attempts.
516                // This is overly conservative, but to get better accuracy requires a new HAL API.
517                if (!isWarm && attemptedWrite) {
518                    measuredWarmupTs.tv_sec += sec;
519                    measuredWarmupTs.tv_nsec += nsec;
520                    if (measuredWarmupTs.tv_nsec >= 1000000000) {
521                        measuredWarmupTs.tv_sec++;
522                        measuredWarmupTs.tv_nsec -= 1000000000;
523                    }
524                    ++warmupCycles;
525                    if ((nsec > warmupNs && warmupCycles >= MIN_WARMUP_CYCLES) ||
526                            (warmupCycles >= MAX_WARMUP_CYCLES)) {
527                        isWarm = true;
528                        dumpState->mMeasuredWarmupTs = measuredWarmupTs;
529                        dumpState->mWarmupCycles = warmupCycles;
530                    }
531                }
532                sleepNs = -1;
533                if (isWarm) {
534                    if (sec > 0 || nsec > underrunNs) {
535                        ATRACE_NAME("underrun");
536                        // FIXME only log occasionally
537                        ALOGV("underrun: time since last cycle %d.%03ld sec",
538                                (int) sec, nsec / 1000000L);
539                        dumpState->mUnderruns++;
540                        ignoreNextOverrun = true;
541                    } else if (nsec < overrunNs) {
542                        if (ignoreNextOverrun) {
543                            ignoreNextOverrun = false;
544                        } else {
545                            // FIXME only log occasionally
546                            ALOGV("overrun: time since last cycle %d.%03ld sec",
547                                    (int) sec, nsec / 1000000L);
548                            dumpState->mOverruns++;
549                        }
550                        // This forces a minimum cycle time. It:
551                        //  - compensates for an audio HAL with jitter due to sample rate conversion
552                        //  - works with a variable buffer depth audio HAL that never pulls at a
553                        //    rate < than overrunNs per buffer.
554                        //  - recovers from overrun immediately after underrun
555                        // It doesn't work with a non-blocking audio HAL.
556                        sleepNs = forceNs - nsec;
557                    } else {
558                        ignoreNextOverrun = false;
559                    }
560                }
561#ifdef FAST_MIXER_STATISTICS
562                if (isWarm) {
563                    // advance the FIFO queue bounds
564                    size_t i = bounds & (dumpState->mSamplingN - 1);
565                    bounds = (bounds & 0xFFFF0000) | ((bounds + 1) & 0xFFFF);
566                    if (full) {
567                        bounds += 0x10000;
568                    } else if (!(bounds & (dumpState->mSamplingN - 1))) {
569                        full = true;
570                    }
571                    // compute the delta value of clock_gettime(CLOCK_MONOTONIC)
572                    uint32_t monotonicNs = nsec;
573                    if (sec > 0 && sec < 4) {
574                        monotonicNs += sec * 1000000000;
575                    }
576                    // compute raw CPU load = delta value of clock_gettime(CLOCK_THREAD_CPUTIME_ID)
577                    uint32_t loadNs = 0;
578                    struct timespec newLoad;
579                    rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &newLoad);
580                    if (rc == 0) {
581                        if (oldLoadValid) {
582                            sec = newLoad.tv_sec - oldLoad.tv_sec;
583                            nsec = newLoad.tv_nsec - oldLoad.tv_nsec;
584                            if (nsec < 0) {
585                                --sec;
586                                nsec += 1000000000;
587                            }
588                            loadNs = nsec;
589                            if (sec > 0 && sec < 4) {
590                                loadNs += sec * 1000000000;
591                            }
592                        } else {
593                            // first time through the loop
594                            oldLoadValid = true;
595                        }
596                        oldLoad = newLoad;
597                    }
598#ifdef CPU_FREQUENCY_STATISTICS
599                    // get the absolute value of CPU clock frequency in kHz
600                    int cpuNum = sched_getcpu();
601                    uint32_t kHz = tcu.getCpukHz(cpuNum);
602                    kHz = (kHz << 4) | (cpuNum & 0xF);
603#endif
604                    // save values in FIFO queues for dumpsys
605                    // these stores #1, #2, #3 are not atomic with respect to each other,
606                    // or with respect to store #4 below
607                    dumpState->mMonotonicNs[i] = monotonicNs;
608                    dumpState->mLoadNs[i] = loadNs;
609#ifdef CPU_FREQUENCY_STATISTICS
610                    dumpState->mCpukHz[i] = kHz;
611#endif
612                    // this store #4 is not atomic with respect to stores #1, #2, #3 above, but
613                    // the newest open & oldest closed halves are atomic with respect to each other
614                    dumpState->mBounds = bounds;
615                    ATRACE_INT("cycle_ms", monotonicNs / 1000000);
616                    ATRACE_INT("load_us", loadNs / 1000);
617                }
618#endif
619            } else {
620                // first time through the loop
621                oldTsValid = true;
622                sleepNs = periodNs;
623                ignoreNextOverrun = true;
624            }
625            oldTs = newTs;
626        } else {
627            // monotonic clock is broken
628            oldTsValid = false;
629            sleepNs = periodNs;
630        }
631
632
633    }   // for (;;)
634
635    // never return 'true'; Thread::_threadLoop() locks mutex which can result in priority inversion
636}
637
638FastMixerDumpState::FastMixerDumpState(
639#ifdef FAST_MIXER_STATISTICS
640        uint32_t samplingN
641#endif
642        ) :
643    mCommand(FastMixerState::INITIAL), mWriteSequence(0), mFramesWritten(0),
644    mNumTracks(0), mWriteErrors(0), mUnderruns(0), mOverruns(0),
645    mSampleRate(0), mFrameCount(0), /* mMeasuredWarmupTs({0, 0}), */ mWarmupCycles(0),
646    mTrackMask(0)
647#ifdef FAST_MIXER_STATISTICS
648    , mSamplingN(0), mBounds(0)
649#endif
650{
651    mMeasuredWarmupTs.tv_sec = 0;
652    mMeasuredWarmupTs.tv_nsec = 0;
653#ifdef FAST_MIXER_STATISTICS
654    increaseSamplingN(samplingN);
655#endif
656}
657
#ifdef FAST_MIXER_STATISTICS
// Raises the number of statistics samples in use to samplingN.
//
// The request is silently ignored unless samplingN is strictly greater than the current
// mSamplingN, does not exceed kSamplingN, and is left unchanged by roundup()
// (presumably a power of two, since dump() masks indices with (mSamplingN - 1) --
// confirm against roundup()'s definition).
void FastMixerDumpState::increaseSamplingN(uint32_t samplingN)
{
    if (samplingN > mSamplingN && samplingN <= kSamplingN && roundup(samplingN) == samplingN) {
        const uint32_t firstNew = mSamplingN;
        const uint32_t newCount = samplingN - firstNew;
        // The sample arrays are not accessed atomically with respect to the bounds, so
        // zero the newly exposed entries to reduce the chance that dumpsys reads random
        // uninitialized samples.
        memset(&mMonotonicNs[firstNew], 0, newCount * sizeof(mMonotonicNs[0]));
        memset(&mLoadNs[firstNew], 0, newCount * sizeof(mLoadNs[0]));
#ifdef CPU_FREQUENCY_STATISTICS
        memset(&mCpukHz[firstNew], 0, newCount * sizeof(mCpukHz[0]));
#endif
        // publish the new size only after the new entries have been cleared
        mSamplingN = samplingN;
    }
}
#endif
675
676FastMixerDumpState::~FastMixerDumpState()
677{
678}
679
// Comparator for qsort() that orders uint32_t values ascending.
// pa, pb: pointers to the two uint32_t elements being compared.
// Returns -1 if *pa < *pb, 1 if *pa > *pb, and 0 if they are equal.
static int compare_uint32_t(const void *pa, const void *pb)
{
    const uint32_t lhs = *static_cast<const uint32_t *>(pa);
    const uint32_t rhs = *static_cast<const uint32_t *>(pb);
    // Each comparison yields 0 or 1, so their difference is exactly -1, 0 or 1 and,
    // unlike subtracting the values themselves, cannot wrap around.
    return (lhs > rhs) - (lhs < rhs);
}
693
694void FastMixerDumpState::dump(int fd) const
695{
696    if (mCommand == FastMixerState::INITIAL) {
697        dprintf(fd, "FastMixer not initialized\n");
698        return;
699    }
700#define COMMAND_MAX 32
701    char string[COMMAND_MAX];
702    switch (mCommand) {
703    case FastMixerState::INITIAL:
704        strcpy(string, "INITIAL");
705        break;
706    case FastMixerState::HOT_IDLE:
707        strcpy(string, "HOT_IDLE");
708        break;
709    case FastMixerState::COLD_IDLE:
710        strcpy(string, "COLD_IDLE");
711        break;
712    case FastMixerState::EXIT:
713        strcpy(string, "EXIT");
714        break;
715    case FastMixerState::MIX:
716        strcpy(string, "MIX");
717        break;
718    case FastMixerState::WRITE:
719        strcpy(string, "WRITE");
720        break;
721    case FastMixerState::MIX_WRITE:
722        strcpy(string, "MIX_WRITE");
723        break;
724    default:
725        snprintf(string, COMMAND_MAX, "%d", mCommand);
726        break;
727    }
728    double measuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) +
729            (mMeasuredWarmupTs.tv_nsec / 1000000.0);
730    double mixPeriodSec = (double) mFrameCount / (double) mSampleRate;
731    dprintf(fd, "FastMixer command=%s writeSequence=%u framesWritten=%u\n"
732                 "          numTracks=%u writeErrors=%u underruns=%u overruns=%u\n"
733                 "          sampleRate=%u frameCount=%zu measuredWarmup=%.3g ms, warmupCycles=%u\n"
734                 "          mixPeriod=%.2f ms\n",
735                 string, mWriteSequence, mFramesWritten,
736                 mNumTracks, mWriteErrors, mUnderruns, mOverruns,
737                 mSampleRate, mFrameCount, measuredWarmupMs, mWarmupCycles,
738                 mixPeriodSec * 1e3);
739#ifdef FAST_MIXER_STATISTICS
740    // find the interval of valid samples
741    uint32_t bounds = mBounds;
742    uint32_t newestOpen = bounds & 0xFFFF;
743    uint32_t oldestClosed = bounds >> 16;
744    uint32_t n = (newestOpen - oldestClosed) & 0xFFFF;
745    if (n > mSamplingN) {
746        ALOGE("too many samples %u", n);
747        n = mSamplingN;
748    }
749    // statistics for monotonic (wall clock) time, thread raw CPU load in time, CPU clock frequency,
750    // and adjusted CPU load in MHz normalized for CPU clock frequency
751    CentralTendencyStatistics wall, loadNs;
752#ifdef CPU_FREQUENCY_STATISTICS
753    CentralTendencyStatistics kHz, loadMHz;
754    uint32_t previousCpukHz = 0;
755#endif
756    // Assuming a normal distribution for cycle times, three standard deviations on either side of
757    // the mean account for 99.73% of the population.  So if we take each tail to be 1/1000 of the
758    // sample set, we get 99.8% combined, or close to three standard deviations.
759    static const uint32_t kTailDenominator = 1000;
760    uint32_t *tail = n >= kTailDenominator ? new uint32_t[n] : NULL;
761    // loop over all the samples
762    for (uint32_t j = 0; j < n; ++j) {
763        size_t i = oldestClosed++ & (mSamplingN - 1);
764        uint32_t wallNs = mMonotonicNs[i];
765        if (tail != NULL) {
766            tail[j] = wallNs;
767        }
768        wall.sample(wallNs);
769        uint32_t sampleLoadNs = mLoadNs[i];
770        loadNs.sample(sampleLoadNs);
771#ifdef CPU_FREQUENCY_STATISTICS
772        uint32_t sampleCpukHz = mCpukHz[i];
773        // skip bad kHz samples
774        if ((sampleCpukHz & ~0xF) != 0) {
775            kHz.sample(sampleCpukHz >> 4);
776            if (sampleCpukHz == previousCpukHz) {
777                double megacycles = (double) sampleLoadNs * (double) (sampleCpukHz >> 4) * 1e-12;
778                double adjMHz = megacycles / mixPeriodSec;  // _not_ wallNs * 1e9
779                loadMHz.sample(adjMHz);
780            }
781        }
782        previousCpukHz = sampleCpukHz;
783#endif
784    }
785    dprintf(fd, "Simple moving statistics over last %.1f seconds:\n", wall.n() * mixPeriodSec);
786    dprintf(fd, "  wall clock time in ms per mix cycle:\n"
787                "    mean=%.2f min=%.2f max=%.2f stddev=%.2f\n",
788                wall.mean()*1e-6, wall.minimum()*1e-6, wall.maximum()*1e-6, wall.stddev()*1e-6);
789    dprintf(fd, "  raw CPU load in us per mix cycle:\n"
790                "    mean=%.0f min=%.0f max=%.0f stddev=%.0f\n",
791                loadNs.mean()*1e-3, loadNs.minimum()*1e-3, loadNs.maximum()*1e-3,
792                loadNs.stddev()*1e-3);
793#ifdef CPU_FREQUENCY_STATISTICS
794    dprintf(fd, "  CPU clock frequency in MHz:\n"
795                "    mean=%.0f min=%.0f max=%.0f stddev=%.0f\n",
796                kHz.mean()*1e-3, kHz.minimum()*1e-3, kHz.maximum()*1e-3, kHz.stddev()*1e-3);
797    dprintf(fd, "  adjusted CPU load in MHz (i.e. normalized for CPU clock frequency):\n"
798                "    mean=%.1f min=%.1f max=%.1f stddev=%.1f\n",
799                loadMHz.mean(), loadMHz.minimum(), loadMHz.maximum(), loadMHz.stddev());
800#endif
801    if (tail != NULL) {
802        qsort(tail, n, sizeof(uint32_t), compare_uint32_t);
803        // assume same number of tail samples on each side, left and right
804        uint32_t count = n / kTailDenominator;
805        CentralTendencyStatistics left, right;
806        for (uint32_t i = 0; i < count; ++i) {
807            left.sample(tail[i]);
808            right.sample(tail[n - (i + 1)]);
809        }
810        dprintf(fd, "Distribution of mix cycle times in ms for the tails (> ~3 stddev outliers):\n"
811                    "  left tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n"
812                    "  right tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n",
813                    left.mean()*1e-6, left.minimum()*1e-6, left.maximum()*1e-6, left.stddev()*1e-6,
814                    right.mean()*1e-6, right.minimum()*1e-6, right.maximum()*1e-6,
815                    right.stddev()*1e-6);
816        delete[] tail;
817    }
818#endif
819    // The active track mask and track states are updated non-atomically.
820    // So if we relied on isActive to decide whether to display,
821    // then we might display an obsolete track or omit an active track.
822    // Instead we always display all tracks, with an indication
823    // of whether we think the track is active.
824    uint32_t trackMask = mTrackMask;
825    dprintf(fd, "Fast tracks: kMaxFastTracks=%u activeMask=%#x\n",
826            FastMixerState::kMaxFastTracks, trackMask);
827    dprintf(fd, "Index Active Full Partial Empty  Recent Ready\n");
828    for (uint32_t i = 0; i < FastMixerState::kMaxFastTracks; ++i, trackMask >>= 1) {
829        bool isActive = trackMask & 1;
830        const FastTrackDump *ftDump = &mTracks[i];
831        const FastTrackUnderruns& underruns = ftDump->mUnderruns;
832        const char *mostRecent;
833        switch (underruns.mBitFields.mMostRecent) {
834        case UNDERRUN_FULL:
835            mostRecent = "full";
836            break;
837        case UNDERRUN_PARTIAL:
838            mostRecent = "partial";
839            break;
840        case UNDERRUN_EMPTY:
841            mostRecent = "empty";
842            break;
843        default:
844            mostRecent = "?";
845            break;
846        }
847        dprintf(fd, "%5u %6s %4u %7u %5u %7s %5zu\n", i, isActive ? "yes" : "no",
848                (underruns.mBitFields.mFull) & UNDERRUN_MASK,
849                (underruns.mBitFields.mPartial) & UNDERRUN_MASK,
850                (underruns.mBitFields.mEmpty) & UNDERRUN_MASK,
851                mostRecent, ftDump->mFramesReady);
852    }
853}
854
855}   // namespace android
856