1/*
2** Copyright 2011, The Android Open-Source Project
3**
4** Licensed under the Apache License, Version 2.0 (the "License");
5** you may not use this file except in compliance with the License.
6** You may obtain a copy of the License at
7**
8**     http://www.apache.org/licenses/LICENSE-2.0
9**
10** Unless required by applicable law or agreed to in writing, software
11** distributed under the License is distributed on an "AS IS" BASIS,
12** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13** See the License for the specific language governing permissions and
14** limitations under the License.
15*/
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "echo_reference"
19
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <log/log.h>
#include <system/audio.h>
#include <audio_utils/resampler.h>
#include <audio_utils/echo_reference.h>
29
// Activity flags of an echo reference. Each active side owns one bit, so the two
// non-idle values can be OR-ed together when reading and writing are both active.
enum state {
    ECHOREF_IDLE    = 0,         // neither side is active
    ECHOREF_READING = 1 << 0,    // reading is active
    ECHOREF_WRITING = 1 << 1     // writing is active
};
36
37struct echo_reference {
38    struct echo_reference_itfe itfe;
39    int status;                     // init status
40    uint32_t state;                 // active state: reading, writing or both
41    audio_format_t rd_format;       // read sample format
42    uint32_t rd_channel_count;      // read number of channels
43    uint32_t rd_sampling_rate;      // read sampling rate in Hz
44    size_t rd_frame_size;           // read frame size (bytes per sample)
45    audio_format_t wr_format;       // write sample format
46    uint32_t wr_channel_count;      // write number of channels
47    uint32_t wr_sampling_rate;      // write sampling rate in Hz
48    size_t wr_frame_size;           // write frame size (bytes per sample)
49    void *buffer;                   // main buffer
50    size_t buf_size;                // main buffer size in frames
51    size_t frames_in;               // number of frames in main buffer
52    void *wr_buf;                   // buffer for input conversions
53    size_t wr_buf_size;             // size of conversion buffer in frames
54    size_t wr_frames_in;            // number of frames in conversion buffer
55    size_t wr_curr_frame_size;      // number of frames given to current write() function
56    void *wr_src_buf;               // resampler input buf (either wr_buf or buffer used by write())
57    struct timespec wr_render_time; // latest render time indicated by write()
58                                    // default ALSA gettimeofday() format
59    int32_t  playback_delay;        // playback buffer delay indicated by last write()
60    int16_t prev_delta_sign;        // sign of previous delay difference:
61                                    //  1: positive, -1: negative, 0: unknown
62    uint16_t delta_count;           // number of consecutive delay differences with same sign
63    pthread_mutex_t lock;                      // mutex protecting read/write concurrency
64    pthread_cond_t cond;                       // condition signaled when data is ready to read
65    struct resampler_itfe *resampler;          // input resampler
66    struct resampler_buffer_provider provider; // resampler buffer provider
67};
68
69
70int echo_reference_get_next_buffer(struct resampler_buffer_provider *buffer_provider,
71                                   struct resampler_buffer* buffer)
72{
73    struct echo_reference *er;
74
75    if (buffer_provider == NULL) {
76        return -EINVAL;
77    }
78
79    er = (struct echo_reference *)((char *)buffer_provider -
80                                      offsetof(struct echo_reference, provider));
81
82    if (er->wr_src_buf == NULL || er->wr_frames_in == 0) {
83        buffer->raw = NULL;
84        buffer->frame_count = 0;
85        return -ENODATA;
86    }
87
88    buffer->frame_count = (buffer->frame_count > er->wr_frames_in) ?
89            er->wr_frames_in : buffer->frame_count;
90    // this is er->rd_channel_count here as we resample after stereo to mono conversion if any
91    buffer->i16 = (int16_t *)er->wr_src_buf + (er->wr_curr_frame_size - er->wr_frames_in) *
92            er->rd_channel_count;
93
94    return 0;
95}
96
97void echo_reference_release_buffer(struct resampler_buffer_provider *buffer_provider,
98                                  struct resampler_buffer* buffer)
99{
100    struct echo_reference *er;
101
102    if (buffer_provider == NULL) {
103        return;
104    }
105
106    er = (struct echo_reference *)((char *)buffer_provider -
107                                      offsetof(struct echo_reference, provider));
108
109    er->wr_frames_in -= buffer->frame_count;
110}
111
112static void echo_reference_reset_l(struct echo_reference *er)
113{
114    ALOGV("echo_reference_reset_l()");
115    free(er->buffer);
116    er->buffer = NULL;
117    er->buf_size = 0;
118    er->frames_in = 0;
119    free(er->wr_buf);
120    er->wr_buf = NULL;
121    er->wr_buf_size = 0;
122    er->wr_render_time.tv_sec = 0;
123    er->wr_render_time.tv_nsec = 0;
124    er->delta_count = 0;
125    er->prev_delta_sign = 0;
126}
127
128/* additional space in resampler buffer allowing for extra samples to be returned
129 * by speex resampler when sample rates ratio is not an integer.
130 */
131#define RESAMPLER_HEADROOM_SAMPLES   10
132
133static int echo_reference_write(struct echo_reference_itfe *echo_reference,
134                         struct echo_reference_buffer *buffer)
135{
136    struct echo_reference *er = (struct echo_reference *)echo_reference;
137    int status = 0;
138
139    if (er == NULL) {
140        return -EINVAL;
141    }
142
143    pthread_mutex_lock(&er->lock);
144
145    if (buffer == NULL) {
146        ALOGV("echo_reference_write() stop write");
147        er->state &= ~ECHOREF_WRITING;
148        echo_reference_reset_l(er);
149        goto exit;
150    }
151
152    ALOGV("echo_reference_write() START trying to write %zu frames", buffer->frame_count);
153    ALOGV("echo_reference_write() playbackTimestamp:[%d].[%d], er->playback_delay:[%" PRId32 "]",
154            (int)buffer->time_stamp.tv_sec,
155            (int)buffer->time_stamp.tv_nsec, er->playback_delay);
156
157    //ALOGV("echo_reference_write() %d frames", buffer->frame_count);
158    // discard writes until a valid time stamp is provided.
159
160    if ((buffer->time_stamp.tv_sec == 0) && (buffer->time_stamp.tv_nsec == 0) &&
161        (er->wr_render_time.tv_sec == 0) && (er->wr_render_time.tv_nsec == 0)) {
162        goto exit;
163    }
164
165    if ((er->state & ECHOREF_WRITING) == 0) {
166        ALOGV("echo_reference_write() start write");
167        if (er->resampler != NULL) {
168            er->resampler->reset(er->resampler);
169        }
170        er->state |= ECHOREF_WRITING;
171    }
172
173    if ((er->state & ECHOREF_READING) == 0) {
174        goto exit;
175    }
176
177    er->wr_render_time.tv_sec  = buffer->time_stamp.tv_sec;
178    er->wr_render_time.tv_nsec = buffer->time_stamp.tv_nsec;
179
180    er->playback_delay = buffer->delay_ns;
181
182    // this will be used in the get_next_buffer, to support variable input buffer sizes
183    er->wr_curr_frame_size = buffer->frame_count;
184
185    void *srcBuf;
186    size_t inFrames;
187    // do stereo to mono and down sampling if necessary
188    if (er->rd_channel_count != er->wr_channel_count ||
189            er->rd_sampling_rate != er->wr_sampling_rate) {
190        size_t wrBufSize = buffer->frame_count;
191
192        inFrames = buffer->frame_count;
193
194        if (er->rd_sampling_rate != er->wr_sampling_rate) {
195            inFrames = (buffer->frame_count * er->rd_sampling_rate) / er->wr_sampling_rate +
196                                                    RESAMPLER_HEADROOM_SAMPLES;
197            // wr_buf is not only used as resampler output but also for stereo to mono conversion
198            // output so buffer size is driven by both write and read sample rates
199            if (inFrames > wrBufSize) {
200                wrBufSize = inFrames;
201            }
202        }
203
204        if (er->wr_buf_size < wrBufSize) {
205            ALOGV("echo_reference_write() increasing write buffer size from %zu to %zu",
206                    er->wr_buf_size, wrBufSize);
207            er->wr_buf_size = wrBufSize;
208            er->wr_buf = realloc(er->wr_buf, er->wr_buf_size * er->rd_frame_size);
209        }
210
211        if (er->rd_channel_count != er->wr_channel_count) {
212            // must be stereo to mono
213            int16_t *src16 = (int16_t *)buffer->raw;
214            int16_t *dst16 = (int16_t *)er->wr_buf;
215            size_t frames = buffer->frame_count;
216            while (frames--) {
217                *dst16++ = (int16_t)(((int32_t)*src16 + (int32_t)*(src16 + 1)) >> 1);
218                src16 += 2;
219            }
220        }
221        if (er->wr_sampling_rate != er->rd_sampling_rate) {
222            if (er->resampler == NULL) {
223                int rc;
224                ALOGV("echo_reference_write() new ReSampler(%d, %d)",
225                      er->wr_sampling_rate, er->rd_sampling_rate);
226                er->provider.get_next_buffer = echo_reference_get_next_buffer;
227                er->provider.release_buffer = echo_reference_release_buffer;
228                rc = create_resampler(er->wr_sampling_rate,
229                                 er->rd_sampling_rate,
230                                 er->rd_channel_count,
231                                 RESAMPLER_QUALITY_DEFAULT,
232                                 &er->provider,
233                                 &er->resampler);
234                if (rc != 0) {
235                    er->resampler = NULL;
236                    ALOGV("echo_reference_write() failure to create resampler %d", rc);
237                    status = -ENODEV;
238                    goto exit;
239                }
240            }
241            // er->wr_src_buf and er->wr_frames_in are used by getNexBuffer() called by the
242            // resampler to get new frames
243            if (er->rd_channel_count != er->wr_channel_count) {
244                er->wr_src_buf = er->wr_buf;
245            } else {
246                er->wr_src_buf = buffer->raw;
247            }
248            er->wr_frames_in = buffer->frame_count;
249            // inFrames is always more than we need here to get frames remaining from previous runs
250            // inFrames is updated by resample() with the number of frames produced
251            ALOGV("echo_reference_write() ReSampling(%d, %d)",
252                  er->wr_sampling_rate, er->rd_sampling_rate);
253            er->resampler->resample_from_provider(er->resampler,
254                                                     (int16_t *)er->wr_buf, &inFrames);
255            ALOGV_IF(er->wr_frames_in != 0,
256                    "echo_reference_write() er->wr_frames_in not 0 (%d) after resampler",
257                    er->wr_frames_in);
258        }
259        srcBuf = er->wr_buf;
260    } else {
261        inFrames = buffer->frame_count;
262        srcBuf = buffer->raw;
263    }
264
265    if (er->frames_in + inFrames > er->buf_size) {
266        ALOGV("echo_reference_write() increasing buffer size from %zu to %zu",
267                er->buf_size, er->frames_in + inFrames);
268                er->buf_size = er->frames_in + inFrames;
269                er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
270    }
271    memcpy((char *)er->buffer + er->frames_in * er->rd_frame_size,
272           srcBuf,
273           inFrames * er->rd_frame_size);
274    er->frames_in += inFrames;
275
276    ALOGV("echo_reference_write() frames written:[%zu], frames total:[%zu] buffer size:[%zu]\n"
277          "                       er->wr_render_time:[%d].[%d], er->playback_delay:[%" PRId32 "]",
278          inFrames, er->frames_in, er->buf_size,
279          (int)er->wr_render_time.tv_sec, (int)er->wr_render_time.tv_nsec, er->playback_delay);
280
281    pthread_cond_signal(&er->cond);
282exit:
283    pthread_mutex_unlock(&er->lock);
284    ALOGV("echo_reference_write() END");
285    return status;
286}
287
288// delay jump threshold to update ref buffer: 6 samples at 8kHz in nsecs
289#define MIN_DELAY_DELTA_NS (375000*2)
290// number of consecutive delta with same sign between expected and actual delay before adjusting
291// the buffer
292#define MIN_DELTA_NUM 4
293
294
295static int echo_reference_read(struct echo_reference_itfe *echo_reference,
296                         struct echo_reference_buffer *buffer)
297{
298    struct echo_reference *er = (struct echo_reference *)echo_reference;
299
300    if (er == NULL) {
301        return -EINVAL;
302    }
303
304    pthread_mutex_lock(&er->lock);
305
306    if (buffer == NULL) {
307        ALOGV("echo_reference_read() stop read");
308        er->state &= ~ECHOREF_READING;
309        goto exit;
310    }
311
312    ALOGV("echo_reference_read() START, delayCapture:[%" PRId32 "], "
313            "er->frames_in:[%zu],buffer->frame_count:[%zu]",
314    buffer->delay_ns, er->frames_in, buffer->frame_count);
315
316    if ((er->state & ECHOREF_READING) == 0) {
317        ALOGV("echo_reference_read() start read");
318        echo_reference_reset_l(er);
319        er->state |= ECHOREF_READING;
320    }
321
322    if ((er->state & ECHOREF_WRITING) == 0) {
323        memset(buffer->raw, 0, er->rd_frame_size * buffer->frame_count);
324        buffer->delay_ns = 0;
325        goto exit;
326    }
327
328//    ALOGV("echo_reference_read() %d frames", buffer->frame_count);
329
330    // allow some time for new frames to arrive if not enough frames are ready for read
331    if (er->frames_in < buffer->frame_count) {
332        uint32_t timeoutMs = (uint32_t)((1000 * buffer->frame_count) / er->rd_sampling_rate / 2);
333        struct timespec ts = {0, 0};
334
335#ifndef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
336        clock_gettime(CLOCK_REALTIME, &ts);
337#endif
338
339        ts.tv_sec  += timeoutMs/1000;
340        ts.tv_nsec += (timeoutMs%1000) * 1000000;
341        if (ts.tv_nsec >= 1000000000) {
342            ts.tv_nsec -= 1000000000;
343            ts.tv_sec  += 1;
344        }
345
346#ifdef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
347        pthread_cond_timedwait_relative_np(&er->cond, &er->lock, &ts);
348#else
349        pthread_cond_timedwait(&er->cond, &er->lock, &ts);
350#endif
351
352        ALOGV_IF((er->frames_in < buffer->frame_count),
353                 "echo_reference_read() waited %d ms but still not enough frames"\
354                 " er->frames_in: %d, buffer->frame_count = %d",
355                 timeoutMs, er->frames_in, buffer->frame_count);
356    }
357
358    int64_t timeDiff;
359    struct timespec tmp;
360
361    if ((er->wr_render_time.tv_sec == 0 && er->wr_render_time.tv_nsec == 0) ||
362        (buffer->time_stamp.tv_sec == 0 && buffer->time_stamp.tv_nsec == 0)) {
363        ALOGV("echo_reference_read(): NEW:timestamp is zero---------setting timeDiff = 0, "\
364             "not updating delay this time");
365        timeDiff = 0;
366    } else {
367        if (buffer->time_stamp.tv_nsec < er->wr_render_time.tv_nsec) {
368            tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec - 1;
369            tmp.tv_nsec = 1000000000 + buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
370        } else {
371            tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec;
372            tmp.tv_nsec = buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
373        }
374        timeDiff = (((int64_t)tmp.tv_sec * 1000000000 + tmp.tv_nsec));
375
376        int64_t expectedDelayNs =  er->playback_delay + buffer->delay_ns - timeDiff;
377
378        if (er->resampler != NULL) {
379            // Resampler already compensates part of the delay
380            int32_t rsmp_delay = er->resampler->delay_ns(er->resampler);
381            expectedDelayNs -= rsmp_delay;
382        }
383
384        ALOGV("echo_reference_read(): expectedDelayNs[%" PRId64 "] = "
385                "er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
386                "] - timeDiff[%" PRId64 "]",
387                expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
388
389        if (expectedDelayNs > 0) {
390            int64_t delayNs = ((int64_t)er->frames_in * 1000000000) / er->rd_sampling_rate;
391
392            int64_t  deltaNs = delayNs - expectedDelayNs;
393
394            ALOGV("echo_reference_read(): EchoPathDelayDeviation between reference and DMA [%"
395                    PRId64 "]", deltaNs);
396            if (llabs(deltaNs) >= MIN_DELAY_DELTA_NS) {
397                // smooth the variation and update the reference buffer only
398                // if a deviation in the same direction is observed for more than MIN_DELTA_NUM
399                // consecutive reads.
400                int16_t delay_sign = (deltaNs >= 0) ? 1 : -1;
401                if (delay_sign == er->prev_delta_sign) {
402                    er->delta_count++;
403                } else {
404                    er->delta_count = 1;
405                }
406                er->prev_delta_sign = delay_sign;
407
408                if (er->delta_count > MIN_DELTA_NUM) {
409                    size_t previousFrameIn = er->frames_in;
410                    er->frames_in = (size_t)((expectedDelayNs * er->rd_sampling_rate)/1000000000);
411                    int offset = er->frames_in - previousFrameIn;
412
413                    ALOGV("echo_reference_read(): deltaNs ENOUGH and %s: "
414                            "er->frames_in: %zu, previousFrameIn = %zu",
415                         delay_sign ? "positive" : "negative", er->frames_in, previousFrameIn);
416
417                    if (deltaNs < 0) {
418                        // Less data available in the reference buffer than expected
419                        if (er->frames_in > er->buf_size) {
420                            er->buf_size = er->frames_in;
421                            er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
422                            ALOGV("echo_reference_read(): increasing buffer size to %zu",
423                                  er->buf_size);
424                        }
425
426                        if (offset > 0) {
427                            memset((char *)er->buffer + previousFrameIn * er->rd_frame_size,
428                                   0, offset * er->rd_frame_size);
429                            ALOGV("echo_reference_read(): pushing ref buffer by [%d]", offset);
430                        }
431                    } else {
432                        // More data available in the reference buffer than expected
433                        offset = -offset;
434                        if (offset > 0) {
435                            memcpy(er->buffer, (char *)er->buffer + (offset * er->rd_frame_size),
436                                   er->frames_in * er->rd_frame_size);
437                            ALOGV("echo_reference_read(): shifting ref buffer by [%zu]",
438                                  er->frames_in);
439                        }
440                    }
441                }
442            } else {
443                er->delta_count = 0;
444                er->prev_delta_sign = 0;
445                ALOGV("echo_reference_read(): Constant EchoPathDelay - difference "
446                        "between reference and DMA %" PRId64, deltaNs);
447            }
448        } else {
449            ALOGV("echo_reference_read(): NEGATIVE expectedDelayNs[%" PRId64
450                 "] = er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
451                 "] - timeDiff[%" PRId64 "]",
452                 expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
453        }
454    }
455
456    if (er->frames_in < buffer->frame_count) {
457        if (buffer->frame_count > er->buf_size) {
458            er->buf_size = buffer->frame_count;
459            er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
460            ALOGV("echo_reference_read(): increasing buffer size to %zu", er->buf_size);
461        }
462        // filling up the reference buffer with 0s to match the expected delay.
463        memset((char *)er->buffer + er->frames_in * er->rd_frame_size,
464            0, (buffer->frame_count - er->frames_in) * er->rd_frame_size);
465        er->frames_in = buffer->frame_count;
466    }
467
468    memcpy(buffer->raw,
469           (char *)er->buffer,
470           buffer->frame_count * er->rd_frame_size);
471
472    er->frames_in -= buffer->frame_count;
473    memcpy(er->buffer,
474           (char *)er->buffer + buffer->frame_count * er->rd_frame_size,
475           er->frames_in * er->rd_frame_size);
476
477    // As the reference buffer is now time aligned to the microphone signal there is a zero delay
478    buffer->delay_ns = 0;
479
480    ALOGV("echo_reference_read() END %zu frames, total frames in %zu",
481          buffer->frame_count, er->frames_in);
482
483    pthread_cond_signal(&er->cond);
484
485exit:
486    pthread_mutex_unlock(&er->lock);
487    return 0;
488}
489
490
491int create_echo_reference(audio_format_t rdFormat,
492                            uint32_t rdChannelCount,
493                            uint32_t rdSamplingRate,
494                            audio_format_t wrFormat,
495                            uint32_t wrChannelCount,
496                            uint32_t wrSamplingRate,
497                            struct echo_reference_itfe **echo_reference)
498{
499    struct echo_reference *er;
500
501    ALOGV("create_echo_reference()");
502
503    if (echo_reference == NULL) {
504        return -EINVAL;
505    }
506
507    *echo_reference = NULL;
508
509    if (rdFormat != AUDIO_FORMAT_PCM_16_BIT ||
510            rdFormat != wrFormat) {
511        ALOGW("create_echo_reference bad format rd %d, wr %d", rdFormat, wrFormat);
512        return -EINVAL;
513    }
514    if ((rdChannelCount != 1 && rdChannelCount != 2) ||
515            wrChannelCount != 2) {
516        ALOGW("create_echo_reference bad channel count rd %d, wr %d", rdChannelCount,
517                wrChannelCount);
518        return -EINVAL;
519    }
520
521    er = (struct echo_reference *)calloc(1, sizeof(struct echo_reference));
522
523    er->itfe.read = echo_reference_read;
524    er->itfe.write = echo_reference_write;
525
526    er->state = ECHOREF_IDLE;
527    er->rd_format = rdFormat;
528    er->rd_channel_count = rdChannelCount;
529    er->rd_sampling_rate = rdSamplingRate;
530    er->wr_format = wrFormat;
531    er->wr_channel_count = wrChannelCount;
532    er->wr_sampling_rate = wrSamplingRate;
533    er->rd_frame_size = audio_bytes_per_sample(rdFormat) * rdChannelCount;
534    er->wr_frame_size = audio_bytes_per_sample(wrFormat) * wrChannelCount;
535    *echo_reference = &er->itfe;
536    return 0;
537}
538
539void release_echo_reference(struct echo_reference_itfe *echo_reference) {
540    struct echo_reference *er = (struct echo_reference *)echo_reference;
541
542    if (er == NULL) {
543        return;
544    }
545
546    ALOGV("EchoReference dstor");
547    echo_reference_reset_l(er);
548    if (er->resampler != NULL) {
549        release_resampler(er->resampler);
550    }
551    free(er);
552}
553
554