AudioGroup.cpp revision 9795c258778208fd96a072b7a91028285aafbc32
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
18#include <stdint.h>
19#include <string.h>
20#include <errno.h>
21#include <fcntl.h>
22#include <sys/epoll.h>
23#include <sys/types.h>
24#include <sys/socket.h>
25#include <sys/stat.h>
26#include <sys/time.h>
27#include <time.h>
28#include <arpa/inet.h>
29#include <netinet/in.h>
30
31#define LOG_TAG "AudioGroup"
32#include <cutils/atomic.h>
33#include <utils/Log.h>
34#include <utils/Errors.h>
35#include <utils/RefBase.h>
36#include <utils/threads.h>
37#include <utils/SystemClock.h>
38#include <media/AudioSystem.h>
39#include <media/AudioRecord.h>
40#include <media/AudioTrack.h>
41#include <media/mediarecorder.h>
42
43#include "jni.h"
44#include "JNIHelp.h"
45
46#include "AudioCodec.h"
47#include "EchoSuppressor.h"
48
49extern int parse(JNIEnv *env, jstring jAddress, int port, sockaddr_storage *ss);
50
51namespace {
52
53using namespace android;
54
// File descriptor for /dev/urandom. It is opened by registerAudioGroup() and
// read by AudioStream::set() to seed the RTP sequence number, timestamp and
// SSRC. It stays -1 until it has been opened.
int gRandom = -1;
56
57// We use a circular array to implement jitter buffer. The simplest way is doing
58// a modulo operation on the index while accessing the array. However modulo can
59// be expensive on some platforms, such as ARM. Thus we round up the size of the
60// array to the nearest power of 2 and then use bitwise-and instead of modulo.
61// Currently we make it 512ms long and assume packet interval is 40ms or less.
62// The first 80ms is the place where samples get mixed. The rest 432ms is the
63// real jitter buffer. For a stream at 8000Hz it takes 8192 bytes. These numbers
64// are chosen by experiments and each of them can be adjusted as needed.
65
// Originally a stream does not send packets when it is receive-only or there is
// nothing to mix. However, this causes some problems with certain firewalls and
// proxies. A firewall might remove a port mapping when there is no outgoing
// packet for a period of time, and a proxy might wait for incoming packets from
// both sides before it starts forwarding. To solve these problems, we send out
// a silence packet on the stream every second. It should be good enough to
// keep the stream alive with relatively low resources.
73
74// Other notes:
75// + We use elapsedRealtime() to get the time. Since we use 32bit variables
76//   instead of 64bit ones, comparison must be done by subtraction.
77// + Sampling rate must be multiple of 1000Hz, and packet length must be in
78//   milliseconds. No floating points.
79// + If we cannot get enough CPU, we drop samples and simulate packet loss.
80// + Resampling is not done yet, so streams in one group must use the same rate.
81//   For the first release only 8000Hz is supported.
82
// All three values are in milliseconds.
// BUFFER_SIZE:    total length of the circular jitter buffer.
// HISTORY_SIZE:   portion of the buffer kept behind the current tick, where
//                 samples get mixed.
// MEASURE_PERIOD: how long the latency must stay high before decode() trims
//                 the jitter buffer.
#define BUFFER_SIZE     512
#define HISTORY_SIZE    80
#define MEASURE_PERIOD  2000
86
87class AudioStream
88{
89public:
90    AudioStream();
91    ~AudioStream();
92    bool set(int mode, int socket, sockaddr_storage *remote,
93        AudioCodec *codec, int sampleRate, int sampleCount,
94        int codecType, int dtmfType);
95
96    void sendDtmf(int event);
97    bool mix(int32_t *output, int head, int tail, int sampleRate);
98    void encode(int tick, AudioStream *chain);
99    void decode(int tick);
100
101    enum {
102        NORMAL = 0,
103        SEND_ONLY = 1,
104        RECEIVE_ONLY = 2,
105        LAST_MODE = 2,
106    };
107
108private:
109    int mMode;
110    int mSocket;
111    sockaddr_storage mRemote;
112    AudioCodec *mCodec;
113    uint32_t mCodecMagic;
114    uint32_t mDtmfMagic;
115    bool mFixRemote;
116
117    int mTick;
118    int mSampleRate;
119    int mSampleCount;
120    int mInterval;
121    int mKeepAlive;
122
123    int16_t *mBuffer;
124    int mBufferMask;
125    int mBufferHead;
126    int mBufferTail;
127    int mLatencyTimer;
128    int mLatencyScore;
129
130    uint16_t mSequence;
131    uint32_t mTimestamp;
132    uint32_t mSsrc;
133
134    int mDtmfEvent;
135    int mDtmfStart;
136
137    AudioStream *mNext;
138
139    friend class AudioGroup;
140};
141
142AudioStream::AudioStream()
143{
144    mSocket = -1;
145    mCodec = NULL;
146    mBuffer = NULL;
147    mNext = NULL;
148}
149
150AudioStream::~AudioStream()
151{
152    close(mSocket);
153    delete mCodec;
154    delete [] mBuffer;
155    LOGD("stream[%d] is dead", mSocket);
156}
157
158bool AudioStream::set(int mode, int socket, sockaddr_storage *remote,
159    AudioCodec *codec, int sampleRate, int sampleCount,
160    int codecType, int dtmfType)
161{
162    if (mode < 0 || mode > LAST_MODE) {
163        return false;
164    }
165    mMode = mode;
166
167    mCodecMagic = (0x8000 | codecType) << 16;
168    mDtmfMagic = (dtmfType == -1) ? 0 : (0x8000 | dtmfType) << 16;
169
170    mTick = elapsedRealtime();
171    mSampleRate = sampleRate / 1000;
172    mSampleCount = sampleCount;
173    mInterval = mSampleCount / mSampleRate;
174
175    // Allocate jitter buffer.
176    for (mBufferMask = 8; mBufferMask < mSampleRate; mBufferMask <<= 1);
177    mBufferMask *= BUFFER_SIZE;
178    mBuffer = new int16_t[mBufferMask];
179    --mBufferMask;
180    mBufferHead = 0;
181    mBufferTail = 0;
182    mLatencyTimer = 0;
183    mLatencyScore = 0;
184
185    // Initialize random bits.
186    read(gRandom, &mSequence, sizeof(mSequence));
187    read(gRandom, &mTimestamp, sizeof(mTimestamp));
188    read(gRandom, &mSsrc, sizeof(mSsrc));
189
190    mDtmfEvent = -1;
191    mDtmfStart = 0;
192
193    // Only take over these things when succeeded.
194    mSocket = socket;
195    if (codec) {
196        mRemote = *remote;
197        mCodec = codec;
198
199        // Here we should never get an private address, but some buggy proxy
200        // servers do give us one. To solve this, we replace the address when
201        // the first time we successfully decode an incoming packet.
202        mFixRemote = false;
203        if (remote->ss_family == AF_INET) {
204            unsigned char *address =
205                (unsigned char *)&((sockaddr_in *)remote)->sin_addr;
206            if (address[0] == 10 ||
207                (address[0] == 172 && (address[1] >> 4) == 1) ||
208                (address[0] == 192 && address[1] == 168)) {
209                mFixRemote = true;
210            }
211        }
212    }
213
214    LOGD("stream[%d] is configured as %s %dkHz %dms mode %d", mSocket,
215        (codec ? codec->name : "RAW"), mSampleRate, mInterval, mMode);
216    return true;
217}
218
219void AudioStream::sendDtmf(int event)
220{
221    if (mDtmfMagic != 0) {
222        mDtmfEvent = event << 24;
223        mDtmfStart = mTimestamp + mSampleCount;
224    }
225}
226
227bool AudioStream::mix(int32_t *output, int head, int tail, int sampleRate)
228{
229    if (mMode == SEND_ONLY) {
230        return false;
231    }
232
233    if (head - mBufferHead < 0) {
234        head = mBufferHead;
235    }
236    if (tail - mBufferTail > 0) {
237        tail = mBufferTail;
238    }
239    if (tail - head <= 0) {
240        return false;
241    }
242
243    head *= mSampleRate;
244    tail *= mSampleRate;
245
246    if (sampleRate == mSampleRate) {
247        for (int i = head; i - tail < 0; ++i) {
248            output[i - head] += mBuffer[i & mBufferMask];
249        }
250    } else {
251        // TODO: implement resampling.
252        return false;
253    }
254    return true;
255}
256
257void AudioStream::encode(int tick, AudioStream *chain)
258{
259    if (tick - mTick >= mInterval) {
260        // We just missed the train. Pretend that packets in between are lost.
261        int skipped = (tick - mTick) / mInterval;
262        mTick += skipped * mInterval;
263        mSequence += skipped;
264        mTimestamp += skipped * mSampleCount;
265        LOGV("stream[%d] skips %d packets", mSocket, skipped);
266    }
267
268    tick = mTick;
269    mTick += mInterval;
270    ++mSequence;
271    mTimestamp += mSampleCount;
272
273    // If there is an ongoing DTMF event, send it now.
274    if (mMode != RECEIVE_ONLY && mDtmfEvent != -1) {
275        int duration = mTimestamp - mDtmfStart;
276        // Make sure duration is reasonable.
277        if (duration >= 0 && duration < mSampleRate * 100) {
278            duration += mSampleCount;
279            int32_t buffer[4] = {
280                htonl(mDtmfMagic | mSequence),
281                htonl(mDtmfStart),
282                mSsrc,
283                htonl(mDtmfEvent | duration),
284            };
285            if (duration >= mSampleRate * 100) {
286                buffer[3] |= htonl(1 << 23);
287                mDtmfEvent = -1;
288            }
289            sendto(mSocket, buffer, sizeof(buffer), MSG_DONTWAIT,
290                (sockaddr *)&mRemote, sizeof(mRemote));
291            return;
292        }
293        mDtmfEvent = -1;
294    }
295
296    int32_t buffer[mSampleCount + 3];
297    int16_t samples[mSampleCount];
298    if (mMode == RECEIVE_ONLY) {
299        if ((mTick ^ mKeepAlive) >> 10 == 0) {
300            return;
301        }
302        mKeepAlive = mTick;
303        memset(samples, 0, sizeof(samples));
304    } else {
305        // Mix all other streams.
306        bool mixed = false;
307        memset(buffer, 0, sizeof(buffer));
308        while (chain) {
309            if (chain != this &&
310                chain->mix(buffer, tick - mInterval, tick, mSampleRate)) {
311                mixed = true;
312            }
313            chain = chain->mNext;
314        }
315
316        if (mixed) {
317            // Saturate into 16 bits.
318            for (int i = 0; i < mSampleCount; ++i) {
319                int32_t sample = buffer[i];
320                if (sample < -32768) {
321                    sample = -32768;
322                }
323                if (sample > 32767) {
324                    sample = 32767;
325                }
326                samples[i] = sample;
327            }
328        } else {
329            if ((mTick ^ mKeepAlive) >> 10 == 0) {
330                return;
331            }
332            mKeepAlive = mTick;
333            memset(samples, 0, sizeof(samples));
334            LOGV("stream[%d] no data", mSocket);
335        }
336    }
337
338    if (!mCodec) {
339        // Special case for device stream.
340        send(mSocket, samples, sizeof(samples), MSG_DONTWAIT);
341        return;
342    }
343
344    // Cook the packet and send it out.
345    buffer[0] = htonl(mCodecMagic | mSequence);
346    buffer[1] = htonl(mTimestamp);
347    buffer[2] = mSsrc;
348    int length = mCodec->encode(&buffer[3], samples);
349    if (length <= 0) {
350        LOGV("stream[%d] encoder error", mSocket);
351        return;
352    }
353    sendto(mSocket, buffer, length + 12, MSG_DONTWAIT, (sockaddr *)&mRemote,
354        sizeof(mRemote));
355}
356
357void AudioStream::decode(int tick)
358{
359    char c;
360    if (mMode == SEND_ONLY) {
361        recv(mSocket, &c, 1, MSG_DONTWAIT);
362        return;
363    }
364
365    // Make sure mBufferHead and mBufferTail are reasonable.
366    if ((unsigned int)(tick + BUFFER_SIZE - mBufferHead) > BUFFER_SIZE * 2) {
367        mBufferHead = tick - HISTORY_SIZE;
368        mBufferTail = mBufferHead;
369    }
370
371    if (tick - mBufferHead > HISTORY_SIZE) {
372        // Throw away outdated samples.
373        mBufferHead = tick - HISTORY_SIZE;
374        if (mBufferTail - mBufferHead < 0) {
375            mBufferTail = mBufferHead;
376        }
377    }
378
379    // Adjust the jitter buffer if the latency keeps larger than two times of the
380    // packet interval in the past two seconds.
381    int score = mBufferTail - tick - mInterval * 2;
382    if (mLatencyScore > score) {
383        mLatencyScore = score;
384    }
385    if (mLatencyScore <= 0) {
386        mLatencyTimer = tick;
387        mLatencyScore = score;
388    } else if (tick - mLatencyTimer >= MEASURE_PERIOD) {
389        LOGV("stream[%d] reduces latency of %dms", mSocket, mLatencyScore);
390        mBufferTail -= mLatencyScore;
391        mLatencyTimer = tick;
392    }
393
394    if (mBufferTail - mBufferHead > BUFFER_SIZE - mInterval) {
395        // Buffer overflow. Drop the packet.
396        LOGV("stream[%d] buffer overflow", mSocket);
397        recv(mSocket, &c, 1, MSG_DONTWAIT);
398        return;
399    }
400
401    // Receive the packet and decode it.
402    int16_t samples[mSampleCount];
403    int length = 0;
404    if (!mCodec) {
405        // Special case for device stream.
406        length = recv(mSocket, samples, sizeof(samples),
407            MSG_TRUNC | MSG_DONTWAIT) >> 1;
408    } else {
409        __attribute__((aligned(4))) uint8_t buffer[2048];
410        sockaddr_storage remote;
411        socklen_t len = sizeof(remote);
412
413        length = recvfrom(mSocket, buffer, sizeof(buffer),
414            MSG_TRUNC | MSG_DONTWAIT, (sockaddr *)&remote, &len);
415
416        // Do we need to check SSRC, sequence, and timestamp? They are not
417        // reliable but at least they can be used to identify duplicates?
418        if (length < 12 || length > (int)sizeof(buffer) ||
419            (ntohl(*(uint32_t *)buffer) & 0xC07F0000) != mCodecMagic) {
420            LOGV("stream[%d] malformed packet", mSocket);
421            return;
422        }
423        int offset = 12 + ((buffer[0] & 0x0F) << 2);
424        if ((buffer[0] & 0x10) != 0) {
425            offset += 4 + (ntohs(*(uint16_t *)&buffer[offset + 2]) << 2);
426        }
427        if ((buffer[0] & 0x20) != 0) {
428            length -= buffer[length - 1];
429        }
430        length -= offset;
431        if (length >= 0) {
432            length = mCodec->decode(samples, &buffer[offset], length);
433        }
434        if (length > 0 && mFixRemote) {
435            mRemote = remote;
436            mFixRemote = false;
437        }
438    }
439    if (length <= 0) {
440        LOGV("stream[%d] decoder error", mSocket);
441        return;
442    }
443
444    if (tick - mBufferTail > 0) {
445        // Buffer underrun. Reset the jitter buffer.
446        LOGV("stream[%d] buffer underrun", mSocket);
447        if (mBufferTail - mBufferHead <= 0) {
448            mBufferHead = tick + mInterval;
449            mBufferTail = mBufferHead;
450        } else {
451            int tail = (tick + mInterval) * mSampleRate;
452            for (int i = mBufferTail * mSampleRate; i - tail < 0; ++i) {
453                mBuffer[i & mBufferMask] = 0;
454            }
455            mBufferTail = tick + mInterval;
456        }
457    }
458
459    // Append to the jitter buffer.
460    int tail = mBufferTail * mSampleRate;
461    for (int i = 0; i < mSampleCount; ++i) {
462        mBuffer[tail & mBufferMask] = samples[i];
463        ++tail;
464    }
465    mBufferTail += mInterval;
466}
467
468//------------------------------------------------------------------------------
469
470class AudioGroup
471{
472public:
473    AudioGroup();
474    ~AudioGroup();
475    bool set(int sampleRate, int sampleCount);
476
477    bool setMode(int mode);
478    bool sendDtmf(int event);
479    bool add(AudioStream *stream);
480    bool remove(int socket);
481
482    enum {
483        ON_HOLD = 0,
484        MUTED = 1,
485        NORMAL = 2,
486        EC_ENABLED = 3,
487        LAST_MODE = 3,
488    };
489
490private:
491    AudioStream *mChain;
492    int mEventQueue;
493    volatile int mDtmfEvent;
494
495    int mMode;
496    int mSampleRate;
497    int mSampleCount;
498    int mDeviceSocket;
499
500    class NetworkThread : public Thread
501    {
502    public:
503        NetworkThread(AudioGroup *group) : Thread(false), mGroup(group) {}
504
505        bool start()
506        {
507            if (run("Network", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
508                LOGE("cannot start network thread");
509                return false;
510            }
511            return true;
512        }
513
514    private:
515        AudioGroup *mGroup;
516        bool threadLoop();
517    };
518    sp<NetworkThread> mNetworkThread;
519
520    class DeviceThread : public Thread
521    {
522    public:
523        DeviceThread(AudioGroup *group) : Thread(false), mGroup(group) {}
524
525        bool start()
526        {
527            if (run("Device", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
528                LOGE("cannot start device thread");
529                return false;
530            }
531            return true;
532        }
533
534    private:
535        AudioGroup *mGroup;
536        bool threadLoop();
537    };
538    sp<DeviceThread> mDeviceThread;
539};
540
541AudioGroup::AudioGroup()
542{
543    mMode = ON_HOLD;
544    mChain = NULL;
545    mEventQueue = -1;
546    mDtmfEvent = -1;
547    mDeviceSocket = -1;
548    mNetworkThread = new NetworkThread(this);
549    mDeviceThread = new DeviceThread(this);
550}
551
552AudioGroup::~AudioGroup()
553{
554    mNetworkThread->requestExitAndWait();
555    mDeviceThread->requestExitAndWait();
556    close(mEventQueue);
557    close(mDeviceSocket);
558    while (mChain) {
559        AudioStream *next = mChain->mNext;
560        delete mChain;
561        mChain = next;
562    }
563    LOGD("group[%d] is dead", mDeviceSocket);
564}
565
566bool AudioGroup::set(int sampleRate, int sampleCount)
567{
568    mEventQueue = epoll_create(2);
569    if (mEventQueue == -1) {
570        LOGE("epoll_create: %s", strerror(errno));
571        return false;
572    }
573
574    mSampleRate = sampleRate;
575    mSampleCount = sampleCount;
576
577    // Create device socket.
578    int pair[2];
579    if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair)) {
580        LOGE("socketpair: %s", strerror(errno));
581        return false;
582    }
583    mDeviceSocket = pair[0];
584
585    // Create device stream.
586    mChain = new AudioStream;
587    if (!mChain->set(AudioStream::NORMAL, pair[1], NULL, NULL,
588        sampleRate, sampleCount, -1, -1)) {
589        close(pair[1]);
590        LOGE("cannot initialize device stream");
591        return false;
592    }
593
594    // Give device socket a reasonable timeout.
595    timeval tv;
596    tv.tv_sec = 0;
597    tv.tv_usec = 1000 * sampleCount / sampleRate * 500;
598    if (setsockopt(pair[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
599        LOGE("setsockopt: %s", strerror(errno));
600        return false;
601    }
602
603    // Add device stream into event queue.
604    epoll_event event;
605    event.events = EPOLLIN;
606    event.data.ptr = mChain;
607    if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, pair[1], &event)) {
608        LOGE("epoll_ctl: %s", strerror(errno));
609        return false;
610    }
611
612    // Anything else?
613    LOGD("stream[%d] joins group[%d]", pair[1], pair[0]);
614    return true;
615}
616
617bool AudioGroup::setMode(int mode)
618{
619    if (mode < 0 || mode > LAST_MODE) {
620        return false;
621    }
622    if (mMode == mode) {
623        return true;
624    }
625
626    mDeviceThread->requestExitAndWait();
627    LOGD("group[%d] switches from mode %d to %d", mDeviceSocket, mMode, mode);
628    mMode = mode;
629    return (mode == ON_HOLD) || mDeviceThread->start();
630}
631
632bool AudioGroup::sendDtmf(int event)
633{
634    if (event < 0 || event > 15) {
635        return false;
636    }
637
638    // DTMF is rarely used, so we try to make it as lightweight as possible.
639    // Using volatile might be dodgy, but using a pipe or pthread primitives
640    // or stop-set-restart threads seems too heavy. Will investigate later.
641    timespec ts;
642    ts.tv_sec = 0;
643    ts.tv_nsec = 100000000;
644    for (int i = 0; mDtmfEvent != -1 && i < 20; ++i) {
645        nanosleep(&ts, NULL);
646    }
647    if (mDtmfEvent != -1) {
648        return false;
649    }
650    mDtmfEvent = event;
651    nanosleep(&ts, NULL);
652    return true;
653}
654
655bool AudioGroup::add(AudioStream *stream)
656{
657    mNetworkThread->requestExitAndWait();
658
659    epoll_event event;
660    event.events = EPOLLIN;
661    event.data.ptr = stream;
662    if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, stream->mSocket, &event)) {
663        LOGE("epoll_ctl: %s", strerror(errno));
664        return false;
665    }
666
667    stream->mNext = mChain->mNext;
668    mChain->mNext = stream;
669    if (!mNetworkThread->start()) {
670        // Only take over the stream when succeeded.
671        mChain->mNext = stream->mNext;
672        return false;
673    }
674
675    LOGD("stream[%d] joins group[%d]", stream->mSocket, mDeviceSocket);
676    return true;
677}
678
679bool AudioGroup::remove(int socket)
680{
681    mNetworkThread->requestExitAndWait();
682
683    for (AudioStream *stream = mChain; stream->mNext; stream = stream->mNext) {
684        AudioStream *target = stream->mNext;
685        if (target->mSocket == socket) {
686            if (epoll_ctl(mEventQueue, EPOLL_CTL_DEL, socket, NULL)) {
687                LOGE("epoll_ctl: %s", strerror(errno));
688                return false;
689            }
690            stream->mNext = target->mNext;
691            LOGD("stream[%d] leaves group[%d]", socket, mDeviceSocket);
692            delete target;
693            break;
694        }
695    }
696
697    // Do not start network thread if there is only one stream.
698    if (!mChain->mNext || !mNetworkThread->start()) {
699        return false;
700    }
701    return true;
702}
703
704bool AudioGroup::NetworkThread::threadLoop()
705{
706    AudioStream *chain = mGroup->mChain;
707    int tick = elapsedRealtime();
708    int deadline = tick + 10;
709    int count = 0;
710
711    for (AudioStream *stream = chain; stream; stream = stream->mNext) {
712        if (tick - stream->mTick >= 0) {
713            stream->encode(tick, chain);
714        }
715        if (deadline - stream->mTick > 0) {
716            deadline = stream->mTick;
717        }
718        ++count;
719    }
720
721    int event = mGroup->mDtmfEvent;
722    if (event != -1) {
723        for (AudioStream *stream = chain; stream; stream = stream->mNext) {
724            stream->sendDtmf(event);
725        }
726        mGroup->mDtmfEvent = -1;
727    }
728
729    deadline -= tick;
730    if (deadline < 1) {
731        deadline = 1;
732    }
733
734    epoll_event events[count];
735    count = epoll_wait(mGroup->mEventQueue, events, count, deadline);
736    if (count == -1) {
737        LOGE("epoll_wait: %s", strerror(errno));
738        return false;
739    }
740    for (int i = 0; i < count; ++i) {
741        ((AudioStream *)events[i].data.ptr)->decode(tick);
742    }
743
744    return true;
745}
746
747bool AudioGroup::DeviceThread::threadLoop()
748{
749    int mode = mGroup->mMode;
750    int sampleRate = mGroup->mSampleRate;
751    int sampleCount = mGroup->mSampleCount;
752    int deviceSocket = mGroup->mDeviceSocket;
753
754    // Find out the frame count for AudioTrack and AudioRecord.
755    int output = 0;
756    int input = 0;
757    if (AudioTrack::getMinFrameCount(&output, AudioSystem::VOICE_CALL,
758        sampleRate) != NO_ERROR || output <= 0 ||
759        AudioRecord::getMinFrameCount(&input, sampleRate,
760        AudioSystem::PCM_16_BIT, 1) != NO_ERROR || input <= 0) {
761        LOGE("cannot compute frame count");
762        return false;
763    }
764    LOGD("reported frame count: output %d, input %d", output, input);
765
766    if (output < sampleCount * 2) {
767        output = sampleCount * 2;
768    }
769    if (input < sampleCount * 2) {
770        input = sampleCount * 2;
771    }
772    LOGD("adjusted frame count: output %d, input %d", output, input);
773
774    // Initialize AudioTrack and AudioRecord.
775    AudioTrack track;
776    AudioRecord record;
777    if (track.set(AudioSystem::VOICE_CALL, sampleRate, AudioSystem::PCM_16_BIT,
778        AudioSystem::CHANNEL_OUT_MONO, output) != NO_ERROR ||
779        record.set(AUDIO_SOURCE_MIC, sampleRate, AudioSystem::PCM_16_BIT,
780        AudioSystem::CHANNEL_IN_MONO, input) != NO_ERROR) {
781        LOGE("cannot initialize audio device");
782        return false;
783    }
784    LOGD("latency: output %d, input %d", track.latency(), record.latency());
785
786    // Initialize echo canceler.
787    EchoSuppressor echo(sampleCount,
788        (track.latency() + record.latency()) * sampleRate / 1000);
789
790    // Give device socket a reasonable buffer size.
791    setsockopt(deviceSocket, SOL_SOCKET, SO_RCVBUF, &output, sizeof(output));
792    setsockopt(deviceSocket, SOL_SOCKET, SO_SNDBUF, &output, sizeof(output));
793
794    // Drain device socket.
795    char c;
796    while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);
797
798    // Start AudioRecord before AudioTrack. This prevents AudioTrack from being
799    // disabled due to buffer underrun while waiting for AudioRecord.
800    if (mode != MUTED) {
801        record.start();
802        int16_t one;
803        record.read(&one, sizeof(one));
804    }
805    track.start();
806
807    while (!exitPending()) {
808        int16_t output[sampleCount];
809        if (recv(deviceSocket, output, sizeof(output), 0) <= 0) {
810            memset(output, 0, sizeof(output));
811        }
812
813        int16_t input[sampleCount];
814        int toWrite = sampleCount;
815        int toRead = (mode == MUTED) ? 0 : sampleCount;
816        int chances = 100;
817
818        while (--chances > 0 && (toWrite > 0 || toRead > 0)) {
819            if (toWrite > 0) {
820                AudioTrack::Buffer buffer;
821                buffer.frameCount = toWrite;
822
823                status_t status = track.obtainBuffer(&buffer, 1);
824                if (status == NO_ERROR) {
825                    int offset = sampleCount - toWrite;
826                    memcpy(buffer.i8, &output[offset], buffer.size);
827                    toWrite -= buffer.frameCount;
828                    track.releaseBuffer(&buffer);
829                } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
830                    LOGE("cannot write to AudioTrack");
831                    return true;
832                }
833            }
834
835            if (toRead > 0) {
836                AudioRecord::Buffer buffer;
837                buffer.frameCount = toRead;
838
839                status_t status = record.obtainBuffer(&buffer, 1);
840                if (status == NO_ERROR) {
841                    int offset = sampleCount - toRead;
842                    memcpy(&input[offset], buffer.i8, buffer.size);
843                    toRead -= buffer.frameCount;
844                    record.releaseBuffer(&buffer);
845                } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
846                    LOGE("cannot read from AudioRecord");
847                    return true;
848                }
849            }
850        }
851
852        if (chances <= 0) {
853            LOGW("device loop timeout");
854            while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);
855        }
856
857        if (mode != MUTED) {
858            if (mode == NORMAL) {
859                send(deviceSocket, input, sizeof(input), MSG_DONTWAIT);
860            } else {
861                echo.run(output, input);
862                send(deviceSocket, input, sizeof(input), MSG_DONTWAIT);
863            }
864        }
865    }
866    return false;
867}
868
869//------------------------------------------------------------------------------
870
// Field IDs of android.net.rtp.AudioGroup, looked up by registerAudioGroup().
// gNative refers to the int field "mNative", which holds the AudioGroup
// pointer; gMode refers to the int field "mMode".
static jfieldID gNative;
static jfieldID gMode;
873
874void add(JNIEnv *env, jobject thiz, jint mode,
875    jint socket, jstring jRemoteAddress, jint remotePort,
876    jstring jCodecSpec, jint dtmfType)
877{
878    AudioCodec *codec = NULL;
879    AudioStream *stream = NULL;
880    AudioGroup *group = NULL;
881
882    // Sanity check.
883    sockaddr_storage remote;
884    if (parse(env, jRemoteAddress, remotePort, &remote) < 0) {
885        // Exception already thrown.
886        return;
887    }
888    if (!jCodecSpec) {
889        jniThrowNullPointerException(env, "codecSpec");
890        return;
891    }
892    const char *codecSpec = env->GetStringUTFChars(jCodecSpec, NULL);
893    if (!codecSpec) {
894        // Exception already thrown.
895        return;
896    }
897
898    // Create audio codec.
899    int codecType = -1;
900    char codecName[16];
901    int sampleRate = -1;
902    sscanf(codecSpec, "%d %15[^/]%*c%d", &codecType, codecName, &sampleRate);
903    codec = newAudioCodec(codecName);
904    int sampleCount = (codec ? codec->set(sampleRate, codecSpec) : -1);
905    env->ReleaseStringUTFChars(jCodecSpec, codecSpec);
906    if (sampleCount <= 0) {
907        jniThrowException(env, "java/lang/IllegalStateException",
908            "cannot initialize audio codec");
909        goto error;
910    }
911
912    // Create audio stream.
913    stream = new AudioStream;
914    if (!stream->set(mode, socket, &remote, codec, sampleRate, sampleCount,
915        codecType, dtmfType)) {
916        jniThrowException(env, "java/lang/IllegalStateException",
917            "cannot initialize audio stream");
918        goto error;
919    }
920    socket = -1;
921    codec = NULL;
922
923    // Create audio group.
924    group = (AudioGroup *)env->GetIntField(thiz, gNative);
925    if (!group) {
926        int mode = env->GetIntField(thiz, gMode);
927        group = new AudioGroup;
928        if (!group->set(8000, 256) || !group->setMode(mode)) {
929            jniThrowException(env, "java/lang/IllegalStateException",
930                "cannot initialize audio group");
931            goto error;
932        }
933    }
934
935    // Add audio stream into audio group.
936    if (!group->add(stream)) {
937        jniThrowException(env, "java/lang/IllegalStateException",
938            "cannot add audio stream");
939        goto error;
940    }
941
942    // Succeed.
943    env->SetIntField(thiz, gNative, (int)group);
944    return;
945
946error:
947    delete group;
948    delete stream;
949    delete codec;
950    close(socket);
951    env->SetIntField(thiz, gNative, NULL);
952}
953
954void remove(JNIEnv *env, jobject thiz, jint socket)
955{
956    AudioGroup *group = (AudioGroup *)env->GetIntField(thiz, gNative);
957    if (group) {
958        if (socket == -1 || !group->remove(socket)) {
959            delete group;
960            env->SetIntField(thiz, gNative, NULL);
961        }
962    }
963}
964
965void setMode(JNIEnv *env, jobject thiz, jint mode)
966{
967    if (mode < 0 || mode > AudioGroup::LAST_MODE) {
968        jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
969        return;
970    }
971    AudioGroup *group = (AudioGroup *)env->GetIntField(thiz, gNative);
972    if (group && !group->setMode(mode)) {
973        jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
974        return;
975    }
976    env->SetIntField(thiz, gMode, mode);
977}
978
979void sendDtmf(JNIEnv *env, jobject thiz, jint event)
980{
981    AudioGroup *group = (AudioGroup *)env->GetIntField(thiz, gNative);
982    if (group && !group->sendDtmf(event)) {
983        jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
984    }
985}
986
// Native methods of android.net.rtp.AudioGroup: Java name, JNI signature and
// the function in this file that implements it. Registered by
// registerAudioGroup().
JNINativeMethod gMethods[] = {
    {"add", "(IILjava/lang/String;ILjava/lang/String;I)V", (void *)add},
    {"remove", "(I)V", (void *)remove},
    {"setMode", "(I)V", (void *)setMode},
    {"sendDtmf", "(I)V", (void *)sendDtmf},
};
993
994} // namespace
995
996int registerAudioGroup(JNIEnv *env)
997{
998    gRandom = open("/dev/urandom", O_RDONLY);
999    if (gRandom == -1) {
1000        LOGE("urandom: %s", strerror(errno));
1001        return -1;
1002    }
1003
1004    jclass clazz;
1005    if ((clazz = env->FindClass("android/net/rtp/AudioGroup")) == NULL ||
1006        (gNative = env->GetFieldID(clazz, "mNative", "I")) == NULL ||
1007        (gMode = env->GetFieldID(clazz, "mMode", "I")) == NULL ||
1008        env->RegisterNatives(clazz, gMethods, NELEM(gMethods)) < 0) {
1009        LOGE("JNI registration failed");
1010        return -1;
1011    }
1012    return 0;
1013}
1014