// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/android/audio_decoder_android.h"

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <iterator>
#include <limits>
#include <string>
#include <vector>

#include "base/file_descriptor_posix.h"
#include "base/logging.h"
#include "base/memory/shared_memory.h"
#include "base/posix/eintr_wrapper.h"
#include "content/common/view_messages.h"
#include "media/base/android/webaudio_media_codec_info.h"
#include "media/base/audio_bus.h"
#include "media/base/limits.h"
#include "third_party/WebKit/public/platform/WebAudioBus.h"

namespace content {

class AudioDecoderIO {
 public:
  AudioDecoderIO(const char* data, size_t data_size);
  ~AudioDecoderIO();
  bool ShareEncodedToProcess(base::SharedMemoryHandle* handle);

  // Returns true if AudioDecoderIO was successfully created.
  bool IsValid() const;

  int read_fd() const { return read_fd_; }
  int write_fd() const { return write_fd_; }

 private:
  // Shared memory that will hold the encoded audio data.  This is
  // used by MediaCodec for decoding.
  base::SharedMemory encoded_shared_memory_;

  // A pipe used to communicate with MediaCodec.  MediaCodec owns
  // write_fd_ and writes to it.
  int read_fd_;
  int write_fd_;

  DISALLOW_COPY_AND_ASSIGN(AudioDecoderIO);
};

AudioDecoderIO::AudioDecoderIO(const char* data, size_t data_size)
    : read_fd_(-1),
      write_fd_(-1) {
  if (!data || !data_size || data_size > 0x80000000)
    return;

  // Create the shared memory and copy our data to it so that
  // MediaCodec can access it.
  encoded_shared_memory_.CreateAndMapAnonymous(data_size);

  if (!encoded_shared_memory_.memory())
    return;

  memcpy(encoded_shared_memory_.memory(), data, data_size);

  // Create a pipe for reading/writing the decoded PCM data
  int pipefd[2];

  if (pipe(pipefd))
    return;

  read_fd_ = pipefd[0];
  write_fd_ = pipefd[1];
}

AudioDecoderIO::~AudioDecoderIO() {
  // Close the read end of the pipe.  The write end should have been
  // closed by MediaCodec.
  if (read_fd_ >= 0 && close(read_fd_)) {
    DVLOG(1) << "Cannot close read fd " << read_fd_
             << ": " << strerror(errno);
  }
}

bool AudioDecoderIO::IsValid() const {
  return read_fd_ >= 0 && write_fd_ >= 0 &&
      encoded_shared_memory_.memory();
}

bool AudioDecoderIO::ShareEncodedToProcess(base::SharedMemoryHandle* handle) {
  return encoded_shared_memory_.ShareToProcess(
      base::Process::Current().handle(),
      handle);
}

static float ConvertSampleToFloat(int16_t sample) {
  const float kMaxScale = 1.0f / std::numeric_limits<int16_t>::max();
  const float kMinScale = -1.0f / std::numeric_limits<int16_t>::min();

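  // The scale factor differs for negative and positive samples so that the
  // extremes map exactly onto [-1, 1]: -32768 * kMinScale == -1.0f and
  // 32767 * kMaxScale == 1.0f, with no clipping needed on either side.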
  return sample * (sample < 0 ? kMinScale : kMaxScale);
}

// A basic WAVE file decoder.  See
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/ for a
// basic guide to the WAVE file format.
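//
// For reference, the layout this decoder expects is roughly:
//
//   "RIFF" <4-byte chunk size> "WAVE"
//   followed by a sequence of chunks, each introduced by a 4-byte ID and a
//   4-byte little-endian size, including:
//     "fmt " - audio format, channel count, sample rate, bits per sample
//     "data" - the interleaved PCM samples themselves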
class WAVEDecoder {
 public:
  WAVEDecoder(const uint8_t* data, size_t data_size);
  ~WAVEDecoder();

  // Try to decode the data as a WAVE file.  If the data is a supported
  // WAVE file, |destination_bus| is filled with the decoded data and
  // DecodeWAVEFile returns true.  Otherwise, DecodeWAVEFile returns
  // false.
  bool DecodeWAVEFile(blink::WebAudioBus* destination_bus);

 private:
  // Minimum number of bytes in a WAVE file to hold all of the data we
  // need to interpret it as a WAVE file.
  static const unsigned kMinimumWAVLength = 44;

  // Number of bytes in the chunk ID field.
  static const unsigned kChunkIDLength = 4;

  // Number of bytes in the chunk size field.
  static const unsigned kChunkSizeLength = 4;

  // Number of bytes in the format field of the "RIFF" chunk.
  static const unsigned kFormatFieldLength = 4;

  // Number of bytes in a valid "fmt" chunk.
  static const unsigned kFMTChunkLength = 16;

  // Supported audio format in a WAVE file.
  // TODO(rtoy): Consider supporting other formats here, if necessary.
  static const int16_t kAudioFormatPCM = 1;

  // Maximum number (inclusive) of bytes per sample supported by this
  // decoder.
  static const unsigned kMaximumBytesPerSample = 3;

  // Read an unsigned integer of |length| bytes from |buffer|.  The
  // integer is interpreted as being in little-endian order.
  uint32_t ReadUnsignedInteger(const uint8_t* buffer, size_t length);

  // Read a PCM sample from the WAVE data at |pcm_data|.
  int16_t ReadPCMSample(const uint8_t* pcm_data);

  // Read a WAVE chunk header including the chunk ID and chunk size.
  // Returns false if the header could not be read.
  bool ReadChunkHeader();

  // Read and parse the "fmt" chunk.  Returns false if the fmt chunk
  // could not be read or contained unsupported formats.
  bool ReadFMTChunk();

  // Read data chunk and save it to |destination_bus|.  Returns false
  // if the data chunk could not be read correctly.
  bool CopyDataChunkToBus(blink::WebAudioBus* destination_bus);

  // The WAVE chunk ID that identifies the chunk.
  uint8_t chunk_id_[kChunkIDLength];

  // The number of bytes in the data portion of the chunk.
  size_t chunk_size_;

  // The total number of bytes in the encoded data.
  size_t data_size_;

  // The current position within the WAVE file.
  const uint8_t* buffer_;

  // Points one byte past the end of the in-memory WAVE file.  Used for
  // detecting if we've reached the end of the file.
  const uint8_t* buffer_end_;

  size_t bytes_per_sample_;

  uint16_t number_of_channels_;

  // Sample rate of the WAVE data, in Hz.
  uint32_t sample_rate_;

  DISALLOW_COPY_AND_ASSIGN(WAVEDecoder);
};

WAVEDecoder::WAVEDecoder(const uint8_t* encoded_data, size_t data_size)
    : data_size_(data_size),
      buffer_(encoded_data),
      buffer_end_(encoded_data + 1),
      bytes_per_sample_(0),
      number_of_channels_(0),
      sample_rate_(0) {
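  // buffer_end_ starts out pointing just past the first byte; only extend
  // it when buffer_ + data_size doesn't wrap around, so that a bogus
  // data_size makes the later bounds checks fail rather than read out of
  // range.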
  if (buffer_ + data_size > buffer_)
    buffer_end_ = buffer_ + data_size;
}

WAVEDecoder::~WAVEDecoder() {}

uint32_t WAVEDecoder::ReadUnsignedInteger(const uint8_t* buffer,
                                          size_t length) {
  unsigned value = 0;

  if (length == 0 || length > sizeof(value)) {
    DCHECK(false) << "ReadUnsignedInteger: Invalid length: " << length;
    return 0;
  }

  // All integer fields in a WAVE file are little-endian.
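  // For example, the bytes {0x44, 0xac, 0x00, 0x00} read with length 4
  // yield 0x0000ac44 == 44100.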
  for (size_t k = length; k > 0; --k)
    value = (value << 8) + buffer[k - 1];

  return value;
}

int16_t WAVEDecoder::ReadPCMSample(const uint8_t* pcm_data) {
  uint32_t unsigned_sample = ReadUnsignedInteger(pcm_data, bytes_per_sample_);
  int16_t sample;

  // Convert the unsigned data into a 16-bit PCM sample.
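  // For example, an 8-bit sample of 0x00 becomes -32768 and 0xff becomes
  // 0x7f00, while a 24-bit sample simply has its low-order byte dropped.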
  switch (bytes_per_sample_) {
    case 1:
      sample = (unsigned_sample - 128) << 8;
      break;
    case 2:
      sample = static_cast<int16_t>(unsigned_sample);
      break;
    case 3:
      // Android currently converts 24-bit WAVE data into 16-bit
      // samples by taking the high-order 16 bits without rounding.
      // We do the same here for consistency.
      sample = static_cast<int16_t>(unsigned_sample >> 8);
      break;
    default:
      sample = 0;
      break;
  }
  return sample;
}

bool WAVEDecoder::ReadChunkHeader() {
  if (buffer_ + kChunkIDLength + kChunkSizeLength >= buffer_end_)
    return false;

  memcpy(chunk_id_, buffer_, kChunkIDLength);

  chunk_size_ = ReadUnsignedInteger(buffer_ + kChunkIDLength, kChunkSizeLength);

  // Adjust for padding
  if (chunk_size_ % 2)
    ++chunk_size_;

  // Check for completely bogus chunk size.
  if (chunk_size_ > data_size_)
    return false;

  return true;
}

bool WAVEDecoder::ReadFMTChunk() {
  // The fmt chunk has basic info about the format of the audio
  // data.  Only a basic PCM format is supported.
  if (chunk_size_ < kFMTChunkLength) {
    DVLOG(1) << "FMT chunk too short: " << chunk_size_;
    return false;
  }

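  // Field layout within the fmt chunk data, with byte offsets:
  //    0: audio format (2 bytes)      2: number of channels (2 bytes)
  //    4: sample rate (4 bytes)       8: byte rate (4 bytes)
  //   12: block align (2 bytes)      14: bits per sample (2 bytes)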
  uint16_t audio_format = ReadUnsignedInteger(buffer_, 2);

  if (audio_format != kAudioFormatPCM) {
    DVLOG(1) << "Audio format not supported: " << audio_format;
    return false;
  }

  number_of_channels_ = ReadUnsignedInteger(buffer_ + 2, 2);
  sample_rate_ = ReadUnsignedInteger(buffer_ + 4, 4);
  unsigned bits_per_sample = ReadUnsignedInteger(buffer_ + 14, 2);

  // Sanity checks.

  if (!number_of_channels_ ||
      number_of_channels_ > media::limits::kMaxChannels) {
    DVLOG(1) << "Unsupported number of channels: " << number_of_channels_;
    return false;
  }

  if (sample_rate_ < media::limits::kMinSampleRate ||
      sample_rate_ > media::limits::kMaxSampleRate) {
    DVLOG(1) << "Unsupported sample rate: " << sample_rate_;
    return false;
  }

  // We only support 8, 16, and 24 bits per sample.
  if (bits_per_sample == 8 || bits_per_sample == 16 || bits_per_sample == 24) {
    bytes_per_sample_ = bits_per_sample / 8;
    return true;
  }

  DVLOG(1) << "Unsupported bits per sample: " << bits_per_sample;
  return false;
}

bool WAVEDecoder::CopyDataChunkToBus(blink::WebAudioBus* destination_bus) {
  // The data chunk contains the audio data itself.
  if (!bytes_per_sample_ || bytes_per_sample_ > kMaximumBytesPerSample) {
    DVLOG(1) << "WARNING: data chunk without preceding fmt chunk,"
             << " or invalid bytes per sample.";
    return false;
  }

  VLOG(0) << "Decoding WAVE file: " << number_of_channels_ << " channels, "
          << sample_rate_ << " Hz, "
          << chunk_size_ / bytes_per_sample_ / number_of_channels_
          << " frames, " << 8 * bytes_per_sample_ << " bits/sample";

  // Create the destination bus of the appropriate size and then decode
  // the data into the bus.
  size_t number_of_frames =
      chunk_size_ / bytes_per_sample_ / number_of_channels_;

  destination_bus->initialize(
      number_of_channels_, number_of_frames, sample_rate_);

  for (size_t m = 0; m < number_of_frames; ++m) {
    for (uint16_t k = 0; k < number_of_channels_; ++k) {
      int16_t sample = ReadPCMSample(buffer_);

      buffer_ += bytes_per_sample_;
      destination_bus->channelData(k)[m] = ConvertSampleToFloat(sample);
    }
  }

  return true;
}

bool WAVEDecoder::DecodeWAVEFile(blink::WebAudioBus* destination_bus) {
  // Parse and decode WAVE file. If we can't parse it, return false.

  if (buffer_ + kMinimumWAVLength > buffer_end_) {
    DVLOG(1) << "Buffer too small to contain full WAVE header";
    return false;
  }

  // Do we have a RIFF file?
  ReadChunkHeader();
  if (memcmp(chunk_id_, "RIFF", kChunkIDLength) != 0) {
    DVLOG(1) << "RIFF missing";
    return false;
  }
  buffer_ += kChunkIDLength + kChunkSizeLength;

  // Check the format field of the RIFF chunk
  memcpy(chunk_id_, buffer_, kFormatFieldLength);
  if (memcmp(chunk_id_, "WAVE", kFormatFieldLength) != 0) {
    DVLOG(1) << "Invalid WAVE file:  missing WAVE header";
    return false;
  }
  // Advance past the format field
  buffer_ += kFormatFieldLength;

  // We have a WAVE file.  Start parsing the chunks.

  while (buffer_ < buffer_end_) {
    if (!ReadChunkHeader()) {
      DVLOG(1) << "Couldn't read chunk header";
      return false;
    }

    // Consume the chunk ID and chunk size
    buffer_ += kChunkIDLength + kChunkSizeLength;

    // Make sure we can read all chunk_size bytes.
    if (buffer_ + chunk_size_ > buffer_end_) {
      DVLOG(1) << "Insufficient bytes to read chunk of size " << chunk_size_;
      return false;
    }

    if (memcmp(chunk_id_, "fmt ", kChunkIDLength) == 0) {
      if (!ReadFMTChunk())
        return false;
    } else if (memcmp(chunk_id_, "data", kChunkIDLength) == 0) {
      // Return after reading the data chunk, whether we succeeded or
      // not.
      return CopyDataChunkToBus(destination_bus);
    } else {
      // Ignore these chunks that we don't know about.
      DVLOG(0) << "Ignoring WAVE chunk `"
               << std::string(chunk_id_, chunk_id_ + kChunkIDLength)
               << "' size " << chunk_size_;
    }

    // Advance to next chunk.
    buffer_ += chunk_size_;
  }

  // If we get here, that means we didn't find a data chunk, so we
  // couldn't handle this WAVE file.

  return false;
}

// The number of frames is known so preallocate the destination
// bus and copy the pcm data to the destination bus as it's being
// received.
static void CopyPcmDataToBus(int input_fd,
                             blink::WebAudioBus* destination_bus,
                             size_t number_of_frames,
                             unsigned number_of_channels,
                             double file_sample_rate) {
  destination_bus->initialize(number_of_channels,
                              number_of_frames,
                              file_sample_rate);

  int16_t pipe_data[PIPE_BUF / sizeof(int16_t)];
  size_t decoded_frames = 0;
  size_t current_sample_in_frame = 0;
  ssize_t nread;

  while ((nread = HANDLE_EINTR(read(input_fd, pipe_data, sizeof(pipe_data)))) >
         0) {
    size_t samples_in_pipe = nread / sizeof(int16_t);

    // The pipe may not contain a whole number of frames.  This is
    // especially true if the number of channels is greater than
    // 2. Thus, keep track of which sample in a frame is being
    // processed, so we handle the boundary at the end of the pipe
    // correctly.
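    // (For example, with PIPE_BUF == 4096 a full read delivers 2048
    // 16-bit samples, which is not a whole number of frames when there
    // are 3 channels.)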
    for (size_t m = 0; m < samples_in_pipe; ++m) {
      if (decoded_frames >= number_of_frames)
        break;

      destination_bus->channelData(current_sample_in_frame)[decoded_frames] =
          ConvertSampleToFloat(pipe_data[m]);
      ++current_sample_in_frame;

      if (current_sample_in_frame >= number_of_channels) {
        current_sample_in_frame = 0;
        ++decoded_frames;
      }
    }
  }

  // number_of_frames is only an estimate.  Resize the buffer with the
  // actual number of received frames.
  if (decoded_frames < number_of_frames)
    destination_bus->resizeSmaller(decoded_frames);
}

// The number of frames is unknown, so keep reading and buffering
// until there's no more data and then copy the data to the
// destination bus.
static void BufferAndCopyPcmDataToBus(int input_fd,
                                      blink::WebAudioBus* destination_bus,
                                      unsigned number_of_channels,
                                      double file_sample_rate) {
  int16_t pipe_data[PIPE_BUF / sizeof(int16_t)];
  std::vector<int16_t> decoded_samples;
  ssize_t nread;

  while ((nread = HANDLE_EINTR(read(input_fd, pipe_data, sizeof(pipe_data)))) >
         0) {
    size_t samples_in_pipe = nread / sizeof(int16_t);
    if (decoded_samples.size() + samples_in_pipe > decoded_samples.capacity()) {
      decoded_samples.reserve(std::max(samples_in_pipe,
                                       2 * decoded_samples.capacity()));
    }
    std::copy(pipe_data,
              pipe_data + samples_in_pipe,
              back_inserter(decoded_samples));
  }

  DVLOG(1) << "Total samples read = " << decoded_samples.size();

  // Convert the samples and save them in the audio bus.
  size_t number_of_samples = decoded_samples.size();
  size_t number_of_frames = decoded_samples.size() / number_of_channels;
  size_t decoded_frames = 0;

  destination_bus->initialize(number_of_channels,
                              number_of_frames,
                              file_sample_rate);

  // Iterate over whole frames only; a truncated stream may leave a
  // partial frame at the end of |decoded_samples|, which is dropped.
  for (size_t m = 0; m + number_of_channels <= number_of_samples;
       m += number_of_channels) {
    for (size_t k = 0; k < number_of_channels; ++k) {
      int16_t sample = decoded_samples[m + k];
      destination_bus->channelData(k)[decoded_frames] =
          ConvertSampleToFloat(sample);
    }
    ++decoded_frames;
  }

  // number_of_frames is only an estimate.  Resize the buffer with the
  // actual number of received frames.
  if (decoded_frames < number_of_frames)
    destination_bus->resizeSmaller(decoded_frames);
}

static bool TryWAVEFileDecoder(blink::WebAudioBus* destination_bus,
                               const uint8_t* encoded_data,
                               size_t data_size) {
  WAVEDecoder decoder(encoded_data, data_size);

  return decoder.DecodeWAVEFile(destination_bus);
}

// To decode audio data, we want to use the Android MediaCodec class.
// But this can't run in a sandboxed process so we need to initiate the
// request to MediaCodec in the browser.  To do this, we create a
// shared memory buffer that holds the audio data.  We send a message
// to the browser to start the decoder using this buffer and one end
// of a pipe.  The MediaCodec class will decode the data from the
// shared memory and write the PCM samples back to us over a pipe.
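//
// Schematically:
//
//   renderer: encoded audio -> shared memory --IPC--> browser: MediaCodec
//   renderer: read_fd_      <------ pipe <----------- browser: decoded PCM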
bool DecodeAudioFileData(blink::WebAudioBus* destination_bus, const char* data,
                         size_t data_size,
                         scoped_refptr<ThreadSafeSender> sender) {
  // Try to decode the data as a WAVE file first.  If it can't be
  // decoded, use MediaCodec.  See crbug.com/259048.
  if (TryWAVEFileDecoder(
          destination_bus, reinterpret_cast<const uint8_t*>(data), data_size)) {
    return true;
  }

  AudioDecoderIO audio_decoder(data, data_size);

  if (!audio_decoder.IsValid())
    return false;

  base::SharedMemoryHandle encoded_data_handle;
  audio_decoder.ShareEncodedToProcess(&encoded_data_handle);
  base::FileDescriptor fd(audio_decoder.write_fd(), true);

  DVLOG(1) << "DecodeAudioFileData: Starting MediaCodec";

  // Start MediaCodec processing in the browser which will read from
  // encoded_data_handle for our shared memory and write the decoded
  // PCM samples (16-bit integer) to our pipe.

  sender->Send(new ViewHostMsg_RunWebAudioMediaCodec(
      encoded_data_handle, fd, data_size));

  // First, read the number of channels, the sample rate, and the
  // number of frames and a flag indicating if the file is an
  // ogg/vorbis file.  This must be coordinated with
  // WebAudioMediaCodecBridge!
  //
  // If we know the number of samples, we can create the destination
  // bus directly and do the conversion directly to the bus instead of
  // buffering up everything before saving the data to the bus.

  int input_fd = audio_decoder.read_fd();
  struct media::WebAudioMediaCodecInfo info;

  DVLOG(1) << "Reading audio file info from fd " << input_fd;
  ssize_t nread = HANDLE_EINTR(read(input_fd, &info, sizeof(info)));
  DVLOG(1) << "read:  " << nread << " bytes:\n"
           << " 0: number of channels = " << info.channel_count << "\n"
           << " 1: sample rate        = " << info.sample_rate << "\n"
           << " 2: number of frames   = " << info.number_of_frames << "\n";

  if (nread != sizeof(info))
    return false;

  unsigned number_of_channels = info.channel_count;
  double file_sample_rate = static_cast<double>(info.sample_rate);
  size_t number_of_frames = info.number_of_frames;

  // Sanity checks
  if (!number_of_channels ||
      number_of_channels > media::limits::kMaxChannels ||
      file_sample_rate < media::limits::kMinSampleRate ||
      file_sample_rate > media::limits::kMaxSampleRate) {
    return false;
  }

  if (number_of_frames > 0) {
    CopyPcmDataToBus(input_fd,
                     destination_bus,
                     number_of_frames,
                     number_of_channels,
                     file_sample_rate);
  } else {
    BufferAndCopyPcmDataToBus(input_fd,
                              destination_bus,
                              number_of_channels,
                              file_sample_rate);
  }

  return true;
}

}  // namespace content