1c8b59c046895fa5b6d79f73e0b5817330fcfbfc1A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
27202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
37202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanLicensed under the Apache License, Version 2.0 (the "License");
47202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanyou may not use this file except in compliance with the License.
57202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanYou may obtain a copy of the License at
67202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
77202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    http://www.apache.org/licenses/LICENSE-2.0
87202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
97202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanUnless required by applicable law or agreed to in writing, software
107202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryandistributed under the License is distributed on an "AS IS" BASIS,
117202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
127202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanSee the License for the specific language governing permissions and
137202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanlimitations under the License.
147202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan==============================================================================*/
157202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
// Functions to write and read audio in WAV format.
177202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
#include <math.h>
#include <string.h>

#include <algorithm>
#include <cmath>

#include "tensorflow/core/lib/core/casts.h"
#include "tensorflow/core/lib/core/coding.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/wav/wav_io.h"
#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
297202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
307202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace tensorflow {
317202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace wav {
327202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace {
337202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
347202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED RiffChunk {
357202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_id[4];
367202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_data_size[4];
377202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char riff_type[4];
387202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan};
397202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(RiffChunk) == 12, "TF_PACKED does not work.");
407202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
417202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED FormatChunk {
427202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_id[4];
437202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_data_size[4];
447202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char compression_code[2];
457202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char channel_numbers[2];
467202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char sample_rate[4];
477202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char bytes_per_second[4];
487202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char bytes_per_frame[2];
497202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char bits_per_sample[2];
507202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan};
517202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(FormatChunk) == 24, "TF_PACKED does not work.");
527202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
537202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED DataChunk {
547202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_id[4];
557202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char chunk_data_size[4];
567202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan};
577202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(DataChunk) == 8, "TF_PACKED does not work.");
587202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
597202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED WavHeader {
607202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  RiffChunk riff_chunk;
617202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  FormatChunk format_chunk;
627202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  DataChunk data_chunk;
637202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan};
647202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(WavHeader) ==
657202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                  sizeof(RiffChunk) + sizeof(FormatChunk) + sizeof(DataChunk),
667202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan              "TF_PACKED does not work.");
677202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
// Four-character tags used in the WAV header. Each array also holds the
// terminating NUL of its literal, so code that copies a tag into a header
// field copies exactly 4 bytes.
constexpr char kRiffChunkId[] = "RIFF";    // Tag of the outer RIFF chunk.
constexpr char kRiffType[] = "WAVE";       // RIFF type of a WAV file.
constexpr char kFormatChunkId[] = "fmt ";  // Note the trailing space.
constexpr char kDataChunkId[] = "data";    // Tag of the sample data chunk.
72ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden
737202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryaninline int16 FloatToInt16Sample(float data) {
747202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr float kMultiplier = 1.0f * (1 << 15);
757202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  return std::min<float>(std::max<float>(roundf(data * kMultiplier), kint16min),
767202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                         kint16max);
777202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}
787202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
79ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardeninline float Int16SampleToFloat(int16 data) {
80ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  constexpr float kMultiplier = 1.0f / (1 << 15);
81ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  return data * kMultiplier;
82ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden}
83ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden
84ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus ExpectText(const string& data, const string& expected_text,
85ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                  int* offset) {
86ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  const int new_offset = *offset + expected_text.size();
87ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (new_offset > data.size()) {
88ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument("Data too short when trying to read ",
89ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                                   expected_text);
90ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
91ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  const string found_text(data.begin() + *offset, data.begin() + new_offset);
92ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (found_text != expected_text) {
93ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument("Header mismatch: Expected ", expected_text,
94ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                                   " but found ", found_text);
95ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
96ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  *offset = new_offset;
97ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  return Status::OK();
98ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden}
99ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden
100ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardentemplate <class T>
101ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus ReadValue(const string& data, T* value, int* offset) {
102ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  const int new_offset = *offset + sizeof(T);
103ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (new_offset > data.size()) {
104ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument("Data too short when trying to read value");
105ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
106ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (port::kLittleEndian) {
107ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    memcpy(value, data.data() + *offset, sizeof(T));
108ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  } else {
109ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    *value = 0;
110ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    const uint8* data_buf =
111ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        reinterpret_cast<const uint8*>(data.data() + *offset);
112ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    int shift = 0;
113ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    for (int i = 0; i < sizeof(T); ++i, shift += 8) {
114de01be952d6dbd63f855796fa165e4de844cb43anamrata-ibm      *value = *value | (data_buf[i] << shift);
115ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    }
116ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
117ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  *offset = new_offset;
118ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  return Status::OK();
119ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden}
120ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden
1210c6fd1703eb8f990c8b071471b0105339ccf821dPete WardenStatus ReadString(const string& data, int expected_length, string* value,
1220c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden                  int* offset) {
1230c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  const int new_offset = *offset + expected_length;
1240c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  if (new_offset > data.size()) {
1250c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    return errors::InvalidArgument("Data too short when trying to read string");
1260c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  }
1270c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  *value = string(data.begin() + *offset, data.begin() + new_offset);
1280c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  *offset = new_offset;
1290c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  return Status::OK();
1300c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden}
1310c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden
1327202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}  // namespace
1337202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1347202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanStatus EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate,
1357202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                             size_t num_channels, size_t num_frames,
1367202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                             string* wav_string) {
1377202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr size_t kFormatChunkSize = 16;
1387202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr size_t kCompressionCodePcm = 1;
1397202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr size_t kBitsPerSample = 16;
1407202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr size_t kBytesPerSample = kBitsPerSample / 8;
1417202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  constexpr size_t kHeaderSize = sizeof(WavHeader);
1427202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1437202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (audio == nullptr) {
1447202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument("audio is null");
1457202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1467202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (wav_string == nullptr) {
1477202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument("wav_string is null");
1487202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1497202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (sample_rate == 0 || sample_rate > kuint32max) {
1507202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument("sample_rate must be in (0, 2^32), got: ",
1517202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                                   sample_rate);
1527202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1537202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (num_channels == 0 || num_channels > kuint16max) {
1547202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument("num_channels must be in (0, 2^16), got: ",
1557202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                                   num_channels);
1567202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1577202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (num_frames == 0) {
1587202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument("num_frames must be positive.");
1597202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1607202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
161a0ffaf3caa0234653035a692858606c7bdacd63bFrank Chen  const size_t bytes_per_second = sample_rate * kBytesPerSample * num_channels;
1627202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  const size_t num_samples = num_frames * num_channels;
1637202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  const size_t data_size = num_samples * kBytesPerSample;
1647202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  const size_t file_size = kHeaderSize + num_samples * kBytesPerSample;
1657202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  const size_t bytes_per_frame = kBytesPerSample * num_channels;
1667202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1677202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // WAV represents the length of the file as a uint32 so file_size cannot
1687202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // exceed kuint32max.
1697202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  if (file_size > kuint32max) {
1707202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    return errors::InvalidArgument(
1717202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan        "Provided channels and frames cannot be encoded as a WAV.");
1727202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
1737202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1747202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  wav_string->resize(file_size);
1757202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  char* data = &wav_string->at(0);
1767202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  WavHeader* header = bit_cast<WavHeader*>(data);
1777202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1787202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // Fill RIFF chunk.
1797202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  auto* riff_chunk = &header->riff_chunk;
1807202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  memcpy(riff_chunk->chunk_id, kRiffChunkId, 4);
1817202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed32(riff_chunk->chunk_data_size, file_size - 8);
1827202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  memcpy(riff_chunk->riff_type, kRiffType, 4);
1837202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1847202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // Fill format chunk.
1857202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  auto* format_chunk = &header->format_chunk;
1867202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  memcpy(format_chunk->chunk_id, kFormatChunkId, 4);
1877202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed32(format_chunk->chunk_data_size, kFormatChunkSize);
1887202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed16(format_chunk->compression_code, kCompressionCodePcm);
1897202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed16(format_chunk->channel_numbers, num_channels);
1907202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed32(format_chunk->sample_rate, sample_rate);
1917202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed32(format_chunk->bytes_per_second, bytes_per_second);
1927202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed16(format_chunk->bytes_per_frame, bytes_per_frame);
1937202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed16(format_chunk->bits_per_sample, kBitsPerSample);
1947202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
1957202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // Fill data chunk.
1967202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  auto* data_chunk = &header->data_chunk;
1977202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  memcpy(data_chunk->chunk_id, kDataChunkId, 4);
1987202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  core::EncodeFixed32(data_chunk->chunk_data_size, data_size);
1997202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
2007202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  // Write the audio.
2017202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  data += kHeaderSize;
2027202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  for (size_t i = 0; i < num_samples; ++i) {
2037202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    int16 sample = FloatToInt16Sample(audio[i]);
2047202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan    core::EncodeFixed16(&data[i * kBytesPerSample],
2057202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan                        static_cast<uint16>(sample));
2067202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  }
2077202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan  return Status::OK();
2087202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}
2097202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan
210ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus DecodeLin16WaveAsFloatVector(const string& wav_string,
211ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                                    std::vector<float>* float_values,
212ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                                    uint32* sample_count, uint16* channel_count,
213ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden                                    uint32* sample_rate) {
214ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  int offset = 0;
215ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffChunkId, &offset));
216ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint32 total_file_size;
217ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &total_file_size, &offset));
218ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffType, &offset));
219ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ExpectText(wav_string, kFormatChunkId, &offset));
220ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint32 format_chunk_size;
221ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(
222ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden      ReadValue<uint32>(wav_string, &format_chunk_size, &offset));
223ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if ((format_chunk_size != 16) && (format_chunk_size != 18)) {
224ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument(
225ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        "Bad file size for WAV: Expected 16 or 18, but got", format_chunk_size);
226ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
227ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint16 audio_format;
228ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &audio_format, &offset));
229ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (audio_format != 1) {
230ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument(
231ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        "Bad audio format for WAV: Expected 1 (PCM), but got", audio_format);
232ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
233ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, channel_count, &offset));
234ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, sample_rate, &offset));
235ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint32 bytes_per_second;
236ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &bytes_per_second, &offset));
237ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint16 bytes_per_sample;
238ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &bytes_per_sample, &offset));
239ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  // Confusingly, bits per sample is defined as holding the number of bits for
240ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  // one channel, unlike the definition of sample used elsewhere in the WAV
241ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  // spec. For example, bytes per sample is the memory needed for all channels
242ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  // for one point in time.
243ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  uint16 bits_per_sample;
244ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &bits_per_sample, &offset));
245ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (bits_per_sample != 16) {
246ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument(
247ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        "Can only read 16-bit WAV files, but received ", bits_per_sample);
248ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
249ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  const uint32 expected_bytes_per_sample =
250ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden      ((bits_per_sample * *channel_count) + 7) / 8;
251ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (bytes_per_sample != expected_bytes_per_sample) {
252ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument(
253ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        "Bad bytes per sample in WAV header: Expected ",
254ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        expected_bytes_per_sample, " but got ", bytes_per_sample);
255ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
256a0ffaf3caa0234653035a692858606c7bdacd63bFrank Chen  const uint32 expected_bytes_per_second = bytes_per_sample * *sample_rate;
257ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (bytes_per_second != expected_bytes_per_second) {
258ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    return errors::InvalidArgument(
259ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        "Bad bytes per second in WAV header: Expected ",
260ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        expected_bytes_per_second, " but got ", bytes_per_second,
261ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        " (sample_rate=", *sample_rate, ", bytes_per_sample=", bytes_per_sample,
262ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden        ")");
263ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
264ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  if (format_chunk_size == 18) {
265ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    // Skip over this unused section.
266ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden    offset += 2;
267ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
2680c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden
2690c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  bool was_data_found = false;
2700c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  while (offset < wav_string.size()) {
2710c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    string chunk_id;
2720c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset));
2730c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    uint32 chunk_size;
2740c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &chunk_size, &offset));
2750c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    if (chunk_id == kDataChunkId) {
2760c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      if (was_data_found) {
2770c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden        return errors::InvalidArgument("More than one data chunk found in WAV");
2780c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      }
2790c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      was_data_found = true;
2800c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      *sample_count = chunk_size / bytes_per_sample;
2810c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      const uint32 data_count = *sample_count * *channel_count;
2820c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      float_values->resize(data_count);
2830c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      for (int i = 0; i < data_count; ++i) {
2840c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden        int16 single_channel_value = 0;
2850c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden        TF_RETURN_IF_ERROR(
2860c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden            ReadValue<int16>(wav_string, &single_channel_value, &offset));
2870c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden        (*float_values)[i] = Int16SampleToFloat(single_channel_value);
2880c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      }
2890c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    } else {
2900c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden      offset += chunk_size;
2910c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    }
2920c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  }
2930c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden  if (!was_data_found) {
2940c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden    return errors::InvalidArgument("No data chunk found in WAV");
295ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  }
296ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden  return Status::OK();
297ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden}
298ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden
2997202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}  // namespace wav
3007202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}  // namespace tensorflow
301