/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Functions to write and read audio in WAV format.
177202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 187202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include <math.h> 197202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include <string.h> 207202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include <algorithm> 217202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 227202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/lib/core/casts.h" 237202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/lib/core/coding.h" 247202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/lib/core/errors.h" 257202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/lib/wav/wav_io.h" 2646231cf242c19d74af75370eefd9e9b7c504c08aVijay Vasudevan#include "tensorflow/core/platform/cpu_info.h" 277202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/platform/logging.h" 287202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan#include "tensorflow/core/platform/macros.h" 297202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 307202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace tensorflow { 317202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace wav { 327202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryannamespace { 337202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 347202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED RiffChunk { 357202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char chunk_id[4]; 367202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char chunk_data_size[4]; 377202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char riff_type[4]; 387202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}; 397202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(RiffChunk) == 12, "TF_PACKED does not work."); 407202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 417202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED FormatChunk { 427202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char chunk_id[4]; 437202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char 
chunk_data_size[4]; 447202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char compression_code[2]; 457202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char channel_numbers[2]; 467202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char sample_rate[4]; 477202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char bytes_per_second[4]; 487202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char bytes_per_frame[2]; 497202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char bits_per_sample[2]; 507202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}; 517202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(FormatChunk) == 24, "TF_PACKED does not work."); 527202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 537202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED DataChunk { 547202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char chunk_id[4]; 557202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char chunk_data_size[4]; 567202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}; 577202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(DataChunk) == 8, "TF_PACKED does not work."); 587202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 597202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstruct TF_PACKED WavHeader { 607202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan RiffChunk riff_chunk; 617202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan FormatChunk format_chunk; 627202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan DataChunk data_chunk; 637202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan}; 647202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryanstatic_assert(sizeof(WavHeader) == 657202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan sizeof(RiffChunk) + sizeof(FormatChunk) + sizeof(DataChunk), 667202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan "TF_PACKED does not work."); 677202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 68ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardenconstexpr char kRiffChunkId[] = "RIFF"; 69ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardenconstexpr char kRiffType[] = "WAVE"; 
70ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardenconstexpr char kFormatChunkId[] = "fmt "; 71ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardenconstexpr char kDataChunkId[] = "data"; 72ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden 737202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryaninline int16 FloatToInt16Sample(float data) { 747202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr float kMultiplier = 1.0f * (1 << 15); 757202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return std::min<float>(std::max<float>(roundf(data * kMultiplier), kint16min), 767202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan kint16max); 777202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan} 787202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 79ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardeninline float Int16SampleToFloat(int16 data) { 80ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden constexpr float kMultiplier = 1.0f / (1 << 15); 81ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return data * kMultiplier; 82ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden} 83ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden 84ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus ExpectText(const string& data, const string& expected_text, 85ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden int* offset) { 86ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden const int new_offset = *offset + expected_text.size(); 87ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (new_offset > data.size()) { 88ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument("Data too short when trying to read ", 89ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden expected_text); 90ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 91ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden const string found_text(data.begin() + *offset, data.begin() + new_offset); 92ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (found_text != expected_text) { 
93ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument("Header mismatch: Expected ", expected_text, 94ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden " but found ", found_text); 95ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 96ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden *offset = new_offset; 97ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return Status::OK(); 98ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden} 99ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden 100ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Wardentemplate <class T> 101ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus ReadValue(const string& data, T* value, int* offset) { 102ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden const int new_offset = *offset + sizeof(T); 103ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (new_offset > data.size()) { 104ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument("Data too short when trying to read value"); 105ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 106ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (port::kLittleEndian) { 107ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden memcpy(value, data.data() + *offset, sizeof(T)); 108ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } else { 109ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden *value = 0; 110ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden const uint8* data_buf = 111ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden reinterpret_cast<const uint8*>(data.data() + *offset); 112ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden int shift = 0; 113ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden for (int i = 0; i < sizeof(T); ++i, shift += 8) { 114de01be952d6dbd63f855796fa165e4de844cb43anamrata-ibm *value = *value | (data_buf[i] << shift); 115ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 116ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 
117ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden *offset = new_offset; 118ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return Status::OK(); 119ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden} 120ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden 1210c6fd1703eb8f990c8b071471b0105339ccf821dPete WardenStatus ReadString(const string& data, int expected_length, string* value, 1220c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden int* offset) { 1230c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden const int new_offset = *offset + expected_length; 1240c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden if (new_offset > data.size()) { 1250c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden return errors::InvalidArgument("Data too short when trying to read string"); 1260c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } 1270c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden *value = string(data.begin() + *offset, data.begin() + new_offset); 1280c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden *offset = new_offset; 1290c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden return Status::OK(); 1300c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden} 1310c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden 1327202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan} // namespace 1337202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1347202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ RyanStatus EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate, 1357202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan size_t num_channels, size_t num_frames, 1367202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan string* wav_string) { 1377202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr size_t kFormatChunkSize = 16; 1387202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr size_t kCompressionCodePcm = 1; 1397202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr size_t kBitsPerSample = 16; 1407202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr size_t kBytesPerSample = 
kBitsPerSample / 8; 1417202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan constexpr size_t kHeaderSize = sizeof(WavHeader); 1427202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1437202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (audio == nullptr) { 1447202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument("audio is null"); 1457202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1467202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (wav_string == nullptr) { 1477202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument("wav_string is null"); 1487202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1497202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (sample_rate == 0 || sample_rate > kuint32max) { 1507202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument("sample_rate must be in (0, 2^32), got: ", 1517202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan sample_rate); 1527202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1537202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (num_channels == 0 || num_channels > kuint16max) { 1547202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument("num_channels must be in (0, 2^16), got: ", 1557202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan num_channels); 1567202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1577202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (num_frames == 0) { 1587202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument("num_frames must be positive."); 1597202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1607202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 161a0ffaf3caa0234653035a692858606c7bdacd63bFrank Chen const size_t bytes_per_second = sample_rate * kBytesPerSample * num_channels; 1627202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan const size_t num_samples = num_frames * num_channels; 1637202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan const size_t data_size = num_samples * kBytesPerSample; 
1647202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan const size_t file_size = kHeaderSize + num_samples * kBytesPerSample; 1657202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan const size_t bytes_per_frame = kBytesPerSample * num_channels; 1667202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1677202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // WAV represents the length of the file as a uint32 so file_size cannot 1687202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // exceed kuint32max. 1697202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan if (file_size > kuint32max) { 1707202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return errors::InvalidArgument( 1717202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan "Provided channels and frames cannot be encoded as a WAV."); 1727202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 1737202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1747202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan wav_string->resize(file_size); 1757202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan char* data = &wav_string->at(0); 1767202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan WavHeader* header = bit_cast<WavHeader*>(data); 1777202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1787202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // Fill RIFF chunk. 1797202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan auto* riff_chunk = &header->riff_chunk; 1807202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan memcpy(riff_chunk->chunk_id, kRiffChunkId, 4); 1817202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed32(riff_chunk->chunk_data_size, file_size - 8); 1827202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan memcpy(riff_chunk->riff_type, kRiffType, 4); 1837202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1847202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // Fill format chunk. 
1857202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan auto* format_chunk = &header->format_chunk; 1867202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan memcpy(format_chunk->chunk_id, kFormatChunkId, 4); 1877202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed32(format_chunk->chunk_data_size, kFormatChunkSize); 1887202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed16(format_chunk->compression_code, kCompressionCodePcm); 1897202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed16(format_chunk->channel_numbers, num_channels); 1907202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed32(format_chunk->sample_rate, sample_rate); 1917202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed32(format_chunk->bytes_per_second, bytes_per_second); 1927202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed16(format_chunk->bytes_per_frame, bytes_per_frame); 1937202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed16(format_chunk->bits_per_sample, kBitsPerSample); 1947202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 1957202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // Fill data chunk. 1967202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan auto* data_chunk = &header->data_chunk; 1977202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan memcpy(data_chunk->chunk_id, kDataChunkId, 4); 1987202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed32(data_chunk->chunk_data_size, data_size); 1997202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 2007202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan // Write the audio. 
2017202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan data += kHeaderSize; 2027202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan for (size_t i = 0; i < num_samples; ++i) { 2037202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan int16 sample = FloatToInt16Sample(audio[i]); 2047202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan core::EncodeFixed16(&data[i * kBytesPerSample], 2057202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan static_cast<uint16>(sample)); 2067202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan } 2077202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan return Status::OK(); 2087202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan} 2097202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan 210ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete WardenStatus DecodeLin16WaveAsFloatVector(const string& wav_string, 211ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden std::vector<float>* float_values, 212ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint32* sample_count, uint16* channel_count, 213ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint32* sample_rate) { 214ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden int offset = 0; 215ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffChunkId, &offset)); 216ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint32 total_file_size; 217ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &total_file_size, &offset)); 218ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffType, &offset)); 219ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ExpectText(wav_string, kFormatChunkId, &offset)); 220ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint32 format_chunk_size; 221ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR( 222ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden ReadValue<uint32>(wav_string, &format_chunk_size, &offset)); 
223ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if ((format_chunk_size != 16) && (format_chunk_size != 18)) { 224ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument( 225ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden "Bad file size for WAV: Expected 16 or 18, but got", format_chunk_size); 226ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 227ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint16 audio_format; 228ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &audio_format, &offset)); 229ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (audio_format != 1) { 230ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument( 231ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden "Bad audio format for WAV: Expected 1 (PCM), but got", audio_format); 232ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 233ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, channel_count, &offset)); 234ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, sample_rate, &offset)); 235ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint32 bytes_per_second; 236ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &bytes_per_second, &offset)); 237ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint16 bytes_per_sample; 238ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &bytes_per_sample, &offset)); 239ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden // Confusingly, bits per sample is defined as holding the number of bits for 240ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden // one channel, unlike the definition of sample used elsewhere in the WAV 241ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden // spec. 
For example, bytes per sample is the memory needed for all channels 242ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden // for one point in time. 243ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden uint16 bits_per_sample; 244ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden TF_RETURN_IF_ERROR(ReadValue<uint16>(wav_string, &bits_per_sample, &offset)); 245ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (bits_per_sample != 16) { 246ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument( 247ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden "Can only read 16-bit WAV files, but received ", bits_per_sample); 248ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 249ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden const uint32 expected_bytes_per_sample = 250ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden ((bits_per_sample * *channel_count) + 7) / 8; 251ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (bytes_per_sample != expected_bytes_per_sample) { 252ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument( 253ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden "Bad bytes per sample in WAV header: Expected ", 254ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden expected_bytes_per_sample, " but got ", bytes_per_sample); 255ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 256a0ffaf3caa0234653035a692858606c7bdacd63bFrank Chen const uint32 expected_bytes_per_second = bytes_per_sample * *sample_rate; 257ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (bytes_per_second != expected_bytes_per_second) { 258ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return errors::InvalidArgument( 259ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden "Bad bytes per second in WAV header: Expected ", 260ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden expected_bytes_per_second, " but got ", bytes_per_second, 261ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden " (sample_rate=", 
*sample_rate, ", bytes_per_sample=", bytes_per_sample, 262ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden ")"); 263ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 264ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden if (format_chunk_size == 18) { 265ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden // Skip over this unused section. 266ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden offset += 2; 267ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 2680c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden 2690c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden bool was_data_found = false; 2700c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden while (offset < wav_string.size()) { 2710c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden string chunk_id; 2720c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset)); 2730c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden uint32 chunk_size; 2740c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &chunk_size, &offset)); 2750c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden if (chunk_id == kDataChunkId) { 2760c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden if (was_data_found) { 2770c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden return errors::InvalidArgument("More than one data chunk found in WAV"); 2780c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } 2790c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden was_data_found = true; 2800c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden *sample_count = chunk_size / bytes_per_sample; 2810c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden const uint32 data_count = *sample_count * *channel_count; 2820c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden float_values->resize(data_count); 2830c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden for (int i = 0; i < data_count; ++i) { 2840c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden int16 
single_channel_value = 0; 2850c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden TF_RETURN_IF_ERROR( 2860c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden ReadValue<int16>(wav_string, &single_channel_value, &offset)); 2870c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden (*float_values)[i] = Int16SampleToFloat(single_channel_value); 2880c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } 2890c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } else { 2900c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden offset += chunk_size; 2910c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } 2920c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden } 2930c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden if (!was_data_found) { 2940c6fd1703eb8f990c8b071471b0105339ccf821dPete Warden return errors::InvalidArgument("No data chunk found in WAV"); 295ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden } 296ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden return Status::OK(); 297ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden} 298ce0a07f2479c864b0a6cc8f4a218d74e8b480746Pete Warden 2997202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan} // namespace wav 3007202b5d0276f024f9c43daa9ffe81ed2f77b50cdRJ Ryan} // namespace tensorflow 301