// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <CoreVideo/CoreVideo.h>
#include <OpenGL/CGLIOSurface.h>
#include <OpenGL/gl.h>

#include "base/bind.h"
#include "base/command_line.h"
#include "base/sys_byteorder.h"
#include "base/thread_task_runner_handle.h"
#include "content/common/gpu/media/vt_video_decode_accelerator.h"
#include "content/public/common/content_switches.h"
#include "media/filters/h264_parser.h"
#include "ui/gl/scoped_binders.h"
#include "ui/gl/scoped_cgl.h"

using content_common_gpu_media::kModuleVt;
using content_common_gpu_media::InitializeStubs;
using content_common_gpu_media::IsVtInitialized;
using content_common_gpu_media::StubPathMap;

namespace content {

// Size of NALU length headers in AVCC/MPEG-4 format (can be 1, 2, or 4).
static const int kNALUHeaderLength = 4;

// We only request 5 picture buffers from the client, which are used to hold
// the decoded samples. These buffers are reused once the client tells us that
// it is done with them.
static const int kNumPictureBuffers = 5;

// Route decoded frame callbacks back into the VTVideoDecodeAccelerator.
static void OutputThunk(
    void* decompression_output_refcon,
    void* source_frame_refcon,
    OSStatus status,
    VTDecodeInfoFlags info_flags,
    CVImageBufferRef image_buffer,
    CMTime presentation_time_stamp,
    CMTime presentation_duration) {
  VTVideoDecodeAccelerator* vda =
      reinterpret_cast<VTVideoDecodeAccelerator*>(decompression_output_refcon);
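  // |source_frame_refcon| carries the bitstream buffer ID, packed as a
  // pointer-sized integer by DecodeTask().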
  int32_t bitstream_id = reinterpret_cast<intptr_t>(source_frame_refcon);
  vda->Output(bitstream_id, status, image_buffer);
}

VTVideoDecodeAccelerator::DecodedFrame::DecodedFrame(
    int32_t bitstream_id,
    CVImageBufferRef image_buffer)
    : bitstream_id(bitstream_id),
      image_buffer(image_buffer) {
}

VTVideoDecodeAccelerator::DecodedFrame::~DecodedFrame() {
}

VTVideoDecodeAccelerator::PendingAction::PendingAction(
    Action action,
    int32_t bitstream_id)
    : action(action),
      bitstream_id(bitstream_id) {
}

VTVideoDecodeAccelerator::PendingAction::~PendingAction() {
}

VTVideoDecodeAccelerator::VTVideoDecodeAccelerator(CGLContextObj cgl_context)
    : cgl_context_(cgl_context),
      client_(NULL),
      format_(NULL),
      session_(NULL),
      gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()),
      weak_this_factory_(this),
      decoder_thread_("VTDecoderThread") {
  callback_.decompressionOutputCallback = OutputThunk;
  callback_.decompressionOutputRefCon = this;
}

VTVideoDecodeAccelerator::~VTVideoDecodeAccelerator() {
}

bool VTVideoDecodeAccelerator::Initialize(
    media::VideoCodecProfile profile,
    Client* client) {
  DCHECK(CalledOnValidThread());
  client_ = client;

  // Only H.264 is supported.
  if (profile < media::H264PROFILE_MIN || profile > media::H264PROFILE_MAX)
    return false;

  // Require --no-sandbox until VideoToolbox library loading is part of sandbox
  // startup (and this VDA is ready for regular users).
  if (!base::CommandLine::ForCurrentProcess()->HasSwitch(switches::kNoSandbox))
    return false;

  if (!IsVtInitialized()) {
    // CoreVideo is also required, but the loader stops after the first
    // path is loaded. Instead we rely on the transitive dependency from
    // VideoToolbox to CoreVideo.
    // TODO(sandersd): Fallback to PrivateFrameworks for VideoToolbox.
    StubPathMap paths;
    paths[kModuleVt].push_back(FILE_PATH_LITERAL(
        "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox"));
    if (!InitializeStubs(paths))
      return false;
  }

  // Spawn a thread to handle parsing and calling VideoToolbox.
  if (!decoder_thread_.Start())
    return false;

  return true;
}

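// Builds a CMFormatDescription from the H.264 parameter sets and creates a
// matching VTDecompressionSession.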
// TODO(sandersd): Proper error reporting instead of CHECKs.
void VTVideoDecodeAccelerator::ConfigureDecoder(
    const std::vector<const uint8_t*>& nalu_data_ptrs,
    const std::vector<size_t>& nalu_data_sizes) {
  DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
  // Construct a new format description from the parameter sets.
  // TODO(sandersd): Replace this with custom code to support OS X < 10.9.
  format_.reset();
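  // CoreMedia and VideoToolbox calls return an OSStatus, where noErr (0)
  // indicates success, so CHECK(!status) asserts that the call succeeded.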
  CHECK(!CMVideoFormatDescriptionCreateFromH264ParameterSets(
      kCFAllocatorDefault,
      nalu_data_ptrs.size(),      // parameter_set_count
      &nalu_data_ptrs.front(),    // &parameter_set_pointers
      &nalu_data_sizes.front(),   // &parameter_set_sizes
      kNALUHeaderLength,          // nal_unit_header_length
      format_.InitializeInto()));
  CMVideoDimensions coded_dimensions =
      CMVideoFormatDescriptionGetDimensions(format_);

  // Prepare VideoToolbox configuration dictionaries.
  base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config(
      CFDictionaryCreateMutable(
          kCFAllocatorDefault,
          1,  // capacity
          &kCFTypeDictionaryKeyCallBacks,
          &kCFTypeDictionaryValueCallBacks));

  CFDictionarySetValue(
      decoder_config,
      // kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
      CFSTR("EnableHardwareAcceleratedVideoDecoder"),
      kCFBooleanTrue);

  base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config(
      CFDictionaryCreateMutable(
          kCFAllocatorDefault,
          4,  // capacity
          &kCFTypeDictionaryKeyCallBacks,
          &kCFTypeDictionaryValueCallBacks));

#define CFINT(i) CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &i)
  // TODO(sandersd): RGBA option for 4:4:4 video.
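  // kCVPixelFormatType_422YpCbCr8 ('2vuy') is 8-bit 4:2:2, which matches the
  // GL_YCBCR_422_APPLE upload performed in SendPictures().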
  int32_t pixel_format = kCVPixelFormatType_422YpCbCr8;
  base::ScopedCFTypeRef<CFNumberRef> cf_pixel_format(CFINT(pixel_format));
  base::ScopedCFTypeRef<CFNumberRef> cf_width(CFINT(coded_dimensions.width));
  base::ScopedCFTypeRef<CFNumberRef> cf_height(CFINT(coded_dimensions.height));
#undef CFINT
  CFDictionarySetValue(
      image_config, kCVPixelBufferPixelFormatTypeKey, cf_pixel_format);
  CFDictionarySetValue(image_config, kCVPixelBufferWidthKey, cf_width);
  CFDictionarySetValue(image_config, kCVPixelBufferHeightKey, cf_height);
  CFDictionarySetValue(
      image_config, kCVPixelBufferOpenGLCompatibilityKey, kCFBooleanTrue);

  // TODO(sandersd): Check if the session is already compatible.
  session_.reset();
  CHECK(!VTDecompressionSessionCreate(
      kCFAllocatorDefault,
      format_,              // video_format_description
      decoder_config,       // video_decoder_specification
      image_config,         // destination_image_buffer_attributes
      &callback_,           // output_callback
      session_.InitializeInto()));

  // If the size has changed, trigger a request for new picture buffers.
  // TODO(sandersd): Move to SendPictures(), and use this just as a hint for an
  // upcoming size change.
  gfx::Size new_coded_size(coded_dimensions.width, coded_dimensions.height);
  if (coded_size_ != new_coded_size) {
    coded_size_ = new_coded_size;
    gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
        &VTVideoDecodeAccelerator::SizeChangedTask,
        weak_this_factory_.GetWeakPtr(),
        coded_size_));
  }
}

void VTVideoDecodeAccelerator::Decode(const media::BitstreamBuffer& bitstream) {
  DCHECK(CalledOnValidThread());
  CHECK_GE(bitstream.id(), 0) << "Negative bitstream_id";
  pending_bitstream_ids_.push(bitstream.id());
  decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind(
      &VTVideoDecodeAccelerator::DecodeTask, base::Unretained(this),
      bitstream));
}

// TODO(sandersd): Proper error reporting instead of CHECKs.
void VTVideoDecodeAccelerator::DecodeTask(
    const media::BitstreamBuffer bitstream) {
  DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());

  // Map the bitstream buffer.
  base::SharedMemory memory(bitstream.handle(), true);
  size_t size = bitstream.size();
  CHECK(memory.Map(size));
  const uint8_t* buf = static_cast<uint8_t*>(memory.memory());

  // NALUs are stored in Annex B format in the bitstream buffer (start codes),
  // but VideoToolbox expects AVCC/MPEG-4 format (length headers), so we must
  // rewrite the data.
  //
  // 1. Locate relevant NALUs and compute the size of the translated data.
  //    Also record any parameter sets for VideoToolbox initialization.
  size_t data_size = 0;
  std::vector<media::H264NALU> nalus;
  std::vector<const uint8_t*> config_nalu_data_ptrs;
  std::vector<size_t> config_nalu_data_sizes;
  parser_.SetStream(buf, size);
  media::H264NALU nalu;
  while (true) {
    media::H264Parser::Result result = parser_.AdvanceToNextNALU(&nalu);
    if (result == media::H264Parser::kEOStream)
      break;
    CHECK_EQ(result, media::H264Parser::kOk);
    // TODO(sandersd): Check that these are only at the start.
    if (nalu.nal_unit_type == media::H264NALU::kSPS ||
        nalu.nal_unit_type == media::H264NALU::kPPS ||
        nalu.nal_unit_type == media::H264NALU::kSPSExt) {
      DVLOG(2) << "Parameter set " << nalu.nal_unit_type;
      config_nalu_data_ptrs.push_back(nalu.data);
      config_nalu_data_sizes.push_back(nalu.size);
    } else {
      nalus.push_back(nalu);
      data_size += kNALUHeaderLength + nalu.size;
    }
  }

  // 2. Initialize VideoToolbox.
  // TODO(sandersd): Reinitialize when there are new parameter sets.
  if (!session_)
    ConfigureDecoder(config_nalu_data_ptrs, config_nalu_data_sizes);

  // If there are no non-configuration units, immediately return an empty
  // (i.e. dropped) frame. It is an error to create a MemoryBlock with zero
  // size.
  if (!data_size) {
    gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
        &VTVideoDecodeAccelerator::OutputTask,
        weak_this_factory_.GetWeakPtr(),
        DecodedFrame(bitstream.id(), NULL)));
    return;
  }

  // 3. Allocate a memory-backed CMBlockBuffer for the translated data.
  base::ScopedCFTypeRef<CMBlockBufferRef> data;
  CHECK(!CMBlockBufferCreateWithMemoryBlock(
      kCFAllocatorDefault,
      NULL,                 // &memory_block
      data_size,            // block_length
      kCFAllocatorDefault,  // block_allocator
      NULL,                 // &custom_block_source
      0,                    // offset_to_data
      data_size,            // data_length
      0,                    // flags
      data.InitializeInto()));

  // 4. Copy NALU data, inserting length headers.
  size_t offset = 0;
  for (size_t i = 0; i < nalus.size(); i++) {
    media::H264NALU& nalu = nalus[i];
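    // AVCC length headers are big-endian, hence the conversion from host
    // byte order.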
    uint32_t header = base::HostToNet32(static_cast<uint32_t>(nalu.size));
    CHECK(!CMBlockBufferReplaceDataBytes(
        &header, data, offset, kNALUHeaderLength));
    offset += kNALUHeaderLength;
    CHECK(!CMBlockBufferReplaceDataBytes(nalu.data, data, offset, nalu.size));
    offset += nalu.size;
  }

  // 5. Package the data for VideoToolbox and request decoding.
  base::ScopedCFTypeRef<CMSampleBufferRef> frame;
  CHECK(!CMSampleBufferCreate(
      kCFAllocatorDefault,
      data,                 // data_buffer
      true,                 // data_ready
      NULL,                 // make_data_ready_callback
      NULL,                 // make_data_ready_refcon
      format_,              // format_description
      1,                    // num_samples
      0,                    // num_sample_timing_entries
      NULL,                 // &sample_timing_array
      0,                    // num_sample_size_entries
      NULL,                 // &sample_size_array
      frame.InitializeInto()));

  // Asynchronous Decompression allows for parallel submission of frames
  // (without it, DecodeFrame() does not return until the frame has been
  // decoded). We don't enable Temporal Processing so that frames are always
  // returned in decode order; this makes it easier to avoid deadlock.
  VTDecodeFrameFlags decode_flags =
      kVTDecodeFrame_EnableAsynchronousDecompression;

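  // Pass the bitstream ID through |source_frame_refcon|; OutputThunk() will
  // unpack it when the decoded frame is returned.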
  intptr_t bitstream_id = bitstream.id();
  CHECK(!VTDecompressionSessionDecodeFrame(
      session_,
      frame,                                  // sample_buffer
      decode_flags,                           // decode_flags
      reinterpret_cast<void*>(bitstream_id),  // source_frame_refcon
      NULL));                                 // &info_flags_out
}

// This method may be called on any VideoToolbox thread.
// TODO(sandersd): Proper error reporting instead of CHECKs.
void VTVideoDecodeAccelerator::Output(
    int32_t bitstream_id,
    OSStatus status,
    CVImageBufferRef image_buffer) {
  CHECK(!status);
  CHECK_EQ(CFGetTypeID(image_buffer), CVPixelBufferGetTypeID());
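  // Retain the image buffer so that it outlives this callback; it is consumed
  // on the GPU thread by OutputTask().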
  CFRetain(image_buffer);
  gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
      &VTVideoDecodeAccelerator::OutputTask,
      weak_this_factory_.GetWeakPtr(),
      DecodedFrame(bitstream_id, image_buffer)));
}

void VTVideoDecodeAccelerator::OutputTask(DecodedFrame frame) {
  DCHECK(CalledOnValidThread());
  decoded_frames_.push(frame);
  ProcessDecodedFrames();
}

void VTVideoDecodeAccelerator::SizeChangedTask(gfx::Size coded_size) {
  DCHECK(CalledOnValidThread());
  texture_size_ = coded_size;
  // TODO(sandersd): Dismiss existing picture buffers.
  client_->ProvidePictureBuffers(
      kNumPictureBuffers, texture_size_, GL_TEXTURE_RECTANGLE_ARB);
}

void VTVideoDecodeAccelerator::AssignPictureBuffers(
    const std::vector<media::PictureBuffer>& pictures) {
  DCHECK(CalledOnValidThread());

  for (size_t i = 0; i < pictures.size(); i++) {
    CHECK(!texture_ids_.count(pictures[i].id()));
    available_picture_ids_.push(pictures[i].id());
    texture_ids_[pictures[i].id()] = pictures[i].texture_id();
  }

  // Pictures are not marked as uncleared until after this method returns, and
  // they will be broken if they are used before then, so schedule any work
  // that uses them to run afterwards.
  gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
      &VTVideoDecodeAccelerator::ProcessDecodedFrames,
      weak_this_factory_.GetWeakPtr()));
}

void VTVideoDecodeAccelerator::ReusePictureBuffer(int32_t picture_id) {
  DCHECK(CalledOnValidThread());
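  // The client is done with the texture, so the underlying image buffer can
  // be released; the DCHECK verifies that we hold the last reference.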
  DCHECK_EQ(CFGetRetainCount(picture_bindings_[picture_id]), 1);
  picture_bindings_.erase(picture_id);
  available_picture_ids_.push(picture_id);
  ProcessDecodedFrames();
}

void VTVideoDecodeAccelerator::CompleteAction(Action action) {
  DCHECK(CalledOnValidThread());
  switch (action) {
    case ACTION_FLUSH:
      client_->NotifyFlushDone();
      break;
    case ACTION_RESET:
      client_->NotifyResetDone();
      break;
    case ACTION_DESTROY:
      delete this;
      break;
  }
}

void VTVideoDecodeAccelerator::CompleteActions(int32_t bitstream_id) {
  DCHECK(CalledOnValidThread());
  while (!pending_actions_.empty() &&
         pending_actions_.front().bitstream_id == bitstream_id) {
    CompleteAction(pending_actions_.front().action);
    pending_actions_.pop();
  }
}

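// Sends decoded frames to the client, handling any pending flush, reset, or
// destroy actions in bitstream order.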
void VTVideoDecodeAccelerator::ProcessDecodedFrames() {
  DCHECK(CalledOnValidThread());

  while (!decoded_frames_.empty()) {
    if (pending_actions_.empty()) {
      // No pending actions; send frames normally.
      SendPictures(pending_bitstream_ids_.back());
      return;
    }

    int32_t next_action_bitstream_id = pending_actions_.front().bitstream_id;
    int32_t last_sent_bitstream_id = -1;
    switch (pending_actions_.front().action) {
      case ACTION_FLUSH:
        // Send frames normally.
        last_sent_bitstream_id = SendPictures(next_action_bitstream_id);
        break;

      case ACTION_RESET:
        // Drop decoded frames.
        while (!decoded_frames_.empty() &&
               last_sent_bitstream_id != next_action_bitstream_id) {
          last_sent_bitstream_id = decoded_frames_.front().bitstream_id;
          decoded_frames_.pop();
          DCHECK_EQ(pending_bitstream_ids_.front(), last_sent_bitstream_id);
          pending_bitstream_ids_.pop();
          client_->NotifyEndOfBitstreamBuffer(last_sent_bitstream_id);
        }
        break;

      case ACTION_DESTROY:
        // Drop decoded frames, without bookkeeping.
        while (!decoded_frames_.empty()) {
          last_sent_bitstream_id = decoded_frames_.front().bitstream_id;
          decoded_frames_.pop();
        }

        // Handle completing the action specially, as it is important not to
        // access |this| after calling CompleteAction().
        if (last_sent_bitstream_id == next_action_bitstream_id)
          CompleteAction(ACTION_DESTROY);

        // Either |this| was deleted or no more progress can be made.
        return;
    }

    // If we ran out of buffers (or pictures), no more progress can be made
    // until more frames are decoded.
    if (last_sent_bitstream_id != next_action_bitstream_id)
      return;

    // Complete all actions pending for this |bitstream_id|, then loop to see
    // if progress can be made on the next action.
    CompleteActions(next_action_bitstream_id);
  }
}

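// Sends decoded frames as pictures, up to and including |up_to_bitstream_id|,
// while picture buffers are available. Returns the ID of the last bitstream
// buffer consumed, or -1 if none was.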
int32_t VTVideoDecodeAccelerator::SendPictures(int32_t up_to_bitstream_id) {
  DCHECK(CalledOnValidThread());
  DCHECK(!decoded_frames_.empty());

  if (available_picture_ids_.empty())
    return -1;

  gfx::ScopedCGLSetCurrentContext scoped_set_current_context(cgl_context_);
  glEnable(GL_TEXTURE_RECTANGLE_ARB);

  int32_t last_sent_bitstream_id = -1;
  while (!available_picture_ids_.empty() &&
         !decoded_frames_.empty() &&
         last_sent_bitstream_id != up_to_bitstream_id) {
    DecodedFrame frame = decoded_frames_.front();
    decoded_frames_.pop();
    DCHECK_EQ(pending_bitstream_ids_.front(), frame.bitstream_id);
    pending_bitstream_ids_.pop();
    int32_t picture_id = available_picture_ids_.front();
    available_picture_ids_.pop();

    CVImageBufferRef image_buffer = frame.image_buffer.get();
    if (image_buffer) {
      IOSurfaceRef surface = CVPixelBufferGetIOSurface(image_buffer);

      // TODO(sandersd): Find out why this sometimes fails due to no GL context.
      gfx::ScopedTextureBinder
          texture_binder(GL_TEXTURE_RECTANGLE_ARB, texture_ids_[picture_id]);
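      // CGLTexImageIOSurface2D() binds the IOSurface backing the decoded
      // frame directly to the texture, so no pixel data is copied.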
      CHECK(!CGLTexImageIOSurface2D(
          cgl_context_,                 // ctx
          GL_TEXTURE_RECTANGLE_ARB,     // target
          GL_RGB,                       // internal_format
          texture_size_.width(),        // width
          texture_size_.height(),       // height
          GL_YCBCR_422_APPLE,           // format
          GL_UNSIGNED_SHORT_8_8_APPLE,  // type
          surface,                      // io_surface
          0));                          // plane

      picture_bindings_[picture_id] = frame.image_buffer;
      client_->PictureReady(media::Picture(
          picture_id, frame.bitstream_id, gfx::Rect(texture_size_)));
    }

    client_->NotifyEndOfBitstreamBuffer(frame.bitstream_id);
    last_sent_bitstream_id = frame.bitstream_id;
  }

  glDisable(GL_TEXTURE_RECTANGLE_ARB);
  return last_sent_bitstream_id;
}

void VTVideoDecodeAccelerator::FlushTask() {
  DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
  CHECK(!VTDecompressionSessionFinishDelayedFrames(session_));
}

void VTVideoDecodeAccelerator::QueueAction(Action action) {
  DCHECK(CalledOnValidThread());
  if (pending_bitstream_ids_.empty()) {
    // If there are no pending frames, all actions complete immediately.
    CompleteAction(action);
  } else {
    // Otherwise, queue the action.
    pending_actions_.push(PendingAction(action, pending_bitstream_ids_.back()));

    // Request a flush to make sure the action will eventually complete.
    decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind(
        &VTVideoDecodeAccelerator::FlushTask, base::Unretained(this)));

    // See if we can make progress now that there is a new pending action.
    ProcessDecodedFrames();
  }
}

void VTVideoDecodeAccelerator::Flush() {
  DCHECK(CalledOnValidThread());
  QueueAction(ACTION_FLUSH);
}

void VTVideoDecodeAccelerator::Reset() {
  DCHECK(CalledOnValidThread());
  QueueAction(ACTION_RESET);
}

void VTVideoDecodeAccelerator::Destroy() {
  DCHECK(CalledOnValidThread());
  // Drop any other pending actions.
  while (!pending_actions_.empty())
    pending_actions_.pop();
  // Return all bitstream buffers.
  while (!pending_bitstream_ids_.empty()) {
    client_->NotifyEndOfBitstreamBuffer(pending_bitstream_ids_.front());
    pending_bitstream_ids_.pop();
  }
  QueueAction(ACTION_DESTROY);
}

bool VTVideoDecodeAccelerator::CanDecodeOnIOThread() {
  return false;
}

}  // namespace content