1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ui/surface/accelerated_surface_transformer_win.h"
6
7#include <vector>
8
9#include "accelerated_surface_transformer_win_hlsl_compiled.h"
10#include "base/debug/trace_event.h"
11#include "base/memory/ref_counted.h"
12#include "base/metrics/histogram.h"
13#include "base/single_thread_task_runner.h"
14#include "base/synchronization/lock.h"
15#include "base/synchronization/waitable_event.h"
16#include "base/win/scoped_comptr.h"
17#include "ui/gfx/native_widget_types.h"
18#include "ui/gfx/rect.h"
19#include "ui/gfx/size.h"
20#include "ui/surface/d3d9_utils_win.h"
21#include "ui/surface/surface_export.h"
22
23using base::win::ScopedComPtr;
24using std::vector;
25using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
26using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
27using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
28using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
29using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
30using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture;
31using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
32using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
33using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
34using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;
35
36namespace d3d_utils = ui_surface_d3d9_utils;
37
38namespace {
39
// Layout of one vertex handed to DrawPrimitiveUP(): a four-component position
// followed by a two-component texture coordinate. The layout must stay in
// sync with |g_vertexElements|.
struct Vertex {
  // Position.
  float x;
  float y;
  float z;
  float w;
  // Texture coordinate.
  float u;
  float v;
};
44
45const static D3DVERTEXELEMENT9 g_vertexElements[] = {
46  { 0, 0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 },
47  { 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 },
48  D3DDECL_END()
49};
50
51class ScopedRenderTargetRestorer {
52 public:
53  ScopedRenderTargetRestorer(IDirect3DDevice9* device,
54                             int render_target_id)
55    : device_(device),
56      target_id_(render_target_id) {
57    device_->GetRenderTarget(target_id_, original_render_target_.Receive());
58  }
59  ~ScopedRenderTargetRestorer() {
60    device_->SetRenderTarget(target_id_, original_render_target_);
61  }
62 private:
63  ScopedComPtr<IDirect3DDevice9> device_;
64  int target_id_;
65  ScopedComPtr<IDirect3DSurface9> original_render_target_;
66};
67
68// Calculate the number necessary to transform |src_subrect| into |dst_size|
69// by repeating downsampling of the image of |src_subrect| by a factor no more
70// than 2.
71int GetResampleCount(const gfx::Rect& src_subrect,
72                     const gfx::Size& dst_size) {
73  // At least one copy is required, since the back buffer itself is not
74  // lockable.
75  int min_resample_count = 1;
76  int width_count = 0;
77  int width = src_subrect.width();
78  while (width > dst_size.width()) {
79    ++width_count;
80    width >>= 1;
81  }
82  int height_count = 0;
83  int height = src_subrect.height();
84  while (height > dst_size.height()) {
85    ++height_count;
86    height >>= 1;
87  }
88  return std::max(std::max(width_count, height_count),
89                  min_resample_count);
90}
91
92// Returns half the size of |size| no smaller than |min_size|.
93gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
94                                const gfx::Size& min_size) {
95  return gfx::Size(std::max(min_size.width(), size.width() / 2),
96                   std::max(min_size.height(), size.height() / 2));
97}
98
99}  // namespace
100
101AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
102    : device_supports_multiple_render_targets_(false),
103      vertex_shader_sources_(),
104      pixel_shader_sources_() {
105
106  // Associate passes with actual shader programs.
107  vertex_shader_sources_[ONE_TEXTURE] = kVsOneTexture;
108  pixel_shader_sources_[ONE_TEXTURE] = kPsOneTexture;
109
110  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kVsFetch4Pixels;
111  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kPsConvertRGBtoY8UV44;
112
113  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kVsFetch2Pixels;
114  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kPsConvertUV44toU2V2;
115
116  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kVsFetch4Pixels;
117  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kPsConvertRGBtoY;
118
119  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kVsFetch4PixelsScale2;
120  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kPsConvertRGBtoU;
121
122  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kVsFetch4PixelsScale2;
123  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kPsConvertRGBtoV;
124
125  COMPILE_ASSERT(NUM_SHADERS == 6, must_initialize_shader_sources);
126}
127
128bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
129  bool result = DoInit(device);
130  if (!result) {
131    ReleaseAll();
132  }
133  return result;
134}
135
136bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
137  device_ = device;
138
139  {
140    D3DCAPS9 caps;
141    HRESULT hr = device->GetDeviceCaps(&caps);
142    if (FAILED(hr))
143      return false;
144
145    device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2);
146
147    // Log statistics about which paths we take.
148    UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
149                          device_supports_multiple_render_targets());
150  }
151
152  // Force compilation of all shaders that could be used on this GPU.
153  if (!CompileShaderCombo(ONE_TEXTURE))
154    return false;
155
156  if (device_supports_multiple_render_targets()) {
157    if (!CompileShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2) ||
158        !CompileShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) {
159      return false;
160    }
161  } else {
162    if (!CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3) ||
163        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3) ||
164        !CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) {
165      return false;
166    }
167  }
168  COMPILE_ASSERT(NUM_SHADERS == 6, must_compile_at_doinit);
169
170  ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration;
171  HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements,
172                                                vertex_declaration.Receive());
173  if (FAILED(hr))
174    return false;
175  hr = device_->SetVertexDeclaration(vertex_declaration);
176  if (FAILED(hr))
177    return false;
178
179  return true;
180}
181
182bool AcceleratedSurfaceTransformer::CompileShaderCombo(
183    ShaderCombo shader) {
184  if (!vertex_shaders_[shader]) {
185    HRESULT hr = device_->CreateVertexShader(
186        reinterpret_cast<const DWORD*>(vertex_shader_sources_[shader]),
187        vertex_shaders_[shader].Receive());
188
189    if (FAILED(hr))
190      return false;
191
192    for (int i = 0; i < NUM_SHADERS; ++i) {
193      if (vertex_shader_sources_[i] == vertex_shader_sources_[shader] &&
194          i != shader) {
195        vertex_shaders_[i] = vertex_shaders_[shader];
196      }
197    }
198  }
199
200  if (!pixel_shaders_[shader]) {
201    HRESULT hr = device_->CreatePixelShader(
202        reinterpret_cast<const DWORD*>(pixel_shader_sources_[shader]),
203        pixel_shaders_[shader].Receive());
204
205    if (FAILED(hr))
206      return false;
207
208    for (int i = 0; i < NUM_SHADERS; ++i) {
209      if (pixel_shader_sources_[i] == pixel_shader_sources_[shader] &&
210          i != shader) {
211        pixel_shaders_[i] = pixel_shaders_[shader];
212      }
213    }
214  }
215
216  return true;
217}
218
219void AcceleratedSurfaceTransformer::ReleaseAll() {
220  for (int i = 0; i < NUM_SHADERS; i++) {
221    vertex_shaders_[i] = NULL;
222    pixel_shaders_[i] = NULL;
223  }
224
225  user_scratch_texture_ = NULL;
226  uv_scratch_texture_ = NULL;
227  y_scratch_surface_ = NULL;
228  u_scratch_surface_ = NULL;
229  v_scratch_surface_ = NULL;
230  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
231    scaler_scratch_surfaces_[i] = NULL;
232
233  device_ = NULL;
234}
235void AcceleratedSurfaceTransformer::DetachAll() {
236  for (int i = 0; i < NUM_SHADERS; i++) {
237    vertex_shaders_[i].Detach();
238    pixel_shaders_[i].Detach();
239  }
240
241  user_scratch_texture_.Detach();
242  uv_scratch_texture_.Detach();
243  y_scratch_surface_.Detach();
244  u_scratch_surface_.Detach();
245  v_scratch_surface_.Detach();
246  for (int i = 0; i < arraysize(scaler_scratch_surfaces_); i++)
247    scaler_scratch_surfaces_[i].Detach();
248
249  device_.Detach();
250}
251
252bool AcceleratedSurfaceTransformer::CopyInverted(
253    IDirect3DTexture9* src_texture,
254    IDirect3DSurface9* dst_surface,
255    const gfx::Size& dst_size) {
256  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, -1.0f);
257}
258
259bool AcceleratedSurfaceTransformer::Copy(
260    IDirect3DTexture9* src_texture,
261    IDirect3DSurface9* dst_surface,
262    const gfx::Size& dst_size) {
263  return CopyWithTextureScale(src_texture, dst_surface, dst_size, 1.0f, 1.0f);
264}
265
266bool AcceleratedSurfaceTransformer::CopyWithTextureScale(
267    IDirect3DTexture9* src_texture,
268    IDirect3DSurface9* dst_surface,
269    const gfx::Size& dst_size,
270    float texture_scale_x,
271    float texture_scale_y) {
272
273  if (!SetShaderCombo(ONE_TEXTURE))
274    return false;
275
276  // Set the kTextureScale vertex shader constant, which is assigned to
277  // register 1.
278  float texture_scale[4] = {texture_scale_x, texture_scale_y, 0, 0};
279  device()->SetVertexShaderConstantF(1, texture_scale, 1);
280
281  ScopedRenderTargetRestorer render_target_restorer(device(), 0);
282  device()->SetRenderTarget(0, dst_surface);
283  device()->SetTexture(0, src_texture);
284
285  D3DVIEWPORT9 viewport = {
286    0, 0,
287    dst_size.width(), dst_size.height(),
288    0, 1
289  };
290  device()->SetViewport(&viewport);
291
292  if (d3d_utils::GetSize(src_texture) == dst_size) {
293    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
294    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
295  } else {
296    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
297    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
298  }
299  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
300  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
301
302  DrawScreenAlignedQuad(dst_size);
303
304  // Clear surface references.
305  device()->SetTexture(0, NULL);
306  return true;
307}
308
309void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
310    const gfx::Size& size) {
311  const float target_size[4] = { size.width(), size.height(), 0, 0};
312
313  // Set the uniform shader constant |kRenderTargetSize|, which is bound
314  // to register c0.
315  device()->SetVertexShaderConstantF(0, target_size, 1);
316
317  // We always send down the same vertices. The vertex program will take
318  // care of doing resolution-dependent position adjustment.
319  Vertex vertices[] = {
320    { -1, +1, 0.5f, 1, 0, 0 },
321    { +1, +1, 0.5f, 1, 1, 0 },
322    { +1, -1, 0.5f, 1, 1, 1 },
323    { -1, -1, 0.5f, 1, 0, 1 }
324  };
325
326  device()->BeginScene();
327  device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN,
328                            2,
329                            vertices,
330                            sizeof(vertices[0]));
331  device()->EndScene();
332
333}
334
335bool AcceleratedSurfaceTransformer::GetIntermediateTexture(
336    const gfx::Size& size,
337    IDirect3DTexture9** texture,
338    IDirect3DSurface9** texture_level_zero) {
339  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
340                                                   size,
341                                                   &user_scratch_texture_,
342                                                   texture_level_zero))
343    return false;
344
345  *texture = ScopedComPtr<IDirect3DTexture9>(user_scratch_texture_).Detach();
346  return true;
347}
348
349// Resize an RGB surface using repeated linear interpolation.
350bool AcceleratedSurfaceTransformer::ResizeBilinear(
351    IDirect3DSurface9* src_surface,
352    const gfx::Rect& src_subrect,
353    IDirect3DSurface9* dst_surface,
354    const gfx::Rect& dst_rect) {
355  COMPILE_ASSERT(arraysize(scaler_scratch_surfaces_) == 2, surface_count);
356
357  gfx::Size src_size = src_subrect.size();
358  gfx::Size dst_size = dst_rect.size();
359
360  if (src_size.IsEmpty() || dst_size.IsEmpty())
361    return false;
362
363  HRESULT hr = S_OK;
364  // Set up intermediate buffers needed for downsampling.
365  const int resample_count = GetResampleCount(src_subrect, dst_size);
366  const gfx::Size half_size =
367      GetHalfSizeNoLessThan(src_subrect.size(), dst_size);
368  if (resample_count > 1) {
369    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
370                                                 half_size,
371                                                 &scaler_scratch_surfaces_[0]))
372      return false;
373  }
374  if (resample_count > 2) {
375    const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size);
376    if (!d3d_utils::CreateOrReuseLockableSurface(device(),
377                                                 quarter_size,
378                                                 &scaler_scratch_surfaces_[1]))
379      return false;
380  }
381
382  // Repeat downsampling the surface until its size becomes identical to
383  // |dst_size|. We keep the factor of each downsampling no more than two
384  // because using a factor more than two can introduce aliasing.
385  RECT read_rect = src_subrect.ToRECT();
386  gfx::Size write_size = half_size;
387  int read_buffer_index = 1;
388  int write_buffer_index = 0;
389  for (int i = 0; i < resample_count; ++i) {
390    TRACE_EVENT0("gpu", "StretchRect");
391    IDirect3DSurface9* read_buffer =
392        (i == 0) ? src_surface : scaler_scratch_surfaces_[read_buffer_index];
393    IDirect3DSurface9* write_buffer;
394    RECT write_rect;
395    if (i == resample_count - 1) {
396      write_buffer = dst_surface;
397      write_rect = dst_rect.ToRECT();
398    } else {
399      write_buffer = scaler_scratch_surfaces_[write_buffer_index];
400      write_rect = gfx::Rect(write_size).ToRECT();
401    }
402
403    hr = device()->StretchRect(read_buffer,
404                               &read_rect,
405                               write_buffer,
406                               &write_rect,
407                               D3DTEXF_LINEAR);
408
409    if (FAILED(hr))
410      return false;
411    read_rect = write_rect;
412    write_size = GetHalfSizeNoLessThan(write_size, dst_size);
413    std::swap(read_buffer_index, write_buffer_index);
414  }
415
416  return true;
417}
418
419bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
420    IDirect3DTexture9* src_surface,
421    const gfx::Size& dst_size,
422    IDirect3DSurface9** dst_y,
423    IDirect3DSurface9** dst_u,
424    IDirect3DSurface9** dst_v) {
425  gfx::Size packed_y_size;
426  gfx::Size packed_uv_size;
427  if (!AllocYUVBuffers(dst_size, &packed_y_size, &packed_uv_size,
428                       dst_y, dst_u, dst_v)) {
429    return false;
430  }
431
432  if (device_supports_multiple_render_targets()) {
433    return TransformRGBToYV12_MRT(src_surface,
434                                  dst_size,
435                                  packed_y_size,
436                                  packed_uv_size,
437                                  *dst_y,
438                                  *dst_u,
439                                  *dst_v);
440  } else {
441    return TransformRGBToYV12_WithoutMRT(src_surface,
442                                         dst_size,
443                                         packed_y_size,
444                                         packed_uv_size,
445                                         *dst_y,
446                                         *dst_u,
447                                         *dst_v);
448  }
449}
450
451bool AcceleratedSurfaceTransformer::ReadFast(IDirect3DSurface9* gpu_surface,
452                                             uint8* dst,
453                                             int dst_bytes_per_row,
454                                             int dst_num_rows,
455                                             int dst_stride) {
456  // TODO(nick): Compared to GetRenderTargetData, LockRect+memcpy is 50% faster
457  // on some systems, but 100x slower on others. We should have logic here to
458  // choose the best path, probably by adaptively trying both and picking the
459  // faster one. http://crbug.com/168532
460  return ReadByGetRenderTargetData(gpu_surface, dst, dst_bytes_per_row,
461                                   dst_num_rows, dst_stride);
462}
463
464bool AcceleratedSurfaceTransformer::ReadByLockAndCopy(
465    IDirect3DSurface9* gpu_surface,
466    uint8* dst,
467    int dst_bytes_per_row,
468    int dst_num_rows,
469    int dst_stride) {
470  D3DLOCKED_RECT locked_rect;
471  {
472    TRACE_EVENT0("gpu", "LockRect");
473    HRESULT hr = gpu_surface->LockRect(&locked_rect, NULL,
474                                       D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
475    if (FAILED(hr)) {
476      LOG(ERROR) << "Failed to lock surface";
477      return false;
478    }
479  }
480
481  {
482    TRACE_EVENT0("gpu", "memcpy");
483    uint8* dst_row = dst;
484    uint8* src_row = reinterpret_cast<uint8*>(locked_rect.pBits);
485    for (int i = 0; i < dst_num_rows; i++) {
486      memcpy(dst_row, src_row, dst_bytes_per_row);
487      src_row += locked_rect.Pitch;
488      dst_row += dst_stride;
489    }
490  }
491  gpu_surface->UnlockRect();
492  return true;
493}
494
495bool AcceleratedSurfaceTransformer::ReadByGetRenderTargetData(
496    IDirect3DSurface9* gpu_surface,
497    uint8* dst,
498    int dst_bytes_per_row,
499    int dst_num_rows,
500    int dst_stride) {
501  HRESULT hr = 0;
502  ScopedComPtr<IDirect3DSurface9> system_surface;
503  gfx::Size src_size = d3d_utils::GetSize(gpu_surface);
504
505  // Depending on pitch and alignment, we might be able to wrap |dst| in an
506  // offscreen- plain surface for a direct copy.
507  const bool direct_copy = (dst_stride == dst_bytes_per_row &&
508                            src_size.width() * 4 == dst_bytes_per_row &&
509                            dst_num_rows >= src_size.height());
510
511  {
512    TRACE_EVENT0("gpu", "CreateOffscreenPlainSurface");
513    HANDLE handle = reinterpret_cast<HANDLE>(dst);
514    hr = device()->CreateOffscreenPlainSurface(src_size.width(),
515                                               src_size.height(),
516                                               D3DFMT_A8R8G8B8,
517                                               D3DPOOL_SYSTEMMEM,
518                                               system_surface.Receive(),
519                                               direct_copy ? &handle : NULL);
520    if (!SUCCEEDED(hr)) {
521      LOG(ERROR) << "Failed to create offscreen plain surface.";
522      return false;
523    }
524  }
525
526  {
527    TRACE_EVENT0("gpu", "GetRenderTargetData");
528    hr = device()->GetRenderTargetData(gpu_surface, system_surface);
529    if (FAILED(hr)) {
530      LOG(ERROR) << "Failed GetRenderTargetData";
531      return false;
532    }
533  }
534
535  if (direct_copy) {
536    // We're done: |system_surface| is a wrapper around |dst|.
537    return true;
538  } else {
539    // Extra memcpy required from |system_surface| to |dst|.
540    return ReadByLockAndCopy(system_surface, dst, dst_bytes_per_row,
541                             dst_num_rows, dst_stride);
542  }
543}
544
545bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
546    const gfx::Size& dst_size,
547    gfx::Size* y_size,
548    gfx::Size* uv_size,
549    IDirect3DSurface9** dst_y,
550    IDirect3DSurface9** dst_u,
551    IDirect3DSurface9** dst_v) {
552
553  // Y is full height, packed into 4 components.
554  *y_size = gfx::Size((dst_size.width() + 3) / 4, dst_size.height());
555
556  // U and V are half the size (rounded up) of Y.
557  *uv_size = gfx::Size((y_size->width() + 1) / 2, (y_size->height() + 1) / 2);
558
559  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *y_size,
560                                               &y_scratch_surface_)) {
561    return false;
562  }
563  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
564                                               &u_scratch_surface_)) {
565    return false;
566  }
567  if (!d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
568                                               &v_scratch_surface_)) {
569    return false;
570  }
571
572  *dst_y = ScopedComPtr<IDirect3DSurface9>(y_scratch_surface_).Detach();
573  *dst_u = ScopedComPtr<IDirect3DSurface9>(u_scratch_surface_).Detach();
574  *dst_v = ScopedComPtr<IDirect3DSurface9>(v_scratch_surface_).Detach();
575
576  return true;
577}
578
579bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
580    IDirect3DTexture9* src_surface,
581    const gfx::Size& dst_size,
582    const gfx::Size& packed_y_size,
583    const gfx::Size& packed_uv_size,
584    IDirect3DSurface9* dst_y,
585    IDirect3DSurface9* dst_u,
586    IDirect3DSurface9* dst_v) {
587  TRACE_EVENT0("gpu", "RGBToYV12_MRT");
588
589  ScopedRenderTargetRestorer color0_restorer(device(), 0);
590  ScopedRenderTargetRestorer color1_restorer(device(), 1);
591
592  // Create an intermediate surface to hold the UUVV values. This is color
593  // target 1 for the first pass, and texture 0 for the second pass. Its
594  // values are not read afterwards.
595
596  ScopedComPtr<IDirect3DSurface9> uv_as_surface;
597  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
598                                                   packed_y_size,
599                                                   &uv_scratch_texture_,
600                                                   uv_as_surface.Receive())) {
601    return false;
602  }
603
604  // Clamping is required if (dst_size.width() % 8 != 0) or if
605  // (dst_size.height != 0), so we set it always. Both passes rely on this.
606  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
607  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
608
609  /////////////////////////////////////////
610  // Pass 1: RGB --(scaled)--> YYYY + UUVV
611  SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2);
612
613  // Enable bilinear filtering if scaling is required. The filtering will take
614  // place entirely in the first pass.
615  if (d3d_utils::GetSize(src_surface) != dst_size) {
616    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
617    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
618  } else {
619    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
620    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
621  }
622
623  device()->SetTexture(0, src_surface);
624  device()->SetRenderTarget(0, dst_y);
625  device()->SetRenderTarget(1, uv_as_surface);
626  DrawScreenAlignedQuad(dst_size);
627
628  /////////////////////////////////////////
629  // Pass 2: UUVV -> UUUU + VVVV
630  SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);
631
632  // The second pass uses bilinear minification to achieve vertical scaling,
633  // so enable it always.
634  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
635  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
636
637  device()->SetTexture(0, uv_scratch_texture_);
638  device()->SetRenderTarget(0, dst_u);
639  device()->SetRenderTarget(1, dst_v);
640  DrawScreenAlignedQuad(packed_y_size);
641
642  // Clear surface references.
643  device()->SetTexture(0, NULL);
644  return true;
645}
646
647bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
648    IDirect3DTexture9* src_surface,
649    const gfx::Size& dst_size,
650    const gfx::Size& packed_y_size,
651    const gfx::Size& packed_uv_size,
652    IDirect3DSurface9* dst_y,
653    IDirect3DSurface9* dst_u,
654    IDirect3DSurface9* dst_v) {
655  TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");
656
657  ScopedRenderTargetRestorer color0_restorer(device(), 0);
658
659  ScopedComPtr<IDirect3DTexture9> scaled_src_surface;
660
661  // If scaling is requested, do it to a temporary texture. The MRT path
662  // gets a scale for free, so we need to support it here too (even though
663  // it's an extra operation).
664  if (d3d_utils::GetSize(src_surface) == dst_size) {
665    scaled_src_surface = src_surface;
666  } else {
667    ScopedComPtr<IDirect3DSurface9> dst_level0;
668    if (!d3d_utils::CreateOrReuseRenderTargetTexture(
669            device(), dst_size, &uv_scratch_texture_, dst_level0.Receive())) {
670      return false;
671    }
672    if (!Copy(src_surface, dst_level0, dst_size)) {
673      return false;
674    }
675    scaled_src_surface = uv_scratch_texture_;
676  }
677
678  // Input texture is the same for all three passes.
679  device()->SetTexture(0, scaled_src_surface);
680
681  // Clamping is required if (dst_size.width() % 8 != 0) or if
682  // (dst_size.height != 0), so we set it always. All passes rely on this.
683  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
684  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
685
686  /////////////////////
687  // Pass 1: RGB -> Y.
688  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3);
689
690  // Pass 1 just needs point sampling.
691  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
692  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
693
694  device()->SetRenderTarget(0, dst_y);
695  DrawScreenAlignedQuad(dst_size);
696
697  // Passes 2 and 3 rely on bilinear minification to downsample U and V.
698  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
699  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
700
701  /////////////////////
702  // Pass 2: RGB -> U.
703  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3);
704  device()->SetRenderTarget(0, dst_u);
705  DrawScreenAlignedQuad(dst_size);
706
707  /////////////////////
708  // Pass 3: RGB -> V.
709  SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
710  device()->SetRenderTarget(0, dst_v);
711  DrawScreenAlignedQuad(dst_size);
712
713  // Clear surface references.
714  device()->SetTexture(0, NULL);
715  return true;
716}
717
718IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
719  return device_;
720}
721
722bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
723  // Compile shaders on first use, if needed. Normally the compilation should
724  // already have happened at Init() time, but test code might force
725  // us down an unusual path.
726  if (!CompileShaderCombo(combo))
727    return false;
728
729  HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]);
730  if (!SUCCEEDED(hr))
731    return false;
732  hr = device()->SetPixelShader(pixel_shaders_[combo]);
733  if (!SUCCEEDED(hr))
734    return false;
735  return true;
736}
737