1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "main/bufferobj.h"
29#include "main/image.h"
30#include "main/pbo.h"
31#include "main/imports.h"
32#include "main/readpix.h"
33#include "main/enums.h"
34#include "main/framebuffer.h"
35#include "util/u_inlines.h"
36#include "util/u_format.h"
37#include "cso_cache/cso_context.h"
38
39#include "st_cb_fbo.h"
40#include "st_atom.h"
41#include "st_context.h"
42#include "st_cb_bitmap.h"
43#include "st_cb_readpixels.h"
44#include "st_debug.h"
45#include "state_tracker/st_cb_texture.h"
46#include "state_tracker/st_format.h"
47#include "state_tracker/st_pbo.h"
48#include "state_tracker/st_texture.h"
49
/* The readpixels cache caches a blitted staging texture so that back-to-back
 * calls to glReadPixels with user pointers require less CPU-GPU synchronization.
 *
 * Assumptions:
 *
 * (1) Blits have high synchronization overheads, and it is beneficial to
 *     use a single blit of the entire framebuffer instead of many smaller
 *     blits (because the smaller blits cannot be batched, and we have to wait
 *     for the GPU after each one).
 *
 * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy
 *     from VRAM, etc.), so that it is beneficial to replace the
 *     _mesa_readpixels path as well when possible.
 *
 * Change this #define to true to fill and use the cache whenever possible
 * (this is inefficient and only meant for testing / debugging).
 *
 * The macro is consulted in try_cached_readpixels(): when it is false, the
 * cache is only filled once the renderbuffer's use_readpix_cache flag is set
 * or the accumulated-hits heuristic reaches its threshold.
 */
#define ALWAYS_READPIXELS_CACHE false
68
69static boolean
70needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
71                                         GLenum format, GLenum type)
72{
73   struct gl_renderbuffer *rb =
74      _mesa_get_read_renderbuffer_for_format(ctx, format);
75
76   assert(rb);
77
78   GLenum srcType = _mesa_get_format_datatype(rb->Format);
79
80    if ((srcType == GL_INT &&
81        (type == GL_UNSIGNED_INT ||
82         type == GL_UNSIGNED_SHORT ||
83         type == GL_UNSIGNED_BYTE)) ||
84       (srcType == GL_UNSIGNED_INT &&
85        (type == GL_INT ||
86         type == GL_SHORT ||
87         type == GL_BYTE))) {
88      return TRUE;
89   }
90
91   return FALSE;
92}
93
94static bool
95try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
96                   bool invert_y,
97                   GLint x, GLint y, GLsizei width, GLsizei height,
98                   enum pipe_format src_format, enum pipe_format dst_format,
99                   const struct gl_pixelstore_attrib *pack, void *pixels)
100{
101   struct pipe_context *pipe = st->pipe;
102   struct pipe_screen *screen = pipe->screen;
103   struct cso_context *cso = st->cso_context;
104   struct pipe_surface *surface = strb->surface;
105   struct pipe_resource *texture = strb->texture;
106   const struct util_format_description *desc;
107   struct st_pbo_addresses addr;
108   struct pipe_framebuffer_state fb;
109   enum pipe_texture_target view_target;
110   bool success = false;
111
112   if (texture->nr_samples > 1)
113      return false;
114
115   if (!screen->is_format_supported(screen, dst_format, PIPE_BUFFER, 0,
116                                    PIPE_BIND_SHADER_IMAGE))
117      return false;
118
119   desc = util_format_description(dst_format);
120
121   /* Compute PBO addresses */
122   addr.bytes_per_pixel = desc->block.bits / 8;
123   addr.xoffset = x;
124   addr.yoffset = y;
125   addr.width = width;
126   addr.height = height;
127   addr.depth = 1;
128   if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr))
129      return false;
130
131   cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
132                        CSO_BIT_FRAGMENT_SAMPLERS |
133                        CSO_BIT_FRAGMENT_IMAGE0 |
134                        CSO_BIT_BLEND |
135                        CSO_BIT_VERTEX_ELEMENTS |
136                        CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
137                        CSO_BIT_FRAMEBUFFER |
138                        CSO_BIT_VIEWPORT |
139                        CSO_BIT_RASTERIZER |
140                        CSO_BIT_DEPTH_STENCIL_ALPHA |
141                        CSO_BIT_STREAM_OUTPUTS |
142                        CSO_BIT_PAUSE_QUERIES |
143                        CSO_BIT_SAMPLE_MASK |
144                        CSO_BIT_MIN_SAMPLES |
145                        CSO_BIT_RENDER_CONDITION |
146                        CSO_BITS_ALL_SHADERS));
147   cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
148
149   cso_set_sample_mask(cso, ~0);
150   cso_set_min_samples(cso, 1);
151   cso_set_render_condition(cso, NULL, FALSE, 0);
152
153   /* Set up the sampler_view */
154   {
155      struct pipe_sampler_view templ;
156      struct pipe_sampler_view *sampler_view;
157      struct pipe_sampler_state sampler = {0};
158      const struct pipe_sampler_state *samplers[1] = {&sampler};
159
160      u_sampler_view_default_template(&templ, texture, src_format);
161
162      switch (texture->target) {
163      case PIPE_TEXTURE_CUBE:
164      case PIPE_TEXTURE_CUBE_ARRAY:
165         view_target = PIPE_TEXTURE_2D_ARRAY;
166         break;
167      default:
168         view_target = texture->target;
169         break;
170      }
171
172      templ.target = view_target;
173      templ.u.tex.first_level = surface->u.tex.level;
174      templ.u.tex.last_level = templ.u.tex.first_level;
175
176      if (view_target != PIPE_TEXTURE_3D) {
177         templ.u.tex.first_layer = surface->u.tex.first_layer;
178         templ.u.tex.last_layer = templ.u.tex.last_layer;
179      } else {
180         addr.constants.layer_offset = surface->u.tex.first_layer;
181      }
182
183      sampler_view = pipe->create_sampler_view(pipe, texture, &templ);
184      if (sampler_view == NULL)
185         goto fail;
186
187      cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
188
189      pipe_sampler_view_reference(&sampler_view, NULL);
190
191      cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
192   }
193
194   /* Set up destination image */
195   {
196      struct pipe_image_view image;
197
198      memset(&image, 0, sizeof(image));
199      image.resource = addr.buffer;
200      image.format = dst_format;
201      image.access = PIPE_IMAGE_ACCESS_WRITE;
202      image.u.buf.offset = addr.first_element * addr.bytes_per_pixel;
203      image.u.buf.size = (addr.last_element - addr.first_element + 1) *
204                         addr.bytes_per_pixel;
205
206      cso_set_shader_images(cso, PIPE_SHADER_FRAGMENT, 0, 1, &image);
207   }
208
209   /* Set up no-attachment framebuffer */
210   memset(&fb, 0, sizeof(fb));
211   fb.width = surface->width;
212   fb.height = surface->height;
213   fb.samples = 1;
214   fb.layers = 1;
215   cso_set_framebuffer(cso, &fb);
216
217   /* Any blend state would do. Set this just to prevent drivers having
218    * blend == NULL.
219    */
220   cso_set_blend(cso, &st->pbo.upload_blend);
221
222   cso_set_viewport_dims(cso, fb.width, fb.height, invert_y);
223
224   if (invert_y)
225      st_pbo_addresses_invert_y(&addr, fb.height);
226
227   {
228      struct pipe_depth_stencil_alpha_state dsa;
229      memset(&dsa, 0, sizeof(dsa));
230      cso_set_depth_stencil_alpha(cso, &dsa);
231   }
232
233   /* Set up the fragment shader */
234   {
235      void *fs = st_pbo_get_download_fs(st, view_target, src_format, dst_format);
236      if (!fs)
237         goto fail;
238
239      cso_set_fragment_shader_handle(cso, fs);
240   }
241
242   success = st_pbo_draw(st, &addr, fb.width, fb.height);
243
244   /* Buffer written via shader images needs explicit synchronization. */
245   pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
246
247fail:
248   cso_restore_state(cso);
249   cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
250
251   return success;
252}
253
254/* Invalidate the readpixels cache to ensure we don't read stale data.
255 */
256void st_invalidate_readpix_cache(struct st_context *st)
257{
258   pipe_resource_reference(&st->readpix_cache.src, NULL);
259   pipe_resource_reference(&st->readpix_cache.cache, NULL);
260}
261
262/**
263 * Create a staging texture and blit the requested region to it.
264 */
265static struct pipe_resource *
266blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
267                   bool invert_y,
268                   GLint x, GLint y, GLsizei width, GLsizei height,
269                   GLenum format,
270                   enum pipe_format src_format, enum pipe_format dst_format)
271{
272   struct pipe_context *pipe = st->pipe;
273   struct pipe_screen *screen = pipe->screen;
274   struct pipe_resource dst_templ;
275   struct pipe_resource *dst;
276   struct pipe_blit_info blit;
277
278   /* We are creating a texture of the size of the region being read back.
279    * Need to check for NPOT texture support. */
280   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
281       (!util_is_power_of_two(width) ||
282        !util_is_power_of_two(height)))
283      return NULL;
284
285   /* create the destination texture */
286   memset(&dst_templ, 0, sizeof(dst_templ));
287   dst_templ.target = PIPE_TEXTURE_2D;
288   dst_templ.format = dst_format;
289   if (util_format_is_depth_or_stencil(dst_format))
290      dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL;
291   else
292      dst_templ.bind |= PIPE_BIND_RENDER_TARGET;
293   dst_templ.usage = PIPE_USAGE_STAGING;
294
295   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
296                                   &dst_templ.width0, &dst_templ.height0,
297                                   &dst_templ.depth0, &dst_templ.array_size);
298
299   dst = screen->resource_create(screen, &dst_templ);
300   if (!dst)
301      return NULL;
302
303   memset(&blit, 0, sizeof(blit));
304   blit.src.resource = strb->texture;
305   blit.src.level = strb->surface->u.tex.level;
306   blit.src.format = src_format;
307   blit.dst.resource = dst;
308   blit.dst.level = 0;
309   blit.dst.format = dst->format;
310   blit.src.box.x = x;
311   blit.dst.box.x = 0;
312   blit.src.box.y = y;
313   blit.dst.box.y = 0;
314   blit.src.box.z = strb->surface->u.tex.first_layer;
315   blit.dst.box.z = 0;
316   blit.src.box.width = blit.dst.box.width = width;
317   blit.src.box.height = blit.dst.box.height = height;
318   blit.src.box.depth = blit.dst.box.depth = 1;
319   blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format);
320   blit.filter = PIPE_TEX_FILTER_NEAREST;
321   blit.scissor_enable = FALSE;
322
323   if (invert_y) {
324      blit.src.box.y = strb->Base.Height - blit.src.box.y;
325      blit.src.box.height = -blit.src.box.height;
326   }
327
328   /* blit */
329   st->pipe->blit(st->pipe, &blit);
330
331   return dst;
332}
333
334static struct pipe_resource *
335try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb,
336                      bool invert_y,
337                      GLsizei width, GLsizei height,
338                      GLenum format,
339                      enum pipe_format src_format, enum pipe_format dst_format)
340{
341   struct pipe_resource *src = strb->texture;
342   struct pipe_resource *dst = NULL;
343
344   if (ST_DEBUG & DEBUG_NOREADPIXCACHE)
345      return NULL;
346
347   /* Reset cache after invalidation or switch of parameters. */
348   if (st->readpix_cache.src != src ||
349       st->readpix_cache.dst_format != dst_format ||
350       st->readpix_cache.level != strb->surface->u.tex.level ||
351       st->readpix_cache.layer != strb->surface->u.tex.first_layer) {
352      pipe_resource_reference(&st->readpix_cache.src, src);
353      pipe_resource_reference(&st->readpix_cache.cache, NULL);
354      st->readpix_cache.dst_format = dst_format;
355      st->readpix_cache.level = strb->surface->u.tex.level;
356      st->readpix_cache.layer = strb->surface->u.tex.first_layer;
357      st->readpix_cache.hits = 0;
358   }
359
360   /* Decide whether to trigger the cache. */
361   if (!st->readpix_cache.cache) {
362      if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) {
363         /* Heuristic: If previous successive calls read at least a fraction
364          * of the surface _and_ we read again, trigger the cache.
365          */
366         unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8);
367
368         if (st->readpix_cache.hits < threshold) {
369            st->readpix_cache.hits += width * height;
370            return NULL;
371         }
372
373         strb->use_readpix_cache = true;
374      }
375
376      /* Fill the cache */
377      st->readpix_cache.cache = blit_to_staging(st, strb, invert_y,
378                                                0, 0,
379                                                strb->Base.Width,
380                                                strb->Base.Height, format,
381                                                src_format, dst_format);
382   }
383
384   /* Return an owning reference to stay consistent with the non-cached path */
385   pipe_resource_reference(&dst, st->readpix_cache.cache);
386
387   return dst;
388}
389
390/**
391 * This uses a blit to copy the read buffer to a texture format which matches
392 * the format and type combo and then a fast read-back is done using memcpy.
393 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
394 * a format which matches the swizzling.
395 *
396 * If such a format isn't available, we fall back to _mesa_readpixels.
397 *
398 * NOTE: Some drivers use a blit to convert between tiled and linear
399 *       texture layouts during texture uploads/downloads, so the blit
400 *       we do here should be free in such cases.
401 */
402static void
403st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
404              GLsizei width, GLsizei height,
405              GLenum format, GLenum type,
406              const struct gl_pixelstore_attrib *pack,
407              void *pixels)
408{
409   struct st_context *st = st_context(ctx);
410   struct gl_renderbuffer *rb =
411         _mesa_get_read_renderbuffer_for_format(ctx, format);
412   struct st_renderbuffer *strb = st_renderbuffer(rb);
413   struct pipe_context *pipe = st->pipe;
414   struct pipe_screen *screen = pipe->screen;
415   struct pipe_resource *src;
416   struct pipe_resource *dst = NULL;
417   enum pipe_format dst_format, src_format;
418   unsigned bind;
419   struct pipe_transfer *tex_xfer;
420   ubyte *map = NULL;
421   int dst_x, dst_y;
422
423   /* Validate state (to be sure we have up-to-date framebuffer surfaces)
424    * and flush the bitmap cache prior to reading. */
425   st_validate_state(st, ST_PIPELINE_RENDER);
426   st_flush_bitmap_cache(st);
427
428   if (!st->prefer_blit_based_texture_transfer) {
429      goto fallback;
430   }
431
432   /* This must be done after state validation. */
433   src = strb->texture;
434
435   /* XXX Fallback for depth-stencil formats due to an incomplete
436    * stencil blit implementation in some drivers. */
437   if (format == GL_DEPTH_STENCIL) {
438      goto fallback;
439   }
440
441   /* If the base internal format and the texture format don't match, we have
442    * to use the slow path. */
443   if (rb->_BaseFormat !=
444       _mesa_get_format_base_format(rb->Format)) {
445      goto fallback;
446   }
447
448   if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
449      goto fallback;
450   }
451
452   /* Convert the source format to what is expected by ReadPixels
453    * and see if it's supported. */
454   src_format = util_format_linear(src->format);
455   src_format = util_format_luminance_to_red(src_format);
456   src_format = util_format_intensity_to_red(src_format);
457
458   if (!src_format ||
459       !screen->is_format_supported(screen, src_format, src->target,
460                                    src->nr_samples,
461                                    PIPE_BIND_SAMPLER_VIEW)) {
462      goto fallback;
463   }
464
465   if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
466      bind = PIPE_BIND_DEPTH_STENCIL;
467   else
468      bind = PIPE_BIND_RENDER_TARGET;
469
470   /* Choose the destination format by finding the best match
471    * for the format+type combo. */
472   dst_format = st_choose_matching_format(st, bind, format, type,
473                                          pack->SwapBytes);
474   if (dst_format == PIPE_FORMAT_NONE) {
475      goto fallback;
476   }
477
478   if (st->pbo.download_enabled && _mesa_is_bufferobj(pack->BufferObj)) {
479      if (try_pbo_readpixels(st, strb,
480                             st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
481                             x, y, width, height,
482                             src_format, dst_format,
483                             pack, pixels))
484         return;
485   }
486
487   if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
488      goto fallback;
489   }
490
491   /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU
492    * synchronization overhead.
493    */
494   dst = try_cached_readpixels(st, strb,
495                               st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
496                               width, height, format, src_format, dst_format);
497   if (dst) {
498      dst_x = x;
499      dst_y = y;
500   } else {
501      /* See if the texture format already matches the format and type,
502       * in which case the memcpy-based fast path will likely be used and
503       * we don't have to blit. */
504      if (_mesa_format_matches_format_and_type(rb->Format, format,
505                                               type, pack->SwapBytes, NULL)) {
506         goto fallback;
507      }
508
509      dst = blit_to_staging(st, strb,
510                            st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
511                            x, y, width, height, format,
512                            src_format, dst_format);
513      if (!dst)
514         goto fallback;
515
516      dst_x = 0;
517      dst_y = 0;
518   }
519
520   /* map resources */
521   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
522
523   map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
524                              dst_x, dst_y, 0, width, height, 1, &tex_xfer);
525   if (!map) {
526      _mesa_unmap_pbo_dest(ctx, pack);
527      pipe_resource_reference(&dst, NULL);
528      goto fallback;
529   }
530
531   /* memcpy data into a user buffer */
532   {
533      const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
534      const int destStride = _mesa_image_row_stride(pack, width, format, type);
535      char *dest = _mesa_image_address2d(pack, pixels,
536                                         width, height, format,
537                                         type, 0, 0);
538
539      if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) {
540         memcpy(dest, map, bytesPerRow * height);
541      } else {
542         GLuint row;
543
544         for (row = 0; row < (unsigned) height; row++) {
545            memcpy(dest, map, bytesPerRow);
546            map += tex_xfer->stride;
547            dest += destStride;
548         }
549      }
550   }
551
552   pipe_transfer_unmap(pipe, tex_xfer);
553   _mesa_unmap_pbo_dest(ctx, pack);
554   pipe_resource_reference(&dst, NULL);
555   return;
556
557fallback:
558   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
559}
560
561void st_init_readpixels_functions(struct dd_function_table *functions)
562{
563   functions->ReadPixels = st_ReadPixels;
564}
565