brw_blorp_blit.cpp revision 36bc0fe4f2e90ea9efa19940f477472dad6fb18f
1506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/*
2506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Copyright © 2012 Intel Corporation
3506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
4506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Permission is hereby granted, free of charge, to any person obtaining a
5506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * copy of this software and associated documentation files (the "Software"),
6506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * to deal in the Software without restriction, including without limitation
7506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * and/or sell copies of the Software, and to permit persons to whom the
9506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Software is furnished to do so, subject to the following conditions:
10506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
11506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * The above copyright notice and this permission notice (including the next
12506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * paragraph) shall be included in all copies or substantial portions of the
13506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Software.
14506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
15506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * IN THE SOFTWARE.
22506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
23506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
24506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "main/teximage.h"
25c738ea1191cd1b5a0dc60b0e6d05fd918083e961Paul Berry#include "main/fbobject.h"
26506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
27506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "glsl/ralloc.h"
28506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
29506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "intel_fbo.h"
30506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
31506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "brw_blorp.h"
32506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "brw_context.h"
33506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "brw_eu.h"
34506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#include "brw_state.h"
35506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
36506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
37506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
38506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Helper function for handling mirror image blits.
39506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
40506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * If coord0 > coord1, swap them and invert the "mirror" boolean.
41506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
42506d70be21cd3469118de89297cba0c0f709c1aePaul Berrystatic inline void
43506d70be21cd3469118de89297cba0c0f709c1aePaul Berryfixup_mirroring(bool &mirror, GLint &coord0, GLint &coord1)
44506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
45506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (coord0 > coord1) {
46506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      mirror = !mirror;
47506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      GLint tmp = coord0;
48506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      coord0 = coord1;
49506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      coord1 = tmp;
50506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
51506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
52506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
53506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
5447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry/**
5547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * Adjust {src,dst}_x{0,1} to account for clipping and scissoring of
5647b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * destination coordinates.
5747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry *
5847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * Return true if there is still blitting to do, false if all pixels got
5947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * rejected by the clip and/or scissor.
6047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry *
6147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * For clarity, the nomenclature of this function assumes we are clipping and
6247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * scissoring the X coordinate; the exact same logic applies for Y
6347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry * coordinates.
6475f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry *
6575f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry * Note: this function may also be used to account for clipping of source
6675f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry * coordinates, by swapping the roles of src and dst.
6747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry */
6847b64c9290d54f78e5a20e378593977cd47e285fPaul Berrystatic inline bool
6947b64c9290d54f78e5a20e378593977cd47e285fPaul Berryclip_or_scissor(bool mirror, GLint &src_x0, GLint &src_x1, GLint &dst_x0,
7047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry                GLint &dst_x1, GLint fb_xmin, GLint fb_xmax)
7147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry{
7247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   /* If we are going to scissor everything away, stop. */
7347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   if (!(fb_xmin < fb_xmax &&
7447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry         dst_x0 < fb_xmax &&
7547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry         fb_xmin < dst_x1 &&
7647b64c9290d54f78e5a20e378593977cd47e285fPaul Berry         dst_x0 < dst_x1)) {
7747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      return false;
7847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   }
7947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
8047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   /* Clip the destination rectangle, and keep track of how many pixels we
8147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    * clipped off of the left and right sides of it.
8247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    */
8347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   GLint pixels_clipped_left = 0;
8447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   GLint pixels_clipped_right = 0;
8547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   if (dst_x0 < fb_xmin) {
8647b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      pixels_clipped_left = fb_xmin - dst_x0;
8747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      dst_x0 = fb_xmin;
8847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   }
8947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   if (fb_xmax < dst_x1) {
9047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      pixels_clipped_right = dst_x1 - fb_xmax;
9147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      dst_x1 = fb_xmax;
9247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   }
9347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
9447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   /* If we are mirrored, then before applying pixels_clipped_{left,right} to
9547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    * the source coordinates, we need to flip them to account for the
9647b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    * mirroring.
9747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    */
9847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   if (mirror) {
9947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      GLint tmp = pixels_clipped_left;
10047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      pixels_clipped_left = pixels_clipped_right;
10147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      pixels_clipped_right = tmp;
10247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   }
10347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
10447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   /* Adjust the source rectangle to remove the pixels corresponding to those
10547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    * that were clipped/scissored out of the destination rectangle.
10647b64c9290d54f78e5a20e378593977cd47e285fPaul Berry    */
10747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   src_x0 += pixels_clipped_left;
10847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   src_x1 -= pixels_clipped_right;
10947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
11047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   return true;
11147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry}
11247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
11347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
114fa1d267beb4adb542ea90b805306599f602c38d2Paul Berrystatic struct intel_mipmap_tree *
1155c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berryfind_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
116fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry{
117fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   struct intel_mipmap_tree *mt = irb->mt;
118fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt)
119fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry      mt = mt->stencil_mt;
120fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   return mt;
121fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry}
122fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
1236cc9df331b4799715b31d7ec606ad09fa914e260Chad Versacevoid
1246cc9df331b4799715b31d7ec606ad09fa914e260Chad Versacebrw_blorp_blit_miptrees(struct intel_context *intel,
1256cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        struct intel_mipmap_tree *src_mt,
126e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                        unsigned src_level, unsigned src_layer,
1276cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        struct intel_mipmap_tree *dst_mt,
128e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                        unsigned dst_level, unsigned dst_layer,
1296cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        int src_x0, int src_y0,
1306cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        int dst_x0, int dst_y0,
1316cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        int dst_x1, int dst_y1,
1326cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                        bool mirror_x, bool mirror_y)
1336cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace{
1346cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace   brw_blorp_blit_params params(brw_context(&intel->ctx),
135e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                                src_mt, src_level, src_layer,
136e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                                dst_mt, dst_level, dst_layer,
1376cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                                src_x0, src_y0,
1386cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                                dst_x0, dst_y0,
1396cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                                dst_x1, dst_y1,
1406cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                                mirror_x, mirror_y);
1416cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace   brw_blorp_exec(intel, &params);
1426cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace}
143fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
144fa1d267beb4adb542ea90b805306599f602c38d2Paul Berrystatic void
145fa1d267beb4adb542ea90b805306599f602c38d2Paul Berrydo_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit,
1465c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry              struct intel_renderbuffer *src_irb,
1475c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry              struct intel_renderbuffer *dst_irb,
148fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry              GLint srcX0, GLint srcY0,
149fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry              GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
150fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry              bool mirror_x, bool mirror_y)
151fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry{
152fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   /* Find source/dst miptrees */
1535c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
1545c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
155fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
156fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   /* Get ready to blit.  This includes depth resolving the src and dst
157fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    * buffers if necessary.
158fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    */
1595c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   intel_renderbuffer_resolve_depth(intel, src_irb);
1605c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   intel_renderbuffer_resolve_depth(intel, dst_irb);
161fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
162fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   /* Do the blit */
163e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry   brw_blorp_blit_miptrees(intel,
164e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                           src_mt, src_irb->mt_level, src_irb->mt_layer,
165e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                           dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
1666cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
1676cc9df331b4799715b31d7ec606ad09fa914e260Chad Versace                           mirror_x, mirror_y);
168fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
1695c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   intel_renderbuffer_set_needs_hiz_resolve(dst_irb);
1705c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   intel_renderbuffer_set_needs_downsample(dst_irb);
171fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry}
172fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
173fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
174fa1d267beb4adb542ea90b805306599f602c38d2Paul Berrystatic bool
1755c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berryformats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
1765c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry              struct intel_renderbuffer *dst_irb)
177fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry{
178fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   /* Note: don't just check gl_renderbuffer::Format, because in some cases
179fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    * multiple gl_formats resolve to the same native type in the miptree (for
180fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    * example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit
181fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    * between those formats.
182fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry    */
1835c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   return find_miptree(buffer_bit, src_irb)->format ==
1845c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      find_miptree(buffer_bit, dst_irb)->format;
185fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry}
186fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
187fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry
188506d70be21cd3469118de89297cba0c0f709c1aePaul Berrystatic bool
189506d70be21cd3469118de89297cba0c0f709c1aePaul Berrytry_blorp_blit(struct intel_context *intel,
190506d70be21cd3469118de89297cba0c0f709c1aePaul Berry               GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
191506d70be21cd3469118de89297cba0c0f709c1aePaul Berry               GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
192506d70be21cd3469118de89297cba0c0f709c1aePaul Berry               GLenum filter, GLbitfield buffer_bit)
193506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
194506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct gl_context *ctx = &intel->ctx;
195506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
196506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Sync up the state of window system buffers.  We need to do this before
197506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * we go looking for the buffers.
198506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
199506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   intel_prepare_render(intel);
200506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
201506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
202506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
203506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
204506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Detect if the blit needs to be mirrored */
205506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   bool mirror_x = false, mirror_y = false;
206506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   fixup_mirroring(mirror_x, srcX0, srcX1);
207506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   fixup_mirroring(mirror_x, dstX0, dstX1);
208506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   fixup_mirroring(mirror_y, srcY0, srcY1);
209506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   fixup_mirroring(mirror_y, dstY0, dstY1);
210506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
211506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Make sure width and height match */
212da54d2e576426122009be083ecbfb9eefd8a3799Paul Berry   if (srcX1 - srcX0 != dstX1 - dstX0) return false;
213da54d2e576426122009be083ecbfb9eefd8a3799Paul Berry   if (srcY1 - srcY0 != dstY1 - dstY0) return false;
214506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
21547b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   /* If the destination rectangle needs to be clipped or scissored, do so.
216506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
21747b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
21847b64c9290d54f78e5a20e378593977cd47e285fPaul Berry                         draw_fb->_Xmin, draw_fb->_Xmax) &&
21947b64c9290d54f78e5a20e378593977cd47e285fPaul Berry         clip_or_scissor(mirror_y, srcY0, srcY1, dstY0, dstY1,
22047b64c9290d54f78e5a20e378593977cd47e285fPaul Berry                         draw_fb->_Ymin, draw_fb->_Ymax))) {
22147b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      /* Everything got clipped/scissored away, so the blit was successful. */
22247b64c9290d54f78e5a20e378593977cd47e285fPaul Berry      return true;
22347b64c9290d54f78e5a20e378593977cd47e285fPaul Berry   }
22447b64c9290d54f78e5a20e378593977cd47e285fPaul Berry
22575f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry   /* If the source rectangle needs to be clipped or scissored, do so. */
22675f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry   if (!(clip_or_scissor(mirror_x, dstX0, dstX1, srcX0, srcX1,
22775f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry                         0, read_fb->Width) &&
22875f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry         clip_or_scissor(mirror_y, dstY0, dstY1, srcY0, srcY1,
22975f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry                         0, read_fb->Height))) {
23075f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry      /* Everything got clipped/scissored away, so the blit was successful. */
23175f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry      return true;
23275f409d75cacf90df2d6f1d718251a5d5cd92f7fPaul Berry   }
233506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
2340dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry   /* Account for the fact that in the system framebuffer, the origin is at
2350dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry    * the lower left.
2360dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry    */
237c738ea1191cd1b5a0dc60b0e6d05fd918083e961Paul Berry   if (_mesa_is_winsys_fbo(read_fb)) {
2380dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      GLint tmp = read_fb->Height - srcY0;
2390dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      srcY0 = read_fb->Height - srcY1;
2400dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      srcY1 = tmp;
2410dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      mirror_y = !mirror_y;
2420dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry   }
243c738ea1191cd1b5a0dc60b0e6d05fd918083e961Paul Berry   if (_mesa_is_winsys_fbo(draw_fb)) {
2440dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      GLint tmp = draw_fb->Height - dstY0;
2450dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      dstY0 = draw_fb->Height - dstY1;
2460dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      dstY1 = tmp;
2470dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry      mirror_y = !mirror_y;
2480dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry   }
2490dbec6ae07e7b3d566cc397ab09caa413e412846Paul Berry
250fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   /* Find buffers */
2515c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   struct intel_renderbuffer *src_irb;
2525c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry   struct intel_renderbuffer *dst_irb;
253fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   switch (buffer_bit) {
254fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   case GL_COLOR_BUFFER_BIT:
2555c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      src_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
256ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry      for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
2575c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
2585c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         if (dst_irb && !formats_match(buffer_bit, src_irb, dst_irb))
259ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry            return false;
260ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry      }
261ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry      for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
2625c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
2635c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
264ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry                       dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
265ff9313fac70fa85d051dd4d2b9d3402d39f67ceaPaul Berry      }
266fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry      break;
267fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   case GL_DEPTH_BUFFER_BIT:
2685c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      src_irb =
2695c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
2705c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      dst_irb =
2715c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         intel_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
2725c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      if (!formats_match(buffer_bit, src_irb, dst_irb))
273fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry         return false;
2745c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
275fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
276fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry      break;
277fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   case GL_STENCIL_BUFFER_BIT:
2785c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      src_irb =
2795c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         intel_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
2805c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      dst_irb =
2815c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry         intel_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
2825c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      if (!formats_match(buffer_bit, src_irb, dst_irb))
283fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry         return false;
2845c66640ac7c271a96f66f4cb49adad54eb58cc47Paul Berry      do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
285fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
286fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry      break;
287fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   default:
288fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry      assert(false);
289fa1d267beb4adb542ea90b805306599f602c38d2Paul Berry   }
290506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
291506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   return true;
292506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
293506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
294506d70be21cd3469118de89297cba0c0f709c1aePaul BerryGLbitfield
295506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_framebuffer(struct intel_context *intel,
296506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
297506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
298506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      GLbitfield mask, GLenum filter)
299506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
300b08545199ac8a01392a805f158d22cc03060a6fbPaul Berry   /* BLORP is not supported before Gen6. */
301b08545199ac8a01392a805f158d22cc03060a6fbPaul Berry   if (intel->gen < 6)
302506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      return mask;
303506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
304506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   static GLbitfield buffer_bits[] = {
305506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      GL_COLOR_BUFFER_BIT,
306506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      GL_DEPTH_BUFFER_BIT,
307506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      GL_STENCIL_BUFFER_BIT,
308506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   };
309506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
310506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
311506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      if ((mask & buffer_bits[i]) &&
312506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       try_blorp_blit(intel,
313506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      srcX0, srcY0, srcX1, srcY1,
314506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      dstX0, dstY0, dstX1, dstY1,
315506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                      filter, buffer_bits[i])) {
316506d70be21cd3469118de89297cba0c0f709c1aePaul Berry         mask &= ~buffer_bits[i];
317506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      }
318506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
319506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
320506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   return mask;
321506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
322506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
323665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry
/**
 * Enum to specify the order of arguments in a sampler message.
 *
 * NOTE(review): the per-value notes below are inferred from the enumerator
 * names alone; confirm against the code that assembles sampler messages.
 */
enum sampler_message_arg
{
   SAMPLER_MESSAGE_ARG_U_FLOAT,   /* presumably the U coordinate, as float */
   SAMPLER_MESSAGE_ARG_V_FLOAT,   /* presumably the V coordinate, as float */
   SAMPLER_MESSAGE_ARG_U_INT,     /* presumably the U coordinate, as integer */
   SAMPLER_MESSAGE_ARG_V_INT,     /* presumably the V coordinate, as integer */
   SAMPLER_MESSAGE_ARG_SI_INT,    /* presumably the sample index, as integer */
   SAMPLER_MESSAGE_ARG_MCS_INT,   /* presumably MCS data, as integer */
   SAMPLER_MESSAGE_ARG_ZERO_INT,  /* presumably a constant integer zero */
};
337665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry
338506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
339506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Generator for WM programs used in BLORP blits.
340506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
341506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * The bulk of the work done by the WM program is to wrap and unwrap the
342506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * coordinate transformations used by the hardware to store surfaces in
34319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * memory.  The hardware transforms a pixel location (X, Y, S) (where S is the
34419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * sample index for a multisampled surface) to a memory offset by the
34519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * following formulas:
346506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
3478b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   offset = tile(tiling_format, encode_msaa(num_samples, layout, X, Y, S))
3488b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   (X, Y, S) = decode_msaa(num_samples, layout, detile(tiling_format, offset))
34919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
3501bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * For a single-sampled surface, or for a multisampled surface using
3511bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * INTEL_MSAA_LAYOUT_UMS, encode_msaa() and decode_msaa are the identity
3521bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * function:
35319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
3541bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   encode_msaa(1, NONE, X, Y, 0) = (X, Y, 0)
3551bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   decode_msaa(1, NONE, X, Y, 0) = (X, Y, 0)
3561bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   encode_msaa(n, UMS, X, Y, S) = (X, Y, S)
3571bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   decode_msaa(n, UMS, X, Y, S) = (X, Y, S)
35819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
3591bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * For a 4x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa()
3601bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * embeds the sample number into bit 1 of the X and Y coordinates:
36119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
3621bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   encode_msaa(4, IMS, X, Y, S) = (X', Y', 0)
36319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *     where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
36419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *           Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1)
3651bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry *   decode_msaa(4, IMS, X, Y, 0) = (X', Y', S)
36619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *     where X' = (X & ~0b11) >> 1 | (X & 0b1)
36719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *           Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
36819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *           S = (Y & 0b10) | (X & 0b10) >> 1
369506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
3707fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry * For an 8x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa()
3717fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry * embeds the sample number into bits 1 and 2 of the X coordinate and bit 1 of
3727fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry * the Y coordinate:
3737fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *
3747fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *   encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
3757fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *     where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 | (X & 0b1)
3767fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *           Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
3777fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *   decode_msaa(8, IMS, X, Y, 0) = (X', Y', S)
3787fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *     where X' = (X & ~0b111) >> 2 | (X & 0b1)
3797fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *           Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
3807fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *           S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1
3817fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *
382506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * For X tiling, tile() combines together the low-order bits of the X and Y
383506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512
384506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * bytes wide and 8 rows high:
385506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
 *   tile(x_tiled, X, Y, S) = A
 *     where A = tile_num << 12 | offset
 *           tile_num = (Y' >> 3) * tile_pitch + (X' >> 9)
 *           offset = (Y' & 0b111) << 9
 *                    | (X' & 0b111111111)
 *           X' = X * cpp
 *           Y' = Y + S * qpitch
3938b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   detile(x_tiled, A) = (X, Y, S)
394506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *     where X = X' / cpp
3958b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y = Y' % qpitch
3968b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           S = Y' / qpitch
3978b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y' = (tile_num / tile_pitch) << 3
3988b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *                | (A & 0b111000000000) >> 9
399506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           X' = (tile_num % tile_pitch) << 9
400506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                | (A & 0b111111111)
401506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
402506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * (In all tiling formulas, cpp is the number of bytes occupied by a single
4038b1f467cce34340637e9baca4847fc5273cf7541Paul Berry * sample ("chars per pixel"), tile_pitch is the number of 4k tiles required
4048b1f467cce34340637e9baca4847fc5273cf7541Paul Berry * to fill the width of the surface, and qpitch is the spacing (in rows)
4058b1f467cce34340637e9baca4847fc5273cf7541Paul Berry * between array slices).
406506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
407506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * For Y tiling, tile() combines together the low-order bits of the X and Y
408506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * coordinates in the pattern 0bxxxyyyyyxxxx, creating 4k tiles that are 128
409506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * bytes wide and 32 rows high:
410506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
4118b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   tile(y_tiled, X, Y, S) = A
412506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *     where A = tile_num << 12 | offset
4138b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           tile_num = (Y' >> 5) * tile_pitch + (X' >> 7)
414506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           offset = (X' & 0b1110000) << 5
415506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                    | (Y' & 0b11111) << 4
416506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                    | (X' & 0b1111)
417506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           X' = X * cpp
4188b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y' = Y + S * qpitch
4198b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   detile(y_tiled, A) = (X, Y, S)
420506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *     where X = X' / cpp
4218b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y = Y' % qpitch
4228b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           S = Y' / qpitch
4238b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y' = (tile_num / tile_pitch) << 5
4248b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *                | (A & 0b111110000) >> 4
425506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           X' = (tile_num % tile_pitch) << 7
426506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                | (A & 0b111000000000) >> 5
427506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                | (A & 0b1111)
428506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
429506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * For W tiling, tile() combines together the low-order bits of the X and Y
430506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * coordinates in the pattern 0bxxxyyyyxyxyx, creating 4k tiles that are 64
431506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * bytes wide and 64 rows high (note that W tiling is only used for stencil
4328b1f467cce34340637e9baca4847fc5273cf7541Paul Berry * buffers, which always have cpp = 1 and S=0):
433506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
4348b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   tile(w_tiled, X, Y, S) = A
435506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *     where A = tile_num << 12 | offset
4368b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           tile_num = (Y' >> 6) * tile_pitch + (X' >> 6)
437506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           offset = (X' & 0b111000) << 6
4388b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *                    | (Y' & 0b111100) << 3
439506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                    | (X' & 0b100) << 2
4408b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *                    | (Y' & 0b10) << 2
441506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                    | (X' & 0b10) << 1
4428b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *                    | (Y' & 0b1) << 1
443506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *                    | (X' & 0b1)
444506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           X' = X * cpp = X
4458b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y' = Y + S * qpitch
 *   detile(w_tiled, A) = (X, Y, S)
 *     where X = X' / cpp = X'
 *           Y = Y' % qpitch = Y'
 *           S = Y' / qpitch = 0
 *           Y' = (tile_num / tile_pitch) << 6
 *                | (A & 0b111100000) >> 3
 *                | (A & 0b1000) >> 2
 *                | (A & 0b10) >> 1
 *           X' = (tile_num % tile_pitch) << 6
 *                | (A & 0b111000000000) >> 6
 *                | (A & 0b10000) >> 2
 *                | (A & 0b100) >> 1
 *                | (A & 0b1)
459506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
460506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Finally, for a non-tiled surface, tile() simply combines together the X and
461506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Y coordinates in the natural way:
462506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
 *   tile(untiled, X, Y, S) = A
 *     where A = Y' * pitch + X'
 *           X' = X * cpp
 *           Y' = Y + S * qpitch
4678b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   detile(untiled, A) = (X, Y, S)
468506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *     where X = X' / cpp
4698b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y = Y' % qpitch
4708b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           S = Y' / qpitch
471506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *           X' = A % pitch
4728b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *           Y' = A / pitch
473506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
474506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * (In these formulas, pitch is the number of bytes occupied by a single row
47519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * of samples).
476506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
477506d70be21cd3469118de89297cba0c0f709c1aePaul Berryclass brw_blorp_blit_program
478506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
479506d70be21cd3469118de89297cba0c0f709c1aePaul Berrypublic:
480506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_blorp_blit_program(struct brw_context *brw,
481506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                          const brw_blorp_blit_prog_key *key);
482506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ~brw_blorp_blit_program();
483506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
484506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const GLuint *compile(struct brw_context *brw, GLuint *program_size);
485506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
486506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_blorp_prog_data prog_data;
487506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
488506d70be21cd3469118de89297cba0c0f709c1aePaul Berryprivate:
489506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void alloc_regs();
490506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void alloc_push_const_regs(int base_reg);
491506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void compute_frag_coords();
492506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void translate_tiling(bool old_tiled_w, bool new_tiled_w);
4931bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
4941bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
495506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void kill_if_outside_dst_rect();
496506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void translate_dst_to_src();
49719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   void single_to_blend();
49817eae9762cdd6cfa69a060001e26113dfc0d7c86Paul Berry   void manual_blend(unsigned num_samples);
4994725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   void sample(struct brw_reg dst);
5004725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   void texel_fetch(struct brw_reg dst);
5014ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   void mcs_fetch();
502665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   void expand_to_32_bits(struct brw_reg src, struct brw_reg dst);
5034725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   void texture_lookup(struct brw_reg dst, GLuint msg_type,
5044725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry                       const sampler_message_arg *args, int num_args);
505506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void render_target_write();
506506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
507b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   /**
508b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * Base-2 logarithm of the maximum number of samples that can be blended.
509b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    */
51017eae9762cdd6cfa69a060001e26113dfc0d7c86Paul Berry   static const unsigned LOG2_MAX_BLEND_SAMPLES = 3;
511b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
512506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   void *mem_ctx;
513506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_context *brw;
514506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const brw_blorp_blit_prog_key *key;
515506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_compile func;
516506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
517506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Thread dispatch header */
518506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg R0;
519506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
520506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Pixel X/Y coordinates (always in R1). */
521506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg R1;
522506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
523506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Push constants */
524506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg dst_x0;
525506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg dst_x1;
526506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg dst_y0;
527506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg dst_y1;
528506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct {
529506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      struct brw_reg multiplier;
530506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      struct brw_reg offset;
531506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   } x_transform, y_transform;
532506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
533b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   /* Data read from texture (4 vec16's per array element) */
534b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   struct brw_reg texture_data[LOG2_MAX_BLEND_SAMPLES + 1];
535506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
5364ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   /* Auxiliary storage for the contents of the MCS surface.
5374ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry    *
5384ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry    * Since the sampler always returns 8 registers worth of data, this is 8
5394ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry    * registers wide, even though we only use the first 2 registers of it.
5404ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry    */
5414ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   struct brw_reg mcs_data;
5424ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry
543506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* X coordinates.  We have two of them so that we can perform coordinate
544506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * transformations easily.
545506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
546506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg x_coords[2];
547506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
548506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Y coordinates.  We have two of them so that we can perform coordinate
549506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * transformations easily.
550506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
551506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg y_coords[2];
552506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
553506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Which element of x_coords and y_coords is currently in use.
554506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
555506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   int xy_coord_index;
556506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
55719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* True if, at the point in the program currently being compiled, the
55819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * sample index is known to be zero.
55919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    */
56019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   bool s_is_zero;
56119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
56219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* Register storing the sample index when s_is_zero is false. */
56319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   struct brw_reg sample_index;
56419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
565506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Temporaries */
566506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg t1;
567506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg t2;
568506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
569665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   /* MRF used for sampling and render target writes */
570506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   GLuint base_mrf;
571506d70be21cd3469118de89297cba0c0f709c1aePaul Berry};
572506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
573506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::brw_blorp_blit_program(
574506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      struct brw_context *brw,
575506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      const brw_blorp_blit_prog_key *key)
576506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   : mem_ctx(ralloc_context(NULL)),
577506d70be21cd3469118de89297cba0c0f709c1aePaul Berry     brw(brw),
578506d70be21cd3469118de89297cba0c0f709c1aePaul Berry     key(key)
579506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
580506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_init_compile(brw, &func, mem_ctx);
581506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
582506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
583506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::~brw_blorp_blit_program()
584506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
585506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ralloc_free(mem_ctx);
586506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
587506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
588506d70be21cd3469118de89297cba0c0f709c1aePaul Berryconst GLuint *
589506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::compile(struct brw_context *brw,
590506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                GLuint *program_size)
591506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
59219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* Sanity checks */
59334a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   if (key->dst_tiled_w && key->rt_samples > 0) {
59434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      /* If the destination image is W tiled and multisampled, then the thread
59534a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * must be dispatched once per sample, not once per pixel.  This is
59634a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * necessary because after conversion between W and Y tiling, there's no
59719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * guarantee that all samples corresponding to a single pixel will still
59819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * be together.
59919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
60034a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      assert(key->persample_msaa_dispatch);
60119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
60219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
60319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   if (key->blend) {
6044725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      /* We are blending, which means we won't have an opportunity to
6054725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry       * translate the tiling and sample count for the texture surface.  So
60619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * the surface state for the texture must be configured with the correct
60719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * tiling and sample count.
60819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
60919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      assert(!key->src_tiled_w);
61019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      assert(key->tex_samples == key->src_samples);
6111bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(key->tex_layout == key->src_layout);
61219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      assert(key->tex_samples > 0);
61319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
61419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
61534a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   if (key->persample_msaa_dispatch) {
61634a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      /* It only makes sense to do persample dispatch if the render target is
61734a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * configured as multisampled.
61834a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       */
61934a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      assert(key->rt_samples > 0);
62034a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   }
62134a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry
6221bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   /* Make sure layout is consistent with sample count */
6231bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   assert((key->tex_layout == INTEL_MSAA_LAYOUT_NONE) ==
6241bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry          (key->tex_samples == 0));
6251bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   assert((key->rt_layout == INTEL_MSAA_LAYOUT_NONE) ==
6261bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry          (key->rt_samples == 0));
6271bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   assert((key->src_layout == INTEL_MSAA_LAYOUT_NONE) ==
6281bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry          (key->src_samples == 0));
6291bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   assert((key->dst_layout == INTEL_MSAA_LAYOUT_NONE) ==
6301bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry          (key->dst_samples == 0));
6318b1f467cce34340637e9baca4847fc5273cf7541Paul Berry
63234a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   /* Set up prog_data */
63334a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   memset(&prog_data, 0, sizeof(prog_data));
63434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   prog_data.persample_msaa_dispatch = key->persample_msaa_dispatch;
63534a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry
636506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
637506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
638506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   alloc_regs();
639506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   compute_frag_coords();
640506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
641506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Render target and texture hardware don't support W tiling. */
642506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const bool rt_tiled_w = false;
643506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   const bool tex_tiled_w = false;
644506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
645506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* The address that data will be written to is determined by the
64619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * coordinates supplied to the WM thread and the tiling and sample count of
64719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * the render target, according to the formula:
648506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
64919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset))
650506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
65119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * If the actual tiling and sample count of the destination surface are not
65219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * the same as the configuration of the render target, then these
65319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * coordinates are wrong and we have to adjust them to compensate for the
65419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * difference.
655506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
65619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   if (rt_tiled_w != key->dst_tiled_w ||
6578b1f467cce34340637e9baca4847fc5273cf7541Paul Berry       key->rt_samples != key->dst_samples ||
6581bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       key->rt_layout != key->dst_layout) {
6591bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      encode_msaa(key->rt_samples, key->rt_layout);
6608b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      /* Now (X, Y, S) = detile(rt_tiling, offset) */
661506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      translate_tiling(rt_tiled_w, key->dst_tiled_w);
6628b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      /* Now (X, Y, S) = detile(dst_tiling, offset) */
6631bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      decode_msaa(key->dst_samples, key->dst_layout);
66419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
665506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
66619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)).
667506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
66819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * That is: X, Y and S now contain the true coordinates and sample index of
66919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * the data that the WM thread should output.
670506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
671506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * If we need to kill pixels that are outside the destination rectangle,
672506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * now is the time to do it.
673506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
674506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
675506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (key->use_kill)
676506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      kill_if_outside_dst_rect();
677506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
678506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Next, apply a translation to obtain coordinates in the source image. */
679506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   translate_dst_to_src();
680506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
68119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* If the source image is not multisampled, then we want to fetch sample
68219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * number 0, because that's the only sample there is.
683506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
68419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   if (key->src_samples == 0)
68519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      s_is_zero = true;
686506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
68719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* X, Y, and S are now the coordinates of the pixel in the source image
68819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * that we want to texture from.  Exception: if we are blending, then S is
68919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * irrelevant, because we are going to fetch all samples.
690506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
69119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   if (key->blend) {
6924725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      if (brw->intel.gen == 6) {
6934725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry         /* Gen6 hardware an automatically blend using the SAMPLE message */
6944725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry         single_to_blend();
695b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         sample(texture_data[0]);
6964725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      } else {
6974725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry         /* Gen7+ hardware doesn't automaticaly blend. */
69817eae9762cdd6cfa69a060001e26113dfc0d7c86Paul Berry         manual_blend(key->src_samples);
6994725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      }
70019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   } else {
70119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* We aren't blending, which means we just want to fetch a single sample
70219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * from the source surface.  The address that we want to fetch from is
70319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * related to the X, Y and S values according to the formula:
70419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       *
70519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)).
70619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       *
70719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * If the actual tiling and sample count of the source surface are not
70819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * the same as the configuration of the texture, then we need to adjust
70919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * the coordinates to compensate for the difference.
71019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
71119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      if (tex_tiled_w != key->src_tiled_w ||
7128b1f467cce34340637e9baca4847fc5273cf7541Paul Berry          key->tex_samples != key->src_samples ||
7131bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry          key->tex_layout != key->src_layout) {
7141bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry         encode_msaa(key->src_samples, key->src_layout);
7158b1f467cce34340637e9baca4847fc5273cf7541Paul Berry         /* Now (X, Y, S) = detile(src_tiling, offset) */
71619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry         translate_tiling(key->src_tiled_w, tex_tiled_w);
7178b1f467cce34340637e9baca4847fc5273cf7541Paul Berry         /* Now (X, Y, S) = detile(tex_tiling, offset) */
7181bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry         decode_msaa(key->tex_samples, key->tex_layout);
71919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      }
720506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
72119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
72219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       *
72319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * In other words: X, Y, and S now contain values which, when passed to
72419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * the texturing unit, will cause data to be read from the correct
72519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * memory location.  So we can fetch the texel now.
72619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
7274ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
7284ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry         mcs_fetch();
729b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      texel_fetch(texture_data[0]);
73019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
73119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
73219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* Finally, write the fetched (or blended) value to the render target and
73319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * terminate the thread.
734506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
735506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   render_target_write();
736506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   return brw_get_program(&func, program_size);
737506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
738506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
739506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
740506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::alloc_push_const_regs(int base_reg)
741506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
742506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
743506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#define ALLOC_REG(name) \
744506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->name = \
745506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 2)
746506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
747506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(dst_x0);
748506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(dst_x1);
749506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(dst_y0);
750506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(dst_y1);
751506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(x_transform.multiplier);
752506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(x_transform.offset);
753506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(y_transform.multiplier);
754506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   ALLOC_REG(y_transform.offset);
755506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef CONST_LOC
756506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef ALLOC_REG
757506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
758506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
759506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
760506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::alloc_regs()
761506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
762506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   int reg = 0;
763506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
764506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
765506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   prog_data.first_curbe_grf = reg;
766506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   alloc_push_const_regs(reg);
767506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
768b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   for (unsigned i = 0; i < ARRAY_SIZE(texture_data); ++i) {
769e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      this->texture_data[i] =
770e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry         retype(vec16(brw_vec8_grf(reg, 0)), key->texture_data_type);
771e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      reg += 8;
772b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   }
7734ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   this->mcs_data =
7744ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
775506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   for (int i = 0; i < 2; ++i) {
776506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      this->x_coords[i]
777506d70be21cd3469118de89297cba0c0f709c1aePaul Berry         = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
778506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      this->y_coords[i]
779506d70be21cd3469118de89297cba0c0f709c1aePaul Berry         = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
780506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
781506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->xy_coord_index = 0;
78219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   this->sample_index
78319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
784506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
785506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
786506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
787b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   /* Make sure we didn't run out of registers */
788b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   assert(reg <= GEN7_MRF_HACK_START);
789b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
790506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   int mrf = 2;
791506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   this->base_mrf = mrf;
792506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
793506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
/* In the code that follows, X and Y can be used to quickly refer to the
 * active elements of x_coords and y_coords, and Xp and Yp ("X prime" and "Y
 * prime") to the inactive elements.
 *
 * S can be used to quickly refer to sample_index.
 */
#define X x_coords[xy_coord_index]
#define Y y_coords[xy_coord_index]
#define Xp x_coords[!xy_coord_index]
#define Yp y_coords[!xy_coord_index]
#define S sample_index

/* Quickly swap the roles of (X, Y) and (Xp, Yp).  Saves us from having to do
 * MOVs to transfer (Xp, Yp) to (X, Y) after a coordinate transformation.
 *
 * The macro expands to exactly one statement and does not supply its own
 * terminating semicolon: invoke it as "SWAP_XY_AND_XPYP();".  This keeps it
 * usable as the body of an unbraced if/else.
 */
#define SWAP_XY_AND_XPYP() do { xy_coord_index = !xy_coord_index; } while (0)
810506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
811506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
812506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Emit code to compute the X and Y coordinates of the pixels being rendered
813506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * by this WM invocation.
814506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
815506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Assuming the render target is set up for Y tiling, these (X, Y) values are
816506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * related to the address offset where outputs will be written by the formula:
817506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
818506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *   (X, Y, S) = decode_msaa(detile(offset)).
819506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
820506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * (See brw_blorp_blit_program).
821506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
822506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
823506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::compute_frag_coords()
824506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
825506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* R1.2[15:0] = X coordinate of upper left pixel of subspan 0 (pixel 0)
826506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * R1.3[15:0] = X coordinate of upper left pixel of subspan 1 (pixel 4)
827506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * R1.4[15:0] = X coordinate of upper left pixel of subspan 2 (pixel 8)
828506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * R1.5[15:0] = X coordinate of upper left pixel of subspan 3 (pixel 12)
829506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
830506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * Pixels within a subspan are laid out in this arrangement:
831506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * 0 1
832506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * 2 3
833506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
834506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * So, to compute the coordinates of each pixel, we need to read every 2nd
835506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * 16-bit value (vstride=2) from R1, starting at the 4th 16-bit value
836506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * (suboffset=4), and duplicate each value 4 times (hstride=0, width=4).
837506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * In other words, the data we want to access is R1.4<2;4,0>UW.
838506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
839506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
840506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * result, since pixels n+1 and n+3 are in the right half of the subspan.
841506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
842506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_ADD(&func, X, stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
843506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
844506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Similarly, Y coordinates for subspans come from R1.2[31:16] through
845506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * R1.5[31:16], so to get pixel Y coordinates we need to start at the 5th
846506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * 16-bit value instead of the 4th (R1.5<2;4,0>UW instead of
847506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * R1.4<2;4,0>UW).
848506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    *
849506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * And we need to add the repeating sequence (0, 0, 1, 1, ...), since
850506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * pixels n+2 and n+3 are in the bottom half of the subspan.
851506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
852506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
85319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
85434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   if (key->persample_msaa_dispatch) {
855619471dc322de80942f7dbb29a437890e48155c6Paul Berry      switch (key->rt_samples) {
856619471dc322de80942f7dbb29a437890e48155c6Paul Berry      case 4:
857619471dc322de80942f7dbb29a437890e48155c6Paul Berry         /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 4.
858619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * Therefore, subspan 0 will represent sample 0, subspan 1 will
859619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * represent sample 1, and so on.
860619471dc322de80942f7dbb29a437890e48155c6Paul Berry          *
861619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * So we need to populate S with the sequence (0, 0, 0, 0, 1, 1, 1,
862619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * 1, 2, 2, 2, 2, 3, 3, 3, 3).  The easiest way to do this is to
863619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * populate a temporary variable with the sequence (0, 1, 2, 3), and
864619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * then copy from it using vstride=1, width=4, hstride=0.
865619471dc322de80942f7dbb29a437890e48155c6Paul Berry          */
866619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_MOV(&func, t1, brw_imm_v(0x3210));
867619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_MOV(&func, S, stride(t1, 1, 4, 0));
868619471dc322de80942f7dbb29a437890e48155c6Paul Berry         break;
869619471dc322de80942f7dbb29a437890e48155c6Paul Berry      case 8: {
870619471dc322de80942f7dbb29a437890e48155c6Paul Berry         /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 8.
871619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * Therefore, subspan 0 will represent sample N (where N is 0 or 4),
872619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * subspan 1 will represent sample 1, and so on.  We can find the
873619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * value of N by looking at R0.0 bits 7:6 ("Starting Sample Pair
874619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * Index") and multiplying by two (since samples are always delivered
875619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * in pairs).  That is, we compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 &
876619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * 0xc0) >> 5.
877619471dc322de80942f7dbb29a437890e48155c6Paul Berry          *
878619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * Then we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1, 2,
879619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * 2, 2, 2, 3, 3, 3, 3), which we compute by populating a temporary
880619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * variable with the sequence (0, 1, 2, 3), and then reading from it
881619471dc322de80942f7dbb29a437890e48155c6Paul Berry          * using vstride=1, width=4, hstride=0.
882619471dc322de80942f7dbb29a437890e48155c6Paul Berry          */
883619471dc322de80942f7dbb29a437890e48155c6Paul Berry         struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
884619471dc322de80942f7dbb29a437890e48155c6Paul Berry         struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
885619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
886619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
887619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_MOV(&func, t2, brw_imm_v(0x3210));
888619471dc322de80942f7dbb29a437890e48155c6Paul Berry         brw_ADD(&func, S, retype(t1_ud1, BRW_REGISTER_TYPE_UW),
889619471dc322de80942f7dbb29a437890e48155c6Paul Berry                 stride(t2, 1, 4, 0));
890619471dc322de80942f7dbb29a437890e48155c6Paul Berry         break;
891619471dc322de80942f7dbb29a437890e48155c6Paul Berry      }
892619471dc322de80942f7dbb29a437890e48155c6Paul Berry      default:
893619471dc322de80942f7dbb29a437890e48155c6Paul Berry         assert(!"Unrecognized sample count in "
894619471dc322de80942f7dbb29a437890e48155c6Paul Berry                "brw_blorp_blit_program::compute_frag_coords()");
895619471dc322de80942f7dbb29a437890e48155c6Paul Berry         break;
896619471dc322de80942f7dbb29a437890e48155c6Paul Berry      }
89734a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      s_is_zero = false;
89834a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   } else {
89934a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      /* Either the destination surface is single-sampled, or the WM will be
90034a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * run in MSDISPMODE_PERPIXEL (which causes a single fragment dispatch
90134a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * per pixel).  In either case, it's not meaningful to compute a sample
90234a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * value.  Just set it to 0.
90334a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       */
90434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      s_is_zero = true;
90534a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry   }
906506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
907506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
908506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
909506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Emit code to compensate for the difference between Y and W tiling.
910506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
911506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * This code modifies the X and Y coordinates according to the formula:
912506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
9138b1f467cce34340637e9baca4847fc5273cf7541Paul Berry *   (X', Y', S') = detile(new_tiling, tile(old_tiling, X, Y, S))
914506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
915506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * (See brw_blorp_blit_program).
916506d70be21cd3469118de89297cba0c0f709c1aePaul Berry *
917506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * It can only translate between W and Y tiling, so new_tiling and old_tiling
918506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * are booleans where true represents W tiling and false represents Y tiling.
919506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
920506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
921506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
922506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
923506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (old_tiled_w == new_tiled_w)
924506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      return;
925506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
9268b1f467cce34340637e9baca4847fc5273cf7541Paul Berry   /* In the code that follows, we can safely assume that S = 0, because W
9271bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry    * tiling formats always use IMS layout.
9288b1f467cce34340637e9baca4847fc5273cf7541Paul Berry    */
9298b1f467cce34340637e9baca4847fc5273cf7541Paul Berry   assert(s_is_zero);
9308b1f467cce34340637e9baca4847fc5273cf7541Paul Berry
931506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (new_tiled_w) {
932506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* Given X and Y coordinates that describe an address using Y tiling,
933506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * translate to the X and Y coordinates that describe the same address
934506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * using W tiling.
935506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
936506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * If we break down the low order bits of X and Y, using a
937506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * single letter to represent each low-order bit:
938506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
939506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   X = A << 7 | 0bBCDEFGH
940506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   Y = J << 5 | 0bKLMNP                                       (1)
941506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
942506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * Then we can apply the Y tiling formula to see the memory offset being
943506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * addressed:
944506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
945506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   offset = (J * tile_pitch + A) << 12 | 0bBCDKLMNPEFGH       (2)
946506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
947506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * If we apply the W detiling formula to this memory location, that the
948506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * corresponding X' and Y' coordinates are:
949506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
950506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   X' = A << 6 | 0bBCDPFH                                     (3)
951506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   Y' = J << 6 | 0bKLMNEG
952506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
953506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * Combining (1) and (3), we see that to transform (X, Y) to (X', Y'),
954506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * we need to make the following computation:
955506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
956506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1         (4)
957506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
958506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       */
959506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t1, X, brw_imm_uw(0xfff4)); /* X & ~0b1011 */
960506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1011) >> 1 */
961506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
962506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b1) << 2 */
963506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, t1, t1, t2); /* (X & ~0b1011) >> 1 | (Y & 0b1) << 2 */
964506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
965506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, Xp, t1, t2);
966506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
967506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
968506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, X, brw_imm_uw(8)); /* X & 0b1000 */
969506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHR(&func, t2, t2, brw_imm_uw(2)); /* (X & 0b1000) >> 2 */
970506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (X & 0b1000) >> 2 */
971506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
972506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
973506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, Yp, t1, t2);
974506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      SWAP_XY_AND_XPYP();
975506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   } else {
976506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* Applying the same logic as above, but in reverse, we obtain the
977506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * formulas:
978506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *
979506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
980506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
981506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       */
982506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t1, X, brw_imm_uw(0xfffa)); /* X & ~0b101 */
983506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b101) << 1 */
984506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, Y, brw_imm_uw(2)); /* Y & 0b10 */
985506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b10) << 2 */
986506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
987506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
988506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (Y & 0b1) << 1 */
989506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
990506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                    | (Y & 0b1) << 1 */
991506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
992506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, Xp, t1, t2);
993506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
994506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
995506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_AND(&func, t2, X, brw_imm_uw(4)); /* X & 0b100 */
996506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_SHR(&func, t2, t2, brw_imm_uw(2)); /* (X & 0b100) >> 2 */
997506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_OR(&func, Yp, t1, t2);
998506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      SWAP_XY_AND_XPYP();
999506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1000506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1001506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1002506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
100319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * Emit code to compensate for the difference between MSAA and non-MSAA
100419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * surfaces.
100519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
100619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * This code modifies the X and Y coordinates according to the formula:
100719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
10087fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *   (X', Y', S') = encode_msaa(num_samples, IMS, X, Y, S)
100919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
101019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * (See brw_blorp_blit_program).
101119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry */
101219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berryvoid
10131bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berrybrw_blorp_blit_program::encode_msaa(unsigned num_samples,
10141bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry                                    intel_msaa_layout layout)
101519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry{
10161bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   switch (layout) {
10171bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_NONE:
10188b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      /* No translation necessary, and S should already be zero. */
10198b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      assert(s_is_zero);
10201bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
10211bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_CMS:
10221bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      /* We can't compensate for compressed layout since at this point in the
10231bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * program we haven't read from the MCS buffer.
10241bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       */
10251bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(!"Bad layout in encode_msaa");
10261bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
10271bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_UMS:
102819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* No translation necessary. */
10291bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
10301bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_IMS:
10317fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      switch (num_samples) {
10327fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      case 4:
10337fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         /* encode_msaa(4, IMS, X, Y, S) = (X', Y', 0)
10347fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *   where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
10357fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
10367fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          */
10377fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
10387fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         if (!s_is_zero) {
10397fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
10407fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */
10417fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         }
10427fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
10437fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry                                                   | (S & 0b1) << 1 */
10447fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
10457fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Xp, t1, t2);
10467fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
10477fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
10487fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         if (!s_is_zero) {
10497fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
10507fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
10517fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         }
10527fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
10537fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Yp, t1, t2);
10547fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         break;
10557fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      case 8:
10567fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         /* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
10577fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *   where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1
10587fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *              | (X & 0b1)
10597fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
10607fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          */
10617fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
10627fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHL(&func, t1, t1, brw_imm_uw(2)); /* (X & ~0b1) << 2 */
10637fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         if (!s_is_zero) {
10647fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_AND(&func, t2, S, brw_imm_uw(4)); /* S & 0b100 */
10657fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100) */
10667fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
10677fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (S & 0b1) << 1 */
10687fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100)
10697fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry                                          | (S & 0b1) << 1 */
10707fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         }
10717fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
10727fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Xp, t1, t2);
10737fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
10747fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
10757fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         if (!s_is_zero) {
10767fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
10777fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry            brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
10787fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         }
10797fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
10807fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Yp, t1, t2);
10817fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         break;
108219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      }
108319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      SWAP_XY_AND_XPYP();
10848b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      s_is_zero = true;
10851bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
108619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
108719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry}
108819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
108919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry/**
109019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * Emit code to compensate for the difference between MSAA and non-MSAA
109119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * surfaces.
109219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
109319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * This code modifies the X and Y coordinates according to the formula:
109419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
10957fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry *   (X', Y', S) = decode_msaa(num_samples, IMS, X, Y, S)
109619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry *
109719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * (See brw_blorp_blit_program).
109819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry */
109919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berryvoid
11001bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berrybrw_blorp_blit_program::decode_msaa(unsigned num_samples,
11011bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry                                    intel_msaa_layout layout)
110219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry{
11031bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   switch (layout) {
11041bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_NONE:
11058b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      /* No translation necessary, and S should already be zero. */
11068b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      assert(s_is_zero);
11071bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
11081bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_CMS:
11091bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      /* We can't compensate for compressed layout since at this point in the
11101bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * program we don't have access to the MCS buffer.
11111bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       */
11121bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(!"Bad layout in encode_msaa");
11131bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
11141bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_UMS:
111519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* No translation necessary. */
11161bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
11171bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   case INTEL_MSAA_LAYOUT_IMS:
11188b1f467cce34340637e9baca4847fc5273cf7541Paul Berry      assert(s_is_zero);
11197fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      switch (num_samples) {
11207fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      case 4:
11217fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         /* decode_msaa(4, IMS, X, Y, 0) = (X', Y', S)
11227fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *   where X' = (X & ~0b11) >> 1 | (X & 0b1)
11237fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
11247fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         S = (Y & 0b10) | (X & 0b10) >> 1
11257fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          */
11267fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, X, brw_imm_uw(0xfffc)); /* X & ~0b11 */
11277fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b11) >> 1 */
11287fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
11297fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Xp, t1, t2);
11307fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
11317fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
11327fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
11337fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Yp, t1, t2);
11347fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, Y, brw_imm_uw(2)); /* Y & 0b10 */
11357fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
11367fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
11377fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, S, t1, t2);
11387fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         break;
11397fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      case 8:
11407fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         /* decode_msaa(8, IMS, X, Y, 0) = (X', Y', S)
11417fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *   where X' = (X & ~0b111) >> 2 | (X & 0b1)
11427fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
11437fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          *         S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1
11447fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry          */
11457fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, X, brw_imm_uw(0xfff8)); /* X & ~0b111 */
11467fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t1, t1, brw_imm_uw(2)); /* (X & ~0b111) >> 2 */
11477fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
11487fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Xp, t1, t2);
11497fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
11507fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
11517fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
11527fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, Yp, t1, t2);
11537fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t1, X, brw_imm_uw(4)); /* X & 0b100 */
11547fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, Y, brw_imm_uw(2)); /* Y & 0b10 */
11557fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, t1, t1, t2); /* (X & 0b100) | (Y & 0b10) */
11567fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
11577fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
11587fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         brw_OR(&func, S, t1, t2);
11597fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry         break;
11607fae97c98bfa13bff0e9da857e86eefdb625584cPaul Berry      }
116119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      s_is_zero = false;
116219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      SWAP_XY_AND_XPYP();
11631bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      break;
116419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
116519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry}
116619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
116719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry/**
1168506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Emit code that kills pixels whose X and Y coordinates are outside the
1169506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
1170506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * dst_x1, dst_y1).
1171506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
1172506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
1173506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::kill_if_outside_dst_rect()
1174506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1175506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg f0 = brw_flag_reg();
1176506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
1177506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   struct brw_reg null16 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
1178506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1179506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_CMP(&func, null16, BRW_CONDITIONAL_GE, X, dst_x0);
1180506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_CMP(&func, null16, BRW_CONDITIONAL_GE, Y, dst_y0);
1181506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_CMP(&func, null16, BRW_CONDITIONAL_L, X, dst_x1);
1182506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_CMP(&func, null16, BRW_CONDITIONAL_L, Y, dst_y1);
1183506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1184506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
1185506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_push_insn_state(&func);
1186506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_set_mask_control(&func, BRW_MASK_DISABLE);
1187506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_AND(&func, g1, f0, g1);
1188506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_pop_insn_state(&func);
1189506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1190506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1191506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
1192506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Emit code to translate from destination (X, Y) coordinates to source (X, Y)
1193506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * coordinates.
1194506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
1195506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
1196506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::translate_dst_to_src()
1197506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1198506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_MUL(&func, Xp, X, x_transform.multiplier);
1199506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_MUL(&func, Yp, Y, y_transform.multiplier);
1200506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_ADD(&func, Xp, Xp, x_transform.offset);
1201506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_ADD(&func, Yp, Yp, y_transform.offset);
1202506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   SWAP_XY_AND_XPYP();
1203506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1204506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1205506d70be21cd3469118de89297cba0c0f709c1aePaul Berry/**
120619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * Emit code to transform the X and Y coordinates as needed for blending
120719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * together the different samples in an MSAA texture.
120819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry */
120919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berryvoid
121019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berrybrw_blorp_blit_program::single_to_blend()
121119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry{
121219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* When looking up samples in an MSAA texture using the SAMPLE message,
121319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * Gen6 requires the texture coordinates to be odd integers (so that they
121419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * correspond to the center of a 2x2 block representing the four samples
121519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * that maxe up a pixel).  So we need to multiply our X and Y coordinates
121619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * each by 2 and then add 1.
121719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    */
121819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   brw_SHL(&func, t1, X, brw_imm_w(1));
121919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   brw_SHL(&func, t2, Y, brw_imm_w(1));
122019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   brw_ADD(&func, Xp, t1, brw_imm_w(1));
122119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   brw_ADD(&func, Yp, t2, brw_imm_w(1));
122219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   SWAP_XY_AND_XPYP();
122319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry}
122419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
1225b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
/**
 * Count the number of trailing 1 bits in the given value.  For example:
 *
 * count_trailing_one_bits(0) == 0
 * count_trailing_one_bits(7) == 3
 * count_trailing_one_bits(11) == 2
 *
 * If every bit of \c value is set, the result is the number of bits in an
 * \c unsigned.
 *
 * \param value  the value to inspect.
 * \return the length of the run of 1 bits starting at bit 0.
 */
static inline int count_trailing_one_bits(unsigned value)
{
#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) /* gcc 3.4 or later */
   const unsigned inverted = ~value;

   /* __builtin_ctz(0) is undefined.  That input only arises when value is
    * all ones, in which case the trailing run is the whole word and its
    * length equals the population count of value.
    */
   if (inverted == 0)
      return __builtin_popcount(value);

   /* The trailing ones of value are the trailing zeros of its complement. */
   return __builtin_ctz(inverted);
#else
   /* value & ~(value + 1) keeps only the trailing run of 1 bits. */
   return _mesa_bitcount(value & ~(value + 1));
#endif
}
1241b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
1242b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
12434725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berryvoid
124417eae9762cdd6cfa69a060001e26113dfc0d7c86Paul Berrybrw_blorp_blit_program::manual_blend(unsigned num_samples)
12454725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry{
12464ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
12474ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      mcs_fetch();
12484ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry
1249b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   /* We add together samples using a binary tree structure, e.g. for 4x MSAA:
1250b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *
1251b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *   result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
1252b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *
1253b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * This ensures that when all samples have the same value, no numerical
1254b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * precision is lost, since each addition operation always adds two equal
1255b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * values, and summing two equal floating point values does not lose
1256b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * precision.
1257b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *
1258b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * We perform this computation by treating the texture_data array as a
1259b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * stack and performing the following operations:
1260b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *
1261b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - push sample 0 onto stack
1262b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - push sample 1 onto stack
1263b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - add top two stack entries
1264b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - push sample 2 onto stack
1265b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - push sample 3 onto stack
1266b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - add top two stack entries
1267b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - add top two stack entries
1268b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * - divide top stack entry by 4
1269b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    *
1270b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * Note that after pushing sample i onto the stack, the number of add
1271b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * operations we do is equal to the number of trailing 1 bits in i.  This
1272b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * works provided the total number of samples is a power of two, which it
1273b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    * always is for i965.
1274e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry    *
1275e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry    * For integer formats, we replace the add operations with average
1276e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry    * operations and skip the final division.
1277b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry    */
1278e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
1279e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                                                  struct brw_reg,
1280e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                                                  struct brw_reg,
1281e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                                                  struct brw_reg);
1282e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   brw_op2_ptr combine_op =
1283e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      key->texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
1284b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   unsigned stack_depth = 0;
128517eae9762cdd6cfa69a060001e26113dfc0d7c86Paul Berry   for (unsigned i = 0; i < num_samples; ++i) {
1286b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
1287b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
1288b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      /* Push sample i onto the stack */
1289b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      assert(stack_depth < ARRAY_SIZE(texture_data));
1290b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      if (i == 0) {
1291b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         s_is_zero = true;
1292b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      } else {
1293b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         s_is_zero = false;
1294b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         brw_MOV(&func, S, brw_imm_uw(i));
1295b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      }
1296b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      texel_fetch(texture_data[stack_depth++]);
1297b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
1298f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry      if (i == 0 && key->tex_layout == INTEL_MSAA_LAYOUT_CMS) {
1299f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry         /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface)
1300f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * suggests an optimization:
1301f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *
1302f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *     "A simple optimization with probable large return in
1303f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *     performance is to compare the MCS value to zero (indicating
1304f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *     all samples are on sample slice 0), and sample only from
1305f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *     sample slice 0 using ld2dss if MCS is zero."
1306f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          *
1307f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * Note that in the case where the MCS value is zero, sampling from
1308f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * sample slice 0 using ld2dss and sampling from sample 0 using
1309f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * ld2dms are equivalent (since all samples are on sample slice 0).
1310f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * Since we have already sampled from sample 0, all we need to do is
1311f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          * skip the remaining fetches and averaging if MCS is zero.
1312f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry          */
1313f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry         brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ,
1314f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry                 mcs_data, brw_imm_ud(0));
1315f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry         brw_IF(&func, BRW_EXECUTE_16);
1316f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry      }
1317f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry
1318b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      /* Do count_trailing_one_bits(i) times */
1319b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry      for (int j = count_trailing_one_bits(i); j-- > 0; ) {
1320b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         assert(stack_depth >= 2);
1321b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         --stack_depth;
1322b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
1323b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         /* TODO: should use a smaller loop bound for non_RGBA formats */
1324b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         for (int k = 0; k < 4; ++k) {
1325e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry            combine_op(&func, offset(texture_data[stack_depth - 1], 2*k),
1326e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                       offset(vec8(texture_data[stack_depth - 1]), 2*k),
1327e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                       offset(vec8(texture_data[stack_depth]), 2*k));
1328b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry         }
13294725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      }
13304725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   }
13314725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry
1332b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   /* We should have just 1 sample on the stack now. */
1333b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry   assert(stack_depth == 1);
1334b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry
1335e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   if (key->texture_data_type == BRW_REGISTER_TYPE_F) {
1336e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      /* Scale the result down by a factor of num_samples */
1337e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      /* TODO: should use a smaller loop bound for non-RGBA formats */
1338e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      for (int j = 0; j < 4; ++j) {
1339e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry         brw_MUL(&func, offset(texture_data[0], 2*j),
1340e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                 offset(vec8(texture_data[0]), 2*j),
1341e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry                 brw_imm_f(1.0/num_samples));
1342e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      }
13434725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   }
1344f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry
1345f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry   if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
1346f91b4d92b97664e6354f66138705e93bec363ba0Paul Berry      brw_ENDIF(&func);
13474725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry}
13484725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry
134919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry/**
135019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * Emit code to look up a value in the texture using the SAMPLE message (which
135119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry * does blending of MSAA surfaces).
135219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry */
135319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berryvoid
13544725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berrybrw_blorp_blit_program::sample(struct brw_reg dst)
135519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry{
1356665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   static const sampler_message_arg args[2] = {
1357665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      SAMPLER_MESSAGE_ARG_U_FLOAT,
1358665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      SAMPLER_MESSAGE_ARG_V_FLOAT
1359665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   };
1360665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry
13614725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry   texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args, ARRAY_SIZE(args));
136219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry}
136319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
136419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry/**
1365506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * Emit code to look up a value in the texture using the SAMPLE_LD message
1366506d70be21cd3469118de89297cba0c0f709c1aePaul Berry * (which does a simple texel fetch).
1367506d70be21cd3469118de89297cba0c0f709c1aePaul Berry */
1368506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
13694725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berrybrw_blorp_blit_program::texel_fetch(struct brw_reg dst)
1370506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
13711c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   static const sampler_message_arg gen6_args[5] = {
1372665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      SAMPLER_MESSAGE_ARG_U_INT,
1373233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      SAMPLER_MESSAGE_ARG_V_INT,
1374233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      SAMPLER_MESSAGE_ARG_ZERO_INT, /* R */
1375233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      SAMPLER_MESSAGE_ARG_ZERO_INT, /* LOD */
1376233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      SAMPLER_MESSAGE_ARG_SI_INT
1377665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   };
13781c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   static const sampler_message_arg gen7_ld_args[3] = {
13791c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_U_INT,
13801c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_ZERO_INT, /* LOD */
13811c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_V_INT
13821c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   };
13831c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   static const sampler_message_arg gen7_ld2dss_args[3] = {
13841c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_SI_INT,
13851c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_U_INT,
13861c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      SAMPLER_MESSAGE_ARG_V_INT
13871c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   };
13884ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   static const sampler_message_arg gen7_ld2dms_args[4] = {
13894ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_SI_INT,
13904ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_MCS_INT,
13914ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_U_INT,
13924ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_V_INT
13934ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   };
1394665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry
13951c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   switch (brw->intel.gen) {
13961c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   case 6:
13974725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen6_args,
13981c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry                     s_is_zero ? 2 : 5);
13991c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      break;
14001c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   case 7:
140160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      switch (key->tex_layout) {
140260c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      case INTEL_MSAA_LAYOUT_IMS:
140360c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         /* From the Ivy Bridge PRM, Vol4 Part1 p72 (Multisampled Surface Storage
140460c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          * Format):
140560c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          *
140660c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          *     If this field is MSFMT_DEPTH_STENCIL
140760c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          *     [a.k.a. INTEL_MSAA_LAYOUT_IMS], the only sampling engine
140860c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          *     messages allowed are "ld2dms", "resinfo", and "sampleinfo".
140960c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          *
141060c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          * So fall through to emit the same message as we use for
141160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          * INTEL_MSAA_LAYOUT_CMS.
141260c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry          */
141360c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      case INTEL_MSAA_LAYOUT_CMS:
141460c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS,
141560c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry                        gen7_ld2dms_args, ARRAY_SIZE(gen7_ld2dms_args));
141660c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         break;
141760c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      case INTEL_MSAA_LAYOUT_UMS:
141860c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS,
141960c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry                        gen7_ld2dss_args, ARRAY_SIZE(gen7_ld2dss_args));
142060c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         break;
142160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      case INTEL_MSAA_LAYOUT_NONE:
14221c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry         assert(s_is_zero);
14234725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry         texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen7_ld_args,
14241c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry                        ARRAY_SIZE(gen7_ld_args));
142560c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         break;
14261c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      }
14271c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      break;
14281c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   default:
14291c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      assert(!"Should not get here.");
14301c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry      break;
14311c73c705fadf164d61003415e3380f2d06f2e7b3Paul Berry   };
1432506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1433506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1434506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
14354ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berrybrw_blorp_blit_program::mcs_fetch()
14364ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry{
14374ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   static const sampler_message_arg gen7_ld_mcs_args[2] = {
14384ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_U_INT,
14394ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      SAMPLER_MESSAGE_ARG_V_INT
14404ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   };
14414ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry   texture_lookup(vec16(mcs_data), GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS,
14424ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry                  gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
14434ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry}
14444ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry
14454ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berryvoid
1446665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berrybrw_blorp_blit_program::expand_to_32_bits(struct brw_reg src,
1447665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry                                          struct brw_reg dst)
1448506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1449665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   brw_MOV(&func, vec8(dst), vec8(src));
1450506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_set_compression_control(&func, BRW_COMPRESSION_2NDHALF);
1451665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   brw_MOV(&func, offset(vec8(dst), 1), suboffset(vec8(src), 8));
1452506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
1453665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry}
1454665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry
1455665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berryvoid
14564725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berrybrw_blorp_blit_program::texture_lookup(struct brw_reg dst,
14574725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry                                       GLuint msg_type,
1458665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry                                       const sampler_message_arg *args,
1459665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry                                       int num_args)
1460665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry{
1461665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   struct brw_reg mrf =
1462665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_UD);
1463665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   for (int arg = 0; arg < num_args; ++arg) {
1464665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      switch (args[arg]) {
1465665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      case SAMPLER_MESSAGE_ARG_U_FLOAT:
1466665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         expand_to_32_bits(X, retype(mrf, BRW_REGISTER_TYPE_F));
1467665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         break;
1468665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      case SAMPLER_MESSAGE_ARG_V_FLOAT:
1469665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         expand_to_32_bits(Y, retype(mrf, BRW_REGISTER_TYPE_F));
1470665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         break;
1471665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      case SAMPLER_MESSAGE_ARG_U_INT:
1472665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         expand_to_32_bits(X, mrf);
1473665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         break;
1474665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      case SAMPLER_MESSAGE_ARG_V_INT:
1475665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         expand_to_32_bits(Y, mrf);
1476665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry         break;
1477233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      case SAMPLER_MESSAGE_ARG_SI_INT:
1478233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         /* Note: on Gen7, this code may be reached with s_is_zero==true
1479233c207e9e477b6b0a5c6705e727129b92989073Paul Berry          * because in Gen7's ld2dss message, the sample index is the first
1480233c207e9e477b6b0a5c6705e727129b92989073Paul Berry          * argument.  When this happens, we need to move a 0 into the
1481233c207e9e477b6b0a5c6705e727129b92989073Paul Berry          * appropriate message register.
1482233c207e9e477b6b0a5c6705e727129b92989073Paul Berry          */
1483233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         if (s_is_zero)
1484233c207e9e477b6b0a5c6705e727129b92989073Paul Berry            brw_MOV(&func, mrf, brw_imm_ud(0));
1485233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         else
1486233c207e9e477b6b0a5c6705e727129b92989073Paul Berry            expand_to_32_bits(S, mrf);
1487233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         break;
14884ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry      case SAMPLER_MESSAGE_ARG_MCS_INT:
148960c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         switch (key->tex_layout) {
149060c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         case INTEL_MSAA_LAYOUT_CMS:
149160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            brw_MOV(&func, mrf, mcs_data);
149260c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            break;
149360c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         case INTEL_MSAA_LAYOUT_IMS:
149460c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            /* When sampling from an IMS surface, MCS data is not relevant,
149560c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry             * and the hardware ignores it.  So don't bother populating it.
149660c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry             */
149760c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            break;
149860c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         default:
149960c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            /* We shouldn't be trying to send MCS data with any other
150060c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry             * layouts.
150160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry             */
150260c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            assert (!"Unsupported layout for MCS data");
150360c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry            break;
150460c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry         }
15054ebbc766210190cb1f03fa4fc762bf7ecc0c7f90Paul Berry         break;
1506233c207e9e477b6b0a5c6705e727129b92989073Paul Berry      case SAMPLER_MESSAGE_ARG_ZERO_INT:
1507233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         brw_MOV(&func, mrf, brw_imm_ud(0));
1508233c207e9e477b6b0a5c6705e727129b92989073Paul Berry         break;
1509665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      }
1510665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry      mrf.nr += 2;
1511665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry   }
1512506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1513506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_SAMPLE(&func,
15144725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry              retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
1515506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              base_mrf /* msg_reg_nr */,
1516665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry              brw_message_reg(base_mrf) /* src0 */,
1517506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
1518665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry              0 /* sampler */,
1519506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              WRITEMASK_XYZW,
1520506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              msg_type,
1521506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              8 /* response_length.  TODO: should be smaller for non-RGBA formats? */,
1522665dc82bdc0e83854dd0f700ec264021bfb5cb39Paul Berry              mrf.nr - base_mrf /* msg_length */,
1523506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              0 /* header_present */,
1524506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              BRW_SAMPLER_SIMD_MODE_SIMD16,
1525506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
1526506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1527506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1528506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef X
1529506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef Y
1530506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef U
1531506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef V
1532506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef S
1533506d70be21cd3469118de89297cba0c0f709c1aePaul Berry#undef SWAP_XY_AND_XPYP
1534506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1535506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
1536506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_program::render_target_write()
1537506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1538e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   struct brw_reg mrf_rt_write =
1539e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      retype(vec16(brw_message_reg(base_mrf)), key->texture_data_type);
1540506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   int mrf_offset = 0;
1541506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1542506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* If we may have killed pixels, then we need to send R0 and R1 in a header
1543506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    * so that the render target knows which pixels we killed.
1544506d70be21cd3469118de89297cba0c0f709c1aePaul Berry    */
1545506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   bool use_header = key->use_kill;
1546506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (use_header) {
1547506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* Copy R0/1 to MRF */
1548506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_MOV(&func, retype(mrf_rt_write, BRW_REGISTER_TYPE_UD),
1549506d70be21cd3469118de89297cba0c0f709c1aePaul Berry              retype(R0, BRW_REGISTER_TYPE_UD));
1550506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      mrf_offset += 2;
1551506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1552506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1553506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Copy texture data to MRFs */
1554506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   for (int i = 0; i < 4; ++i) {
1555506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* E.g. mov(16) m2.0<1>:f r2.0<8;8,1>:f { Align1, H1 } */
15564725ba03cae87ddbf1fa10feaca3d42f24115f91Paul Berry      brw_MOV(&func, offset(mrf_rt_write, mrf_offset),
1557b961d37e613b8b14927c42e09d16d09d70ebcb77Paul Berry              offset(vec8(texture_data[0]), 2*i));
1558506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      mrf_offset += 2;
1559506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1560506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1561506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   /* Now write to the render target and terminate the thread */
1562506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   brw_fb_WRITE(&func,
1563506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                16 /* dispatch_width */,
1564506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                base_mrf /* msg_reg_nr */,
1565506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                mrf_rt_write /* src0 */,
156629362875f2613ad87abe7725ce3c56c36d16cf9bEric Anholt                BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
1567506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
1568506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                mrf_offset /* msg_length.  TODO: Should be smaller for non-RGBA formats. */,
1569506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                0 /* response_length */,
1570506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                true /* eot */,
1571506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                use_header);
1572506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1573506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1574506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1575506d70be21cd3469118de89297cba0c0f709c1aePaul Berryvoid
1576506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint dst1,
1577506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                        bool mirror)
1578506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1579506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (!mirror) {
1580506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* When not mirroring a coordinate (say, X), we need:
1581506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   x' - src_x0 = x - dst_x0
1582506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * Therefore:
1583506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   x' = 1*x + (src_x0 - dst_x0)
1584506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       */
1585506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      multiplier = 1;
1586506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      offset = src0 - dst0;
1587506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   } else {
1588506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      /* When mirroring X we need:
1589506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   x' - src_x0 = dst_x1 - x - 1
1590506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       * Therefore:
1591506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       *   x' = -1*x + (src_x0 + dst_x1 - 1)
1592506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       */
1593506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      multiplier = -1;
1594506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      offset = src0 + dst1 - 1;
1595506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1596506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1597506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1598506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
15991bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry/**
16001bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * Determine which MSAA layout the GPU pipeline should be configured for,
16011bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * based on the chip generation, the number of samples, and the true layout of
16021bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry * the image in memory.
16031bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry */
16041bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berryinline intel_msaa_layout
16051bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berrycompute_msaa_layout_for_pipeline(struct brw_context *brw, unsigned num_samples,
16061bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry                                 intel_msaa_layout true_layout)
16071bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry{
160897fc89c6cbaa3b5ef7f678d2dc2c7d5bbba05315Paul Berry   if (num_samples <= 1) {
16091bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      /* When configuring the GPU for non-MSAA, we can still accommodate IMS
16101bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * format buffers, by transforming coordinates appropriately.
16111bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       */
16121bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(true_layout == INTEL_MSAA_LAYOUT_NONE ||
16131bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry             true_layout == INTEL_MSAA_LAYOUT_IMS);
16141bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      return INTEL_MSAA_LAYOUT_NONE;
16150dd5e98aa5c146ef21ab44b34fb7714206d5ec08Paul Berry   } else {
16160dd5e98aa5c146ef21ab44b34fb7714206d5ec08Paul Berry      assert(true_layout != INTEL_MSAA_LAYOUT_NONE);
16171bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   }
16181bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry
16191bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   /* Prior to Gen7, all MSAA surfaces use IMS layout. */
16201bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   if (brw->intel.gen == 6) {
16211bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(true_layout == INTEL_MSAA_LAYOUT_IMS);
16221bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   }
16231bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry
16241bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   return true_layout;
16251bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry}
16261bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry
16271bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry
16288b1f467cce34340637e9baca4847fc5273cf7541Paul Berrybrw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
16298b1f467cce34340637e9baca4847fc5273cf7541Paul Berry                                             struct intel_mipmap_tree *src_mt,
1630e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                                             unsigned src_level, unsigned src_layer,
1631506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                             struct intel_mipmap_tree *dst_mt,
1632e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry                                             unsigned dst_level, unsigned dst_layer,
1633506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                             GLuint src_x0, GLuint src_y0,
1634506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                             GLuint dst_x0, GLuint dst_y0,
1635506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                             GLuint dst_x1, GLuint dst_y1,
1636506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                             bool mirror_x, bool mirror_y)
1637506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1638e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry   src.set(brw, src_mt, src_level, src_layer);
1639e87174cf4b499c8e9558438e70b0da5f0f38f54aPaul Berry   dst.set(brw, dst_mt, dst_level, dst_layer);
1640506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1641506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   use_wm_prog = true;
1642506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   memset(&wm_prog_key, 0, sizeof(wm_prog_key));
1643506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1644e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   /* texture_data_type indicates the register type that should be used to
1645e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry    * manipulate texture data.
1646e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry    */
1647e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   switch (_mesa_get_format_datatype(src_mt->format)) {
1648e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   case GL_UNSIGNED_NORMALIZED:
1649e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   case GL_SIGNED_NORMALIZED:
1650e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   case GL_FLOAT:
1651e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_F;
1652e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      break;
1653e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   case GL_UNSIGNED_INT:
1654e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      if (src_mt->format == MESA_FORMAT_S8) {
1655e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry         /* We process stencil as though it's an unsigned normalized color */
1656e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry         wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_F;
1657e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      } else {
1658e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry         wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_UD;
1659e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      }
1660e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      break;
1661e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   case GL_INT:
1662e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_D;
1663e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      break;
1664e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   default:
1665e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      assert(!"Unrecognized blorp format");
1666e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry      break;
1667e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry   }
1668e5d983267a98bf9f73f0ea981eaca339b975a8dbPaul Berry
16698b1f467cce34340637e9baca4847fc5273cf7541Paul Berry   if (brw->intel.gen > 6) {
167060c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry      /* Gen7's rendering hardware only supports the IMS layout for depth and
167160c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry       * stencil render targets.  Blorp always maps its destination surface as
167260c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry       * a color render target (even if it's actually a depth or stencil
167360c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry       * buffer).  So if the destination is IMS, we'll have to map it as a
167460c3e69dbf297426c42e4b8f94c5f0493bd9be5fPaul Berry       * single-sampled texture and interleave the samples ourselves.
16758b1f467cce34340637e9baca4847fc5273cf7541Paul Berry       */
16761bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      if (dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS)
16778b1f467cce34340637e9baca4847fc5273cf7541Paul Berry         dst.num_samples = 0;
16788b1f467cce34340637e9baca4847fc5273cf7541Paul Berry   }
16798b1f467cce34340637e9baca4847fc5273cf7541Paul Berry
168097fc89c6cbaa3b5ef7f678d2dc2c7d5bbba05315Paul Berry   if (dst.map_stencil_as_y_tiled && dst.num_samples > 1) {
168134a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      /* If the destination surface is a W-tiled multisampled stencil buffer
168234a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * that we're mapping as Y tiled, then we need to arrange for the WM
168334a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * program to run once per sample rather than once per pixel, because
168434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * the memory layout of related samples doesn't match between W and Y
168534a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * tiling.
1686233c207e9e477b6b0a5c6705e727129b92989073Paul Berry       */
168734a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      wm_prog_key.persample_msaa_dispatch = true;
1688233c207e9e477b6b0a5c6705e727129b92989073Paul Berry   }
1689233c207e9e477b6b0a5c6705e727129b92989073Paul Berry
169097fc89c6cbaa3b5ef7f678d2dc2c7d5bbba05315Paul Berry   if (src.num_samples > 0 && dst.num_samples > 1) {
169119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* We are blitting from a multisample buffer to a multisample buffer, so
169219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * we must preserve samples within a pixel.  This means we have to
169334a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * arrange for the WM program to run once per sample rather than once
169434a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       * per pixel.
169519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
169634a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      wm_prog_key.persample_msaa_dispatch = true;
169719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
169819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
169919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* The render path must be configured to use the same number of samples as
170019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * the destination buffer.
170119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    */
170219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   num_samples = dst.num_samples;
170319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
170419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   GLenum base_format = _mesa_get_format_base_format(src_mt->format);
170519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about depth/stencil? */
170619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       base_format != GL_STENCIL_INDEX &&
170797fc89c6cbaa3b5ef7f678d2dc2c7d5bbba05315Paul Berry       src_mt->num_samples > 1 && dst_mt->num_samples <= 1) {
170819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* We are downsampling a color buffer, so blend. */
170919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      wm_prog_key.blend = true;
171019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
171119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
171219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* src_samples and dst_samples are the true sample counts */
171319e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   wm_prog_key.src_samples = src_mt->num_samples;
171419e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   wm_prog_key.dst_samples = dst_mt->num_samples;
171519e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
171619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   /* tex_samples and rt_samples are the sample counts that are set up in
171719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    * SURFACE_STATE.
171819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry    */
171919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   wm_prog_key.tex_samples = src.num_samples;
172019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   wm_prog_key.rt_samples  = dst.num_samples;
172119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
17221bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   /* tex_layout and rt_layout indicate the MSAA layout the GPU pipeline will
17231bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry    * use to access the source and destination surfaces.
172467b0f7c7dddeb92ee4d24ed3977e20b70f5674f6Paul Berry    */
17251bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   wm_prog_key.tex_layout =
17261bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      compute_msaa_layout_for_pipeline(brw, src.num_samples, src.msaa_layout);
17271bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   wm_prog_key.rt_layout =
17281bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      compute_msaa_layout_for_pipeline(brw, dst.num_samples, dst.msaa_layout);
172967b0f7c7dddeb92ee4d24ed3977e20b70f5674f6Paul Berry
17301bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   /* src_layout and dst_layout indicate the true MSAA layout used by src and
17311bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry    * dst.
17328b1f467cce34340637e9baca4847fc5273cf7541Paul Berry    */
17331bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   wm_prog_key.src_layout = src_mt->msaa_layout;
17341bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry   wm_prog_key.dst_layout = dst_mt->msaa_layout;
17358b1f467cce34340637e9baca4847fc5273cf7541Paul Berry
1736506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled;
1737506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled;
1738506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   x0 = wm_push_consts.dst_x0 = dst_x0;
1739506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   y0 = wm_push_consts.dst_y0 = dst_y0;
1740506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   x1 = wm_push_consts.dst_x1 = dst_x1;
1741506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   y1 = wm_push_consts.dst_y1 = dst_y1;
1742506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x);
1743506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y);
1744506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
174597fc89c6cbaa3b5ef7f678d2dc2c7d5bbba05315Paul Berry   if (dst.num_samples <= 1 && dst_mt->num_samples > 1) {
174619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      /* We must expand the rectangle we send through the rendering pipeline,
174719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * to account for the fact that we are mapping the destination region as
174819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * single-sampled when it is in fact multisampled.  We must also align
174919e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * it to a multiple of the multisampling pattern, because the
175019e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * differences between multisampled and single-sampled surface formats
175119e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * will mean that pixels are scrambled within the multisampling pattern.
175219e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       * TODO: what if this makes the coordinates too large?
17538b1f467cce34340637e9baca4847fc5273cf7541Paul Berry       *
17541bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * Note: this only works if the destination surface uses the IMS layout.
17551bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * If it's UMS, then we have no choice but to set up the rendering
17561bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry       * pipeline as multisampled.
175719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry       */
17581bd4d456cdecf7bea55f4e3dac574af54efad994Paul Berry      assert(dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS);
1759082874e3891e588f674508be6578f600b35852c4Paul Berry      switch (dst_mt->num_samples) {
1760082874e3891e588f674508be6578f600b35852c4Paul Berry      case 4:
17614df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry         x0 = ROUND_DOWN_TO(x0 * 2, 4);
17624df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry         y0 = ROUND_DOWN_TO(y0 * 2, 4);
1763082874e3891e588f674508be6578f600b35852c4Paul Berry         x1 = ALIGN(x1 * 2, 4);
1764082874e3891e588f674508be6578f600b35852c4Paul Berry         y1 = ALIGN(y1 * 2, 4);
1765082874e3891e588f674508be6578f600b35852c4Paul Berry         break;
1766082874e3891e588f674508be6578f600b35852c4Paul Berry      case 8:
17674df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry         x0 = ROUND_DOWN_TO(x0 * 4, 8);
17684df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry         y0 = ROUND_DOWN_TO(y0 * 2, 4);
1769082874e3891e588f674508be6578f600b35852c4Paul Berry         x1 = ALIGN(x1 * 4, 8);
1770082874e3891e588f674508be6578f600b35852c4Paul Berry         y1 = ALIGN(y1 * 2, 4);
1771082874e3891e588f674508be6578f600b35852c4Paul Berry         break;
1772082874e3891e588f674508be6578f600b35852c4Paul Berry      default:
1773082874e3891e588f674508be6578f600b35852c4Paul Berry         assert(!"Unrecognized sample count in brw_blorp_blit_params ctor");
1774082874e3891e588f674508be6578f600b35852c4Paul Berry         break;
1775082874e3891e588f674508be6578f600b35852c4Paul Berry      }
177619e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry      wm_prog_key.use_kill = true;
177719e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry   }
177819e9b24626c2b9d7abef054d57bb2a52106c545bPaul Berry
1779506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (dst.map_stencil_as_y_tiled) {
1780602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry      /* We must modify the rectangle we send through the rendering pipeline
178176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * (and the size and x/y offset of the destination surface), to account
178276c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * for the fact that we are mapping it as Y-tiled when it is in fact
178376c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * W-tiled.
178421e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       *
178521e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * Both Y tiling and W tiling can be understood as organizations of
178621e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * 32-byte sub-tiles; within each 32-byte sub-tile, the layout of pixels
178721e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * is different, but the layout of the 32-byte sub-tiles within the 4k
178821e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * tile is the same (8 sub-tiles across by 16 sub-tiles down, in
178921e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * column-major order).  In Y tiling, the sub-tiles are 16 bytes wide
179021e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * and 2 rows high; in W tiling, they are 8 bytes wide and 4 rows high.
179121e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       *
179221e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * Therefore, to account for the layout differences within the 32-byte
179321e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * sub-tiles, we must expand the rectangle so the X coordinates of its
179421e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * edges are multiples of 8 (the W sub-tile width), and its Y
179521e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * coordinates of its edges are multiples of 4 (the W sub-tile height).
179621e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * Then we need to scale the X and Y coordinates of the rectangle to
179721e9850d5369f9757b5005df4c8af38668a3053bPaul Berry       * account for the differences in aspect ratio between the Y and W
179876c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * sub-tiles.  We need to modify the layer width and height similarly.
179976c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       *
180036bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry       * A correction needs to be applied when MSAA is in use: since
180136bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry       * INTEL_MSAA_LAYOUT_IMS uses an interleaving pattern whose height is 4,
180236bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry       * we need to align the Y coordinates to multiples of 8, so that when
180336bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry       * they are divided by two they are still multiples of 4.
180436bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry       *
180576c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * Note: Since the x/y offset of the surface will be applied using the
180676c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * SURFACE_STATE command packet, it will be invisible to the swizzling
180776c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * code in the shader; therefore it needs to be in a multiple of the
180876c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * 32-byte sub-tile size.  Fortunately it is, since the sub-tile is 8
180976c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * pixels wide and 4 pixels high (when viewed as a W-tiled stencil
181076c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * buffer), and the miplevel alignment used for stencil buffers is 8
181176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * pixels horizontally and either 4 or 8 pixels vertically (see
181276c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * intel_horizontal_texture_alignment_unit() and
181376c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * intel_vertical_texture_alignment_unit()).
181476c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       *
181576c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * Note: Also, since the SURFACE_STATE command packet can only apply
181676c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * offsets that are multiples of 4 pixels horizontally and 2 pixels
181776c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * vertically, it is important that the offsets will be multiples of
181876c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * these sizes after they are converted into Y-tiled coordinates.
181976c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * Fortunately they will be, since we know from above that the offsets
182076c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * are a multiple of the 32-byte sub-tile size, and in Y-tiled
182176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * coordinates the sub-tile is 16 pixels wide and 2 pixels high.
182234a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry       *
1823602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry       * TODO: what if this makes the coordinates (or the texture size) too
1824602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry       * large?
1825506d70be21cd3469118de89297cba0c0f709c1aePaul Berry       */
182636bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry      const unsigned x_align = 8, y_align = dst.num_samples != 0 ? 8 : 4;
18274df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry      x0 = ROUND_DOWN_TO(x0, x_align) * 2;
18284df2848786d4778a2ce7dbf2e046e191036ccb56Paul Berry      y0 = ROUND_DOWN_TO(y0, y_align) / 2;
182934a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      x1 = ALIGN(x1, x_align) * 2;
183034a5f12e35dd4a5aff6683a8286d4582ba17df14Paul Berry      y1 = ALIGN(y1, y_align) / 2;
183176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      dst.width = ALIGN(dst.width, x_align) * 2;
183276c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      dst.height = ALIGN(dst.height, y_align) / 2;
183376c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      dst.x_offset *= 2;
183476c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      dst.y_offset /= 2;
1835506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      wm_prog_key.use_kill = true;
1836506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1837602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry
1838602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry   if (src.map_stencil_as_y_tiled) {
183976c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      /* We must modify the size and x/y offset of the source surface to
184076c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * account for the fact that we are mapping it as Y-tiled when it is in
184176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * fact W tiled.
184276c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       *
184376c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * See the comments above concerning x/y offset alignment for the
184476c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry       * destination surface.
1845602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry       *
1846602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry       * TODO: what if this makes the texture size too large?
1847602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry       */
184836bc0fe4f2e90ea9efa19940f477472dad6fb18fPaul Berry      const unsigned x_align = 8, y_align = src.num_samples != 0 ? 8 : 4;
184976c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      src.width = ALIGN(src.width, x_align) * 2;
185076c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      src.height = ALIGN(src.height, y_align) / 2;
185176c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      src.x_offset *= 2;
185276c1c34c4aa2fa48126aee8d16e943bf0e3ff750Paul Berry      src.y_offset /= 2;
1853602e9a0f3727b036caf3a7b228fe90d36d832ea7Paul Berry   }
1854506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1855506d70be21cd3469118de89297cba0c0f709c1aePaul Berry
1856506d70be21cd3469118de89297cba0c0f709c1aePaul Berryuint32_t
1857506d70be21cd3469118de89297cba0c0f709c1aePaul Berrybrw_blorp_blit_params::get_wm_prog(struct brw_context *brw,
1858506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                                   brw_blorp_prog_data **prog_data) const
1859506d70be21cd3469118de89297cba0c0f709c1aePaul Berry{
1860506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   uint32_t prog_offset;
1861506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   if (!brw_search_cache(&brw->cache, BRW_BLORP_BLIT_PROG,
1862506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                         &this->wm_prog_key, sizeof(this->wm_prog_key),
1863506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                         &prog_offset, prog_data)) {
1864506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_blorp_blit_program prog(brw, &this->wm_prog_key);
1865506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      GLuint program_size;
1866506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      const GLuint *program = prog.compile(brw, &program_size);
1867506d70be21cd3469118de89297cba0c0f709c1aePaul Berry      brw_upload_cache(&brw->cache, BRW_BLORP_BLIT_PROG,
1868506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                       &this->wm_prog_key, sizeof(this->wm_prog_key),
1869506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                       program, program_size,
1870506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                       &prog.prog_data, sizeof(prog.prog_data),
1871506d70be21cd3469118de89297cba0c0f709c1aePaul Berry                       &prog_offset, prog_data);
1872506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   }
1873506d70be21cd3469118de89297cba0c0f709c1aePaul Berry   return prog_offset;
1874506d70be21cd3469118de89297cba0c0f709c1aePaul Berry}
1875