1/*
2 * Copyright © 2013-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_vec4_surface_builder.h"
25
26using namespace brw;
27
28namespace {
29   namespace array_utils {
30      /**
31       * Copy one every \p src_stride logical components of the argument into
32       * one every \p dst_stride logical components of the result.
33       */
34      src_reg
35      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36                  unsigned dst_stride, unsigned src_stride)
37      {
38         if (src_stride == 1 && dst_stride == 1) {
39            return src;
40         } else {
41            const dst_reg dst = bld.vgrf(src.type,
42                                         DIV_ROUND_UP(size * dst_stride, 4));
43
44            for (unsigned i = 0; i < size; ++i)
45               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46                                 1 << (i * dst_stride % 4)),
47                       swizzle(offset(src, 8, i * src_stride / 4),
48                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49
50            return src_reg(dst);
51         }
52      }
53
54      /**
55       * Convert a VEC4 into an array of registers with the layout expected by
56       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
57       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58       * a SIMD8 vector.
59       */
60      src_reg
61      emit_insert(const vec4_builder &bld, const src_reg &src,
62                  unsigned n, bool has_simd4x2)
63      {
64         if (src.file == BAD_FILE || n == 0) {
65            return src_reg();
66
67         } else {
68            /* Pad unused components with zeroes. */
69            const unsigned mask = (1 << n) - 1;
70            const dst_reg tmp = bld.vgrf(src.type);
71
72            bld.MOV(writemask(tmp, mask), src);
73            if (n < 4)
74               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75
76            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77         }
78      }
79
80      /**
81       * Convert an array of registers back into a VEC4 according to the
82       * layout expected from some shared unit.  If \p has_simd4x2 is true the
83       * argument is left unmodified in SIMD4x2 form, otherwise it will be
84       * rearranged from SIMD8 form.
85       */
86      src_reg
87      emit_extract(const vec4_builder &bld, const src_reg src,
88                   unsigned n, bool has_simd4x2)
89      {
90         if (src.file == BAD_FILE || n == 0) {
91            return src_reg();
92
93         } else {
94            return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
95         }
96      }
97   }
98}
99
100namespace brw {
101   namespace surface_access {
102      namespace {
103         using namespace array_utils;
104
105         /**
106          * Generate a send opcode for a surface message and return the
107          * result.
108          */
109         src_reg
110         emit_send(const vec4_builder &bld, enum opcode op,
111                   const src_reg &header,
112                   const src_reg &addr, unsigned addr_sz,
113                   const src_reg &src, unsigned src_sz,
114                   const src_reg &surface,
115                   unsigned arg, unsigned ret_sz,
116                   brw_predicate pred = BRW_PREDICATE_NONE)
117         {
118            /* Calculate the total number of components of the payload. */
119            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
120            const unsigned sz = header_sz + addr_sz + src_sz;
121
122            /* Construct the payload. */
123            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
124            unsigned n = 0;
125
126            if (header_sz)
127               bld.exec_all().MOV(offset(payload, 8, n++),
128                                  retype(header, BRW_REGISTER_TYPE_UD));
129
130            for (unsigned i = 0; i < addr_sz; i++)
131               bld.MOV(offset(payload, 8, n++),
132                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
133
134            for (unsigned i = 0; i < src_sz; i++)
135               bld.MOV(offset(payload, 8, n++),
136                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
137
138            /* Reduce the dynamically uniform surface index to a single
139             * scalar.
140             */
141            const src_reg usurface = bld.emit_uniformize(surface);
142
143            /* Emit the message send instruction. */
144            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
145            vec4_instruction *inst =
146               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
147            inst->mlen = sz;
148            inst->size_written = ret_sz * REG_SIZE;
149            inst->header_size = header_sz;
150            inst->predicate = pred;
151
152            return src_reg(dst);
153         }
154      }
155
156      /**
157       * Emit an untyped surface read opcode.  \p dims determines the number
158       * of components of the address and \p size the number of components of
159       * the returned value.
160       */
161      src_reg
162      emit_untyped_read(const vec4_builder &bld,
163                        const src_reg &surface, const src_reg &addr,
164                        unsigned dims, unsigned size,
165                        brw_predicate pred)
166      {
167         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
168                          emit_insert(bld, addr, dims, true), 1,
169                          src_reg(), 0,
170                          surface, size, 1, pred);
171      }
172
173      /**
174       * Emit an untyped surface write opcode.  \p dims determines the number
175       * of components of the address and \p size the number of components of
176       * the argument.
177       */
178      void
179      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
180                         const src_reg &addr, const src_reg &src,
181                         unsigned dims, unsigned size,
182                         brw_predicate pred)
183      {
184         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
185                                   bld.shader->devinfo->is_haswell);
186         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
187                   emit_insert(bld, addr, dims, has_simd4x2),
188                   has_simd4x2 ? 1 : dims,
189                   emit_insert(bld, src, size, has_simd4x2),
190                   has_simd4x2 ? 1 : size,
191                   surface, size, 0, pred);
192      }
193
194      /**
195       * Emit an untyped surface atomic opcode.  \p dims determines the number
196       * of components of the address and \p rsize the number of components of
197       * the returned value (either zero or one).
198       */
199      src_reg
200      emit_untyped_atomic(const vec4_builder &bld,
201                          const src_reg &surface, const src_reg &addr,
202                          const src_reg &src0, const src_reg &src1,
203                          unsigned dims, unsigned rsize, unsigned op,
204                          brw_predicate pred)
205      {
206         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
207                                   bld.shader->devinfo->is_haswell);
208
209         /* Zip the components of both sources, they are represented as the X
210          * and Y components of the same vector.
211          */
212         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
213         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
214
215         if (size >= 1)
216            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
217         if (size >= 2)
218            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
219
220         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
221                          emit_insert(bld, addr, dims, has_simd4x2),
222                          has_simd4x2 ? 1 : dims,
223                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
224                          has_simd4x2 && size ? 1 : size,
225                          surface, op, rsize, pred);
226      }
227
228      namespace {
229         /**
230          * Initialize the header present in typed surface messages.
231          */
232         src_reg
233         emit_typed_message_header(const vec4_builder &bld)
234         {
235            const vec4_builder ubld = bld.exec_all();
236            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
237
238            ubld.MOV(dst, brw_imm_d(0));
239
240            if (bld.shader->devinfo->gen == 7 &&
241                !bld.shader->devinfo->is_haswell) {
242               /* The sample mask is used on IVB for the SIMD8 messages that
243                * have no SIMD4x2 variant.  We only use the two X channels
244                * in that case, mask everything else out.
245                */
246               ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
247            }
248
249            return src_reg(dst);
250         }
251      }
252
253      /**
254       * Emit a typed surface read opcode.  \p dims determines the number of
255       * components of the address and \p size the number of components of the
256       * returned value.
257       */
258      src_reg
259      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
260                      const src_reg &addr, unsigned dims, unsigned size)
261      {
262         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
263                                   bld.shader->devinfo->is_haswell);
264         const src_reg tmp =
265            emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
266                      emit_typed_message_header(bld),
267                      emit_insert(bld, addr, dims, has_simd4x2),
268                      has_simd4x2 ? 1 : dims,
269                      src_reg(), 0,
270                      surface, size,
271                      has_simd4x2 ? 1 : size);
272
273         return emit_extract(bld, tmp, size, has_simd4x2);
274      }
275
276      /**
277       * Emit a typed surface write opcode.  \p dims determines the number of
278       * components of the address and \p size the number of components of the
279       * argument.
280       */
281      void
282      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
283                       const src_reg &addr, const src_reg &src,
284                       unsigned dims, unsigned size)
285      {
286         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
287                                   bld.shader->devinfo->is_haswell);
288         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
289                   emit_typed_message_header(bld),
290                   emit_insert(bld, addr, dims, has_simd4x2),
291                   has_simd4x2 ? 1 : dims,
292                   emit_insert(bld, src, size, has_simd4x2),
293                   has_simd4x2 ? 1 : size,
294                   surface, size, 0);
295      }
296
297      /**
298       * Emit a typed surface atomic opcode.  \p dims determines the number of
299       * components of the address and \p rsize the number of components of
300       * the returned value (either zero or one).
301       */
302      src_reg
303      emit_typed_atomic(const vec4_builder &bld,
304                        const src_reg &surface, const src_reg &addr,
305                        const src_reg &src0, const src_reg &src1,
306                        unsigned dims, unsigned rsize, unsigned op,
307                        brw_predicate pred)
308      {
309         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
310                                   bld.shader->devinfo->is_haswell);
311
312         /* Zip the components of both sources, they are represented as the X
313          * and Y components of the same vector.
314          */
315         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
316         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
317
318         if (size >= 1)
319            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
320         if (size >= 2)
321            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
322
323         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
324                          emit_typed_message_header(bld),
325                          emit_insert(bld, addr, dims, has_simd4x2),
326                          has_simd4x2 ? 1 : dims,
327                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
328                          has_simd4x2 ? 1 : size,
329                          surface, op, rsize, pred);
330      }
331   }
332}
333