1/* -*- c++ -*- */
2/*
3 * Copyright © 2010-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#ifndef BRW_IR_FS_H
26#define BRW_IR_FS_H
27
28#include "brw_shader.h"
29
30class fs_inst;
31
/**
 * Register operand type used by the helpers in this header.  It extends
 * backend_reg with a per-region horizontal stride.
 */
class fs_reg : public backend_reg {
public:
   /* NOTE(review): presumably supplies ralloc-backed operator new/delete for
    * this class -- confirm at the macro definition, which is not visible here.
    */
   DECLARE_RALLOC_CXX_OPERATORS(fs_reg)

   /* NOTE(review): presumably resets the register to its default state; the
    * definition is not visible in this header.
    */
   void init();

   /* Constructors.  Their definitions live outside this header; judging by
    * the parameter lists they build a register from nothing, from a hardware
    * brw_reg, or from a register file plus number (and optionally a type) --
    * TODO confirm against the implementation.
    */
   fs_reg();
   fs_reg(struct ::brw_reg reg);
   fs_reg(enum brw_reg_file file, int nr);
   fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);

   /* NOTE(review): presumably a field-wise comparison against \p r and a
    * check for a gap-free region respectively -- defined elsewhere.
    */
   bool equals(const fs_reg &r) const;
   bool is_contiguous() const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;

   /**
    * Register region horizontal stride.
    *
    * The helpers in this header step between channels by
    * stride * type_sz(type) bytes for the VGRF, MRF and ATTR files, so the
    * value counts elements of the register type.  A stride of zero makes
    * every channel read the same element (see component()).  For the ARF and
    * FIXED_GRF files the helpers read the inherited hstride field instead.
    */
   uint8_t stride;
};
55
56static inline fs_reg
57negate(fs_reg reg)
58{
59   assert(reg.file != IMM);
60   reg.negate = !reg.negate;
61   return reg;
62}
63
64static inline fs_reg
65retype(fs_reg reg, enum brw_reg_type type)
66{
67   reg.type = type;
68   return reg;
69}
70
71static inline fs_reg
72byte_offset(fs_reg reg, unsigned delta)
73{
74   switch (reg.file) {
75   case BAD_FILE:
76      break;
77   case VGRF:
78   case ATTR:
79   case UNIFORM:
80      reg.offset += delta;
81      break;
82   case MRF: {
83      const unsigned suboffset = reg.offset + delta;
84      reg.nr += suboffset / REG_SIZE;
85      reg.offset = suboffset % REG_SIZE;
86      break;
87   }
88   case ARF:
89   case FIXED_GRF: {
90      const unsigned suboffset = reg.subnr + delta;
91      reg.nr += suboffset / REG_SIZE;
92      reg.subnr = suboffset % REG_SIZE;
93      break;
94   }
95   case IMM:
96   default:
97      assert(delta == 0);
98   }
99   return reg;
100}
101
102static inline fs_reg
103horiz_offset(const fs_reg &reg, unsigned delta)
104{
105   switch (reg.file) {
106   case BAD_FILE:
107   case UNIFORM:
108   case IMM:
109      /* These only have a single component that is implicitly splatted.  A
110       * horizontal offset should be a harmless no-op.
111       * XXX - Handle vector immediates correctly.
112       */
113      return reg;
114   case VGRF:
115   case MRF:
116   case ATTR:
117      return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
118   case ARF:
119   case FIXED_GRF:
120      if (reg.is_null()) {
121         return reg;
122      } else {
123         const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
124         return byte_offset(reg, delta * stride * type_sz(reg.type));
125      }
126   }
127   unreachable("Invalid register file");
128}
129
130static inline fs_reg
131offset(fs_reg reg, unsigned width, unsigned delta)
132{
133   switch (reg.file) {
134   case BAD_FILE:
135      break;
136   case ARF:
137   case FIXED_GRF:
138   case MRF:
139   case VGRF:
140   case ATTR:
141   case UNIFORM:
142      return byte_offset(reg, delta * reg.component_size(width));
143   case IMM:
144      assert(delta == 0);
145   }
146   return reg;
147}
148
149/**
150 * Get the scalar channel of \p reg given by \p idx and replicate it to all
151 * channels of the result.
152 */
153static inline fs_reg
154component(fs_reg reg, unsigned idx)
155{
156   reg = horiz_offset(reg, idx);
157   reg.stride = 0;
158   return reg;
159}
160
161/**
162 * Return an integer identifying the discrete address space a register is
163 * contained in.  A register is by definition fully contained in the single
164 * reg_space it belongs to, so two registers with different reg_space ids are
165 * guaranteed not to overlap.  Most register files are a single reg_space of
166 * its own, only the VGRF file is composed of multiple discrete address
167 * spaces, one for each VGRF allocation.
168 */
169static inline uint32_t
170reg_space(const fs_reg &r)
171{
172   return r.file << 16 | (r.file == VGRF ? r.nr : 0);
173}
174
175/**
176 * Return the base offset in bytes of a register relative to the start of its
177 * reg_space().
178 */
179static inline unsigned
180reg_offset(const fs_reg &r)
181{
182   return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
183          (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
184          (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
185}
186
187/**
188 * Return the amount of padding in bytes left unused between individual
189 * components of register \p r due to a (horizontal) stride value greater than
190 * one, or zero if components are tightly packed in the register file.
191 */
192static inline unsigned
193reg_padding(const fs_reg &r)
194{
195   const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
196                            r.hstride == 0 ? 0 :
197                            1 << (r.hstride - 1));
198   return (MAX2(1, stride) - 1) * type_sz(r.type);
199}
200
201/**
202 * Return whether the register region starting at \p r and spanning \p dr
203 * bytes could potentially overlap the register region starting at \p s and
204 * spanning \p ds bytes.
205 */
206static inline bool
207regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
208{
209   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
210      fs_reg t = r;
211      t.nr &= ~BRW_MRF_COMPR4;
212      /* COMPR4 regions are translated by the hardware during decompression
213       * into two separate half-regions 4 MRFs apart from each other.
214       */
215      return regions_overlap(t, dr / 2, s, ds) ||
216             regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
217
218   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
219      return regions_overlap(s, ds, r, dr);
220
221   } else {
222      return reg_space(r) == reg_space(s) &&
223             !(reg_offset(r) + dr <= reg_offset(s) ||
224               reg_offset(s) + ds <= reg_offset(r));
225   }
226}
227
228/**
229 * Check that the register region given by r [r.offset, r.offset + dr[
230 * is fully contained inside the register region given by s
231 * [s.offset, s.offset + ds[.
232 */
233static inline bool
234region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
235{
236   return reg_space(r) == reg_space(s) &&
237          reg_offset(r) >= reg_offset(s) &&
238          reg_offset(r) + dr <= reg_offset(s) + ds;
239}
240
241/**
242 * Return whether the given register region is n-periodic, i.e. whether the
243 * original region remains invariant after shifting it by \p n scalar
244 * channels.
245 */
246static inline bool
247is_periodic(const fs_reg &reg, unsigned n)
248{
249   if (reg.file == BAD_FILE || reg.is_null()) {
250      return true;
251
252   } else if (reg.file == IMM) {
253      const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
254                               reg.type == BRW_REGISTER_TYPE_V ? 8 :
255                               reg.type == BRW_REGISTER_TYPE_VF ? 4 :
256                               1);
257      return n % period == 0;
258
259   } else if (reg.file == ARF || reg.file == FIXED_GRF) {
260      const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
261                               reg.vstride == 0 ? 1 << reg.width :
262                               ~0);
263      return n % period == 0;
264
265   } else {
266      return reg.stride == 0;
267   }
268}
269
270static inline bool
271is_uniform(const fs_reg &reg)
272{
273   return is_periodic(reg, 1);
274}
275
276/**
277 * Get the specified 8-component quarter of a register.
278 * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
279 */
280static inline fs_reg
281half(const fs_reg &reg, unsigned idx)
282{
283   assert(idx < 2);
284   return horiz_offset(reg, 8 * idx);
285}
286
287/**
288 * Reinterpret each channel of register \p reg as a vector of values of the
289 * given smaller type and take the i-th subcomponent from each.
290 */
291static inline fs_reg
292subscript(fs_reg reg, brw_reg_type type, unsigned i)
293{
294   assert((i + 1) * type_sz(type) <= type_sz(reg.type));
295
296   if (reg.file == ARF || reg.file == FIXED_GRF) {
297      /* The stride is encoded inconsistently for fixed GRF and ARF registers
298       * as the log2 of the actual vertical and horizontal strides.
299       */
300      const int delta = _mesa_logbase2(type_sz(reg.type)) -
301                        _mesa_logbase2(type_sz(type));
302      reg.hstride += (reg.hstride ? delta : 0);
303      reg.vstride += (reg.vstride ? delta : 0);
304
305   } else if (reg.file == IMM) {
306      assert(reg.type == type);
307
308   } else {
309      reg.stride *= type_sz(reg.type) / type_sz(type);
310   }
311
312   return byte_offset(retype(reg, type), i * type_sz(type));
313}
314
/**
 * Default-constructed register constant.  Being a namespace-scope static in
 * a header, every translation unit that includes this file gets its own copy.
 * NOTE(review): presumably used as the "no register" placeholder -- confirm
 * against the fs_reg() default constructor, which is not visible here.
 */
static const fs_reg reg_undef;
316
/**
 * Instruction type used together with fs_reg operands.  It extends
 * backend_instruction with a destination register and an array of source
 * registers.
 */
class fs_inst : public backend_instruction {
   /* Declared in the private section of the class.
    * NOTE(review): presumably left undefined to forbid assignment -- confirm.
    */
   fs_inst &operator=(const fs_inst &);

   /* Private initialization helper taking the same arguments as the most
    * general constructor below; the definition is not visible here.
    */
   void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
             const fs_reg *src, unsigned sources);

public:
   /* NOTE(review): presumably supplies ralloc-backed operator new/delete for
    * this class -- confirm at the macro definition.
    */
   DECLARE_RALLOC_CXX_OPERATORS(fs_inst)

   /* Constructors taking an opcode, an execution size, a destination and
    * from zero to three sources, or an explicit array of \p sources sources.
    */
   fs_inst();
   fs_inst(enum opcode opcode, uint8_t exec_size);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg src[], unsigned sources);
   fs_inst(const fs_inst &that);
   ~fs_inst();

   /* NOTE(review): presumably changes the length of the src array to
    * \p num_sources entries -- defined elsewhere, confirm.
    */
   void resize_sources(uint8_t num_sources);

   /* Instruction queries.  All of them are defined outside this header, so
    * their exact semantics should be checked against the implementation.
    */
   bool equals(fs_inst *inst) const;
   bool is_send_from_grf() const;
   bool is_partial_write() const;
   bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
   unsigned components_read(unsigned i) const;
   /* Used by regs_read() in this header as the size of source \p arg.
    * NOTE(review): the unit is presumably bytes -- confirm.
    */
   unsigned size_read(int arg) const;
   bool can_do_source_mods(const struct gen_device_info *devinfo);
   bool can_change_types() const;
   bool has_side_effects() const;
   bool has_source_and_destination_hazard() const;

   /**
    * Return the subset of flag registers read by the instruction as a bitset
    * with byte granularity.
    */
   unsigned flags_read(const gen_device_info *devinfo) const;

   /**
    * Return the subset of flag registers updated by the instruction (either
    * partially or fully) as a bitset with byte granularity.
    */
   unsigned flags_written() const;

   fs_reg dst;   /**< Destination register. */
   fs_reg *src;  /**< Array of source registers, indexed from zero. */

   uint8_t sources; /**< Number of fs_reg sources. */

   bool eot:1;   /* NOTE(review): presumably "end of thread" -- confirm. */
   bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
};
373
374/**
375 * Make the execution of \p inst dependent on the evaluation of a possibly
376 * inverted predicate.
377 */
378static inline fs_inst *
379set_predicate_inv(enum brw_predicate pred, bool inverse,
380                  fs_inst *inst)
381{
382   inst->predicate = pred;
383   inst->predicate_inverse = inverse;
384   return inst;
385}
386
387/**
388 * Make the execution of \p inst dependent on the evaluation of a predicate.
389 */
390static inline fs_inst *
391set_predicate(enum brw_predicate pred, fs_inst *inst)
392{
393   return set_predicate_inv(pred, false, inst);
394}
395
396/**
397 * Write the result of evaluating the condition given by \p mod to a flag
398 * register.
399 */
400static inline fs_inst *
401set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
402{
403   inst->conditional_mod = mod;
404   return inst;
405}
406
407/**
408 * Clamp the result of \p inst to the saturation range of its destination
409 * datatype.
410 */
411static inline fs_inst *
412set_saturate(bool saturate, fs_inst *inst)
413{
414   inst->saturate = saturate;
415   return inst;
416}
417
418/**
419 * Return the number of dataflow registers written by the instruction (either
420 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
421 * register_size)'.  The somewhat arbitrary register size unit is 4B for the
422 * UNIFORM and IMM files and 32B for all other files.
423 */
424inline unsigned
425regs_written(const fs_inst *inst)
426{
427   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
428   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
429                       inst->size_written -
430                       MIN2(inst->size_written, reg_padding(inst->dst)),
431                       REG_SIZE);
432}
433
434/**
435 * Return the number of dataflow registers read by the instruction (either
436 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
437 * register_size)'.  The somewhat arbitrary register size unit is 4B for the
438 * UNIFORM and IMM files and 32B for all other files.
439 */
440inline unsigned
441regs_read(const fs_inst *inst, unsigned i)
442{
443   const unsigned reg_size =
444      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
445   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
446                       inst->size_read(i) -
447                       MIN2(inst->size_read(i), reg_padding(inst->src[i])),
448                       reg_size);
449}
450
451#endif
452