1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <stdbool.h>
25#include "util/ralloc.h"
26#include "vc4_qir.h"
27#include "vc4_qpu.h"
28
/* Encodes the source mux selector `mux` into the mux field `muxfield`.
 *
 * QPU_MUX_SMALL_IMM is encoded as QPU_MUX_B.  The macro parameter is
 * parenthesized so that an expression argument is compared as a whole.
 * Note that `mux` is evaluated twice, so it must not have side effects.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,   \
                      muxfield)
31
32static uint64_t
33set_src_raddr(uint64_t inst, struct qpu_reg src)
34{
35        if (src.mux == QPU_MUX_A) {
36                assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
37                       QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
38                return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
39        }
40
41        if (src.mux == QPU_MUX_B) {
42                assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
43                        QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
44                       QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
45                return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
46        }
47
48        if (src.mux == QPU_MUX_SMALL_IMM) {
49                if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
50                        assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
51                } else {
52                        inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
53                        assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
54                }
55                return ((inst & ~QPU_RADDR_B_MASK) |
56                        QPU_SET_FIELD(src.addr, QPU_RADDR_B));
57        }
58
59        return inst;
60}
61
62uint64_t
63qpu_NOP()
64{
65        uint64_t inst = 0;
66
67        inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
68        inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
69
70        /* Note: These field values are actually non-zero */
71        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
72        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
73        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
74        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
75        inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
76
77        return inst;
78}
79
80static uint64_t
81qpu_a_dst(struct qpu_reg dst)
82{
83        uint64_t inst = 0;
84
85        if (dst.mux <= QPU_MUX_R5) {
86                /* Translate the mux to the ACCn values. */
87                inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
88        } else {
89                inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
90                if (dst.mux == QPU_MUX_B)
91                        inst |= QPU_WS;
92        }
93
94        return inst;
95}
96
97static uint64_t
98qpu_m_dst(struct qpu_reg dst)
99{
100        uint64_t inst = 0;
101
102        if (dst.mux <= QPU_MUX_R5) {
103                /* Translate the mux to the ACCn values. */
104                inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
105        } else {
106                inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
107                if (dst.mux == QPU_MUX_A)
108                        inst |= QPU_WS;
109        }
110
111        return inst;
112}
113
114uint64_t
115qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
116{
117        uint64_t inst = 0;
118
119        inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
120        inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
121        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
122        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
123        inst |= qpu_a_dst(dst);
124        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
125        inst |= QPU_MUX(src.mux, QPU_ADD_A);
126        inst |= QPU_MUX(src.mux, QPU_ADD_B);
127        inst = set_src_raddr(inst, src);
128        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
129
130        return inst;
131}
132
133uint64_t
134qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
135{
136        uint64_t inst = 0;
137
138        inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
139        inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
140        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
141        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
142        inst |= qpu_m_dst(dst);
143        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
144        inst |= QPU_MUX(src.mux, QPU_MUL_A);
145        inst |= QPU_MUX(src.mux, QPU_MUL_B);
146        inst = set_src_raddr(inst, src);
147        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
148
149        return inst;
150}
151
152uint64_t
153qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
154{
155        uint64_t inst = 0;
156
157        inst |= qpu_a_dst(dst);
158        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
159        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
160        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
161        inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
162        inst |= val;
163
164        return inst;
165}
166
167uint64_t
168qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
169{
170        return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
171                                                         QPU_LOAD_IMM_MODE);
172}
173
174uint64_t
175qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
176{
177        return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
178                                                         QPU_LOAD_IMM_MODE);
179}
180
181uint64_t
182qpu_branch(uint32_t cond, uint32_t target)
183{
184        uint64_t inst = 0;
185
186        inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
187        inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
188        inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
189        inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
190        inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
191
192        return inst;
193}
194
195uint64_t
196qpu_a_alu2(enum qpu_op_add op,
197           struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
198{
199        uint64_t inst = 0;
200
201        inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
202        inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
203        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
204        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
205        inst |= qpu_a_dst(dst);
206        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
207        inst |= QPU_MUX(src0.mux, QPU_ADD_A);
208        inst = set_src_raddr(inst, src0);
209        inst |= QPU_MUX(src1.mux, QPU_ADD_B);
210        inst = set_src_raddr(inst, src1);
211        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
212
213        return inst;
214}
215
216uint64_t
217qpu_m_alu2(enum qpu_op_mul op,
218           struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
219{
220        uint64_t inst = 0;
221
222        inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
223        inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
224        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
225        inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
226        inst |= qpu_m_dst(dst);
227        inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
228        inst |= QPU_MUX(src0.mux, QPU_MUL_A);
229        inst = set_src_raddr(inst, src0);
230        inst |= QPU_MUX(src1.mux, QPU_MUL_B);
231        inst = set_src_raddr(inst, src1);
232        inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
233
234        return inst;
235}
236
237uint64_t
238qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
239{
240	uint64_t inst = 0;
241	inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
242
243	inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
244	inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
245                                QPU_SMALL_IMM);
246
247	return inst;
248}
249
/* Merges the bits selected by mask from a and b into *merge.
 *
 * If a's field equals ignore, b's field is stored; otherwise, if b's field
 * equals ignore, a's field is stored.  When neither field is ignorable,
 * *merge is left untouched and the result says whether the fields agree.
 *
 * Returns false only when both fields are significant and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;
        const uint64_t others = *merge & ~mask;

        if (field_a == ignore) {
                *merge = others | field_b;
                return true;
        }

        if (field_b == ignore) {
                *merge = others | field_a;
                return true;
        }

        return field_a == field_b;
}
266
267int
268qpu_num_sf_accesses(uint64_t inst)
269{
270        int accesses = 0;
271        static const uint32_t specials[] = {
272                QPU_W_TLB_COLOR_MS,
273                QPU_W_TLB_COLOR_ALL,
274                QPU_W_TLB_Z,
275                QPU_W_TMU0_S,
276                QPU_W_TMU0_T,
277                QPU_W_TMU0_R,
278                QPU_W_TMU0_B,
279                QPU_W_TMU1_S,
280                QPU_W_TMU1_T,
281                QPU_W_TMU1_R,
282                QPU_W_TMU1_B,
283                QPU_W_SFU_RECIP,
284                QPU_W_SFU_RECIPSQRT,
285                QPU_W_SFU_EXP,
286                QPU_W_SFU_LOG,
287        };
288        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
289        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
290        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
291        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
292
293        for (int j = 0; j < ARRAY_SIZE(specials); j++) {
294                if (waddr_add == specials[j])
295                        accesses++;
296                if (waddr_mul == specials[j])
297                        accesses++;
298        }
299
300        if (raddr_a == QPU_R_MUTEX_ACQUIRE)
301                accesses++;
302        if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
303            QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
304                accesses++;
305
306        /* XXX: semaphore, combined color read/write? */
307        switch (QPU_GET_FIELD(inst, QPU_SIG)) {
308        case QPU_SIG_COLOR_LOAD:
309        case QPU_SIG_COLOR_LOAD_END:
310        case QPU_SIG_LOAD_TMU0:
311        case QPU_SIG_LOAD_TMU1:
312                accesses++;
313        }
314
315        return accesses;
316}
317
318static bool
319qpu_waddr_ignores_ws(uint32_t waddr)
320{
321        switch(waddr) {
322        case QPU_W_ACC0:
323        case QPU_W_ACC1:
324        case QPU_W_ACC2:
325        case QPU_W_ACC3:
326        case QPU_W_NOP:
327        case QPU_W_TLB_Z:
328        case QPU_W_TLB_COLOR_MS:
329        case QPU_W_TLB_COLOR_ALL:
330        case QPU_W_TLB_ALPHA_MASK:
331        case QPU_W_VPM:
332        case QPU_W_SFU_RECIP:
333        case QPU_W_SFU_RECIPSQRT:
334        case QPU_W_SFU_EXP:
335        case QPU_W_SFU_LOG:
336        case QPU_W_TMU0_S:
337        case QPU_W_TMU0_T:
338        case QPU_W_TMU0_R:
339        case QPU_W_TMU0_B:
340        case QPU_W_TMU1_S:
341        case QPU_W_TMU1_T:
342        case QPU_W_TMU1_R:
343        case QPU_W_TMU1_B:
344                return true;
345        }
346
347        return false;
348}
349
350static void
351swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
352{
353        uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
354        uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
355        uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
356
357        if ((*a & mux_mask) == mux_a_val) {
358                *a = (*a & ~mux_mask) | mux_b_val;
359                *merge = (*merge & ~mux_mask) | mux_b_val;
360        }
361}
362
363static bool
364try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
365{
366        uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
367        uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
368        uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
369        uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
370
371        if (raddr_a_b != QPU_R_NOP)
372                return false;
373
374        switch (raddr_a_a) {
375        case QPU_R_UNIF:
376        case QPU_R_VARY:
377                break;
378        default:
379                return false;
380        }
381
382        if (!(*merge & QPU_PM) &&
383            QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
384                return false;
385        }
386
387        if (raddr_b_b != QPU_R_NOP &&
388            raddr_b_b != raddr_a_a)
389                return false;
390
391        /* Move raddr A to B in instruction a. */
392        *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
393        *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
394        *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
395        *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
396        swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
397        swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
398        swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
399        swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
400
401        return true;
402}
403
404static bool
405convert_mov(uint64_t *inst)
406{
407        uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
408        uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
409        uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
410
411        /* Is it a MOV? */
412        if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
413            (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
414                return false;
415        }
416
417        if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
418                return false;
419
420        /* We could maybe support this in the .8888 and .8a-.8d cases. */
421        if (*inst & QPU_PM)
422                return false;
423
424        *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
425        *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
426
427        *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
428        *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
429        *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
430        *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
431
432        *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
433        *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
434
435        *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
436        *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
437
438        if (!qpu_waddr_ignores_ws(waddr_add))
439                *inst ^= QPU_WS;
440
441        return true;
442}
443
444static bool
445writes_a_file(uint64_t inst)
446{
447        if (!(inst & QPU_WS))
448                return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
449        else
450                return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
451}
452
453static bool
454reads_r4(uint64_t inst)
455{
456        return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
457                QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
458                QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
459                QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
460}
461
/**
 * Tries to combine instructions a and b into one instruction.
 *
 * Returns the merged instruction, or 0 when the two can't be merged.
 *
 * The merge starts from a | b and then checks each field group in turn.
 * Some conflicts return 0 immediately; failures of the plain merge_fields()
 * calls are accumulated in `ok` and only reported at the end.
 */
uint64_t
qpu_merge_inst(uint64_t a, uint64_t b)
{
        uint64_t merge = a | b;
        bool ok = true;
        uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
        uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);

        /* If both instructions use the ADD ALU, the only way to merge them is
         * to turn one of them into a MUL-ALU MOV, which also requires that
         * neither already uses the MUL ALU.  convert_mov() rewrites a or b in
         * place, so the starting merge value is recomputed.
         */
        if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
            QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
                if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
                    !(convert_mov(&a) || convert_mov(&b))) {
                        return 0;
                } else {
                        merge = a | b;
                }
        }

        /* Both instructions using the MUL ALU can't be merged. */
        if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                return 0;

        /* Refuse to merge when both sides have an access counted by
         * qpu_num_sf_accesses().
         */
        if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
                return 0;

        /* Load-immediate, small-immediate and branch instructions are never
         * merged.
         */
        if (a_sig == QPU_SIG_LOAD_IMM ||
            b_sig == QPU_SIG_LOAD_IMM ||
            a_sig == QPU_SIG_SMALL_IMM ||
            b_sig == QPU_SIG_SMALL_IMM ||
            a_sig == QPU_SIG_BRANCH ||
            b_sig == QPU_SIG_BRANCH) {
                return 0;
        }

        /* Signals merge if at most one side has one, or both agree. */
        ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
                                QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));

        /* Misc fields that have to match exactly. */
        ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);

        if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
                          QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
                /* Since we tend to use regfile A by default both for register
                 * allocation and for our special values (uniforms and
                 * varyings), try swapping uniforms and varyings to regfile B
                 * to resolve raddr A conflicts.
                 */
                if (!try_swap_ra_file(&merge, &a, &b) &&
                    !try_swap_ra_file(&merge, &b, &a)) {
                        return 0;
                }
        }

        /* Remaining address fields: each merges if one side is NOP or both
         * sides agree.
         */
        ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
                                QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));

        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
        ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
                                QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));

        /* Allow disagreement on WS (swapping A vs B physical reg file as the
         * destination for ADD/MUL) if one of the original instructions
         * ignores it (probably because it's just writing to accumulators).
         */
        if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
            qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (b & QPU_WS);
        } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
                   qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
                merge = (merge & ~QPU_WS) | (a & QPU_WS);
        } else {
                if ((a & QPU_WS) != (b & QPU_WS))
                        return 0;
        }

        if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
                /* If one instruction has PM bit set and the other not, the
                 * one without PM shouldn't do packing/unpacking, and we
                 * have to make sure non-NOP packing/unpacking from PM
                 * instruction aren't added to it.
                 */
                uint64_t temp;

                /* Let a be the one with PM bit */
                if (!(a & QPU_PM)) {
                        temp = a;
                        a = b;
                        b = temp;
                }

                /* The non-PM instruction must not pack or unpack at all. */
                if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                        return 0;

                /* a's pack must not end up applied to b's MUL op. */
                if ((a & QPU_PACK_MASK) != 0 &&
                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                        return 0;

                /* a's unpack must not end up applied to an r4 read in b. */
                if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
                        return 0;
        } else {
                /* packing: Make sure that non-NOP packs agree, then deal with
                 * special-case failing of adding a non-NOP pack to something
                 * with a NOP pack.
                 */
                if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
                        return 0;
                bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
                                QPU_GET_FIELD(merge, QPU_PACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file packing on either half.
                         */
                        if (new_a_pack && writes_a_file(a))
                                return 0;

                        if (new_b_pack && writes_a_file(b))
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new MUL
                         * packing on either half.
                         */
                        if (new_a_pack &&
                            QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;

                        if (new_b_pack &&
                            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                                return 0;
                }

                /* unpacking: Make sure that non-NOP unpacks agree, then deal
                 * with special-case failing of adding a non-NOP unpack to
                 * something with a NOP unpack.
                 */
                if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
                        return 0;
                bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
                                QPU_GET_FIELD(merge, QPU_UNPACK));
                if (!(merge & QPU_PM)) {
                        /* Make sure we're not going to be putting a new
                         * a-file unpacking on either half.
                         */
                        if (new_a_unpack &&
                            QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;

                        if (new_b_unpack &&
                            QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
                                return 0;
                } else {
                        /* Make sure we're not going to be putting new r4
                         * unpack on either half.
                         */
                        if (new_a_unpack && reads_r4(a))
                                return 0;

                        if (new_b_unpack && reads_r4(b))
                                return 0;
                }
        }

        /* Report any failure accumulated by the merge_fields() calls. */
        if (ok)
                return merge;
        else
                return 0;
}
634
635uint64_t
636qpu_set_sig(uint64_t inst, uint32_t sig)
637{
638        assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
639        return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
640}
641
642uint64_t
643qpu_set_cond_add(uint64_t inst, uint32_t cond)
644{
645        assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
646        return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
647}
648
649uint64_t
650qpu_set_cond_mul(uint64_t inst, uint32_t cond)
651{
652        assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
653        return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
654}
655
656bool
657qpu_waddr_is_tlb(uint32_t waddr)
658{
659        switch (waddr) {
660        case QPU_W_TLB_COLOR_ALL:
661        case QPU_W_TLB_COLOR_MS:
662        case QPU_W_TLB_Z:
663                return true;
664        default:
665                return false;
666        }
667}
668
669bool
670qpu_inst_is_tlb(uint64_t inst)
671{
672        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
673
674        return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
675                qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
676                sig == QPU_SIG_COLOR_LOAD ||
677                sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
678}
679
/**
 * Returns the small immediate encoding for the 32-bit value i, to be placed
 * in the raddr b field, or ~0 if i has no small immediate encoding.
 *
 * Integers 0..15 encode as themselves and -16..-1 as 16..31.  Encodings
 * 32..47 are the bit patterns of the single-precision floats 1.0, 2.0, ...,
 * 128.0 followed by 1/256, 1/128, ..., 1/2.
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* Float bit patterns for encodings 32..47, in encoding order. */
        static const uint32_t float_bits[] = {
                0x3f800000, 0x40000000, 0x40800000, 0x41000000,
                0x41800000, 0x42000000, 0x42800000, 0x43000000,
                0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
                0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,
        };
        const uint32_t num_floats = sizeof(float_bits) / sizeof(float_bits[0]);
        const int as_signed = (int)i;

        if (i <= 15)
                return i;

        /* The unsigned addition wraps, mapping -16..-1 onto 16..31. */
        if (as_signed < 0 && as_signed >= -16)
                return i + 32;

        for (uint32_t slot = 0; slot < num_floats; slot++) {
                if (float_bits[slot] == i)
                        return 32 + slot;
        }

        return ~0;
}
729
730void
731qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
732{
733        if (c->qpu_inst_count >= c->qpu_inst_size) {
734                c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
735                c->qpu_insts = reralloc(c, c->qpu_insts,
736                                        uint64_t, c->qpu_inst_size);
737        }
738        c->qpu_insts[c->qpu_inst_count++] = inst;
739}
740