/* instr-a3xx.h revision 660d5c1646f5d63f9626b24beabc9cfc318849d4 */
1/*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#ifndef INSTR_A3XX_H_
25#define INSTR_A3XX_H_
26
/* Shorthand for the GCC/Clang "packed" attribute.  It is applied to every
 * instruction-encoding struct/union in this file so that no padding is
 * inserted between members.
 */
#define PACKED __attribute__((__packed__))
28
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
31
/* Opcode numbers for every instruction category.
 *
 * Numbering restarts within each category, so the same numeric value names
 * different opcodes in different categories (e.g. OPC_NOP, OPC_ADD_F and
 * OPC_MAD_U16 are all 0).
 */
typedef enum {
	/* category 0: */
	OPC_NOP = 0,
	OPC_BR = 1,
	OPC_JUMP = 2,
	OPC_CALL = 3,
	OPC_RET = 4,
	OPC_KILL = 5,
	OPC_END = 6,
	OPC_EMIT = 7,
	OPC_CUT = 8,
	OPC_CHMASK = 9,
	OPC_CHSH = 10,
	OPC_FLOW_REV = 11,

	/* category 1: */
	/* no opc.. all category 1 are variants of mov */

	/* category 2: */
	OPC_ADD_F = 0,
	OPC_MIN_F = 1,
	OPC_MAX_F = 2,
	OPC_MUL_F = 3,
	OPC_SIGN_F = 4,
	OPC_CMPS_F = 5,
	OPC_ABSNEG_F = 6,
	OPC_CMPV_F = 7,
	/* 8 - invalid */
	OPC_FLOOR_F = 9,
	OPC_CEIL_F = 10,
	OPC_RNDNE_F = 11,
	OPC_RNDAZ_F = 12,
	OPC_TRUNC_F = 13,
	/* 14-15 - invalid */
	OPC_ADD_U = 16,
	OPC_ADD_S = 17,
	OPC_SUB_U = 18,
	OPC_SUB_S = 19,
	OPC_CMPS_U = 20,
	OPC_CMPS_S = 21,
	OPC_MIN_U = 22,
	OPC_MIN_S = 23,
	OPC_MAX_U = 24,
	OPC_MAX_S = 25,
	OPC_ABSNEG_S = 26,
	/* 27 - invalid */
	OPC_AND_B = 28,
	OPC_OR_B = 29,
	OPC_NOT_B = 30,
	OPC_XOR_B = 31,
	/* 32 - invalid */
	OPC_CMPV_U = 33,
	OPC_CMPV_S = 34,
	/* 35-47 - invalid */
	OPC_MUL_U = 48,
	OPC_MUL_S = 49,
	OPC_MULL_U = 50,
	OPC_BFREV_B = 51,
	OPC_CLZ_S = 52,
	OPC_CLZ_B = 53,
	OPC_SHL_B = 54,
	OPC_SHR_B = 55,
	OPC_ASHR_B = 56,
	OPC_BARY_F = 57,
	OPC_MGEN_B = 58,
	OPC_GETBIT_B = 59,
	OPC_SETRM = 60,
	OPC_CBITS_B = 61,
	OPC_SHB = 62,
	OPC_MSAD = 63,

	/* category 3: */
	OPC_MAD_U16 = 0,
	OPC_MADSH_U16 = 1,
	OPC_MAD_S16 = 2,
	OPC_MADSH_M16 = 3,   /* should this be .s16? */
	OPC_MAD_U24 = 4,
	OPC_MAD_S24 = 5,
	OPC_MAD_F16 = 6,
	OPC_MAD_F32 = 7,
	OPC_SEL_B16 = 8,
	OPC_SEL_B32 = 9,
	OPC_SEL_S16 = 10,
	OPC_SEL_S32 = 11,
	OPC_SEL_F16 = 12,
	OPC_SEL_F32 = 13,
	OPC_SAD_S16 = 14,
	OPC_SAD_S32 = 15,

	/* category 4: */
	OPC_RCP = 0,
	OPC_RSQ = 1,
	OPC_LOG2 = 2,
	OPC_EXP2 = 3,
	OPC_SIN = 4,
	OPC_COS = 5,
	OPC_SQRT = 6,
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM = 0,
	OPC_ISAML = 1,
	OPC_ISAMM = 2,
	OPC_SAM = 3,
	OPC_SAMB = 4,
	OPC_SAML = 5,
	OPC_SAMGQ = 6,
	OPC_GETLOD = 7,
	OPC_CONV = 8,
	OPC_CONVM = 9,
	OPC_GETSIZE = 10,
	OPC_GETBUF = 11,
	OPC_GETPOS = 12,
	OPC_GETINFO = 13,
	OPC_DSX = 14,
	OPC_DSY = 15,
	OPC_GATHER4R = 16,
	OPC_GATHER4G = 17,
	OPC_GATHER4B = 18,
	OPC_GATHER4A = 19,
	OPC_SAMGP0 = 20,
	OPC_SAMGP1 = 21,
	OPC_SAMGP2 = 22,
	OPC_SAMGP3 = 23,
	OPC_DSXPP_1 = 24,
	OPC_DSYPP_1 = 25,
	OPC_RGETPOS = 26,
	OPC_RGETINFO = 27,

	/* category 6: */
	OPC_LDG = 0,        /* load-global */
	OPC_LDL = 1,
	OPC_LDP = 2,
	OPC_STG = 3,        /* store-global */
	OPC_STL = 4,
	OPC_STP = 5,
	OPC_STI = 6,
	OPC_G2L = 7,
	OPC_L2G = 8,
	OPC_PREFETCH = 9,
	OPC_LDLW = 10,
	OPC_STLW = 11,
	/* 12-13 have no enumerator here */
	OPC_RESFMT = 14,
	OPC_RESINFO = 15,
	OPC_ATOMIC_ADD_L = 16,
	OPC_ATOMIC_SUB_L = 17,
	OPC_ATOMIC_XCHG_L = 18,
	OPC_ATOMIC_INC_L = 19,
	OPC_ATOMIC_DEC_L = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L = 22,
	OPC_ATOMIC_MAX_L = 23,
	OPC_ATOMIC_AND_L = 24,
	OPC_ATOMIC_OR_L = 25,
	OPC_ATOMIC_XOR_L = 26,
	OPC_LDGB_TYPED_4D = 27,
	OPC_STGB_4D_4 = 28,
	OPC_STIB = 29,
	OPC_LDC_4 = 30,
	OPC_LDLV = 31,

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT = 0,
	OPC_META_PHI = 1,
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO = 2,
	OPC_META_FI = 3,

} opc_t;
206
/* Data type codes.  Every value fits in 3 bits, matching the 3-bit
 * type/src_type/dst_type fields of the instruction encodings below.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX I assume? */
} type_t;
217
218static inline uint32_t type_size(type_t type)
219{
220	switch (type) {
221	case TYPE_F32:
222	case TYPE_U32:
223	case TYPE_S32:
224		return 32;
225	case TYPE_F16:
226	case TYPE_U16:
227	case TYPE_S16:
228		return 16;
229	case TYPE_U8:
230	case TYPE_S8:
231		return 8;
232	default:
233		assert(0); /* invalid type */
234		return 0;
235	}
236}
237
238static inline int type_float(type_t type)
239{
240	return (type == TYPE_F32) || (type == TYPE_F16);
241}
242
243static inline int type_uint(type_t type)
244{
245	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
246}
247
248static inline int type_sint(type_t type)
249{
250	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
251}
252
253typedef union PACKED {
254	/* normal gpr or const src register: */
255	struct PACKED {
256		uint32_t comp  : 2;
257		uint32_t num   : 10;
258	};
259	/* for immediate val: */
260	int32_t  iim_val   : 11;
261	/* to make compiler happy: */
262	uint32_t dummy32;
263	uint32_t dummy10   : 10;
264	uint32_t dummy11   : 11;
265	uint32_t dummy12   : 12;
266	uint32_t dummy13   : 13;
267	uint32_t dummy8    : 8;
268} reg_t;
269
270/* special registers: */
271#define REG_A0 61       /* address register */
272#define REG_P0 62       /* predicate register */
273
274static inline int reg_special(reg_t reg)
275{
276	return (reg.num == REG_A0) || (reg.num == REG_P0);
277}
278
279typedef struct PACKED {
280	/* dword0: */
281	union PACKED {
282		struct PACKED {
283			int16_t  immed    : 16;
284			uint32_t dummy1   : 16;
285		} a3xx;
286		struct PACKED {
287			int32_t  immed    : 20;
288			uint32_t dummy1   : 12;
289		} a4xx;
290	};
291
292	/* dword1: */
293	uint32_t dummy2   : 8;
294	uint32_t repeat   : 3;
295	uint32_t dummy3   : 1;
296	uint32_t ss       : 1;
297	uint32_t dummy4   : 7;
298	uint32_t inv      : 1;
299	uint32_t comp     : 2;
300	uint32_t opc      : 4;
301	uint32_t jmp_tgt  : 1;
302	uint32_t sync     : 1;
303	uint32_t opc_cat  : 3;
304} instr_cat0_t;
305
306typedef struct PACKED {
307	/* dword0: */
308	union PACKED {
309		/* for normal src register: */
310		struct PACKED {
311			uint32_t src : 11;
312			/* at least low bit of pad must be zero or it will
313			 * look like a address relative src
314			 */
315			uint32_t pad : 21;
316		};
317		/* for address relative: */
318		struct PACKED {
319			int32_t  off : 10;
320			uint32_t src_rel_c : 1;
321			uint32_t src_rel : 1;
322			uint32_t unknown : 20;
323		};
324		/* for immediate: */
325		int32_t  iim_val;
326		uint32_t uim_val;
327		float    fim_val;
328	};
329
330	/* dword1: */
331	uint32_t dst        : 8;
332	uint32_t repeat     : 3;
333	uint32_t src_r      : 1;
334	uint32_t ss         : 1;
335	uint32_t ul         : 1;
336	uint32_t dst_type   : 3;
337	uint32_t dst_rel    : 1;
338	uint32_t src_type   : 3;
339	uint32_t src_c      : 1;
340	uint32_t src_im     : 1;
341	uint32_t even       : 1;
342	uint32_t pos_inf    : 1;
343	uint32_t must_be_0  : 2;
344	uint32_t jmp_tgt    : 1;
345	uint32_t sync       : 1;
346	uint32_t opc_cat    : 3;
347} instr_cat1_t;
348
349typedef struct PACKED {
350	/* dword0: */
351	union PACKED {
352		struct PACKED {
353			uint32_t src1         : 11;
354			uint32_t must_be_zero1: 2;
355			uint32_t src1_im      : 1;   /* immediate */
356			uint32_t src1_neg     : 1;   /* negate */
357			uint32_t src1_abs     : 1;   /* absolute value */
358		};
359		struct PACKED {
360			uint32_t src1         : 10;
361			uint32_t src1_c       : 1;   /* relative-const */
362			uint32_t src1_rel     : 1;   /* relative address */
363			uint32_t must_be_zero : 1;
364			uint32_t dummy        : 3;
365		} rel1;
366		struct PACKED {
367			uint32_t src1         : 12;
368			uint32_t src1_c       : 1;   /* const */
369			uint32_t dummy        : 3;
370		} c1;
371	};
372
373	union PACKED {
374		struct PACKED {
375			uint32_t src2         : 11;
376			uint32_t must_be_zero2: 2;
377			uint32_t src2_im      : 1;   /* immediate */
378			uint32_t src2_neg     : 1;   /* negate */
379			uint32_t src2_abs     : 1;   /* absolute value */
380		};
381		struct PACKED {
382			uint32_t src2         : 10;
383			uint32_t src2_c       : 1;   /* relative-const */
384			uint32_t src2_rel     : 1;   /* relative address */
385			uint32_t must_be_zero : 1;
386			uint32_t dummy        : 3;
387		} rel2;
388		struct PACKED {
389			uint32_t src2         : 12;
390			uint32_t src2_c       : 1;   /* const */
391			uint32_t dummy        : 3;
392		} c2;
393	};
394
395	/* dword1: */
396	uint32_t dst      : 8;
397	uint32_t repeat   : 3;
398	uint32_t src1_r   : 1;
399	uint32_t ss       : 1;
400	uint32_t ul       : 1;   /* dunno */
401	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
402	uint32_t ei       : 1;
403	uint32_t cond     : 3;
404	uint32_t src2_r   : 1;
405	uint32_t full     : 1;   /* not half */
406	uint32_t opc      : 6;
407	uint32_t jmp_tgt  : 1;
408	uint32_t sync     : 1;
409	uint32_t opc_cat  : 3;
410} instr_cat2_t;
411
412typedef struct PACKED {
413	/* dword0: */
414	union PACKED {
415		struct PACKED {
416			uint32_t src1         : 11;
417			uint32_t must_be_zero1: 2;
418			uint32_t src2_c       : 1;
419			uint32_t src1_neg     : 1;
420			uint32_t src2_r       : 1;
421		};
422		struct PACKED {
423			uint32_t src1         : 10;
424			uint32_t src1_c       : 1;
425			uint32_t src1_rel     : 1;
426			uint32_t must_be_zero : 1;
427			uint32_t dummy        : 3;
428		} rel1;
429		struct PACKED {
430			uint32_t src1         : 12;
431			uint32_t src1_c       : 1;
432			uint32_t dummy        : 3;
433		} c1;
434	};
435
436	union PACKED {
437		struct PACKED {
438			uint32_t src3         : 11;
439			uint32_t must_be_zero2: 2;
440			uint32_t src3_r       : 1;
441			uint32_t src2_neg     : 1;
442			uint32_t src3_neg     : 1;
443		};
444		struct PACKED {
445			uint32_t src3         : 10;
446			uint32_t src3_c       : 1;
447			uint32_t src3_rel     : 1;
448			uint32_t must_be_zero : 1;
449			uint32_t dummy        : 3;
450		} rel2;
451		struct PACKED {
452			uint32_t src3         : 12;
453			uint32_t src3_c       : 1;
454			uint32_t dummy        : 3;
455		} c2;
456	};
457
458	/* dword1: */
459	uint32_t dst      : 8;
460	uint32_t repeat   : 3;
461	uint32_t src1_r   : 1;
462	uint32_t ss       : 1;
463	uint32_t ul       : 1;
464	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
465	uint32_t src2     : 8;
466	uint32_t opc      : 4;
467	uint32_t jmp_tgt  : 1;
468	uint32_t sync     : 1;
469	uint32_t opc_cat  : 3;
470} instr_cat3_t;
471
472static inline bool instr_cat3_full(instr_cat3_t *cat3)
473{
474	switch (cat3->opc) {
475	case OPC_MAD_F16:
476	case OPC_MAD_U16:
477	case OPC_MAD_S16:
478	case OPC_SEL_B16:
479	case OPC_SEL_S16:
480	case OPC_SEL_F16:
481	case OPC_SAD_S16:
482	case OPC_SAD_S32:  // really??
483		return false;
484	default:
485		return true;
486	}
487}
488
489typedef struct PACKED {
490	/* dword0: */
491	union PACKED {
492		struct PACKED {
493			uint32_t src          : 11;
494			uint32_t must_be_zero1: 2;
495			uint32_t src_im       : 1;   /* immediate */
496			uint32_t src_neg      : 1;   /* negate */
497			uint32_t src_abs      : 1;   /* absolute value */
498		};
499		struct PACKED {
500			uint32_t src          : 10;
501			uint32_t src_c        : 1;   /* relative-const */
502			uint32_t src_rel      : 1;   /* relative address */
503			uint32_t must_be_zero : 1;
504			uint32_t dummy        : 3;
505		} rel;
506		struct PACKED {
507			uint32_t src          : 12;
508			uint32_t src_c        : 1;   /* const */
509			uint32_t dummy        : 3;
510		} c;
511	};
512	uint32_t dummy1   : 16;  /* seem to be ignored */
513
514	/* dword1: */
515	uint32_t dst      : 8;
516	uint32_t repeat   : 3;
517	uint32_t src_r    : 1;
518	uint32_t ss       : 1;
519	uint32_t ul       : 1;
520	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
521	uint32_t dummy2   : 5;   /* seem to be ignored */
522	uint32_t full     : 1;   /* not half */
523	uint32_t opc      : 6;
524	uint32_t jmp_tgt  : 1;
525	uint32_t sync     : 1;
526	uint32_t opc_cat  : 3;
527} instr_cat4_t;
528
529typedef struct PACKED {
530	/* dword0: */
531	union PACKED {
532		/* normal case: */
533		struct PACKED {
534			uint32_t full     : 1;   /* not half */
535			uint32_t src1     : 8;
536			uint32_t src2     : 8;
537			uint32_t dummy1   : 4;   /* seem to be ignored */
538			uint32_t samp     : 4;
539			uint32_t tex      : 7;
540		} norm;
541		/* s2en case: */
542		struct PACKED {
543			uint32_t full     : 1;   /* not half */
544			uint32_t src1     : 8;
545			uint32_t src2     : 11;
546			uint32_t dummy1   : 1;
547			uint32_t src3     : 8;
548			uint32_t dummy2   : 3;
549		} s2en;
550		/* same in either case: */
551		// XXX I think, confirm this
552		struct PACKED {
553			uint32_t full     : 1;   /* not half */
554			uint32_t src1     : 8;
555			uint32_t pad      : 23;
556		};
557	};
558
559	/* dword1: */
560	uint32_t dst      : 8;
561	uint32_t wrmask   : 4;   /* write-mask */
562	uint32_t type     : 3;
563	uint32_t dummy2   : 1;   /* seems to be ignored */
564	uint32_t is_3d    : 1;
565
566	uint32_t is_a     : 1;
567	uint32_t is_s     : 1;
568	uint32_t is_s2en  : 1;
569	uint32_t is_o     : 1;
570	uint32_t is_p     : 1;
571
572	uint32_t opc      : 5;
573	uint32_t jmp_tgt  : 1;
574	uint32_t sync     : 1;
575	uint32_t opc_cat  : 3;
576} instr_cat5_t;
577
/* Category 6 encoding, variant with an offset: [src1 + off], src2.
 *
 * The first bit of dword0 (mustbe1) must be 1 for this variant; it is the
 * same bit that instr_cat6b_t names mustbe0.  off is a signed 13-bit
 * offset.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6a_t;
598
/* Category 6 encoding, variant without an offset: [src1], src2.
 *
 * The first bit of dword0 (mustbe0) must be 0 for this variant; it is the
 * same bit that instr_cat6a_t names mustbe1.  Here src1 directly follows
 * that bit and the next 13 bits (ignore0) are unused.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t ignore0  : 13;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6b_t;
619
620/* I think some of the other cat6 instructions use additional
621 * sub-encodings..
622 */
623
624typedef union PACKED {
625	instr_cat6a_t a;
626	instr_cat6b_t b;
627	struct PACKED {
628		/* dword0: */
629		uint32_t has_off  : 1;
630		uint32_t pad1     : 31;
631
632		/* dword1: */
633		uint32_t dst      : 8;
634		uint32_t dummy2   : 9;
635		uint32_t type     : 3;
636		uint32_t dummy3   : 2;
637		uint32_t opc      : 5;
638		uint32_t jmp_tgt  : 1;
639		uint32_t sync     : 1;
640		uint32_t opc_cat  : 3;
641	};
642} instr_cat6_t;
643
644typedef union PACKED {
645	instr_cat0_t cat0;
646	instr_cat1_t cat1;
647	instr_cat2_t cat2;
648	instr_cat3_t cat3;
649	instr_cat4_t cat4;
650	instr_cat5_t cat5;
651	instr_cat6_t cat6;
652	struct PACKED {
653		/* dword0: */
654		uint64_t pad1     : 40;
655		uint32_t repeat   : 3;  /* cat0-cat4 */
656		uint32_t pad2     : 1;
657		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
658		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
659		uint32_t pad3     : 13;
660		uint32_t jmp_tgt  : 1;
661		uint32_t sync     : 1;
662		uint32_t opc_cat  : 3;
663
664	};
665} instr_t;
666
667static inline uint32_t instr_opc(instr_t *instr)
668{
669	switch (instr->opc_cat) {
670	case 0:  return instr->cat0.opc;
671	case 1:  return 0;
672	case 2:  return instr->cat2.opc;
673	case 3:  return instr->cat3.opc;
674	case 4:  return instr->cat4.opc;
675	case 5:  return instr->cat5.opc;
676	case 6:  return instr->cat6.opc;
677	default: return 0;
678	}
679}
680
681static inline bool is_mad(opc_t opc)
682{
683	switch (opc) {
684	case OPC_MAD_U16:
685	case OPC_MAD_S16:
686	case OPC_MAD_U24:
687	case OPC_MAD_S24:
688	case OPC_MAD_F16:
689	case OPC_MAD_F32:
690		return true;
691	default:
692		return false;
693	}
694}
695
696static inline bool is_madsh(opc_t opc)
697{
698	switch (opc) {
699	case OPC_MADSH_U16:
700	case OPC_MADSH_M16:
701		return true;
702	default:
703		return false;
704	}
705}
706
707#endif /* INSTR_A3XX_H_ */
708