/* instr-a3xx.h revision 203f37540a698a812f0a66e2f3f1fff954af22ab */
/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

/*
 * Instruction opcodes, grouped by instruction category.
 *
 * Numbering restarts in every category, so one value is shared by several
 * enumerators (for example OPC_NOP, OPC_ADD_F, OPC_MAD_U16 and OPC_RCP are
 * all 0).  An opc_t value therefore only identifies an instruction when it
 * is combined with the instruction's category.
 */
typedef enum {
	/* category 0: */
	OPC_NOP = 0,
	OPC_BR = 1,
	OPC_JUMP = 2,
	OPC_CALL = 3,
	OPC_RET = 4,
	OPC_KILL = 5,
	OPC_END = 6,
	OPC_EMIT = 7,
	OPC_CUT = 8,
	OPC_CHMASK = 9,
	OPC_CHSH = 10,
	OPC_FLOW_REV = 11,

	/* category 1: */
	/* no opc.. all category 1 are variants of mov */

	/* category 2: */
	OPC_ADD_F = 0,
	OPC_MIN_F = 1,
	OPC_MAX_F = 2,
	OPC_MUL_F = 3,
	OPC_SIGN_F = 4,
	OPC_CMPS_F = 5,
	OPC_ABSNEG_F = 6,
	OPC_CMPV_F = 7,
	/* 8 - invalid */
	OPC_FLOOR_F = 9,
	OPC_CEIL_F = 10,
	OPC_RNDNE_F = 11,
	OPC_RNDAZ_F = 12,
	OPC_TRUNC_F = 13,
	/* 14-15 - invalid */
	OPC_ADD_U = 16,
	OPC_ADD_S = 17,
	OPC_SUB_U = 18,
	OPC_SUB_S = 19,
	OPC_CMPS_U = 20,
	OPC_CMPS_S = 21,
	OPC_MIN_U = 22,
	OPC_MIN_S = 23,
	OPC_MAX_U = 24,
	OPC_MAX_S = 25,
	OPC_ABSNEG_S = 26,
	/* 27 - invalid */
	OPC_AND_B = 28,
	OPC_OR_B = 29,
	OPC_NOT_B = 30,
	OPC_XOR_B = 31,
	/* 32 - invalid */
	OPC_CMPV_U = 33,
	OPC_CMPV_S = 34,
	/* 35-47 - invalid */
	OPC_MUL_U = 48,
	OPC_MUL_S = 49,
	OPC_MULL_U = 50,
	OPC_BFREV_B = 51,
	OPC_CLZ_S = 52,
	OPC_CLZ_B = 53,
	OPC_SHL_B = 54,
	OPC_SHR_B = 55,
	OPC_ASHR_B = 56,
	OPC_BARY_F = 57,
	OPC_MGEN_B = 58,
	OPC_GETBIT_B = 59,
	OPC_SETRM = 60,
	OPC_CBITS_B = 61,
	OPC_SHB = 62,
	OPC_MSAD = 63,

	/* category 3: */
	OPC_MAD_U16 = 0,
	OPC_MADSH_U16 = 1,
	OPC_MAD_S16 = 2,
	OPC_MADSH_M16 = 3,   /* should this be .s16? */
	OPC_MAD_U24 = 4,
	OPC_MAD_S24 = 5,
	OPC_MAD_F16 = 6,
	OPC_MAD_F32 = 7,
	OPC_SEL_B16 = 8,
	OPC_SEL_B32 = 9,
	OPC_SEL_S16 = 10,
	OPC_SEL_S32 = 11,
	OPC_SEL_F16 = 12,
	OPC_SEL_F32 = 13,
	OPC_SAD_S16 = 14,
	OPC_SAD_S32 = 15,

	/* category 4: */
	OPC_RCP = 0,
	OPC_RSQ = 1,
	OPC_LOG2 = 2,
	OPC_EXP2 = 3,
	OPC_SIN = 4,
	OPC_COS = 5,
	OPC_SQRT = 6,
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM = 0,
	OPC_ISAML = 1,
	OPC_ISAMM = 2,
	OPC_SAM = 3,
	OPC_SAMB = 4,
	OPC_SAML = 5,
	OPC_SAMGQ = 6,
	OPC_GETLOD = 7,
	OPC_CONV = 8,
	OPC_CONVM = 9,
	OPC_GETSIZE = 10,
	OPC_GETBUF = 11,
	OPC_GETPOS = 12,
	OPC_GETINFO = 13,
	OPC_DSX = 14,
	OPC_DSY = 15,
	OPC_GATHER4R = 16,
	OPC_GATHER4G = 17,
	OPC_GATHER4B = 18,
	OPC_GATHER4A = 19,
	OPC_SAMGP0 = 20,
	OPC_SAMGP1 = 21,
	OPC_SAMGP2 = 22,
	OPC_SAMGP3 = 23,
	OPC_DSXPP_1 = 24,
	OPC_DSYPP_1 = 25,
	OPC_RGETPOS = 26,
	OPC_RGETINFO = 27,

	/* category 6: */
	OPC_LDG = 0,        /* load-global */
	OPC_LDL = 1,
	OPC_LDP = 2,
	OPC_STG = 3,        /* store-global */
	OPC_STL = 4,
	OPC_STP = 5,
	OPC_STI = 6,
	OPC_G2L = 7,
	OPC_L2G = 8,
	OPC_PREFETCH = 9,
	OPC_LDLW = 10,
	OPC_STLW = 11,
	/* 12-13 - not assigned here */
	OPC_RESFMT = 14,
	OPC_RESINFO = 15,
	OPC_ATOMIC_ADD_L = 16,
	OPC_ATOMIC_SUB_L = 17,
	OPC_ATOMIC_XCHG_L = 18,
	OPC_ATOMIC_INC_L = 19,
	OPC_ATOMIC_DEC_L = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L = 22,
	OPC_ATOMIC_MAX_L = 23,
	OPC_ATOMIC_AND_L = 24,
	OPC_ATOMIC_OR_L = 25,
	OPC_ATOMIC_XOR_L = 26,
	OPC_LDGB_TYPED_4D = 27,
	OPC_STGB_4D_4 = 28,
	OPC_STIB = 29,
	OPC_LDC_4 = 30,
	OPC_LDLV = 31,

	/* meta instructions (category -1): */
	/* placeholder instr to mark inputs/outputs: */
	OPC_META_INPUT = 0,
	OPC_META_OUTPUT = 1,
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO = 2,
	OPC_META_FI = 3,
	/* branches/flow control */
	OPC_META_FLOW = 4,
	OPC_META_PHI = 5,
} opc_t;

/*
 * Data type codes (values 0-7, so they fit the 3-bit type fields used by
 * the instruction encodings in this file).
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX I assume? */
} type_t;

221static inline uint32_t type_size(type_t type)
222{
223	switch (type) {
224	case TYPE_F32:
225	case TYPE_U32:
226	case TYPE_S32:
227		return 32;
228	case TYPE_F16:
229	case TYPE_U16:
230	case TYPE_S16:
231		return 16;
232	case TYPE_U8:
233	case TYPE_S8:
234		return 8;
235	default:
236		assert(0); /* invalid type */
237		return 0;
238	}
239}
240
241static inline int type_float(type_t type)
242{
243	return (type == TYPE_F32) || (type == TYPE_F16);
244}
245
246static inline int type_uint(type_t type)
247{
248	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
249}
250
251static inline int type_sint(type_t type)
252{
253	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
254}
255
256typedef union PACKED {
257	/* normal gpr or const src register: */
258	struct PACKED {
259		uint32_t comp  : 2;
260		uint32_t num   : 10;
261	};
262	/* for immediate val: */
263	int32_t  iim_val   : 11;
264	/* to make compiler happy: */
265	uint32_t dummy32;
266	uint32_t dummy10   : 10;
267	uint32_t dummy11   : 11;
268	uint32_t dummy12   : 12;
269	uint32_t dummy13   : 13;
270	uint32_t dummy8    : 8;
271} reg_t;
272
/* special registers (values are compared against reg_t.num): */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

277static inline int reg_special(reg_t reg)
278{
279	return (reg.num == REG_A0) || (reg.num == REG_P0);
280}
281
/*
 * Category 0 instruction encoding: two 32-bit dwords (64 bits in total).
 * The 4-bit opc field holds the "category 0" values of opc_t.
 */
typedef struct PACKED {
	/* dword0: */
	int16_t  immed    : 16;
	uint32_t dummy1   : 16;

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

301typedef struct PACKED {
302	/* dword0: */
303	union PACKED {
304		/* for normal src register: */
305		struct PACKED {
306			uint32_t src : 11;
307			/* at least low bit of pad must be zero or it will
308			 * look like a address relative src
309			 */
310			uint32_t pad : 21;
311		};
312		/* for address relative: */
313		struct PACKED {
314			int32_t  off : 10;
315			uint32_t src_rel_c : 1;
316			uint32_t src_rel : 1;
317			uint32_t unknown : 20;
318		};
319		/* for immediate: */
320		int32_t  iim_val;
321		uint32_t uim_val;
322		float    fim_val;
323	};
324
325	/* dword1: */
326	uint32_t dst        : 8;
327	uint32_t repeat     : 3;
328	uint32_t src_r      : 1;
329	uint32_t ss         : 1;
330	uint32_t ul         : 1;
331	uint32_t dst_type   : 3;
332	uint32_t dst_rel    : 1;
333	uint32_t src_type   : 3;
334	uint32_t src_c      : 1;
335	uint32_t src_im     : 1;
336	uint32_t even       : 1;
337	uint32_t pos_inf    : 1;
338	uint32_t must_be_0  : 2;
339	uint32_t jmp_tgt    : 1;
340	uint32_t sync       : 1;
341	uint32_t opc_cat    : 3;
342} instr_cat1_t;
343
344typedef struct PACKED {
345	/* dword0: */
346	union PACKED {
347		struct PACKED {
348			uint32_t src1         : 11;
349			uint32_t must_be_zero1: 2;
350			uint32_t src1_im      : 1;   /* immediate */
351			uint32_t src1_neg     : 1;   /* negate */
352			uint32_t src1_abs     : 1;   /* absolute value */
353		};
354		struct PACKED {
355			uint32_t src1         : 10;
356			uint32_t src1_c       : 1;   /* relative-const */
357			uint32_t src1_rel     : 1;   /* relative address */
358			uint32_t must_be_zero : 1;
359			uint32_t dummy        : 3;
360		} rel1;
361		struct PACKED {
362			uint32_t src1         : 12;
363			uint32_t src1_c       : 1;   /* const */
364			uint32_t dummy        : 3;
365		} c1;
366	};
367
368	union PACKED {
369		struct PACKED {
370			uint32_t src2         : 11;
371			uint32_t must_be_zero2: 2;
372			uint32_t src2_im      : 1;   /* immediate */
373			uint32_t src2_neg     : 1;   /* negate */
374			uint32_t src2_abs     : 1;   /* absolute value */
375		};
376		struct PACKED {
377			uint32_t src2         : 10;
378			uint32_t src2_c       : 1;   /* relative-const */
379			uint32_t src2_rel     : 1;   /* relative address */
380			uint32_t must_be_zero : 1;
381			uint32_t dummy        : 3;
382		} rel2;
383		struct PACKED {
384			uint32_t src2         : 12;
385			uint32_t src2_c       : 1;   /* const */
386			uint32_t dummy        : 3;
387		} c2;
388	};
389
390	/* dword1: */
391	uint32_t dst      : 8;
392	uint32_t repeat   : 3;
393	uint32_t src1_r   : 1;
394	uint32_t ss       : 1;
395	uint32_t ul       : 1;   /* dunno */
396	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
397	uint32_t ei       : 1;
398	uint32_t cond     : 3;
399	uint32_t src2_r   : 1;
400	uint32_t full     : 1;   /* not half */
401	uint32_t opc      : 6;
402	uint32_t jmp_tgt  : 1;
403	uint32_t sync     : 1;
404	uint32_t opc_cat  : 3;
405} instr_cat2_t;
406
407typedef struct PACKED {
408	/* dword0: */
409	union PACKED {
410		struct PACKED {
411			uint32_t src1         : 11;
412			uint32_t must_be_zero1: 2;
413			uint32_t src2_c       : 1;
414			uint32_t src1_neg     : 1;
415			uint32_t src2_r       : 1;
416		};
417		struct PACKED {
418			uint32_t src1         : 10;
419			uint32_t src1_c       : 1;
420			uint32_t src1_rel     : 1;
421			uint32_t must_be_zero : 1;
422			uint32_t dummy        : 3;
423		} rel1;
424		struct PACKED {
425			uint32_t src1         : 12;
426			uint32_t src1_c       : 1;
427			uint32_t dummy        : 3;
428		} c1;
429	};
430
431	union PACKED {
432		struct PACKED {
433			uint32_t src3         : 11;
434			uint32_t must_be_zero2: 2;
435			uint32_t src3_r       : 1;
436			uint32_t src2_neg     : 1;
437			uint32_t src3_neg     : 1;
438		};
439		struct PACKED {
440			uint32_t src3         : 10;
441			uint32_t src3_c       : 1;
442			uint32_t src3_rel     : 1;
443			uint32_t must_be_zero : 1;
444			uint32_t dummy        : 3;
445		} rel2;
446		struct PACKED {
447			uint32_t src3         : 12;
448			uint32_t src3_c       : 1;
449			uint32_t dummy        : 3;
450		} c2;
451	};
452
453	/* dword1: */
454	uint32_t dst      : 8;
455	uint32_t repeat   : 3;
456	uint32_t src1_r   : 1;
457	uint32_t ss       : 1;
458	uint32_t ul       : 1;
459	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
460	uint32_t src2     : 8;
461	uint32_t opc      : 4;
462	uint32_t jmp_tgt  : 1;
463	uint32_t sync     : 1;
464	uint32_t opc_cat  : 3;
465} instr_cat3_t;
466
467static inline bool instr_cat3_full(instr_cat3_t *cat3)
468{
469	switch (cat3->opc) {
470	case OPC_MAD_F16:
471	case OPC_MAD_U16:
472	case OPC_MAD_S16:
473	case OPC_SEL_B16:
474	case OPC_SEL_S16:
475	case OPC_SEL_F16:
476	case OPC_SAD_S16:
477	case OPC_SAD_S32:  // really??
478		return false;
479	default:
480		return true;
481	}
482}
483
484typedef struct PACKED {
485	/* dword0: */
486	union PACKED {
487		struct PACKED {
488			uint32_t src          : 11;
489			uint32_t must_be_zero1: 2;
490			uint32_t src_im       : 1;   /* immediate */
491			uint32_t src_neg      : 1;   /* negate */
492			uint32_t src_abs      : 1;   /* absolute value */
493		};
494		struct PACKED {
495			uint32_t src          : 10;
496			uint32_t src_c        : 1;   /* relative-const */
497			uint32_t src_rel      : 1;   /* relative address */
498			uint32_t must_be_zero : 1;
499			uint32_t dummy        : 3;
500		} rel;
501		struct PACKED {
502			uint32_t src          : 12;
503			uint32_t src_c        : 1;   /* const */
504			uint32_t dummy        : 3;
505		} c;
506	};
507	uint32_t dummy1   : 16;  /* seem to be ignored */
508
509	/* dword1: */
510	uint32_t dst      : 8;
511	uint32_t repeat   : 3;
512	uint32_t src_r    : 1;
513	uint32_t ss       : 1;
514	uint32_t ul       : 1;
515	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
516	uint32_t dummy2   : 5;   /* seem to be ignored */
517	uint32_t full     : 1;   /* not half */
518	uint32_t opc      : 6;
519	uint32_t jmp_tgt  : 1;
520	uint32_t sync     : 1;
521	uint32_t opc_cat  : 3;
522} instr_cat4_t;
523
524typedef struct PACKED {
525	/* dword0: */
526	union PACKED {
527		/* normal case: */
528		struct PACKED {
529			uint32_t full     : 1;   /* not half */
530			uint32_t src1     : 8;
531			uint32_t src2     : 8;
532			uint32_t dummy1   : 4;   /* seem to be ignored */
533			uint32_t samp     : 4;
534			uint32_t tex      : 7;
535		} norm;
536		/* s2en case: */
537		struct PACKED {
538			uint32_t full     : 1;   /* not half */
539			uint32_t src1     : 8;
540			uint32_t src2     : 11;
541			uint32_t dummy1   : 1;
542			uint32_t src3     : 8;
543			uint32_t dummy2   : 3;
544		} s2en;
545		/* same in either case: */
546		// XXX I think, confirm this
547		struct PACKED {
548			uint32_t full     : 1;   /* not half */
549			uint32_t src1     : 8;
550			uint32_t pad      : 23;
551		};
552	};
553
554	/* dword1: */
555	uint32_t dst      : 8;
556	uint32_t wrmask   : 4;   /* write-mask */
557	uint32_t type     : 3;
558	uint32_t dummy2   : 1;   /* seems to be ignored */
559	uint32_t is_3d    : 1;
560
561	uint32_t is_a     : 1;
562	uint32_t is_s     : 1;
563	uint32_t is_s2en  : 1;
564	uint32_t is_o     : 1;
565	uint32_t is_p     : 1;
566
567	uint32_t opc      : 5;
568	uint32_t jmp_tgt  : 1;
569	uint32_t sync     : 1;
570	uint32_t opc_cat  : 3;
571} instr_cat5_t;
572
/*
 * Category 6 encoding, variant with an offset: [src1 + off], src2
 * (64 bits).  Bit 0 of dword0 (mustbe1) is set for this variant; the
 * offset is a signed 13-bit field.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6a_t;

/*
 * Category 6 encoding, variant without an offset: [src1], src2 (64 bits).
 * Bit 0 of dword0 (mustbe0) is clear for this variant, and src1 directly
 * follows it; the 13 bits after src1 are ignored.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t ignore0  : 13;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6b_t;

615/* I think some of the other cat6 instructions use additional
616 * sub-encodings..
617 */
618
619typedef union PACKED {
620	instr_cat6a_t a;
621	instr_cat6b_t b;
622	struct PACKED {
623		/* dword0: */
624		uint32_t has_off  : 1;
625		uint32_t pad1     : 31;
626
627		/* dword1: */
628		uint32_t pad2     : 17;
629		uint32_t type     : 3;
630		uint32_t pad3     : 2;
631		uint32_t opc      : 5;
632		uint32_t jmp_tgt  : 1;
633		uint32_t sync     : 1;
634		uint32_t opc_cat  : 3;
635	};
636} instr_cat6_t;
637
638typedef union PACKED {
639	instr_cat0_t cat0;
640	instr_cat1_t cat1;
641	instr_cat2_t cat2;
642	instr_cat3_t cat3;
643	instr_cat4_t cat4;
644	instr_cat5_t cat5;
645	instr_cat6_t cat6;
646	struct PACKED {
647		/* dword0: */
648		uint64_t pad1     : 40;
649		uint32_t repeat   : 3;  /* cat0-cat4 */
650		uint32_t pad2     : 1;
651		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
652		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
653		uint32_t pad3     : 13;
654		uint32_t jmp_tgt  : 1;
655		uint32_t sync     : 1;
656		uint32_t opc_cat  : 3;
657
658	};
659} instr_t;
660
661static inline uint32_t instr_opc(instr_t *instr)
662{
663	switch (instr->opc_cat) {
664	case 0:  return instr->cat0.opc;
665	case 1:  return 0;
666	case 2:  return instr->cat2.opc;
667	case 3:  return instr->cat3.opc;
668	case 4:  return instr->cat4.opc;
669	case 5:  return instr->cat5.opc;
670	case 6:  return instr->cat6.opc;
671	default: return 0;
672	}
673}
674
675static inline bool is_mad(opc_t opc)
676{
677	switch (opc) {
678	case OPC_MAD_U16:
679	case OPC_MADSH_U16:
680	case OPC_MAD_S16:
681	case OPC_MADSH_M16:
682	case OPC_MAD_U24:
683	case OPC_MAD_S24:
684	case OPC_MAD_F16:
685	case OPC_MAD_F32:
686		return true;
687	default:
688		return false;
689	}
690}
691
#endif /* INSTR_A3XX_H_ */