1/*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23#ifndef RADEON_CODE_H
24#define RADEON_CODE_H
25
26#include <stdint.h>
27
/*
 * Fragment program (PFS) limits. The prefix of each macro names the chip
 * family the limit applies to. The R400 and R500 instruction limits are used
 * further down in this header to size the compiled-code arrays.
 */
#define R300_PFS_MAX_ALU_INST     64
#define R300_PFS_MAX_TEX_INST     32
#define R300_PFS_MAX_TEX_INDIRECT 4
#define R300_PFS_NUM_TEMP_REGS    32
#define R300_PFS_NUM_CONST_REGS   32

#define R400_PFS_MAX_ALU_INST     512
#define R400_PFS_MAX_TEX_INST     512

#define R500_PFS_MAX_INST         512
#define R500_PFS_NUM_TEMP_REGS    128
#define R500_PFS_NUM_CONST_REGS   256
#define R500_PFS_MAX_BRANCH_DEPTH_FULL    32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4

/* The r500 maximum depth is not just for loops, but for any combination of
 * loops and subroutine jumps. */
#define R500_PVS_MAX_LOOP_DEPTH 8

/* NOTE(review): STATE_INTERNAL_DRIVER is not defined in this header; any
 * translation unit that expands this macro must provide it. */
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
48
/**
 * Kinds of constant; stored in the 2-bit rc_constant::Type field, so every
 * value here must stay in the range 0..3.
 */
enum {
	/**
	 * External constants are constants whose meaning is unknown to this
	 * compiler. For example, a Mesa gl_program's constants are turned
	 * into external constants.
	 */
	RC_CONSTANT_EXTERNAL = 0,

	/**
	 * Immediate constant.
	 * NOTE(review): presumably the value lives in rc_constant::u.Immediate
	 * -- confirm against the implementation.
	 */
	RC_CONSTANT_IMMEDIATE = 1,

	/**
	 * Constant referring to state that is known by this compiler,
	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
	 */
	RC_CONSTANT_STATE = 2
};
65
/**
 * Identifiers for state known to this compiler (the RC_STATE_xxx values
 * referred to by RC_CONSTANT_STATE). Values are explicit so that the
 * numbering cannot shift silently when entries are added.
 */
enum {
	RC_STATE_SHADOW_AMBIENT = 0,

	RC_STATE_R300_WINDOW_DIMENSION = 1,
	RC_STATE_R300_TEXRECT_FACTOR = 2,   /**< scaling factor for non-normalized coords */
	RC_STATE_R300_TEXSCALE_FACTOR = 3,  /**< scaling factor applied right before texture fetch */
	RC_STATE_R300_VIEWPORT_SCALE = 4,
	RC_STATE_R300_VIEWPORT_OFFSET = 5
};
75
/**
 * A single shader constant: a small tag plus a payload union.
 */
struct rc_constant {
	unsigned Type:2; /**< RC_CONSTANT_xxx */
	/* NOTE(review): presumably the number of components in use -- confirm. */
	unsigned Size:3;

	/* Payload. NOTE(review): presumably selected by Type -- confirm. */
	union {
		unsigned External;
		float Immediate[4];
		/* NOTE(review): likely the state1/state2 pair passed to
		 * rc_constants_add_state() -- confirm. */
		unsigned State[2];
	} u;
};
86
/**
 * A list of rc_constant entries, manipulated through the rc_constants_*()
 * functions declared in this header.
 */
struct rc_constant_list {
	struct rc_constant *Constants; /**< pointer to the constant entries */
	unsigned Count;

	/* NOTE(review): presumably the allocated capacity of Constants --
	 * confirm in the implementation. The name is kept for compatibility
	 * with existing users. */
	unsigned _Reserved;
};
93
/*
 * Constant-list API. Only the prototypes are visible in this header; the
 * one-line notes below are inferred from names and signatures and should be
 * confirmed against the implementation.
 */

/* Set up a constant list for use. */
void rc_constants_init(struct rc_constant_list *c);
/* Copy the contents of src into dst. */
void rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src);
/* Release whatever the list holds. */
void rc_constants_destroy(struct rc_constant_list *c);
/* Add a constant; NOTE(review): the return value is presumably its index. */
unsigned rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant);
/* Add a state constant described by the (state1, state2) pair. */
unsigned rc_constants_add_state(struct rc_constant_list *c, unsigned state1, unsigned state2);
/* Add an immediate; NOTE(review): data presumably points to 4 floats. */
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data);
/* Add a scalar immediate; swizzle is an out-parameter (meaning not shown here). */
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data, unsigned *swizzle);
/* Print the list (destination not visible from this header). */
void rc_constants_print(struct rc_constant_list *c);
102
/**
 * Compare functions.
 *
 * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
 * the correct GL compare function.
 *
 * The values are spelled out because they must keep this exact order and
 * must fit in the 3-bit texture_compare_func field of
 * r300_fragment_program_external_state.
 */
typedef enum {
	RC_COMPARE_FUNC_NEVER    = 0,
	RC_COMPARE_FUNC_LESS     = 1,
	RC_COMPARE_FUNC_EQUAL    = 2,
	RC_COMPARE_FUNC_LEQUAL   = 3,
	RC_COMPARE_FUNC_GREATER  = 4,
	RC_COMPARE_FUNC_NOTEQUAL = 5,
	RC_COMPARE_FUNC_GEQUAL   = 6,
	RC_COMPARE_FUNC_ALWAYS   = 7
} rc_compare_func;
119
/**
 * Coordinate wrapping modes.
 *
 * These are not quite the same as their GL counterparts yet.
 *
 * Stored in the 3-bit wrap_mode field of
 * r300_fragment_program_external_state; RC_WRAP_NONE must remain 0.
 */
typedef enum {
	RC_WRAP_NONE            = 0,
	RC_WRAP_REPEAT          = 1,
	RC_WRAP_MIRRORED_REPEAT = 2,
	RC_WRAP_MIRRORED_CLAMP  = 3
} rc_wrap_mode;
131
/**
 * Stores state that influences the compilation of a fragment program.
 *
 * There is one entry in \c unit for each of 16 units; all fields are
 * bit-fields.
 */
struct r300_fragment_program_external_state {
	struct {
		/**
		 * This field contains swizzle for some lowering passes
		 * (shadow comparison, unorm->snorm conversion)
		 */
		unsigned texture_swizzle : 12;

		/**
		 * If the sampler is used as a shadow sampler,
		 * this field specifies the compare function.
		 *
		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
		 * \sa rc_compare_func
		 */
		unsigned texture_compare_func : 3;

		/**
		 * No matter what the sampler type is,
		 * this field turns it into a shadow sampler.
		 */
		unsigned compare_mode_enabled : 1;

		/**
		 * If the sampler will receive non-normalized coords,
		 * this field is set. The scaling factor is given by
		 * RC_STATE_R300_TEXRECT_FACTOR.
		 */
		unsigned non_normalized_coords : 1;

		/**
		 * This field specifies wrapping modes for the sampler.
		 *
		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
		 * will be performed on the coordinates.
		 * \sa rc_wrap_mode
		 */
		unsigned wrap_mode : 3;

		/**
		 * The coords are scaled after applying the wrap mode emulation
		 * and right before texture fetch. The scaling factor is given by
		 * RC_STATE_R300_TEXSCALE_FACTOR.
		 */
		unsigned clamp_and_scale_before_fetch : 1;

		/**
		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
		 * in the shader.
		 */
		unsigned convert_unorm_to_snorm : 1;
	} unit[16];
};
186
187
188
/**
 * Instruction ranges (texture and ALU) that make up one node of an R300
 * fragment program.
 */
struct r300_fragment_program_node {
	int tex_offset; /**< first tex instruction */
	int tex_end;    /**< last tex instruction, relative to tex_offset */
	int alu_offset; /**< first ALU instruction */
	int alu_end;    /**< last ALU instruction, relative to alu_offset */
	int flags;      /* NOTE(review): flag bits are not defined in this header */
};
196
197/**
198 * Stores an R300 fragment program in its compiled-to-hardware form.
199 */
200struct r300_fragment_program_code {
201	struct {
202		unsigned int length; /**< total # of texture instructions used */
203		uint32_t inst[R400_PFS_MAX_TEX_INST];
204	} tex;
205
206	struct {
207		unsigned int length; /**< total # of ALU instructions used */
208		struct {
209			uint32_t rgb_inst;
210			uint32_t rgb_addr;
211			uint32_t alpha_inst;
212			uint32_t alpha_addr;
213			uint32_t r400_ext_addr;
214		} inst[R400_PFS_MAX_ALU_INST];
215	} alu;
216
217	uint32_t config; /* US_CONFIG */
218	uint32_t pixsize; /* US_PIXSIZE */
219	uint32_t code_offset; /* US_CODE_OFFSET */
220	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
221	uint32_t code_addr[4]; /* US_CODE_ADDR */
222	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
223	 * for r400 cards */
224	unsigned int r390_mode:1;
225};
226
227
228struct r500_fragment_program_code {
229	struct {
230		uint32_t inst0;
231		uint32_t inst1;
232		uint32_t inst2;
233		uint32_t inst3;
234		uint32_t inst4;
235		uint32_t inst5;
236	} inst[R500_PFS_MAX_INST];
237
238	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
239
240	int max_temp_idx;
241
242	uint32_t us_fc_ctrl;
243
244	uint32_t int_constants[32];
245	uint32_t int_constant_count;
246};
247
248struct rX00_fragment_program_code {
249	union {
250		struct r300_fragment_program_code r300;
251		struct r500_fragment_program_code r500;
252	} code;
253
254	unsigned writes_depth:1;
255
256	struct rc_constant_list constants;
257	unsigned *constants_remap_table;
258};
259
260
/*
 * Vertex shader limits. The *_DWORDS macros are four times the matching
 * instruction count.
 */
#define R300_VS_MAX_ALU         256
#define R300_VS_MAX_ALU_DWORDS  (R300_VS_MAX_ALU * 4)
#define R500_VS_MAX_ALU         1024
#define R500_VS_MAX_ALU_DWORDS  (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS       32
/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS      16
#define R300_VS_MAX_LOOP_DEPTH  1

#define VSF_MAX_INPUTS  32
#define VSF_MAX_OUTPUTS 32
272
273struct r300_vertex_program_code {
274	int length;
275	union {
276		uint32_t d[R500_VS_MAX_ALU_DWORDS];
277		float f[R500_VS_MAX_ALU_DWORDS];
278	} body;
279
280	int pos_end;
281	int num_temporaries;	/* Number of temp vars used by program */
282	int inputs[VSF_MAX_INPUTS];
283	int outputs[VSF_MAX_OUTPUTS];
284
285	struct rc_constant_list constants;
286	unsigned *constants_remap_table;
287
288	uint32_t InputsRead;
289	uint32_t OutputsWritten;
290
291	unsigned int num_fc_ops;
292	uint32_t fc_ops;
293	union {
294	        uint32_t r300[R300_VS_MAX_FC_OPS];
295		struct {
296			uint32_t lw;
297			uint32_t uw;
298		} r500[R300_VS_MAX_FC_OPS];
299	} fc_op_addrs;
300	int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
301};
302
303#endif /* RADEON_CODE_H */
304
305