1/**************************************************************************
2
3Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4                     VA Linux Systems Inc., Fremont, California.
5
6All Rights Reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining
9a copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sublicense, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice (including the
17next paragraph) shall be included in all copies or substantial
18portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28**************************************************************************/
29
30/*
31 * Authors:
32 *   Kevin E. Martin <martin@valinux.com>
33 *   Gareth Hughes <gareth@valinux.com>
34 *   Keith Whitwell <keith@tungstengraphics.com>
35 */
36
37#include <sched.h>
38#include <errno.h>
39
40#include "main/attrib.h"
41#include "main/bufferobj.h"
42#include "swrast/swrast.h"
43
44#include "main/glheader.h"
45#include "main/imports.h"
46#include "main/simple_list.h"
47
48#include "radeon_context.h"
49#include "radeon_common.h"
50#include "radeon_ioctl.h"
51
52#define STANDALONE_MMIO
53
54#define RADEON_TIMEOUT             512
55#define RADEON_IDLE_RETRY           16
56
57
58/* =============================================================
59 * Kernel command buffer handling
60 */
61
62/* The state atoms will be emitted in the order they appear in the atom list,
63 * so this step is important.
64 */
void radeonSetUpAtomList( r100ContextPtr rmesa )
{
   /* mtu bounds the per-texture-unit atoms below. */
   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;

   /* Start from an empty list; the name is used in debug output only. */
   make_empty_list(&rmesa->radeon.hw.atomlist);
   rmesa->radeon.hw.atomlist.name = "atom-list";

   /* Core context/setup state first.  NOTE: insertion order here is the
    * hardware emission order (see the comment above this function) —
    * do not reorder without a reason.
    */
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
   /* Per-texture-unit atoms: texture, txr and cubemap state. */
   for (i = 0; i < mtu; ++i) {
       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
   }
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
   /* Matrices: 3 fixed ones plus one per texture unit. */
   for (i = 0; i < 3 + mtu; ++i)
      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
   /* 8 hardware lights. */
   for (i = 0; i < 8; ++i)
      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
   /* 6 user clip planes. */
   for (i = 0; i < 6; ++i)
      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.stp);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
}
98
99static void radeonEmitScissor(r100ContextPtr rmesa)
100{
101    BATCH_LOCALS(&rmesa->radeon);
102    if (rmesa->radeon.state.scissor.enabled) {
103        BEGIN_BATCH(6);
104        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
105        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
106        OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
107        OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
108                  rmesa->radeon.state.scissor.rect.x1);
109        OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
110        OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
111                  (rmesa->radeon.state.scissor.rect.x2));
112        END_BATCH();
113    } else {
114        BEGIN_BATCH(2);
115        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
116        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
117        END_BATCH();
118    }
119}
120
/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
124extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
125				GLuint vertex_format,
126				GLuint primitive,
127				GLuint vertex_nr )
128{
129   BATCH_LOCALS(&rmesa->radeon);
130
131   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
132
133   radeonEmitState(&rmesa->radeon);
134   radeonEmitScissor(rmesa);
135
136#if RADEON_OLD_PACKETS
137   BEGIN_BATCH(8);
138   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
139   OUT_BATCH(rmesa->ioctl.vertex_offset);
140
141   OUT_BATCH(vertex_nr);
142   OUT_BATCH(vertex_format);
143   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
144	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
145	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
146	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
147
148   radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
149			 rmesa->ioctl.bo,
150			 RADEON_GEM_DOMAIN_GTT,
151			 0, 0);
152
153   END_BATCH();
154
155#else
156   BEGIN_BATCH(4);
157   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
158   OUT_BATCH(vertex_format);
159   OUT_BATCH(primitive |
160	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
161	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
162	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
163	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
164	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
165   END_BATCH();
166#endif
167}
168
169void radeonFlushElts( struct gl_context *ctx )
170{
171   r100ContextPtr rmesa = R100_CONTEXT(ctx);
172   BATCH_LOCALS(&rmesa->radeon);
173   int nr;
174   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
175   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
176
177   if (RADEON_DEBUG & RADEON_IOCTL)
178      fprintf(stderr, "%s\n", __FUNCTION__);
179
180   assert( rmesa->radeon.dma.flush == radeonFlushElts );
181   rmesa->radeon.dma.flush = NULL;
182
183   nr = rmesa->tcl.elt_used;
184
185#if RADEON_OLD_PACKETS
186   dwords -= 2;
187#endif
188
189#if RADEON_OLD_PACKETS
190   cmd[1] |= (dwords + 3) << 16;
191   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
192#else
193   cmd[1] |= (dwords + 2) << 16;
194   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
195#endif
196
197   rmesa->radeon.cmdbuf.cs->cdw += dwords;
198   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
199
200#if RADEON_OLD_PACKETS
201   radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
202			 rmesa->ioctl.bo,
203			 RADEON_GEM_DOMAIN_GTT,
204			 0, 0);
205#endif
206
207   END_BATCH();
208
209   if (RADEON_DEBUG & RADEON_SYNC) {
210      fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
211      radeonFinish( rmesa->radeon.glCtx );
212   }
213
214}
215
/* Begin an open-ended indexed primitive and return a pointer into the
 * command stream where at least min_nr GLushort indices can be written.
 * The packet's count fields are left unpatched; radeonFlushElts() is
 * installed as the dma.flush callback and fills them in when the caller
 * is done appending indices.
 */
GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
				    GLuint vertex_format,
				    GLuint primitive,
				    GLuint min_nr )
{
   GLushort *retval;
   int align_min_nr;
   BATCH_LOCALS(&rmesa->radeon);

   if (RADEON_DEBUG & RADEON_IOCTL)
      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);

   /* Indexed walk only; list-walk primitives use radeonEmitVbufPrim. */
   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState(&rmesa->radeon);
   radeonEmitScissor(rmesa);

   /* Remember where the packet header starts so radeonFlushElts() can
    * patch its count fields later.
    */
   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;

   /* round up min_nr to align the state */
   align_min_nr = (min_nr + 1) & ~1;

#if RADEON_OLD_PACKETS
   /* Old-style packet: carries vertex offset/max inline (reloc is written
    * by radeonFlushElts when the packet is closed).
    */
   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
   OUT_BATCH(rmesa->ioctl.vertex_offset);
   OUT_BATCH(rmesa->ioctl.vertex_max);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#else
   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#endif


   /* Indices are appended directly after the header just emitted. */
   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
   rmesa->tcl.elt_used = min_nr;

   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);

   if (RADEON_DEBUG & RADEON_RENDER)
      fprintf(stderr, "%s: header prim %x \n",
	      __FUNCTION__, primitive);

   /* No flush callback may already be pending; radeonFlushElts() will
    * close this packet.
    */
   assert(!rmesa->radeon.dma.flush);
   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->radeon.dma.flush = radeonFlushElts;

   return retval;
}
275
276void radeonEmitVertexAOS( r100ContextPtr rmesa,
277			  GLuint vertex_size,
278			  struct radeon_bo *bo,
279			  GLuint offset )
280{
281#if RADEON_OLD_PACKETS
282   rmesa->ioctl.vertex_offset = offset;
283   rmesa->ioctl.bo = bo;
284#else
285   BATCH_LOCALS(&rmesa->radeon);
286
287   if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_IOCTL))
288      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
289	      __FUNCTION__, vertex_size, offset);
290
291   BEGIN_BATCH(7);
292   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
293   OUT_BATCH(1);
294   OUT_BATCH(vertex_size | (vertex_size << 8));
295   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
296   END_BATCH();
297
298#endif
299}
300
301
302void radeonEmitAOS( r100ContextPtr rmesa,
303		    GLuint nr,
304		    GLuint offset )
305{
306#if RADEON_OLD_PACKETS
307   assert( nr == 1 );
308   rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
309   rmesa->ioctl.vertex_offset =
310     (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
311   rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
312#else
313   BATCH_LOCALS(&rmesa->radeon);
314   uint32_t voffset;
315   //   int sz = AOS_BUFSZ(nr);
316   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
317   int i;
318
319   if (RADEON_DEBUG & RADEON_IOCTL)
320      fprintf(stderr, "%s\n", __FUNCTION__);
321
322   BEGIN_BATCH(sz+2+(nr * 2));
323   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
324   OUT_BATCH(nr);
325
326   {
327      for (i = 0; i + 1 < nr; i += 2) {
328	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
329		   (rmesa->radeon.tcl.aos[i].stride << 8) |
330		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
331		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
332
333	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
334	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
335	 OUT_BATCH(voffset);
336	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
337	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
338	 OUT_BATCH(voffset);
339      }
340
341      if (nr & 1) {
342	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
343		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
344	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
345	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
346	 OUT_BATCH(voffset);
347      }
348      for (i = 0; i + 1 < nr; i += 2) {
349	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
350	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
351	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
352			       rmesa->radeon.tcl.aos[i+0].bo,
353			       RADEON_GEM_DOMAIN_GTT,
354			       0, 0);
355	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
356	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
357	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
358			       rmesa->radeon.tcl.aos[i+1].bo,
359			       RADEON_GEM_DOMAIN_GTT,
360			       0, 0);
361      }
362      if (nr & 1) {
363	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
364	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
365	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
366			       rmesa->radeon.tcl.aos[nr-1].bo,
367			       RADEON_GEM_DOMAIN_GTT,
368			       0, 0);
369      }
370   }
371   END_BATCH();
372
373#endif
374}
375
376/* ================================================================
377 * Buffer clear
378 */
379#define RADEON_MAX_CLEARS	256
380
381static void radeonClear( struct gl_context *ctx, GLbitfield mask )
382{
383   r100ContextPtr rmesa = R100_CONTEXT(ctx);
384   GLuint hwmask, swmask;
385   GLuint hwbits = BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT |
386                   BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL |
387                   BUFFER_BIT_COLOR0;
388
389   if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
390      rmesa->radeon.front_buffer_dirty = GL_TRUE;
391   }
392
393   if ( RADEON_DEBUG & RADEON_IOCTL ) {
394      fprintf( stderr, "radeonClear\n");
395   }
396
397   radeon_firevertices(&rmesa->radeon);
398
399   hwmask = mask & hwbits;
400   swmask = mask & ~hwbits;
401
402   if ( swmask ) {
403      if (RADEON_DEBUG & RADEON_FALLBACKS)
404	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swmask);
405      _swrast_Clear( ctx, swmask );
406   }
407
408   if ( !hwmask )
409      return;
410
411   radeonUserClear(ctx, hwmask);
412}
413
414void radeonInitIoctlFuncs( struct gl_context *ctx )
415{
416    ctx->Driver.Clear = radeonClear;
417    ctx->Driver.Finish = radeonFinish;
418    ctx->Driver.Flush = radeonFlush;
419}
420
421