1/**************************************************************************
2
3Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4                     VA Linux Systems Inc., Fremont, California.
5
6All Rights Reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining
9a copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sublicense, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice (including the
17next paragraph) shall be included in all copies or substantial
18portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28**************************************************************************/
29
30/*
31 * Authors:
32 *   Kevin E. Martin <martin@valinux.com>
33 *   Gareth Hughes <gareth@valinux.com>
34 *   Keith Whitwell <keithw@vmware.com>
35 */
36
37#include <sched.h>
38#include <errno.h>
39
40#include "main/attrib.h"
41#include "main/bufferobj.h"
42#include "swrast/swrast.h"
43
44#include "main/glheader.h"
45#include "main/imports.h"
46#include "util/simple_list.h"
47
48#include "radeon_context.h"
49#include "radeon_common.h"
50#include "radeon_ioctl.h"
51
52#define RADEON_TIMEOUT             512
53#define RADEON_IDLE_RETRY           16
54
55
56/* =============================================================
57 * Kernel command buffer handling
58 */
59
60/* The state atoms will be emitted in the order they appear in the atom list,
61 * so this step is important.
62 */
63void radeonSetUpAtomList( r100ContextPtr rmesa )
64{
65   int i, mtu = rmesa->radeon.glCtx.Const.MaxTextureUnits;
66
67   make_empty_list(&rmesa->radeon.hw.atomlist);
68   rmesa->radeon.hw.atomlist.name = "atom-list";
69
70   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
71   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
72   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
73   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
74   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
75   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
76   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
77   for (i = 0; i < mtu; ++i) {
78       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
79       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
80       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
81   }
82   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
83   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
84   for (i = 0; i < 3 + mtu; ++i)
85      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
86   for (i = 0; i < 8; ++i)
87      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
88   for (i = 0; i < 6; ++i)
89      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
90   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.stp);
91   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
92   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
93   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
94   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
95}
96
97static void radeonEmitScissor(r100ContextPtr rmesa)
98{
99    BATCH_LOCALS(&rmesa->radeon);
100    if (rmesa->radeon.state.scissor.enabled) {
101        BEGIN_BATCH(6);
102        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
103        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
104        OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
105        OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
106                  rmesa->radeon.state.scissor.rect.x1);
107        OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
108        OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
109                  (rmesa->radeon.state.scissor.rect.x2));
110        END_BATCH();
111    } else {
112        BEGIN_BATCH(2);
113        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
114        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
115        END_BATCH();
116    }
117}
118
/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
/* Emit a list-walk (non-indexed) draw packet for vertex_nr vertices from
 * the vertex buffer previously set up by radeonEmitVertexAOS()/radeonEmitAOS().
 * State atoms and scissor are flushed first so the draw sees current state.
 */
extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
				GLuint vertex_format,
				GLuint primitive,
				GLuint vertex_nr )
{
   BATCH_LOCALS(&rmesa->radeon);

   /* Indexed primitives must go through radeonAllocEltsOpenEnded() instead. */
   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState(&rmesa->radeon);
   radeonEmitScissor(rmesa);

#if RADEON_OLD_PACKETS
   BEGIN_BATCH(8);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
   OUT_BATCH(rmesa->ioctl.vertex_offset);

   OUT_BATCH(vertex_nr);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));

   /* Relocation for the vertex BO stashed by radeonEmitVertexAOS()
    * (old packets reference the buffer from the draw packet itself). */
   radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
			 rmesa->ioctl.bo,
			 RADEON_GEM_DOMAIN_GTT,
			 0, 0);

   END_BATCH();

#else
   /* New-style path: the vertex buffer was already bound via a separate
    * 3D_LOAD_VBPNTR packet, so only the draw command is emitted here. */
   BEGIN_BATCH(4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
   END_BATCH();
#endif
}
166
167void radeonFlushElts( struct gl_context *ctx )
168{
169   r100ContextPtr rmesa = R100_CONTEXT(ctx);
170   BATCH_LOCALS(&rmesa->radeon);
171   int nr;
172   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
173   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
174
175   if (RADEON_DEBUG & RADEON_IOCTL)
176      fprintf(stderr, "%s\n", __func__);
177
178   assert( rmesa->radeon.dma.flush == radeonFlushElts );
179   rmesa->radeon.dma.flush = NULL;
180
181   nr = rmesa->tcl.elt_used;
182
183#if RADEON_OLD_PACKETS
184   dwords -= 2;
185#endif
186
187#if RADEON_OLD_PACKETS
188   cmd[1] |= (dwords + 3) << 16;
189   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
190#else
191   cmd[1] |= (dwords + 2) << 16;
192   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
193#endif
194
195   rmesa->radeon.cmdbuf.cs->cdw += dwords;
196   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
197
198#if RADEON_OLD_PACKETS
199   radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
200			 rmesa->ioctl.bo,
201			 RADEON_GEM_DOMAIN_GTT,
202			 0, 0);
203#endif
204
205   END_BATCH();
206
207   if (RADEON_DEBUG & RADEON_SYNC) {
208      fprintf(stderr, "%s: Syncing\n", __func__);
209      radeonFinish( &rmesa->radeon.glCtx );
210   }
211
212}
213
/* Begin an open-ended indexed draw packet and return a pointer into the
 * command stream where the caller may write up to min_nr GLushort indices.
 * The packet's dword and vertex counts are patched afterwards by
 * radeonFlushElts(), which this function installs as the dma.flush callback;
 * note the batch opened here is ended by radeonFlushElts(), not here.
 */
GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
				    GLuint vertex_format,
				    GLuint primitive,
				    GLuint min_nr )
{
   GLushort *retval;
   int align_min_nr;
   BATCH_LOCALS(&rmesa->radeon);

   if (RADEON_DEBUG & RADEON_IOCTL)
      fprintf(stderr, "%s %d prim %x\n", __func__, min_nr, primitive);

   /* Only indexed (indirect-walk) primitives may use the element path. */
   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState(&rmesa->radeon);
   radeonEmitScissor(rmesa);

   /* Remember where this packet starts so radeonFlushElts() can patch it. */
   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;

   /* round up min_nr to align the state */
   align_min_nr = (min_nr + 1) & ~1;

#if RADEON_OLD_PACKETS
   BEGIN_BATCH(2+ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
   OUT_BATCH(rmesa->ioctl.vertex_offset);
   OUT_BATCH(rmesa->ioctl.vertex_max);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#else
   BEGIN_BATCH(ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#endif


   /* Indices are written by the caller directly after the packet header. */
   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
   rmesa->tcl.elt_used = min_nr;

   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);

   if (RADEON_DEBUG & RADEON_RENDER)
      fprintf(stderr, "%s: header prim %x \n",
	      __func__, primitive);

   /* Defer the batch close/count fixup to radeonFlushElts(). */
   assert(!rmesa->radeon.dma.flush);
   rmesa->radeon.glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->radeon.dma.flush = radeonFlushElts;

   return retval;
}
273
/* Describe a single interleaved vertex buffer to the hardware.  With
 * RADEON_OLD_PACKETS the buffer is referenced later from the draw packet
 * itself, so only the bo/offset are stashed here; otherwise a
 * 3D_LOAD_VBPNTR packet binding the buffer is emitted immediately.
 */
void radeonEmitVertexAOS( r100ContextPtr rmesa,
			  GLuint vertex_size,
			  struct radeon_bo *bo,
			  GLuint offset )
{
#if RADEON_OLD_PACKETS
   rmesa->ioctl.vertex_offset = offset;
   rmesa->ioctl.bo = bo;
#else
   BATCH_LOCALS(&rmesa->radeon);

   if (RADEON_DEBUG & (RADEON_PRIMS|RADEON_IOCTL))
      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
	      __func__, vertex_size, offset);

   BEGIN_BATCH(7);
   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
   /* Exactly one array follows. */
   OUT_BATCH(1);
   /* vertex_size in both low bytes — same components/stride packing as the
    * descriptor dwords in radeonEmitAOS() (presumably count and stride are
    * equal for an interleaved buffer). */
   OUT_BATCH(vertex_size | (vertex_size << 8));
   /* Buffer offset dword plus its GEM relocation. */
   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
   END_BATCH();

#endif
}
298
299
300void radeonEmitAOS( r100ContextPtr rmesa,
301		    GLuint nr,
302		    GLuint offset )
303{
304#if RADEON_OLD_PACKETS
305   assert( nr == 1 );
306   rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
307   rmesa->ioctl.vertex_offset =
308     (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
309   rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
310#else
311   BATCH_LOCALS(&rmesa->radeon);
312   uint32_t voffset;
313   //   int sz = AOS_BUFSZ(nr);
314   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
315   int i;
316
317   if (RADEON_DEBUG & RADEON_IOCTL)
318      fprintf(stderr, "%s\n", __func__);
319
320   BEGIN_BATCH(sz+2+(nr * 2));
321   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
322   OUT_BATCH(nr);
323
324   {
325      for (i = 0; i + 1 < nr; i += 2) {
326	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
327		   (rmesa->radeon.tcl.aos[i].stride << 8) |
328		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
329		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
330
331	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
332	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
333	 OUT_BATCH(voffset);
334	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
335	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
336	 OUT_BATCH(voffset);
337      }
338
339      if (nr & 1) {
340	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
341		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
342	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
343	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
344	 OUT_BATCH(voffset);
345      }
346      for (i = 0; i + 1 < nr; i += 2) {
347	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
348	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
349	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
350			       rmesa->radeon.tcl.aos[i+0].bo,
351			       RADEON_GEM_DOMAIN_GTT,
352			       0, 0);
353	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
354	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
355	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
356			       rmesa->radeon.tcl.aos[i+1].bo,
357			       RADEON_GEM_DOMAIN_GTT,
358			       0, 0);
359      }
360      if (nr & 1) {
361	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
362	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
363	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
364			       rmesa->radeon.tcl.aos[nr-1].bo,
365			       RADEON_GEM_DOMAIN_GTT,
366			       0, 0);
367      }
368   }
369   END_BATCH();
370
371#endif
372}
373
374/* ================================================================
375 * Buffer clear
376 */
377#define RADEON_MAX_CLEARS	256
378
379static void radeonClear( struct gl_context *ctx, GLbitfield mask )
380{
381   r100ContextPtr rmesa = R100_CONTEXT(ctx);
382   GLuint hwmask, swmask;
383   GLuint hwbits = BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT |
384                   BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL |
385                   BUFFER_BIT_COLOR0;
386
387   if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
388      rmesa->radeon.front_buffer_dirty = GL_TRUE;
389   }
390
391   if ( RADEON_DEBUG & RADEON_IOCTL ) {
392      fprintf( stderr, "radeonClear\n");
393   }
394
395   radeon_firevertices(&rmesa->radeon);
396
397   hwmask = mask & hwbits;
398   swmask = mask & ~hwbits;
399
400   if ( swmask ) {
401      if (RADEON_DEBUG & RADEON_FALLBACKS)
402	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __func__, swmask);
403      _swrast_Clear( ctx, swmask );
404   }
405
406   if ( !hwmask )
407      return;
408
409   radeonUserClear(ctx, hwmask);
410}
411
412void radeonInitIoctlFuncs( struct gl_context *ctx )
413{
414    ctx->Driver.Clear = radeonClear;
415    ctx->Driver.Finish = radeonFinish;
416    ctx->Driver.Flush = radeonFlush;
417}
418
419