r300_render.c revision cb17f5ee752d07d82e9b079c6bda9d89e51c7108
/*
 * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

/* r300_render: Vertex and index buffer primitive emission. Contains both
 * HW TCL fastpath rendering and SW TCL Draw-assisted rendering. */

#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"

#include "util/u_inlines.h"

#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"

#include "r300_cs.h"
#include "r300_context.h"
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
#include "r300_state_derived.h"

#include <limits.h>

static uint32_t r300_translate_primitive(unsigned prim)
{
    switch (prim) {
        case PIPE_PRIM_POINTS:
            return R300_VAP_VF_CNTL__PRIM_POINTS;
        case PIPE_PRIM_LINES:
            return R300_VAP_VF_CNTL__PRIM_LINES;
        case PIPE_PRIM_LINE_LOOP:
            return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
        case PIPE_PRIM_LINE_STRIP:
            return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
        case PIPE_PRIM_TRIANGLES:
            return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
        case PIPE_PRIM_TRIANGLE_STRIP:
            return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
        case PIPE_PRIM_TRIANGLE_FAN:
            return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
        case PIPE_PRIM_QUADS:
            return R300_VAP_VF_CNTL__PRIM_QUADS;
        case PIPE_PRIM_QUAD_STRIP:
            return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
        case PIPE_PRIM_POLYGON:
            return R300_VAP_VF_CNTL__PRIM_POLYGON;
        default:
            return 0;
    }
}

static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
                                            unsigned mode)
{
    struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state;
    uint32_t color_control = rs->color_control;

    /* By default (see r300_state.c:r300_create_rs_state) color_control is
     * initialized to provoking the first vertex.
     *
     * Triangle fans must be reduced to the second vertex, not the first, in
     * Gallium flatshade-first mode, as per the GL spec.
     * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
     *
     * Quads never provoke correctly in flatshade-first mode. The first
     * vertex is never considered as provoking, so only the second, third,
     * and fourth vertices can be selected, and both "third" and "last" modes
     * select the fourth vertex. This is probably due to D3D lacking quads.
     *
     * Similarly, polygons reduce to the first, not the last, vertex, when in
     * "last" mode, and all other modes start from the second vertex.
     *
     * ~ C.
     */

    if (rs->rs.flatshade_first) {
        switch (mode) {
            case PIPE_PRIM_TRIANGLE_FAN:
                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
                break;
            case PIPE_PRIM_QUADS:
            case PIPE_PRIM_QUAD_STRIP:
            case PIPE_PRIM_POLYGON:
                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
                break;
            default:
                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST;
                break;
        }
    } else {
        color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
    }

    return color_control;
}

static boolean index_bias_supported(struct r300_context *r300)
{
    return r300->screen->caps.is_r500 &&
           r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
}

static void r500_emit_index_bias(struct r300_context *r300, int index_bias)
{
    CS_LOCALS(r300);

    BEGIN_CS(2);
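    /* The bias is emitted as its low 24 bits; bit 24 appears to act as the
     * negative-bias flag. */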
    OUT_CS_REG(R500_VAP_INDEX_OFFSET,
               (index_bias & 0xFFFFFF) | (index_bias < 0 ? 1<<24 : 0));
    END_CS;
}

/* This function splits the index bias value into two parts:
 * - buffer_offset: the value that can be safely added to buffer offsets
 *   in r300_emit_aos (it must yield a positive offset when added to
 *   a vertex buffer offset)
 * - index_offset: the value that must be manually subtracted from indices
 *   in an index buffer to achieve negative offsets. */
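/* Illustrative example (made-up numbers): with index_bias = -100 and the
 * tightest vertex buffer allowing at most 40 vertices of negative offset,
 * this yields *buffer_offset = -40 and *index_offset = -60; the remaining
 * -60 is applied to the indices themselves. */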
static void r300_split_index_bias(struct r300_context *r300, int index_bias,
                                  int *buffer_offset, int *index_offset)
{
    struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
    struct pipe_vertex_element *velem = r300->velems->velem;
    unsigned i, size;
    int max_neg_bias;

    if (index_bias < 0) {
        /* See how large an index bias we may subtract. We must be careful
         * here because negative buffer offsets are not allowed
         * by the DRM API. */
        max_neg_bias = INT_MAX;
        for (i = 0; i < r300->velems->count; i++) {
            vb = &vbufs[velem[i].vertex_buffer_index];
            size = (vb->buffer_offset + velem[i].src_offset) / vb->stride;
            max_neg_bias = MIN2(max_neg_bias, size);
        }

        /* Now set the minimum allowed value. */
        *buffer_offset = MAX2(-max_neg_bias, index_bias);
    } else {
        /* A positive index bias is OK. */
        *buffer_offset = index_bias;
    }

    *index_offset = index_bias - *buffer_offset;
}

enum r300_prepare_flags {
    PREP_FIRST_DRAW     = (1 << 0), /* call emit_dirty_state and friends? */
    PREP_VALIDATE_VBOS  = (1 << 1), /* validate VBOs? */
    PREP_EMIT_AOS       = (1 << 2), /* call emit_aos? */
    PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */
    PREP_INDEXED        = (1 << 4)  /* is this draw_elements? */
};
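/* For example, the HW TCL indexed path below passes
 * PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED for the
 * first packet of a draw and drops PREP_FIRST_DRAW when it has to prepare
 * again in the middle of a split draw. */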

/**
 * Check if the requested number of dwords is available in the CS and
 * if not, flush. Then validate buffers and emit dirty state.
 * \param r300          The context.
 * \param flags         See r300_prepare_flags.
 * \param index_buffer  The index buffer to validate. The parameter may be NULL.
 * \param cs_dwords     The number of dwords to reserve in CS.
 * \param aos_offset    The offset passed to emit_aos.
 * \param index_bias    The index bias to emit.
 * \param end_cs_dwords The number of free dwords which must remain available
 *                      at the end of the CS after drawing, for the case where
 *                      a draw_* function manages the CS space manually.
 *                      The parameter may be NULL.
 */
static void r300_prepare_for_rendering(struct r300_context *r300,
                                       enum r300_prepare_flags flags,
                                       struct pipe_resource *index_buffer,
                                       unsigned cs_dwords,
                                       int aos_offset,
                                       int index_bias,
                                       unsigned *end_cs_dwords)
{
    unsigned end_dwords    = 0;
    boolean flushed        = FALSE;
    boolean first_draw     = flags & PREP_FIRST_DRAW;
    boolean emit_aos       = flags & PREP_EMIT_AOS;
    boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
    boolean indexed        = flags & PREP_INDEXED;
    boolean hw_index_bias  = index_bias_supported(r300);

    /* Add dirty state, index offset, and AOS. */
    if (first_draw) {
        cs_dwords += r300_get_num_dirty_dwords(r300);

        if (hw_index_bias)
            cs_dwords += 2; /* emit_index_offset */

        if (emit_aos)
            cs_dwords += 55; /* emit_aos */

        if (emit_aos_swtcl)
            cs_dwords += 7; /* emit_aos_swtcl */
    }

    /* Emitted in flush. */
    end_dwords += 26; /* emit_query_end */

    cs_dwords += end_dwords;

    /* Reserve requested CS space. */
    if (!r300_check_cs(r300, cs_dwords)) {
        r300->context.flush(&r300->context, 0, NULL);
        flushed = TRUE;
    }

    /* Validate buffers and emit dirty state if needed. */
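    /* A flush marks all state dirty again, so the flushed case takes the
     * same path as a first draw. */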
    if (first_draw || flushed) {
        r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS, index_buffer);
        r300_emit_dirty_state(r300);
        if (hw_index_bias) {
            if (r300->screen->caps.has_tcl)
                r500_emit_index_bias(r300, index_bias);
            else
                r500_emit_index_bias(r300, 0);
        }

        if (emit_aos)
            r300_emit_aos(r300, aos_offset, indexed);

        if (emit_aos_swtcl)
            r300_emit_aos_swtcl(r300, indexed);
    }

    if (end_cs_dwords)
        *end_cs_dwords = end_dwords;
}

static boolean immd_is_good_idea(struct r300_context *r300,
                                 unsigned count)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    boolean checked[PIPE_MAX_ATTRIBS] = {0};
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, vbi;

    if (DBG_ON(r300, DBG_NO_IMMD)) {
        return FALSE;
    }

    if (r300->draw) {
        return FALSE;
    }

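    /* Immediate mode copies every vertex dword into the command stream,
     * so it only pays off for very small draws. */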
    if (count > 10) {
        return FALSE;
    }

    /* We shouldn't map buffers referenced by the CS, busy buffers,
     * or buffers placed in VRAM. */
    /* XXX Check for VRAM buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        vbi = velem->vertex_buffer_index;

        if (!checked[vbi]) {
            vbuf = &r300->vertex_buffer[vbi];

            if (r300_buffer_is_referenced(&r300->context,
                                          vbuf->buffer,
                                          R300_REF_CS | R300_REF_HW)) {
                /* It's a very bad idea to map it... */
                return FALSE;
            }
            checked[vbi] = TRUE;
        }
    }
    return TRUE;
}

/*****************************************************************************
 * The emission of draw packets for r500. Older GPUs may use these functions *
 * after resolving fallback issues (e.g. stencil ref two-sided).             *
 ****************************************************************************/

static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
                                            unsigned mode,
                                            unsigned start,
                                            unsigned count)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, v, vbi, dw, elem_offset, dwords;

    /* Size of the vertex, in dwords. */
    unsigned vertex_size = 0;

    /* Offsets of the attribute, in dwords, from the start of the vertex. */
    unsigned offset[PIPE_MAX_ATTRIBS];

    /* Size of the vertex element, in dwords. */
    unsigned size[PIPE_MAX_ATTRIBS];

    /* Stride to the same attrib in the next vertex in the vertex buffer,
     * in dwords. */
    unsigned stride[PIPE_MAX_ATTRIBS] = {0};

    /* Mapped vertex buffers. */
    uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
    struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL};

    CS_LOCALS(r300);

    /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        offset[i] = velem->src_offset / 4;
        size[i] = r300->velems->hw_format_size[i] / 4;
        vertex_size += size[i];
        vbi = velem->vertex_buffer_index;

        /* Map the buffer. */
        if (!map[vbi]) {
            vbuf = &r300->vertex_buffer[vbi];
            map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
                                                  vbuf->buffer,
                                                  PIPE_TRANSFER_READ,
                                                  &transfer[vbi]);
            map[vbi] += vbuf->buffer_offset / 4;
            stride[vbi] = vbuf->stride / 4;
        }
    }

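    /* 9 dwords of setup: 2 for GA_COLOR_CONTROL, 2 for VAP_VTX_SIZE,
     * 3 for the VAP_VF_MAX_VTX_INDX register pair (header + 2 values) and
     * 2 for the 3D_DRAW_IMMD_2 header plus VF_CNTL, followed by
     * count * vertex_size dwords of inlined vertex data. */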
    dwords = 9 + count * vertex_size;

    r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(count - 1);
    OUT_CS(0);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
            r300_translate_primitive(mode));

    /* Emit vertices. */
    for (v = 0; v < count; v++) {
        for (i = 0; i < vertex_element_count; i++) {
            velem = &r300->velems->velem[i];
            vbi = velem->vertex_buffer_index;
            elem_offset = offset[i] + stride[vbi] * (v + start);

            for (dw = 0; dw < size[i]; dw++) {
                OUT_CS(map[vbi][elem_offset + dw]);
            }
        }
    }
    END_CS;

    /* Unmap buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        vbi = r300->velems->velem[i].vertex_buffer_index;

        if (map[vbi]) {
            vbuf = &r300->vertex_buffer[vbi];
            pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
            map[vbi] = NULL;
        }
    }
}

static void r300_emit_draw_arrays(struct r300_context *r300,
                                  unsigned mode,
                                  unsigned count)
{
    boolean alt_num_verts = count > 65535;
    CS_LOCALS(r300);

    if (count >= (1 << 24)) {
        fprintf(stderr, "r300: Got a huge number of vertices: %i, "
                "refusing to render.\n", count);
        return;
    }

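    /* The vertex count in VAP_VF_CNTL is only 16 bits wide (count << 16);
     * bigger draws also program R500_VAP_ALT_NUM_VERTICES and set
     * USE_ALT_NUM_VERTS. The 1<<24 refusal above presumably matches the
     * width of that register. */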
    BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
    if (alt_num_verts) {
        OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    }
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(count - 1);
    OUT_CS(0);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300_translate_primitive(mode) |
           (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    END_CS;
}

static void r300_emit_draw_elements(struct r300_context *r300,
                                    struct pipe_resource* indexBuffer,
                                    unsigned indexSize,
                                    unsigned minIndex,
                                    unsigned maxIndex,
                                    unsigned mode,
                                    unsigned start,
                                    unsigned count)
{
    uint32_t count_dwords;
    uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
    boolean alt_num_verts = count > 65535;
    CS_LOCALS(r300);

    if (count >= (1 << 24)) {
        fprintf(stderr, "r300: Got a huge number of vertices: %i, "
                "refusing to render.\n", count);
        return;
    }

    maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);

    DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
        count, minIndex, maxIndex);

    BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
    if (alt_num_verts) {
        OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    }
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(maxIndex);
    OUT_CS(minIndex);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
    if (indexSize == 4) {
        count_dwords = count;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    } else {
        count_dwords = (count + 1) / 2;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    }

    /* INDX_BUFFER is a truly special packet3.
     * Unlike most other packet3s, where the offset comes after the count,
     * here the order is reversed, so the relocation ends up carrying the
     * size of the index buffer instead of the offset.
     */
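    /* The dword after the header selects a one-register write into
     * VAP_PORT_IDX0, the next one is the byte offset into the index buffer,
     * and the relocation carries count_dwords, i.e. the number of dwords of
     * indices to fetch. */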
    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
           (0 << R300_INDX_BUFFER_SKIP_SHIFT));
    OUT_CS(offset_dwords << 2);
    OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
                     r300_buffer(indexBuffer)->domain, 0, 0);

    END_CS;
}

/* This is the fast-path drawing & emission for HW TCL. */
static void r300_draw_range_elements(struct pipe_context* pipe,
                                     struct pipe_resource* indexBuffer,
                                     unsigned indexSize,
                                     int indexBias,
                                     unsigned minIndex,
                                     unsigned maxIndex,
                                     unsigned mode,
                                     unsigned start,
                                     unsigned count)
{
    struct r300_context* r300 = r300_context(pipe);
    struct pipe_resource* orgIndexBuffer = indexBuffer;
    boolean alt_num_verts = r300->screen->caps.is_r500 &&
                            count > 65536 &&
                            r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
    unsigned short_count;
    int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
    boolean translate = FALSE;

    if (r300->skip_rendering) {
        return;
    }

    if (!u_trim_pipe_prim(mode, &count)) {
        return;
    }

    /* Set up fallback for incompatible vertex layout if needed. */
    if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
        r300_begin_vertex_translate(r300);
        translate = TRUE;
    }

    if (indexBias && !index_bias_supported(r300)) {
        r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
    }

    r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
                                &start, count);

    r300_update_derived_state(r300);
    r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count);

    /* 15 dwords for emit_draw_elements */
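    /* (13 for the draw packets themselves plus 2 for
     * R500_VAP_ALT_NUM_VERTICES, see r300_emit_draw_elements) */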
    r300_prepare_for_rendering(r300,
        PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
        indexBuffer, 15, buffer_offset, indexBias, NULL);

    u_upload_flush(r300->upload_vb);
    u_upload_flush(r300->upload_ib);
    if (alt_num_verts || count <= 65535) {
        r300_emit_draw_elements(r300, indexBuffer, indexSize,
                                minIndex, maxIndex, mode, start, count);
    } else {
        do {
            short_count = MIN2(count, 65534);
            r300_emit_draw_elements(r300, indexBuffer, indexSize,
                                    minIndex, maxIndex,
                                    mode, start, short_count);

            start += short_count;
            count -= short_count;

            /* 15 dwords for emit_draw_elements */
            if (count) {
                r300_prepare_for_rendering(r300,
                    PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
                    indexBuffer, 15, buffer_offset, indexBias, NULL);
            }
        } while (count);
    }

    if (indexBuffer != orgIndexBuffer) {
        pipe_resource_reference(&indexBuffer, NULL);
    }

    if (translate) {
        r300_end_vertex_translate(r300);
    }
}

/* Simple helpers for context setup. Should probably be moved to util. */
static void r300_draw_elements(struct pipe_context* pipe,
                               struct pipe_resource* indexBuffer,
                               unsigned indexSize, int indexBias, unsigned mode,
                               unsigned start, unsigned count)
{
    struct r300_context *r300 = r300_context(pipe);

    pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
                              0, r300->vertex_buffer_max_index,
                              mode, start, count);
}

static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
                             unsigned start, unsigned count)
{
    struct r300_context* r300 = r300_context(pipe);
    boolean alt_num_verts = r300->screen->caps.is_r500 &&
                            count > 65536 &&
                            r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
    unsigned short_count;
    boolean translate = FALSE;

    if (r300->skip_rendering) {
        return;
    }

    if (!u_trim_pipe_prim(mode, &count)) {
        return;
    }

    /* Set up fallback for incompatible vertex layout if needed. */
    if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
        r300_begin_vertex_translate(r300);
        translate = TRUE;
    }

    r300_update_derived_state(r300);

    if (immd_is_good_idea(r300, count)) {
        r300_emit_draw_arrays_immediate(r300, mode, start, count);
    } else {
        /* 9 spare dwords for emit_draw_arrays. */
        r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
                               NULL, 9, start, 0, NULL);

        if (alt_num_verts || count <= 65535) {
            r300_emit_draw_arrays(r300, mode, count);
        } else {
            do {
                short_count = MIN2(count, 65535);
                r300_emit_draw_arrays(r300, mode, short_count);

                start += short_count;
                count -= short_count;

                /* 9 spare dwords for emit_draw_arrays. */
                if (count) {
                    r300_prepare_for_rendering(r300,
                        PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
                        start, 0, NULL);
                }
            } while (count);
        }
        u_upload_flush(r300->upload_vb);
    }

    if (translate) {
        r300_end_vertex_translate(r300);
    }
}

/****************************************************************************
 * The rest of this file is for SW TCL rendering only. Please be polite and *
 * keep these functions separated so that they are easier to locate. ~C.    *
 ***************************************************************************/

/* SW TCL arrays, using Draw. */
static void r300_swtcl_draw_arrays(struct pipe_context* pipe,
                                   unsigned mode,
                                   unsigned start,
                                   unsigned count)
{
    struct r300_context* r300 = r300_context(pipe);
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
    int i;

    if (r300->skip_rendering) {
        return;
    }

    if (!u_trim_pipe_prim(mode, &count)) {
        return;
    }

    r300_update_derived_state(r300);

    for (i = 0; i < r300->vertex_buffer_count; i++) {
        void* buf = pipe_buffer_map(pipe,
                                    r300->vertex_buffer[i].buffer,
                                    PIPE_TRANSFER_READ,
                                    &vb_transfer[i]);
        draw_set_mapped_vertex_buffer(r300->draw, i, buf);
    }

    draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL);

    draw_arrays(r300->draw, mode, start, count);

    /* XXX Not sure whether this is the best fix.
     * It prevents CS from being rejected and weird assertion failures. */
    draw_flush(r300->draw);

    for (i = 0; i < r300->vertex_buffer_count; i++) {
        pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
                          vb_transfer[i]);
        draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
    }
}

/* SW TCL elements, using Draw. */
static void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                           struct pipe_resource* indexBuffer,
                                           unsigned indexSize,
                                           int indexBias,
                                           unsigned minIndex,
                                           unsigned maxIndex,
                                           unsigned mode,
                                           unsigned start,
                                           unsigned count)
{
    struct r300_context* r300 = r300_context(pipe);
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
    struct pipe_transfer *ib_transfer;
    int i;
    void* indices;

    if (r300->skip_rendering) {
        return;
    }

    if (!u_trim_pipe_prim(mode, &count)) {
        return;
    }

    r300_update_derived_state(r300);

    for (i = 0; i < r300->vertex_buffer_count; i++) {
        void* buf = pipe_buffer_map(pipe,
                                    r300->vertex_buffer[i].buffer,
                                    PIPE_TRANSFER_READ,
                                    &vb_transfer[i]);
        draw_set_mapped_vertex_buffer(r300->draw, i, buf);
    }

    indices = pipe_buffer_map(pipe, indexBuffer,
                              PIPE_TRANSFER_READ, &ib_transfer);
    draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias,
                                         minIndex, maxIndex, indices);

    draw_arrays(r300->draw, mode, start, count);

    /* XXX Not sure whether this is the best fix.
     * It prevents CS from being rejected and weird assertion failures. */
    draw_flush(r300->draw);

    for (i = 0; i < r300->vertex_buffer_count; i++) {
        pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
                          vb_transfer[i]);
        draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
    }

    pipe_buffer_unmap(pipe, indexBuffer,
                      ib_transfer);
    draw_set_mapped_element_buffer_range(r300->draw, 0, 0,
                                         start, start + count - 1,
                                         NULL);
}

/* Object for rendering using Draw. */
struct r300_render {
    /* Parent class */
    struct vbuf_render base;

    /* Pipe context */
    struct r300_context* r300;

    /* Vertex information */
    size_t vertex_size;
    unsigned prim;
    unsigned hwprim;

    /* VBO */
    struct pipe_resource* vbo;
    size_t vbo_size;
    size_t vbo_offset;
    size_t vbo_max_used;
    void * vbo_ptr;

    struct pipe_transfer *vbo_transfer;
};
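
/* The functions below implement the vbuf_render interface from
 * draw/draw_vbuf.h: Draw hands us post-transform vertices and we emit them
 * through the same CS machinery as the HW TCL path above. */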

static INLINE struct r300_render*
r300_render(struct vbuf_render* render)
{
    return (struct r300_render*)render;
}

static const struct vertex_info*
r300_render_get_vertex_info(struct vbuf_render* render)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;

    return &r300->vertex_info;
}

static boolean r300_render_allocate_vertices(struct vbuf_render* render,
                                                   ushort vertex_size,
                                                   ushort count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    struct pipe_screen* screen = r300->context.screen;
    size_t size = (size_t)vertex_size * (size_t)count;

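    /* Batches are packed into one shared VBO at increasing vbo_offset; once
     * the next batch no longer fits, allocate a fresh buffer of
     * R300_MAX_DRAW_VBO_SIZE and start over at offset 0. */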
    if (size + r300render->vbo_offset > r300render->vbo_size)
    {
        pipe_resource_reference(&r300->vbo, NULL);
        r300render->vbo = pipe_buffer_create(screen,
                                             PIPE_BIND_VERTEX_BUFFER,
                                             R300_MAX_DRAW_VBO_SIZE);
        r300render->vbo_offset = 0;
        r300render->vbo_size = R300_MAX_DRAW_VBO_SIZE;
    }

    r300render->vertex_size = vertex_size;
    r300->vbo = r300render->vbo;
    r300->vbo_offset = r300render->vbo_offset;

    return (r300render->vbo) ? TRUE : FALSE;
}

static void* r300_render_map_vertices(struct vbuf_render* render)
{
    struct r300_render* r300render = r300_render(render);

    assert(!r300render->vbo_transfer);

    r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
                                          r300render->vbo,
                                          PIPE_TRANSFER_WRITE,
                                          &r300render->vbo_transfer);

    return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
}

static void r300_render_unmap_vertices(struct vbuf_render* render,
                                             ushort min,
                                             ushort max)
{
    struct r300_render* r300render = r300_render(render);
    struct pipe_context* context = &r300render->r300->context;

    assert(r300render->vbo_transfer);

    r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
                                    r300render->vertex_size * (max + 1));
    pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer);

    r300render->vbo_transfer = NULL;
}

static void r300_render_release_vertices(struct vbuf_render* render)
{
    struct r300_render* r300render = r300_render(render);

    r300render->vbo_offset += r300render->vbo_max_used;
    r300render->vbo_max_used = 0;
}

static boolean r300_render_set_primitive(struct vbuf_render* render,
                                               unsigned prim)
{
    struct r300_render* r300render = r300_render(render);

    r300render->prim = prim;
    r300render->hwprim = r300_translate_primitive(prim);

    return TRUE;
}

static void r300_render_draw_arrays(struct vbuf_render* render,
                                    unsigned start,
                                    unsigned count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    uint8_t* ptr;
    unsigned i;
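    /* 6 dwords: 2 for GA_COLOR_CONTROL, 2 for VAP_VF_MAX_VTX_INDX and
     * 2 for the 3D_DRAW_VBUF_2 packet (header plus VF_CNTL). */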
    unsigned dwords = 6;

    CS_LOCALS(r300);

    (void) i; (void) ptr;

    r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
                               NULL, dwords, 0, 0, NULL);

    DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);

    /* Uncomment to dump all VBOs rendered through this interface.
     * Slow and noisy!
    ptr = pipe_buffer_map(&r300render->r300->context,
                          r300render->vbo, PIPE_TRANSFER_READ,
                          &r300render->vbo_transfer);

    for (i = 0; i < count; i++) {
        printf("r300: Vertex %d\n", i);
        draw_dump_emitted_vertex(&r300->vertex_info, ptr);
        ptr += r300->vertex_info.size * 4;
        printf("\n");
    }

    pipe_buffer_unmap(&r300render->r300->context, r300render->vbo,
        r300render->vbo_transfer);
    */

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, r300render->prim));
    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300render->hwprim);
    END_CS;
}

static void r300_render_draw_elements(struct vbuf_render* render,
                                      const ushort* indices,
                                      uint count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    int i;
    unsigned end_cs_dwords;
    unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) /
                         (r300render->r300->vertex_info.size * 4) - 1;
    unsigned short_count;
    struct r300_cs_info cs_info;

    CS_LOCALS(r300);

    /* Reserve at least 256 dwords.
     *
     * Below we manage the CS space manually because there may be more
     * indices than can fit in the CS. */
    r300_prepare_for_rendering(r300,
        PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
        NULL, 256, 0, 0, &end_cs_dwords);

    while (count) {
        r300->rws->get_cs_info(r300->rws, &cs_info);

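        /* Two 16-bit indices fit into each CS dword and the draw itself
         * needs 6 dwords of setup, so at most
         * (free - end_cs_dwords - 6) * 2 indices go into this batch. */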
        short_count = MIN2(count, (cs_info.free - end_cs_dwords - 6) * 2);

        BEGIN_CS(6 + (short_count+1)/2);
        OUT_CS_REG(R300_GA_COLOR_CONTROL,
                r300_provoking_vertex_fixes(r300, r300render->prim));
        OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);
        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2);
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) |
               r300render->hwprim);
        for (i = 0; i < short_count-1; i += 2) {
            OUT_CS(indices[i+1] << 16 | indices[i]);
        }
        if (short_count % 2) {
            OUT_CS(indices[short_count-1]);
        }
        END_CS;

        /* OK now subtract the emitted indices and see if we need to emit
         * another draw packet. */
        indices += short_count;
        count -= short_count;

        if (count) {
            r300_prepare_for_rendering(r300,
                PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
                NULL, 256, 0, 0, &end_cs_dwords);
        }
    }
}

static void r300_render_destroy(struct vbuf_render* render)
{
    FREE(render);
}

static struct vbuf_render* r300_render_create(struct r300_context* r300)
{
    struct r300_render* r300render = CALLOC_STRUCT(r300_render);

    r300render->r300 = r300;

    /* XXX find real numbers plz */
    r300render->base.max_vertex_buffer_bytes = 128 * 1024;
    r300render->base.max_indices = 16 * 1024;

    r300render->base.get_vertex_info = r300_render_get_vertex_info;
    r300render->base.allocate_vertices = r300_render_allocate_vertices;
    r300render->base.map_vertices = r300_render_map_vertices;
    r300render->base.unmap_vertices = r300_render_unmap_vertices;
    r300render->base.set_primitive = r300_render_set_primitive;
    r300render->base.draw_elements = r300_render_draw_elements;
    r300render->base.draw_arrays = r300_render_draw_arrays;
    r300render->base.release_vertices = r300_render_release_vertices;
    r300render->base.destroy = r300_render_destroy;

    r300render->vbo = NULL;
    r300render->vbo_size = 0;
    r300render->vbo_offset = 0;

    return &r300render->base;
}

struct draw_stage* r300_draw_stage(struct r300_context* r300)
{
    struct vbuf_render* render;
    struct draw_stage* stage;

    render = r300_render_create(r300);

    if (!render) {
        return NULL;
    }

    stage = draw_vbuf_stage(r300->draw, render);

    if (!stage) {
        render->destroy(render);
        return NULL;
    }

    draw_set_render(r300->draw, render);

    return stage;
}

void r300_init_render_functions(struct r300_context *r300)
{
    /* Set generic functions. */
    r300->context.draw_elements = r300_draw_elements;

    /* Set draw functions based on presence of HW TCL. */
    if (r300->screen->caps.has_tcl) {
        r300->context.draw_arrays = r300_draw_arrays;
        r300->context.draw_range_elements = r300_draw_range_elements;
    } else {
        r300->context.draw_arrays = r300_swtcl_draw_arrays;
        r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
    }

    /* Plug in the two-sided stencil reference value fallback if needed. */
    if (!r300->screen->caps.is_r500)
        r300_plug_in_stencil_ref_fallback(r300);
}