r300_state.c revision 53228fe2a8dafe102a3d764de97ee963ecfb4508
1/*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24#include "draw/draw_context.h"
25
26#include "util/u_framebuffer.h"
27#include "util/u_half.h"
28#include "util/u_helpers.h"
29#include "util/u_math.h"
30#include "util/u_mm.h"
31#include "util/u_memory.h"
32#include "util/u_pack_color.h"
33#include "util/u_transfer.h"
34
35#include "tgsi/tgsi_parse.h"
36
37#include "pipe/p_config.h"
38
39#include "r300_cb.h"
40#include "r300_context.h"
41#include "r300_emit.h"
42#include "r300_reg.h"
43#include "r300_screen.h"
44#include "r300_screen_buffer.h"
45#include "r300_state_inlines.h"
46#include "r300_fs.h"
47#include "r300_texture.h"
48#include "r300_vs.h"
49
/* r300_state: Functions used to initialize state context by translating
 * Gallium state objects into semi-native r300 state objects. */
52
/* Store `cso` into `atom.state` and mark the atom dirty, but only when the
 * stored pointer actually changes.
 *
 * The expansion refers to a variable named `r300`, so that name must be in
 * scope at every use site.
 * NOTE(review): the expansion is a bare `if` statement (no do/while(0)
 * wrapper) and the arguments are not parenthesized; avoid using it as the
 * body of an unbraced if/else. */
#define UPDATE_STATE(cso, atom) \
    if (cso != atom.state) { \
        atom.state = cso;    \
        r300_mark_atom_dirty(r300, &(atom));   \
    }
58
59static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
60                                            unsigned dstRGB, unsigned dstA)
61{
62    /* If the blend equation is ADD or REVERSE_SUBTRACT,
63     * SRC_ALPHA == 0, and the following state is set, the colorbuffer
64     * will not be changed.
65     * Notice that the dst factors are the src factors inverted. */
66    return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
67            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
68            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
69           (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
70            srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
71            srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
72            srcA == PIPE_BLENDFACTOR_ZERO) &&
73           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
74            dstRGB == PIPE_BLENDFACTOR_ONE) &&
75           (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
76            dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
77            dstA == PIPE_BLENDFACTOR_ONE);
78}
79
80static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
81                                            unsigned dstRGB, unsigned dstA)
82{
83    /* If the blend equation is ADD or REVERSE_SUBTRACT,
84     * SRC_ALPHA == 1, and the following state is set, the colorbuffer
85     * will not be changed.
86     * Notice that the dst factors are the src factors inverted. */
87    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
88            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
89           (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
90            srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
91            srcA == PIPE_BLENDFACTOR_ZERO) &&
92           (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
93            dstRGB == PIPE_BLENDFACTOR_ONE) &&
94           (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
95            dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
96            dstA == PIPE_BLENDFACTOR_ONE);
97}
98
99static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
100                                            unsigned dstRGB, unsigned dstA)
101{
102    /* If the blend equation is ADD or REVERSE_SUBTRACT,
103     * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
104     * will not be changed.
105     * Notice that the dst factors are the src factors inverted. */
106    return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
107            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
108           (srcA == PIPE_BLENDFACTOR_ZERO) &&
109           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
110            dstRGB == PIPE_BLENDFACTOR_ONE) &&
111           (dstA == PIPE_BLENDFACTOR_ONE);
112}
113
114static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
115                                            unsigned dstRGB, unsigned dstA)
116{
117    /* If the blend equation is ADD or REVERSE_SUBTRACT,
118     * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
119     * will not be changed.
120     * Notice that the dst factors are the src factors inverted. */
121    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
122            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
123           (srcA == PIPE_BLENDFACTOR_ZERO) &&
124           (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
125            dstRGB == PIPE_BLENDFACTOR_ONE) &&
126           (dstA == PIPE_BLENDFACTOR_ONE);
127}
128
129static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
130                                                  unsigned dstRGB, unsigned dstA)
131{
132    /* If the blend equation is ADD or REVERSE_SUBTRACT,
133     * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
134     * the colorbuffer will not be changed.
135     * Notice that the dst factors are the src factors inverted. */
136    return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
137            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
138            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
139            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
140           (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
141            srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
142            srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
143            srcA == PIPE_BLENDFACTOR_ZERO) &&
144           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
145            dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
146            dstRGB == PIPE_BLENDFACTOR_ONE) &&
147           (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
148            dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
149            dstA == PIPE_BLENDFACTOR_ONE);
150}
151
152static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
153                                                  unsigned dstRGB, unsigned dstA)
154{
155    /* If the blend equation is ADD or REVERSE_SUBTRACT,
156     * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
157     * the colorbuffer will not be changed.
158     * Notice that the dst factors are the src factors inverted. */
159    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
160            srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
161            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
162           (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
163            srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
164            srcA == PIPE_BLENDFACTOR_ZERO) &&
165           (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
166            dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
167            dstRGB == PIPE_BLENDFACTOR_ONE) &&
168           (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
169            dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
170            dstA == PIPE_BLENDFACTOR_ONE);
171}
172
173static unsigned blend_discard_conditionally(unsigned eqRGB, unsigned eqA,
174                                            unsigned dstRGB, unsigned dstA,
175                                            unsigned srcRGB, unsigned srcA)
176{
177    unsigned blend_control = 0;
178
179    /* Optimization: discard pixels which don't change the colorbuffer.
180     *
181     * The code below is non-trivial and some math is involved.
182     *
183     * Discarding pixels must be disabled when FP16 AA is enabled.
184     * This is a hardware bug. Also, this implementation wouldn't work
185     * with FP blending enabled and equation clamping disabled.
186     *
187     * Equations other than ADD are rarely used and therefore won't be
188     * optimized. */
189    if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
190        (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
191        /* ADD: X+Y
192         * REVERSE_SUBTRACT: Y-X
193         *
194         * The idea is:
195         * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
196         * then CB will not be changed.
197         *
198         * Given the srcFactor and dstFactor variables, we can derive
199         * what src and dst should be equal to and discard appropriate
200         * pixels.
201         */
202        if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
203            blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
204        } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
205                                                dstRGB, dstA)) {
206            blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
207        } else if (blend_discard_if_src_color_0(srcRGB, srcA,
208                                                dstRGB, dstA)) {
209            blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
210        } else if (blend_discard_if_src_color_1(srcRGB, srcA,
211                                                dstRGB, dstA)) {
212            blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
213        } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
214                                                      dstRGB, dstA)) {
215            blend_control |=
216                R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
217        } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
218                                                      dstRGB, dstA)) {
219            blend_control |=
220                R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
221        }
222    }
223    return blend_control;
224}
225
/* The hardware colormask is clunky and must be swizzled depending on the format.
 * This was figured out by trial-and-error. */
228static unsigned bgra_cmask(unsigned mask)
229{
230    return ((mask & PIPE_MASK_R) << 2) |
231           ((mask & PIPE_MASK_B) >> 2) |
232           (mask & (PIPE_MASK_G | PIPE_MASK_A));
233}
234
235static unsigned rgba_cmask(unsigned mask)
236{
237    return mask & PIPE_MASK_RGBA;
238}
239
240static unsigned rrrr_cmask(unsigned mask)
241{
242    return (mask & PIPE_MASK_R) |
243           ((mask & PIPE_MASK_R) << 1) |
244           ((mask & PIPE_MASK_R) << 2) |
245           ((mask & PIPE_MASK_R) << 3);
246}
247
248static unsigned aaaa_cmask(unsigned mask)
249{
250    return ((mask & PIPE_MASK_A) >> 3) |
251           ((mask & PIPE_MASK_A) >> 2) |
252           ((mask & PIPE_MASK_A) >> 1) |
253           (mask & PIPE_MASK_A);
254}
255
256static unsigned grrg_cmask(unsigned mask)
257{
258    return ((mask & PIPE_MASK_R) << 1) |
259           ((mask & PIPE_MASK_R) << 2) |
260           ((mask & PIPE_MASK_G) >> 1) |
261           ((mask & PIPE_MASK_G) << 2);
262}
263
264static unsigned arra_cmask(unsigned mask)
265{
266    return ((mask & PIPE_MASK_R) << 1) |
267           ((mask & PIPE_MASK_R) << 2) |
268           ((mask & PIPE_MASK_A) >> 3) |
269           (mask & PIPE_MASK_A);
270}
271
272static unsigned blend_read_enable(unsigned eqRGB, unsigned eqA,
273                                  unsigned dstRGB, unsigned dstA,
274                                  unsigned srcRGB, unsigned srcA,
275                                  boolean src_alpha_optz)
276{
277    unsigned blend_control = 0;
278
279    /* Optimization: some operations do not require the destination color.
280     *
281     * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
282     * otherwise blending gives incorrect results. It seems to be
283     * a hardware bug. */
284    if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
285        eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
286        dstRGB != PIPE_BLENDFACTOR_ZERO ||
287        dstA != PIPE_BLENDFACTOR_ZERO ||
288        srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
289        srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
290        srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
291        srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
292        srcA == PIPE_BLENDFACTOR_DST_COLOR ||
293        srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
294        srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
295        srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
296        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
297        /* Enable reading from the colorbuffer. */
298        blend_control |= R300_READ_ENABLE;
299
300        if (src_alpha_optz) {
301            /* Optimization: Depending on incoming pixels, we can
302             * conditionally disable the reading in hardware... */
303            if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
304                eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
305                /* Disable reading if SRC_ALPHA == 0. */
306                if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
307                     dstRGB == PIPE_BLENDFACTOR_ZERO) &&
308                    (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
309                     dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
310                     dstA == PIPE_BLENDFACTOR_ZERO) &&
311                    (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
312                     srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
313                     srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
314                     srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
315                     blend_control |= R500_SRC_ALPHA_0_NO_READ;
316                }
317
318                /* Disable reading if SRC_ALPHA == 1. */
319                if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
320                     dstRGB == PIPE_BLENDFACTOR_ZERO) &&
321                    (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
322                     dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
323                     dstA == PIPE_BLENDFACTOR_ZERO) &&
324                    (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
325                     srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
326                     srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
327                     srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
328                     blend_control |= R500_SRC_ALPHA_1_NO_READ;
329                }
330            }
331        }
332    }
333    return blend_control;
334}
335
336/* Create a new blend state based on the CSO blend state.
337 *
338 * This encompasses alpha blending, logic/raster ops, and blend dithering. */
339static void* r300_create_blend_state(struct pipe_context* pipe,
340                                     const struct pipe_blend_state* state)
341{
342    struct r300_screen* r300screen = r300_screen(pipe->screen);
343    struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
344    uint32_t blend_control = 0;       /* R300_RB3D_CBLEND: 0x4e04 */
345    uint32_t blend_control_noclamp = 0;    /* R300_RB3D_CBLEND: 0x4e04 */
346    uint32_t blend_control_noalpha = 0;    /* R300_RB3D_CBLEND: 0x4e04 */
347    uint32_t blend_control_noalpha_noclamp = 0;    /* R300_RB3D_CBLEND: 0x4e04 */
348    uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
349    uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
350    uint32_t alpha_blend_control_noalpha = 0; /* R300_RB3D_ABLEND: 0x4e08 */
351    uint32_t alpha_blend_control_noalpha_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
352    uint32_t rop = 0;                 /* R300_RB3D_ROPCNTL: 0x4e18 */
353    uint32_t dither = 0;              /* R300_RB3D_DITHER_CTL: 0x4e50 */
354    int i;
355
356    const unsigned eqRGB = state->rt[0].rgb_func;
357    const unsigned srcRGB = state->rt[0].rgb_src_factor;
358    const unsigned dstRGB = state->rt[0].rgb_dst_factor;
359
360    const unsigned eqA = state->rt[0].alpha_func;
361    const unsigned srcA = state->rt[0].alpha_src_factor;
362    const unsigned dstA = state->rt[0].alpha_dst_factor;
363
364    unsigned srcRGBX = srcRGB;
365    unsigned dstRGBX = dstRGB;
366    CB_LOCALS;
367
368    blend->state = *state;
369
370    /* force DST_ALPHA to ONE where we can */
371    switch (srcRGBX) {
372    case PIPE_BLENDFACTOR_DST_ALPHA:
373        srcRGBX = PIPE_BLENDFACTOR_ONE;
374        break;
375    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
376        srcRGBX = PIPE_BLENDFACTOR_ZERO;
377        break;
378    }
379
380    switch (dstRGBX) {
381    case PIPE_BLENDFACTOR_DST_ALPHA:
382        dstRGBX = PIPE_BLENDFACTOR_ONE;
383        break;
384    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
385        dstRGBX = PIPE_BLENDFACTOR_ZERO;
386        break;
387    }
388
389    /* Get blending register values. */
390    if (state->rt[0].blend_enable) {
391        unsigned blend_eq, blend_eq_noclamp;
392
393        /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
394         * this is just the crappy D3D naming */
395        blend_control = blend_control_noclamp =
396            R300_ALPHA_BLEND_ENABLE |
397            ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
398            ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
399
400        blend_control_noalpha = blend_control_noalpha_noclamp =
401            R300_ALPHA_BLEND_ENABLE |
402            ( r300_translate_blend_factor(srcRGBX) << R300_SRC_BLEND_SHIFT) |
403            ( r300_translate_blend_factor(dstRGBX) << R300_DST_BLEND_SHIFT);
404
405        blend_eq = r300_translate_blend_function(eqRGB, TRUE);
406        blend_eq_noclamp = r300_translate_blend_function(eqRGB, FALSE);
407
408        blend_control |= blend_eq;
409        blend_control_noalpha |= blend_eq;
410        blend_control_noclamp |= blend_eq_noclamp;
411        blend_control_noalpha_noclamp |= blend_eq_noclamp;
412
413        /* Optimization: some operations do not require the destination color. */
414        blend_control |= blend_read_enable(eqRGB, eqA, dstRGB, dstA,
415                                           srcRGB, srcA, r300screen->caps.is_r500);
416        blend_control_noclamp |= blend_read_enable(eqRGB, eqA, dstRGB, dstA,
417                                                   srcRGB, srcA, FALSE);
418        blend_control_noalpha |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA,
419                                                   srcRGBX, srcA, r300screen->caps.is_r500);
420        blend_control_noalpha_noclamp |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA,
421                                                           srcRGBX, srcA, FALSE);
422
423        /* Optimization: discard pixels which don't change the colorbuffer.
424         * It cannot be used with FP16 AA. */
425        blend_control |= blend_discard_conditionally(eqRGB, eqA, dstRGB, dstA,
426                                                     srcRGB, srcA);
427        blend_control_noalpha |= blend_discard_conditionally(eqRGB, eqA, dstRGBX, dstA,
428                                                             srcRGBX, srcA);
429
430        /* separate alpha */
431        if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
432            blend_control |= R300_SEPARATE_ALPHA_ENABLE;
433            blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
434
435            alpha_blend_control = alpha_blend_control_noclamp =
436                (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
437                (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
438            alpha_blend_control |= r300_translate_blend_function(eqA, TRUE);
439            alpha_blend_control_noclamp |= r300_translate_blend_function(eqA, FALSE);
440        }
441        if (srcA != srcRGBX || dstA != dstRGBX || eqA != eqRGB) {
442            blend_control_noalpha |= R300_SEPARATE_ALPHA_ENABLE;
443            blend_control_noalpha_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
444
445            alpha_blend_control_noalpha = alpha_blend_control_noalpha_noclamp =
446                (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
447                (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
448            alpha_blend_control_noalpha |= r300_translate_blend_function(eqA, TRUE);
449            alpha_blend_control_noalpha_noclamp |= r300_translate_blend_function(eqA, FALSE);
450        }
451    }
452
453    /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
454    if (state->logicop_enable) {
455        rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
456                (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
457    }
458
459    /* Neither fglrx nor classic r300 ever set this, regardless of dithering
460     * state. Since it's an optional implementation detail, we can leave it
461     * out and never dither.
462     *
463     * This could be revisited if we ever get quality or conformance hints.
464     *
465    if (state->dither) {
466        dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
467                        R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
468    }
469    */
470
471    /* Build a command buffer. */
472    {
473        unsigned (*func[COLORMASK_NUM_SWIZZLES])(unsigned) = {
474            bgra_cmask,
475            rgba_cmask,
476            rrrr_cmask,
477            aaaa_cmask,
478            grrg_cmask,
479            arra_cmask,
480            bgra_cmask,
481            rgba_cmask
482        };
483
484        for (i = 0; i < COLORMASK_NUM_SWIZZLES; i++) {
485            boolean has_alpha = i != COLORMASK_RGBX && i != COLORMASK_BGRX;
486
487            BEGIN_CB(blend->cb_clamp[i], 8);
488            OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
489            OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
490            OUT_CB(has_alpha ? blend_control : blend_control_noalpha);
491            OUT_CB(has_alpha ? alpha_blend_control : alpha_blend_control_noalpha);
492            OUT_CB(func[i](state->rt[0].colormask));
493            OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
494            END_CB;
495        }
496    }
497
498    /* Build a command buffer (for RGBA16F). */
499    BEGIN_CB(blend->cb_noclamp, 8);
500    OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
501    OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
502    OUT_CB(blend_control_noclamp);
503    OUT_CB(alpha_blend_control_noclamp);
504    OUT_CB(rgba_cmask(state->rt[0].colormask));
505    OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
506    END_CB;
507
508    /* Build a command buffer (for RGB16F). */
509    BEGIN_CB(blend->cb_noclamp_noalpha, 8);
510    OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
511    OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
512    OUT_CB(blend_control_noalpha_noclamp);
513    OUT_CB(alpha_blend_control_noalpha_noclamp);
514    OUT_CB(rgba_cmask(state->rt[0].colormask));
515    OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
516    END_CB;
517
518    /* The same as above, but with no colorbuffer reads and writes. */
519    BEGIN_CB(blend->cb_no_readwrite, 8);
520    OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
521    OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
522    OUT_CB(0);
523    OUT_CB(0);
524    OUT_CB(0);
525    OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
526    END_CB;
527
528    return (void*)blend;
529}
530
531/* Bind blend state. */
532static void r300_bind_blend_state(struct pipe_context* pipe,
533                                  void* state)
534{
535    struct r300_context* r300 = r300_context(pipe);
536    struct r300_blend_state *blend  = (struct r300_blend_state*)state;
537    boolean last_alpha_to_one = r300->alpha_to_one;
538    boolean last_alpha_to_coverage = r300->alpha_to_coverage;
539
540    UPDATE_STATE(state, r300->blend_state);
541
542    if (!blend)
543        return;
544
545    r300->alpha_to_one = blend->state.alpha_to_one;
546    r300->alpha_to_coverage = blend->state.alpha_to_coverage;
547
548    if (r300->alpha_to_one != last_alpha_to_one && r300->msaa_enable &&
549        r300->fs_status == FRAGMENT_SHADER_VALID) {
550        r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
551    }
552
553    if (r300->alpha_to_coverage != last_alpha_to_coverage &&
554        r300->msaa_enable) {
555        r300_mark_atom_dirty(r300, &r300->dsa_state);
556    }
557}
558
/* Free blend state.
 *
 * `pipe` is unused; `state` is handed straight to FREE(). */
static void r300_delete_blend_state(struct pipe_context* pipe,
                                    void* state)
{
    (void)pipe;

    FREE(state);
}
565
/* Convert float to 10bit integer.
 *
 * The input is scaled by 1023.9 and truncated, and the result is always in
 * the range 0..1023. The clamping is done on the float value *before* the
 * conversion to unsigned: converting a negative or very large float to an
 * unsigned integer is undefined behaviour in C, and clamping an already
 * converted unsigned value against 0 has no effect.
 *
 * Negative inputs and NaN yield 0; inputs that scale to 1023 or more
 * (including +infinity) yield 1023. */
static unsigned float_to_fixed10(float f)
{
    const float scaled = f * 1023.9f;

    /* Written as !(x > 0) so that NaN takes this branch as well. */
    if (!(scaled > 0.0f)) {
        return 0;
    }
    if (scaled >= 1023.0f) {
        return 1023;
    }
    return (unsigned)scaled;
}
571
572/* Set blend color.
573 * Setup both R300 and R500 registers, figure out later which one to write. */
574static void r300_set_blend_color(struct pipe_context* pipe,
575                                 const struct pipe_blend_color* color)
576{
577    struct r300_context* r300 = r300_context(pipe);
578    struct pipe_framebuffer_state *fb = r300->fb_state.state;
579    struct r300_blend_color_state *state =
580        (struct r300_blend_color_state*)r300->blend_color_state.state;
581    struct pipe_blend_color c;
582    enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
583    float tmp;
584    CB_LOCALS;
585
586    state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
587    c = *color;
588
589    /* The blend color is dependent on the colorbuffer format. */
590    if (fb->nr_cbufs) {
591        switch (format) {
592        case PIPE_FORMAT_R8_UNORM:
593        case PIPE_FORMAT_L8_UNORM:
594        case PIPE_FORMAT_I8_UNORM:
595            c.color[1] = c.color[0];
596            break;
597
598        case PIPE_FORMAT_A8_UNORM:
599            c.color[1] = c.color[3];
600            break;
601
602        case PIPE_FORMAT_R8G8_UNORM:
603            c.color[2] = c.color[1];
604            break;
605
606        case PIPE_FORMAT_L8A8_UNORM:
607        case PIPE_FORMAT_R8A8_UNORM:
608            c.color[2] = c.color[3];
609            break;
610
611        case PIPE_FORMAT_R8G8B8A8_UNORM:
612        case PIPE_FORMAT_R8G8B8X8_UNORM:
613            tmp = c.color[0];
614            c.color[0] = c.color[2];
615            c.color[2] = tmp;
616            break;
617
618        default:;
619        }
620    }
621
622    if (r300->screen->caps.is_r500) {
623        BEGIN_CB(state->cb, 3);
624        OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
625
626        switch (format) {
627        case PIPE_FORMAT_R16G16B16A16_FLOAT:
628        case PIPE_FORMAT_R16G16B16X16_FLOAT:
629            OUT_CB(util_float_to_half(c.color[2]) |
630                   (util_float_to_half(c.color[3]) << 16));
631            OUT_CB(util_float_to_half(c.color[0]) |
632                   (util_float_to_half(c.color[1]) << 16));
633            break;
634
635        default:
636            OUT_CB(float_to_fixed10(c.color[0]) |
637                   (float_to_fixed10(c.color[3]) << 16));
638            OUT_CB(float_to_fixed10(c.color[2]) |
639                   (float_to_fixed10(c.color[1]) << 16));
640        }
641
642        END_CB;
643    } else {
644        union util_color uc;
645        util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
646
647        BEGIN_CB(state->cb, 2);
648        OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
649        END_CB;
650    }
651
652    r300_mark_atom_dirty(r300, &r300->blend_color_state);
653}
654
655static void r300_set_clip_state(struct pipe_context* pipe,
656                                const struct pipe_clip_state* state)
657{
658    struct r300_context* r300 = r300_context(pipe);
659    struct r300_clip_state *clip =
660            (struct r300_clip_state*)r300->clip_state.state;
661    CB_LOCALS;
662
663    if (r300->screen->caps.has_tcl) {
664        BEGIN_CB(clip->cb, r300->clip_state.size);
665        OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
666                   (r300->screen->caps.is_r500 ?
667                    R500_PVS_UCP_START : R300_PVS_UCP_START));
668        OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
669        OUT_CB_TABLE(state->ucp, 6 * 4);
670        END_CB;
671
672        r300_mark_atom_dirty(r300, &r300->clip_state);
673    } else {
674        draw_set_clip_state(r300->draw, state);
675    }
676}
677
678/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
679 *
680 * This contains the depth buffer, stencil buffer, alpha test, and such.
681 * On the Radeon, depth and stencil buffer setup are intertwined, which is
682 * the reason for some of the strange-looking assignments across registers. */
683static void* r300_create_dsa_state(struct pipe_context* pipe,
684                          const struct pipe_depth_stencil_alpha_state* state)
685{
686    boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
687    struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
688    CB_LOCALS;
689    uint32_t alpha_value_fp16 = 0;
690    uint32_t z_buffer_control = 0;
691    uint32_t z_stencil_control = 0;
692    uint32_t stencil_ref_mask = 0;
693    uint32_t stencil_ref_bf = 0;
694
695    dsa->dsa = *state;
696
697    /* Depth test setup. - separate write mask depth for decomp flush */
698    if (state->depth.writemask) {
699        z_buffer_control |= R300_Z_WRITE_ENABLE;
700    }
701
702    if (state->depth.enabled) {
703        z_buffer_control |= R300_Z_ENABLE;
704
705        z_stencil_control |=
706            (r300_translate_depth_stencil_function(state->depth.func) <<
707                R300_Z_FUNC_SHIFT);
708    }
709
710    /* Stencil buffer setup. */
711    if (state->stencil[0].enabled) {
712        z_buffer_control |= R300_STENCIL_ENABLE;
713        z_stencil_control |=
714            (r300_translate_depth_stencil_function(state->stencil[0].func) <<
715                R300_S_FRONT_FUNC_SHIFT) |
716            (r300_translate_stencil_op(state->stencil[0].fail_op) <<
717                R300_S_FRONT_SFAIL_OP_SHIFT) |
718            (r300_translate_stencil_op(state->stencil[0].zpass_op) <<
719                R300_S_FRONT_ZPASS_OP_SHIFT) |
720            (r300_translate_stencil_op(state->stencil[0].zfail_op) <<
721                R300_S_FRONT_ZFAIL_OP_SHIFT);
722
723        stencil_ref_mask =
724                (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
725                (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
726
727        if (state->stencil[1].enabled) {
728            dsa->two_sided = TRUE;
729
730            z_buffer_control |= R300_STENCIL_FRONT_BACK;
731            z_stencil_control |=
732            (r300_translate_depth_stencil_function(state->stencil[1].func) <<
733                R300_S_BACK_FUNC_SHIFT) |
734            (r300_translate_stencil_op(state->stencil[1].fail_op) <<
735                R300_S_BACK_SFAIL_OP_SHIFT) |
736            (r300_translate_stencil_op(state->stencil[1].zpass_op) <<
737                R300_S_BACK_ZPASS_OP_SHIFT) |
738            (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
739                R300_S_BACK_ZFAIL_OP_SHIFT);
740
741            stencil_ref_bf =
742                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
743                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
744
745            if (is_r500) {
746                z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
747            } else {
748                dsa->two_sided_stencil_ref =
749                  (state->stencil[0].valuemask != state->stencil[1].valuemask ||
750                   state->stencil[0].writemask != state->stencil[1].writemask);
751            }
752        }
753    }
754
755    /* Alpha test setup. */
756    if (state->alpha.enabled) {
757        dsa->alpha_function =
758            r300_translate_alpha_function(state->alpha.func) |
759            R300_FG_ALPHA_FUNC_ENABLE;
760
761        dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
762        alpha_value_fp16 = util_float_to_half(state->alpha.ref_value);
763    }
764
765    BEGIN_CB(&dsa->cb_begin, 8);
766    OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
767    OUT_CB(z_buffer_control);
768    OUT_CB(z_stencil_control);
769    OUT_CB(stencil_ref_mask);
770    OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, stencil_ref_bf);
771    OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16);
772    END_CB;
773
774    BEGIN_CB(dsa->cb_zb_no_readwrite, 8);
775    OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
776    OUT_CB(0);
777    OUT_CB(0);
778    OUT_CB(0);
779    OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
780    OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16);
781    END_CB;
782
783    return (void*)dsa;
784}
785
786static void r300_dsa_inject_stencilref(struct r300_context *r300)
787{
788    struct r300_dsa_state *dsa =
789            (struct r300_dsa_state*)r300->dsa_state.state;
790
791    if (!dsa)
792        return;
793
794    dsa->stencil_ref_mask =
795        (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) |
796        r300->stencil_ref.ref_value[0];
797    dsa->stencil_ref_bf =
798        (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) |
799        r300->stencil_ref.ref_value[1];
800}
801
802/* Bind DSA state. */
803static void r300_bind_dsa_state(struct pipe_context* pipe,
804                                void* state)
805{
806    struct r300_context* r300 = r300_context(pipe);
807
808    if (!state) {
809        return;
810    }
811
812    UPDATE_STATE(state, r300->dsa_state);
813
814    r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */
815    r300_dsa_inject_stencilref(r300);
816}
817
/* Free DSA state.
 *
 * Releases the state object with FREE(); the pipe context argument
 * is not used. */
static void r300_delete_dsa_state(struct pipe_context* pipe,
                                  void* state)
{
    FREE(state);
}
824
/* Set the stencil reference values.
 *
 * The values are stored in the context, injected into the currently bound
 * DSA state (if any), and the DSA atom is marked dirty. */
static void r300_set_stencil_ref(struct pipe_context* pipe,
                                 const struct pipe_stencil_ref* sr)
{
    struct r300_context* r300 = r300_context(pipe);

    /* Keep a copy so that a later DSA bind can inject the same values. */
    r300->stencil_ref = *sr;

    r300_dsa_inject_stencilref(r300);
    r300_mark_atom_dirty(r300, &r300->dsa_state);
}
835
836static void r300_tex_set_tiling_flags(struct r300_context *r300,
837                                      struct r300_resource *tex,
838                                      unsigned level)
839{
840    /* Check if the macrotile flag needs to be changed.
841     * Skip changing the flags otherwise. */
842    if (tex->tex.macrotile[tex->surface_level] !=
843        tex->tex.macrotile[level]) {
844        r300->rws->buffer_set_tiling(tex->buf, r300->cs,
845                tex->tex.microtile, tex->tex.macrotile[level],
846                0, 0, 0, 0, 0,
847                tex->tex.stride_in_bytes[0]);
848
849        tex->surface_level = level;
850    }
851}
852
853/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
854static void r300_fb_set_tiling_flags(struct r300_context *r300,
855                               const struct pipe_framebuffer_state *state)
856{
857    unsigned i;
858
859    /* Set tiling flags for new surfaces. */
860    for (i = 0; i < state->nr_cbufs; i++) {
861        r300_tex_set_tiling_flags(r300,
862                                  r300_resource(state->cbufs[i]->texture),
863                                  state->cbufs[i]->u.tex.level);
864    }
865    if (state->zsbuf) {
866        r300_tex_set_tiling_flags(r300,
867                                  r300_resource(state->zsbuf->texture),
868                                  state->zsbuf->u.tex.level);
869    }
870}
871
872static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
873                                    const char *binding)
874{
875    struct pipe_resource *tex = surf->texture;
876    struct r300_resource *rtex = r300_resource(tex);
877
878    fprintf(stderr,
879            "r300:   %s[%i] Dim: %ix%i, Firstlayer: %i, "
880            "Lastlayer: %i, Level: %i, Format: %s\n"
881
882            "r300:     TEX: Macro: %s, Micro: %s, "
883            "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
884
885            binding, index, surf->width, surf->height,
886            surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
887            util_format_short_name(surf->format),
888
889            rtex->tex.macrotile[0] ? "YES" : " NO",
890            rtex->tex.microtile ? "YES" : " NO",
891            tex->width0, tex->height0, tex->depth0,
892            tex->last_level, util_format_short_name(surf->format));
893}
894
895void r300_mark_fb_state_dirty(struct r300_context *r300,
896                              enum r300_fb_state_change change)
897{
898    struct pipe_framebuffer_state *state = r300->fb_state.state;
899
900    r300_mark_atom_dirty(r300, &r300->gpu_flush);
901    r300_mark_atom_dirty(r300, &r300->fb_state);
902
903    /* What is marked as dirty depends on the enum r300_fb_state_change. */
904    if (change == R300_CHANGED_FB_STATE) {
905        r300_mark_atom_dirty(r300, &r300->aa_state);
906        r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */
907        r300_set_blend_color(&r300->context, r300->blend_color_state.state);
908    }
909
910    if (change == R300_CHANGED_FB_STATE ||
911        change == R300_CHANGED_HYPERZ_FLAG) {
912        r300_mark_atom_dirty(r300, &r300->hyperz_state);
913    }
914
915    if (change == R300_CHANGED_FB_STATE ||
916        change == R300_CHANGED_MULTIWRITE) {
917        r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
918    }
919
920    /* Now compute the fb_state atom size. */
921    r300->fb_state.size = 2 + (8 * state->nr_cbufs);
922
923    if (r300->cbzb_clear)
924        r300->fb_state.size += 10;
925    else if (state->zsbuf) {
926        r300->fb_state.size += 10;
927        if (r300->hyperz_enabled)
928            r300->fb_state.size += 8;
929    }
930
931    if (r300->cmask_in_use) {
932        r300->fb_state.size += 6;
933        if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) {
934            r300->fb_state.size += 3;
935        }
936    }
937
938    /* The size of the rest of atoms stays the same. */
939}
940
941static unsigned r300_get_num_samples(struct r300_context *r300)
942{
943    struct pipe_framebuffer_state* fb =
944            (struct pipe_framebuffer_state*)r300->fb_state.state;
945    unsigned i, num_samples;
946
947    if (!fb->nr_cbufs && !fb->zsbuf)
948        return 1;
949
950    num_samples = 6;
951
952    for (i = 0; i < fb->nr_cbufs; i++)
953        num_samples = MIN2(num_samples, fb->cbufs[i]->texture->nr_samples);
954
955    if (fb->zsbuf)
956        num_samples = MIN2(num_samples, fb->zsbuf->texture->nr_samples);
957
958    if (!num_samples)
959        num_samples = 1;
960
961    return num_samples;
962}
963
/* Bind a new framebuffer state.
 *
 * In order, this function:
 *  - refuses framebuffers larger than the per-chip maximum,
 *  - decompresses, locks or unlocks the zbuffer when a zmask is in use and
 *    the zbuffer binding changes,
 *  - recomputes whether CMASK can be used,
 *  - marks the dependent atoms dirty and copies the new state into the
 *    context,
 *  - updates the cached zbuffer bit depth, sample count and AA config,
 *  - prints the bound surfaces when DBG_FB is enabled.
 *
 * pipe:  the context to update
 * state: the framebuffer state to bind */
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
                           const struct pipe_framebuffer_state* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
    struct pipe_framebuffer_state *old_state = r300->fb_state.state;
    unsigned max_width, max_height, i;
    uint32_t zbuffer_bpp = 0;
    boolean unlock_zbuffer = FALSE;

    /* Maximum render target dimensions depend on the chip family. */
    if (r300->screen->caps.is_r500) {
        max_width = max_height = 4096;
    } else if (r300->screen->caps.is_r400) {
        max_width = max_height = 4021;
    } else {
        max_width = max_height = 2560;
    }

    /* Oversized framebuffers are rejected; the old state stays bound. */
    if (state->width > max_width || state->height > max_height) {
        fprintf(stderr, "r300: Implementation error: Render targets are too "
        "big in %s, refusing to bind framebuffer state!\n", __FUNCTION__);
        return;
    }

    if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
        /* There is a zmask in use, what are we gonna do? */
        if (state->zsbuf) {
            if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
                /* Decompress the currently bound zbuffer before we bind another one. */
                r300_decompress_zmask(r300);
                r300->hiz_in_use = FALSE;
            }
        } else {
            /* We don't bind another zbuffer, so lock the current one. */
            pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
        }
    } else if (r300->locked_zbuffer) {
        /* We have a locked zbuffer now, what are we gonna do? */
        if (state->zsbuf) {
            if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
                /* We are binding some other zbuffer, so decompress the locked one,
                 * it gets unlocked automatically. */
                r300_decompress_zmask_locked_unsafe(r300);
                r300->hiz_in_use = FALSE;
            } else {
                /* We are binding the locked zbuffer again, so unlock it. */
                unlock_zbuffer = TRUE;
            }
        }
    }
    /* Sanity check: a zmask may only remain in use if a zbuffer is being
     * bound or a zbuffer stays locked. */
    assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);

    /* Set whether CMASK can be used. */
    r300->cmask_in_use =
        state->nr_cbufs == 1 &&
        r300->screen->cmask_resource == state->cbufs[0]->texture;

    /* Need to reset clamping or colormask. */
    r300_mark_atom_dirty(r300, &r300->blend_state);

    /* Re-swizzle the blend color. */
    r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);

    /* If zsbuf is set from NULL to non-NULL or vice versa.. */
    if (!!old_state->zsbuf != !!state->zsbuf) {
        r300_mark_atom_dirty(r300, &r300->dsa_state);
    }

    if (r300->screen->info.drm_minor < 12) {
       /* The tiling flags are dependent on the surface miplevel, unfortunately.
        * This workarounds a bad design decision in old kernels which were
        * rewriting tile fields in registers. */
        r300_fb_set_tiling_flags(r300, state);
    }

    /* From here on r300->fb_state.state (and thus old_state) holds the new
     * framebuffer state. */
    util_copy_framebuffer_state(r300->fb_state.state, state);

    /* The unlock is deferred until after the new state has been copied. */
    if (unlock_zbuffer) {
        pipe_surface_reference(&r300->locked_zbuffer, NULL);
    }

    r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);

    if (state->zsbuf) {
        /* Derive the zbuffer bit depth from the format block size;
         * any other block size leaves it at 0. */
        switch (util_format_get_blocksize(state->zsbuf->format)) {
        case 2:
            zbuffer_bpp = 16;
            break;
        case 4:
            zbuffer_bpp = 24;
            break;
        }

        /* Polygon offset depends on the zbuffer bit depth. */
        if (r300->zbuffer_bpp != zbuffer_bpp) {
            r300->zbuffer_bpp = zbuffer_bpp;

            if (r300->polygon_offset_enabled)
                r300_mark_atom_dirty(r300, &r300->rs_state);
        }
    }

    /* Computed from the state that was just copied into the context. */
    r300->num_samples = r300_get_num_samples(r300);

    /* Set up AA config. */
    if (r300->num_samples > 1) {
        /* Sample counts other than 2, 4 and 6 leave aa_config unchanged. */
        switch (r300->num_samples) {
        case 2:
            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
            break;
        case 4:
            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
            break;
        case 6:
            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
            break;
        }
    } else {
        aa->aa_config = 0;
    }

    /* Optional debug dump of all bound surfaces. */
    if (DBG_ON(r300, DBG_FB)) {
        fprintf(stderr, "r300: set_framebuffer_state:\n");
        for (i = 0; i < state->nr_cbufs; i++) {
            r300_print_fb_surf_info(state->cbufs[i], i, "CB");
        }
        if (state->zsbuf) {
            r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
        }
    }
}
1099
1100/* Create fragment shader state. */
1101static void* r300_create_fs_state(struct pipe_context* pipe,
1102                                  const struct pipe_shader_state* shader)
1103{
1104    struct r300_fragment_shader* fs = NULL;
1105
1106    fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
1107
1108    /* Copy state directly into shader. */
1109    fs->state = *shader;
1110    fs->state.tokens = tgsi_dup_tokens(shader->tokens);
1111
1112    return (void*)fs;
1113}
1114
1115void r300_mark_fs_code_dirty(struct r300_context *r300)
1116{
1117    struct r300_fragment_shader* fs = r300_fs(r300);
1118
1119    r300_mark_atom_dirty(r300, &r300->fs);
1120    r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1121    r300_mark_atom_dirty(r300, &r300->fs_constants);
1122    r300->fs.size = fs->shader->cb_code_size;
1123
1124    if (r300->screen->caps.is_r500) {
1125        r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
1126        r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
1127    } else {
1128        r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
1129        r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
1130    }
1131
1132    ((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
1133            fs->shader->code.constants_remap_table;
1134}
1135
1136/* Bind fragment shader state. */
1137static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
1138{
1139    struct r300_context* r300 = r300_context(pipe);
1140    struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
1141
1142    if (fs == NULL) {
1143        r300->fs.state = NULL;
1144        return;
1145    }
1146
1147    r300->fs.state = fs;
1148    r300->fs_status = FRAGMENT_SHADER_DIRTY;
1149
1150    r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
1151}
1152
1153/* Delete fragment shader state. */
1154static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
1155{
1156    struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
1157    struct r300_fragment_shader_code *tmp, *ptr = fs->first;
1158
1159    while (ptr) {
1160        tmp = ptr;
1161        ptr = ptr->next;
1162        rc_constants_destroy(&tmp->code.constants);
1163        FREE(tmp->cb_code);
1164        FREE(tmp);
1165    }
1166    FREE((void*)fs->state.tokens);
1167    FREE(shader);
1168}
1169
/* Set the polygon stipple pattern.
 *
 * Currently a no-op: both arguments are ignored. */
static void r300_set_polygon_stipple(struct pipe_context* pipe,
                                     const struct pipe_poly_stipple* state)
{
    /* XXX no idea how to set this up, but not terribly important */
}
1175
1176/* Create a new rasterizer state based on the CSO rasterizer state.
1177 *
1178 * This is a very large chunk of state, and covers most of the graphics
1179 * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
1180 *
1181 * In a not entirely unironic sidenote, this state has nearly nothing to do
1182 * with the actual block on the Radeon called the rasterizer (RS). */
1183static void* r300_create_rs_state(struct pipe_context* pipe,
1184                                  const struct pipe_rasterizer_state* state)
1185{
1186    struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
1187    uint32_t vap_control_status;    /* R300_VAP_CNTL_STATUS: 0x2140 */
1188    uint32_t vap_clip_cntl;         /* R300_VAP_CLIP_CNTL: 0x221C */
1189    uint32_t point_size;            /* R300_GA_POINT_SIZE: 0x421c */
1190    uint32_t point_minmax;          /* R300_GA_POINT_MINMAX: 0x4230 */
1191    uint32_t line_control;          /* R300_GA_LINE_CNTL: 0x4234 */
1192    uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
1193    uint32_t cull_mode;             /* R300_SU_CULL_MODE: 0x42b8 */
1194    uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
1195    uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
1196    uint32_t polygon_mode;          /* R300_GA_POLY_MODE: 0x4288 */
1197    uint32_t clip_rule;             /* R300_SC_CLIP_RULE: 0x43D0 */
1198    uint32_t round_mode;            /* R300_GA_ROUND_MODE: 0x428c */
1199
1200    /* Point sprites texture coordinates, 0: lower left, 1: upper right */
1201    float point_texcoord_left = 0;  /* R300_GA_POINT_S0: 0x4200 */
1202    float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
1203    float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
1204    float point_texcoord_top = 0;   /* R300_GA_POINT_T1: 0x420c */
1205    boolean vclamp = !r300_context(pipe)->screen->caps.is_r500;
1206    CB_LOCALS;
1207
1208    /* Copy rasterizer state. */
1209    rs->rs = *state;
1210    rs->rs_draw = *state;
1211
1212    rs->rs.sprite_coord_enable = state->point_quad_rasterization *
1213                                 state->sprite_coord_enable;
1214
1215    /* Override some states for Draw. */
1216    rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
1217    rs->rs_draw.offset_point = 0;
1218    rs->rs_draw.offset_line = 0;
1219    rs->rs_draw.offset_tri = 0;
1220    rs->rs_draw.offset_clamp = 0;
1221
1222#ifdef PIPE_ARCH_LITTLE_ENDIAN
1223    vap_control_status = R300_VC_NO_SWAP;
1224#else
1225    vap_control_status = R300_VC_32BIT_SWAP;
1226#endif
1227
1228    /* If no TCL engine is present, turn off the HW TCL. */
1229    if (!r300_screen(pipe->screen)->caps.has_tcl) {
1230        vap_control_status |= R300_VAP_TCL_BYPASS;
1231    }
1232
1233    /* Point size width and height. */
1234    point_size =
1235        pack_float_16_6x(state->point_size) |
1236        (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
1237
1238    /* Point size clamping. */
1239    if (state->point_size_per_vertex) {
1240        /* Per-vertex point size.
1241         * Clamp to [0, max FB size] */
1242        float min_psiz = util_get_min_point_size(state);
1243        float max_psiz = pipe->screen->get_paramf(pipe->screen,
1244                                        PIPE_CAPF_MAX_POINT_WIDTH);
1245        point_minmax =
1246            (pack_float_16_6x(min_psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
1247            (pack_float_16_6x(max_psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
1248    } else {
1249        /* We cannot disable the point-size vertex output,
1250         * so clamp it. */
1251        float psiz = state->point_size;
1252        point_minmax =
1253            (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
1254            (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
1255    }
1256
1257    /* Line control. */
1258    line_control = pack_float_16_6x(state->line_width) |
1259        R300_GA_LINE_CNTL_END_TYPE_COMP;
1260
1261    /* Enable polygon mode */
1262    polygon_mode = 0;
1263    if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
1264        state->fill_back != PIPE_POLYGON_MODE_FILL) {
1265        polygon_mode = R300_GA_POLY_MODE_DUAL;
1266    }
1267
1268    /* Front face */
1269    if (state->front_ccw)
1270        cull_mode = R300_FRONT_FACE_CCW;
1271    else
1272        cull_mode = R300_FRONT_FACE_CW;
1273
1274    /* Polygon offset */
1275    polygon_offset_enable = 0;
1276    if (util_get_offset(state, state->fill_front)) {
1277       polygon_offset_enable |= R300_FRONT_ENABLE;
1278    }
1279    if (util_get_offset(state, state->fill_back)) {
1280       polygon_offset_enable |= R300_BACK_ENABLE;
1281    }
1282
1283    rs->polygon_offset_enable = polygon_offset_enable != 0;
1284
1285    /* Polygon mode */
1286    if (polygon_mode) {
1287       polygon_mode |=
1288          r300_translate_polygon_mode_front(state->fill_front);
1289       polygon_mode |=
1290          r300_translate_polygon_mode_back(state->fill_back);
1291    }
1292
1293    if (state->cull_face & PIPE_FACE_FRONT) {
1294        cull_mode |= R300_CULL_FRONT;
1295    }
1296    if (state->cull_face & PIPE_FACE_BACK) {
1297        cull_mode |= R300_CULL_BACK;
1298    }
1299
1300    if (state->line_stipple_enable) {
1301        line_stipple_config =
1302            R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
1303            (fui((float)state->line_stipple_factor) &
1304                R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
1305        /* XXX this might need to be scaled up */
1306        line_stipple_value = state->line_stipple_pattern;
1307    } else {
1308        line_stipple_config = 0;
1309        line_stipple_value = 0;
1310    }
1311
1312    if (state->flatshade) {
1313        rs->color_control = R300_SHADE_MODEL_FLAT;
1314    } else {
1315        rs->color_control = R300_SHADE_MODEL_SMOOTH;
1316    }
1317
1318    clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
1319
1320    /* Point sprites coord mode */
1321    if (rs->rs.sprite_coord_enable) {
1322        switch (state->sprite_coord_mode) {
1323            case PIPE_SPRITE_COORD_UPPER_LEFT:
1324                point_texcoord_top = 0.0f;
1325                point_texcoord_bottom = 1.0f;
1326                break;
1327            case PIPE_SPRITE_COORD_LOWER_LEFT:
1328                point_texcoord_top = 1.0f;
1329                point_texcoord_bottom = 0.0f;
1330                break;
1331        }
1332    }
1333
1334    if (r300_screen(pipe->screen)->caps.has_tcl) {
1335       vap_clip_cntl = (state->clip_plane_enable & 63) |
1336                       R300_PS_UCP_MODE_CLIP_AS_TRIFAN |
1337                       (state->depth_clip ? 0 : R300_CLIP_DISABLE);
1338    } else {
1339       vap_clip_cntl = R300_CLIP_DISABLE;
1340    }
1341
1342    /* Vertex color clamping. FP20 means no clamping. */
1343    round_mode =
1344      R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST |
1345      (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 |
1346                  R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0);
1347
1348    /* Build the main command buffer. */
1349    BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
1350    OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
1351    OUT_CB_REG(R300_VAP_CLIP_CNTL, vap_clip_cntl);
1352    OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
1353    OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
1354    OUT_CB(point_minmax);
1355    OUT_CB(line_control);
1356    OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
1357    OUT_CB(polygon_offset_enable);
1358    rs->cull_mode_index = 11;
1359    OUT_CB(cull_mode);
1360    OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
1361    OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
1362    OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
1363    OUT_CB_REG(R300_GA_ROUND_MODE, round_mode);
1364    OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
1365    OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
1366    OUT_CB_32F(point_texcoord_left);
1367    OUT_CB_32F(point_texcoord_bottom);
1368    OUT_CB_32F(point_texcoord_right);
1369    OUT_CB_32F(point_texcoord_top);
1370    END_CB;
1371
1372    /* Build the two command buffers for polygon offset setup. */
1373    if (polygon_offset_enable) {
1374        float scale = state->offset_scale * 12;
1375        float offset = state->offset_units * 4;
1376
1377        BEGIN_CB(rs->cb_poly_offset_zb16, 5);
1378        OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
1379        OUT_CB_32F(scale);
1380        OUT_CB_32F(offset);
1381        OUT_CB_32F(scale);
1382        OUT_CB_32F(offset);
1383        END_CB;
1384
1385        offset = state->offset_units * 2;
1386
1387        BEGIN_CB(rs->cb_poly_offset_zb24, 5);
1388        OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
1389        OUT_CB_32F(scale);
1390        OUT_CB_32F(offset);
1391        OUT_CB_32F(scale);
1392        OUT_CB_32F(offset);
1393        END_CB;
1394    }
1395
1396    return (void*)rs;
1397}
1398
1399/* Bind rasterizer state. */
1400static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
1401{
1402    struct r300_context* r300 = r300_context(pipe);
1403    struct r300_rs_state* rs = (struct r300_rs_state*)state;
1404    int last_sprite_coord_enable = r300->sprite_coord_enable;
1405    boolean last_two_sided_color = r300->two_sided_color;
1406    boolean last_msaa_enable = r300->msaa_enable;
1407    boolean last_flatshade = r300->flatshade;
1408
1409    if (r300->draw && rs) {
1410        draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
1411    }
1412
1413    if (rs) {
1414        r300->polygon_offset_enabled = rs->polygon_offset_enable;
1415        r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
1416        r300->two_sided_color = rs->rs.light_twoside;
1417        r300->msaa_enable = rs->rs.multisample;
1418        r300->flatshade = rs->rs.flatshade;
1419    } else {
1420        r300->polygon_offset_enabled = FALSE;
1421        r300->sprite_coord_enable = 0;
1422        r300->two_sided_color = FALSE;
1423        r300->msaa_enable = FALSE;
1424        r300->flatshade = FALSE;
1425    }
1426
1427    UPDATE_STATE(state, r300->rs_state);
1428    r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0);
1429
1430    if (last_sprite_coord_enable != r300->sprite_coord_enable ||
1431        last_two_sided_color != r300->two_sided_color ||
1432        last_flatshade != r300->flatshade) {
1433        r300_mark_atom_dirty(r300, &r300->rs_block_state);
1434    }
1435
1436    if (last_msaa_enable != r300->msaa_enable) {
1437        if (r300->alpha_to_coverage) {
1438            r300_mark_atom_dirty(r300, &r300->dsa_state);
1439        }
1440
1441        if (r300->alpha_to_one &&
1442            r300->fs_status == FRAGMENT_SHADER_VALID) {
1443            r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
1444        }
1445    }
1446}
1447
/* Free rasterizer state.
 *
 * Releases the state object with FREE(); the pipe context argument
 * is not used. */
static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
{
    FREE(state);
}
1453
1454static void*
1455        r300_create_sampler_state(struct pipe_context* pipe,
1456                                  const struct pipe_sampler_state* state)
1457{
1458    struct r300_context* r300 = r300_context(pipe);
1459    struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
1460    boolean is_r500 = r300->screen->caps.is_r500;
1461    int lod_bias;
1462
1463    sampler->state = *state;
1464
1465    /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
1466     * or MIN filter is NEAREST. Since texwrap produces same results
1467     * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
1468    if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
1469        sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
1470        /* Wrap S. */
1471        if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
1472            sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1473        else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
1474            sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1475
1476        /* Wrap T. */
1477        if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
1478            sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1479        else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
1480            sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1481
1482        /* Wrap R. */
1483        if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
1484            sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1485        else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
1486            sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1487    }
1488
1489    sampler->filter0 |=
1490        (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
1491        (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
1492        (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);
1493
1494    sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
1495                                                   state->mag_img_filter,
1496                                                   state->min_mip_filter,
1497                                                   state->max_anisotropy > 1);
1498
1499    sampler->filter0 |= r300_anisotropy(state->max_anisotropy);
1500
1501    /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
1502    /* We must pass these to the merge function to clamp them properly. */
1503    sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
1504    sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);
1505
1506    lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
1507
1508    sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
1509
1510    /* This is very high quality anisotropic filtering for R5xx.
1511     * It's good for benchmarking the performance of texturing but
1512     * in practice we don't want to slow down the driver because it's
1513     * a pretty good performance killer. Feel free to play with it. */
1514    if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) {
1515        sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
1516    }
1517
1518    /* R500-specific fixups and optimizations */
1519    if (r300->screen->caps.is_r500) {
1520        sampler->filter1 |= R500_BORDER_FIX;
1521    }
1522
1523    return (void*)sampler;
1524}
1525
1526static void r300_bind_sampler_states(struct pipe_context* pipe,
1527                                     unsigned count,
1528                                     void** states)
1529{
1530    struct r300_context* r300 = r300_context(pipe);
1531    struct r300_textures_state* state =
1532        (struct r300_textures_state*)r300->textures_state.state;
1533    unsigned tex_units = r300->screen->caps.num_tex_units;
1534
1535    if (count > tex_units) {
1536        return;
1537    }
1538
1539    memcpy(state->sampler_states, states, sizeof(void*) * count);
1540    state->sampler_state_count = count;
1541
1542    r300_mark_atom_dirty(r300, &r300->textures_state);
1543}
1544
/* Stub with the same signature as r300_bind_sampler_states that does
 * nothing; all arguments are ignored. Judging by the name it is meant for
 * the vertex sampler hook -- confirm against where it is installed. */
static void r300_lacks_vertex_textures(struct pipe_context* pipe,
                                       unsigned count,
                                       void** states)
{
}
1550
/* Free sampler state.
 *
 * Releases the state object with FREE(); the pipe context argument
 * is not used. */
static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
{
    FREE(state);
}
1555
1556static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
1557{
1558    /* This looks like a hack, but I believe it's suppose to work like
1559     * that. To illustrate how this works, let's assume you have 5 textures.
1560     * From docs, 5 and the successive numbers are:
1561     *
1562     * FOURTH_1     = 5
1563     * FOURTH_2     = 6
1564     * FOURTH_3     = 7
1565     * EIGHTH_0     = 8
1566     * EIGHTH_1     = 9
1567     *
1568     * First 3 textures will get 3/4 of size of the cache, divived evenly
1569     * between them. The last 1/4 of the cache must be divided between
1570     * the last 2 textures, each will therefore get 1/8 of the cache.
1571     * Why not just to use "5 + texture_index" ?
1572     *
1573     * This simple trick works for all "num" <= 16.
1574     */
1575    if (num <= 1)
1576        return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
1577    else
1578        return R300_TX_CACHE(num + index);
1579}
1580
1581static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
1582                                            unsigned count,
1583                                            struct pipe_sampler_view** views)
1584{
1585    struct r300_context* r300 = r300_context(pipe);
1586    struct r300_textures_state* state =
1587        (struct r300_textures_state*)r300->textures_state.state;
1588    struct r300_resource *texture;
1589    unsigned i, real_num_views = 0, view_index = 0;
1590    unsigned tex_units = r300->screen->caps.num_tex_units;
1591    boolean dirty_tex = FALSE;
1592
1593    if (count > tex_units) {
1594        return;
1595    }
1596
1597    /* Calculate the real number of views. */
1598    for (i = 0; i < count; i++) {
1599        if (views[i])
1600            real_num_views++;
1601    }
1602
1603    for (i = 0; i < count; i++) {
1604        pipe_sampler_view_reference(
1605                (struct pipe_sampler_view**)&state->sampler_views[i],
1606                views[i]);
1607
1608        if (!views[i]) {
1609            continue;
1610        }
1611
1612        /* A new sampler view (= texture)... */
1613        dirty_tex = TRUE;
1614
1615        /* Set the texrect factor in the fragment shader.
1616             * Needed for RECT and NPOT fallback. */
1617        texture = r300_resource(views[i]->texture);
1618        if (texture->tex.is_npot) {
1619            r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1620        }
1621
1622        state->sampler_views[i]->texcache_region =
1623                r300_assign_texture_cache_region(view_index, real_num_views);
1624        view_index++;
1625    }
1626
1627    for (i = count; i < tex_units; i++) {
1628        if (state->sampler_views[i]) {
1629            pipe_sampler_view_reference(
1630                    (struct pipe_sampler_view**)&state->sampler_views[i],
1631                    NULL);
1632        }
1633    }
1634
1635    state->sampler_view_count = count;
1636
1637    r300_mark_atom_dirty(r300, &r300->textures_state);
1638
1639    if (dirty_tex) {
1640        r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
1641    }
1642}
1643
1644struct pipe_sampler_view *
1645r300_create_sampler_view_custom(struct pipe_context *pipe,
1646                         struct pipe_resource *texture,
1647                         const struct pipe_sampler_view *templ,
1648                         unsigned width0_override,
1649                         unsigned height0_override)
1650{
1651    struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
1652    struct r300_resource *tex = r300_resource(texture);
1653    boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
1654    boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
1655
1656    if (view) {
1657        unsigned hwformat;
1658
1659        view->base = *templ;
1660        view->base.reference.count = 1;
1661        view->base.context = pipe;
1662        view->base.texture = NULL;
1663        pipe_resource_reference(&view->base.texture, texture);
1664
1665	view->width0_override = width0_override;
1666	view->height0_override = height0_override;
1667        view->swizzle[0] = templ->swizzle_r;
1668        view->swizzle[1] = templ->swizzle_g;
1669        view->swizzle[2] = templ->swizzle_b;
1670        view->swizzle[3] = templ->swizzle_a;
1671
1672        hwformat = r300_translate_texformat(templ->format,
1673                                            view->swizzle,
1674                                            is_r500,
1675                                            dxtc_swizzle);
1676
1677        if (hwformat == ~0) {
1678            fprintf(stderr, "r300: Ooops. Got unsupported format %s in %s.\n",
1679                    util_format_short_name(templ->format), __func__);
1680        }
1681        assert(hwformat != ~0);
1682
1683	r300_texture_setup_format_state(r300_screen(pipe->screen), tex,
1684					templ->format, 0,
1685	                                width0_override, height0_override,
1686					&view->format);
1687        view->format.format1 |= hwformat;
1688        if (is_r500) {
1689            view->format.format2 |= r500_tx_format_msb_bit(templ->format);
1690        }
1691    }
1692
1693    return (struct pipe_sampler_view*)view;
1694}
1695
1696static struct pipe_sampler_view *
1697r300_create_sampler_view(struct pipe_context *pipe,
1698                         struct pipe_resource *texture,
1699                         const struct pipe_sampler_view *templ)
1700{
1701    return r300_create_sampler_view_custom(pipe, texture, templ,
1702                                           r300_resource(texture)->tex.width0,
1703                                           r300_resource(texture)->tex.height0);
1704}
1705
1706
1707static void
1708r300_sampler_view_destroy(struct pipe_context *pipe,
1709                          struct pipe_sampler_view *view)
1710{
1711   pipe_resource_reference(&view->texture, NULL);
1712   FREE(view);
1713}
1714
1715static void r300_set_sample_mask(struct pipe_context *pipe,
1716                                 unsigned mask)
1717{
1718    struct r300_context* r300 = r300_context(pipe);
1719
1720    *((unsigned*)r300->sample_mask.state) = mask;
1721
1722    r300_mark_atom_dirty(r300, &r300->sample_mask);
1723}
1724
1725static void r300_set_scissor_state(struct pipe_context* pipe,
1726                                   const struct pipe_scissor_state* state)
1727{
1728    struct r300_context* r300 = r300_context(pipe);
1729
1730    memcpy(r300->scissor_state.state, state,
1731        sizeof(struct pipe_scissor_state));
1732
1733    r300_mark_atom_dirty(r300, &r300->scissor_state);
1734}
1735
1736static void r300_set_viewport_state(struct pipe_context* pipe,
1737                                    const struct pipe_viewport_state* state)
1738{
1739    struct r300_context* r300 = r300_context(pipe);
1740    struct r300_viewport_state* viewport =
1741        (struct r300_viewport_state*)r300->viewport_state.state;
1742
1743    r300->viewport = *state;
1744
1745    if (r300->draw) {
1746        draw_set_viewport_state(r300->draw, state);
1747        viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
1748        return;
1749    }
1750
1751    /* Do the transform in HW. */
1752    viewport->vte_control = R300_VTX_W0_FMT;
1753
1754    if (state->scale[0] != 1.0f) {
1755        viewport->xscale = state->scale[0];
1756        viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
1757    }
1758    if (state->scale[1] != 1.0f) {
1759        viewport->yscale = state->scale[1];
1760        viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
1761    }
1762    if (state->scale[2] != 1.0f) {
1763        viewport->zscale = state->scale[2];
1764        viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
1765    }
1766    if (state->translate[0] != 0.0f) {
1767        viewport->xoffset = state->translate[0];
1768        viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
1769    }
1770    if (state->translate[1] != 0.0f) {
1771        viewport->yoffset = state->translate[1];
1772        viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
1773    }
1774    if (state->translate[2] != 0.0f) {
1775        viewport->zoffset = state->translate[2];
1776        viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
1777    }
1778
1779    r300_mark_atom_dirty(r300, &r300->viewport_state);
1780    if (r300->fs.state && r300_fs(r300)->shader &&
1781        r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
1782        r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1783    }
1784}
1785
1786static void r300_set_vertex_buffers_hwtcl(struct pipe_context* pipe,
1787                                    unsigned start_slot, unsigned count,
1788                                    const struct pipe_vertex_buffer* buffers)
1789{
1790    struct r300_context* r300 = r300_context(pipe);
1791
1792    util_set_vertex_buffers_count(r300->vertex_buffer,
1793                                  &r300->nr_vertex_buffers,
1794                                  buffers, start_slot, count);
1795
1796    /* There must be at least one vertex buffer set, otherwise it locks up. */
1797    if (!r300->nr_vertex_buffers) {
1798        util_set_vertex_buffers_count(r300->vertex_buffer,
1799                                      &r300->nr_vertex_buffers,
1800                                      &r300->dummy_vb, 0, 1);
1801    }
1802
1803    r300->vertex_arrays_dirty = TRUE;
1804}
1805
1806static void r300_set_vertex_buffers_swtcl(struct pipe_context* pipe,
1807                                    unsigned start_slot, unsigned count,
1808                                    const struct pipe_vertex_buffer* buffers)
1809{
1810    struct r300_context* r300 = r300_context(pipe);
1811    unsigned i;
1812
1813    util_set_vertex_buffers_count(r300->vertex_buffer,
1814                                  &r300->nr_vertex_buffers,
1815                                  buffers, start_slot, count);
1816    draw_set_vertex_buffers(r300->draw, start_slot, count, buffers);
1817
1818    if (!buffers)
1819        return;
1820
1821    for (i = 0; i < count; i++) {
1822        if (buffers[i].user_buffer) {
1823            draw_set_mapped_vertex_buffer(r300->draw, start_slot + i,
1824                                          buffers[i].user_buffer);
1825        } else if (buffers[i].buffer) {
1826            draw_set_mapped_vertex_buffer(r300->draw, start_slot + i,
1827                r300_resource(buffers[i].buffer)->malloced_buffer);
1828        }
1829    }
1830}
1831
1832static void r300_set_index_buffer_hwtcl(struct pipe_context* pipe,
1833                                        const struct pipe_index_buffer *ib)
1834{
1835    struct r300_context* r300 = r300_context(pipe);
1836
1837    if (ib) {
1838        pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
1839        memcpy(&r300->index_buffer, ib, sizeof(*ib));
1840    } else {
1841        pipe_resource_reference(&r300->index_buffer.buffer, NULL);
1842    }
1843}
1844
1845static void r300_set_index_buffer_swtcl(struct pipe_context* pipe,
1846                                        const struct pipe_index_buffer *ib)
1847{
1848    struct r300_context* r300 = r300_context(pipe);
1849
1850    if (ib) {
1851        const void *buf = NULL;
1852        if (ib->user_buffer) {
1853            buf = ib->user_buffer;
1854        } else if (ib->buffer) {
1855            buf = r300_resource(ib->buffer)->malloced_buffer;
1856        }
1857        draw_set_indexes(r300->draw,
1858                         (const ubyte *) buf + ib->offset,
1859                         ib->index_size);
1860    }
1861}
1862
1863/* Initialize the PSC tables. */
1864static void r300_vertex_psc(struct r300_vertex_element_state *velems)
1865{
1866    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
1867    uint16_t type, swizzle;
1868    enum pipe_format format;
1869    unsigned i;
1870
1871    /* Vertex shaders have no semantics on their inputs,
1872     * so PSC should just route stuff based on the vertex elements,
1873     * and not on attrib information. */
1874    for (i = 0; i < velems->count; i++) {
1875        format = velems->velem[i].src_format;
1876
1877        type = r300_translate_vertex_data_type(format);
1878        if (type == R300_INVALID_FORMAT) {
1879            fprintf(stderr, "r300: Bad vertex format %s.\n",
1880                    util_format_short_name(format));
1881            assert(0);
1882            abort();
1883        }
1884
1885        type |= i << R300_DST_VEC_LOC_SHIFT;
1886        swizzle = r300_translate_vertex_data_swizzle(format);
1887
1888        if (i & 1) {
1889            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
1890            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
1891        } else {
1892            vstream->vap_prog_stream_cntl[i >> 1] |= type;
1893            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
1894        }
1895    }
1896
1897    /* Set the last vector in the PSC. */
1898    if (i) {
1899        i -= 1;
1900    }
1901    vstream->vap_prog_stream_cntl[i >> 1] |=
1902        (R300_LAST_VEC << (i & 1 ? 16 : 0));
1903
1904    vstream->count = (i >> 1) + 1;
1905}
1906
1907static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
1908                                               unsigned count,
1909                                               const struct pipe_vertex_element* attribs)
1910{
1911    struct r300_vertex_element_state *velems;
1912    unsigned i;
1913    struct pipe_vertex_element dummy_attrib = {0};
1914
1915    /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
1916    if (!count) {
1917        dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
1918        attribs = &dummy_attrib;
1919        count = 1;
1920    } else if (count > 16) {
1921        fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
1922                " requested %i, using 16.\n", count);
1923        count = 16;
1924    }
1925
1926    velems = CALLOC_STRUCT(r300_vertex_element_state);
1927    if (!velems)
1928        return NULL;
1929
1930    velems->count = count;
1931    memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
1932
1933    if (r300_screen(pipe->screen)->caps.has_tcl) {
1934        /* Setup PSC.
1935         * The unused components will be replaced by (..., 0, 1). */
1936        r300_vertex_psc(velems);
1937
1938        for (i = 0; i < count; i++) {
1939            velems->format_size[i] =
1940                align(util_format_get_blocksize(velems->velem[i].src_format), 4);
1941            velems->vertex_size_dwords += velems->format_size[i] / 4;
1942        }
1943    }
1944
1945    return velems;
1946}
1947
1948static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
1949                                            void *state)
1950{
1951    struct r300_context *r300 = r300_context(pipe);
1952    struct r300_vertex_element_state *velems = state;
1953
1954    if (velems == NULL) {
1955        return;
1956    }
1957
1958    r300->velems = velems;
1959
1960    if (r300->draw) {
1961        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
1962        return;
1963    }
1964
1965    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
1966    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
1967    r300->vertex_arrays_dirty = TRUE;
1968}
1969
/* Destroy a vertex elements state object by passing it to FREE().
 * The context argument is not used. */
static void r300_delete_vertex_elements_state(struct pipe_context *pipe,
                                              void *state)
{
    FREE(state);
}
1974
1975static void* r300_create_vs_state(struct pipe_context* pipe,
1976                                  const struct pipe_shader_state* shader)
1977{
1978    struct r300_context* r300 = r300_context(pipe);
1979    struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
1980
1981    /* Copy state directly into shader. */
1982    vs->state = *shader;
1983    vs->state.tokens = tgsi_dup_tokens(shader->tokens);
1984
1985    if (r300->screen->caps.has_tcl) {
1986        r300_init_vs_outputs(r300, vs);
1987        r300_translate_vertex_shader(r300, vs);
1988    } else {
1989        r300_draw_init_vertex_shader(r300, vs);
1990    }
1991
1992    return vs;
1993}
1994
1995static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
1996{
1997    struct r300_context* r300 = r300_context(pipe);
1998    struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
1999
2000    if (vs == NULL) {
2001        r300->vs_state.state = NULL;
2002        return;
2003    }
2004    if (vs == r300->vs_state.state) {
2005        return;
2006    }
2007    r300->vs_state.state = vs;
2008
2009    /* The majority of the RS block bits is dependent on the vertex shader. */
2010    r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
2011
2012    if (r300->screen->caps.has_tcl) {
2013        unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
2014        r300_mark_atom_dirty(r300, &r300->vs_state);
2015        r300->vs_state.size = vs->code.length + 9 +
2016			(R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
2017
2018        r300_mark_atom_dirty(r300, &r300->vs_constants);
2019        r300->vs_constants.size =
2020                2 +
2021                (vs->externals_count ? vs->externals_count * 4 + 3 : 0) +
2022                (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
2023
2024        ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
2025                vs->code.constants_remap_table;
2026
2027        r300_mark_atom_dirty(r300, &r300->pvs_flush);
2028    } else {
2029        draw_bind_vertex_shader(r300->draw,
2030                (struct draw_vertex_shader*)vs->draw_vs);
2031    }
2032}
2033
2034static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
2035{
2036    struct r300_context* r300 = r300_context(pipe);
2037    struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
2038
2039    if (r300->screen->caps.has_tcl) {
2040        rc_constants_destroy(&vs->code.constants);
2041        FREE(vs->code.constants_remap_table);
2042    } else {
2043        draw_delete_vertex_shader(r300->draw,
2044                (struct draw_vertex_shader*)vs->draw_vs);
2045    }
2046
2047    FREE((void*)vs->state.tokens);
2048    FREE(shader);
2049}
2050
2051static void r300_set_constant_buffer(struct pipe_context *pipe,
2052                                     uint shader, uint index,
2053                                     struct pipe_constant_buffer *cb)
2054{
2055    struct r300_context* r300 = r300_context(pipe);
2056    struct r300_constant_buffer *cbuf;
2057    uint32_t *mapped;
2058
2059    if (!cb || (!cb->buffer && !cb->user_buffer))
2060        return;
2061
2062    switch (shader) {
2063        case PIPE_SHADER_VERTEX:
2064            cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
2065            break;
2066        case PIPE_SHADER_FRAGMENT:
2067            cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
2068            break;
2069        default:
2070            return;
2071    }
2072
2073
2074    if (cb->user_buffer)
2075        mapped = (uint32_t*)cb->user_buffer;
2076    else {
2077        struct r300_resource *rbuf = r300_resource(cb->buffer);
2078
2079        if (rbuf && rbuf->malloced_buffer)
2080            mapped = (uint32_t*)rbuf->malloced_buffer;
2081        else
2082            return;
2083    }
2084
2085    if (shader == PIPE_SHADER_FRAGMENT ||
2086        (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
2087        cbuf->ptr = mapped;
2088    }
2089
2090    if (shader == PIPE_SHADER_VERTEX) {
2091        if (r300->screen->caps.has_tcl) {
2092            struct r300_vertex_shader *vs =
2093                    (struct r300_vertex_shader*)r300->vs_state.state;
2094
2095            if (!vs) {
2096                cbuf->buffer_base = 0;
2097                return;
2098            }
2099
2100            cbuf->buffer_base = r300->vs_const_base;
2101            r300->vs_const_base += vs->code.constants.Count;
2102            if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
2103                r300->vs_const_base = vs->code.constants.Count;
2104                cbuf->buffer_base = 0;
2105                r300_mark_atom_dirty(r300, &r300->pvs_flush);
2106            }
2107            r300_mark_atom_dirty(r300, &r300->vs_constants);
2108        } else if (r300->draw) {
2109            draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
2110                0, mapped, cb->buffer_size);
2111        }
2112    } else if (shader == PIPE_SHADER_FRAGMENT) {
2113        r300_mark_atom_dirty(r300, &r300->fs_constants);
2114    }
2115}
2116
2117static void r300_texture_barrier(struct pipe_context *pipe)
2118{
2119    struct r300_context *r300 = r300_context(pipe);
2120
2121    r300_mark_atom_dirty(r300, &r300->gpu_flush);
2122    r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
2123}
2124
2125void r300_init_state_functions(struct r300_context* r300)
2126{
2127    r300->context.create_blend_state = r300_create_blend_state;
2128    r300->context.bind_blend_state = r300_bind_blend_state;
2129    r300->context.delete_blend_state = r300_delete_blend_state;
2130
2131    r300->context.set_blend_color = r300_set_blend_color;
2132
2133    r300->context.set_clip_state = r300_set_clip_state;
2134    r300->context.set_sample_mask = r300_set_sample_mask;
2135
2136    r300->context.set_constant_buffer = r300_set_constant_buffer;
2137
2138    r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state;
2139    r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
2140    r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
2141
2142    r300->context.set_stencil_ref = r300_set_stencil_ref;
2143
2144    r300->context.set_framebuffer_state = r300_set_framebuffer_state;
2145
2146    r300->context.create_fs_state = r300_create_fs_state;
2147    r300->context.bind_fs_state = r300_bind_fs_state;
2148    r300->context.delete_fs_state = r300_delete_fs_state;
2149
2150    r300->context.set_polygon_stipple = r300_set_polygon_stipple;
2151
2152    r300->context.create_rasterizer_state = r300_create_rs_state;
2153    r300->context.bind_rasterizer_state = r300_bind_rs_state;
2154    r300->context.delete_rasterizer_state = r300_delete_rs_state;
2155
2156    r300->context.create_sampler_state = r300_create_sampler_state;
2157    r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
2158    r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
2159    r300->context.delete_sampler_state = r300_delete_sampler_state;
2160
2161    r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views;
2162    r300->context.create_sampler_view = r300_create_sampler_view;
2163    r300->context.sampler_view_destroy = r300_sampler_view_destroy;
2164
2165    r300->context.set_scissor_state = r300_set_scissor_state;
2166
2167    r300->context.set_viewport_state = r300_set_viewport_state;
2168
2169    if (r300->screen->caps.has_tcl) {
2170        r300->context.set_vertex_buffers = r300_set_vertex_buffers_hwtcl;
2171        r300->context.set_index_buffer = r300_set_index_buffer_hwtcl;
2172    } else {
2173        r300->context.set_vertex_buffers = r300_set_vertex_buffers_swtcl;
2174        r300->context.set_index_buffer = r300_set_index_buffer_swtcl;
2175    }
2176
2177    r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
2178    r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
2179    r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
2180
2181    r300->context.create_vs_state = r300_create_vs_state;
2182    r300->context.bind_vs_state = r300_bind_vs_state;
2183    r300->context.delete_vs_state = r300_delete_vs_state;
2184
2185    r300->context.texture_barrier = r300_texture_barrier;
2186}
2187