nv50_push.c revision 3c9df0bda67cdcbc340a4f20997f7a3345cbe9cb
1#include "pipe/p_context.h" 2#include "pipe/p_state.h" 3#include "util/u_inlines.h" 4#include "util/u_format.h" 5 6#include "nouveau/nouveau_util.h" 7#include "nv50_context.h" 8#include "nv50_resource.h" 9 10struct push_context { 11 struct nv50_context *nv50; 12 13 unsigned vtx_size; 14 15 void *idxbuf; 16 int32_t idxbias; 17 unsigned idxsize; 18 19 float edgeflag; 20 int edgeflag_attr; 21 22 struct { 23 void *map; 24 unsigned stride; 25 unsigned divisor; 26 unsigned step; 27 void (*push)(struct nouveau_channel *, void *); 28 } attr[16]; 29 unsigned attr_nr; 30}; 31 32static void 33emit_b32_1(struct nouveau_channel *chan, void *data) 34{ 35 uint32_t *v = data; 36 37 OUT_RING(chan, v[0]); 38} 39 40static void 41emit_b32_2(struct nouveau_channel *chan, void *data) 42{ 43 uint32_t *v = data; 44 45 OUT_RING(chan, v[0]); 46 OUT_RING(chan, v[1]); 47} 48 49static void 50emit_b32_3(struct nouveau_channel *chan, void *data) 51{ 52 uint32_t *v = data; 53 54 OUT_RING(chan, v[0]); 55 OUT_RING(chan, v[1]); 56 OUT_RING(chan, v[2]); 57} 58 59static void 60emit_b32_4(struct nouveau_channel *chan, void *data) 61{ 62 uint32_t *v = data; 63 64 OUT_RING(chan, v[0]); 65 OUT_RING(chan, v[1]); 66 OUT_RING(chan, v[2]); 67 OUT_RING(chan, v[3]); 68} 69 70static void 71emit_b16_1(struct nouveau_channel *chan, void *data) 72{ 73 uint16_t *v = data; 74 75 OUT_RING(chan, v[0]); 76} 77 78static void 79emit_b16_3(struct nouveau_channel *chan, void *data) 80{ 81 uint16_t *v = data; 82 83 OUT_RING(chan, (v[1] << 16) | v[0]); 84 OUT_RING(chan, v[2]); 85} 86 87static void 88emit_b08_1(struct nouveau_channel *chan, void *data) 89{ 90 uint8_t *v = data; 91 92 OUT_RING(chan, v[0]); 93} 94 95static void 96emit_b08_3(struct nouveau_channel *chan, void *data) 97{ 98 uint8_t *v = data; 99 100 OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); 101} 102 103static INLINE void 104emit_vertex(struct push_context *ctx, unsigned n) 105{ 106 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; 107 struct nouveau_channel *chan = tesla->channel; 108 int i; 109 110 if (ctx->edgeflag_attr < 16) { 111 float *edgeflag = ctx->attr[ctx->edgeflag_attr].map + 112 ctx->attr[ctx->edgeflag_attr].stride * n; 113 114 if (*edgeflag != ctx->edgeflag) { 115 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); 116 OUT_RING (chan, *edgeflag ? 1 : 0); 117 ctx->edgeflag = *edgeflag; 118 } 119 } 120 121 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size); 122 for (i = 0; i < ctx->attr_nr; i++) 123 ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n); 124} 125 126static void 127emit_edgeflag(void *priv, boolean enabled) 128{ 129 struct push_context *ctx = priv; 130 struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; 131 struct nouveau_channel *chan = tesla->channel; 132 133 BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); 134 OUT_RING (chan, enabled ? 1 : 0); 135} 136 137static void 138emit_elt08(void *priv, unsigned start, unsigned count) 139{ 140 struct push_context *ctx = priv; 141 uint8_t *idxbuf = ctx->idxbuf; 142 143 while (count--) 144 emit_vertex(ctx, idxbuf[start++]); 145} 146 147static void 148emit_elt08_biased(void *priv, unsigned start, unsigned count) 149{ 150 struct push_context *ctx = priv; 151 uint8_t *idxbuf = ctx->idxbuf; 152 153 while (count--) 154 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); 155} 156 157static void 158emit_elt16(void *priv, unsigned start, unsigned count) 159{ 160 struct push_context *ctx = priv; 161 uint16_t *idxbuf = ctx->idxbuf; 162 163 while (count--) 164 emit_vertex(ctx, idxbuf[start++]); 165} 166 167static void 168emit_elt16_biased(void *priv, unsigned start, unsigned count) 169{ 170 struct push_context *ctx = priv; 171 uint16_t *idxbuf = ctx->idxbuf; 172 173 while (count--) 174 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); 175} 176 177static void 178emit_elt32(void *priv, unsigned start, unsigned count) 179{ 180 struct push_context *ctx = priv; 181 uint32_t *idxbuf = ctx->idxbuf; 182 183 while (count--) 184 emit_vertex(ctx, idxbuf[start++]); 185} 186 187static void 188emit_elt32_biased(void *priv, unsigned start, unsigned count) 189{ 190 struct push_context *ctx = priv; 191 uint32_t *idxbuf = ctx->idxbuf; 192 193 while (count--) 194 emit_vertex(ctx, idxbuf[start++] + ctx->idxbias); 195} 196 197static void 198emit_verts(void *priv, unsigned start, unsigned count) 199{ 200 while (count--) 201 emit_vertex(priv, start++); 202} 203 204void 205nv50_push_elements_instanced(struct pipe_context *pipe, 206 struct pipe_resource *idxbuf, 207 unsigned idxsize, int idxbias, 208 unsigned mode, unsigned start, unsigned count, 209 unsigned i_start, unsigned i_count) 210{ 211 struct nv50_context *nv50 = nv50_context(pipe); 212 struct nouveau_grobj *tesla = nv50->screen->tesla; 213 struct nouveau_channel *chan = tesla->channel; 214 struct push_context ctx; 215 const unsigned p_overhead = 4 + /* begin/end */ 216 4; /* potential edgeflag enable/disable */ 217 const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ 218 2; /* potential edgeflag modification */ 219 struct u_split_prim s; 220 unsigned vtx_size; 221 boolean nzi = FALSE; 222 int i; 223 224 ctx.nv50 = nv50; 225 ctx.attr_nr = 0; 226 ctx.idxbuf = NULL; 227 ctx.vtx_size = 0; 228 ctx.edgeflag = 0.5f; 229 ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in; 230 231 /* map vertex buffers, determine vertex size */ 232 for (i = 0; i < nv50->vtxelt->num_elements; i++) { 233 struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; 234 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index]; 235 struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo; 236 unsigned size, nr_components, n; 237 238 if (!(nv50->vbo_fifo & (1 << i))) 239 continue; 240 n = ctx.attr_nr++; 241 242 if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { 243 assert(bo->map); 244 return; 245 } 246 ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset; 247 nouveau_bo_unmap(bo); 248 249 ctx.attr[n].stride = vb->stride; 250 ctx.attr[n].divisor = ve->instance_divisor; 251 if (ctx.attr[n].divisor) { 252 ctx.attr[n].step = i_start % ve->instance_divisor; 253 ctx.attr[n].map += i_start * vb->stride; 254 } 255 256 size = util_format_get_component_bits(ve->src_format, 257 UTIL_FORMAT_COLORSPACE_RGB, 0); 258 nr_components = util_format_get_nr_components(ve->src_format); 259 switch (size) { 260 case 8: 261 switch (nr_components) { 262 case 1: ctx.attr[n].push = emit_b08_1; break; 263 case 2: ctx.attr[n].push = emit_b16_1; break; 264 case 3: ctx.attr[n].push = emit_b08_3; break; 265 case 4: ctx.attr[n].push = emit_b32_1; break; 266 } 267 ctx.vtx_size++; 268 break; 269 case 16: 270 switch (nr_components) { 271 case 1: ctx.attr[n].push = emit_b16_1; break; 272 case 2: ctx.attr[n].push = emit_b32_1; break; 273 case 3: ctx.attr[n].push = emit_b16_3; break; 274 case 4: ctx.attr[n].push = emit_b32_2; break; 275 } 276 ctx.vtx_size += (nr_components + 1) >> 1; 277 break; 278 case 32: 279 switch (nr_components) { 280 case 1: ctx.attr[n].push = emit_b32_1; break; 281 case 2: ctx.attr[n].push = emit_b32_2; break; 282 case 3: ctx.attr[n].push = emit_b32_3; break; 283 case 4: ctx.attr[n].push = emit_b32_4; break; 284 } 285 ctx.vtx_size += nr_components; 286 break; 287 default: 288 assert(0); 289 return; 290 } 291 } 292 vtx_size = ctx.vtx_size + v_overhead; 293 294 /* map index buffer, if present */ 295 if (idxbuf) { 296 struct nouveau_bo *bo = nv50_resource(idxbuf)->bo; 297 298 if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { 299 assert(bo->map); 300 return; 301 } 302 ctx.idxbuf = bo->map; 303 ctx.idxbias = idxbias; 304 ctx.idxsize = idxsize; 305 nouveau_bo_unmap(bo); 306 } 307 308 s.priv = &ctx; 309 s.edge = emit_edgeflag; 310 if (idxbuf) { 311 if (idxsize == 1) 312 s.emit = idxbias ? emit_elt08_biased : emit_elt08; 313 else 314 if (idxsize == 2) 315 s.emit = idxbias ? emit_elt16_biased : emit_elt16; 316 else 317 s.emit = idxbias ? emit_elt32_biased : emit_elt32; 318 } else 319 s.emit = emit_verts; 320 321 /* per-instance loop */ 322 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); 323 OUT_RING (chan, NV50_CB_AUX | (24 << 8)); 324 OUT_RING (chan, i_start); 325 while (i_count--) { 326 unsigned max_verts; 327 boolean done; 328 329 for (i = 0; i < ctx.attr_nr; i++) { 330 if (!ctx.attr[i].divisor || 331 ctx.attr[i].divisor != ++ctx.attr[i].step) 332 continue; 333 ctx.attr[i].step = 0; 334 ctx.attr[i].map += ctx.attr[i].stride; 335 } 336 337 u_split_prim_init(&s, mode, start, count); 338 do { 339 if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { 340 FIRE_RING(chan); 341 if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) { 342 assert(0); 343 return; 344 } 345 } 346 347 max_verts = AVAIL_RING(chan); 348 max_verts -= p_overhead; 349 max_verts /= vtx_size; 350 351 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 352 OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); 353 done = u_split_prim_next(&s, max_verts); 354 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 355 OUT_RING (chan, 0); 356 } while (!done); 357 358 nzi = TRUE; 359 } 360} 361