1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33
34#include "brw_context.h"
35#include "brw_state.h"
36#include "brw_defines.h"
37#include "main/macros.h"
38
39static void
40brw_upload_vs_unit(struct brw_context *brw)
41{
42   const struct gen_device_info *devinfo = &brw->screen->devinfo;
43   struct brw_stage_state *stage_state = &brw->vs.base;
44   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
45   const struct brw_vue_prog_data *vue_prog_data =
46      brw_vue_prog_data(stage_state->prog_data);
47
48   struct brw_vs_unit_state *vs;
49
50   vs = brw_state_batch(brw, AUB_TRACE_VS_STATE,
51			sizeof(*vs), 32, &stage_state->state_offset);
52   memset(vs, 0, sizeof(*vs));
53
54   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */
55   vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1;
56   vs->thread0.kernel_start_pointer =
57      brw_program_reloc(brw,
58			stage_state->state_offset +
59			offsetof(struct brw_vs_unit_state, thread0),
60			stage_state->prog_offset +
61			(vs->thread0.grf_reg_count << 1)) >> 6;
62
63   if (prog_data->use_alt_mode)
64      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
65   else
66      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
67
68   /* Choosing multiple program flow means that we may get 2-vertex threads,
69    * which will have the channel mask for dwords 4-7 enabled in the thread,
70    * and those dwords will be written to the second URB handle when we
71    * brw_urb_WRITE() results.
72    */
73   /* Force single program flow on Ironlake.  We cannot reliably get
74    * all applications working without it.  See:
75    * https://bugs.freedesktop.org/show_bug.cgi?id=29172
76    *
77    * The most notable and reliably failing application is the Humus
78    * demo "CelShading"
79   */
80   vs->thread1.single_program_flow = (brw->gen == 5);
81
82   vs->thread1.binding_table_entry_count =
83      prog_data->binding_table.size_bytes / 4;
84
85   if (prog_data->total_scratch != 0) {
86      vs->thread2.scratch_space_base_pointer =
87	 stage_state->scratch_bo->offset64 >> 10; /* reloc */
88      vs->thread2.per_thread_scratch_space =
89	 ffs(stage_state->per_thread_scratch) - 11;
90   } else {
91      vs->thread2.scratch_space_base_pointer = 0;
92      vs->thread2.per_thread_scratch_space = 0;
93   }
94
95   vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length;
96   vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length;
97   vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg;
98   vs->thread3.urb_entry_read_offset = 0;
99
100   /* BRW_NEW_CURBE_OFFSETS */
101   vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
102
103   /* BRW_NEW_URB_FENCE */
104   if (brw->gen == 5) {
105      switch (brw->urb.nr_vs_entries) {
106      case 8:
107      case 12:
108      case 16:
109      case 32:
110      case 64:
111      case 96:
112      case 128:
113      case 168:
114      case 192:
115      case 224:
116      case 256:
117	 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
118	 break;
119      default:
120         unreachable("not reached");
121      }
122   } else {
123      switch (brw->urb.nr_vs_entries) {
124      case 8:
125      case 12:
126      case 16:
127      case 32:
128	 break;
129      case 64:
130	 assert(brw->is_g4x);
131	 break;
132      default:
133         unreachable("not reached");
134      }
135      vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
136   }
137
138   vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
139
140   vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
141				   1, devinfo->max_vs_threads) - 1;
142
143   if (brw->gen == 5)
144      vs->vs5.sampler_count = 0; /* hardware requirement */
145   else {
146      vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
147   }
148
149
150   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
151      vs->thread4.stats_enable = 1;
152
153   /* Vertex program always enabled:
154    */
155   vs->vs6.vs_enable = 1;
156
157   /* Set the sampler state pointer, and its reloc
158    */
159   if (stage_state->sampler_count) {
160      /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
161      vs->vs5.sampler_state_pointer =
162         (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
163      drm_intel_bo_emit_reloc(brw->batch.bo,
164                              stage_state->state_offset +
165                              offsetof(struct brw_vs_unit_state, vs5),
166                              brw->batch.bo,
167                              (stage_state->sampler_offset |
168                               vs->vs5.sampler_count),
169                              I915_GEM_DOMAIN_INSTRUCTION, 0);
170   }
171
172   /* Emit scratch space relocation */
173   if (prog_data->total_scratch != 0) {
174      drm_intel_bo_emit_reloc(brw->batch.bo,
175			      stage_state->state_offset +
176			      offsetof(struct brw_vs_unit_state, thread2),
177			      stage_state->scratch_bo,
178			      vs->thread2.per_thread_scratch_space,
179			      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
180   }
181
182   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
183}
184
185const struct brw_tracked_state brw_vs_unit = {
186   .dirty = {
187      .mesa  = 0,
188      .brw   = BRW_NEW_BATCH |
189               BRW_NEW_BLORP |
190               BRW_NEW_CURBE_OFFSETS |
191               BRW_NEW_PROGRAM_CACHE |
192               BRW_NEW_SAMPLER_STATE_TABLE |
193               BRW_NEW_URB_FENCE |
194               BRW_NEW_VS_PROG_DATA,
195   },
196   .emit = brw_upload_vs_unit,
197};
198