nv50_grctx.c revision d5f3c90d4f3ad6b054f9855b7b69137b97bda131
1/*
2 * Copyright 2009 Marcin Kościelnicki
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
/*
 * ctxprog flag identifiers.
 *
 * Each flag is encoded as (flags word index * 32) + bit index, and is
 * followed by the symbolic names of its two possible states (0 / 1).
 * NOTE(review): these names are presumably token-pasted by the cp_bra/
 * cp_wait/cp_set helpers from nouveau_grctx.h - confirm before renaming.
 */
#define CP_FLAG_CLEAR                 0
#define CP_FLAG_SET                   1
#define CP_FLAG_SWAP_DIRECTION        ((0 * 32) + 0)
#define CP_FLAG_SWAP_DIRECTION_LOAD   0
#define CP_FLAG_SWAP_DIRECTION_SAVE   1
#define CP_FLAG_UNK01                 ((0 * 32) + 1)
#define CP_FLAG_UNK01_CLEAR           0
#define CP_FLAG_UNK01_SET             1
#define CP_FLAG_UNK03                 ((0 * 32) + 3)
#define CP_FLAG_UNK03_CLEAR           0
#define CP_FLAG_UNK03_SET             1
#define CP_FLAG_USER_SAVE             ((0 * 32) + 5)
#define CP_FLAG_USER_SAVE_NOT_PENDING 0
#define CP_FLAG_USER_SAVE_PENDING     1
#define CP_FLAG_USER_LOAD             ((0 * 32) + 6)
#define CP_FLAG_USER_LOAD_NOT_PENDING 0
#define CP_FLAG_USER_LOAD_PENDING     1
#define CP_FLAG_UNK0B                 ((0 * 32) + 0xb)
#define CP_FLAG_UNK0B_CLEAR           0
#define CP_FLAG_UNK0B_SET             1
#define CP_FLAG_UNK1D                 ((0 * 32) + 0x1d)
#define CP_FLAG_UNK1D_CLEAR           0
#define CP_FLAG_UNK1D_SET             1
#define CP_FLAG_UNK20                 ((1 * 32) + 0)
#define CP_FLAG_UNK20_CLEAR           0
#define CP_FLAG_UNK20_SET             1
#define CP_FLAG_STATUS                ((2 * 32) + 0)
#define CP_FLAG_STATUS_BUSY           0
#define CP_FLAG_STATUS_IDLE           1
#define CP_FLAG_AUTO_SAVE             ((2 * 32) + 4)
#define CP_FLAG_AUTO_SAVE_NOT_PENDING 0
#define CP_FLAG_AUTO_SAVE_PENDING     1
#define CP_FLAG_AUTO_LOAD             ((2 * 32) + 5)
#define CP_FLAG_AUTO_LOAD_NOT_PENDING 0
#define CP_FLAG_AUTO_LOAD_PENDING     1
#define CP_FLAG_XFER                  ((2 * 32) + 11)
#define CP_FLAG_XFER_IDLE             0
#define CP_FLAG_XFER_BUSY             1
#define CP_FLAG_NEWCTX                ((2 * 32) + 12)
#define CP_FLAG_NEWCTX_BUSY           0
#define CP_FLAG_NEWCTX_DONE           1
#define CP_FLAG_ALWAYS                ((2 * 32) + 13)
#define CP_FLAG_ALWAYS_FALSE          0
#define CP_FLAG_ALWAYS_TRUE           1

/*
 * ctxprog instruction words: an opcode value followed, where applicable,
 * by the masks/shifts of the operand fields packed into the same word.
 */
#define CP_CTX                   0x00100000
#define CP_CTX_COUNT             0x000f0000
#define CP_CTX_COUNT_SHIFT               16
#define CP_CTX_REG               0x00003fff
#define CP_LOAD_SR               0x00200000
#define CP_LOAD_SR_VALUE         0x000fffff
#define CP_BRA                   0x00400000
#define CP_BRA_IP                0x0001ff00
#define CP_BRA_IP_SHIFT                   8
#define CP_BRA_IF_CLEAR          0x00000080
#define CP_BRA_FLAG              0x0000007f
#define CP_WAIT                  0x00500000
#define CP_WAIT_SET              0x00000080
#define CP_WAIT_FLAG             0x0000007f
#define CP_SET                   0x00700000
#define CP_SET_1                 0x00000080
#define CP_SET_FLAG              0x0000007f
/* Operand-less instructions, emitted as-is with cp_out(). */
#define CP_NEWCTX                0x00600004
#define CP_NEXT_TO_SWAP          0x00600005
#define CP_SET_CONTEXT_POINTER   0x00600006
#define CP_SET_XFER_POINTER      0x00600007
#define CP_ENABLE                0x00600009
#define CP_END                   0x0060000c
#define CP_NEXT_TO_CURRENT       0x0060000d
#define CP_DISABLE1              0x0090ffff
#define CP_DISABLE2              0x0091ffff
#define CP_XFER_1      0x008000ff
#define CP_XFER_2      0x008800ff
#define CP_SEEK_1      0x00c000ff
#define CP_SEEK_2      0x00c800ff
98
99#include "drmP.h"
100#include "nouveau_drv.h"
101#include "nouveau_grctx.h"
102
103/*
104 * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
105 * the GPU itself that does context-switching, but it needs a special
106 * microcode to do it. And it's the driver's task to supply this microcode,
107 * further known as ctxprog, as well as the initial context values, known
108 * as ctxvals.
109 *
110 * Without ctxprog, you cannot switch contexts. Not even in software, since
111 * the majority of context [xfer strands] isn't accessible directly. You're
112 * stuck with a single channel, and you also suffer all the problems resulting
113 * from missing ctxvals, since you cannot load them.
114 *
115 * Without ctxvals, you're stuck with PGRAPH's default context. It's enough to
116 * run 2d operations, but trying to utilise 3d or CUDA will just lock you up,
117 * since you don't have... some sort of needed setup.
118 *
119 * Nouveau will just disable acceleration if not given ctxprog + ctxvals, since
120 * it's too much hassle to handle no-ctxprog as a special case.
121 */
122
/*
 * How ctxprogs work.
 *
 * The ctxprog is written in its own kind of microcode, with a very small and
 * crappy set of available commands. You upload it to a small [512 insns]
 * area of memory on PGRAPH, and it'll be run when PFIFO wants PGRAPH to
 * switch channel, or when the driver explicitly requests it. Stuff visible
 * to ctxprog consists of: PGRAPH MMIO registers, PGRAPH context strands,
 * the per-channel context save area in VRAM [known as ctxvals or grctx],
 * 4 flags registers, a scratch register, two grctx pointers, plus many
 * random poorly-understood details.
 *
 * When ctxprog runs, it's supposed to check what operations are asked of it,
 * save old context if requested, optionally reset PGRAPH and switch to the
 * new channel, and load the new context. Context consists of three major
 * parts: subset of MMIO registers and two "xfer areas".
 */
140
141/* TODO:
142 *  - document unimplemented bits compared to nvidia
143 *  - NVAx: make a TP subroutine, use it.
144 *  - use 0x4008fc instead of 0x1540?
145 */
146
/*
 * Branch targets inside the ctxprog skeleton built by nv50_grctx_init().
 * Each value is bound to a program position with cp_name() and referenced
 * from cp_bra(). Numbering starts at 1.
 */
enum cp_label {
	cp_check_load = 1,	/* test for a pending load after a save */
	cp_setup_auto_load,	/* prologue for an automatic context load */
	cp_setup_load,		/* common context load setup */
	cp_setup_save,		/* context save setup */
	cp_swap_state,		/* MMIO + xfer area transfer, both directions */
	cp_prepare_exit,	/* flag clean-up before leaving */
	cp_exit,		/* acknowledge user requests and end */
};
156
/*
 * Generators for the three parts of the context (MMIO registers and the two
 * xfer areas); called in this order from nv50_grctx_init().
 */
static void nv50_graph_construct_mmio(struct nouveau_grctx *ctx);
static void nv50_graph_construct_xfer1(struct nouveau_grctx *ctx);
static void nv50_graph_construct_xfer2(struct nouveau_grctx *ctx);
160
161/* Main function: construct the ctxprog skeleton, call the other functions. */
162
163int
164nv50_grctx_init(struct nouveau_grctx *ctx)
165{
166	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
167
168	switch (dev_priv->chipset) {
169	case 0x50:
170	case 0x84:
171	case 0x86:
172	case 0x92:
173	case 0x94:
174	case 0x96:
175	case 0x98:
176	case 0xa0:
177	case 0xa5:
178	case 0xa8:
179	case 0xaa:
180	case 0xac:
181		break;
182	default:
183		NV_ERROR(ctx->dev, "I don't know how to make a ctxprog for "
184				   "your NV%x card.\n", dev_priv->chipset);
185		NV_ERROR(ctx->dev, "Disabling acceleration. Please contact "
186				   "the devs.\n");
187		return -ENOSYS;
188	}
189	/* decide whether we're loading/unloading the context */
190	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
191	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
192
193	cp_name(ctx, cp_check_load);
194	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
195	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
196	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
197
198	/* setup for context load */
199	cp_name(ctx, cp_setup_auto_load);
200	cp_out (ctx, CP_DISABLE1);
201	cp_out (ctx, CP_DISABLE2);
202	cp_out (ctx, CP_ENABLE);
203	cp_out (ctx, CP_NEXT_TO_SWAP);
204	cp_set (ctx, UNK01, SET);
205	cp_name(ctx, cp_setup_load);
206	cp_out (ctx, CP_NEWCTX);
207	cp_wait(ctx, NEWCTX, BUSY);
208	cp_set (ctx, UNK1D, CLEAR);
209	cp_set (ctx, SWAP_DIRECTION, LOAD);
210	cp_bra (ctx, UNK0B, SET, cp_prepare_exit);
211	cp_bra (ctx, ALWAYS, TRUE, cp_swap_state);
212
213	/* setup for context save */
214	cp_name(ctx, cp_setup_save);
215	cp_set (ctx, UNK1D, SET);
216	cp_wait(ctx, STATUS, BUSY);
217	cp_set (ctx, UNK01, SET);
218	cp_set (ctx, SWAP_DIRECTION, SAVE);
219
220	/* general PGRAPH state */
221	cp_name(ctx, cp_swap_state);
222	cp_set (ctx, UNK03, SET);
223	cp_pos (ctx, 0x00004/4);
224	cp_ctx (ctx, 0x400828, 1); /* needed. otherwise, flickering happens. */
225	cp_pos (ctx, 0x00100/4);
226	nv50_graph_construct_mmio(ctx);
227	nv50_graph_construct_xfer1(ctx);
228	nv50_graph_construct_xfer2(ctx);
229
230	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_check_load);
231
232	cp_set (ctx, UNK20, SET);
233	cp_set (ctx, SWAP_DIRECTION, SAVE); /* no idea why this is needed, but fixes at least one lockup. */
234	cp_lsr (ctx, ctx->ctxvals_base);
235	cp_out (ctx, CP_SET_XFER_POINTER);
236	cp_lsr (ctx, 4);
237	cp_out (ctx, CP_SEEK_1);
238	cp_out (ctx, CP_XFER_1);
239	cp_wait(ctx, XFER, BUSY);
240
241	/* pre-exit state updates */
242	cp_name(ctx, cp_prepare_exit);
243	cp_set (ctx, UNK01, CLEAR);
244	cp_set (ctx, UNK03, CLEAR);
245	cp_set (ctx, UNK1D, CLEAR);
246
247	cp_bra (ctx, USER_SAVE, PENDING, cp_exit);
248	cp_out (ctx, CP_NEXT_TO_CURRENT);
249
250	cp_name(ctx, cp_exit);
251	cp_set (ctx, USER_SAVE, NOT_PENDING);
252	cp_set (ctx, USER_LOAD, NOT_PENDING);
253	cp_out (ctx, CP_END);
254	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
255
256	return 0;
257}
258
259/*
260 * Constructs MMIO part of ctxprog and ctxvals. Just a matter of knowing which
261 * registers to save/restore and the default values for them.
262 */
263
264static void
265nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
266{
267	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
268	int i, j;
269	int offset, base;
270	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
271
272	/* 0800 */
273	cp_ctx(ctx, 0x400808, 7);
274	gr_def(ctx, 0x400814, 0x00000030);
275	cp_ctx(ctx, 0x400834, 0x32);
276	if (dev_priv->chipset == 0x50) {
277		gr_def(ctx, 0x400834, 0xff400040);
278		gr_def(ctx, 0x400838, 0xfff00080);
279		gr_def(ctx, 0x40083c, 0xfff70090);
280		gr_def(ctx, 0x400840, 0xffe806a8);
281	}
282	gr_def(ctx, 0x400844, 0x00000002);
283	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
284		gr_def(ctx, 0x400894, 0x00001000);
285	gr_def(ctx, 0x4008e8, 0x00000003);
286	gr_def(ctx, 0x4008ec, 0x00001000);
287	if (dev_priv->chipset == 0x50)
288		cp_ctx(ctx, 0x400908, 0xb);
289	else if (dev_priv->chipset < 0xa0)
290		cp_ctx(ctx, 0x400908, 0xc);
291	else
292		cp_ctx(ctx, 0x400908, 0xe);
293
294	if (dev_priv->chipset >= 0xa0)
295		cp_ctx(ctx, 0x400b00, 0x1);
296	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
297		cp_ctx(ctx, 0x400b10, 0x1);
298		gr_def(ctx, 0x400b10, 0x0001629d);
299		cp_ctx(ctx, 0x400b20, 0x1);
300		gr_def(ctx, 0x400b20, 0x0001629d);
301	}
302
303	/* 0C00 */
304	cp_ctx(ctx, 0x400c08, 0x2);
305	gr_def(ctx, 0x400c08, 0x0000fe0c);
306
307	/* 1000 */
308	if (dev_priv->chipset < 0xa0) {
309		cp_ctx(ctx, 0x401008, 0x4);
310		gr_def(ctx, 0x401014, 0x00001000);
311	} else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) {
312		cp_ctx(ctx, 0x401008, 0x5);
313		gr_def(ctx, 0x401018, 0x00001000);
314	} else {
315		cp_ctx(ctx, 0x401008, 0x5);
316		gr_def(ctx, 0x401018, 0x00004000);
317	}
318
319	/* 1400 */
320	cp_ctx(ctx, 0x401400, 0x8);
321	cp_ctx(ctx, 0x401424, 0x3);
322	if (dev_priv->chipset == 0x50)
323		gr_def(ctx, 0x40142c, 0x0001fd87);
324	else
325		gr_def(ctx, 0x40142c, 0x00000187);
326	cp_ctx(ctx, 0x401540, 0x5);
327	gr_def(ctx, 0x401550, 0x00001018);
328
329	/* 1800 */
330	cp_ctx(ctx, 0x401814, 0x1);
331	gr_def(ctx, 0x401814, 0x000000ff);
332	if (dev_priv->chipset == 0x50) {
333		cp_ctx(ctx, 0x40181c, 0xe);
334		gr_def(ctx, 0x401850, 0x00000004);
335	} else if (dev_priv->chipset < 0xa0) {
336		cp_ctx(ctx, 0x40181c, 0xf);
337		gr_def(ctx, 0x401854, 0x00000004);
338	} else {
339		cp_ctx(ctx, 0x40181c, 0x13);
340		gr_def(ctx, 0x401864, 0x00000004);
341	}
342
343	/* 1C00 */
344	cp_ctx(ctx, 0x401c00, 0x1);
345	switch (dev_priv->chipset) {
346	case 0x50:
347		gr_def(ctx, 0x401c00, 0x0001005f);
348		break;
349	case 0x84:
350	case 0x86:
351	case 0x94:
352		gr_def(ctx, 0x401c00, 0x044d00df);
353		break;
354	case 0x92:
355	case 0x96:
356	case 0x98:
357	case 0xa0:
358	case 0xaa:
359	case 0xac:
360		gr_def(ctx, 0x401c00, 0x042500df);
361		break;
362	case 0xa5:
363	case 0xa8:
364		gr_def(ctx, 0x401c00, 0x142500df);
365		break;
366	}
367
368	/* 2400 */
369	cp_ctx(ctx, 0x402400, 0x1);
370	if (dev_priv->chipset == 0x50)
371		cp_ctx(ctx, 0x402408, 0x1);
372	else
373		cp_ctx(ctx, 0x402408, 0x2);
374	gr_def(ctx, 0x402408, 0x00000600);
375
376	/* 2800 */
377	cp_ctx(ctx, 0x402800, 0x1);
378	if (dev_priv->chipset == 0x50)
379		gr_def(ctx, 0x402800, 0x00000006);
380
381	/* 2C00 */
382	cp_ctx(ctx, 0x402c08, 0x6);
383	if (dev_priv->chipset != 0x50)
384		gr_def(ctx, 0x402c14, 0x01000000);
385	gr_def(ctx, 0x402c18, 0x000000ff);
386	if (dev_priv->chipset == 0x50)
387		cp_ctx(ctx, 0x402ca0, 0x1);
388	else
389		cp_ctx(ctx, 0x402ca0, 0x2);
390	if (dev_priv->chipset < 0xa0)
391		gr_def(ctx, 0x402ca0, 0x00000400);
392	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
393		gr_def(ctx, 0x402ca0, 0x00000800);
394	else
395		gr_def(ctx, 0x402ca0, 0x00000400);
396	cp_ctx(ctx, 0x402cac, 0x4);
397
398	/* 3000 */
399	cp_ctx(ctx, 0x403004, 0x1);
400	gr_def(ctx, 0x403004, 0x00000001);
401
402	/* 3404 */
403	if (dev_priv->chipset >= 0xa0) {
404		cp_ctx(ctx, 0x403404, 0x1);
405		gr_def(ctx, 0x403404, 0x00000001);
406	}
407
408	/* 5000 */
409	cp_ctx(ctx, 0x405000, 0x1);
410	switch (dev_priv->chipset) {
411	case 0x50:
412		gr_def(ctx, 0x405000, 0x00300080);
413		break;
414	case 0x84:
415	case 0xa0:
416	case 0xa5:
417	case 0xa8:
418	case 0xaa:
419	case 0xac:
420		gr_def(ctx, 0x405000, 0x000e0080);
421		break;
422	case 0x86:
423	case 0x92:
424	case 0x94:
425	case 0x96:
426	case 0x98:
427		gr_def(ctx, 0x405000, 0x00000080);
428		break;
429	}
430	cp_ctx(ctx, 0x405014, 0x1);
431	gr_def(ctx, 0x405014, 0x00000004);
432	cp_ctx(ctx, 0x40501c, 0x1);
433	cp_ctx(ctx, 0x405024, 0x1);
434	cp_ctx(ctx, 0x40502c, 0x1);
435
436	/* 5400 or maybe 4800 */
437	if (dev_priv->chipset == 0x50) {
438		offset = 0x405400;
439		cp_ctx(ctx, 0x405400, 0xea);
440	} else if (dev_priv->chipset < 0x94) {
441		offset = 0x405400;
442		cp_ctx(ctx, 0x405400, 0xcb);
443	} else if (dev_priv->chipset < 0xa0) {
444		offset = 0x405400;
445		cp_ctx(ctx, 0x405400, 0xcc);
446	} else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
447		offset = 0x404800;
448		cp_ctx(ctx, 0x404800, 0xda);
449	} else {
450		offset = 0x405400;
451		cp_ctx(ctx, 0x405400, 0xd4);
452	}
453	gr_def(ctx, offset + 0x0c, 0x00000002);
454	gr_def(ctx, offset + 0x10, 0x00000001);
455	if (dev_priv->chipset >= 0x94)
456		offset += 4;
457	gr_def(ctx, offset + 0x1c, 0x00000001);
458	gr_def(ctx, offset + 0x20, 0x00000100);
459	gr_def(ctx, offset + 0x38, 0x00000002);
460	gr_def(ctx, offset + 0x3c, 0x00000001);
461	gr_def(ctx, offset + 0x40, 0x00000001);
462	gr_def(ctx, offset + 0x50, 0x00000001);
463	gr_def(ctx, offset + 0x54, 0x003fffff);
464	gr_def(ctx, offset + 0x58, 0x00001fff);
465	gr_def(ctx, offset + 0x60, 0x00000001);
466	gr_def(ctx, offset + 0x64, 0x00000001);
467	gr_def(ctx, offset + 0x6c, 0x00000001);
468	gr_def(ctx, offset + 0x70, 0x00000001);
469	gr_def(ctx, offset + 0x74, 0x00000001);
470	gr_def(ctx, offset + 0x78, 0x00000004);
471	gr_def(ctx, offset + 0x7c, 0x00000001);
472	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
473		offset += 4;
474	gr_def(ctx, offset + 0x80, 0x00000001);
475	gr_def(ctx, offset + 0x84, 0x00000001);
476	gr_def(ctx, offset + 0x88, 0x00000007);
477	gr_def(ctx, offset + 0x8c, 0x00000001);
478	gr_def(ctx, offset + 0x90, 0x00000007);
479	gr_def(ctx, offset + 0x94, 0x00000001);
480	gr_def(ctx, offset + 0x98, 0x00000001);
481	gr_def(ctx, offset + 0x9c, 0x00000001);
482	if (dev_priv->chipset == 0x50) {
483		 gr_def(ctx, offset + 0xb0, 0x00000001);
484		 gr_def(ctx, offset + 0xb4, 0x00000001);
485		 gr_def(ctx, offset + 0xbc, 0x00000001);
486		 gr_def(ctx, offset + 0xc0, 0x0000000a);
487		 gr_def(ctx, offset + 0xd0, 0x00000040);
488		 gr_def(ctx, offset + 0xd8, 0x00000002);
489		 gr_def(ctx, offset + 0xdc, 0x00000100);
490		 gr_def(ctx, offset + 0xe0, 0x00000001);
491		 gr_def(ctx, offset + 0xe4, 0x00000100);
492		 gr_def(ctx, offset + 0x100, 0x00000001);
493		 gr_def(ctx, offset + 0x124, 0x00000004);
494		 gr_def(ctx, offset + 0x13c, 0x00000001);
495		 gr_def(ctx, offset + 0x140, 0x00000100);
496		 gr_def(ctx, offset + 0x148, 0x00000001);
497		 gr_def(ctx, offset + 0x154, 0x00000100);
498		 gr_def(ctx, offset + 0x158, 0x00000001);
499		 gr_def(ctx, offset + 0x15c, 0x00000100);
500		 gr_def(ctx, offset + 0x164, 0x00000001);
501		 gr_def(ctx, offset + 0x170, 0x00000100);
502		 gr_def(ctx, offset + 0x174, 0x00000001);
503		 gr_def(ctx, offset + 0x17c, 0x00000001);
504		 gr_def(ctx, offset + 0x188, 0x00000002);
505		 gr_def(ctx, offset + 0x190, 0x00000001);
506		 gr_def(ctx, offset + 0x198, 0x00000001);
507		 gr_def(ctx, offset + 0x1ac, 0x00000003);
508		 offset += 0xd0;
509	} else {
510		gr_def(ctx, offset + 0xb0, 0x00000001);
511		gr_def(ctx, offset + 0xb4, 0x00000100);
512		gr_def(ctx, offset + 0xbc, 0x00000001);
513		gr_def(ctx, offset + 0xc8, 0x00000100);
514		gr_def(ctx, offset + 0xcc, 0x00000001);
515		gr_def(ctx, offset + 0xd0, 0x00000100);
516		gr_def(ctx, offset + 0xd8, 0x00000001);
517		gr_def(ctx, offset + 0xe4, 0x00000100);
518	}
519	gr_def(ctx, offset + 0xf8, 0x00000004);
520	gr_def(ctx, offset + 0xfc, 0x00000070);
521	gr_def(ctx, offset + 0x100, 0x00000080);
522	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
523		offset += 4;
524	gr_def(ctx, offset + 0x114, 0x0000000c);
525	if (dev_priv->chipset == 0x50)
526		offset -= 4;
527	gr_def(ctx, offset + 0x11c, 0x00000008);
528	gr_def(ctx, offset + 0x120, 0x00000014);
529	if (dev_priv->chipset == 0x50) {
530		gr_def(ctx, offset + 0x124, 0x00000026);
531		offset -= 0x18;
532	} else {
533		gr_def(ctx, offset + 0x128, 0x00000029);
534		gr_def(ctx, offset + 0x12c, 0x00000027);
535		gr_def(ctx, offset + 0x130, 0x00000026);
536		gr_def(ctx, offset + 0x134, 0x00000008);
537		gr_def(ctx, offset + 0x138, 0x00000004);
538		gr_def(ctx, offset + 0x13c, 0x00000027);
539	}
540	gr_def(ctx, offset + 0x148, 0x00000001);
541	gr_def(ctx, offset + 0x14c, 0x00000002);
542	gr_def(ctx, offset + 0x150, 0x00000003);
543	gr_def(ctx, offset + 0x154, 0x00000004);
544	gr_def(ctx, offset + 0x158, 0x00000005);
545	gr_def(ctx, offset + 0x15c, 0x00000006);
546	gr_def(ctx, offset + 0x160, 0x00000007);
547	gr_def(ctx, offset + 0x164, 0x00000001);
548	gr_def(ctx, offset + 0x1a8, 0x000000cf);
549	if (dev_priv->chipset == 0x50)
550		offset -= 4;
551	gr_def(ctx, offset + 0x1d8, 0x00000080);
552	gr_def(ctx, offset + 0x1dc, 0x00000004);
553	gr_def(ctx, offset + 0x1e0, 0x00000004);
554	if (dev_priv->chipset == 0x50)
555		offset -= 4;
556	else
557		gr_def(ctx, offset + 0x1e4, 0x00000003);
558	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
559		gr_def(ctx, offset + 0x1ec, 0x00000003);
560		offset += 8;
561	}
562	gr_def(ctx, offset + 0x1e8, 0x00000001);
563	if (dev_priv->chipset == 0x50)
564		offset -= 4;
565	gr_def(ctx, offset + 0x1f4, 0x00000012);
566	gr_def(ctx, offset + 0x1f8, 0x00000010);
567	gr_def(ctx, offset + 0x1fc, 0x0000000c);
568	gr_def(ctx, offset + 0x200, 0x00000001);
569	gr_def(ctx, offset + 0x210, 0x00000004);
570	gr_def(ctx, offset + 0x214, 0x00000002);
571	gr_def(ctx, offset + 0x218, 0x00000004);
572	if (dev_priv->chipset >= 0xa0)
573		offset += 4;
574	gr_def(ctx, offset + 0x224, 0x003fffff);
575	gr_def(ctx, offset + 0x228, 0x00001fff);
576	if (dev_priv->chipset == 0x50)
577		offset -= 0x20;
578	else if (dev_priv->chipset >= 0xa0) {
579		gr_def(ctx, offset + 0x250, 0x00000001);
580		gr_def(ctx, offset + 0x254, 0x00000001);
581		gr_def(ctx, offset + 0x258, 0x00000002);
582		offset += 0x10;
583	}
584	gr_def(ctx, offset + 0x250, 0x00000004);
585	gr_def(ctx, offset + 0x254, 0x00000014);
586	gr_def(ctx, offset + 0x258, 0x00000001);
587	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
588		offset += 4;
589	gr_def(ctx, offset + 0x264, 0x00000002);
590	if (dev_priv->chipset >= 0xa0)
591		offset += 8;
592	gr_def(ctx, offset + 0x270, 0x00000001);
593	gr_def(ctx, offset + 0x278, 0x00000002);
594	gr_def(ctx, offset + 0x27c, 0x00001000);
595	if (dev_priv->chipset == 0x50)
596		offset -= 0xc;
597	else {
598		gr_def(ctx, offset + 0x280, 0x00000e00);
599		gr_def(ctx, offset + 0x284, 0x00001000);
600		gr_def(ctx, offset + 0x288, 0x00001e00);
601	}
602	gr_def(ctx, offset + 0x290, 0x00000001);
603	gr_def(ctx, offset + 0x294, 0x00000001);
604	gr_def(ctx, offset + 0x298, 0x00000001);
605	gr_def(ctx, offset + 0x29c, 0x00000001);
606	gr_def(ctx, offset + 0x2a0, 0x00000001);
607	gr_def(ctx, offset + 0x2b0, 0x00000200);
608	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
609		gr_def(ctx, offset + 0x2b4, 0x00000200);
610		offset += 4;
611	}
612	if (dev_priv->chipset < 0xa0) {
613		gr_def(ctx, offset + 0x2b8, 0x00000001);
614		gr_def(ctx, offset + 0x2bc, 0x00000070);
615		gr_def(ctx, offset + 0x2c0, 0x00000080);
616		gr_def(ctx, offset + 0x2cc, 0x00000001);
617		gr_def(ctx, offset + 0x2d0, 0x00000070);
618		gr_def(ctx, offset + 0x2d4, 0x00000080);
619	} else {
620		gr_def(ctx, offset + 0x2b8, 0x00000001);
621		gr_def(ctx, offset + 0x2bc, 0x000000f0);
622		gr_def(ctx, offset + 0x2c0, 0x000000ff);
623		gr_def(ctx, offset + 0x2cc, 0x00000001);
624		gr_def(ctx, offset + 0x2d0, 0x000000f0);
625		gr_def(ctx, offset + 0x2d4, 0x000000ff);
626		gr_def(ctx, offset + 0x2dc, 0x00000009);
627		offset += 4;
628	}
629	gr_def(ctx, offset + 0x2e4, 0x00000001);
630	gr_def(ctx, offset + 0x2e8, 0x000000cf);
631	gr_def(ctx, offset + 0x2f0, 0x00000001);
632	gr_def(ctx, offset + 0x300, 0x000000cf);
633	gr_def(ctx, offset + 0x308, 0x00000002);
634	gr_def(ctx, offset + 0x310, 0x00000001);
635	gr_def(ctx, offset + 0x318, 0x00000001);
636	gr_def(ctx, offset + 0x320, 0x000000cf);
637	gr_def(ctx, offset + 0x324, 0x000000cf);
638	gr_def(ctx, offset + 0x328, 0x00000001);
639
640	/* 6000? */
641	if (dev_priv->chipset == 0x50)
642		cp_ctx(ctx, 0x4063e0, 0x1);
643
644	/* 6800 */
645	if (dev_priv->chipset < 0x90) {
646		cp_ctx(ctx, 0x406814, 0x2b);
647		gr_def(ctx, 0x406818, 0x00000f80);
648		gr_def(ctx, 0x406860, 0x007f0080);
649		gr_def(ctx, 0x40689c, 0x007f0080);
650	} else {
651		cp_ctx(ctx, 0x406814, 0x4);
652		if (dev_priv->chipset == 0x98)
653			gr_def(ctx, 0x406818, 0x00000f80);
654		else
655			gr_def(ctx, 0x406818, 0x00001f80);
656		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
657			gr_def(ctx, 0x40681c, 0x00000030);
658		cp_ctx(ctx, 0x406830, 0x3);
659	}
660
661	/* 7000: per-ROP group state */
662	for (i = 0; i < 8; i++) {
663		if (units & (1<<(i+16))) {
664			cp_ctx(ctx, 0x407000 + (i<<8), 3);
665			if (dev_priv->chipset == 0x50)
666				gr_def(ctx, 0x407000 + (i<<8), 0x1b74f820);
667			else if (dev_priv->chipset != 0xa5)
668				gr_def(ctx, 0x407000 + (i<<8), 0x3b74f821);
669			else
670				gr_def(ctx, 0x407000 + (i<<8), 0x7b74f821);
671			gr_def(ctx, 0x407004 + (i<<8), 0x89058001);
672
673			if (dev_priv->chipset == 0x50) {
674				cp_ctx(ctx, 0x407010 + (i<<8), 1);
675			} else if (dev_priv->chipset < 0xa0) {
676				cp_ctx(ctx, 0x407010 + (i<<8), 2);
677				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
678				gr_def(ctx, 0x407014 + (i<<8), 0x0000001f);
679			} else {
680				cp_ctx(ctx, 0x407010 + (i<<8), 3);
681				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
682				if (dev_priv->chipset != 0xa5)
683					gr_def(ctx, 0x407014 + (i<<8), 0x000000ff);
684				else
685					gr_def(ctx, 0x407014 + (i<<8), 0x000001ff);
686			}
687
688			cp_ctx(ctx, 0x407080 + (i<<8), 4);
689			if (dev_priv->chipset != 0xa5)
690				gr_def(ctx, 0x407080 + (i<<8), 0x027c10fa);
691			else
692				gr_def(ctx, 0x407080 + (i<<8), 0x827c10fa);
693			if (dev_priv->chipset == 0x50)
694				gr_def(ctx, 0x407084 + (i<<8), 0x000000c0);
695			else
696				gr_def(ctx, 0x407084 + (i<<8), 0x400000c0);
697			gr_def(ctx, 0x407088 + (i<<8), 0xb7892080);
698
699			if (dev_priv->chipset < 0xa0)
700				cp_ctx(ctx, 0x407094 + (i<<8), 1);
701			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
702				cp_ctx(ctx, 0x407094 + (i<<8), 3);
703			else {
704				cp_ctx(ctx, 0x407094 + (i<<8), 4);
705				gr_def(ctx, 0x4070a0 + (i<<8), 1);
706			}
707		}
708	}
709
710	cp_ctx(ctx, 0x407c00, 0x3);
711	if (dev_priv->chipset < 0x90)
712		gr_def(ctx, 0x407c00, 0x00010040);
713	else if (dev_priv->chipset < 0xa0)
714		gr_def(ctx, 0x407c00, 0x00390040);
715	else
716		gr_def(ctx, 0x407c00, 0x003d0040);
717	gr_def(ctx, 0x407c08, 0x00000022);
718	if (dev_priv->chipset >= 0xa0) {
719		cp_ctx(ctx, 0x407c10, 0x3);
720		cp_ctx(ctx, 0x407c20, 0x1);
721		cp_ctx(ctx, 0x407c2c, 0x1);
722	}
723
724	if (dev_priv->chipset < 0xa0) {
725		cp_ctx(ctx, 0x407d00, 0x9);
726	} else {
727		cp_ctx(ctx, 0x407d00, 0x15);
728	}
729	if (dev_priv->chipset == 0x98)
730		gr_def(ctx, 0x407d08, 0x00380040);
731	else {
732		if (dev_priv->chipset < 0x90)
733			gr_def(ctx, 0x407d08, 0x00010040);
734		else if (dev_priv->chipset < 0xa0)
735			gr_def(ctx, 0x407d08, 0x00390040);
736		else
737			gr_def(ctx, 0x407d08, 0x003d0040);
738		gr_def(ctx, 0x407d0c, 0x00000022);
739	}
740
741	/* 8000+: per-TP state */
742	for (i = 0; i < 10; i++) {
743		if (units & (1<<i)) {
744			if (dev_priv->chipset < 0xa0)
745				base = 0x408000 + (i<<12);
746			else
747				base = 0x408000 + (i<<11);
748			if (dev_priv->chipset < 0xa0)
749				offset = base + 0xc00;
750			else
751				offset = base + 0x80;
752			cp_ctx(ctx, offset + 0x00, 1);
753			gr_def(ctx, offset + 0x00, 0x0000ff0a);
754			cp_ctx(ctx, offset + 0x08, 1);
755
756			/* per-MP state */
757			for (j = 0; j < (dev_priv->chipset < 0xa0 ? 2 : 4); j++) {
758				if (!(units & (1 << (j+24)))) continue;
759				if (dev_priv->chipset < 0xa0)
760					offset = base + 0x200 + (j<<7);
761				else
762					offset = base + 0x100 + (j<<7);
763				cp_ctx(ctx, offset, 0x20);
764				gr_def(ctx, offset + 0x00, 0x01800000);
765				gr_def(ctx, offset + 0x04, 0x00160000);
766				gr_def(ctx, offset + 0x08, 0x01800000);
767				gr_def(ctx, offset + 0x18, 0x0003ffff);
768				switch (dev_priv->chipset) {
769				case 0x50:
770					gr_def(ctx, offset + 0x1c, 0x00080000);
771					break;
772				case 0x84:
773					gr_def(ctx, offset + 0x1c, 0x00880000);
774					break;
775				case 0x86:
776					gr_def(ctx, offset + 0x1c, 0x008c0000);
777					break;
778				case 0x92:
779				case 0x96:
780				case 0x98:
781					gr_def(ctx, offset + 0x1c, 0x118c0000);
782					break;
783				case 0x94:
784					gr_def(ctx, offset + 0x1c, 0x10880000);
785					break;
786				case 0xa0:
787				case 0xa5:
788					gr_def(ctx, offset + 0x1c, 0x310c0000);
789					break;
790				case 0xa8:
791				case 0xaa:
792				case 0xac:
793					gr_def(ctx, offset + 0x1c, 0x300c0000);
794					break;
795				}
796				gr_def(ctx, offset + 0x40, 0x00010401);
797				if (dev_priv->chipset == 0x50)
798					gr_def(ctx, offset + 0x48, 0x00000040);
799				else
800					gr_def(ctx, offset + 0x48, 0x00000078);
801				gr_def(ctx, offset + 0x50, 0x000000bf);
802				gr_def(ctx, offset + 0x58, 0x00001210);
803				if (dev_priv->chipset == 0x50)
804					gr_def(ctx, offset + 0x5c, 0x00000080);
805				else
806					gr_def(ctx, offset + 0x5c, 0x08000080);
807				if (dev_priv->chipset >= 0xa0)
808					gr_def(ctx, offset + 0x68, 0x0000003e);
809			}
810
811			if (dev_priv->chipset < 0xa0)
812				cp_ctx(ctx, base + 0x300, 0x4);
813			else
814				cp_ctx(ctx, base + 0x300, 0x5);
815			if (dev_priv->chipset == 0x50)
816				gr_def(ctx, base + 0x304, 0x00007070);
817			else if (dev_priv->chipset < 0xa0)
818				gr_def(ctx, base + 0x304, 0x00027070);
819			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
820				gr_def(ctx, base + 0x304, 0x01127070);
821			else
822				gr_def(ctx, base + 0x304, 0x05127070);
823
824			if (dev_priv->chipset < 0xa0)
825				cp_ctx(ctx, base + 0x318, 1);
826			else
827				cp_ctx(ctx, base + 0x320, 1);
828			if (dev_priv->chipset == 0x50)
829				gr_def(ctx, base + 0x318, 0x0003ffff);
830			else if (dev_priv->chipset < 0xa0)
831				gr_def(ctx, base + 0x318, 0x03ffffff);
832			else
833				gr_def(ctx, base + 0x320, 0x07ffffff);
834
835			if (dev_priv->chipset < 0xa0)
836				cp_ctx(ctx, base + 0x324, 5);
837			else
838				cp_ctx(ctx, base + 0x328, 4);
839
840			if (dev_priv->chipset < 0xa0) {
841				cp_ctx(ctx, base + 0x340, 9);
842				offset = base + 0x340;
843			} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
844				cp_ctx(ctx, base + 0x33c, 0xb);
845				offset = base + 0x344;
846			} else {
847				cp_ctx(ctx, base + 0x33c, 0xd);
848				offset = base + 0x344;
849			}
850			gr_def(ctx, offset + 0x0, 0x00120407);
851			gr_def(ctx, offset + 0x4, 0x05091507);
852			if (dev_priv->chipset == 0x84)
853				gr_def(ctx, offset + 0x8, 0x05100202);
854			else
855				gr_def(ctx, offset + 0x8, 0x05010202);
856			gr_def(ctx, offset + 0xc, 0x00030201);
857
858			cp_ctx(ctx, base + 0x400, 2);
859			gr_def(ctx, base + 0x404, 0x00000040);
860			cp_ctx(ctx, base + 0x40c, 2);
861			gr_def(ctx, base + 0x40c, 0x0d0c0b0a);
862			gr_def(ctx, base + 0x410, 0x00141210);
863
864			if (dev_priv->chipset < 0xa0)
865				offset = base + 0x800;
866			else
867				offset = base + 0x500;
868			cp_ctx(ctx, offset, 6);
869			gr_def(ctx, offset + 0x0, 0x000001f0);
870			gr_def(ctx, offset + 0x4, 0x00000001);
871			gr_def(ctx, offset + 0x8, 0x00000003);
872			if (dev_priv->chipset == 0x50 || dev_priv->chipset >= 0xaa)
873				gr_def(ctx, offset + 0xc, 0x00008000);
874			gr_def(ctx, offset + 0x14, 0x00039e00);
875			cp_ctx(ctx, offset + 0x1c, 2);
876			if (dev_priv->chipset == 0x50)
877				gr_def(ctx, offset + 0x1c, 0x00000040);
878			else
879				gr_def(ctx, offset + 0x1c, 0x00000100);
880			gr_def(ctx, offset + 0x20, 0x00003800);
881
882			if (dev_priv->chipset >= 0xa0) {
883				cp_ctx(ctx, base + 0x54c, 2);
884				if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
885					gr_def(ctx, base + 0x54c, 0x003fe006);
886				else
887					gr_def(ctx, base + 0x54c, 0x003fe007);
888				gr_def(ctx, base + 0x550, 0x003fe000);
889			}
890
891			if (dev_priv->chipset < 0xa0)
892				offset = base + 0xa00;
893			else
894				offset = base + 0x680;
895			cp_ctx(ctx, offset, 1);
896			gr_def(ctx, offset, 0x00404040);
897
898			if (dev_priv->chipset < 0xa0)
899				offset = base + 0xe00;
900			else
901				offset = base + 0x700;
902			cp_ctx(ctx, offset, 2);
903			if (dev_priv->chipset < 0xa0)
904				gr_def(ctx, offset, 0x0077f005);
905			else if (dev_priv->chipset == 0xa5)
906				gr_def(ctx, offset, 0x6cf7f007);
907			else if (dev_priv->chipset == 0xa8)
908				gr_def(ctx, offset, 0x6cfff007);
909			else if (dev_priv->chipset == 0xac)
910				gr_def(ctx, offset, 0x0cfff007);
911			else
912				gr_def(ctx, offset, 0x0cf7f007);
913			if (dev_priv->chipset == 0x50)
914				gr_def(ctx, offset + 0x4, 0x00007fff);
915			else if (dev_priv->chipset < 0xa0)
916				gr_def(ctx, offset + 0x4, 0x003f7fff);
917			else
918				gr_def(ctx, offset + 0x4, 0x02bf7fff);
919			cp_ctx(ctx, offset + 0x2c, 1);
920			if (dev_priv->chipset == 0x50) {
921				cp_ctx(ctx, offset + 0x50, 9);
922				gr_def(ctx, offset + 0x54, 0x000003ff);
923				gr_def(ctx, offset + 0x58, 0x00000003);
924				gr_def(ctx, offset + 0x5c, 0x00000003);
925				gr_def(ctx, offset + 0x60, 0x000001ff);
926				gr_def(ctx, offset + 0x64, 0x0000001f);
927				gr_def(ctx, offset + 0x68, 0x0000000f);
928				gr_def(ctx, offset + 0x6c, 0x0000000f);
929			} else if(dev_priv->chipset < 0xa0) {
930				cp_ctx(ctx, offset + 0x50, 1);
931				cp_ctx(ctx, offset + 0x70, 1);
932			} else {
933				cp_ctx(ctx, offset + 0x50, 1);
934				cp_ctx(ctx, offset + 0x60, 5);
935			}
936		}
937	}
938}
939
940/*
941 * xfer areas. These are a pain.
942 *
943 * There are 2 xfer areas: the first one is big and contains all sorts of
944 * stuff, the second is small and contains some per-TP context.
945 *
946 * Each area is split into 8 "strands". The areas, when saved to grctx,
947 * are made of 8-word blocks. Each block contains a single word from
948 * each strand. The strands are independent of each other, their
949 * addresses are unrelated to each other, and data in them is closely
950 * packed together. The strand layout varies a bit between cards: here
951 * and there, a single word is thrown out in the middle and the whole
952 * strand is offset by a bit from corresponding one on another chipset.
953 * For this reason, addresses of stuff in strands are almost useless.
954 * Knowing sequence of stuff and size of gaps between them is much more
955 * useful, and that's how we build the strands in our generator.
956 *
957 * NVA0 takes this mess to a whole new level by cutting the old strands
958 * into a few dozen pieces [known as genes], rearranging them randomly,
959 * and putting them back together to make new strands. Hopefully these
960 * genes correspond more or less directly to the same PGRAPH subunits
961 * as in 400040 register.
962 *
963 * The most common value in default context is 0, and when the genes
 * are separated by 0's, gene boundaries are quite speculative...
965 * some of them can be clearly deduced, others can be guessed, and yet
966 * others won't be resolved without figuring out the real meaning of
967 * given ctxval. For the same reason, ending point of each strand
968 * is unknown. Except for strand 0, which is the longest strand and
969 * its end corresponds to end of the whole xfer.
970 *
971 * An unsolved mystery is the seek instruction: it takes an argument
972 * in bits 8-18, and that argument is clearly the place in strands to
973 * seek to... but the offsets don't seem to correspond to offsets as
974 * seen in grctx. Perhaps there's another, real, not randomly-changing
975 * addressing in strands, and the xfer insn just happens to skip over
976 * the unused bits? NV10-NV30 PIPE comes to mind...
977 *
978 * As far as I know, there's no way to access the xfer areas directly
979 * without the help of ctxprog.
980 */
981
982static inline void
983xf_emit(struct nouveau_grctx *ctx, int num, uint32_t val) {
984	int i;
985	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
986		for (i = 0; i < num; i++)
987			nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val);
988	ctx->ctxvals_pos += num << 3;
989}
990
/*
 * Forward declarations for the strand builders used by
 * nv50_graph_construct_xfer1(); the definitions follow it in this file.
 */

/* genes with a known owner */
static void nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx);

/* genes whose meaning has not been identified yet */
static void nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx);
static void nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx);

/* per-TP state */
static void nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx);
1006
1007static void
1008nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
1009{
1010	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1011	int i;
1012	int offset;
1013	int size = 0;
1014	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
1015
1016	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
1017	ctx->ctxvals_base = offset;
1018
1019	if (dev_priv->chipset < 0xa0) {
1020		/* Strand 0 */
1021		ctx->ctxvals_pos = offset;
1022		switch (dev_priv->chipset) {
1023		case 0x50:
1024			xf_emit(ctx, 0x99, 0);
1025			break;
1026		case 0x84:
1027		case 0x86:
1028			xf_emit(ctx, 0x384, 0);
1029			break;
1030		case 0x92:
1031		case 0x94:
1032		case 0x96:
1033		case 0x98:
1034			xf_emit(ctx, 0x380, 0);
1035			break;
1036		}
1037		nv50_graph_construct_gene_m2mf (ctx);
1038		switch (dev_priv->chipset) {
1039		case 0x50:
1040		case 0x84:
1041		case 0x86:
1042		case 0x98:
1043			xf_emit(ctx, 0x4c4, 0);
1044			break;
1045		case 0x92:
1046		case 0x94:
1047		case 0x96:
1048			xf_emit(ctx, 0x984, 0);
1049			break;
1050		}
1051		nv50_graph_construct_gene_unk5(ctx);
1052		if (dev_priv->chipset == 0x50)
1053			xf_emit(ctx, 0xa, 0);
1054		else
1055			xf_emit(ctx, 0xb, 0);
1056		nv50_graph_construct_gene_unk4(ctx);
1057		nv50_graph_construct_gene_unk3(ctx);
1058		if ((ctx->ctxvals_pos-offset)/8 > size)
1059			size = (ctx->ctxvals_pos-offset)/8;
1060
1061		/* Strand 1 */
1062		ctx->ctxvals_pos = offset + 0x1;
1063		nv50_graph_construct_gene_unk6(ctx);
1064		nv50_graph_construct_gene_unk7(ctx);
1065		nv50_graph_construct_gene_unk8(ctx);
1066		switch (dev_priv->chipset) {
1067		case 0x50:
1068		case 0x92:
1069			xf_emit(ctx, 0xfb, 0);
1070			break;
1071		case 0x84:
1072			xf_emit(ctx, 0xd3, 0);
1073			break;
1074		case 0x94:
1075		case 0x96:
1076			xf_emit(ctx, 0xab, 0);
1077			break;
1078		case 0x86:
1079		case 0x98:
1080			xf_emit(ctx, 0x6b, 0);
1081			break;
1082		}
1083		xf_emit(ctx, 2, 0x4e3bfdf);
1084		xf_emit(ctx, 4, 0);
1085		xf_emit(ctx, 1, 0x0fac6881);
1086		xf_emit(ctx, 0xb, 0);
1087		xf_emit(ctx, 2, 0x4e3bfdf);
1088		if ((ctx->ctxvals_pos-offset)/8 > size)
1089			size = (ctx->ctxvals_pos-offset)/8;
1090
1091		/* Strand 2 */
1092		ctx->ctxvals_pos = offset + 0x2;
1093		switch (dev_priv->chipset) {
1094		case 0x50:
1095		case 0x92:
1096			xf_emit(ctx, 0xa80, 0);
1097			break;
1098		case 0x84:
1099			xf_emit(ctx, 0xa7e, 0);
1100			break;
1101		case 0x94:
1102		case 0x96:
1103			xf_emit(ctx, 0xa7c, 0);
1104			break;
1105		case 0x86:
1106		case 0x98:
1107			xf_emit(ctx, 0xa7a, 0);
1108			break;
1109		}
1110		xf_emit(ctx, 1, 0x3fffff);
1111		xf_emit(ctx, 2, 0);
1112		xf_emit(ctx, 1, 0x1fff);
1113		xf_emit(ctx, 0xe, 0);
1114		nv50_graph_construct_gene_unk9(ctx);
1115		nv50_graph_construct_gene_unk2(ctx);
1116		nv50_graph_construct_gene_unk1(ctx);
1117		nv50_graph_construct_gene_unk10(ctx);
1118		if ((ctx->ctxvals_pos-offset)/8 > size)
1119			size = (ctx->ctxvals_pos-offset)/8;
1120
1121		/* Strand 3: per-ROP group state */
1122		ctx->ctxvals_pos = offset + 3;
1123		for (i = 0; i < 6; i++)
1124			if (units & (1 << (i + 16)))
1125				nv50_graph_construct_gene_ropc(ctx);
1126		if ((ctx->ctxvals_pos-offset)/8 > size)
1127			size = (ctx->ctxvals_pos-offset)/8;
1128
1129		/* Strands 4-7: per-TP state */
1130		for (i = 0; i < 4; i++) {
1131			ctx->ctxvals_pos = offset + 4 + i;
1132			if (units & (1 << (2 * i)))
1133				nv50_graph_construct_xfer_tp(ctx);
1134			if (units & (1 << (2 * i + 1)))
1135				nv50_graph_construct_xfer_tp(ctx);
1136			if ((ctx->ctxvals_pos-offset)/8 > size)
1137				size = (ctx->ctxvals_pos-offset)/8;
1138		}
1139	} else {
1140		/* Strand 0 */
1141		ctx->ctxvals_pos = offset;
1142		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1143			xf_emit(ctx, 0x385, 0);
1144		else
1145			xf_emit(ctx, 0x384, 0);
1146		nv50_graph_construct_gene_m2mf(ctx);
1147		xf_emit(ctx, 0x950, 0);
1148		nv50_graph_construct_gene_unk10(ctx);
1149		xf_emit(ctx, 1, 0x0fac6881);
1150		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
1151			xf_emit(ctx, 1, 1);
1152			xf_emit(ctx, 3, 0);
1153		}
1154		nv50_graph_construct_gene_unk8(ctx);
1155		if (dev_priv->chipset == 0xa0)
1156			xf_emit(ctx, 0x189, 0);
1157		else if (dev_priv->chipset < 0xa8)
1158			xf_emit(ctx, 0x99, 0);
1159		else if (dev_priv->chipset == 0xaa)
1160			xf_emit(ctx, 0x65, 0);
1161		else
1162			xf_emit(ctx, 0x6d, 0);
1163		nv50_graph_construct_gene_unk9(ctx);
1164		if ((ctx->ctxvals_pos-offset)/8 > size)
1165			size = (ctx->ctxvals_pos-offset)/8;
1166
1167		/* Strand 1 */
1168		ctx->ctxvals_pos = offset + 1;
1169		nv50_graph_construct_gene_unk1(ctx);
1170		if ((ctx->ctxvals_pos-offset)/8 > size)
1171			size = (ctx->ctxvals_pos-offset)/8;
1172
1173		/* Strand 2 */
1174		ctx->ctxvals_pos = offset + 2;
1175		if (dev_priv->chipset == 0xa0) {
1176			nv50_graph_construct_gene_unk2(ctx);
1177		}
1178		xf_emit(ctx, 0x36, 0);
1179		nv50_graph_construct_gene_unk5(ctx);
1180		if ((ctx->ctxvals_pos-offset)/8 > size)
1181			size = (ctx->ctxvals_pos-offset)/8;
1182
1183		/* Strand 3 */
1184		ctx->ctxvals_pos = offset + 3;
1185		xf_emit(ctx, 1, 0);
1186		xf_emit(ctx, 1, 1);
1187		nv50_graph_construct_gene_unk6(ctx);
1188		if ((ctx->ctxvals_pos-offset)/8 > size)
1189			size = (ctx->ctxvals_pos-offset)/8;
1190
1191		/* Strand 4 */
1192		ctx->ctxvals_pos = offset + 4;
1193		if (dev_priv->chipset == 0xa0)
1194			xf_emit(ctx, 0xa80, 0);
1195		else
1196			xf_emit(ctx, 0xa7a, 0);
1197		xf_emit(ctx, 1, 0x3fffff);
1198		xf_emit(ctx, 2, 0);
1199		xf_emit(ctx, 1, 0x1fff);
1200		if ((ctx->ctxvals_pos-offset)/8 > size)
1201			size = (ctx->ctxvals_pos-offset)/8;
1202
1203		/* Strand 5 */
1204		ctx->ctxvals_pos = offset + 5;
1205		xf_emit(ctx, 1, 0);
1206		xf_emit(ctx, 1, 0x0fac6881);
1207		xf_emit(ctx, 0xb, 0);
1208		xf_emit(ctx, 2, 0x4e3bfdf);
1209		xf_emit(ctx, 3, 0);
1210		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1211			xf_emit(ctx, 1, 0x11);
1212		xf_emit(ctx, 1, 0);
1213		xf_emit(ctx, 2, 0x4e3bfdf);
1214		xf_emit(ctx, 2, 0);
1215		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1216			xf_emit(ctx, 1, 0x11);
1217		xf_emit(ctx, 1, 0);
1218		for (i = 0; i < 8; i++)
1219			if (units & (1<<(i+16)))
1220				nv50_graph_construct_gene_ropc(ctx);
1221		if ((ctx->ctxvals_pos-offset)/8 > size)
1222			size = (ctx->ctxvals_pos-offset)/8;
1223
1224		/* Strand 6 */
1225		ctx->ctxvals_pos = offset + 6;
1226		nv50_graph_construct_gene_unk3(ctx);
1227		xf_emit(ctx, 0xb, 0);
1228		nv50_graph_construct_gene_unk4(ctx);
1229		nv50_graph_construct_gene_unk7(ctx);
1230		if (units & (1 << 0))
1231			nv50_graph_construct_xfer_tp(ctx);
1232		if (units & (1 << 1))
1233			nv50_graph_construct_xfer_tp(ctx);
1234		if (units & (1 << 2))
1235			nv50_graph_construct_xfer_tp(ctx);
1236		if (units & (1 << 3))
1237			nv50_graph_construct_xfer_tp(ctx);
1238		if ((ctx->ctxvals_pos-offset)/8 > size)
1239			size = (ctx->ctxvals_pos-offset)/8;
1240
1241		/* Strand 7 */
1242		ctx->ctxvals_pos = offset + 7;
1243		if (dev_priv->chipset == 0xa0) {
1244			if (units & (1 << 4))
1245				nv50_graph_construct_xfer_tp(ctx);
1246			if (units & (1 << 5))
1247				nv50_graph_construct_xfer_tp(ctx);
1248			if (units & (1 << 6))
1249				nv50_graph_construct_xfer_tp(ctx);
1250			if (units & (1 << 7))
1251				nv50_graph_construct_xfer_tp(ctx);
1252			if (units & (1 << 8))
1253				nv50_graph_construct_xfer_tp(ctx);
1254			if (units & (1 << 9))
1255				nv50_graph_construct_xfer_tp(ctx);
1256		} else {
1257			nv50_graph_construct_gene_unk2(ctx);
1258		}
1259		if ((ctx->ctxvals_pos-offset)/8 > size)
1260			size = (ctx->ctxvals_pos-offset)/8;
1261	}
1262
1263	ctx->ctxvals_pos = offset + size * 8;
1264	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
1265	cp_lsr (ctx, offset);
1266	cp_out (ctx, CP_SET_XFER_POINTER);
1267	cp_lsr (ctx, size);
1268	cp_out (ctx, CP_SEEK_1);
1269	cp_out (ctx, CP_XFER_1);
1270	cp_wait(ctx, XFER, BUSY);
1271}
1272
1273/*
1274 * non-trivial demagiced parts of ctx init go here
1275 */
1276
/*
 * Emit the m2mf gene: one context word per M2MF state field, in the
 * order listed below, each with its default value.
 */
static void
nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx)
{
	static const uint32_t m2mf_defaults[] = {
		0,	/* DMA_NOTIFY instance >> 4 */
		0,	/* DMA_BUFFER_IN instance >> 4 */
		0,	/* DMA_BUFFER_OUT instance >> 4 */
		0,	/* OFFSET_IN */
		0,	/* OFFSET_OUT */
		0,	/* PITCH_IN */
		0,	/* PITCH_OUT */
		0,	/* LINE_LENGTH */
		0,	/* LINE_COUNT */
		0x21,	/* FORMAT: bits 0-4 INPUT_INC, bits 5-9 OUTPUT_INC */
		1,	/* LINEAR_IN */
		0x2,	/* TILING_MODE_IN: bits 0-2 y tiling, bits 3-5 z tiling */
		0x100,	/* TILING_PITCH_IN */
		0x100,	/* TILING_HEIGHT_IN */
		1,	/* TILING_DEPTH_IN */
		0,	/* TILING_POSITION_IN_Z */
		0,	/* TILING_POSITION_IN */
		1,	/* LINEAR_OUT */
		0x2,	/* TILING_MODE_OUT: bits 0-2 y tiling, bits 3-5 z tiling */
		0x100,	/* TILING_PITCH_OUT */
		0x100,	/* TILING_HEIGHT_OUT */
		1,	/* TILING_DEPTH_OUT */
		0,	/* TILING_POSITION_OUT_Z */
		0,	/* TILING_POSITION_OUT */
		0,	/* OFFSET_IN_HIGH */
		0,	/* OFFSET_OUT_HIGH */
	};
	unsigned int field;

	for (field = 0;
	     field < sizeof(m2mf_defaults) / sizeof(m2mf_defaults[0]);
	     field++)
		xf_emit(ctx, 1, m2mf_defaults[field]);
}
1308
1309static void
1310nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx)
1311{
1312	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1313	/* end of area 2 on pre-NVA0, area 1 on NVAx */
1314	xf_emit(ctx, 2, 4);
1315	xf_emit(ctx, 1, 0);
1316	xf_emit(ctx, 1, 0x80);
1317	xf_emit(ctx, 1, 4);
1318	xf_emit(ctx, 1, 0x80c14);
1319	xf_emit(ctx, 1, 0);
1320	if (dev_priv->chipset == 0x50)
1321		xf_emit(ctx, 1, 0x3ff);
1322	else
1323		xf_emit(ctx, 1, 0x7ff);
1324	switch (dev_priv->chipset) {
1325	case 0x50:
1326	case 0x86:
1327	case 0x98:
1328	case 0xaa:
1329	case 0xac:
1330		xf_emit(ctx, 0x542, 0);
1331		break;
1332	case 0x84:
1333	case 0x92:
1334	case 0x94:
1335	case 0x96:
1336		xf_emit(ctx, 0x942, 0);
1337		break;
1338	case 0xa0:
1339		xf_emit(ctx, 0x2042, 0);
1340		break;
1341	case 0xa5:
1342	case 0xa8:
1343		xf_emit(ctx, 0x842, 0);
1344		break;
1345	}
1346	xf_emit(ctx, 2, 4);
1347	xf_emit(ctx, 1, 0);
1348	xf_emit(ctx, 1, 0x80);
1349	xf_emit(ctx, 1, 4);
1350	xf_emit(ctx, 1, 1);
1351	xf_emit(ctx, 1, 0);
1352	xf_emit(ctx, 1, 0x27);
1353	xf_emit(ctx, 1, 0);
1354	xf_emit(ctx, 1, 0x26);
1355	xf_emit(ctx, 3, 0);
1356}
1357
/*
 * Gene "unk10": nv50_graph_construct_xfer1() places it at the very end
 * of strand 2 on pre-NVA0 and in strand 0 (after the m2mf gene and a
 * zero gap) on NVAx.  The contents do not depend on the chipset.
 */
static void
nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx)
{
	static const struct {
		int num;
		uint32_t val;
	} words[] = {
		{ 0x10, 0x04000000 },
		{ 0x24, 0 },
		{ 2, 0x04e3bfdf },
		{ 2, 0 },
		{ 1, 0x1fe21 },
	};
	unsigned int run;

	for (run = 0; run < sizeof(words) / sizeof(words[0]); run++)
		xf_emit(ctx, words[run].num, words[run].val);
}
1368
1369static void
1370nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx)
1371{
1372	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1373	/* middle of area 2 on pre-NVA0, beginning of area 2 on NVA0, area 7 on >NVA0 */
1374	if (dev_priv->chipset != 0x50) {
1375		xf_emit(ctx, 5, 0);
1376		xf_emit(ctx, 1, 0x80c14);
1377		xf_emit(ctx, 2, 0);
1378		xf_emit(ctx, 1, 0x804);
1379		xf_emit(ctx, 1, 0);
1380		xf_emit(ctx, 2, 4);
1381		xf_emit(ctx, 1, 0x8100c12);
1382	}
1383	xf_emit(ctx, 1, 0);
1384	xf_emit(ctx, 2, 4);
1385	xf_emit(ctx, 1, 0);
1386	xf_emit(ctx, 1, 0x10);
1387	if (dev_priv->chipset == 0x50)
1388		xf_emit(ctx, 3, 0);
1389	else
1390		xf_emit(ctx, 4, 0);
1391	xf_emit(ctx, 1, 0x804);
1392	xf_emit(ctx, 1, 1);
1393	xf_emit(ctx, 1, 0x1a);
1394	if (dev_priv->chipset != 0x50)
1395		xf_emit(ctx, 1, 0x7f);
1396	xf_emit(ctx, 1, 0);
1397	xf_emit(ctx, 1, 1);
1398	xf_emit(ctx, 1, 0x80c14);
1399	xf_emit(ctx, 1, 0);
1400	xf_emit(ctx, 1, 0x8100c12);
1401	xf_emit(ctx, 2, 4);
1402	xf_emit(ctx, 1, 0);
1403	xf_emit(ctx, 1, 0x10);
1404	xf_emit(ctx, 3, 0);
1405	xf_emit(ctx, 1, 1);
1406	xf_emit(ctx, 1, 0x8100c12);
1407	xf_emit(ctx, 6, 0);
1408	if (dev_priv->chipset == 0x50)
1409		xf_emit(ctx, 1, 0x3ff);
1410	else
1411		xf_emit(ctx, 1, 0x7ff);
1412	xf_emit(ctx, 1, 0x80c14);
1413	xf_emit(ctx, 0x38, 0);
1414	xf_emit(ctx, 1, 1);
1415	xf_emit(ctx, 2, 0);
1416	xf_emit(ctx, 1, 0x10);
1417	xf_emit(ctx, 0x38, 0);
1418	xf_emit(ctx, 2, 0x88);
1419	xf_emit(ctx, 2, 0);
1420	xf_emit(ctx, 1, 4);
1421	xf_emit(ctx, 0x16, 0);
1422	xf_emit(ctx, 1, 0x26);
1423	xf_emit(ctx, 2, 0);
1424	xf_emit(ctx, 1, 0x3f800000);
1425	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1426		xf_emit(ctx, 4, 0);
1427	else
1428		xf_emit(ctx, 3, 0);
1429	xf_emit(ctx, 1, 0x1a);
1430	xf_emit(ctx, 1, 0x10);
1431	if (dev_priv->chipset != 0x50)
1432		xf_emit(ctx, 0x28, 0);
1433	else
1434		xf_emit(ctx, 0x25, 0);
1435	xf_emit(ctx, 1, 0x52);
1436	xf_emit(ctx, 1, 0);
1437	xf_emit(ctx, 1, 0x26);
1438	xf_emit(ctx, 1, 0);
1439	xf_emit(ctx, 2, 4);
1440	xf_emit(ctx, 1, 0);
1441	xf_emit(ctx, 1, 0x1a);
1442	xf_emit(ctx, 2, 0);
1443	xf_emit(ctx, 1, 0x00ffff00);
1444	xf_emit(ctx, 1, 0);
1445}
1446
1447static void
1448nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx)
1449{
1450	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1451	/* end of area 0 on pre-NVA0, beginning of area 6 on NVAx */
1452	xf_emit(ctx, 1, 0x3f);
1453	xf_emit(ctx, 0xa, 0);
1454	xf_emit(ctx, 1, 2);
1455	xf_emit(ctx, 2, 0x04000000);
1456	xf_emit(ctx, 8, 0);
1457	xf_emit(ctx, 1, 4);
1458	xf_emit(ctx, 3, 0);
1459	xf_emit(ctx, 1, 4);
1460	if (dev_priv->chipset == 0x50)
1461		xf_emit(ctx, 0x10, 0);
1462	else
1463		xf_emit(ctx, 0x11, 0);
1464	xf_emit(ctx, 1, 1);
1465	xf_emit(ctx, 1, 0x1001);
1466	xf_emit(ctx, 4, 0xffff);
1467	xf_emit(ctx, 0x20, 0);
1468	xf_emit(ctx, 0x10, 0x3f800000);
1469	xf_emit(ctx, 1, 0x10);
1470	if (dev_priv->chipset == 0x50)
1471		xf_emit(ctx, 1, 0);
1472	else
1473		xf_emit(ctx, 2, 0);
1474	xf_emit(ctx, 1, 3);
1475	xf_emit(ctx, 2, 0);
1476}
1477
/*
 * Gene "unk4": middle of strand 0 on pre-NVA0, middle of strand 6 on
 * NVAx.  Nine words, identical on every chipset.
 */
static void
nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx)
{
	static const struct {
		int num;
		uint32_t val;
	} words[] = {
		{ 2, 0x04000000 },
		{ 1, 0 },
		{ 1, 0x80 },
		{ 3, 0 },
		{ 1, 0x80 },
		{ 1, 0 },
	};
	unsigned int run;

	for (run = 0; run < sizeof(words) / sizeof(words[0]); run++)
		xf_emit(ctx, words[run].num, words[run].val);
}
1489
1490static void
1491nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx)
1492{
1493	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1494	/* middle of area 0 on pre-NVA0 [after m2mf], end of area 2 on NVAx */
1495	xf_emit(ctx, 2, 4);
1496	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1497		xf_emit(ctx, 0x1c4d, 0);
1498	else
1499		xf_emit(ctx, 0x1c4b, 0);
1500	xf_emit(ctx, 2, 4);
1501	xf_emit(ctx, 1, 0x8100c12);
1502	if (dev_priv->chipset != 0x50)
1503		xf_emit(ctx, 1, 3);
1504	xf_emit(ctx, 1, 0);
1505	xf_emit(ctx, 1, 0x8100c12);
1506	xf_emit(ctx, 1, 0);
1507	xf_emit(ctx, 1, 0x80c14);
1508	xf_emit(ctx, 1, 1);
1509	if (dev_priv->chipset >= 0xa0)
1510		xf_emit(ctx, 2, 4);
1511	xf_emit(ctx, 1, 0x80c14);
1512	xf_emit(ctx, 2, 0);
1513	xf_emit(ctx, 1, 0x8100c12);
1514	xf_emit(ctx, 1, 0x27);
1515	xf_emit(ctx, 2, 0);
1516	xf_emit(ctx, 1, 1);
1517	xf_emit(ctx, 0x3c1, 0);
1518	xf_emit(ctx, 1, 1);
1519	xf_emit(ctx, 0x16, 0);
1520	xf_emit(ctx, 1, 0x8100c12);
1521	xf_emit(ctx, 1, 0);
1522}
1523
1524static void
1525nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx)
1526{
1527	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1528	/* beginning of area 1 on pre-NVA0 [after m2mf], area 3 on NVAx */
1529	xf_emit(ctx, 4, 0);
1530	xf_emit(ctx, 1, 0xf);
1531	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1532		xf_emit(ctx, 8, 0);
1533	else
1534		xf_emit(ctx, 4, 0);
1535	xf_emit(ctx, 1, 0x20);
1536	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1537		xf_emit(ctx, 0x11, 0);
1538	else if (dev_priv->chipset >= 0xa0)
1539		xf_emit(ctx, 0xf, 0);
1540	else
1541		xf_emit(ctx, 0xe, 0);
1542	xf_emit(ctx, 1, 0x1a);
1543	xf_emit(ctx, 0xd, 0);
1544	xf_emit(ctx, 2, 4);
1545	xf_emit(ctx, 1, 0);
1546	xf_emit(ctx, 1, 4);
1547	xf_emit(ctx, 1, 8);
1548	xf_emit(ctx, 1, 0);
1549	if (dev_priv->chipset == 0x50)
1550		xf_emit(ctx, 1, 0x3ff);
1551	else
1552		xf_emit(ctx, 1, 0x7ff);
1553	if (dev_priv->chipset == 0xa8)
1554		xf_emit(ctx, 1, 0x1e00);
1555	xf_emit(ctx, 0xc, 0);
1556	xf_emit(ctx, 1, 0xf);
1557	if (dev_priv->chipset == 0x50)
1558		xf_emit(ctx, 0x125, 0);
1559	else if (dev_priv->chipset < 0xa0)
1560		xf_emit(ctx, 0x126, 0);
1561	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
1562		xf_emit(ctx, 0x124, 0);
1563	else
1564		xf_emit(ctx, 0x1f7, 0);
1565	xf_emit(ctx, 1, 0xf);
1566	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1567		xf_emit(ctx, 3, 0);
1568	else
1569		xf_emit(ctx, 1, 0);
1570	xf_emit(ctx, 1, 1);
1571	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1572		xf_emit(ctx, 0xa1, 0);
1573	else
1574		xf_emit(ctx, 0x5a, 0);
1575	xf_emit(ctx, 1, 0xf);
1576	if (dev_priv->chipset < 0xa0)
1577		xf_emit(ctx, 0x834, 0);
1578	else if (dev_priv->chipset == 0xa0)
1579		xf_emit(ctx, 0x1873, 0);
1580	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1581		xf_emit(ctx, 0x8ba, 0);
1582	else
1583		xf_emit(ctx, 0x833, 0);
1584	xf_emit(ctx, 1, 0xf);
1585	xf_emit(ctx, 0xf, 0);
1586}
1587
1588static void
1589nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx)
1590{
1591	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1592	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 6 on NVAx */
1593	xf_emit(ctx, 2, 0);
1594	if (dev_priv->chipset == 0x50)
1595		xf_emit(ctx, 2, 1);
1596	else
1597		xf_emit(ctx, 2, 0);
1598	xf_emit(ctx, 1, 0);
1599	xf_emit(ctx, 1, 1);
1600	xf_emit(ctx, 2, 0x100);
1601	xf_emit(ctx, 1, 0x11);
1602	xf_emit(ctx, 1, 0);
1603	xf_emit(ctx, 1, 8);
1604	xf_emit(ctx, 5, 0);
1605	xf_emit(ctx, 1, 1);
1606	xf_emit(ctx, 1, 0);
1607	xf_emit(ctx, 3, 1);
1608	xf_emit(ctx, 1, 0xcf);
1609	xf_emit(ctx, 1, 2);
1610	xf_emit(ctx, 6, 0);
1611	xf_emit(ctx, 1, 1);
1612	xf_emit(ctx, 1, 0);
1613	xf_emit(ctx, 3, 1);
1614	xf_emit(ctx, 4, 0);
1615	xf_emit(ctx, 1, 4);
1616	xf_emit(ctx, 1, 0);
1617	xf_emit(ctx, 1, 1);
1618	xf_emit(ctx, 1, 0x15);
1619	xf_emit(ctx, 3, 0);
1620	xf_emit(ctx, 1, 0x4444480);
1621	xf_emit(ctx, 0x37, 0);
1622}
1623
/*
 * Gene "unk8": middle of strand 1 on pre-NVA0 [after m2mf], middle of
 * strand 0 on NVAx.  Twenty words, identical on every chipset.
 */
static void
nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx)
{
	static const struct {
		int num;
		uint32_t val;
	} words[] = {
		{ 4, 0 },
		{ 1, 0x8100c12 },
		{ 4, 0 },
		{ 1, 0x100 },
		{ 2, 0 },
		{ 1, 0x10001 },
		{ 1, 0 },
		{ 1, 0x10001 },
		{ 1, 1 },
		{ 1, 0x10001 },
		{ 1, 1 },
		{ 1, 4 },
		{ 1, 2 },
	};
	unsigned int run;

	for (run = 0; run < sizeof(words) / sizeof(words[0]); run++)
		xf_emit(ctx, words[run].num, words[run].val);
}
1642
1643static void
1644nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx)
1645{
1646	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1647	/* middle of area 2 on pre-NVA0 [after m2mf], end of area 0 on NVAx */
1648	xf_emit(ctx, 1, 0x3f800000);
1649	xf_emit(ctx, 6, 0);
1650	xf_emit(ctx, 1, 4);
1651	xf_emit(ctx, 1, 0x1a);
1652	xf_emit(ctx, 2, 0);
1653	xf_emit(ctx, 1, 1);
1654	xf_emit(ctx, 0x12, 0);
1655	xf_emit(ctx, 1, 0x00ffff00);
1656	xf_emit(ctx, 6, 0);
1657	xf_emit(ctx, 1, 0xf);
1658	xf_emit(ctx, 7, 0);
1659	xf_emit(ctx, 1, 0x0fac6881);
1660	xf_emit(ctx, 1, 0x11);
1661	xf_emit(ctx, 0xf, 0);
1662	xf_emit(ctx, 1, 4);
1663	xf_emit(ctx, 2, 0);
1664	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1665		xf_emit(ctx, 1, 3);
1666	else if (dev_priv->chipset >= 0xa0)
1667		xf_emit(ctx, 1, 1);
1668	xf_emit(ctx, 2, 0);
1669	xf_emit(ctx, 1, 2);
1670	xf_emit(ctx, 2, 0x04000000);
1671	xf_emit(ctx, 3, 0);
1672	xf_emit(ctx, 1, 5);
1673	xf_emit(ctx, 1, 0x52);
1674	if (dev_priv->chipset == 0x50) {
1675		xf_emit(ctx, 0x13, 0);
1676	} else {
1677		xf_emit(ctx, 4, 0);
1678		xf_emit(ctx, 1, 1);
1679		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1680			xf_emit(ctx, 0x11, 0);
1681		else
1682			xf_emit(ctx, 0x10, 0);
1683	}
1684	xf_emit(ctx, 0x10, 0x3f800000);
1685	xf_emit(ctx, 1, 0x10);
1686	xf_emit(ctx, 0x26, 0);
1687	xf_emit(ctx, 1, 0x8100c12);
1688	xf_emit(ctx, 1, 5);
1689	xf_emit(ctx, 2, 0);
1690	xf_emit(ctx, 1, 1);
1691	xf_emit(ctx, 1, 0);
1692	xf_emit(ctx, 4, 0xffff);
1693	if (dev_priv->chipset != 0x50)
1694		xf_emit(ctx, 1, 3);
1695	if (dev_priv->chipset < 0xa0)
1696		xf_emit(ctx, 0x1f, 0);
1697	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1698		xf_emit(ctx, 0xc, 0);
1699	else
1700		xf_emit(ctx, 3, 0);
1701	xf_emit(ctx, 1, 0x00ffff00);
1702	xf_emit(ctx, 1, 0x1a);
1703	if (dev_priv->chipset != 0x50) {
1704		xf_emit(ctx, 1, 0);
1705		xf_emit(ctx, 1, 3);
1706	}
1707	if (dev_priv->chipset < 0xa0)
1708		xf_emit(ctx, 0x26, 0);
1709	else
1710		xf_emit(ctx, 0x3c, 0);
1711	xf_emit(ctx, 1, 0x102);
1712	xf_emit(ctx, 1, 0);
1713	xf_emit(ctx, 4, 4);
1714	if (dev_priv->chipset >= 0xa0)
1715		xf_emit(ctx, 8, 0);
1716	xf_emit(ctx, 2, 4);
1717	xf_emit(ctx, 1, 0);
1718	if (dev_priv->chipset == 0x50)
1719		xf_emit(ctx, 1, 0x3ff);
1720	else
1721		xf_emit(ctx, 1, 0x7ff);
1722	xf_emit(ctx, 1, 0);
1723	xf_emit(ctx, 1, 0x102);
1724	xf_emit(ctx, 9, 0);
1725	xf_emit(ctx, 4, 4);
1726	xf_emit(ctx, 0x2c, 0);
1727}
1728
1729static void
1730nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx)
1731{
1732	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1733	int magic2;
1734	if (dev_priv->chipset == 0x50) {
1735		magic2 = 0x00003e60;
1736	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
1737		magic2 = 0x001ffe67;
1738	} else {
1739		magic2 = 0x00087e67;
1740	}
1741	xf_emit(ctx, 8, 0);
1742	xf_emit(ctx, 1, 2);
1743	xf_emit(ctx, 1, 0);
1744	xf_emit(ctx, 1, magic2);
1745	xf_emit(ctx, 4, 0);
1746	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1747		xf_emit(ctx, 1, 1);
1748	xf_emit(ctx, 7, 0);
1749	if (dev_priv->chipset >= 0xa0 && dev_priv->chipset < 0xaa)
1750		xf_emit(ctx, 1, 0x15);
1751	xf_emit(ctx, 1, 0);
1752	xf_emit(ctx, 1, 1);
1753	xf_emit(ctx, 1, 0x10);
1754	xf_emit(ctx, 2, 0);
1755	xf_emit(ctx, 1, 1);
1756	xf_emit(ctx, 4, 0);
1757	if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x92 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa0) {
1758		xf_emit(ctx, 1, 4);
1759		xf_emit(ctx, 1, 0x400);
1760		xf_emit(ctx, 1, 0x300);
1761		xf_emit(ctx, 1, 0x1001);
1762		if (dev_priv->chipset != 0xa0) {
1763			if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1764				xf_emit(ctx, 1, 0);
1765			else
1766				xf_emit(ctx, 1, 0x15);
1767		}
1768		xf_emit(ctx, 3, 0);
1769	}
1770	xf_emit(ctx, 2, 0);
1771	xf_emit(ctx, 1, 2);
1772	xf_emit(ctx, 8, 0);
1773	xf_emit(ctx, 1, 1);
1774	xf_emit(ctx, 1, 0x10);
1775	xf_emit(ctx, 1, 0);
1776	xf_emit(ctx, 1, 1);
1777	xf_emit(ctx, 0x13, 0);
1778	xf_emit(ctx, 1, 0x10);
1779	xf_emit(ctx, 0x10, 0);
1780	xf_emit(ctx, 0x10, 0x3f800000);
1781	xf_emit(ctx, 0x19, 0);
1782	xf_emit(ctx, 1, 0x10);
1783	xf_emit(ctx, 1, 0);
1784	xf_emit(ctx, 1, 0x3f);
1785	xf_emit(ctx, 6, 0);
1786	xf_emit(ctx, 1, 1);
1787	xf_emit(ctx, 1, 0);
1788	xf_emit(ctx, 1, 1);
1789	xf_emit(ctx, 1, 0);
1790	xf_emit(ctx, 1, 1);
1791	if (dev_priv->chipset >= 0xa0) {
1792		xf_emit(ctx, 2, 0);
1793		xf_emit(ctx, 1, 0x1001);
1794		xf_emit(ctx, 0xb, 0);
1795	} else {
1796		xf_emit(ctx, 0xc, 0);
1797	}
1798	xf_emit(ctx, 1, 0x11);
1799	xf_emit(ctx, 7, 0);
1800	xf_emit(ctx, 1, 0xf);
1801	xf_emit(ctx, 7, 0);
1802	xf_emit(ctx, 1, 0x11);
1803	if (dev_priv->chipset == 0x50)
1804		xf_emit(ctx, 4, 0);
1805	else
1806		xf_emit(ctx, 6, 0);
1807	xf_emit(ctx, 3, 1);
1808	xf_emit(ctx, 1, 2);
1809	xf_emit(ctx, 1, 1);
1810	xf_emit(ctx, 1, 2);
1811	xf_emit(ctx, 1, 1);
1812	xf_emit(ctx, 1, 0);
1813	xf_emit(ctx, 1, magic2);
1814	xf_emit(ctx, 1, 0);
1815	xf_emit(ctx, 1, 0x0fac6881);
1816	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
1817		xf_emit(ctx, 1, 0);
1818		xf_emit(ctx, 0x18, 1);
1819		xf_emit(ctx, 8, 2);
1820		xf_emit(ctx, 8, 1);
1821		xf_emit(ctx, 8, 2);
1822		xf_emit(ctx, 8, 1);
1823		xf_emit(ctx, 3, 0);
1824		xf_emit(ctx, 1, 1);
1825		xf_emit(ctx, 5, 0);
1826		xf_emit(ctx, 1, 1);
1827		xf_emit(ctx, 0x16, 0);
1828	} else {
1829		if (dev_priv->chipset >= 0xa0)
1830			xf_emit(ctx, 0x1b, 0);
1831		else
1832			xf_emit(ctx, 0x15, 0);
1833	}
1834	xf_emit(ctx, 1, 1);
1835	xf_emit(ctx, 1, 2);
1836	xf_emit(ctx, 2, 1);
1837	xf_emit(ctx, 1, 2);
1838	xf_emit(ctx, 2, 1);
1839	if (dev_priv->chipset >= 0xa0)
1840		xf_emit(ctx, 4, 0);
1841	else
1842		xf_emit(ctx, 3, 0);
1843	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
1844		xf_emit(ctx, 0x10, 1);
1845		xf_emit(ctx, 8, 2);
1846		xf_emit(ctx, 0x10, 1);
1847		xf_emit(ctx, 8, 2);
1848		xf_emit(ctx, 8, 1);
1849		xf_emit(ctx, 3, 0);
1850	}
1851	xf_emit(ctx, 1, 0x11);
1852	xf_emit(ctx, 1, 1);
1853	xf_emit(ctx, 0x5b, 0);
1854}
1855
1856static void
1857nv50_graph_construct_xfer_tp_x1(struct nouveau_grctx *ctx)
1858{
1859	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1860	int magic3;
1861	if (dev_priv->chipset == 0x50)
1862		magic3 = 0x1000;
1863	else if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8)
1864		magic3 = 0x1e00;
1865	else
1866		magic3 = 0;
1867	xf_emit(ctx, 1, 0);
1868	xf_emit(ctx, 1, 4);
1869	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1870		xf_emit(ctx, 0x24, 0);
1871	else if (dev_priv->chipset >= 0xa0)
1872		xf_emit(ctx, 0x14, 0);
1873	else
1874		xf_emit(ctx, 0x15, 0);
1875	xf_emit(ctx, 2, 4);
1876	if (dev_priv->chipset >= 0xa0)
1877		xf_emit(ctx, 1, 0x03020100);
1878	else
1879		xf_emit(ctx, 1, 0x00608080);
1880	xf_emit(ctx, 4, 0);
1881	xf_emit(ctx, 1, 4);
1882	xf_emit(ctx, 2, 0);
1883	xf_emit(ctx, 2, 4);
1884	xf_emit(ctx, 1, 0x80);
1885	if (magic3)
1886		xf_emit(ctx, 1, magic3);
1887	xf_emit(ctx, 1, 4);
1888	xf_emit(ctx, 0x24, 0);
1889	xf_emit(ctx, 1, 4);
1890	xf_emit(ctx, 1, 0x80);
1891	xf_emit(ctx, 1, 4);
1892	xf_emit(ctx, 1, 0x03020100);
1893	xf_emit(ctx, 1, 3);
1894	if (magic3)
1895		xf_emit(ctx, 1, magic3);
1896	xf_emit(ctx, 1, 4);
1897	xf_emit(ctx, 4, 0);
1898	xf_emit(ctx, 1, 4);
1899	xf_emit(ctx, 1, 3);
1900	xf_emit(ctx, 3, 0);
1901	xf_emit(ctx, 1, 4);
1902	if (dev_priv->chipset == 0x94 || dev_priv->chipset == 0x96)
1903		xf_emit(ctx, 0x1024, 0);
1904	else if (dev_priv->chipset < 0xa0)
1905		xf_emit(ctx, 0xa24, 0);
1906	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
1907		xf_emit(ctx, 0x214, 0);
1908	else
1909		xf_emit(ctx, 0x414, 0);
1910	xf_emit(ctx, 1, 4);
1911	xf_emit(ctx, 1, 3);
1912	xf_emit(ctx, 2, 0);
1913}
1914
1915static void
1916nv50_graph_construct_xfer_tp_x2(struct nouveau_grctx *ctx)
1917{
1918	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
1919	int magic1, magic2;
1920	if (dev_priv->chipset == 0x50) {
1921		magic1 = 0x3ff;
1922		magic2 = 0x00003e60;
1923	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
1924		magic1 = 0x7ff;
1925		magic2 = 0x001ffe67;
1926	} else {
1927		magic1 = 0x7ff;
1928		magic2 = 0x00087e67;
1929	}
1930	xf_emit(ctx, 3, 0);
1931	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1932		xf_emit(ctx, 1, 1);
1933	xf_emit(ctx, 0xc, 0);
1934	xf_emit(ctx, 1, 0xf);
1935	xf_emit(ctx, 0xb, 0);
1936	xf_emit(ctx, 1, 4);
1937	xf_emit(ctx, 4, 0xffff);
1938	xf_emit(ctx, 8, 0);
1939	xf_emit(ctx, 1, 1);
1940	xf_emit(ctx, 3, 0);
1941	xf_emit(ctx, 1, 1);
1942	xf_emit(ctx, 5, 0);
1943	xf_emit(ctx, 1, 1);
1944	xf_emit(ctx, 2, 0);
1945	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
1946		xf_emit(ctx, 1, 3);
1947		xf_emit(ctx, 1, 0);
1948	} else if (dev_priv->chipset >= 0xa0)
1949		xf_emit(ctx, 1, 1);
1950	xf_emit(ctx, 0xa, 0);
1951	xf_emit(ctx, 2, 1);
1952	xf_emit(ctx, 1, 2);
1953	xf_emit(ctx, 2, 1);
1954	xf_emit(ctx, 1, 2);
1955	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
1956		xf_emit(ctx, 1, 0);
1957		xf_emit(ctx, 0x18, 1);
1958		xf_emit(ctx, 8, 2);
1959		xf_emit(ctx, 8, 1);
1960		xf_emit(ctx, 8, 2);
1961		xf_emit(ctx, 8, 1);
1962		xf_emit(ctx, 1, 0);
1963	}
1964	xf_emit(ctx, 1, 1);
1965	xf_emit(ctx, 1, 0);
1966	xf_emit(ctx, 1, 0x11);
1967	xf_emit(ctx, 7, 0);
1968	xf_emit(ctx, 1, 0x0fac6881);
1969	xf_emit(ctx, 2, 0);
1970	xf_emit(ctx, 1, 4);
1971	xf_emit(ctx, 3, 0);
1972	xf_emit(ctx, 1, 0x11);
1973	xf_emit(ctx, 1, 1);
1974	xf_emit(ctx, 1, 0);
1975	xf_emit(ctx, 3, 0xcf);
1976	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1977		xf_emit(ctx, 1, 1);
1978	xf_emit(ctx, 0xa, 0);
1979	xf_emit(ctx, 2, 1);
1980	xf_emit(ctx, 1, 2);
1981	xf_emit(ctx, 2, 1);
1982	xf_emit(ctx, 1, 2);
1983	xf_emit(ctx, 1, 1);
1984	xf_emit(ctx, 1, 0);
1985	xf_emit(ctx, 8, 1);
1986	xf_emit(ctx, 1, 0x11);
1987	xf_emit(ctx, 7, 0);
1988	xf_emit(ctx, 1, 0x0fac6881);
1989	xf_emit(ctx, 1, 0xf);
1990	xf_emit(ctx, 7, 0);
1991	xf_emit(ctx, 1, magic2);
1992	xf_emit(ctx, 2, 0);
1993	xf_emit(ctx, 1, 0x11);
1994	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
1995		xf_emit(ctx, 2, 1);
1996	else
1997		xf_emit(ctx, 1, 1);
1998	if(dev_priv->chipset == 0x50)
1999		xf_emit(ctx, 1, 0);
2000	else
2001		xf_emit(ctx, 3, 0);
2002	xf_emit(ctx, 1, 4);
2003	xf_emit(ctx, 5, 0);
2004	xf_emit(ctx, 1, 1);
2005	xf_emit(ctx, 4, 0);
2006	xf_emit(ctx, 1, 0x11);
2007	xf_emit(ctx, 7, 0);
2008	xf_emit(ctx, 1, 0x0fac6881);
2009	xf_emit(ctx, 3, 0);
2010	xf_emit(ctx, 1, 0x11);
2011	xf_emit(ctx, 1, 1);
2012	xf_emit(ctx, 1, 0);
2013	xf_emit(ctx, 1, 1);
2014	xf_emit(ctx, 1, 0);
2015	xf_emit(ctx, 1, 1);
2016	xf_emit(ctx, 1, 0);
2017	xf_emit(ctx, 1, magic1);
2018	xf_emit(ctx, 1, 0);
2019	xf_emit(ctx, 1, 1);
2020	xf_emit(ctx, 1, 0);
2021	xf_emit(ctx, 1, 1);
2022	xf_emit(ctx, 2, 0);
2023	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2024		xf_emit(ctx, 1, 1);
2025	xf_emit(ctx, 0x28, 0);
2026	xf_emit(ctx, 8, 8);
2027	xf_emit(ctx, 1, 0x11);
2028	xf_emit(ctx, 7, 0);
2029	xf_emit(ctx, 1, 0x0fac6881);
2030	xf_emit(ctx, 8, 0x400);
2031	xf_emit(ctx, 8, 0x300);
2032	xf_emit(ctx, 1, 1);
2033	xf_emit(ctx, 1, 0xf);
2034	xf_emit(ctx, 7, 0);
2035	xf_emit(ctx, 1, 0x20);
2036	xf_emit(ctx, 1, 0x11);
2037	xf_emit(ctx, 1, 0x100);
2038	xf_emit(ctx, 1, 0);
2039	xf_emit(ctx, 1, 1);
2040	xf_emit(ctx, 2, 0);
2041	xf_emit(ctx, 1, 0x40);
2042	xf_emit(ctx, 1, 0x100);
2043	xf_emit(ctx, 1, 0);
2044	xf_emit(ctx, 1, 3);
2045	xf_emit(ctx, 4, 0);
2046	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2047		xf_emit(ctx, 1, 1);
2048	xf_emit(ctx, 1, magic2);
2049	xf_emit(ctx, 3, 0);
2050	xf_emit(ctx, 1, 2);
2051	xf_emit(ctx, 1, 0x0fac6881);
2052	xf_emit(ctx, 9, 0);
2053	xf_emit(ctx, 1, 1);
2054	xf_emit(ctx, 4, 0);
2055	xf_emit(ctx, 1, 4);
2056	xf_emit(ctx, 1, 0);
2057	xf_emit(ctx, 1, 1);
2058	xf_emit(ctx, 1, 0x400);
2059	xf_emit(ctx, 1, 0x300);
2060	xf_emit(ctx, 1, 0x1001);
2061	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2062		xf_emit(ctx, 4, 0);
2063	else
2064		xf_emit(ctx, 3, 0);
2065	xf_emit(ctx, 1, 0x11);
2066	xf_emit(ctx, 7, 0);
2067	xf_emit(ctx, 1, 0x0fac6881);
2068	xf_emit(ctx, 1, 0xf);
2069	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
2070		xf_emit(ctx, 0x15, 0);
2071		xf_emit(ctx, 1, 1);
2072		xf_emit(ctx, 3, 0);
2073	} else
2074		xf_emit(ctx, 0x17, 0);
2075	if (dev_priv->chipset >= 0xa0)
2076		xf_emit(ctx, 1, 0x0fac6881);
2077	xf_emit(ctx, 1, magic2);
2078	xf_emit(ctx, 3, 0);
2079	xf_emit(ctx, 1, 0x11);
2080	xf_emit(ctx, 2, 0);
2081	xf_emit(ctx, 1, 4);
2082	xf_emit(ctx, 1, 0);
2083	xf_emit(ctx, 2, 1);
2084	xf_emit(ctx, 3, 0);
2085	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2086		xf_emit(ctx, 2, 1);
2087	else
2088		xf_emit(ctx, 1, 1);
2089	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2090		xf_emit(ctx, 2, 0);
2091	else if (dev_priv->chipset != 0x50)
2092		xf_emit(ctx, 1, 0);
2093}
2094
2095static void
2096nv50_graph_construct_xfer_tp_x3(struct nouveau_grctx *ctx)
2097{
2098	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2099	xf_emit(ctx, 3, 0);
2100	xf_emit(ctx, 1, 1);
2101	xf_emit(ctx, 1, 0);
2102	xf_emit(ctx, 1, 1);
2103	if (dev_priv->chipset == 0x50)
2104		xf_emit(ctx, 2, 0);
2105	else
2106		xf_emit(ctx, 3, 0);
2107	xf_emit(ctx, 1, 0x2a712488);
2108	xf_emit(ctx, 1, 0);
2109	xf_emit(ctx, 1, 0x4085c000);
2110	xf_emit(ctx, 1, 0x40);
2111	xf_emit(ctx, 1, 0x100);
2112	xf_emit(ctx, 1, 0x10100);
2113	xf_emit(ctx, 1, 0x02800000);
2114}
2115
2116static void
2117nv50_graph_construct_xfer_tp_x4(struct nouveau_grctx *ctx)
2118{
2119	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2120	xf_emit(ctx, 2, 0x04e3bfdf);
2121	xf_emit(ctx, 1, 1);
2122	xf_emit(ctx, 1, 0);
2123	xf_emit(ctx, 1, 0x00ffff00);
2124	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2125		xf_emit(ctx, 2, 1);
2126	else
2127		xf_emit(ctx, 1, 1);
2128	xf_emit(ctx, 2, 0);
2129	xf_emit(ctx, 1, 0x00ffff00);
2130	xf_emit(ctx, 8, 0);
2131	xf_emit(ctx, 1, 1);
2132	xf_emit(ctx, 1, 0);
2133	xf_emit(ctx, 1, 1);
2134	xf_emit(ctx, 1, 0x30201000);
2135	xf_emit(ctx, 1, 0x70605040);
2136	xf_emit(ctx, 1, 0xb8a89888);
2137	xf_emit(ctx, 1, 0xf8e8d8c8);
2138	xf_emit(ctx, 1, 0);
2139	xf_emit(ctx, 1, 0x1a);
2140}
2141
2142static void
2143nv50_graph_construct_xfer_tp_x5(struct nouveau_grctx *ctx)
2144{
2145	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2146	xf_emit(ctx, 3, 0);
2147	xf_emit(ctx, 1, 0xfac6881);
2148	xf_emit(ctx, 4, 0);
2149	xf_emit(ctx, 1, 4);
2150	xf_emit(ctx, 1, 0);
2151	xf_emit(ctx, 2, 1);
2152	xf_emit(ctx, 2, 0);
2153	xf_emit(ctx, 1, 1);
2154	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2155		xf_emit(ctx, 0xb, 0);
2156	else
2157		xf_emit(ctx, 0xa, 0);
2158	xf_emit(ctx, 8, 1);
2159	xf_emit(ctx, 1, 0x11);
2160	xf_emit(ctx, 7, 0);
2161	xf_emit(ctx, 1, 0xfac6881);
2162	xf_emit(ctx, 1, 0xf);
2163	xf_emit(ctx, 7, 0);
2164	xf_emit(ctx, 1, 0x11);
2165	xf_emit(ctx, 1, 1);
2166	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
2167		xf_emit(ctx, 6, 0);
2168		xf_emit(ctx, 1, 1);
2169		xf_emit(ctx, 6, 0);
2170	} else {
2171		xf_emit(ctx, 0xb, 0);
2172	}
2173}
2174
2175static void
2176nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx)
2177{
2178	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2179	if (dev_priv->chipset < 0xa0) {
2180		nv50_graph_construct_xfer_tp_x1(ctx);
2181		nv50_graph_construct_xfer_tp_x2(ctx);
2182		nv50_graph_construct_xfer_tp_x3(ctx);
2183		if (dev_priv->chipset == 0x50)
2184			xf_emit(ctx, 0xf, 0);
2185		else
2186			xf_emit(ctx, 0x12, 0);
2187		nv50_graph_construct_xfer_tp_x4(ctx);
2188	} else {
2189		nv50_graph_construct_xfer_tp_x3(ctx);
2190		if (dev_priv->chipset < 0xaa)
2191			xf_emit(ctx, 0xc, 0);
2192		else
2193			xf_emit(ctx, 0xa, 0);
2194		nv50_graph_construct_xfer_tp_x2(ctx);
2195		nv50_graph_construct_xfer_tp_x5(ctx);
2196		nv50_graph_construct_xfer_tp_x4(ctx);
2197		nv50_graph_construct_xfer_tp_x1(ctx);
2198	}
2199}
2200
2201static void
2202nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx)
2203{
2204	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2205	int i, mpcnt;
2206	if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
2207		mpcnt = 1;
2208	else if (dev_priv->chipset < 0xa0 || dev_priv->chipset >= 0xa8)
2209		mpcnt = 2;
2210	else
2211		mpcnt = 3;
2212	for (i = 0; i < mpcnt; i++) {
2213		xf_emit(ctx, 1, 0);
2214		xf_emit(ctx, 1, 0x80);
2215		xf_emit(ctx, 1, 0x80007004);
2216		xf_emit(ctx, 1, 0x04000400);
2217		if (dev_priv->chipset >= 0xa0)
2218			xf_emit(ctx, 1, 0xc0);
2219		xf_emit(ctx, 1, 0x1000);
2220		xf_emit(ctx, 2, 0);
2221		if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) {
2222			xf_emit(ctx, 1, 0xe00);
2223			xf_emit(ctx, 1, 0x1e00);
2224		}
2225		xf_emit(ctx, 1, 1);
2226		xf_emit(ctx, 2, 0);
2227		if (dev_priv->chipset == 0x50)
2228			xf_emit(ctx, 2, 0x1000);
2229		xf_emit(ctx, 1, 1);
2230		xf_emit(ctx, 1, 0);
2231		xf_emit(ctx, 1, 4);
2232		xf_emit(ctx, 1, 2);
2233		if (dev_priv->chipset >= 0xaa)
2234			xf_emit(ctx, 0xb, 0);
2235		else if (dev_priv->chipset >= 0xa0)
2236			xf_emit(ctx, 0xc, 0);
2237		else
2238			xf_emit(ctx, 0xa, 0);
2239	}
2240	xf_emit(ctx, 1, 0x08100c12);
2241	xf_emit(ctx, 1, 0);
2242	if (dev_priv->chipset >= 0xa0) {
2243		xf_emit(ctx, 1, 0x1fe21);
2244	}
2245	xf_emit(ctx, 5, 0);
2246	xf_emit(ctx, 4, 0xffff);
2247	xf_emit(ctx, 1, 1);
2248	xf_emit(ctx, 2, 0x10001);
2249	xf_emit(ctx, 1, 1);
2250	xf_emit(ctx, 1, 0);
2251	xf_emit(ctx, 1, 0x1fe21);
2252	xf_emit(ctx, 1, 0);
2253	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2254		xf_emit(ctx, 1, 1);
2255	xf_emit(ctx, 4, 0);
2256	xf_emit(ctx, 1, 0x08100c12);
2257	xf_emit(ctx, 1, 4);
2258	xf_emit(ctx, 1, 0);
2259	xf_emit(ctx, 1, 2);
2260	xf_emit(ctx, 1, 0x11);
2261	xf_emit(ctx, 8, 0);
2262	xf_emit(ctx, 1, 0xfac6881);
2263	xf_emit(ctx, 1, 0);
2264	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
2265		xf_emit(ctx, 1, 3);
2266	xf_emit(ctx, 3, 0);
2267	xf_emit(ctx, 1, 4);
2268	xf_emit(ctx, 9, 0);
2269	xf_emit(ctx, 1, 2);
2270	xf_emit(ctx, 2, 1);
2271	xf_emit(ctx, 1, 2);
2272	xf_emit(ctx, 3, 1);
2273	xf_emit(ctx, 1, 0);
2274	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
2275		xf_emit(ctx, 8, 2);
2276		xf_emit(ctx, 0x10, 1);
2277		xf_emit(ctx, 8, 2);
2278		xf_emit(ctx, 0x18, 1);
2279		xf_emit(ctx, 3, 0);
2280	}
2281	xf_emit(ctx, 1, 4);
2282	if (dev_priv->chipset == 0x50)
2283		xf_emit(ctx, 0x3a0, 0);
2284	else if (dev_priv->chipset < 0x94)
2285		xf_emit(ctx, 0x3a2, 0);
2286	else if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
2287		xf_emit(ctx, 0x39f, 0);
2288	else
2289		xf_emit(ctx, 0x3a3, 0);
2290	xf_emit(ctx, 1, 0x11);
2291	xf_emit(ctx, 1, 0);
2292	xf_emit(ctx, 1, 1);
2293	xf_emit(ctx, 0x2d, 0);
2294}
2295
2296static void
2297nv50_graph_construct_xfer2(struct nouveau_grctx *ctx)
2298{
2299	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
2300	int i;
2301	uint32_t offset;
2302	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
2303	int size = 0;
2304
2305	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
2306
2307	if (dev_priv->chipset < 0xa0) {
2308		for (i = 0; i < 8; i++) {
2309			ctx->ctxvals_pos = offset + i;
2310			if (i == 0)
2311				xf_emit(ctx, 1, 0x08100c12);
2312			if (units & (1 << i))
2313				nv50_graph_construct_xfer_tp2(ctx);
2314			if ((ctx->ctxvals_pos-offset)/8 > size)
2315				size = (ctx->ctxvals_pos-offset)/8;
2316		}
2317	} else {
2318		/* Strand 0: TPs 0, 1 */
2319		ctx->ctxvals_pos = offset;
2320		xf_emit(ctx, 1, 0x08100c12);
2321		if (units & (1 << 0))
2322			nv50_graph_construct_xfer_tp2(ctx);
2323		if (units & (1 << 1))
2324			nv50_graph_construct_xfer_tp2(ctx);
2325		if ((ctx->ctxvals_pos-offset)/8 > size)
2326			size = (ctx->ctxvals_pos-offset)/8;
2327
2328		/* Strand 0: TPs 2, 3 */
2329		ctx->ctxvals_pos = offset + 1;
2330		if (units & (1 << 2))
2331			nv50_graph_construct_xfer_tp2(ctx);
2332		if (units & (1 << 3))
2333			nv50_graph_construct_xfer_tp2(ctx);
2334		if ((ctx->ctxvals_pos-offset)/8 > size)
2335			size = (ctx->ctxvals_pos-offset)/8;
2336
2337		/* Strand 0: TPs 4, 5, 6 */
2338		ctx->ctxvals_pos = offset + 2;
2339		if (units & (1 << 4))
2340			nv50_graph_construct_xfer_tp2(ctx);
2341		if (units & (1 << 5))
2342			nv50_graph_construct_xfer_tp2(ctx);
2343		if (units & (1 << 6))
2344			nv50_graph_construct_xfer_tp2(ctx);
2345		if ((ctx->ctxvals_pos-offset)/8 > size)
2346			size = (ctx->ctxvals_pos-offset)/8;
2347
2348		/* Strand 0: TPs 7, 8, 9 */
2349		ctx->ctxvals_pos = offset + 3;
2350		if (units & (1 << 7))
2351			nv50_graph_construct_xfer_tp2(ctx);
2352		if (units & (1 << 8))
2353			nv50_graph_construct_xfer_tp2(ctx);
2354		if (units & (1 << 9))
2355			nv50_graph_construct_xfer_tp2(ctx);
2356		if ((ctx->ctxvals_pos-offset)/8 > size)
2357			size = (ctx->ctxvals_pos-offset)/8;
2358	}
2359	ctx->ctxvals_pos = offset + size * 8;
2360	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
2361	cp_lsr (ctx, offset);
2362	cp_out (ctx, CP_SET_XFER_POINTER);
2363	cp_lsr (ctx, size);
2364	cp_out (ctx, CP_SEEK_2);
2365	cp_out (ctx, CP_XFER_2);
2366	cp_wait(ctx, XFER, BUSY);
2367}
2368