/* fuc microcode for nvc0 PGRAPH/HUB
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

/* To build:
 *    m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
 */

.section #nvc0_grhub_data
include(`nvc0_graph.fuc')
// unit counts, stored by init from the 0x409604 readback
// (gpc_count = bits 4:0, rop_count = bits 20:16)
gpc_count:		.b32 0
rop_count:		.b32 0
// command queue: the interrupt handler puts entries in, main takes them out
cmd_queue:		queue_init
// bounds of the mmio context list picked for the detected chipset,
// stored by init and read back by ctx_xfer
hub_mmio_list_head:	.b32 0
hub_mmio_list_tail:	.b32 0

// context pointer computed by the most recent ctx_load
ctx_current:		.b32 0

// Chipset lookup table, walked by init in 8-byte steps.  Each entry is a
// 4-byte chipset id (loaded as one 32-bit word and compared against the
// value read from CC_SCRATCH[0]) followed by the 16-bit start and end
// addresses of the mmio list to use.  An entry whose id is 0 ends the table.
chipsets:
.b8  0xc0 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xc1 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc1_hub_mmio_tail
.b8  0xc3 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xc4 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xc8 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xce 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xcf 0 0 0
.b16 #nvc0_hub_mmio_head
.b16 #nvc0_hub_mmio_tail
.b8  0xd9 0 0 0
.b16 #nvd9_hub_mmio_head
.b16 #nvd9_hub_mmio_tail
.b8  0 0 0 0

// mmio context list used by every table entry above except 0xd9.
// The entry macro comes from nvc0_graph.fuc; its two arguments are
// presumably a starting register address and a register count (TODO confirm).
nvc0_hub_mmio_head:
mmctx_data(0x17e91c, 2)
mmctx_data(0x400204, 2)
mmctx_data(0x404004, 11)
mmctx_data(0x404044, 1)
mmctx_data(0x404094, 14)
mmctx_data(0x4040d0, 7)
mmctx_data(0x4040f8, 1)
mmctx_data(0x404130, 3)
mmctx_data(0x404150, 3)
mmctx_data(0x404164, 2)
mmctx_data(0x404174, 3)
mmctx_data(0x404200, 8)
mmctx_data(0x404404, 14)
mmctx_data(0x404460, 4)
mmctx_data(0x404480, 1)
mmctx_data(0x404498, 1)
mmctx_data(0x404604, 4)
mmctx_data(0x404618, 32)
mmctx_data(0x404698, 21)
mmctx_data(0x4046f0, 2)
mmctx_data(0x404700, 22)
mmctx_data(0x405800, 1)
mmctx_data(0x405830, 3)
mmctx_data(0x405854, 1)
mmctx_data(0x405870, 4)
mmctx_data(0x405a00, 2)
mmctx_data(0x405a18, 1)
mmctx_data(0x406020, 1)
mmctx_data(0x406028, 4)
mmctx_data(0x4064a8, 2)
mmctx_data(0x4064b4, 2)
mmctx_data(0x407804, 1)
mmctx_data(0x40780c, 6)
mmctx_data(0x4078bc, 1)
mmctx_data(0x408000, 7)
mmctx_data(0x408064, 1)
mmctx_data(0x408800, 3)
mmctx_data(0x408900, 4)
mmctx_data(0x408980, 1)
nvc0_hub_mmio_tail:
// one extra list entry, only inside the bounds used by the 0xc1 table entry
mmctx_data(0x4064c0, 2)
nvc1_hub_mmio_tail:

// mmio context list used by the 0xd9 table entry
nvd9_hub_mmio_head:
mmctx_data(0x17e91c, 2)
mmctx_data(0x400204, 2)
mmctx_data(0x404004, 10)
mmctx_data(0x404044, 1)
mmctx_data(0x404094, 14)
mmctx_data(0x4040d0, 7)
mmctx_data(0x4040f8, 1)
mmctx_data(0x404130, 3)
mmctx_data(0x404150, 3)
mmctx_data(0x404164, 2)
mmctx_data(0x404178, 2)
mmctx_data(0x404200, 8)
mmctx_data(0x404404, 14)
mmctx_data(0x404460, 4)
mmctx_data(0x404480, 1)
mmctx_data(0x404498, 1)
mmctx_data(0x404604, 4)
mmctx_data(0x404618, 32)
mmctx_data(0x404698, 21)
mmctx_data(0x4046f0, 2)
mmctx_data(0x404700, 22)
mmctx_data(0x405800, 1)
mmctx_data(0x405830, 3)
mmctx_data(0x405854, 1)
mmctx_data(0x405870, 4)
mmctx_data(0x405a00, 2)
mmctx_data(0x405a18, 1)
mmctx_data(0x406020, 1)
mmctx_data(0x406028, 4)
mmctx_data(0x4064a8, 2)
mmctx_data(0x4064b4, 5)
mmctx_data(0x407804, 1)
mmctx_data(0x40780c, 6)
mmctx_data(0x4078bc, 1)
mmctx_data(0x408000, 7)
mmctx_data(0x408064, 1)
mmctx_data(0x408800, 3)
mmctx_data(0x408900, 4)
mmctx_data(0x408980, 1)
nvd9_hub_mmio_tail:

// buffer for the 256-byte context header fetched by ctx_load; the first
// two words are the per-channel mmio list length and address
.align 256
chan_data:
chan_mmio_count:	.b32 0
chan_mmio_address:	.b32 0

// scratch buffer for xdld transfers (16 bytes in ctx_load, 256 bytes in
// ctx_mmio_exec).  Only one word is reserved here; it is the last item
// declared in this data section.
.align 256
xfer_data: 		.b32 0

.section #nvc0_grhub_code
bra #init
define(`include_code')
include(`nvc0_graph.fuc')

// reports an exception to the host
//
// In: $r15 error code (see nvc0_graph.fuc)
//
// $r14 is preserved; $r15 is overwritten (left holding 1).
//
error:
	push $r14
	mov $r14 0x814
	shl b32 $r14 6
	iowr I[$r14 + 0x000] $r15	// CC_SCRATCH[5] = error code
	mov $r14 0xc1c
	shl b32 $r14 6
	mov $r15 1
	iowr I[$r14 + 0x000] $r15	// INTR_UP_SET
	pop $r14
	ret

// HUB fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// Input:
//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
//
// Output:
//   CC_SCRATCH[0]:
//	     31:31: set to signal completion
//   CC_SCRATCH[1]:
//	      31:0: total PGRAPH context size
//
init:
	clear b32 $r0
	mov $sp $r0
	mov $xdbase $r0

	// enable fifo access
	mov $r1 0x1200
	mov $r2 2
	iowr I[$r1 + 0x000] $r2	// FIFO_ENABLE

	// setup i0 handler, and route all interrupts to it
	mov $r1 #ih
	mov $iv0 $r1
	mov $r1 0x400
	iowr I[$r1 + 0x300] $r0	// INTR_DISPATCH

	// route HUB_CHANNEL_SWITCH to fuc interrupt 8
	mov $r3 0x404
	shl b32 $r3 6
	mov $r2 0x2003		// { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
	iowr I[$r3 + 0x000] $r2

	// not sure what these are, route them because NVIDIA does, and
	// the IRQ handler will signal the host if we ever get one.. we
	// may find out if/why we need to handle these if so..
	//
	mov $r2 0x2004
	iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
	mov $r2 0x200b
	iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
	mov $r2 0x200c
	iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15

	// enable all INTR_UP interrupts
	mov $r2 0xc24
	shl b32 $r2 6
	not b32 $r3 $r0
	iowr I[$r2] $r3

	// enable fifo, ctxsw, 9, 10, 15 interrupts
	// ($r1 still holds 0x400 from the dispatch setup above)
	mov $r2 -0x78fc // 0x8704
	sethi $r2 0
	iowr I[$r1 + 0x000] $r2 // INTR_EN_SET

	// fifo level triggered, rest edge
	sub b32 $r1 0x100
	mov $r2 4
	iowr I[$r1] $r2

	// enable interrupts
	bset $flags ie0

	// fetch enabled GPC/ROP counts
	mov $r14 -0x69fc	// 0x409604
	sethi $r14 0x400000
	call #nv_rd32
	extr $r1 $r15 16:20
	st b32 D[$r0 + #rop_count] $r1
	and $r15 0x1f
	st b32 D[$r0 + #gpc_count] $r15

	// set BAR_REQMASK to GPC mask, i.e. (1 << gpc_count) - 1
	mov $r1 1
	shl b32 $r1 $r15
	sub b32 $r1 1
	mov $r2 0x40c
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r1
	iowr I[$r2 + 0x100] $r1

	// find context data for this chipset
	mov $r2 0x800
	shl b32 $r2 6
	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
	mov $r15 #chipsets - 8
	init_find_chipset:
		add b32 $r15 8
		ld b32 $r3 D[$r15 + 0x00]
		cmpu b32 $r3 $r2
		bra e #init_context
		cmpu b32 $r3 0
		bra ne #init_find_chipset
		// unknown chipset
		// NOTE(review): init is reached via "bra #init" with $sp just
		// zeroed, so nothing in this file has pushed a return address
		// for this ret -- confirm what is intended to happen here
		ret

	// context size calculation, reserve first 256 bytes for use by fuc
	init_context:
	mov $r1 256

	// calculate size of mmio context data
	ld b16 $r14 D[$r15 + 4]
	ld b16 $r15 D[$r15 + 6]
	sethi $r14 0
	st b32 D[$r0 + #hub_mmio_list_head] $r14
	st b32 D[$r0 + #hub_mmio_list_tail] $r15
	call #mmctx_size

	// set mmctx base addresses now so we don't have to do it later,
	// they don't (currently) ever change
	mov $r3 0x700
	shl b32 $r3 6
	shr b32 $r4 $r1 8
	iowr I[$r3 + 0x000] $r4		// MMCTX_SAVE_SWBASE
	iowr I[$r3 + 0x100] $r4		// MMCTX_LOAD_SWBASE
	add b32 $r3 0x1300
	add b32 $r1 $r15
	shr b32 $r15 2
	iowr I[$r3 + 0x000] $r15	// MMCTX_LOAD_COUNT, wtf for?!?

	// strands, base offset needs to be aligned to 256 bytes
	shr b32 $r1 8
	add b32 $r1 1
	shl b32 $r1 8
	mov b32 $r15 $r1
	call #strand_ctx_init
	add b32 $r1 $r15

	// initialise each GPC in sequence by passing in the offset of its
	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
	// has previously been uploaded by the host) running.
	//
	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
	// when it has completed, and return the size of its context data
	// in GPCn_CC_SCRATCH[1]
	//
	ld b32 $r3 D[$r0 + #gpc_count]
	mov $r4 0x2000
	sethi $r4 0x500000
	init_gpc:
		// setup, and start GPC ucode running
		add b32 $r14 $r4 0x804
		mov b32 $r15 $r1
		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
		add b32 $r14 $r4 0x800
		mov b32 $r15 $r2
		call #nv_wr32			// CC_SCRATCH[0] = chipset
		add b32 $r14 $r4 0x10c
		clear b32 $r15
		call #nv_wr32
		add b32 $r14 $r4 0x104
		call #nv_wr32			// ENTRY
		add b32 $r14 $r4 0x100
		mov $r15 2			// CTRL_START_TRIGGER
		call #nv_wr32			// CTRL

		// wait for it to complete, and adjust context size
		add b32 $r14 $r4 0x800
		init_gpc_wait:
			call #nv_rd32
			xbit $r15 $r15 31
			bra e #init_gpc_wait
		add b32 $r14 $r4 0x804
		call #nv_rd32
		add b32 $r1 $r15

		// next!
		add b32 $r4 0x8000
		sub b32 $r3 1
		bra ne #init_gpc

	// save context size, and tell host we're ready
	mov $r2 0x800
	shl b32 $r2 6
	iowr I[$r2 + 0x100] $r1		// CC_SCRATCH[1]  = context size
	add b32 $r2 0x800
	clear b32 $r1
	bset $r1 31
	iowr I[$r2 + 0x000] $r1		// CC_SCRATCH[0] |= 0x80000000

// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
// Commands handled here (command in $r14, data in $r15 after queue_get):
//   0x4001  context switch request queued by the interrupt handler
//   0x0001  set current channel, channel address in $r15
//   0x0002  save current channel context
// Anything else is reported through error().
//
main:
	// sleep until we have something to do
	bset $flags $p0
	sleep $p0
	mov $r13 #cmd_queue
	call #queue_get
	bra $p1 #main

	// context switch, requested by GPU?
	cmpu b32 $r14 0x4001
	bra ne #main_not_ctx_switch
		trace_set(T_AUTO)
		mov $r1 0xb00
		shl b32 $r1 6
		iord $r2 I[$r1 + 0x100]		// CHAN_NEXT
		iord $r1 I[$r1 + 0x000]		// CHAN_CUR

		// bit 31 of each channel word decides whether there is a
		// previous and/or a next channel to deal with
		xbit $r3 $r1 31
		bra e #chsw_no_prev
			xbit $r3 $r2 31
			bra e #chsw_prev_no_next
				// save the previous channel, then load the next
				push $r2
				mov b32 $r2 $r1
				trace_set(T_SAVE)
				bclr $flags $p1
				bset $flags $p2
				call #ctx_xfer
				trace_clr(T_SAVE);
				pop $r2
				trace_set(T_LOAD);
				bset $flags $p1
				call #ctx_xfer
				trace_clr(T_LOAD);
				bra #chsw_done
			chsw_prev_no_next:
				// save only, then copy CHAN_NEXT into CHAN_CUR
				push $r2
				mov b32 $r2 $r1
				bclr $flags $p1
				bclr $flags $p2
				call #ctx_xfer
				pop $r2
				mov $r1 0xb00
				shl b32 $r1 6
				iowr I[$r1] $r2
				bra #chsw_done
		chsw_no_prev:
			xbit $r3 $r2 31
			bra e #chsw_done
				// load only
				bset $flags $p1
				bclr $flags $p2
				call #ctx_xfer

		// ack the context switch request
		chsw_done:
		mov $r1 0xb0c
		shl b32 $r1 6
		mov $r2 1
		iowr I[$r1 + 0x000] $r2		// 0x409b0c
		trace_clr(T_AUTO)
		bra #main

	// request to set current channel? (*not* a context switch)
	main_not_ctx_switch:
	cmpu b32 $r14 0x0001
	bra ne #main_not_ctx_chan
		mov b32 $r2 $r15
		call #ctx_chan
		bra #main_done

	// request to store current channel context?
	main_not_ctx_chan:
	cmpu b32 $r14 0x0002
	bra ne #main_not_ctx_save
		trace_set(T_SAVE)
		bclr $flags $p1
		bclr $flags $p2
		call #ctx_xfer
		trace_clr(T_SAVE)
		bra #main_done

	// unknown command: report (command << 16) | E_BAD_COMMAND
	main_not_ctx_save:
		shl b32 $r15 $r14 16
		or $r15 E_BAD_COMMAND
		call #error
		bra #main

	main_done:
	mov $r1 0x820
	shl b32 $r1 6
	clear b32 $r2
	bset $r2 31
	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
	bra #main

// interrupt handler
//
// Reads the pending bits from INTR into $r10, then:
//   - bit 2 (0x004): fetches FIFO_CMD/FIFO_DATA, queues them, acks the fifo
//   - bit 8 (0x100): queues command 0x4001 (context switch) for main
//   - any other bit: forwarded to the host through INTR_UP_SET
// Finally the pending bits are written to INTR_ACK and $p0 is cleared,
// which main set before sleeping.
//
// $r8-$r11, $r13-$r15 and $flags are saved on entry and restored on exit.
//
ih:
	push $r8
	mov $r8 $flags
	push $r8
	push $r9
	push $r10
	push $r11
	push $r13
	push $r14
	push $r15

	// incoming fifo command?
	iord $r10 I[$r0 + 0x200]	// INTR
	and $r11 $r10 0x00000004
	bra e #ih_no_fifo
		// queue incoming fifo command for later processing
		mov $r11 0x1900
		mov $r13 #cmd_queue
		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
		call #queue_put
		add b32 $r11 0x400
		mov $r14 1
		iowr I[$r11 + 0x000] $r14	// FIFO_ACK

	// context switch request?
	ih_no_fifo:
	and $r11 $r10 0x00000100
	bra e #ih_no_ctxsw
		// enqueue a context switch for later processing
		mov $r13 #cmd_queue
		mov $r14 0x4001
		call #queue_put

	// anything we didn't handle, bring it to the host's attention
	ih_no_ctxsw:
	mov $r11 0x104
	not b32 $r11
	and $r11 $r10 $r11
	bra e #ih_no_other
		// the INTR_UP_SET address is built in $r9 (saved/restored by
		// this handler) so that $r10 still holds the INTR value when
		// it is written to INTR_ACK below
		mov $r9 0xc1c
		shl b32 $r9 6
		iowr I[$r9] $r11	// INTR_UP_SET

	// ack, and wake up main()
	ih_no_other:
	iowr I[$r0 + 0x100] $r10	// INTR_ACK

	pop $r15
	pop $r14
	pop $r13
	pop $r11
	pop $r10
	pop $r9
	pop $r8
	mov $flags $r8
	pop $r8
	// done after $flags is restored, otherwise the restore would undo it
	bclr $flags $p0
	iret

// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
//
// Writes 1 to 0x404160, then polls the same register until bit 4 reads
// back set.  Overwrites $r14 and $r15.
//
ctx_4160s:
	mov $r14 0x4160
	sethi $r14 0x400000
	mov $r15 1
	call #nv_wr32
	ctx_4160s_wait:
		call #nv_rd32
		xbit $r15 $r15 4
		bra e #ctx_4160s_wait
	ret

// Without clearing again at end of xfer, some things cause PGRAPH
// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
// still function with it set however...
ctx_4160c:
	// write 0 to 0x404160, the register ctx_4160s sets to 1
	mov $r14 0x4160
	sethi $r14 0x400000
	clear b32 $r15
	call #nv_wr32
	ret

// Again, not real sure
//
// In: $r15 value to set 0x404170 to
//
// Bit 4 (0x10) is always OR-ed into the value written; ctx_4170w polls
// for that bit to read back clear.
//
ctx_4170s:
	mov $r14 0x4170
	sethi $r14 0x400000
	or $r15 0x10
	call #nv_wr32
	ret

// Waits for a ctx_4170s() call to complete
//
// Re-reads 0x404170 until bit 4 is clear.  Overwrites $r14 and $r15.
//
ctx_4170w:
	mov $r14 0x4170
	sethi $r14 0x400000
	call #nv_rd32
	and $r15 0x10
	bra ne #ctx_4170w
	ret

// Disables various things, waits a bit, and re-enables them..
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off?  Anyways, without this,
// funny things happen.
//
ctx_redswitch:
	mov $r14 0x614
	shl b32 $r14 6
	mov $r15 0x270
	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
	// short busy-wait: count $r15 down from 8 to 0
	mov $r15 8
	ctx_redswitch_delay:
		sub b32 $r15 1
		bra ne #ctx_redswitch_delay
	mov $r15 0x770
	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
	ret

// Not a clue what this is for, except that unless the value is 0x10, the
// strand context is saved (and presumably restored) incorrectly..
//
// In: $r15 value to set to (0x00/0x10 are used)
//
// The same value is written to three places; the second and third writes
// go through nv_wr32 and reuse $r15 without reloading it, so this relies
// on nv_wr32 leaving $r15 intact (TODO confirm in nvc0_graph.fuc).
//
ctx_86c:
	mov $r14 0x86c
	shl b32 $r14 6
	iowr I[$r14] $r15	// HUB(0x86c) = val
	mov $r14 -0x75ec
	sethi $r14 0x400000
	call #nv_wr32		// ROP(0xa14) = val
	mov $r14 -0x5794
	sethi $r14 0x410000
	call #nv_wr32		// GPC(0x86c) = val
	ret

// ctx_load - load's a channel's ctxctl data, and selects its vm
//
// In: $r2 channel address
//
// On return #ctx_current holds the context pointer assembled from the
// channel header, and #chan_data holds the first 256 bytes of the context.
// Overwrites $r1-$r4 and $r10.
//
ctx_load:
	trace_set(T_CHAN)

	// switch to channel, somewhat magic in parts..
	mov $r10 12		// DONE_UNK12
	call #wait_donez
	mov $r1 0xa24
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r0	// 0x409a24
	mov $r3 0xb00
	shl b32 $r3 6
	iowr I[$r3 + 0x100] $r2	// CHAN_NEXT
	mov $r1 0xa0c
	shl b32 $r1 6
	mov $r4 7
	iowr I[$r1 + 0x000] $r2 // MEM_CHAN
	iowr I[$r1 + 0x100] $r4	// MEM_CMD
	// poll MEM_CMD until its low five bits read back as zero
	ctx_chan_wait_0:
		iord $r4 I[$r1 + 0x100]
		and $r4 0x1f
		bra ne #ctx_chan_wait_0
	iowr I[$r3 + 0x000] $r2	// CHAN_CUR

	// load channel header, fetch PGRAPH context pointer
	mov $xtargets $r0
	// MEM_BASE value = (channel address with bit 31 dropped) << 4, plus 2
	bclr $r2 31
	shl b32 $r2 4
	add b32 $r2 2

	trace_set(T_LCHAN)
	mov $r1 0xa04
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r2		// MEM_BASE
	mov $r1 0xa20
	shl b32 $r1 6
	mov $r2 0x0002
	sethi $r2 0x80000000
	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
	mov $r1 0x10			// chan + 0x0210
	mov $r2 #xfer_data
	sethi $r2 0x00020000		// 16 bytes
	xdld $r1 $r2
	xdwait
	trace_clr(T_LCHAN)

	// update current context
	// ctx_current = (word1 << 24) | (word0 >> 8) of the fetched data
	ld b32 $r1 D[$r0 + #xfer_data + 4]
	shl b32 $r1 24
	ld b32 $r2 D[$r0 + #xfer_data + 0]
	shr b32 $r2 8
	or $r1 $r2
	st b32 D[$r0 + #ctx_current] $r1

	// set transfer base to start of context, and fetch context header
	trace_set(T_LCTXH)
	mov $r2 0xa04
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r1		// MEM_BASE
	mov $r2 1
	mov $r1 0xa20
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdld $r0 $r1
	xdwait
	trace_clr(T_LCTXH)

	trace_clr(T_CHAN)
	ret

// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
//            the active channel for ctxctl, but not actually transfer
//            any context data.  intended for use only during initial
//            context construction.
//
// In: $r2 channel address
//
ctx_chan:
	call #ctx_4160s
	call #ctx_load
	mov $r10 12			// DONE_UNK12
	call #wait_donez
	mov $r1 0xa10
	shl b32 $r1 6
	mov $r2 5
	iowr I[$r1 + 0x000] $r2		// MEM_CMD = 5 (???)
	// poll until the register reads back as zero
	ctx_chan_wait:
		iord $r2 I[$r1 + 0x000]
		or $r2 $r2
		bra ne #ctx_chan_wait
	call #ctx_4160c
	ret

// Execute per-context state overrides list
//
// Only executed on the first load of a channel.  Might want to look into
// removing this and having the host directly modify the channel's context
// to change this state...  The nouveau DRM already builds this list as
// it's definitely needed for NVIDIA's, so we may as well use it for now
//
// Input: $r1 mmio list length
//
// The length counts 8-byte entries (one address word, one value word) and
// must be non-zero on entry: the loop only tests it after the first entry
// has been executed.  The only caller in this file checks that first.
//
ctx_mmio_exec:
	// set transfer base to be the mmio list
	ld b32 $r3 D[$r0 + #chan_mmio_address]
	mov $r2 0xa04
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r3		// MEM_BASE

	// $r3 = running byte offset into the list
	clear b32 $r3
	ctx_mmio_loop:
		// fetch next 256 bytes of mmio list if necessary
		// ($r4 = offset inside the 256-byte buffer; 0 means refill)
		and $r4 $r3 0xff
		bra ne #ctx_mmio_pull
			mov $r5 #xfer_data
			sethi $r5 0x00060000	// 256 bytes
			xdld $r3 $r5
			xdwait

		// execute a single list entry
		ctx_mmio_pull:
		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
		call #nv_wr32

		// next!
		add b32 $r3 8
		sub b32 $r1 1
		bra ne #ctx_mmio_loop

	// set transfer base back to the current context
	ctx_mmio_done:
	ld b32 $r3 D[$r0 + #ctx_current]
	iowr I[$r2 + 0x000] $r3		// MEM_BASE

	// disable the mmio list now, we don't need/want to execute it again
	// (the zeroed count is written back out with the rest of chan_data)
	st b32 D[$r0 + #chan_mmio_count] $r0
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdst $r0 $r1
	xdwait
	ret

// Transfer HUB context data between GPU and storage area
//
// In: $r2 channel address
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//		on save it means: "a load will follow this save"
//		on load it means: "a save preceeded this load"
//
ctx_xfer:
	// a load that follows a save skips the first part of the pre-op
	bra not $p1 #ctx_xfer_pre
	bra $p2 #ctx_xfer_pre_load
	ctx_xfer_pre:
		mov $r15 0x10
		call #ctx_86c
		call #ctx_4160s
		bra not $p1 #ctx_xfer_exec

	// loads only
	ctx_xfer_pre_load:
		mov $r15 2
		call #ctx_4170s
		call #ctx_4170w
		call #ctx_redswitch
		clear b32 $r15
		call #ctx_4170s
		call #ctx_load

	// fetch context pointer, and initiate xfer on all GPCs
	ctx_xfer_exec:
	ld b32 $r1 D[$r0 + #ctx_current]
	mov $r2 0x414
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r0	// BAR_STATUS = reset
	mov $r14 -0x5b00
	sethi $r14 0x410000
	mov b32 $r15 $r1
	call #nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
	add b32 $r14 4
	// command value = $p1 | ($p2 << 1)
	xbit $r15 $flags $p1
	xbit $r2 $flags $p2
	shl b32 $r2 1
	or $r15 $r2
	call #nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)

	// strands
	mov $r1 0x4afc
	sethi $r1 0x20000
	mov $r2 0xc
	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
	call #strand_wait
	mov $r2 0x47fc
	sethi $r2 0x20000
	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
	xbit $r2 $flags $p1
	add b32 $r2 3
	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)

	// mmio context
	xbit $r10 $flags $p1	// direction
	or $r10 6		// first, last
	mov $r11 0		// base = 0
	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
	mov $r14 0		// not multi
	call #mmctx_xfer

	// wait for GPCs to all complete
	mov $r10 8		// DONE_BAR
	call #wait_doneo

	// wait for strand xfer to complete
	call #strand_wait

	// post-op
	// saves issue MEM_CMD 5 and wait for it to read back as zero
	bra $p1 #ctx_xfer_post
		mov $r10 12		// DONE_UNK12
		call #wait_donez
		mov $r1 0xa10
		shl b32 $r1 6
		mov $r2 5
		iowr I[$r1] $r2		// MEM_CMD
		ctx_xfer_post_save_wait:
			iord $r2 I[$r1]
			or $r2 $r2
			bra ne #ctx_xfer_post_save_wait

	// a save that will be followed by a load stops here
	bra $p2 #ctx_xfer_done
	ctx_xfer_post:
		mov $r15 2
		call #ctx_4170s
		clear b32 $r15
		call #ctx_86c
		call #strand_post
		call #ctx_4170w
		clear b32 $r15
		call #ctx_4170s

		// on loads, run the channel's mmio list if its count is non-zero
		bra not $p1 #ctx_xfer_no_post_mmio
		ld b32 $r1 D[$r0 + #chan_mmio_count]
		or $r1 $r1
		bra e #ctx_xfer_no_post_mmio
			call #ctx_mmio_exec

		ctx_xfer_no_post_mmio:
		call #ctx_4160c

	ctx_xfer_done:
	ret

.align 256