// hubnve0.fuc revision 1978a2f280563427bb675b3197391745aef73424
/* fuc microcode for nve0 PGRAPH/HUB
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

/* To build:
 *    m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h
 */

.section #nve0_grhub_data
include(`nve0.fuc')
// GPC/ROP unit counts, filled in by init from register 0x409604
gpc_count:		.b32 0
rop_count:		.b32 0
// command queue, filled by ih and drained by main
cmd_queue:		queue_init
// bounds of the mmio context list selected for the detected chipset
hub_mmio_list_head:	.b32 0
hub_mmio_list_tail:	.b32 0

// context pointer of the most recently loaded channel (see ctx_load)
ctx_current:		.b32 0

// chipset table, 8 bytes per entry:
//   +0 b32 chipset id (compared against CC_SCRATCH[0] by init)
//   +4 b16 mmio list head
//   +6 b16 mmio list tail
// a zero chipset id terminates the table
chipsets:
.b8  0xe4 0 0 0
.b16 #nve4_hub_mmio_head
.b16 #nve4_hub_mmio_tail
.b8  0xe7 0 0 0
.b16 #nve4_hub_mmio_head
.b16 #nve4_hub_mmio_tail
.b8  0 0 0 0

nve4_hub_mmio_head:
mmctx_data(0x17e91c, 2)
mmctx_data(0x400204, 2)
mmctx_data(0x404010, 7)
mmctx_data(0x4040a8, 9)
mmctx_data(0x4040d0, 7)
mmctx_data(0x4040f8, 1)
mmctx_data(0x404130, 3)
mmctx_data(0x404150, 3)
mmctx_data(0x404164, 1)
mmctx_data(0x4041a0, 4)
mmctx_data(0x404200, 4)
mmctx_data(0x404404, 14)
mmctx_data(0x404460, 4)
mmctx_data(0x404480, 1)
mmctx_data(0x404498, 1)
mmctx_data(0x404604, 4)
mmctx_data(0x404618, 4)
mmctx_data(0x40462c, 2)
mmctx_data(0x404640, 1)
mmctx_data(0x404654, 1)
mmctx_data(0x404660, 1)
mmctx_data(0x404678, 19)
mmctx_data(0x4046c8, 3)
mmctx_data(0x404700, 3)
mmctx_data(0x404718, 10)
mmctx_data(0x404744, 2)
mmctx_data(0x404754, 1)
mmctx_data(0x405800, 1)
mmctx_data(0x405830, 3)
mmctx_data(0x405854, 1)
mmctx_data(0x405870, 4)
mmctx_data(0x405a00, 2)
mmctx_data(0x405a18, 1)
mmctx_data(0x405b00, 1)
mmctx_data(0x405b10, 1)
mmctx_data(0x406020, 1)
mmctx_data(0x406028, 4)
mmctx_data(0x4064a8, 2)
mmctx_data(0x4064b4, 2)
mmctx_data(0x4064c0, 12)
mmctx_data(0x4064fc, 1)
mmctx_data(0x407040, 1)
mmctx_data(0x407804, 1)
mmctx_data(0x40780c, 6)
mmctx_data(0x4078bc, 1)
mmctx_data(0x408000, 7)
mmctx_data(0x408064, 1)
mmctx_data(0x408800, 3)
mmctx_data(0x408840, 1)
mmctx_data(0x408900, 3)
mmctx_data(0x408980, 1)
nve4_hub_mmio_tail:

// 256-byte context header, transferred as a whole by ctx_load (read)
// and ctx_mmio_exec (write back)
.align 256
chan_data:
chan_mmio_count:	.b32 0
chan_mmio_address:	.b32 0

// scratch buffer for xdld transfers of up to 256 bytes; it is the last
// object in the data section, so only its first word is declared
.align 256
xfer_data:		.b32 0

.section #nve0_grhub_code
bra #init
define(`include_code')
include(`nve0.fuc')

// reports an exception to the host
//
// In: $r15 error code (see nve0.fuc)
//
// $r14 is preserved; $r15 is overwritten with 1 on return
//
error:
	push $r14
	mov $r14 0x814
	shl b32 $r14 6
	iowr I[$r14 + 0x000] $r15	// CC_SCRATCH[5] = error code
	mov $r14 0xc1c
	shl b32 $r14 6
	mov $r15 1
	iowr I[$r14 + 0x000] $r15	// INTR_UP_SET
	pop $r14
	ret

// HUB fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// Input:
//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
//
// Output:
//   CC_SCRATCH[0]:
//	     31:31: set to signal completion
//   CC_SCRATCH[1]:
//	      31:0: total PGRAPH context size
//
init:
	clear b32 $r0
	mov $sp $r0
	mov $xdbase $r0

	// enable fifo access
	mov $r1 0x1200
	mov $r2 2
	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE

	// setup i0 handler, and route all interrupts to it
	mov $r1 #ih
	mov $iv0 $r1
	mov $r1 0x400
	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH

	// route HUB_CHANNEL_SWITCH to fuc interrupt 8
	mov $r3 0x404
	shl b32 $r3 6
	mov $r2 0x2003			// { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
	iowr I[$r3 + 0x000] $r2

	// not sure what these are, route them because NVIDIA does, and
	// the IRQ handler will signal the host if we ever get one.. we
	// may find out if/why we need to handle these if so..
	//
	mov $r2 0x2004
	iowr I[$r3 + 0x004] $r2		// { 0x04, ZERO } -> intr 9
	mov $r2 0x200b
	iowr I[$r3 + 0x008] $r2		// { 0x0b, ZERO } -> intr 10
	mov $r2 0x200c
	iowr I[$r3 + 0x01c] $r2		// { 0x0c, ZERO } -> intr 15

	// enable all INTR_UP interrupts
	mov $r2 0xc24
	shl b32 $r2 6
	not b32 $r3 $r0
	iowr I[$r2] $r3

	// enable fifo, ctxsw, 9, 10, 15 interrupts
	mov $r2 -0x78fc			// 0x8704
	sethi $r2 0
	iowr I[$r1 + 0x000] $r2		// INTR_EN_SET

	// fifo level triggered, rest edge
	sub b32 $r1 0x100
	mov $r2 4
	iowr I[$r1] $r2

	// enable interrupts
	bset $flags ie0

	// fetch enabled GPC/ROP counts
	mov $r14 -0x69fc		// 0x409604
	sethi $r14 0x400000
	call #nv_rd32
	extr $r1 $r15 16:20
	st b32 D[$r0 + #rop_count] $r1
	and $r15 0x1f
	st b32 D[$r0 + #gpc_count] $r15

	// set BAR_REQMASK to GPC mask
	mov $r1 1
	shl b32 $r1 $r15
	sub b32 $r1 1
	mov $r2 0x40c
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r1
	iowr I[$r2 + 0x100] $r1

	// find context data for this chipset
	mov $r2 0x800
	shl b32 $r2 6
	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
	mov $r15 #chipsets - 8
	init_find_chipset:
		add b32 $r15 8
		ld b32 $r3 D[$r15 + 0x00]
		cmpu b32 $r3 $r2
		bra e #init_context
		cmpu b32 $r3 0
		bra ne #init_find_chipset
		// unknown chipset
		//
		// NOTE(review): init is reached by a branch from the start of
		// the code section and $sp was cleared above, so there is no
		// caller to return to here - confirm the intended behaviour
		ret

	// context size calculation, reserve first 256 bytes for use by fuc
	init_context:
	mov $r1 256

	// calculate size of mmio context data
	ld b16 $r14 D[$r15 + 4]
	ld b16 $r15 D[$r15 + 6]
	sethi $r14 0
	st b32 D[$r0 + #hub_mmio_list_head] $r14
	st b32 D[$r0 + #hub_mmio_list_tail] $r15
	call #mmctx_size

	// set mmctx base addresses now so we don't have to do it later,
	// they don't (currently) ever change
	mov $r3 0x700
	shl b32 $r3 6
	shr b32 $r4 $r1 8
	iowr I[$r3 + 0x000] $r4		// MMCTX_SAVE_SWBASE
	iowr I[$r3 + 0x100] $r4		// MMCTX_LOAD_SWBASE
	add b32 $r3 0x1300
	add b32 $r1 $r15
	shr b32 $r15 2
	iowr I[$r3 + 0x000] $r15	// MMCTX_LOAD_COUNT, wtf for?!?

	// strands, base offset needs to be aligned to 256 bytes
	shr b32 $r1 8
	add b32 $r1 1
	shl b32 $r1 8
	mov b32 $r15 $r1
	call #strand_ctx_init
	add b32 $r1 $r15

	// initialise each GPC in sequence by passing in the offset of its
	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
	// has previously been uploaded by the host) running.
	//
	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
	// when it has completed, and return the size of its context data
	// in GPCn_CC_SCRATCH[1]
	//
	ld b32 $r3 D[$r0 + #gpc_count]
	mov $r4 0x2000
	sethi $r4 0x500000
	init_gpc:
		// setup, and start GPC ucode running
		add b32 $r14 $r4 0x804
		mov b32 $r15 $r1
		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
		add b32 $r14 $r4 0x800
		mov b32 $r15 $r2
		call #nv_wr32			// CC_SCRATCH[0] = chipset
		add b32 $r14 $r4 0x10c
		clear b32 $r15
		call #nv_wr32
		add b32 $r14 $r4 0x104
		call #nv_wr32			// ENTRY
		add b32 $r14 $r4 0x100
		mov $r15 2			// CTRL_START_TRIGGER
		call #nv_wr32			// CTRL

		// wait for it to complete, and adjust context size
		add b32 $r14 $r4 0x800
		init_gpc_wait:
			call #nv_rd32
			xbit $r15 $r15 31
			bra e #init_gpc_wait
		add b32 $r14 $r4 0x804
		call #nv_rd32
		add b32 $r1 $r15

		// next!
		add b32 $r4 0x8000
		sub b32 $r3 1
		bra ne #init_gpc

	// save context size, and tell host we're ready
	mov $r2 0x800
	shl b32 $r2 6
	iowr I[$r2 + 0x100] $r1		// CC_SCRATCH[1]  = context size
	add b32 $r2 0x800
	clear b32 $r1
	bset $r1 31
	iowr I[$r2 + 0x000] $r1		// CC_SCRATCH[0] |= 0x80000000

// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
// Commands handled here ($r14 = command, $r15 = data):
//   0x4001  context switch request (queued by ih)
//   0x0001  set current channel, $r15 = channel address
//   0x0002  save current channel context
// anything else is reported to the host through error
//
main:
	// sleep until we have something to do
	bset $flags $p0
	sleep $p0
	mov $r13 #cmd_queue
	call #queue_get
	bra $p1 #main

	// context switch, requested by GPU?
	cmpu b32 $r14 0x4001
	bra ne #main_not_ctx_switch
		trace_set(T_AUTO)
		mov $r1 0xb00
		shl b32 $r1 6
		iord $r2 I[$r1 + 0x100]		// CHAN_NEXT
		iord $r1 I[$r1 + 0x000]		// CHAN_CUR

		// bit 31 of each channel word selects which of the
		// save and/or load paths below is taken
		xbit $r3 $r1 31
		bra e #chsw_no_prev
			xbit $r3 $r2 31
			bra e #chsw_prev_no_next
				// save the previous channel, then load the next
				push $r2
				mov b32 $r2 $r1
				trace_set(T_SAVE)
				bclr $flags $p1
				bset $flags $p2
				call #ctx_xfer
				trace_clr(T_SAVE);
				pop $r2
				trace_set(T_LOAD);
				bset $flags $p1
				call #ctx_xfer
				trace_clr(T_LOAD);
				bra #chsw_done
			chsw_prev_no_next:
				// save only, then copy CHAN_NEXT to CHAN_CUR
				push $r2
				mov b32 $r2 $r1
				bclr $flags $p1
				bclr $flags $p2
				call #ctx_xfer
				pop $r2
				mov $r1 0xb00
				shl b32 $r1 6
				iowr I[$r1] $r2
				bra #chsw_done
		chsw_no_prev:
			xbit $r3 $r2 31
			bra e #chsw_done
				// load only
				bset $flags $p1
				bclr $flags $p2
				call #ctx_xfer

		// ack the context switch request
		chsw_done:
		mov $r1 0xb0c
		shl b32 $r1 6
		mov $r2 1
		iowr I[$r1 + 0x000] $r2		// 0x409b0c
		trace_clr(T_AUTO)
		bra #main

	// request to set current channel? (*not* a context switch)
	main_not_ctx_switch:
	cmpu b32 $r14 0x0001
	bra ne #main_not_ctx_chan
		mov b32 $r2 $r15
		call #ctx_chan
		bra #main_done

	// request to store current channel context?
	main_not_ctx_chan:
	cmpu b32 $r14 0x0002
	bra ne #main_not_ctx_save
		trace_set(T_SAVE)
		bclr $flags $p1
		bclr $flags $p2
		call #ctx_xfer
		trace_clr(T_SAVE)
		bra #main_done

	// unknown command: report (command << 16) | error code to the host
	main_not_ctx_save:
		shl b32 $r15 $r14 16
		or $r15 E_BAD_COMMAND
		call #error
		bra #main

	main_done:
	mov $r1 0x820
	shl b32 $r1 6
	clear b32 $r2
	bset $r2 31
	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
	bra #main

// interrupt handler
//
// Queues incoming fifo commands and context switch requests on cmd_queue
// for main, forwards any other pending interrupt to the host, acks what
// was pending and clears $p0 so that the sleep in main wakes up.
//
// $flags, $r8-$r11 and $r13-$r15 are saved on entry and restored on exit.
//
ih:
	push $r8
	mov $r8 $flags
	push $r8
	push $r9
	push $r10
	push $r11
	push $r13
	push $r14
	push $r15

	// incoming fifo command?
	iord $r10 I[$r0 + 0x200]	// INTR
	and $r11 $r10 0x00000004
	bra e #ih_no_fifo
		// queue incoming fifo command for later processing
		mov $r11 0x1900
		mov $r13 #cmd_queue
		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
		call #queue_put
		add b32 $r11 0x400
		mov $r14 1
		iowr I[$r11 + 0x000] $r14	// FIFO_ACK

	// context switch request?
	ih_no_fifo:
	and $r11 $r10 0x00000100
	bra e #ih_no_ctxsw
		// enqueue a context switch for later processing
		mov $r13 #cmd_queue
		mov $r14 0x4001
		call #queue_put

	// anything we didn't handle, bring it to the host's attention
	ih_no_ctxsw:
	mov $r11 0x104
	not b32 $r11
	and $r11 $r10 $r11
	bra e #ih_no_other
		// build the register address in $r9 (saved/restored above)
		// rather than $r10, so the pending interrupt mask in $r10
		// is still intact for the INTR_ACK write below
		mov $r9 0xc1c
		shl b32 $r9 6
		iowr I[$r9] $r11		// INTR_UP_SET

	// ack, and wake up main()
	ih_no_other:
	iowr I[$r0 + 0x100] $r10	// INTR_ACK

	pop $r15
	pop $r14
	pop $r13
	pop $r11
	pop $r10
	pop $r9
	pop $r8
	mov $flags $r8
	pop $r8
	bclr $flags $p0
	iret

// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
//
// Writes 1 to 0x404160, then polls the register until bit 4 reads back set
//
ctx_4160s:
	mov $r14 0x4160
	sethi $r14 0x400000
	mov $r15 1
	call #nv_wr32
	ctx_4160s_wait:
		call #nv_rd32
		xbit $r15 $r15 4
		bra e #ctx_4160s_wait
	ret

// Without clearing again at end of xfer, some things cause PGRAPH
// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
// still function with it set however...
//
// Writes 0 to 0x404160
//
ctx_4160c:
	mov $r14 0x4160
	sethi $r14 0x400000
	clear b32 $r15
	call #nv_wr32
	ret

// Again, not real sure
//
// In: $r15 value to set 0x404170 to
//
// Bit 4 (0x10) is ORed into the value before it is written
//
ctx_4170s:
	mov $r14 0x4170
	sethi $r14 0x400000
	or $r15 0x10
	call #nv_wr32
	ret

// Waits for a ctx_4170s() call to complete
//
// Polls 0x404170 until bit 4 (0x10) reads back clear
//
ctx_4170w:
	mov $r14 0x4170
	sethi $r14 0x400000
	call #nv_rd32
	and $r15 0x10
	bra ne #ctx_4170w
	ret

// Disables various things, waits a bit, and re-enables them..
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off?  Anyways, without this,
// funny things happen.
//
ctx_redswitch:
	mov $r14 0x614
	shl b32 $r14 6
	mov $r15 0x270
	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
	// short busy-wait, 8 iterations
	mov $r15 8
	ctx_redswitch_delay:
		sub b32 $r15 1
		bra ne #ctx_redswitch_delay
	mov $r15 0x770
	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
	ret

// Not a clue what this is for, except that unless the value is 0x10, the
// strand context is saved (and presumably restored) incorrectly..
//
// In: $r15 value to set to (0x00/0x10 are used)
//
ctx_86c:
	mov $r14 0x86c
	shl b32 $r14 6
	iowr I[$r14] $r15	// HUB(0x86c) = val
	mov $r14 -0x75ec
	sethi $r14 0x400000
	call #nv_wr32		// ROP(0xa14) = val
	mov $r14 -0x5794
	sethi $r14 0x410000
	call #nv_wr32		// GPC(0x86c) = val
	ret

// ctx_load - loads a channel's ctxctl data, and selects its vm
//
// In: $r2 channel address
//
// On return ctx_current holds the channel's context pointer and the
// first 256 bytes of the context have been read into chan_data
//
ctx_load:
	trace_set(T_CHAN)

	// switch to channel, somewhat magic in parts..
	mov $r10 12			// DONE_UNK12
	call #wait_donez
	mov $r1 0xa24
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r0		// 0x409a24
	mov $r3 0xb00
	shl b32 $r3 6
	iowr I[$r3 + 0x100] $r2		// CHAN_NEXT
	mov $r1 0xa0c
	shl b32 $r1 6
	mov $r4 7
	iowr I[$r1 + 0x000] $r2		// MEM_CHAN
	iowr I[$r1 + 0x100] $r4		// MEM_CMD
	ctx_chan_wait_0:
		iord $r4 I[$r1 + 0x100]
		and $r4 0x1f
		bra ne #ctx_chan_wait_0
	iowr I[$r3 + 0x000] $r2		// CHAN_CUR

	// load channel header, fetch PGRAPH context pointer
	mov $xtargets $r0
	bclr $r2 31
	shl b32 $r2 4
	add b32 $r2 2

	trace_set(T_LCHAN)
	mov $r1 0xa04
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r2		// MEM_BASE
	mov $r1 0xa20
	shl b32 $r1 6
	mov $r2 0x0002
	sethi $r2 0x80000000
	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
	mov $r1 0x10			// chan + 0x0210
	mov $r2 #xfer_data
	sethi $r2 0x00020000		// 16 bytes
	xdld $r1 $r2
	xdwait
	trace_clr(T_LCHAN)

	// update current context
	// ctx_current = (word[1] << 24) | (word[0] >> 8)
	ld b32 $r1 D[$r0 + #xfer_data + 4]
	shl b32 $r1 24
	ld b32 $r2 D[$r0 + #xfer_data + 0]
	shr b32 $r2 8
	or $r1 $r2
	st b32 D[$r0 + #ctx_current] $r1

	// set transfer base to start of context, and fetch context header
	trace_set(T_LCTXH)
	mov $r2 0xa04
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r1		// MEM_BASE
	mov $r2 1
	mov $r1 0xa20
	shl b32 $r1 6
	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdld $r0 $r1
	xdwait
	trace_clr(T_LCTXH)

	trace_clr(T_CHAN)
	ret

// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
//            the active channel for ctxctl, but not actually transfer
//            any context data.  intended for use only during initial
//            context construction.
//
// In: $r2 channel address
//
ctx_chan:
	call #ctx_4160s
	call #ctx_load
	mov $r10 12			// DONE_UNK12
	call #wait_donez
	mov $r1 0xa10
	shl b32 $r1 6
	mov $r2 5
	iowr I[$r1 + 0x000] $r2		// MEM_CMD = 5 (???)
	// poll until the register reads back zero
	ctx_chan_wait:
		iord $r2 I[$r1 + 0x000]
		or $r2 $r2
		bra ne #ctx_chan_wait
	call #ctx_4160c
	ret

// Execute per-context state overrides list
//
// Only executed on the first load of a channel.  Might want to look into
// removing this and having the host directly modify the channel's context
// to change this state...  The nouveau DRM already builds this list as
// it's definitely needed for NVIDIA's, so we may as well use it for now
//
// Input: $r1 mmio list length
//
// The list is read 256 bytes at a time into xfer_data; each entry is a
// pair of 32-bit words (register address, value) passed to nv_wr32.
// The loop counts $r1 down to zero, so callers must pass a non-zero
// length (ctx_xfer checks this before calling).
//
ctx_mmio_exec:
	// set transfer base to be the mmio list
	ld b32 $r3 D[$r0 + #chan_mmio_address]
	mov $r2 0xa04
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r3		// MEM_BASE

	clear b32 $r3
	ctx_mmio_loop:
		// fetch next 256 bytes of mmio list if necessary
		and $r4 $r3 0xff
		bra ne #ctx_mmio_pull
			mov $r5 #xfer_data
			sethi $r5 0x00060000	// 256 bytes
			xdld $r3 $r5
			xdwait

		// execute a single list entry
		ctx_mmio_pull:
		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
		call #nv_wr32

		// next!
		add b32 $r3 8
		sub b32 $r1 1
		bra ne #ctx_mmio_loop

	// set transfer base back to the current context
	ctx_mmio_done:
	ld b32 $r3 D[$r0 + #ctx_current]
	iowr I[$r2 + 0x000] $r3		// MEM_BASE

	// disable the mmio list now, we don't need/want to execute it again
	st b32 D[$r0 + #chan_mmio_count] $r0
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdst $r0 $r1
	xdwait
	ret

// Transfer HUB context data between GPU and storage area
//
// In: $r2 channel address
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//		on save it means: "a load will follow this save"
//		on load it means: "a save preceded this load"
//
ctx_xfer:
	// a load that follows a save skips the ctx_86c/ctx_4160s setup,
	// which the preceding save already performed
	bra not $p1 #ctx_xfer_pre
	bra $p2 #ctx_xfer_pre_load
	ctx_xfer_pre:
		mov $r15 0x10
		call #ctx_86c
		call #ctx_4160s
		bra not $p1 #ctx_xfer_exec

	ctx_xfer_pre_load:
		mov $r15 2
		call #ctx_4170s
		call #ctx_4170w
		call #ctx_redswitch
		clear b32 $r15
		call #ctx_4170s
		call #ctx_load

	// fetch context pointer, and initiate xfer on all GPCs
	ctx_xfer_exec:
	ld b32 $r1 D[$r0 + #ctx_current]
	mov $r2 0x414
	shl b32 $r2 6
	iowr I[$r2 + 0x000] $r0		// BAR_STATUS = reset
	mov $r14 -0x5b00
	sethi $r14 0x410000
	mov b32 $r15 $r1
	call #nv_wr32			// GPC_BCAST_WRCMD_DATA = ctx pointer
	add b32 $r14 4
	// command word: bit 0 = $p1 (load), bit 1 = $p2
	xbit $r15 $flags $p1
	xbit $r2 $flags $p2
	shl b32 $r2 1
	or $r15 $r2
	call #nv_wr32			// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)

	// strands
	mov $r1 0x4afc
	sethi $r1 0x20000
	mov $r2 0xc
	iowr I[$r1] $r2			// STRAND_CMD(0x3f) = 0x0c
	call #strand_wait
	mov $r2 0x47fc
	sethi $r2 0x20000
	iowr I[$r2] $r0			// STRAND_FIRST_GENE(0x3f) = 0x00
	xbit $r2 $flags $p1
	add b32 $r2 3
	iowr I[$r1] $r2			// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)

	// mmio context
	xbit $r10 $flags $p1		// direction
	or $r10 6			// first, last
	mov $r11 0			// base = 0
	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
	mov $r14 0			// not multi
	call #mmctx_xfer

	// wait for GPCs to all complete
	mov $r10 8			// DONE_BAR
	call #wait_doneo

	// wait for strand xfer to complete
	call #strand_wait

	// post-op
	bra $p1 #ctx_xfer_post
		mov $r10 12		// DONE_UNK12
		call #wait_donez
		mov $r1 0xa10
		shl b32 $r1 6
		mov $r2 5
		iowr I[$r1] $r2		// MEM_CMD
		ctx_xfer_post_save_wait:
			iord $r2 I[$r1]
			or $r2 $r2
			bra ne #ctx_xfer_post_save_wait

	// a save that is followed by a load leaves the rest to that load
	bra $p2 #ctx_xfer_done
	ctx_xfer_post:
		mov $r15 2
		call #ctx_4170s
		clear b32 $r15
		call #ctx_86c
		call #strand_post
		call #ctx_4170w
		clear b32 $r15
		call #ctx_4170s

		// on load only: run the mmio override list if one is pending
		bra not $p1 #ctx_xfer_no_post_mmio
		ld b32 $r1 D[$r0 + #chan_mmio_count]
		or $r1 $r1
		bra e #ctx_xfer_no_post_mmio
			call #ctx_mmio_exec

		ctx_xfer_no_post_mmio:
		call #ctx_4160c

	ctx_xfer_done:
	ret

.align 256