1/* 2 * Memory arbiter functions. Allocates bandwidth through the 3 * arbiter and sets up arbiter breakpoints. 4 * 5 * The algorithm first assigns slots to the clients that has specified 6 * bandwidth (e.g. ethernet) and then the remaining slots are divided 7 * on all the active clients. 8 * 9 * Copyright (c) 2004-2007 Axis Communications AB. 10 * 11 * The artpec-3 has two arbiters. The memory hierarchy looks like this: 12 * 13 * 14 * CPU DMAs 15 * | | 16 * | | 17 * -------------- ------------------ 18 * | foo arbiter|----| Internal memory| 19 * -------------- ------------------ 20 * | 21 * -------------- 22 * | L2 cache | 23 * -------------- 24 * | 25 * h264 etc | 26 * | | 27 * | | 28 * -------------- 29 * | bar arbiter| 30 * -------------- 31 * | 32 * --------- 33 * | SDRAM | 34 * --------- 35 * 36 */ 37 38#include <hwregs/reg_map.h> 39#include <hwregs/reg_rdwr.h> 40#include <hwregs/marb_foo_defs.h> 41#include <hwregs/marb_bar_defs.h> 42#include <arbiter.h> 43#include <hwregs/intr_vect.h> 44#include <linux/interrupt.h> 45#include <linux/irq.h> 46#include <linux/signal.h> 47#include <linux/errno.h> 48#include <linux/spinlock.h> 49#include <asm/io.h> 50#include <asm/irq_regs.h> 51 52#define D(x) 53 54struct crisv32_watch_entry { 55 unsigned long instance; 56 watch_callback *cb; 57 unsigned long start; 58 unsigned long end; 59 int used; 60}; 61 62#define NUMBER_OF_BP 4 63#define SDRAM_BANDWIDTH 400000000 64#define INTMEM_BANDWIDTH 400000000 65#define NBR_OF_SLOTS 64 66#define NBR_OF_REGIONS 2 67#define NBR_OF_CLIENTS 15 68#define ARBITERS 2 69#define UNASSIGNED 100 70 71struct arbiter { 72 unsigned long instance; 73 int nbr_regions; 74 int nbr_clients; 75 int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS]; 76 int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS]; 77}; 78 79static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] = 80{ 81 { 82 {regi_marb_foo_bp0}, 83 {regi_marb_foo_bp1}, 84 {regi_marb_foo_bp2}, 85 {regi_marb_foo_bp3} 86 }, 87 { 88 {regi_marb_bar_bp0}, 89 {regi_marb_bar_bp1}, 90 {regi_marb_bar_bp2}, 91 {regi_marb_bar_bp3} 92 } 93}; 94 95struct arbiter arbiters[ARBITERS] = 96{ 97 { /* L2 cache arbiter */ 98 .instance = regi_marb_foo, 99 .nbr_regions = 2, 100 .nbr_clients = 15 101 }, 102 { /* DDR2 arbiter */ 103 .instance = regi_marb_bar, 104 .nbr_regions = 1, 105 .nbr_clients = 9 106 } 107}; 108 109static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH}; 110 111DEFINE_SPINLOCK(arbiter_lock); 112 113static irqreturn_t 114crisv32_foo_arbiter_irq(int irq, void *dev_id); 115static irqreturn_t 116crisv32_bar_arbiter_irq(int irq, void *dev_id); 117 118/* 119 * "I'm the arbiter, I know the score. 120 * From square one I'll be watching all 64." 121 * (memory arbiter slots, that is) 122 * 123 * Or in other words: 124 * Program the memory arbiter slots for "region" according to what's 125 * in requested_slots[] and active_clients[], while minimizing 126 * latency. A caller may pass a non-zero positive amount for 127 * "unused_slots", which must then be the unallocated, remaining 128 * number of slots, free to hand out to any client. 129 */ 130 131static void crisv32_arbiter_config(int arbiter, int region, int unused_slots) 132{ 133 int slot; 134 int client; 135 int interval = 0; 136 137 /* 138 * This vector corresponds to the hardware arbiter slots (see 139 * the hardware documentation for semantics). We initialize 140 * each slot with a suitable sentinel value outside the valid 141 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with 142 * client indexes. Then it's fed to the hardware. 143 */ 144 s8 val[NBR_OF_SLOTS]; 145 146 for (slot = 0; slot < NBR_OF_SLOTS; slot++) 147 val[slot] = -1; 148 149 for (client = 0; client < arbiters[arbiter].nbr_clients; client++) { 150 int pos; 151 /* Allocate the requested non-zero number of slots, but 152 * also give clients with zero-requests one slot each 153 * while stocks last. We do the latter here, in client 154 * order. This makes sure zero-request clients are the 155 * first to get to any spare slots, else those slots 156 * could, when bandwidth is allocated close to the limit, 157 * all be allocated to low-index non-zero-request clients 158 * in the default-fill loop below. Another positive but 159 * secondary effect is a somewhat better spread of the 160 * zero-bandwidth clients in the vector, avoiding some of 161 * the latency that could otherwise be caused by the 162 * partitioning of non-zero-bandwidth clients at low 163 * indexes and zero-bandwidth clients at high 164 * indexes. (Note that this spreading can only affect the 165 * unallocated bandwidth.) All the above only matters for 166 * memory-intensive situations, of course. 167 */ 168 if (!arbiters[arbiter].requested_slots[region][client]) { 169 /* 170 * Skip inactive clients. Also skip zero-slot 171 * allocations in this pass when there are no known 172 * free slots. 173 */ 174 if (!arbiters[arbiter].active_clients[region][client] || 175 unused_slots <= 0) 176 continue; 177 178 unused_slots--; 179 180 /* Only allocate one slot for this client. */ 181 interval = NBR_OF_SLOTS; 182 } else 183 interval = NBR_OF_SLOTS / 184 arbiters[arbiter].requested_slots[region][client]; 185 186 pos = 0; 187 while (pos < NBR_OF_SLOTS) { 188 if (val[pos] >= 0) 189 pos++; 190 else { 191 val[pos] = client; 192 pos += interval; 193 } 194 } 195 } 196 197 client = 0; 198 for (slot = 0; slot < NBR_OF_SLOTS; slot++) { 199 /* 200 * Allocate remaining slots in round-robin 201 * client-number order for active clients. For this 202 * pass, we ignore requested bandwidth and previous 203 * allocations. 204 */ 205 if (val[slot] < 0) { 206 int first = client; 207 while (!arbiters[arbiter].active_clients[region][client]) { 208 client = (client + 1) % 209 arbiters[arbiter].nbr_clients; 210 if (client == first) 211 break; 212 } 213 val[slot] = client; 214 client = (client + 1) % arbiters[arbiter].nbr_clients; 215 } 216 if (arbiter == 0) { 217 if (region == EXT_REGION) 218 REG_WR_INT_VECT(marb_foo, regi_marb_foo, 219 rw_l2_slots, slot, val[slot]); 220 else if (region == INT_REGION) 221 REG_WR_INT_VECT(marb_foo, regi_marb_foo, 222 rw_intm_slots, slot, val[slot]); 223 } else { 224 REG_WR_INT_VECT(marb_bar, regi_marb_bar, 225 rw_ddr2_slots, slot, val[slot]); 226 } 227 } 228} 229 230extern char _stext, _etext; 231 232static void crisv32_arbiter_init(void) 233{ 234 static int initialized; 235 236 if (initialized) 237 return; 238 239 initialized = 1; 240 241 /* 242 * CPU caches are always set to active, but with zero 243 * bandwidth allocated. It should be ok to allocate zero 244 * bandwidth for the caches, because DMA for other channels 245 * will supposedly finish, once their programmed amount is 246 * done, and then the caches will get access according to the 247 * "fixed scheme" for unclaimed slots. Though, if for some 248 * use-case somewhere, there's a maximum CPU latency for 249 * e.g. some interrupt, we have to start allocating specific 250 * bandwidth for the CPU caches too. 251 */ 252 arbiters[0].active_clients[EXT_REGION][11] = 1; 253 arbiters[0].active_clients[EXT_REGION][12] = 1; 254 crisv32_arbiter_config(0, EXT_REGION, 0); 255 crisv32_arbiter_config(0, INT_REGION, 0); 256 crisv32_arbiter_config(1, EXT_REGION, 0); 257 258 if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq, 259 0, "arbiter", NULL)) 260 printk(KERN_ERR "Couldn't allocate arbiter IRQ\n"); 261 262 if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq, 263 0, "arbiter", NULL)) 264 printk(KERN_ERR "Couldn't allocate arbiter IRQ\n"); 265 266#ifndef CONFIG_ETRAX_KGDB 267 /* Global watch for writes to kernel text segment. */ 268 crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext, 269 MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients), 270 arbiter_all_write, NULL); 271#endif 272 273 /* Set up max burst sizes by default */ 274 REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3); 275 REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3); 276 REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3); 277 REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3); 278 REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3); 279 REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3); 280 REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3); 281 REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3); 282 REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3); 283} 284 285int crisv32_arbiter_allocate_bandwidth(int client, int region, 286 unsigned long bandwidth) 287{ 288 int i; 289 int total_assigned = 0; 290 int total_clients = 0; 291 int req; 292 int arbiter = 0; 293 294 crisv32_arbiter_init(); 295 296 if (client & 0xffff0000) { 297 arbiter = 1; 298 client >>= 16; 299 } 300 301 for (i = 0; i < arbiters[arbiter].nbr_clients; i++) { 302 total_assigned += arbiters[arbiter].requested_slots[region][i]; 303 total_clients += arbiters[arbiter].active_clients[region][i]; 304 } 305 306 /* Avoid division by 0 for 0-bandwidth requests. */ 307 req = bandwidth == 0 308 ? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth); 309 310 /* 311 * We make sure that there are enough slots only for non-zero 312 * requests. Requesting 0 bandwidth *may* allocate slots, 313 * though if all bandwidth is allocated, such a client won't 314 * get any and will have to rely on getting memory access 315 * according to the fixed scheme that's the default when one 316 * of the slot-allocated clients doesn't claim their slot. 317 */ 318 if (total_assigned + req > NBR_OF_SLOTS) 319 return -ENOMEM; 320 321 arbiters[arbiter].active_clients[region][client] = 1; 322 arbiters[arbiter].requested_slots[region][client] = req; 323 crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned); 324 325 /* Propagate allocation from foo to bar */ 326 if (arbiter == 0) 327 crisv32_arbiter_allocate_bandwidth(8 << 16, 328 EXT_REGION, bandwidth); 329 return 0; 330} 331 332/* 333 * Main entry for bandwidth deallocation. 334 * 335 * Strictly speaking, for a somewhat constant set of clients where 336 * each client gets a constant bandwidth and is just enabled or 337 * disabled (somewhat dynamically), no action is necessary here to 338 * avoid starvation for non-zero-allocation clients, as the allocated 339 * slots will just be unused. However, handing out those unused slots 340 * to active clients avoids needless latency if the "fixed scheme" 341 * would give unclaimed slots to an eager low-index client. 342 */ 343 344void crisv32_arbiter_deallocate_bandwidth(int client, int region) 345{ 346 int i; 347 int total_assigned = 0; 348 int arbiter = 0; 349 350 if (client & 0xffff0000) 351 arbiter = 1; 352 353 arbiters[arbiter].requested_slots[region][client] = 0; 354 arbiters[arbiter].active_clients[region][client] = 0; 355 356 for (i = 0; i < arbiters[arbiter].nbr_clients; i++) 357 total_assigned += arbiters[arbiter].requested_slots[region][i]; 358 359 crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned); 360} 361 362int crisv32_arbiter_watch(unsigned long start, unsigned long size, 363 unsigned long clients, unsigned long accesses, 364 watch_callback *cb) 365{ 366 int i; 367 int arbiter; 368 int used[2]; 369 int ret = 0; 370 371 crisv32_arbiter_init(); 372 373 if (start > 0x80000000) { 374 printk(KERN_ERR "Arbiter: %lX doesn't look like a " 375 "physical address", start); 376 return -EFAULT; 377 } 378 379 spin_lock(&arbiter_lock); 380 381 if (clients & 0xffff) 382 used[0] = 1; 383 if (clients & 0xffff0000) 384 used[1] = 1; 385 386 for (arbiter = 0; arbiter < ARBITERS; arbiter++) { 387 if (!used[arbiter]) 388 continue; 389 390 for (i = 0; i < NUMBER_OF_BP; i++) { 391 if (!watches[arbiter][i].used) { 392 unsigned intr_mask; 393 if (arbiter) 394 intr_mask = REG_RD_INT(marb_bar, 395 regi_marb_bar, rw_intr_mask); 396 else 397 intr_mask = REG_RD_INT(marb_foo, 398 regi_marb_foo, rw_intr_mask); 399 400 watches[arbiter][i].used = 1; 401 watches[arbiter][i].start = start; 402 watches[arbiter][i].end = start + size; 403 watches[arbiter][i].cb = cb; 404 405 ret |= (i + 1) << (arbiter + 8); 406 if (arbiter) { 407 REG_WR_INT(marb_bar_bp, 408 watches[arbiter][i].instance, 409 rw_first_addr, 410 watches[arbiter][i].start); 411 REG_WR_INT(marb_bar_bp, 412 watches[arbiter][i].instance, 413 rw_last_addr, 414 watches[arbiter][i].end); 415 REG_WR_INT(marb_bar_bp, 416 watches[arbiter][i].instance, 417 rw_op, accesses); 418 REG_WR_INT(marb_bar_bp, 419 watches[arbiter][i].instance, 420 rw_clients, 421 clients & 0xffff); 422 } else { 423 REG_WR_INT(marb_foo_bp, 424 watches[arbiter][i].instance, 425 rw_first_addr, 426 watches[arbiter][i].start); 427 REG_WR_INT(marb_foo_bp, 428 watches[arbiter][i].instance, 429 rw_last_addr, 430 watches[arbiter][i].end); 431 REG_WR_INT(marb_foo_bp, 432 watches[arbiter][i].instance, 433 rw_op, accesses); 434 REG_WR_INT(marb_foo_bp, 435 watches[arbiter][i].instance, 436 rw_clients, clients >> 16); 437 } 438 439 if (i == 0) 440 intr_mask |= 1; 441 else if (i == 1) 442 intr_mask |= 2; 443 else if (i == 2) 444 intr_mask |= 4; 445 else if (i == 3) 446 intr_mask |= 8; 447 448 if (arbiter) 449 REG_WR_INT(marb_bar, regi_marb_bar, 450 rw_intr_mask, intr_mask); 451 else 452 REG_WR_INT(marb_foo, regi_marb_foo, 453 rw_intr_mask, intr_mask); 454 455 spin_unlock(&arbiter_lock); 456 457 break; 458 } 459 } 460 } 461 spin_unlock(&arbiter_lock); 462 if (ret) 463 return ret; 464 else 465 return -ENOMEM; 466} 467 468int crisv32_arbiter_unwatch(int id) 469{ 470 int arbiter; 471 int intr_mask; 472 473 crisv32_arbiter_init(); 474 475 spin_lock(&arbiter_lock); 476 477 for (arbiter = 0; arbiter < ARBITERS; arbiter++) { 478 int id2; 479 480 if (arbiter) 481 intr_mask = REG_RD_INT(marb_bar, regi_marb_bar, 482 rw_intr_mask); 483 else 484 intr_mask = REG_RD_INT(marb_foo, regi_marb_foo, 485 rw_intr_mask); 486 487 id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8); 488 if (id2 == 0) 489 continue; 490 id2--; 491 if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) { 492 spin_unlock(&arbiter_lock); 493 return -EINVAL; 494 } 495 496 memset(&watches[arbiter][id2], 0, 497 sizeof(struct crisv32_watch_entry)); 498 499 if (id2 == 0) 500 intr_mask &= ~1; 501 else if (id2 == 1) 502 intr_mask &= ~2; 503 else if (id2 == 2) 504 intr_mask &= ~4; 505 else if (id2 == 3) 506 intr_mask &= ~8; 507 508 if (arbiter) 509 REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask, 510 intr_mask); 511 else 512 REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask, 513 intr_mask); 514 } 515 516 spin_unlock(&arbiter_lock); 517 return 0; 518} 519 520extern void show_registers(struct pt_regs *regs); 521 522 523static irqreturn_t 524crisv32_foo_arbiter_irq(int irq, void *dev_id) 525{ 526 reg_marb_foo_r_masked_intr masked_intr = 527 REG_RD(marb_foo, regi_marb_foo, r_masked_intr); 528 reg_marb_foo_bp_r_brk_clients r_clients; 529 reg_marb_foo_bp_r_brk_addr r_addr; 530 reg_marb_foo_bp_r_brk_op r_op; 531 reg_marb_foo_bp_r_brk_first_client r_first; 532 reg_marb_foo_bp_r_brk_size r_size; 533 reg_marb_foo_bp_rw_ack ack = {0}; 534 reg_marb_foo_rw_ack_intr ack_intr = { 535 .bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1 536 }; 537 struct crisv32_watch_entry *watch; 538 unsigned arbiter = (unsigned)dev_id; 539 540 masked_intr = REG_RD(marb_foo, regi_marb_foo, r_masked_intr); 541 542 if (masked_intr.bp0) 543 watch = &watches[arbiter][0]; 544 else if (masked_intr.bp1) 545 watch = &watches[arbiter][1]; 546 else if (masked_intr.bp2) 547 watch = &watches[arbiter][2]; 548 else if (masked_intr.bp3) 549 watch = &watches[arbiter][3]; 550 else 551 return IRQ_NONE; 552 553 /* Retrieve all useful information and print it. */ 554 r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients); 555 r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr); 556 r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op); 557 r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client); 558 r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size); 559 560 printk(KERN_DEBUG "Arbiter IRQ\n"); 561 printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n", 562 REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients), 563 REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr), 564 REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op), 565 REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first), 566 REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size)); 567 568 REG_WR(marb_foo_bp, watch->instance, rw_ack, ack); 569 REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr); 570 571 printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()); 572 573 if (watch->cb) 574 watch->cb(); 575 576 return IRQ_HANDLED; 577} 578 579static irqreturn_t 580crisv32_bar_arbiter_irq(int irq, void *dev_id) 581{ 582 reg_marb_bar_r_masked_intr masked_intr = 583 REG_RD(marb_bar, regi_marb_bar, r_masked_intr); 584 reg_marb_bar_bp_r_brk_clients r_clients; 585 reg_marb_bar_bp_r_brk_addr r_addr; 586 reg_marb_bar_bp_r_brk_op r_op; 587 reg_marb_bar_bp_r_brk_first_client r_first; 588 reg_marb_bar_bp_r_brk_size r_size; 589 reg_marb_bar_bp_rw_ack ack = {0}; 590 reg_marb_bar_rw_ack_intr ack_intr = { 591 .bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1 592 }; 593 struct crisv32_watch_entry *watch; 594 unsigned arbiter = (unsigned)dev_id; 595 596 masked_intr = REG_RD(marb_bar, regi_marb_bar, r_masked_intr); 597 598 if (masked_intr.bp0) 599 watch = &watches[arbiter][0]; 600 else if (masked_intr.bp1) 601 watch = &watches[arbiter][1]; 602 else if (masked_intr.bp2) 603 watch = &watches[arbiter][2]; 604 else if (masked_intr.bp3) 605 watch = &watches[arbiter][3]; 606 else 607 return IRQ_NONE; 608 609 /* Retrieve all useful information and print it. */ 610 r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients); 611 r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr); 612 r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op); 613 r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client); 614 r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size); 615 616 printk(KERN_DEBUG "Arbiter IRQ\n"); 617 printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n", 618 REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients), 619 REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr), 620 REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op), 621 REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first), 622 REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size)); 623 624 REG_WR(marb_bar_bp, watch->instance, rw_ack, ack); 625 REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr); 626 627 printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp); 628 629 if (watch->cb) 630 watch->cb(); 631 632 return IRQ_HANDLED; 633} 634 635