/*
 * Memory arbiter functions. Allocates bandwidth through the
 * arbiter and sets up arbiter breakpoints.
 *
 * The algorithm first assigns slots to the clients that have specified
 * bandwidth (e.g. ethernet) and then divides the remaining slots
 * among all the active clients.
 *
 * Copyright (c) 2004-2007 Axis Communications AB.
 *
 * The artpec-3 has two arbiters. The memory hierarchy looks like this:
 *
 *
 * CPU DMAs
 *  |   |
 *  |   |
 * --------------    ------------------
 * | foo arbiter|----| Internal memory|
 * --------------    ------------------
 *      |
 * --------------
 * | L2 cache   |
 * --------------
 *             |
 * h264 etc    |
 *    |        |
 *    |        |
 * --------------
 * | bar arbiter|
 * --------------
 *       |
 * ---------
 * | SDRAM |
 * ---------
 *
 */
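/*
 * Rough slot granularity (illustrative only, assuming the bandwidth
 * figures below are in bytes/s): with SDRAM_BANDWIDTH = 400000000 and
 * NBR_OF_SLOTS = 64, one slot corresponds to roughly
 * 400000000 / 64 = 6.25 MB/s of guaranteed bandwidth, so a client
 * asking for e.g. 100 MB/s ends up with about 16 slots.
 */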

#include <hwregs/reg_map.h>
#include <hwregs/reg_rdwr.h>
#include <hwregs/marb_foo_defs.h>
#include <hwregs/marb_bar_defs.h>
#include <arbiter.h>
#include <hwregs/intr_vect.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <asm/io.h>
#include <asm/irq_regs.h>

#define D(x)

struct crisv32_watch_entry {
  unsigned long instance;
  watch_callback *cb;
  unsigned long start;
  unsigned long end;
  int used;
};

#define NUMBER_OF_BP 4
#define SDRAM_BANDWIDTH 400000000
#define INTMEM_BANDWIDTH 400000000
#define NBR_OF_SLOTS 64
#define NBR_OF_REGIONS 2
#define NBR_OF_CLIENTS 15
#define ARBITERS 2
#define UNASSIGNED 100

struct arbiter {
  unsigned long instance;
  int nbr_regions;
  int nbr_clients;
  int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
  int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
};

static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] =
{
  {
  {regi_marb_foo_bp0},
  {regi_marb_foo_bp1},
  {regi_marb_foo_bp2},
  {regi_marb_foo_bp3}
  },
  {
  {regi_marb_bar_bp0},
  {regi_marb_bar_bp1},
  {regi_marb_bar_bp2},
  {regi_marb_bar_bp3}
  }
};

struct arbiter arbiters[ARBITERS] =
{
  { /* L2 cache arbiter */
    .instance = regi_marb_foo,
    .nbr_regions = 2,
    .nbr_clients = 15
  },
  { /* DDR2 arbiter */
    .instance = regi_marb_bar,
    .nbr_regions = 1,
    .nbr_clients = 9
  }
};

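/*
 * Indexed by region. The ordering here is assumed to match the
 * EXT_REGION/INT_REGION indexes from arbiter.h, i.e. the external
 * SDRAM figure first and the internal-memory figure second.
 */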
static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH};

DEFINE_SPINLOCK(arbiter_lock);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id);
static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id);

/*
 * "I'm the arbiter, I know the score.
 *  From square one I'll be watching all 64."
 * (memory arbiter slots, that is)
 *
 *  Or in other words:
 * Program the memory arbiter slots for "region" according to what's
 * in requested_slots[] and active_clients[], while minimizing
 * latency. A caller may pass a non-zero positive amount for
 * "unused_slots", which must then be the unallocated, remaining
 * number of slots, free to hand out to any client.
 */
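/*
 * Example (illustrative): a client with requested_slots = 16 gets
 * interval = 64 / 16 = 4 and is placed in slots 0, 4, 8, ..., 60
 * (stepping past slots already taken), i.e. spread evenly to keep its
 * worst-case wait short. An active client with a zero request gets
 * interval = NBR_OF_SLOTS and thus at most one slot in the first pass.
 */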

static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
{
	int slot;
	int client;
	int interval = 0;

	/*
	 * This vector corresponds to the hardware arbiter slots (see
	 * the hardware documentation for semantics). We initialize
	 * each slot with a suitable sentinel value outside the valid
	 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
	 * client indexes. Then it's fed to the hardware.
	 */
	s8 val[NBR_OF_SLOTS];

	for (slot = 0; slot < NBR_OF_SLOTS; slot++)
	    val[slot] = -1;

	for (client = 0; client < arbiters[arbiter].nbr_clients; client++) {
	    int pos;
	    /* Allocate the requested non-zero number of slots, but
	     * also give clients with zero-requests one slot each
	     * while stocks last. We do the latter here, in client
	     * order. This makes sure zero-request clients are the
	     * first to get to any spare slots, else those slots
	     * could, when bandwidth is allocated close to the limit,
	     * all be allocated to low-index non-zero-request clients
	     * in the default-fill loop below. Another positive but
	     * secondary effect is a somewhat better spread of the
	     * zero-bandwidth clients in the vector, avoiding some of
	     * the latency that could otherwise be caused by the
	     * partitioning of non-zero-bandwidth clients at low
	     * indexes and zero-bandwidth clients at high
	     * indexes. (Note that this spreading can only affect the
	     * unallocated bandwidth.)  All the above only matters for
	     * memory-intensive situations, of course.
	     */
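	    /*
	     * E.g. (illustrative): with 62 of 64 slots already claimed
	     * by bandwidth requests, the two spare slots
	     * (unused_slots == 2) go to the first two active
	     * zero-request clients seen here, instead of being swept
	     * up by low-numbered clients in the fill loop below.
	     */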
	    if (!arbiters[arbiter].requested_slots[region][client]) {
		/*
		 * Skip inactive clients. Also skip zero-slot
		 * allocations in this pass when there are no known
		 * free slots.
		 */
		if (!arbiters[arbiter].active_clients[region][client] ||
				unused_slots <= 0)
			continue;

		unused_slots--;

		/* Only allocate one slot for this client. */
		interval = NBR_OF_SLOTS;
	    } else
		interval = NBR_OF_SLOTS /
			arbiters[arbiter].requested_slots[region][client];

	    pos = 0;
	    while (pos < NBR_OF_SLOTS) {
		if (val[pos] >= 0)
		   pos++;
		else {
			val[pos] = client;
			pos += interval;
		}
	    }
	}

	client = 0;
	for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
		/*
		 * Allocate remaining slots in round-robin
		 * client-number order for active clients. For this
		 * pass, we ignore requested bandwidth and previous
		 * allocations.
		 */
		if (val[slot] < 0) {
			int first = client;
			while (!arbiters[arbiter].active_clients[region][client]) {
				client = (client + 1) %
					arbiters[arbiter].nbr_clients;
				if (client == first)
				   break;
			}
			val[slot] = client;
			client = (client + 1) % arbiters[arbiter].nbr_clients;
		}
		if (arbiter == 0) {
			if (region == EXT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_l2_slots, slot, val[slot]);
			else if (region == INT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_intm_slots, slot, val[slot]);
		} else {
			REG_WR_INT_VECT(marb_bar, regi_marb_bar,
				rw_ddr2_slots, slot, val[slot]);
		}
	}
}

extern char _stext, _etext;

static void crisv32_arbiter_init(void)
{
	static int initialized;

	if (initialized)
		return;

	initialized = 1;

	/*
	 * CPU caches are always set to active, but with zero
	 * bandwidth allocated. It should be ok to allocate zero
	 * bandwidth for the caches, because DMA for other channels
	 * will supposedly finish, once their programmed amount is
	 * done, and then the caches will get access according to the
	 * "fixed scheme" for unclaimed slots. Though, if for some
	 * use-case somewhere, there's a maximum CPU latency for
	 * e.g. some interrupt, we have to start allocating specific
	 * bandwidth for the CPU caches too.
	 */
	arbiters[0].active_clients[EXT_REGION][11] = 1;
	arbiters[0].active_clients[EXT_REGION][12] = 1;
	crisv32_arbiter_config(0, EXT_REGION, 0);
	crisv32_arbiter_config(0, INT_REGION, 0);
	crisv32_arbiter_config(1, EXT_REGION, 0);

	if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq,
			0, "arbiter", NULL))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

	if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq,
			0, "arbiter", NULL))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

#ifndef CONFIG_ETRAX_KGDB
	/* Global watch for writes to kernel text segment. */
	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
		MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
			      arbiter_all_write, NULL);
#endif

	/* Set up max burst sizes by default */
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3);
}

int crisv32_arbiter_allocate_bandwidth(int client, int region,
				      unsigned long bandwidth)
{
	int i;
	int total_assigned = 0;
	int total_clients = 0;
	int req;
	int arbiter = 0;

	crisv32_arbiter_init();

	if (client & 0xffff0000) {
		arbiter = 1;
		client >>= 16;
	}

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++) {
		total_assigned += arbiters[arbiter].requested_slots[region][i];
		total_clients += arbiters[arbiter].active_clients[region][i];
	}

	/* Avoid division by 0 for 0-bandwidth requests. */
	req = bandwidth == 0
		? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);
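	/*
	 * Illustrative numbers: bandwidth = 25000000 (25 MB/s) against
	 * max_bandwidth[region] = 400000000 gives 400000000 / 25000000
	 * = 16, so req = 64 / 16 = 4 slots.
	 */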

	/*
	 * We make sure that there are enough slots only for non-zero
	 * requests. Requesting 0 bandwidth *may* allocate slots,
	 * though if all bandwidth is allocated, such a client won't
	 * get any and will have to rely on getting memory access
	 * according to the fixed scheme that's the default when one
	 * of the slot-allocated clients doesn't claim their slot.
	 */
	if (total_assigned + req > NBR_OF_SLOTS)
	   return -ENOMEM;

	arbiters[arbiter].active_clients[region][client] = 1;
	arbiters[arbiter].requested_slots[region][client] = req;
	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);

	/* Propagate allocation from foo to bar */
	if (arbiter == 0)
		crisv32_arbiter_allocate_bandwidth(8 << 16,
			EXT_REGION, bandwidth);
	return 0;
}
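
/*
 * Example call (illustrative only; real client numbers are defined by
 * the hardware and arbiter.h, the values here are made up):
 *
 *	err = crisv32_arbiter_allocate_bandwidth(3, EXT_REGION, 10000000);
 *	...
 *	crisv32_arbiter_deallocate_bandwidth(3, EXT_REGION);
 *
 * A client on the bar arbiter is selected by putting its number in the
 * upper 16 bits of "client", as the shift above shows.
 */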

/*
 * Main entry for bandwidth deallocation.
 *
 * Strictly speaking, for a somewhat constant set of clients where
 * each client gets a constant bandwidth and is just enabled or
 * disabled (somewhat dynamically), no action is necessary here to
 * avoid starvation for non-zero-allocation clients, as the allocated
 * slots will just be unused. However, handing out those unused slots
 * to active clients avoids needless latency if the "fixed scheme"
 * would give unclaimed slots to an eager low-index client.
 */

void crisv32_arbiter_deallocate_bandwidth(int client, int region)
{
	int i;
	int total_assigned = 0;
	int arbiter = 0;

	if (client & 0xffff0000) {
		arbiter = 1;
		client >>= 16;
	}

	arbiters[arbiter].requested_slots[region][client] = 0;
	arbiters[arbiter].active_clients[region][client] = 0;

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++)
		total_assigned += arbiters[arbiter].requested_slots[region][i];

	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
}

int crisv32_arbiter_watch(unsigned long start, unsigned long size,
			  unsigned long clients, unsigned long accesses,
			  watch_callback *cb)
{
	int i;
	int arbiter;
	int used[2] = { 0, 0 };
	int ret = 0;

	crisv32_arbiter_init();

	if (start > 0x80000000) {
		printk(KERN_ERR "Arbiter: %lX doesn't look like a "
			"physical address\n", start);
		return -EFAULT;
	}

	spin_lock(&arbiter_lock);

	if (clients & 0xffff)
		used[0] = 1;
	if (clients & 0xffff0000)
		used[1] = 1;

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		if (!used[arbiter])
			continue;

		for (i = 0; i < NUMBER_OF_BP; i++) {
			if (!watches[arbiter][i].used) {
				unsigned intr_mask;
				if (arbiter)
					intr_mask = REG_RD_INT(marb_bar,
						regi_marb_bar, rw_intr_mask);
				else
					intr_mask = REG_RD_INT(marb_foo,
						regi_marb_foo, rw_intr_mask);

				watches[arbiter][i].used = 1;
				watches[arbiter][i].start = start;
				watches[arbiter][i].end = start + size;
				watches[arbiter][i].cb = cb;

				ret |= (i + 1) << (arbiter + 8);
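				/*
				 * The watch id assembled above encodes
				 * both the breakpoint index (i + 1) and
				 * which arbiter it sits on;
				 * crisv32_arbiter_unwatch() undoes the
				 * same (arbiter + 8) shift to find the
				 * entry again.
				 */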
				if (arbiter) {
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_clients,
						clients & 0xffff);
				} else {
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_clients, clients >> 16);
				}

				if (i == 0)
					intr_mask |= 1;
				else if (i == 1)
					intr_mask |= 2;
				else if (i == 2)
					intr_mask |= 4;
				else if (i == 3)
					intr_mask |= 8;

				if (arbiter)
					REG_WR_INT(marb_bar, regi_marb_bar,
						rw_intr_mask, intr_mask);
				else
					REG_WR_INT(marb_foo, regi_marb_foo,
						rw_intr_mask, intr_mask);

				break;
			}
		}
	}
	spin_unlock(&arbiter_lock);
	if (ret)
		return ret;
	else
		return -ENOMEM;
}
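
/*
 * Example (illustrative; buf_phys, buf_size and my_cb are hypothetical
 * names, not part of this driver):
 *
 *	static void my_cb(void) { ... }
 *	...
 *	id = crisv32_arbiter_watch(buf_phys, buf_size,
 *				   MARB_CLIENTS(arbiter_all_clients,
 *						arbiter_bar_all_clients),
 *				   arbiter_all_write, my_cb);
 *	...
 *	crisv32_arbiter_unwatch(id);
 *
 * crisv32_arbiter_init() above sets up a real instance of this: a
 * write watch over the kernel text segment (unless CONFIG_ETRAX_KGDB
 * is set).
 */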

int crisv32_arbiter_unwatch(int id)
{
	int arbiter;
	int intr_mask;

	crisv32_arbiter_init();

	spin_lock(&arbiter_lock);

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		int id2;

		if (arbiter)
			intr_mask = REG_RD_INT(marb_bar, regi_marb_bar,
				rw_intr_mask);
		else
			intr_mask = REG_RD_INT(marb_foo, regi_marb_foo,
				rw_intr_mask);

		id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8);
		if (id2 == 0)
			continue;
		id2--;
		if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) {
			spin_unlock(&arbiter_lock);
			return -EINVAL;
		}

		/* Clear the entry, but keep its fixed register instance. */
		watches[arbiter][id2].used = 0;
		watches[arbiter][id2].cb = NULL;
		watches[arbiter][id2].start = 0;
		watches[arbiter][id2].end = 0;

		if (id2 == 0)
			intr_mask &= ~1;
		else if (id2 == 1)
			intr_mask &= ~2;
		else if (id2 == 2)
			intr_mask &= ~4;
		else if (id2 == 3)
			intr_mask &= ~8;

		if (arbiter)
			REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask,
				intr_mask);
		else
			REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask,
				intr_mask);
	}

	spin_unlock(&arbiter_lock);
	return 0;
}

extern void show_registers(struct pt_regs *regs);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_foo_r_masked_intr masked_intr =
		REG_RD(marb_foo, regi_marb_foo, r_masked_intr);
	reg_marb_foo_bp_r_brk_clients r_clients;
	reg_marb_foo_bp_r_brk_addr r_addr;
	reg_marb_foo_bp_r_brk_op r_op;
	reg_marb_foo_bp_r_brk_first_client r_first;
	reg_marb_foo_bp_r_brk_size r_size;
	reg_marb_foo_bp_rw_ack ack = {0};
	reg_marb_foo_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
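	/*
	 * Both the breakpoint instance (rw_ack) and the arbiter's
	 * summary interrupt bits (rw_ack_intr, bp0..bp3) are written
	 * further down, once the breakpoint registers have been read
	 * out for the diagnostic printout.
	 */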
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned)dev_id;

	masked_intr = REG_RD(marb_foo, regi_marb_foo, r_masked_intr);

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size));

	REG_WR(marb_foo_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp);

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}

static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_bar_r_masked_intr masked_intr =
		REG_RD(marb_bar, regi_marb_bar, r_masked_intr);
	reg_marb_bar_bp_r_brk_clients r_clients;
	reg_marb_bar_bp_r_brk_addr r_addr;
	reg_marb_bar_bp_r_brk_op r_op;
	reg_marb_bar_bp_r_brk_first_client r_first;
	reg_marb_bar_bp_r_brk_size r_size;
	reg_marb_bar_bp_rw_ack ack = {0};
	reg_marb_bar_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned)dev_id;

	masked_intr = REG_RD(marb_bar, regi_marb_bar, r_masked_intr);

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size));

	REG_WR(marb_bar_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp);

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}