/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

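// rv6xx GPR-indexing workaround: finalize_alu_src() requests this when a
// group reads a GPR through the address register right after the previous
// group wrote a register that may alias it; a single-NOP ALU group is
// inserted before 'b4' as a spacer (apparently to let the prior write land
// before the indexed read).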
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

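// Pass entry point. Order matters here: per-instruction fixups first
// (run_on), then region finalization from innermost to outermost (the
// regions vector is walked in reverse), then the CF peephole, and finally
// the end-of-program and EXPORT_DONE patching below.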
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current
	// loop nest, since it may point to a cf that is later in program order.
	// The single parent-level check is sufficient because finalize_loop()
	// is processed in reverse order, from the innermost to the outermost
	// loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);
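	// Sketch of the final layout: LOOP_START_DX10 targets the instruction
	// after LOOP_END, and LOOP_END targets the instruction after
	// LOOP_START, so the pair brackets the body; the LOOP_BREAK /
	// LOOP_CONTINUE nodes created below all target LOOP_END.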

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}
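
		// Sketch of the final layout:
		//   JUMP (-> ELSE, or past POP with pop_count 1 when there is
		//         no else branch)
		//   ... then branch ...
		//   ELSE, pop_count 1 (-> after POP)   [only with an else branch]
		//   ... else branch ...
		//   POP, pop_count 1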

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

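							// Flag the clause when the implicit push would
							// start at or cross a stack-entry boundary
							// (elems or elems + 1 is a multiple of
							// stack_entry_size); cf_peephole() then splits
							// it into an explicit PUSH + plain ALU.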
							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
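		// Regardless of the source kind, if the previous group did a
		// relative (AR-indexed) write, it may alias this source's GPR, so
		// conservatively request the rv6xx NOP spacer group.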
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
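
	// Build the destination swizzle: in the final encoding dst_sel[i]
	// picks the fetch result component written to GPR channel i, so the
	// selects are reindexed below by each dst value's GPR channel.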

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

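		// When bc.type has bit 0 set (the indexed write variants), the
		// index value lives in src[4..7]; resolve it to index_gpr below.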
		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

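	// Each kcache set k locks one 16-constant line (two consecutive lines
	// with KC_LOCK_2) of some bank, and constants in a locked line are
	// addressed relative to kc_base[k]. Worked example: if kc[1] locks
	// bank 0 at addr 3, a constant with sel 50 (line 50 >> 4 == 3) maps
	// to 160 + (50 - 48) == 162.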
	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                           unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;
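	// Example: with add == 0, two enclosing loops and one enclosing if,
	// this yields 2 * stack_entry_size + 1 elements before the per-chip
	// reservations below.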

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient; there
		// are other cases where we also need to reserve stack space, which
		// is why we always reserve 1 stack element if we have a non-WQM
		// push on the stack. Condition 2 is ignored for now because we
		// don't use this instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;
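	// (elems + 3) >> 2 == ceil(elems / 4), i.e. round up to whole entries.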

	if (nstack < stack_entries)
		nstack = stack_entries;
}

void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

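			// Split a flagged ALU_PUSH_BEFORE into an explicit PUSH
			// followed by a plain ALU clause (the flag is set in
			// run_on()).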
			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

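		// Fuse an ALU clause immediately followed by a POP into a single
		// ALU_POP_AFTER clause.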
		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb