sb_core.cpp revision 3f18dd818f7ce0adab0afe1a4b4db2e242513086
1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#define SB_RA_SCHED_CHECK DEBUG
28
29extern "C" {
30#include "os/os_time.h"
31#include "r600_pipe.h"
32#include "r600_shader.h"
33
34#include "sb_public.h"
35}
36
37#include <stack>
38#include <map>
39#include <iostream>
40
41#include "sb_bc.h"
42#include "sb_shader.h"
43#include "sb_pass.h"
44#include "sb_sched.h"
45
46using namespace r600_sb;
47
48using std::cerr;
49
50static sb_hw_class translate_chip_class(enum chip_class cc);
51static sb_hw_chip translate_chip(enum radeon_family rf);
52
53sb_context *r600_sb_context_create(struct r600_context *rctx) {
54
55	sb_context *sctx = new sb_context();
56
57	if (sctx->init(rctx->isa, translate_chip(rctx->family),
58			translate_chip_class(rctx->chip_class))) {
59		delete sctx;
60		sctx = NULL;
61	}
62
63	unsigned df = rctx->screen->debug_flags;
64
65	sb_context::dump_pass = df & DBG_SB_DUMP;
66	sb_context::dump_stat = df & DBG_SB_STAT;
67	sb_context::dry_run = df & DBG_SB_DRY_RUN;
68	sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
69
70	sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
71	sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
72	sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);
73
74	return sctx;
75}
76
77void r600_sb_context_destroy(void * sctx) {
78	if (sctx) {
79		sb_context *ctx = static_cast<sb_context*>(sctx);
80
81		if (sb_context::dump_stat) {
82			cerr << "context src stats: ";
83			ctx->src_stats.dump(cerr);
84			cerr << "context opt stats: ";
85			ctx->opt_stats.dump(cerr);
86			cerr << "context diff: ";
87			ctx->src_stats.dump_diff(cerr, ctx->opt_stats);
88		}
89
90		delete ctx;
91	}
92}
93
94int r600_sb_bytecode_process(struct r600_context *rctx,
95                             struct r600_bytecode *bc,
96                             struct r600_shader *pshader,
97                             int dump_source_bytecode,
98                             int optimize) {
99	int r = 0;
100	unsigned shader_id = bc->debug_id;
101
102	sb_context *ctx = (sb_context *)rctx->sb_context;
103	if (!ctx) {
104		rctx->sb_context = ctx = r600_sb_context_create(rctx);
105	}
106
107	int64_t time_start = 0;
108	if (sb_context::dump_stat) {
109		time_start = os_time_get_nano();
110	}
111
112	SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; );
113
114	bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize);
115
116	if ((r = parser.parse())) {
117		assert(0);
118		return r;
119	}
120
121	/* skip some shaders (use shaders from default backend)
122	 * dskip_start - range start, dskip_end - range_end,
123	 * e.g. start = 5, end = 6 means shaders 5 & 6
124	 *
125	 * dskip_mode == 0 - disabled,
126	 * dskip_mode == 1 - don't process the shaders from the [start;end] range
127	 * dskip_mode == 2 - process only the shaders from the range
128	 */
129	if (sb_context::dskip_mode) {
130		if ((sb_context::dskip_start <= shader_id &&
131				shader_id <= sb_context::dskip_end) ==
132						(sb_context::dskip_mode == 1)) {
133			cerr << "sb: skipped shader " << shader_id << " : " << "["
134					<< sb_context::dskip_start << "; "
135					<< sb_context::dskip_end << "] mode "
136					<< sb_context::dskip_mode << "\n";
137			return 0;
138		}
139	}
140
141	shader *sh = parser.get_shader();
142	SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
143
144	if (!optimize) {
145		delete sh;
146		return 0;
147	}
148
149#define SB_RUN_PASS(n, dump) \
150	do { \
151		r = n(*sh).run(); \
152		if (r) { \
153			cerr << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
154			if (sb_context::no_fallback) \
155				return r; \
156			cerr << "sb: using unoptimized bytecode...\n"; \
157			delete sh; \
158			return 0; \
159		} \
160		if (dump) { \
161			SB_DUMP_PASS( cerr << "\n\n###### after " << #n << "\n"; \
162				sh->dump_ir();); \
163		} \
164		assert(!r); \
165	} while (0)
166
167	SB_RUN_PASS(ssa_prepare,		0);
168	SB_RUN_PASS(ssa_rename,			1);
169
170	if (sh->has_alu_predication)
171		SB_RUN_PASS(psi_ops,		1);
172
173	SB_RUN_PASS(liveness,			0);
174	SB_RUN_PASS(dce_cleanup,		0);
175	SB_RUN_PASS(def_use,			0);
176
177	sh->set_undef(sh->root->live_before);
178
179	SB_RUN_PASS(peephole,			1);
180	SB_RUN_PASS(if_conversion,		1);
181
182	SB_RUN_PASS(def_use,			0);
183
184	SB_RUN_PASS(gvn,				1);
185
186	SB_RUN_PASS(liveness,			0);
187	SB_RUN_PASS(dce_cleanup,		1);
188	SB_RUN_PASS(def_use,			0);
189
190	SB_RUN_PASS(liveness,			0);
191	SB_RUN_PASS(dce_cleanup,		0);
192
193	SB_RUN_PASS(ra_split,			0);
194	SB_RUN_PASS(def_use,			0);
195
196	// create 'basic blocks'. it's not like we build CFG, they are just
197	// container nodes in the correct locations for code placement
198	sh->create_bbs();
199
200	SB_RUN_PASS(gcm,				1);
201
202	sh->compute_interferences = true;
203	SB_RUN_PASS(liveness,			0);
204
205	SB_RUN_PASS(ra_coalesce,		1);
206	SB_RUN_PASS(ra_init,			1);
207
208	SB_RUN_PASS(post_scheduler,		1);
209
210	sh->expand_bbs();
211
212#if SB_RA_SCHED_CHECK
213	// check code correctness after regalloc/scheduler
214	SB_RUN_PASS(ra_checker,			0);
215#endif
216
217	SB_RUN_PASS(bc_finalizer,		0);
218
219	sh->optimized = true;
220
221	bc_builder builder(*sh);
222
223	if ((r = builder.build())) {
224		assert(0);
225		return r;
226	}
227
228	if (!sb_context::dry_run) {
229		bytecode &nbc = builder.get_bytecode();
230
231		free(bc->bytecode);
232		bc->ndw = nbc.ndw();
233		bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
234		nbc.write_data(bc->bytecode);
235
236		bc->ngpr = sh->ngpr;
237		bc->nstack = sh->nstack;
238	} else {
239		SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
240	}
241
242
243	if (sb_context::dump_stat) {
244		int64_t t = os_time_get_nano() - time_start;
245
246		cerr << "sb: processing shader " << shader_id << " done ( "
247				<< ((double)t)/1000000.0 << " ms ).\n";
248
249		sh->opt_stats.ndw = bc->ndw;
250		sh->collect_stats(true);
251
252		cerr << "src stats: ";
253		sh->src_stats.dump(cerr);
254		cerr << "opt stats: ";
255		sh->opt_stats.dump(cerr);
256		cerr << "diff: ";
257		sh->src_stats.dump_diff(cerr, sh->opt_stats);
258	}
259
260	delete sh;
261	return 0;
262}
263
264static sb_hw_chip translate_chip(enum radeon_family rf) {
265	switch (rf) {
266
267#define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
268		TRANSLATE_CHIP(R600);
269		TRANSLATE_CHIP(RV610);
270		TRANSLATE_CHIP(RV630);
271		TRANSLATE_CHIP(RV670);
272		TRANSLATE_CHIP(RV620);
273		TRANSLATE_CHIP(RV635);
274		TRANSLATE_CHIP(RS780);
275		TRANSLATE_CHIP(RS880);
276		TRANSLATE_CHIP(RV770);
277		TRANSLATE_CHIP(RV730);
278		TRANSLATE_CHIP(RV710);
279		TRANSLATE_CHIP(RV740);
280		TRANSLATE_CHIP(CEDAR);
281		TRANSLATE_CHIP(REDWOOD);
282		TRANSLATE_CHIP(JUNIPER);
283		TRANSLATE_CHIP(CYPRESS);
284		TRANSLATE_CHIP(HEMLOCK);
285		TRANSLATE_CHIP(PALM);
286		TRANSLATE_CHIP(SUMO);
287		TRANSLATE_CHIP(SUMO2);
288		TRANSLATE_CHIP(BARTS);
289		TRANSLATE_CHIP(TURKS);
290		TRANSLATE_CHIP(CAICOS);
291		TRANSLATE_CHIP(CAYMAN);
292#undef TRANSLATE_CHIP
293
294		default:
295			assert(!"unknown chip");
296			return HW_CHIP_UNKNOWN;
297	}
298}
299
300static sb_hw_class translate_chip_class(enum chip_class cc) {
301	switch(cc) {
302		case R600: return HW_CLASS_R600;
303		case R700: return HW_CLASS_R700;
304		case EVERGREEN: return HW_CLASS_EVERGREEN;
305		case CAYMAN: return HW_CLASS_CAYMAN;
306
307		default:
308			assert(!"unknown chip class");
309			return HW_CLASS_UNKNOWN;
310	}
311}
312