sb_core.cpp revision 3f18dd818f7ce0adab0afe1a4b4db2e242513086
1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#define SB_RA_SCHED_CHECK DEBUG 28 29extern "C" { 30#include "os/os_time.h" 31#include "r600_pipe.h" 32#include "r600_shader.h" 33 34#include "sb_public.h" 35} 36 37#include <stack> 38#include <map> 39#include <iostream> 40 41#include "sb_bc.h" 42#include "sb_shader.h" 43#include "sb_pass.h" 44#include "sb_sched.h" 45 46using namespace r600_sb; 47 48using std::cerr; 49 50static sb_hw_class translate_chip_class(enum chip_class cc); 51static sb_hw_chip translate_chip(enum radeon_family rf); 52 53sb_context *r600_sb_context_create(struct r600_context *rctx) { 54 55 sb_context *sctx = new sb_context(); 56 57 if (sctx->init(rctx->isa, translate_chip(rctx->family), 58 translate_chip_class(rctx->chip_class))) { 59 delete sctx; 60 sctx = NULL; 61 } 62 63 unsigned df = rctx->screen->debug_flags; 64 65 sb_context::dump_pass = df & DBG_SB_DUMP; 66 sb_context::dump_stat = df & DBG_SB_STAT; 67 sb_context::dry_run = df & DBG_SB_DRY_RUN; 68 sb_context::no_fallback = df & DBG_SB_NO_FALLBACK; 69 70 sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0); 71 sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0); 72 sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0); 73 74 return sctx; 75} 76 77void r600_sb_context_destroy(void * sctx) { 78 if (sctx) { 79 sb_context *ctx = static_cast<sb_context*>(sctx); 80 81 if (sb_context::dump_stat) { 82 cerr << "context src stats: "; 83 ctx->src_stats.dump(cerr); 84 cerr << "context opt stats: "; 85 ctx->opt_stats.dump(cerr); 86 cerr << "context diff: "; 87 ctx->src_stats.dump_diff(cerr, ctx->opt_stats); 88 } 89 90 delete ctx; 91 } 92} 93 94int r600_sb_bytecode_process(struct r600_context *rctx, 95 struct r600_bytecode *bc, 96 struct r600_shader *pshader, 97 int dump_source_bytecode, 98 int optimize) { 99 int r = 0; 100 unsigned shader_id = bc->debug_id; 101 102 sb_context *ctx = (sb_context *)rctx->sb_context; 103 if (!ctx) { 104 rctx->sb_context = ctx = r600_sb_context_create(rctx); 105 } 106 107 int64_t time_start = 0; 108 if (sb_context::dump_stat) { 109 time_start = os_time_get_nano(); 110 } 111 112 SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; ); 113 114 bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize); 115 116 if ((r = parser.parse())) { 117 assert(0); 118 return r; 119 } 120 121 /* skip some shaders (use shaders from default backend) 122 * dskip_start - range start, dskip_end - range_end, 123 * e.g. start = 5, end = 6 means shaders 5 & 6 124 * 125 * dskip_mode == 0 - disabled, 126 * dskip_mode == 1 - don't process the shaders from the [start;end] range 127 * dskip_mode == 2 - process only the shaders from the range 128 */ 129 if (sb_context::dskip_mode) { 130 if ((sb_context::dskip_start <= shader_id && 131 shader_id <= sb_context::dskip_end) == 132 (sb_context::dskip_mode == 1)) { 133 cerr << "sb: skipped shader " << shader_id << " : " << "[" 134 << sb_context::dskip_start << "; " 135 << sb_context::dskip_end << "] mode " 136 << sb_context::dskip_mode << "\n"; 137 return 0; 138 } 139 } 140 141 shader *sh = parser.get_shader(); 142 SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); ); 143 144 if (!optimize) { 145 delete sh; 146 return 0; 147 } 148 149#define SB_RUN_PASS(n, dump) \ 150 do { \ 151 r = n(*sh).run(); \ 152 if (r) { \ 153 cerr << "sb: error (" << r << ") in the " << #n << " pass.\n"; \ 154 if (sb_context::no_fallback) \ 155 return r; \ 156 cerr << "sb: using unoptimized bytecode...\n"; \ 157 delete sh; \ 158 return 0; \ 159 } \ 160 if (dump) { \ 161 SB_DUMP_PASS( cerr << "\n\n###### after " << #n << "\n"; \ 162 sh->dump_ir();); \ 163 } \ 164 assert(!r); \ 165 } while (0) 166 167 SB_RUN_PASS(ssa_prepare, 0); 168 SB_RUN_PASS(ssa_rename, 1); 169 170 if (sh->has_alu_predication) 171 SB_RUN_PASS(psi_ops, 1); 172 173 SB_RUN_PASS(liveness, 0); 174 SB_RUN_PASS(dce_cleanup, 0); 175 SB_RUN_PASS(def_use, 0); 176 177 sh->set_undef(sh->root->live_before); 178 179 SB_RUN_PASS(peephole, 1); 180 SB_RUN_PASS(if_conversion, 1); 181 182 SB_RUN_PASS(def_use, 0); 183 184 SB_RUN_PASS(gvn, 1); 185 186 SB_RUN_PASS(liveness, 0); 187 SB_RUN_PASS(dce_cleanup, 1); 188 SB_RUN_PASS(def_use, 0); 189 190 SB_RUN_PASS(liveness, 0); 191 SB_RUN_PASS(dce_cleanup, 0); 192 193 SB_RUN_PASS(ra_split, 0); 194 SB_RUN_PASS(def_use, 0); 195 196 // create 'basic blocks'. it's not like we build CFG, they are just 197 // container nodes in the correct locations for code placement 198 sh->create_bbs(); 199 200 SB_RUN_PASS(gcm, 1); 201 202 sh->compute_interferences = true; 203 SB_RUN_PASS(liveness, 0); 204 205 SB_RUN_PASS(ra_coalesce, 1); 206 SB_RUN_PASS(ra_init, 1); 207 208 SB_RUN_PASS(post_scheduler, 1); 209 210 sh->expand_bbs(); 211 212#if SB_RA_SCHED_CHECK 213 // check code correctness after regalloc/scheduler 214 SB_RUN_PASS(ra_checker, 0); 215#endif 216 217 SB_RUN_PASS(bc_finalizer, 0); 218 219 sh->optimized = true; 220 221 bc_builder builder(*sh); 222 223 if ((r = builder.build())) { 224 assert(0); 225 return r; 226 } 227 228 if (!sb_context::dry_run) { 229 bytecode &nbc = builder.get_bytecode(); 230 231 free(bc->bytecode); 232 bc->ndw = nbc.ndw(); 233 bc->bytecode = (uint32_t*) malloc(bc->ndw << 2); 234 nbc.write_data(bc->bytecode); 235 236 bc->ngpr = sh->ngpr; 237 bc->nstack = sh->nstack; 238 } else { 239 SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; ); 240 } 241 242 243 if (sb_context::dump_stat) { 244 int64_t t = os_time_get_nano() - time_start; 245 246 cerr << "sb: processing shader " << shader_id << " done ( " 247 << ((double)t)/1000000.0 << " ms ).\n"; 248 249 sh->opt_stats.ndw = bc->ndw; 250 sh->collect_stats(true); 251 252 cerr << "src stats: "; 253 sh->src_stats.dump(cerr); 254 cerr << "opt stats: "; 255 sh->opt_stats.dump(cerr); 256 cerr << "diff: "; 257 sh->src_stats.dump_diff(cerr, sh->opt_stats); 258 } 259 260 delete sh; 261 return 0; 262} 263 264static sb_hw_chip translate_chip(enum radeon_family rf) { 265 switch (rf) { 266 267#define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c 268 TRANSLATE_CHIP(R600); 269 TRANSLATE_CHIP(RV610); 270 TRANSLATE_CHIP(RV630); 271 TRANSLATE_CHIP(RV670); 272 TRANSLATE_CHIP(RV620); 273 TRANSLATE_CHIP(RV635); 274 TRANSLATE_CHIP(RS780); 275 TRANSLATE_CHIP(RS880); 276 TRANSLATE_CHIP(RV770); 277 TRANSLATE_CHIP(RV730); 278 TRANSLATE_CHIP(RV710); 279 TRANSLATE_CHIP(RV740); 280 TRANSLATE_CHIP(CEDAR); 281 TRANSLATE_CHIP(REDWOOD); 282 TRANSLATE_CHIP(JUNIPER); 283 TRANSLATE_CHIP(CYPRESS); 284 TRANSLATE_CHIP(HEMLOCK); 285 TRANSLATE_CHIP(PALM); 286 TRANSLATE_CHIP(SUMO); 287 TRANSLATE_CHIP(SUMO2); 288 TRANSLATE_CHIP(BARTS); 289 TRANSLATE_CHIP(TURKS); 290 TRANSLATE_CHIP(CAICOS); 291 TRANSLATE_CHIP(CAYMAN); 292#undef TRANSLATE_CHIP 293 294 default: 295 assert(!"unknown chip"); 296 return HW_CHIP_UNKNOWN; 297 } 298} 299 300static sb_hw_class translate_chip_class(enum chip_class cc) { 301 switch(cc) { 302 case R600: return HW_CLASS_R600; 303 case R700: return HW_CLASS_R700; 304 case EVERGREEN: return HW_CLASS_EVERGREEN; 305 case CAYMAN: return HW_CLASS_CAYMAN; 306 307 default: 308 assert(!"unknown chip class"); 309 return HW_CLASS_UNKNOWN; 310 } 311} 312