RSForEachExpand.cpp revision 357b58691936bef425bd315c13a2d8019d7e9c7e
1/* 2 * Copyright 2012, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "bcc/Assert.h" 18#include "bcc/Renderscript/RSTransforms.h" 19 20#include <cstdlib> 21 22#include <llvm/IR/DerivedTypes.h> 23#include <llvm/IR/Function.h> 24#include <llvm/IR/Instructions.h> 25#include <llvm/IR/IRBuilder.h> 26#include <llvm/IR/Module.h> 27#include <llvm/Pass.h> 28#include <llvm/Support/raw_ostream.h> 29#include <llvm/IR/DataLayout.h> 30#include <llvm/IR/Type.h> 31 32#include "bcc/Config/Config.h" 33#include "bcc/Renderscript/RSInfo.h" 34#include "bcc/Support/Log.h" 35 36using namespace bcc; 37 38namespace { 39 40/* RSForEachExpandPass - This pass operates on functions that are able to be 41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 42 * ForEach-able function to be invoked over the appropriate data cells of the 43 * input/output allocations (adjusting other relevant parameters as we go). We 44 * support doing this for any ForEach-able compute kernels. The new function 45 * name is the original function name followed by ".expand". Note that we 46 * still generate code for the original function. 47 */ 48class RSForEachExpandPass : public llvm::ModulePass { 49private: 50 static char ID; 51 52 llvm::Module *M; 53 llvm::LLVMContext *C; 54 55 const RSInfo::ExportForeachFuncListTy &mFuncs; 56 57 // Turns on optimization of allocation stride values. 58 bool mEnableStepOpt; 59 60 uint32_t getRootSignature(llvm::Function *F) { 61 const llvm::NamedMDNode *ExportForEachMetadata = 62 M->getNamedMetadata("#rs_export_foreach"); 63 64 if (!ExportForEachMetadata) { 65 llvm::SmallVector<llvm::Type*, 8> RootArgTys; 66 for (llvm::Function::arg_iterator B = F->arg_begin(), 67 E = F->arg_end(); 68 B != E; 69 ++B) { 70 RootArgTys.push_back(B->getType()); 71 } 72 73 // For pre-ICS bitcode, we may not have signature information. In that 74 // case, we use the size of the RootArgTys to select the number of 75 // arguments. 76 return (1 << RootArgTys.size()) - 1; 77 } 78 79 if (ExportForEachMetadata->getNumOperands() == 0) { 80 return 0; 81 } 82 83 bccAssert(ExportForEachMetadata->getNumOperands() > 0); 84 85 // We only handle the case for legacy root() functions here, so this is 86 // hard-coded to look at only the first such function. 87 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 88 if (SigNode != NULL && SigNode->getNumOperands() == 1) { 89 llvm::Value *SigVal = SigNode->getOperand(0); 90 if (SigVal->getValueID() == llvm::Value::MDStringVal) { 91 llvm::StringRef SigString = 92 static_cast<llvm::MDString*>(SigVal)->getString(); 93 uint32_t Signature = 0; 94 if (SigString.getAsInteger(10, Signature)) { 95 ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 96 return 0; 97 } 98 return Signature; 99 } 100 } 101 102 return 0; 103 } 104 105 // Get the actual value we should use to step through an allocation. 106 // DL - Target Data size/layout information. 107 // T - Type of allocation (should be a pointer). 108 // OrigStep - Original step increment (root.expand() input from driver). 109 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T, 110 llvm::Value *OrigStep) { 111 bccAssert(DL); 112 bccAssert(T); 113 bccAssert(OrigStep); 114 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T); 115 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 116 if (mEnableStepOpt && T != VoidPtrTy && PT) { 117 llvm::Type *ET = PT->getElementType(); 118 uint64_t ETSize = DL->getTypeAllocSize(ET); 119 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 120 return llvm::ConstantInt::get(Int32Ty, ETSize); 121 } else { 122 return OrigStep; 123 } 124 } 125 126 static bool hasIn(uint32_t Signature) { 127 return Signature & 0x01; 128 } 129 130 static bool hasOut(uint32_t Signature) { 131 return Signature & 0x02; 132 } 133 134 static bool hasUsrData(uint32_t Signature) { 135 return Signature & 0x04; 136 } 137 138 static bool hasX(uint32_t Signature) { 139 return Signature & 0x08; 140 } 141 142 static bool hasY(uint32_t Signature) { 143 return Signature & 0x10; 144 } 145 146 static bool isKernel(uint32_t Signature) { 147 return Signature & 0x20; 148 } 149 150 /// @brief Returns the type of the ForEach stub parameter structure. 151 /// 152 /// Renderscript uses a single structure in which all parameters are passed 153 /// to keep the signature of the expanded function independent of the 154 /// parameters passed to it. 155 llvm::Type *getForeachStubTy() { 156 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 157 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 158 llvm::Type *SizeTy = Int32Ty; 159 /* Defined in frameworks/base/libs/rs/rs_hal.h: 160 * 161 * struct RsForEachStubParamStruct { 162 * const void *in; 163 * void *out; 164 * const void *usr; 165 * size_t usr_len; 166 * uint32_t x; 167 * uint32_t y; 168 * uint32_t z; 169 * uint32_t lod; 170 * enum RsAllocationCubemapFace face; 171 * uint32_t ar[16]; 172 * }; 173 */ 174 llvm::SmallVector<llvm::Type*, 9> StructTys; 175 StructTys.push_back(VoidPtrTy); // const void *in 176 StructTys.push_back(VoidPtrTy); // void *out 177 StructTys.push_back(VoidPtrTy); // const void *usr 178 StructTys.push_back(SizeTy); // size_t usr_len 179 StructTys.push_back(Int32Ty); // uint32_t x 180 StructTys.push_back(Int32Ty); // uint32_t y 181 StructTys.push_back(Int32Ty); // uint32_t z 182 StructTys.push_back(Int32Ty); // uint32_t lod 183 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace 184 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16] 185 186 return llvm::StructType::create(StructTys, "RsForEachStubParamStruct"); 187 } 188 189 /// @brief Create skeleton of the expanded function. 190 /// 191 /// This creates a function with the following signature: 192 /// 193 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 194 /// uint32_t instep, uint32_t outstep) 195 /// 196 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 197 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo(); 198 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 199 200 llvm::SmallVector<llvm::Type*, 8> ParamTys; 201 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p 202 ParamTys.push_back(Int32Ty); // uint32_t x1 203 ParamTys.push_back(Int32Ty); // uint32_t x2 204 ParamTys.push_back(Int32Ty); // uint32_t instep 205 ParamTys.push_back(Int32Ty); // uint32_t outstep 206 207 llvm::FunctionType *FT = 208 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false); 209 return llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, 210 OldName + ".expand", M); 211 } 212 213public: 214 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 215 bool pEnableStepOpt) 216 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs), 217 mEnableStepOpt(pEnableStepOpt) { 218 } 219 220 /* Performs the actual optimization on a selected function. On success, the 221 * Module will contain a new function of the name "<NAME>.expand" that 222 * invokes <NAME>() in a loop with the appropriate parameters. 223 */ 224 bool ExpandFunction(llvm::Function *F, uint32_t Signature) { 225 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str()); 226 227 if (!Signature) { 228 Signature = getRootSignature(F); 229 if (!Signature) { 230 // We couldn't determine how to expand this function based on its 231 // function signature. 232 return false; 233 } 234 } 235 236 llvm::DataLayout DL(M); 237 238 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 239 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName()); 240 241 // Create and name the actual arguments to this expanded function. 242 llvm::SmallVector<llvm::Argument*, 8> ArgVec; 243 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 244 E = ExpandedFunc->arg_end(); 245 B != E; 246 ++B) { 247 ArgVec.push_back(B); 248 } 249 250 if (ArgVec.size() != 5) { 251 ALOGE("Incorrect number of arguments to function: %zu", 252 ArgVec.size()); 253 return false; 254 } 255 llvm::Value *Arg_p = ArgVec[0]; 256 llvm::Value *Arg_x1 = ArgVec[1]; 257 llvm::Value *Arg_x2 = ArgVec[2]; 258 llvm::Value *Arg_instep = ArgVec[3]; 259 llvm::Value *Arg_outstep = ArgVec[4]; 260 261 Arg_p->setName("p"); 262 Arg_x1->setName("x1"); 263 Arg_x2->setName("x2"); 264 Arg_instep->setName("arg_instep"); 265 Arg_outstep->setName("arg_outstep"); 266 267 llvm::Value *InStep = NULL; 268 llvm::Value *OutStep = NULL; 269 270 // Construct the actual function body. 271 llvm::BasicBlock *Begin = 272 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 273 llvm::IRBuilder<> Builder(Begin); 274 275 // uint32_t X = x1; 276 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 277 Builder.CreateStore(Arg_x1, AX); 278 279 // Collect and construct the arguments for the kernel(). 280 // Note that we load any loop-invariant arguments before entering the Loop. 281 llvm::Function::arg_iterator Args = F->arg_begin(); 282 283 llvm::Type *InTy = NULL; 284 llvm::AllocaInst *AIn = NULL; 285 if (hasIn(Signature)) { 286 InTy = Args->getType(); 287 AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 288 InStep = getStepValue(&DL, InTy, Arg_instep); 289 InStep->setName("instep"); 290 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 291 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 292 Args++; 293 } 294 295 llvm::Type *OutTy = NULL; 296 llvm::AllocaInst *AOut = NULL; 297 if (hasOut(Signature)) { 298 OutTy = Args->getType(); 299 AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 300 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 301 OutStep->setName("outstep"); 302 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 303 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 304 Args++; 305 } 306 307 llvm::Value *UsrData = NULL; 308 if (hasUsrData(Signature)) { 309 llvm::Type *UsrDataTy = Args->getType(); 310 UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 311 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy); 312 UsrData->setName("UsrData"); 313 Args++; 314 } 315 316 if (hasX(Signature)) { 317 Args++; 318 } 319 320 llvm::Value *Y = NULL; 321 if (hasY(Signature)) { 322 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 323 Args++; 324 } 325 326 bccAssert(Args == F->arg_end()); 327 328 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 329 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 330 331 // if (x1 < x2) goto Loop; else goto Exit; 332 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 333 Builder.CreateCondBr(Cond, Loop, Exit); 334 335 // Loop: 336 Builder.SetInsertPoint(Loop); 337 338 // Populate the actual call to kernel(). 339 llvm::SmallVector<llvm::Value*, 8> RootArgs; 340 341 llvm::Value *InPtr = NULL; 342 llvm::Value *OutPtr = NULL; 343 344 if (AIn) { 345 InPtr = Builder.CreateLoad(AIn, "InPtr"); 346 RootArgs.push_back(InPtr); 347 } 348 349 if (AOut) { 350 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 351 RootArgs.push_back(OutPtr); 352 } 353 354 if (UsrData) { 355 RootArgs.push_back(UsrData); 356 } 357 358 // We always have to load X, since it is used to iterate through the loop. 359 llvm::Value *X = Builder.CreateLoad(AX, "X"); 360 if (hasX(Signature)) { 361 RootArgs.push_back(X); 362 } 363 364 if (Y) { 365 RootArgs.push_back(Y); 366 } 367 368 Builder.CreateCall(F, RootArgs); 369 370 if (InPtr) { 371 // InPtr += instep 372 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 373 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy); 374 Builder.CreateStore(NewIn, AIn); 375 } 376 377 if (OutPtr) { 378 // OutPtr += outstep 379 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 380 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy); 381 Builder.CreateStore(NewOut, AOut); 382 } 383 384 // X++; 385 llvm::Value *XPlusOne = 386 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 387 Builder.CreateStore(XPlusOne, AX); 388 389 // If (X < x2) goto Loop; else goto Exit; 390 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 391 Builder.CreateCondBr(Cond, Loop, Exit); 392 393 // Exit: 394 Builder.SetInsertPoint(Exit); 395 Builder.CreateRetVoid(); 396 397 return true; 398 } 399 400 /* Expand a pass-by-value kernel. 401 */ 402 bool ExpandKernel(llvm::Function *F, uint32_t Signature) { 403 bccAssert(isKernel(Signature)); 404 ALOGV("Expanding kernel Function %s", F->getName().str().c_str()); 405 406 // TODO: Refactor this to share functionality with ExpandFunction. 407 llvm::DataLayout DL(M); 408 409 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 410 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName()); 411 412 // Create and name the actual arguments to this expanded function. 413 llvm::SmallVector<llvm::Argument*, 8> ArgVec; 414 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 415 E = ExpandedFunc->arg_end(); 416 B != E; 417 ++B) { 418 ArgVec.push_back(B); 419 } 420 421 if (ArgVec.size() != 5) { 422 ALOGE("Incorrect number of arguments to function: %zu", 423 ArgVec.size()); 424 return false; 425 } 426 llvm::Value *Arg_p = ArgVec[0]; 427 llvm::Value *Arg_x1 = ArgVec[1]; 428 llvm::Value *Arg_x2 = ArgVec[2]; 429 llvm::Value *Arg_instep = ArgVec[3]; 430 llvm::Value *Arg_outstep = ArgVec[4]; 431 432 Arg_p->setName("p"); 433 Arg_x1->setName("x1"); 434 Arg_x2->setName("x2"); 435 Arg_instep->setName("arg_instep"); 436 Arg_outstep->setName("arg_outstep"); 437 438 llvm::Value *InStep = NULL; 439 llvm::Value *OutStep = NULL; 440 441 // Construct the actual function body. 442 llvm::BasicBlock *Begin = 443 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 444 llvm::IRBuilder<> Builder(Begin); 445 446 // uint32_t X = x1; 447 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 448 Builder.CreateStore(Arg_x1, AX); 449 450 // Collect and construct the arguments for the kernel(). 451 // Note that we load any loop-invariant arguments before entering the Loop. 452 llvm::Function::arg_iterator Args = F->arg_begin(); 453 454 llvm::Type *OutTy = NULL; 455 llvm::AllocaInst *AOut = NULL; 456 bool PassOutByReference = false; 457 if (hasOut(Signature)) { 458 llvm::Type *OutBaseTy = F->getReturnType(); 459 if (OutBaseTy->isVoidTy()) { 460 PassOutByReference = true; 461 OutTy = Args->getType(); 462 Args++; 463 } else { 464 OutTy = OutBaseTy->getPointerTo(); 465 // We don't increment Args, since we are using the actual return type. 466 } 467 AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 468 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 469 OutStep->setName("outstep"); 470 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 471 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 472 } 473 474 llvm::Type *InBaseTy = NULL; 475 llvm::Type *InTy = NULL; 476 llvm::AllocaInst *AIn = NULL; 477 if (hasIn(Signature)) { 478 InBaseTy = Args->getType(); 479 InTy =InBaseTy->getPointerTo(); 480 AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 481 InStep = getStepValue(&DL, InTy, Arg_instep); 482 InStep->setName("instep"); 483 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 484 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 485 Args++; 486 } 487 488 // No usrData parameter on kernels. 489 bccAssert(!hasUsrData(Signature)); 490 491 if (hasX(Signature)) { 492 Args++; 493 } 494 495 llvm::Value *Y = NULL; 496 if (hasY(Signature)) { 497 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 498 Args++; 499 } 500 501 bccAssert(Args == F->arg_end()); 502 503 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 504 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 505 506 // if (x1 < x2) goto Loop; else goto Exit; 507 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 508 Builder.CreateCondBr(Cond, Loop, Exit); 509 510 // Loop: 511 Builder.SetInsertPoint(Loop); 512 513 // Populate the actual call to kernel(). 514 llvm::SmallVector<llvm::Value*, 8> RootArgs; 515 516 llvm::Value *InPtr = NULL; 517 llvm::Value *In = NULL; 518 llvm::Value *OutPtr = NULL; 519 520 if (PassOutByReference) { 521 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 522 RootArgs.push_back(OutPtr); 523 } 524 525 if (AIn) { 526 InPtr = Builder.CreateLoad(AIn, "InPtr"); 527 In = Builder.CreateLoad(InPtr, "In"); 528 RootArgs.push_back(In); 529 } 530 531 // We always have to load X, since it is used to iterate through the loop. 532 llvm::Value *X = Builder.CreateLoad(AX, "X"); 533 if (hasX(Signature)) { 534 RootArgs.push_back(X); 535 } 536 537 if (Y) { 538 RootArgs.push_back(Y); 539 } 540 541 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs); 542 543 if (AOut && !PassOutByReference) { 544 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 545 Builder.CreateStore(RetVal, OutPtr); 546 } 547 548 if (InPtr) { 549 // InPtr += instep 550 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 551 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy); 552 Builder.CreateStore(NewIn, AIn); 553 } 554 555 if (OutPtr) { 556 // OutPtr += outstep 557 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 558 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy); 559 Builder.CreateStore(NewOut, AOut); 560 } 561 562 // X++; 563 llvm::Value *XPlusOne = 564 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 565 Builder.CreateStore(XPlusOne, AX); 566 567 // If (X < x2) goto Loop; else goto Exit; 568 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 569 Builder.CreateCondBr(Cond, Loop, Exit); 570 571 // Exit: 572 Builder.SetInsertPoint(Exit); 573 Builder.CreateRetVoid(); 574 575 return true; 576 } 577 578 virtual bool runOnModule(llvm::Module &M) { 579 bool Changed = false; 580 this->M = &M; 581 C = &M.getContext(); 582 583 for (RSInfo::ExportForeachFuncListTy::const_iterator 584 func_iter = mFuncs.begin(), func_end = mFuncs.end(); 585 func_iter != func_end; func_iter++) { 586 const char *name = func_iter->first; 587 uint32_t signature = func_iter->second; 588 llvm::Function *kernel = M.getFunction(name); 589 if (kernel && isKernel(signature)) { 590 Changed |= ExpandKernel(kernel, signature); 591 } 592 else if (kernel && kernel->getReturnType()->isVoidTy()) { 593 Changed |= ExpandFunction(kernel, signature); 594 } 595 } 596 597 return Changed; 598 } 599 600 virtual const char *getPassName() const { 601 return "ForEach-able Function Expansion"; 602 } 603 604}; // end RSForEachExpandPass 605 606} // end anonymous namespace 607 608char RSForEachExpandPass::ID = 0; 609 610namespace bcc { 611 612llvm::ModulePass * 613createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 614 bool pEnableStepOpt){ 615 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt); 616} 617 618} // end namespace bcc 619