// RSForEachExpand.cpp revision 802f65931852d925bbe2e478bafe422b4002e7c4
1/* 2 * Copyright 2012, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "bcc/Assert.h" 18#include "bcc/Renderscript/RSTransforms.h" 19 20#include <cstdlib> 21 22#include <llvm/IR/DerivedTypes.h> 23#include <llvm/IR/Function.h> 24#include <llvm/IR/Instructions.h> 25#include <llvm/IR/IRBuilder.h> 26#include <llvm/IR/Module.h> 27#include <llvm/Pass.h> 28#include <llvm/Support/raw_ostream.h> 29#include <llvm/IR/DataLayout.h> 30#include <llvm/IR/Type.h> 31 32#include "bcc/Config/Config.h" 33#include "bcc/Renderscript/RSInfo.h" 34#include "bcc/Support/Log.h" 35 36using namespace bcc; 37 38namespace { 39 40/* RSForEachExpandPass - This pass operates on functions that are able to be 41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 42 * ForEach-able function to be invoked over the appropriate data cells of the 43 * input/output allocations (adjusting other relevant parameters as we go). We 44 * support doing this for any ForEach-able compute kernels. The new function 45 * name is the original function name followed by ".expand". Note that we 46 * still generate code for the original function. 47 */ 48class RSForEachExpandPass : public llvm::ModulePass { 49private: 50 static char ID; 51 52 llvm::Module *M; 53 llvm::LLVMContext *C; 54 55 const RSInfo::ExportForeachFuncListTy &mFuncs; 56 57 // Turns on optimization of allocation stride values. 
58 bool mEnableStepOpt; 59 60 uint32_t getRootSignature(llvm::Function *F) { 61 const llvm::NamedMDNode *ExportForEachMetadata = 62 M->getNamedMetadata("#rs_export_foreach"); 63 64 if (!ExportForEachMetadata) { 65 llvm::SmallVector<llvm::Type*, 8> RootArgTys; 66 for (llvm::Function::arg_iterator B = F->arg_begin(), 67 E = F->arg_end(); 68 B != E; 69 ++B) { 70 RootArgTys.push_back(B->getType()); 71 } 72 73 // For pre-ICS bitcode, we may not have signature information. In that 74 // case, we use the size of the RootArgTys to select the number of 75 // arguments. 76 return (1 << RootArgTys.size()) - 1; 77 } 78 79 if (ExportForEachMetadata->getNumOperands() == 0) { 80 return 0; 81 } 82 83 bccAssert(ExportForEachMetadata->getNumOperands() > 0); 84 85 // We only handle the case for legacy root() functions here, so this is 86 // hard-coded to look at only the first such function. 87 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 88 if (SigNode != NULL && SigNode->getNumOperands() == 1) { 89 llvm::Value *SigVal = SigNode->getOperand(0); 90 if (SigVal->getValueID() == llvm::Value::MDStringVal) { 91 llvm::StringRef SigString = 92 static_cast<llvm::MDString*>(SigVal)->getString(); 93 uint32_t Signature = 0; 94 if (SigString.getAsInteger(10, Signature)) { 95 ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 96 return 0; 97 } 98 return Signature; 99 } 100 } 101 102 return 0; 103 } 104 105 // Get the actual value we should use to step through an allocation. 106 // DL - Target Data size/layout information. 107 // T - Type of allocation (should be a pointer). 108 // OrigStep - Original step increment (root.expand() input from driver). 
109 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T, 110 llvm::Value *OrigStep) { 111 bccAssert(DL); 112 bccAssert(T); 113 bccAssert(OrigStep); 114 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T); 115 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 116 if (mEnableStepOpt && T != VoidPtrTy && PT) { 117 llvm::Type *ET = PT->getElementType(); 118 uint64_t ETSize = DL->getTypeAllocSize(ET); 119 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 120 return llvm::ConstantInt::get(Int32Ty, ETSize); 121 } else { 122 return OrigStep; 123 } 124 } 125 126 static bool hasIn(uint32_t Signature) { 127 return Signature & 0x01; 128 } 129 130 static bool hasOut(uint32_t Signature) { 131 return Signature & 0x02; 132 } 133 134 static bool hasUsrData(uint32_t Signature) { 135 return Signature & 0x04; 136 } 137 138 static bool hasX(uint32_t Signature) { 139 return Signature & 0x08; 140 } 141 142 static bool hasY(uint32_t Signature) { 143 return Signature & 0x10; 144 } 145 146 static bool isKernel(uint32_t Signature) { 147 return Signature & 0x20; 148 } 149 150 /// @brief Returns the type of the ForEach stub parameter structure. 151 /// 152 /// Renderscript uses a single structure in which all parameters are passed 153 /// to keep the signature of the expanded function independent of the 154 /// parameters passed to it. 
155 llvm::Type *getForeachStubTy() { 156 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 157 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 158 llvm::Type *SizeTy = Int32Ty; 159 /* Defined in frameworks/base/libs/rs/rs_hal.h: 160 * 161 * struct RsForEachStubParamStruct { 162 * const void *in; 163 * void *out; 164 * const void *usr; 165 * size_t usr_len; 166 * uint32_t x; 167 * uint32_t y; 168 * uint32_t z; 169 * uint32_t lod; 170 * enum RsAllocationCubemapFace face; 171 * uint32_t ar[16]; 172 * }; 173 */ 174 llvm::SmallVector<llvm::Type*, 9> StructTys; 175 StructTys.push_back(VoidPtrTy); // const void *in 176 StructTys.push_back(VoidPtrTy); // void *out 177 StructTys.push_back(VoidPtrTy); // const void *usr 178 StructTys.push_back(SizeTy); // size_t usr_len 179 StructTys.push_back(Int32Ty); // uint32_t x 180 StructTys.push_back(Int32Ty); // uint32_t y 181 StructTys.push_back(Int32Ty); // uint32_t z 182 StructTys.push_back(Int32Ty); // uint32_t lod 183 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace 184 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16] 185 186 return llvm::StructType::create(StructTys, "RsForEachStubParamStruct"); 187 } 188 189 /// @brief Create skeleton of the expanded function. 
190 /// 191 /// This creates a function with the following signature: 192 /// 193 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 194 /// uint32_t instep, uint32_t outstep) 195 /// 196 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 197 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo(); 198 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 199 200 llvm::SmallVector<llvm::Type*, 8> ParamTys; 201 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p 202 ParamTys.push_back(Int32Ty); // uint32_t x1 203 ParamTys.push_back(Int32Ty); // uint32_t x2 204 ParamTys.push_back(Int32Ty); // uint32_t instep 205 ParamTys.push_back(Int32Ty); // uint32_t outstep 206 207 llvm::FunctionType *FT = 208 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false); 209 llvm::Function *F = 210 llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, 211 OldName + ".expand", M); 212 213 llvm::Function::arg_iterator AI = F->arg_begin(); 214 215 AI->setName("p"); 216 AI++; 217 AI->setName("x1"); 218 AI++; 219 AI->setName("x2"); 220 AI++; 221 AI->setName("arg_instep"); 222 AI++; 223 AI->setName("arg_outstep"); 224 AI++; 225 226 assert(AI == F->arg_end()); 227 228 return F; 229 } 230 231public: 232 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 233 bool pEnableStepOpt) 234 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs), 235 mEnableStepOpt(pEnableStepOpt) { 236 } 237 238 /* Performs the actual optimization on a selected function. On success, the 239 * Module will contain a new function of the name "<NAME>.expand" that 240 * invokes <NAME>() in a loop with the appropriate parameters. 
241 */ 242 bool ExpandFunction(llvm::Function *F, uint32_t Signature) { 243 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str()); 244 245 if (!Signature) { 246 Signature = getRootSignature(F); 247 if (!Signature) { 248 // We couldn't determine how to expand this function based on its 249 // function signature. 250 return false; 251 } 252 } 253 254 llvm::DataLayout DL(M); 255 256 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 257 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName()); 258 259 // Create and name the actual arguments to this expanded function. 260 llvm::SmallVector<llvm::Argument*, 8> ArgVec; 261 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 262 E = ExpandedFunc->arg_end(); 263 B != E; 264 ++B) { 265 ArgVec.push_back(B); 266 } 267 268 if (ArgVec.size() != 5) { 269 ALOGE("Incorrect number of arguments to function: %zu", 270 ArgVec.size()); 271 return false; 272 } 273 llvm::Value *Arg_p = ArgVec[0]; 274 llvm::Value *Arg_x1 = ArgVec[1]; 275 llvm::Value *Arg_x2 = ArgVec[2]; 276 llvm::Value *Arg_instep = ArgVec[3]; 277 llvm::Value *Arg_outstep = ArgVec[4]; 278 279 llvm::Value *InStep = NULL; 280 llvm::Value *OutStep = NULL; 281 282 // Construct the actual function body. 283 llvm::BasicBlock *Begin = 284 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 285 llvm::IRBuilder<> Builder(Begin); 286 287 // uint32_t X = x1; 288 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 289 Builder.CreateStore(Arg_x1, AX); 290 291 // Collect and construct the arguments for the kernel(). 292 // Note that we load any loop-invariant arguments before entering the Loop. 
293 llvm::Function::arg_iterator Args = F->arg_begin(); 294 295 llvm::Type *InTy = NULL; 296 llvm::AllocaInst *AIn = NULL; 297 if (hasIn(Signature)) { 298 InTy = Args->getType(); 299 AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 300 InStep = getStepValue(&DL, InTy, Arg_instep); 301 InStep->setName("instep"); 302 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 303 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 304 Args++; 305 } 306 307 llvm::Type *OutTy = NULL; 308 llvm::AllocaInst *AOut = NULL; 309 if (hasOut(Signature)) { 310 OutTy = Args->getType(); 311 AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 312 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 313 OutStep->setName("outstep"); 314 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 315 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 316 Args++; 317 } 318 319 llvm::Value *UsrData = NULL; 320 if (hasUsrData(Signature)) { 321 llvm::Type *UsrDataTy = Args->getType(); 322 UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 323 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy); 324 UsrData->setName("UsrData"); 325 Args++; 326 } 327 328 if (hasX(Signature)) { 329 Args++; 330 } 331 332 llvm::Value *Y = NULL; 333 if (hasY(Signature)) { 334 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 335 Args++; 336 } 337 338 bccAssert(Args == F->arg_end()); 339 340 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 341 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 342 343 // if (x1 < x2) goto Loop; else goto Exit; 344 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 345 Builder.CreateCondBr(Cond, Loop, Exit); 346 347 // Loop: 348 Builder.SetInsertPoint(Loop); 349 350 // Populate the actual call to kernel(). 
351 llvm::SmallVector<llvm::Value*, 8> RootArgs; 352 353 llvm::Value *InPtr = NULL; 354 llvm::Value *OutPtr = NULL; 355 356 if (AIn) { 357 InPtr = Builder.CreateLoad(AIn, "InPtr"); 358 RootArgs.push_back(InPtr); 359 } 360 361 if (AOut) { 362 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 363 RootArgs.push_back(OutPtr); 364 } 365 366 if (UsrData) { 367 RootArgs.push_back(UsrData); 368 } 369 370 // We always have to load X, since it is used to iterate through the loop. 371 llvm::Value *X = Builder.CreateLoad(AX, "X"); 372 if (hasX(Signature)) { 373 RootArgs.push_back(X); 374 } 375 376 if (Y) { 377 RootArgs.push_back(Y); 378 } 379 380 Builder.CreateCall(F, RootArgs); 381 382 if (InPtr) { 383 // InPtr += instep 384 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 385 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy); 386 Builder.CreateStore(NewIn, AIn); 387 } 388 389 if (OutPtr) { 390 // OutPtr += outstep 391 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 392 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy); 393 Builder.CreateStore(NewOut, AOut); 394 } 395 396 // X++; 397 llvm::Value *XPlusOne = 398 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 399 Builder.CreateStore(XPlusOne, AX); 400 401 // If (X < x2) goto Loop; else goto Exit; 402 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 403 Builder.CreateCondBr(Cond, Loop, Exit); 404 405 // Exit: 406 Builder.SetInsertPoint(Exit); 407 Builder.CreateRetVoid(); 408 409 return true; 410 } 411 412 /* Expand a pass-by-value kernel. 413 */ 414 bool ExpandKernel(llvm::Function *F, uint32_t Signature) { 415 bccAssert(isKernel(Signature)); 416 ALOGV("Expanding kernel Function %s", F->getName().str().c_str()); 417 418 // TODO: Refactor this to share functionality with ExpandFunction. 
419 llvm::DataLayout DL(M); 420 421 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 422 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName()); 423 424 // Create and name the actual arguments to this expanded function. 425 llvm::SmallVector<llvm::Argument*, 8> ArgVec; 426 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 427 E = ExpandedFunc->arg_end(); 428 B != E; 429 ++B) { 430 ArgVec.push_back(B); 431 } 432 433 if (ArgVec.size() != 5) { 434 ALOGE("Incorrect number of arguments to function: %zu", 435 ArgVec.size()); 436 return false; 437 } 438 llvm::Value *Arg_p = ArgVec[0]; 439 llvm::Value *Arg_x1 = ArgVec[1]; 440 llvm::Value *Arg_x2 = ArgVec[2]; 441 llvm::Value *Arg_instep = ArgVec[3]; 442 llvm::Value *Arg_outstep = ArgVec[4]; 443 444 llvm::Value *InStep = NULL; 445 llvm::Value *OutStep = NULL; 446 447 // Construct the actual function body. 448 llvm::BasicBlock *Begin = 449 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 450 llvm::IRBuilder<> Builder(Begin); 451 452 // uint32_t X = x1; 453 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 454 Builder.CreateStore(Arg_x1, AX); 455 456 // Collect and construct the arguments for the kernel(). 457 // Note that we load any loop-invariant arguments before entering the Loop. 458 llvm::Function::arg_iterator Args = F->arg_begin(); 459 460 llvm::Type *OutTy = NULL; 461 llvm::AllocaInst *AOut = NULL; 462 bool PassOutByReference = false; 463 if (hasOut(Signature)) { 464 llvm::Type *OutBaseTy = F->getReturnType(); 465 if (OutBaseTy->isVoidTy()) { 466 PassOutByReference = true; 467 OutTy = Args->getType(); 468 Args++; 469 } else { 470 OutTy = OutBaseTy->getPointerTo(); 471 // We don't increment Args, since we are using the actual return type. 
472 } 473 AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 474 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 475 OutStep->setName("outstep"); 476 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 477 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 478 } 479 480 llvm::Type *InBaseTy = NULL; 481 llvm::Type *InTy = NULL; 482 llvm::AllocaInst *AIn = NULL; 483 if (hasIn(Signature)) { 484 InBaseTy = Args->getType(); 485 InTy =InBaseTy->getPointerTo(); 486 AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 487 InStep = getStepValue(&DL, InTy, Arg_instep); 488 InStep->setName("instep"); 489 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 490 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 491 Args++; 492 } 493 494 // No usrData parameter on kernels. 495 bccAssert(!hasUsrData(Signature)); 496 497 if (hasX(Signature)) { 498 Args++; 499 } 500 501 llvm::Value *Y = NULL; 502 if (hasY(Signature)) { 503 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 504 Args++; 505 } 506 507 bccAssert(Args == F->arg_end()); 508 509 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 510 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 511 512 // if (x1 < x2) goto Loop; else goto Exit; 513 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 514 Builder.CreateCondBr(Cond, Loop, Exit); 515 516 // Loop: 517 Builder.SetInsertPoint(Loop); 518 519 // Populate the actual call to kernel(). 520 llvm::SmallVector<llvm::Value*, 8> RootArgs; 521 522 llvm::Value *InPtr = NULL; 523 llvm::Value *In = NULL; 524 llvm::Value *OutPtr = NULL; 525 526 if (PassOutByReference) { 527 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 528 RootArgs.push_back(OutPtr); 529 } 530 531 if (AIn) { 532 InPtr = Builder.CreateLoad(AIn, "InPtr"); 533 In = Builder.CreateLoad(InPtr, "In"); 534 RootArgs.push_back(In); 535 } 536 537 // We always have to load X, since it is used to iterate through the loop. 
538 llvm::Value *X = Builder.CreateLoad(AX, "X"); 539 if (hasX(Signature)) { 540 RootArgs.push_back(X); 541 } 542 543 if (Y) { 544 RootArgs.push_back(Y); 545 } 546 547 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs); 548 549 if (AOut && !PassOutByReference) { 550 OutPtr = Builder.CreateLoad(AOut, "OutPtr"); 551 Builder.CreateStore(RetVal, OutPtr); 552 } 553 554 if (InPtr) { 555 // InPtr += instep 556 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 557 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy); 558 Builder.CreateStore(NewIn, AIn); 559 } 560 561 if (OutPtr) { 562 // OutPtr += outstep 563 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 564 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy); 565 Builder.CreateStore(NewOut, AOut); 566 } 567 568 // X++; 569 llvm::Value *XPlusOne = 570 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 571 Builder.CreateStore(XPlusOne, AX); 572 573 // If (X < x2) goto Loop; else goto Exit; 574 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 575 Builder.CreateCondBr(Cond, Loop, Exit); 576 577 // Exit: 578 Builder.SetInsertPoint(Exit); 579 Builder.CreateRetVoid(); 580 581 return true; 582 } 583 584 virtual bool runOnModule(llvm::Module &M) { 585 bool Changed = false; 586 this->M = &M; 587 C = &M.getContext(); 588 589 for (RSInfo::ExportForeachFuncListTy::const_iterator 590 func_iter = mFuncs.begin(), func_end = mFuncs.end(); 591 func_iter != func_end; func_iter++) { 592 const char *name = func_iter->first; 593 uint32_t signature = func_iter->second; 594 llvm::Function *kernel = M.getFunction(name); 595 if (kernel && isKernel(signature)) { 596 Changed |= ExpandKernel(kernel, signature); 597 } 598 else if (kernel && kernel->getReturnType()->isVoidTy()) { 599 Changed |= ExpandFunction(kernel, signature); 600 } 601 } 602 603 return Changed; 604 } 605 606 virtual const char *getPassName() const { 607 return "ForEach-able Function Expansion"; 608 } 609 610}; // end 
RSForEachExpandPass 611 612} // end anonymous namespace 613 614char RSForEachExpandPass::ID = 0; 615 616namespace bcc { 617 618llvm::ModulePass * 619createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 620 bool pEnableStepOpt){ 621 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt); 622} 623 624} // end namespace bcc 625