RSForEachExpand.cpp revision a108bc5ec0ca0cb48c72492d54a71126bccfa7d6
1/* 2 * Copyright 2012, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "bcc/Assert.h" 18#include "bcc/Renderscript/RSTransforms.h" 19 20#include <cstdlib> 21#include <functional> 22 23#include <llvm/IR/DerivedTypes.h> 24#include <llvm/IR/Function.h> 25#include <llvm/IR/Instructions.h> 26#include <llvm/IR/IRBuilder.h> 27#include <llvm/IR/MDBuilder.h> 28#include <llvm/IR/Module.h> 29#include <llvm/Pass.h> 30#include <llvm/Support/raw_ostream.h> 31#include <llvm/IR/DataLayout.h> 32#include <llvm/IR/Function.h> 33#include <llvm/IR/Type.h> 34#include <llvm/Transforms/Utils/BasicBlockUtils.h> 35 36#include "bcc/Config/Config.h" 37#include "bcc/Support/Log.h" 38 39#include "bcinfo/MetadataExtractor.h" 40 41#define NUM_EXPANDED_FUNCTION_PARAMS 4 42 43using namespace bcc; 44 45namespace { 46 47static const bool gEnableRsTbaa = true; 48 49/* RSForEachExpandPass - This pass operates on functions that are able to be 50 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 51 * ForEach-able function to be invoked over the appropriate data cells of the 52 * input/output allocations (adjusting other relevant parameters as we go). We 53 * support doing this for any ForEach-able compute kernels. The new function 54 * name is the original function name followed by ".expand". Note that we 55 * still generate code for the original function. 56 */ 57class RSForEachExpandPass : public llvm::ModulePass { 58public: 59 static char ID; 60 61private: 62 static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 63 64 enum RsLaunchDimensionsField { 65 RsLaunchDimensionsFieldX, 66 RsLaunchDimensionsFieldY, 67 RsLaunchDimensionsFieldZ, 68 RsLaunchDimensionsFieldLod, 69 RsLaunchDimensionsFieldFace, 70 RsLaunchDimensionsFieldArray, 71 72 RsLaunchDimensionsFieldCount 73 }; 74 75 enum RsExpandKernelDriverInfoPfxField { 76 RsExpandKernelDriverInfoPfxFieldInPtr, 77 RsExpandKernelDriverInfoPfxFieldInStride, 78 RsExpandKernelDriverInfoPfxFieldInLen, 79 RsExpandKernelDriverInfoPfxFieldOutPtr, 80 RsExpandKernelDriverInfoPfxFieldOutStride, 81 RsExpandKernelDriverInfoPfxFieldOutLen, 82 RsExpandKernelDriverInfoPfxFieldDim, 83 RsExpandKernelDriverInfoPfxFieldCurrent, 84 RsExpandKernelDriverInfoPfxFieldUsr, 85 RsExpandKernelDriverInfoPfxFieldUsLenr, 86 87 RsExpandKernelDriverInfoPfxFieldCount 88 }; 89 90 llvm::Module *Module; 91 llvm::LLVMContext *Context; 92 93 /* 94 * Pointer to LLVM type information for the the function signature 95 * for expanded kernels. This must be re-calculated for each 96 * module the pass is run on. 97 */ 98 llvm::FunctionType *ExpandedFunctionType; 99 100 uint32_t mExportForEachCount; 101 const char **mExportForEachNameList; 102 const uint32_t *mExportForEachSignatureList; 103 104 // Turns on optimization of allocation stride values. 105 bool mEnableStepOpt; 106 107 uint32_t getRootSignature(llvm::Function *Function) { 108 const llvm::NamedMDNode *ExportForEachMetadata = 109 Module->getNamedMetadata("#rs_export_foreach"); 110 111 if (!ExportForEachMetadata) { 112 llvm::SmallVector<llvm::Type*, 8> RootArgTys; 113 for (llvm::Function::arg_iterator B = Function->arg_begin(), 114 E = Function->arg_end(); 115 B != E; 116 ++B) { 117 RootArgTys.push_back(B->getType()); 118 } 119 120 // For pre-ICS bitcode, we may not have signature information. In that 121 // case, we use the size of the RootArgTys to select the number of 122 // arguments. 123 return (1 << RootArgTys.size()) - 1; 124 } 125 126 if (ExportForEachMetadata->getNumOperands() == 0) { 127 return 0; 128 } 129 130 bccAssert(ExportForEachMetadata->getNumOperands() > 0); 131 132 // We only handle the case for legacy root() functions here, so this is 133 // hard-coded to look at only the first such function. 134 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 135 if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 136 llvm::Metadata *SigMD = SigNode->getOperand(0); 137 if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 138 llvm::StringRef SigString = SigS->getString(); 139 uint32_t Signature = 0; 140 if (SigString.getAsInteger(10, Signature)) { 141 ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 142 return 0; 143 } 144 return Signature; 145 } 146 } 147 148 return 0; 149 } 150 151 bool isStepOptSupported(llvm::Type *AllocType) { 152 153 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 154 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 155 156 if (mEnableStepOpt) { 157 return false; 158 } 159 160 if (AllocType == VoidPtrTy) { 161 return false; 162 } 163 164 if (!PT) { 165 return false; 166 } 167 168 // remaining conditions are 64-bit only 169 if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 170 return true; 171 } 172 173 // coerce suggests an upconverted struct type, which we can't support 174 if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 175 return false; 176 } 177 178 // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 179 llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 180 llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 181 if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 182 return false; 183 } 184 185 return true; 186 } 187 188 // Get the actual value we should use to step through an allocation. 189 // 190 // Normally the value we use to step through an allocation is given to us by 191 // the driver. However, for certain primitive data types, we can derive an 192 // integer constant for the step value. We use this integer constant whenever 193 // possible to allow further compiler optimizations to take place. 194 // 195 // DL - Target Data size/layout information. 196 // T - Type of allocation (should be a pointer). 197 // OrigStep - Original step increment (root.expand() input from driver). 198 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 199 llvm::Value *OrigStep) { 200 bccAssert(DL); 201 bccAssert(AllocType); 202 bccAssert(OrigStep); 203 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 204 if (isStepOptSupported(AllocType)) { 205 llvm::Type *ET = PT->getElementType(); 206 uint64_t ETSize = DL->getTypeAllocSize(ET); 207 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 208 return llvm::ConstantInt::get(Int32Ty, ETSize); 209 } else { 210 return OrigStep; 211 } 212 } 213 214 /// Builds the types required by the pass for the given context. 215 void buildTypes(void) { 216 // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 217 218 llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 219 llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 220 llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 221 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 222 llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 223 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 224 llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 225 226 /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 227 * 228 * struct RsLaunchDimensions { 229 * uint32_t x; 230 * uint32_t y; 231 * uint32_t z; 232 * uint32_t lod; 233 * uint32_t face; 234 * uint32_t array[4]; 235 * }; 236 */ 237 llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 238 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 239 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 240 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 241 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 242 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 243 RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 244 llvm::StructType *RsLaunchDimensionsTy = 245 llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 246 247 /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 248 * 249 * struct RsExpandKernelDriverInfoPfx { 250 * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 251 * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 252 * uint32_t inLen; 253 * 254 * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 255 * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 256 * uint32_t outLen; 257 * 258 * // Dimension of the launch 259 * RsLaunchDimensions dim; 260 * 261 * // The walking iterator of the launch 262 * RsLaunchDimensions current; 263 * 264 * const void *usr; 265 * uint32_t usrLen; 266 * 267 * // Items below this line are not used by the compiler and can be change in the driver. 268 * // So the compiler must assume there are an unknown number of fields of unknown type 269 * // beginning here. 270 * }; 271 * 272 * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 273 */ 274 llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 275 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 276 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 277 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 278 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 279 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 280 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 281 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 282 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 283 RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 284 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 285 llvm::StructType *RsExpandKernelDriverInfoPfxTy = 286 llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 287 288 // Create the function type for expanded kernels. 289 290 llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 291 292 llvm::SmallVector<llvm::Type*, 8> ParamTypes; 293 ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p 294 ParamTypes.push_back(Int32Ty); // uint32_t x1 295 ParamTypes.push_back(Int32Ty); // uint32_t x2 296 ParamTypes.push_back(Int32Ty); // uint32_t outstep 297 298 ExpandedFunctionType = 299 llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes, 300 false); 301 } 302 303 /// @brief Create skeleton of the expanded function. 304 /// 305 /// This creates a function with the following signature: 306 /// 307 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 308 /// uint32_t outstep) 309 /// 310 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 311 llvm::Function *ExpandedFunction = 312 llvm::Function::Create(ExpandedFunctionType, 313 llvm::GlobalValue::ExternalLinkage, 314 OldName + ".expand", Module); 315 316 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 317 318 llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 319 320 (AI++)->setName("p"); 321 (AI++)->setName("x1"); 322 (AI++)->setName("x2"); 323 (AI++)->setName("arg_outstep"); 324 325 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 326 ExpandedFunction); 327 llvm::IRBuilder<> Builder(Begin); 328 Builder.CreateRetVoid(); 329 330 return ExpandedFunction; 331 } 332 333 /// @brief Create an empty loop 334 /// 335 /// Create a loop of the form: 336 /// 337 /// for (i = LowerBound; i < UpperBound; i++) 338 /// ; 339 /// 340 /// After the loop has been created, the builder is set such that 341 /// instructions can be added to the loop body. 342 /// 343 /// @param Builder The builder to use to build this loop. The current 344 /// position of the builder is the position the loop 345 /// will be inserted. 346 /// @param LowerBound The first value of the loop iterator 347 /// @param UpperBound The maximal value of the loop iterator 348 /// @param LoopIV A reference that will be set to the loop iterator. 349 /// @return The BasicBlock that will be executed after the loop. 350 llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 351 llvm::Value *LowerBound, 352 llvm::Value *UpperBound, 353 llvm::PHINode **LoopIV) { 354 assert(LowerBound->getType() == UpperBound->getType()); 355 356 llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 357 llvm::Value *Cond, *IVNext; 358 llvm::PHINode *IV; 359 360 CondBB = Builder.GetInsertBlock(); 361 AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 362 HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 363 364 // if (LowerBound < Upperbound) 365 // goto LoopHeader 366 // else 367 // goto AfterBB 368 CondBB->getTerminator()->eraseFromParent(); 369 Builder.SetInsertPoint(CondBB); 370 Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 371 Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 372 373 // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 374 // iv.next = iv + 1 375 // if (iv.next < Upperbound) 376 // goto LoopHeader 377 // else 378 // goto AfterBB 379 Builder.SetInsertPoint(HeaderBB); 380 IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 381 IV->addIncoming(LowerBound, CondBB); 382 IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 383 IV->addIncoming(IVNext, HeaderBB); 384 Cond = Builder.CreateICmpULT(IVNext, UpperBound); 385 Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 386 AfterBB->setName("Exit"); 387 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 388 *LoopIV = IV; 389 return AfterBB; 390 } 391 392public: 393 RSForEachExpandPass(bool pEnableStepOpt = true) 394 : ModulePass(ID), Module(nullptr), Context(nullptr), 395 mEnableStepOpt(pEnableStepOpt) { 396 397 } 398 399 virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 400 // This pass does not use any other analysis passes, but it does 401 // add/wrap the existing functions in the module (thus altering the CFG). 402 } 403 404 // Build contribution to outgoing argument list for calling a 405 // ForEach-able function, based on the special parameters of that 406 // function. 407 // 408 // Signature - metadata bits for the signature of the ForEach-able function 409 // X, Arg_p - values derived directly from expanded function, 410 // suitable for computing arguments for the ForEach-able function 411 // CalleeArgs - contribution is accumulated here 412 // Bump - invoked once for each contributed outgoing argument 413 void ExpandSpecialArguments(uint32_t Signature, 414 llvm::Value *X, 415 llvm::Value *Arg_p, 416 llvm::IRBuilder<> &Builder, 417 llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 418 std::function<void ()> Bump) { 419 420 if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 421 CalleeArgs.push_back(Arg_p); 422 Bump(); 423 } 424 425 if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 426 CalleeArgs.push_back(X); 427 Bump(); 428 } 429 430 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 431 bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 432 433 llvm::Value *Current = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldCurrent); 434 435 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 436 llvm::Value *Y = Builder.CreateLoad( 437 Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldY), "Y"); 438 CalleeArgs.push_back(Y); 439 Bump(); 440 } 441 442 if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 443 llvm::Value *Z = Builder.CreateLoad( 444 Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldZ), "Z"); 445 CalleeArgs.push_back(Z); 446 Bump(); 447 } 448 } 449 } 450 451 /* Performs the actual optimization on a selected function. On success, the 452 * Module will contain a new function of the name "<NAME>.expand" that 453 * invokes <NAME>() in a loop with the appropriate parameters. 454 */ 455 bool ExpandFunction(llvm::Function *Function, uint32_t Signature) { 456 ALOGV("Expanding ForEach-able Function %s", 457 Function->getName().str().c_str()); 458 459 if (!Signature) { 460 Signature = getRootSignature(Function); 461 if (!Signature) { 462 // We couldn't determine how to expand this function based on its 463 // function signature. 464 return false; 465 } 466 } 467 468 llvm::DataLayout DL(Module); 469 470 llvm::Function *ExpandedFunction = 471 createEmptyExpandedFunction(Function->getName()); 472 473 /* 474 * Extract the expanded function's parameters. It is guaranteed by 475 * createEmptyExpandedFunction that there will be five parameters. 476 */ 477 478 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 479 480 llvm::Function::arg_iterator ExpandedFunctionArgIter = 481 ExpandedFunction->arg_begin(); 482 483 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 484 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 485 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 486 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 487 488 llvm::Value *InStep = nullptr; 489 llvm::Value *OutStep = nullptr; 490 491 // Construct the actual function body. 492 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 493 494 // Collect and construct the arguments for the kernel(). 495 // Note that we load any loop-invariant arguments before entering the Loop. 496 llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 497 498 llvm::Type *InTy = nullptr; 499 llvm::Value *InBasePtr = nullptr; 500 if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 501 llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base"); 502 503 llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 504 505 llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, 0); 506 llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 507 "instep_addr"); 508 509 InTy = (FunctionArgIter++)->getType(); 510 InStep = getStepValue(&DL, InTy, InStepArg); 511 512 InStep->setName("instep"); 513 514 llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, 0); 515 InBasePtr = Builder.CreateLoad(InputAddr, "input_base"); 516 } 517 518 llvm::Type *OutTy = nullptr; 519 llvm::Value *OutBasePtr = nullptr; 520 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 521 OutTy = (FunctionArgIter++)->getType(); 522 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 523 OutStep->setName("outstep"); 524 OutBasePtr = Builder.CreateLoad( 525 Builder.CreateConstInBoundsGEP2_32( 526 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 527 } 528 529 llvm::Value *UsrData = nullptr; 530 if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 531 llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 532 UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 533 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldUsr)), UsrDataTy); 534 UsrData->setName("UsrData"); 535 } 536 537 llvm::PHINode *IV; 538 createLoop(Builder, Arg_x1, Arg_x2, &IV); 539 540 llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 541 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 542 [&FunctionArgIter]() { FunctionArgIter++; }); 543 544 bccAssert(FunctionArgIter == Function->arg_end()); 545 546 // Populate the actual call to kernel(). 547 llvm::SmallVector<llvm::Value*, 8> RootArgs; 548 549 llvm::Value *InPtr = nullptr; 550 llvm::Value *OutPtr = nullptr; 551 552 // Calculate the current input and output pointers 553 // 554 // We always calculate the input/output pointers with a GEP operating on i8 555 // values and only cast at the very end to OutTy. This is because the step 556 // between two values is given in bytes. 557 // 558 // TODO: We could further optimize the output by using a GEP operation of 559 // type 'OutTy' in cases where the element type of the allocation allows. 560 if (OutBasePtr) { 561 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 562 OutOffset = Builder.CreateMul(OutOffset, OutStep); 563 OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset); 564 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 565 } 566 567 if (InBasePtr) { 568 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 569 InOffset = Builder.CreateMul(InOffset, InStep); 570 InPtr = Builder.CreateGEP(InBasePtr, InOffset); 571 InPtr = Builder.CreatePointerCast(InPtr, InTy); 572 } 573 574 if (InPtr) { 575 RootArgs.push_back(InPtr); 576 } 577 578 if (OutPtr) { 579 RootArgs.push_back(OutPtr); 580 } 581 582 if (UsrData) { 583 RootArgs.push_back(UsrData); 584 } 585 586 RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 587 588 Builder.CreateCall(Function, RootArgs); 589 590 return true; 591 } 592 593 /* Expand a pass-by-value kernel. 594 */ 595 bool ExpandKernel(llvm::Function *Function, uint32_t Signature) { 596 bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 597 ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 598 599 // TODO: Refactor this to share functionality with ExpandFunction. 600 llvm::DataLayout DL(Module); 601 602 llvm::Function *ExpandedFunction = 603 createEmptyExpandedFunction(Function->getName()); 604 605 /* 606 * Extract the expanded function's parameters. It is guaranteed by 607 * createEmptyExpandedFunction that there will be five parameters. 608 */ 609 610 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 611 612 llvm::Function::arg_iterator ExpandedFunctionArgIter = 613 ExpandedFunction->arg_begin(); 614 615 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 616 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 617 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 618 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 619 620 // Construct the actual function body. 621 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 622 623 // Create TBAA meta-data. 624 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 625 *TBAAAllocation, *TBAAPointer; 626 llvm::MDBuilder MDHelper(*Context); 627 628 TBAARenderScriptDistinct = 629 MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 630 TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA", 631 TBAARenderScriptDistinct); 632 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 633 TBAARenderScript); 634 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 635 TBAAAllocation, 0); 636 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 637 TBAARenderScript); 638 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 639 640 llvm::MDNode *AliasingDomain, *AliasingScope; 641 AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain"); 642 AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope"); 643 644 /* 645 * Collect and construct the arguments for the kernel(). 646 * 647 * Note that we load any loop-invariant arguments before entering the Loop. 648 */ 649 size_t NumInputs = Function->arg_size(); 650 651 // No usrData parameter on kernels. 652 bccAssert( 653 !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 654 655 llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 656 657 // Check the return type 658 llvm::Type *OutTy = nullptr; 659 llvm::Value *OutStep = nullptr; 660 llvm::LoadInst *OutBasePtr = nullptr; 661 llvm::Value *CastedOutBasePtr = nullptr; 662 663 bool PassOutByPointer = false; 664 665 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 666 llvm::Type *OutBaseTy = Function->getReturnType(); 667 668 if (OutBaseTy->isVoidTy()) { 669 PassOutByPointer = true; 670 OutTy = ArgIter->getType(); 671 672 ArgIter++; 673 --NumInputs; 674 } else { 675 // We don't increment Args, since we are using the actual return type. 676 OutTy = OutBaseTy->getPointerTo(); 677 } 678 679 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 680 OutStep->setName("outstep"); 681 OutBasePtr = Builder.CreateLoad( 682 Builder.CreateConstInBoundsGEP2_32( 683 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 684 685 if (gEnableRsTbaa) { 686 OutBasePtr->setMetadata("tbaa", TBAAPointer); 687 } 688 689 OutBasePtr->setMetadata("alias.scope", AliasingScope); 690 691 CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 692 } 693 694 llvm::PHINode *IV; 695 createLoop(Builder, Arg_x1, Arg_x2, &IV); 696 697 llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 698 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 699 [&NumInputs]() { --NumInputs; }); 700 701 llvm::SmallVector<llvm::Type*, 8> InTypes; 702 llvm::SmallVector<llvm::Value*, 8> InSteps; 703 llvm::SmallVector<llvm::Value*, 8> InBasePtrs; 704 llvm::SmallVector<bool, 8> InIsStructPointer; 705 706 bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 707 708 if (NumInputs > 0) { 709 llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base"); 710 711 llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 712 713 for (size_t InputIndex = 0; InputIndex < NumInputs; 714 ++InputIndex, ArgIter++) { 715 716 llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, InputIndex); 717 llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 718 "instep_addr"); 719 720 llvm::Type *InType = ArgIter->getType(); 721 722 /* 723 * AArch64 calling dictate that structs of sufficient size get passed by 724 * pointer instead of passed by value. This, combined with the fact 725 * that we don't allow kernels to operate on pointer data means that if 726 * we see a kernel with a pointer parameter we know that it is struct 727 * input that has been promoted. As such we don't need to convert its 728 * type to a pointer. Later we will need to know to avoid a load, so we 729 * save this information in InIsStructPointer. 730 */ 731 if (!InType->isPointerTy()) { 732 InType = InType->getPointerTo(); 733 InIsStructPointer.push_back(false); 734 } else { 735 InIsStructPointer.push_back(true); 736 } 737 738 llvm::Value *InStep = getStepValue(&DL, InType, InStepArg); 739 740 InStep->setName("instep"); 741 742 llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, InputIndex); 743 llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr, 744 "input_base"); 745 llvm::Value *CastInBasePtr = Builder.CreatePointerCast(InBasePtr, 746 InType, "casted_in"); 747 if (gEnableRsTbaa) { 748 InBasePtr->setMetadata("tbaa", TBAAPointer); 749 } 750 751 InBasePtr->setMetadata("alias.scope", AliasingScope); 752 753 InTypes.push_back(InType); 754 InSteps.push_back(InStep); 755 InBasePtrs.push_back(CastInBasePtr); 756 } 757 } 758 759 // Populate the actual call to kernel(). 760 llvm::SmallVector<llvm::Value*, 8> RootArgs; 761 762 // Calculate the current input and output pointers 763 // 764 // 765 // We always calculate the input/output pointers with a GEP operating on i8 766 // values combined with a multiplication and only cast at the very end to 767 // OutTy. This is to account for dynamic stepping sizes when the value 768 // isn't apparent at compile time. In the (very common) case when we know 769 // the step size at compile time, due to haveing complete type information 770 // this multiplication will optmized out and produces code equivalent to a 771 // a GEP on a pointer of the correct type. 772 773 // Output 774 775 llvm::Value *OutPtr = nullptr; 776 if (CastedOutBasePtr) { 777 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 778 779 OutPtr = Builder.CreateGEP(CastedOutBasePtr, OutOffset); 780 781 if (PassOutByPointer) { 782 RootArgs.push_back(OutPtr); 783 } 784 } 785 786 // Inputs 787 788 if (NumInputs > 0) { 789 llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1); 790 791 for (size_t Index = 0; Index < NumInputs; ++Index) { 792 llvm::Value *InPtr = Builder.CreateGEP(InBasePtrs[Index], Offset); 793 llvm::Value *Input; 794 795 if (InIsStructPointer[Index]) { 796 Input = InPtr; 797 798 } else { 799 llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 800 801 if (gEnableRsTbaa) { 802 InputLoad->setMetadata("tbaa", TBAAAllocation); 803 } 804 805 InputLoad->setMetadata("alias.scope", AliasingScope); 806 807 Input = InputLoad; 808 } 809 810 RootArgs.push_back(Input); 811 } 812 } 813 814 RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 815 816 llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 817 818 if (OutPtr && !PassOutByPointer) { 819 llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 820 if (gEnableRsTbaa) { 821 Store->setMetadata("tbaa", TBAAAllocation); 822 } 823 Store->setMetadata("alias.scope", AliasingScope); 824 } 825 826 return true; 827 } 828 829 /// @brief Checks if pointers to allocation internals are exposed 830 /// 831 /// This function verifies if through the parameters passed to the kernel 832 /// or through calls to the runtime library the script gains access to 833 /// pointers pointing to data within a RenderScript Allocation. 834 /// If we know we control all loads from and stores to data within 835 /// RenderScript allocations and if we know the run-time internal accesses 836 /// are all annotated with RenderScript TBAA metadata, only then we 837 /// can safely use TBAA to distinguish between generic and from-allocation 838 /// pointers. 839 bool allocPointersExposed(llvm::Module &Module) { 840 // Old style kernel function can expose pointers to elements within 841 // allocations. 842 // TODO: Extend analysis to allow simple cases of old-style kernels. 843 for (size_t i = 0; i < mExportForEachCount; ++i) { 844 const char *Name = mExportForEachNameList[i]; 845 uint32_t Signature = mExportForEachSignatureList[i]; 846 if (Module.getFunction(Name) && 847 !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 848 return true; 849 } 850 } 851 852 // Check for library functions that expose a pointer to an Allocation or 853 // that are not yet annotated with RenderScript-specific tbaa information. 854 static std::vector<std::string> Funcs; 855 856 // rsGetElementAt(...) 857 Funcs.push_back("_Z14rsGetElementAt13rs_allocationj"); 858 Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj"); 859 Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj"); 860 // rsSetElementAt() 861 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj"); 862 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj"); 863 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj"); 864 // rsGetElementAtYuv_uchar_Y() 865 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj"); 866 // rsGetElementAtYuv_uchar_U() 867 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj"); 868 // rsGetElementAtYuv_uchar_V() 869 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj"); 870 871 for (std::vector<std::string>::iterator FI = Funcs.begin(), 872 FE = Funcs.end(); 873 FI != FE; ++FI) { 874 llvm::Function *Function = Module.getFunction(*FI); 875 876 if (!Function) { 877 ALOGE("Missing run-time function '%s'", FI->c_str()); 878 return true; 879 } 880 881 if (Function->getNumUses() > 0) { 882 return true; 883 } 884 } 885 886 return false; 887 } 888 889 /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 890 /// 891 /// The TBAA metadata used to annotate loads/stores from RenderScript 892 /// Allocations is generated in a separate TBAA tree with a 893 /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 894 /// all nodes in unrelated alias analysis trees. This function makes the 895 /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 896 /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 897 /// the connected trees every access to an Allocation is resolved to 898 /// must-alias if compared to a normal C/C++ access. 899 void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 900 llvm::MDBuilder MDHelper(*Context); 901 llvm::MDNode *TBAARenderScriptDistinct = 902 MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 903 llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 904 "RenderScript TBAA", TBAARenderScriptDistinct); 905 llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 906 TBAARenderScript->replaceOperandWith(1, TBAARoot); 907 } 908 909 virtual bool runOnModule(llvm::Module &Module) { 910 bool Changed = false; 911 this->Module = &Module; 912 this->Context = &Module.getContext(); 913 914 this->buildTypes(); 915 916 bcinfo::MetadataExtractor me(&Module); 917 if (!me.extract()) { 918 ALOGE("Could not extract metadata from module!"); 919 return false; 920 } 921 mExportForEachCount = me.getExportForEachSignatureCount(); 922 mExportForEachNameList = me.getExportForEachNameList(); 923 mExportForEachSignatureList = me.getExportForEachSignatureList(); 924 925 bool AllocsExposed = allocPointersExposed(Module); 926 927 for (size_t i = 0; i < mExportForEachCount; ++i) { 928 const char *name = mExportForEachNameList[i]; 929 uint32_t signature = mExportForEachSignatureList[i]; 930 llvm::Function *kernel = Module.getFunction(name); 931 if (kernel) { 932 if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 933 Changed |= ExpandKernel(kernel, signature); 934 kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 935 } else if (kernel->getReturnType()->isVoidTy()) { 936 Changed |= ExpandFunction(kernel, signature); 937 kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 938 } else { 939 // There are some graphics root functions that are not 940 // expanded, but that will be called directly. For those 941 // functions, we can not set the linkage to internal. 942 } 943 } 944 } 945 946 if (gEnableRsTbaa && !AllocsExposed) { 947 connectRenderScriptTBAAMetadata(Module); 948 } 949 950 return Changed; 951 } 952 953 virtual const char *getPassName() const { 954 return "ForEach-able Function Expansion"; 955 } 956 957}; // end RSForEachExpandPass 958 959} // end anonymous namespace 960 961char RSForEachExpandPass::ID = 0; 962static llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass"); 963 964namespace bcc { 965 966llvm::ModulePass * 967createRSForEachExpandPass(bool pEnableStepOpt){ 968 return new RSForEachExpandPass(pEnableStepOpt); 969} 970 971} // end namespace bcc 972