// RSForEachExpand.cpp revision 354d1c132ad7e1ff6fdb0da95443245848a0601f
1/* 2 * Copyright 2012, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "bcc/Assert.h" 18#include "bcc/Renderscript/RSTransforms.h" 19 20#include <cstdlib> 21#include <functional> 22 23#include <llvm/IR/DerivedTypes.h> 24#include <llvm/IR/Function.h> 25#include <llvm/IR/Instructions.h> 26#include <llvm/IR/IRBuilder.h> 27#include <llvm/IR/MDBuilder.h> 28#include <llvm/IR/Module.h> 29#include <llvm/Pass.h> 30#include <llvm/Support/raw_ostream.h> 31#include <llvm/IR/DataLayout.h> 32#include <llvm/IR/Function.h> 33#include <llvm/IR/Type.h> 34#include <llvm/Transforms/Utils/BasicBlockUtils.h> 35 36#include "bcc/Config/Config.h" 37#include "bcc/Support/Log.h" 38 39#include "bcinfo/MetadataExtractor.h" 40 41#define NUM_EXPANDED_FUNCTION_PARAMS 4 42 43using namespace bcc; 44 45namespace { 46 47static const bool gEnableRsTbaa = true; 48 49/* RSForEachExpandPass - This pass operates on functions that are able to be 50 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 51 * ForEach-able function to be invoked over the appropriate data cells of the 52 * input/output allocations (adjusting other relevant parameters as we go). We 53 * support doing this for any ForEach-able compute kernels. The new function 54 * name is the original function name followed by ".expand". Note that we 55 * still generate code for the original function. 
56 */ 57class RSForEachExpandPass : public llvm::ModulePass { 58public: 59 static char ID; 60 61private: 62 static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 63 64 enum RsLaunchDimensionsField { 65 RsLaunchDimensionsFieldX, 66 RsLaunchDimensionsFieldY, 67 RsLaunchDimensionsFieldZ, 68 RsLaunchDimensionsFieldLod, 69 RsLaunchDimensionsFieldFace, 70 RsLaunchDimensionsFieldArray, 71 72 RsLaunchDimensionsFieldCount 73 }; 74 75 enum RsExpandKernelDriverInfoPfxField { 76 RsExpandKernelDriverInfoPfxFieldInPtr, 77 RsExpandKernelDriverInfoPfxFieldInStride, 78 RsExpandKernelDriverInfoPfxFieldInLen, 79 RsExpandKernelDriverInfoPfxFieldOutPtr, 80 RsExpandKernelDriverInfoPfxFieldOutStride, 81 RsExpandKernelDriverInfoPfxFieldOutLen, 82 RsExpandKernelDriverInfoPfxFieldDim, 83 RsExpandKernelDriverInfoPfxFieldCurrent, 84 RsExpandKernelDriverInfoPfxFieldUsr, 85 RsExpandKernelDriverInfoPfxFieldUsLenr, 86 87 RsExpandKernelDriverInfoPfxFieldCount 88 }; 89 90 llvm::Module *Module; 91 llvm::LLVMContext *Context; 92 93 /* 94 * Pointer to LLVM type information for the the function signature 95 * for expanded kernels. This must be re-calculated for each 96 * module the pass is run on. 97 */ 98 llvm::FunctionType *ExpandedFunctionType; 99 100 uint32_t mExportForEachCount; 101 const char **mExportForEachNameList; 102 const uint32_t *mExportForEachSignatureList; 103 104 // Turns on optimization of allocation stride values. 105 bool mEnableStepOpt; 106 107 uint32_t getRootSignature(llvm::Function *Function) { 108 const llvm::NamedMDNode *ExportForEachMetadata = 109 Module->getNamedMetadata("#rs_export_foreach"); 110 111 if (!ExportForEachMetadata) { 112 llvm::SmallVector<llvm::Type*, 8> RootArgTys; 113 for (llvm::Function::arg_iterator B = Function->arg_begin(), 114 E = Function->arg_end(); 115 B != E; 116 ++B) { 117 RootArgTys.push_back(B->getType()); 118 } 119 120 // For pre-ICS bitcode, we may not have signature information. 
In that 121 // case, we use the size of the RootArgTys to select the number of 122 // arguments. 123 return (1 << RootArgTys.size()) - 1; 124 } 125 126 if (ExportForEachMetadata->getNumOperands() == 0) { 127 return 0; 128 } 129 130 bccAssert(ExportForEachMetadata->getNumOperands() > 0); 131 132 // We only handle the case for legacy root() functions here, so this is 133 // hard-coded to look at only the first such function. 134 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 135 if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 136 llvm::Metadata *SigMD = SigNode->getOperand(0); 137 if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 138 llvm::StringRef SigString = SigS->getString(); 139 uint32_t Signature = 0; 140 if (SigString.getAsInteger(10, Signature)) { 141 ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 142 return 0; 143 } 144 return Signature; 145 } 146 } 147 148 return 0; 149 } 150 151 bool isStepOptSupported(llvm::Type *AllocType) { 152 153 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 154 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 155 156 if (mEnableStepOpt) { 157 return false; 158 } 159 160 if (AllocType == VoidPtrTy) { 161 return false; 162 } 163 164 if (!PT) { 165 return false; 166 } 167 168 // remaining conditions are 64-bit only 169 if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 170 return true; 171 } 172 173 // coerce suggests an upconverted struct type, which we can't support 174 if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 175 return false; 176 } 177 178 // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 179 llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 180 llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 181 if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 182 return false; 183 } 184 185 return true; 186 } 187 188 // Get the actual 
value we should use to step through an allocation. 189 // 190 // Normally the value we use to step through an allocation is given to us by 191 // the driver. However, for certain primitive data types, we can derive an 192 // integer constant for the step value. We use this integer constant whenever 193 // possible to allow further compiler optimizations to take place. 194 // 195 // DL - Target Data size/layout information. 196 // T - Type of allocation (should be a pointer). 197 // OrigStep - Original step increment (root.expand() input from driver). 198 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 199 llvm::Value *OrigStep) { 200 bccAssert(DL); 201 bccAssert(AllocType); 202 bccAssert(OrigStep); 203 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 204 if (isStepOptSupported(AllocType)) { 205 llvm::Type *ET = PT->getElementType(); 206 uint64_t ETSize = DL->getTypeAllocSize(ET); 207 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 208 return llvm::ConstantInt::get(Int32Ty, ETSize); 209 } else { 210 return OrigStep; 211 } 212 } 213 214 /// Builds the types required by the pass for the given context. 215 void buildTypes(void) { 216 // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 
217 218 llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 219 llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 220 llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 221 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 222 llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 223 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 224 llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 225 226 /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 227 * 228 * struct RsLaunchDimensions { 229 * uint32_t x; 230 * uint32_t y; 231 * uint32_t z; 232 * uint32_t lod; 233 * uint32_t face; 234 * uint32_t array[4]; 235 * }; 236 */ 237 llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 238 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 239 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 240 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 241 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 242 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 243 RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 244 llvm::StructType *RsLaunchDimensionsTy = 245 llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 246 247 /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 248 * 249 * struct RsExpandKernelDriverInfoPfx { 250 * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 251 * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 252 * uint32_t inLen; 253 * 254 * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 255 * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 256 * uint32_t outLen; 257 * 258 * // Dimension of the launch 259 * RsLaunchDimensions dim; 260 * 261 * // The walking iterator of the launch 262 * RsLaunchDimensions current; 263 * 264 * const void *usr; 265 * uint32_t usrLen; 266 * 267 * // Items below this line 
are not used by the compiler and can be change in the driver. 268 * // So the compiler must assume there are an unknown number of fields of unknown type 269 * // beginning here. 270 * }; 271 * 272 * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 273 */ 274 llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 275 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 276 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 277 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 278 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 279 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 280 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 281 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 282 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 283 RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 284 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 285 llvm::StructType *RsExpandKernelDriverInfoPfxTy = 286 llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 287 288 // Create the function type for expanded kernels. 
289 290 llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 291 292 llvm::SmallVector<llvm::Type*, 8> ParamTypes; 293 ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p 294 ParamTypes.push_back(Int32Ty); // uint32_t x1 295 ParamTypes.push_back(Int32Ty); // uint32_t x2 296 ParamTypes.push_back(Int32Ty); // uint32_t outstep 297 298 ExpandedFunctionType = 299 llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes, 300 false); 301 } 302 303 /// @brief Create skeleton of the expanded function. 304 /// 305 /// This creates a function with the following signature: 306 /// 307 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 308 /// uint32_t outstep) 309 /// 310 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 311 llvm::Function *ExpandedFunction = 312 llvm::Function::Create(ExpandedFunctionType, 313 llvm::GlobalValue::ExternalLinkage, 314 OldName + ".expand", Module); 315 316 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 317 318 llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 319 320 (AI++)->setName("p"); 321 (AI++)->setName("x1"); 322 (AI++)->setName("x2"); 323 (AI++)->setName("arg_outstep"); 324 325 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 326 ExpandedFunction); 327 llvm::IRBuilder<> Builder(Begin); 328 Builder.CreateRetVoid(); 329 330 return ExpandedFunction; 331 } 332 333 /// @brief Create an empty loop 334 /// 335 /// Create a loop of the form: 336 /// 337 /// for (i = LowerBound; i < UpperBound; i++) 338 /// ; 339 /// 340 /// After the loop has been created, the builder is set such that 341 /// instructions can be added to the loop body. 342 /// 343 /// @param Builder The builder to use to build this loop. The current 344 /// position of the builder is the position the loop 345 /// will be inserted. 
346 /// @param LowerBound The first value of the loop iterator 347 /// @param UpperBound The maximal value of the loop iterator 348 /// @param LoopIV A reference that will be set to the loop iterator. 349 /// @return The BasicBlock that will be executed after the loop. 350 llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 351 llvm::Value *LowerBound, 352 llvm::Value *UpperBound, 353 llvm::PHINode **LoopIV) { 354 assert(LowerBound->getType() == UpperBound->getType()); 355 356 llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 357 llvm::Value *Cond, *IVNext; 358 llvm::PHINode *IV; 359 360 CondBB = Builder.GetInsertBlock(); 361 // DT = &getAnalysis<DominatorTree>(); 362 // LI = &getAnalysis<LoopInfo>(); 363 AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 364 HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 365 366 // if (LowerBound < Upperbound) 367 // goto LoopHeader 368 // else 369 // goto AfterBB 370 CondBB->getTerminator()->eraseFromParent(); 371 Builder.SetInsertPoint(CondBB); 372 Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 373 Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 374 375 // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 376 // iv.next = iv + 1 377 // if (iv.next < Upperbound) 378 // goto LoopHeader 379 // else 380 // goto AfterBB 381 Builder.SetInsertPoint(HeaderBB); 382 IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 383 IV->addIncoming(LowerBound, CondBB); 384 IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 385 IV->addIncoming(IVNext, HeaderBB); 386 Cond = Builder.CreateICmpULT(IVNext, UpperBound); 387 Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 388 AfterBB->setName("Exit"); 389 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 390 *LoopIV = IV; 391 return AfterBB; 392 } 393 394public: 395 RSForEachExpandPass(bool pEnableStepOpt = true) 396 : ModulePass(ID), Module(nullptr), Context(nullptr), 397 mEnableStepOpt(pEnableStepOpt) { 398 399 } 400 401 
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 402 // This pass does not use any other analysis passes, but it does 403 // add/wrap the existing functions in the module (thus altering the CFG). 404 } 405 406 // Build contribution to outgoing argument list for calling a 407 // ForEach-able function, based on the special parameters of that 408 // function. 409 // 410 // Signature - metadata bits for the signature of the ForEach-able function 411 // X, Arg_p - values derived directly from expanded function, 412 // suitable for computing arguments for the ForEach-able function 413 // CalleeArgs - contribution is accumulated here 414 // Bump - invoked once for each contributed outgoing argument 415 void ExpandSpecialArguments(uint32_t Signature, 416 llvm::Value *X, 417 llvm::Value *Arg_p, 418 llvm::IRBuilder<> &Builder, 419 llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 420 std::function<void ()> Bump) { 421 422 if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 423 CalleeArgs.push_back(Arg_p); 424 Bump(); 425 } 426 427 if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 428 CalleeArgs.push_back(X); 429 Bump(); 430 } 431 432 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 433 bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 434 435 llvm::Value *Current = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldCurrent); 436 437 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 438 llvm::Value *Y = Builder.CreateLoad( 439 Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldY), "Y"); 440 CalleeArgs.push_back(Y); 441 Bump(); 442 } 443 444 if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 445 llvm::Value *Z = Builder.CreateLoad( 446 Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldZ), "Z"); 447 CalleeArgs.push_back(Z); 448 Bump(); 449 } 450 } 451 } 452 453 /* Performs the actual optimization on a selected function. 
On success, the 454 * Module will contain a new function of the name "<NAME>.expand" that 455 * invokes <NAME>() in a loop with the appropriate parameters. 456 */ 457 bool ExpandFunction(llvm::Function *Function, uint32_t Signature) { 458 ALOGV("Expanding ForEach-able Function %s", 459 Function->getName().str().c_str()); 460 461 if (!Signature) { 462 Signature = getRootSignature(Function); 463 if (!Signature) { 464 // We couldn't determine how to expand this function based on its 465 // function signature. 466 return false; 467 } 468 } 469 470 llvm::DataLayout DL(Module); 471 472 llvm::Function *ExpandedFunction = 473 createEmptyExpandedFunction(Function->getName()); 474 475 /* 476 * Extract the expanded function's parameters. It is guaranteed by 477 * createEmptyExpandedFunction that there will be five parameters. 478 */ 479 480 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 481 482 llvm::Function::arg_iterator ExpandedFunctionArgIter = 483 ExpandedFunction->arg_begin(); 484 485 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 486 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 487 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 488 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 489 490 llvm::Value *InStep = nullptr; 491 llvm::Value *OutStep = nullptr; 492 493 // Construct the actual function body. 494 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 495 496 // Collect and construct the arguments for the kernel(). 497 // Note that we load any loop-invariant arguments before entering the Loop. 
498 llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 499 500 llvm::Type *InTy = nullptr; 501 llvm::Value *InBasePtr = nullptr; 502 if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 503 llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base"); 504 505 llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 506 507 llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, 0); 508 llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 509 "instep_addr"); 510 511 InTy = (FunctionArgIter++)->getType(); 512 InStep = getStepValue(&DL, InTy, InStepArg); 513 514 InStep->setName("instep"); 515 516 llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, 0); 517 InBasePtr = Builder.CreateLoad(InputAddr, "input_base"); 518 } 519 520 llvm::Type *OutTy = nullptr; 521 llvm::Value *OutBasePtr = nullptr; 522 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 523 OutTy = (FunctionArgIter++)->getType(); 524 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 525 OutStep->setName("outstep"); 526 OutBasePtr = Builder.CreateLoad( 527 Builder.CreateConstInBoundsGEP2_32( 528 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 529 } 530 531 llvm::Value *UsrData = nullptr; 532 if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 533 llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 534 UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 535 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldUsr)), UsrDataTy); 536 UsrData->setName("UsrData"); 537 } 538 539 llvm::PHINode *IV; 540 createLoop(Builder, Arg_x1, Arg_x2, &IV); 541 542 llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 543 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 544 [&FunctionArgIter]() { FunctionArgIter++; }); 545 546 
bccAssert(FunctionArgIter == Function->arg_end()); 547 548 // Populate the actual call to kernel(). 549 llvm::SmallVector<llvm::Value*, 8> RootArgs; 550 551 llvm::Value *InPtr = nullptr; 552 llvm::Value *OutPtr = nullptr; 553 554 // Calculate the current input and output pointers 555 // 556 // We always calculate the input/output pointers with a GEP operating on i8 557 // values and only cast at the very end to OutTy. This is because the step 558 // between two values is given in bytes. 559 // 560 // TODO: We could further optimize the output by using a GEP operation of 561 // type 'OutTy' in cases where the element type of the allocation allows. 562 if (OutBasePtr) { 563 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 564 OutOffset = Builder.CreateMul(OutOffset, OutStep); 565 OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset); 566 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 567 } 568 569 if (InBasePtr) { 570 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 571 InOffset = Builder.CreateMul(InOffset, InStep); 572 InPtr = Builder.CreateGEP(InBasePtr, InOffset); 573 InPtr = Builder.CreatePointerCast(InPtr, InTy); 574 } 575 576 if (InPtr) { 577 RootArgs.push_back(InPtr); 578 } 579 580 if (OutPtr) { 581 RootArgs.push_back(OutPtr); 582 } 583 584 if (UsrData) { 585 RootArgs.push_back(UsrData); 586 } 587 588 RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 589 590 Builder.CreateCall(Function, RootArgs); 591 592 return true; 593 } 594 595 /* Expand a pass-by-value kernel. 596 */ 597 bool ExpandKernel(llvm::Function *Function, uint32_t Signature) { 598 bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 599 ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 600 601 // TODO: Refactor this to share functionality with ExpandFunction. 
602 llvm::DataLayout DL(Module); 603 604 llvm::Function *ExpandedFunction = 605 createEmptyExpandedFunction(Function->getName()); 606 607 /* 608 * Extract the expanded function's parameters. It is guaranteed by 609 * createEmptyExpandedFunction that there will be five parameters. 610 */ 611 612 bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 613 614 llvm::Function::arg_iterator ExpandedFunctionArgIter = 615 ExpandedFunction->arg_begin(); 616 617 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 618 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 619 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 620 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 621 622 // Construct the actual function body. 623 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 624 625 // Create TBAA meta-data. 626 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 627 *TBAAAllocation, *TBAAPointer; 628 llvm::MDBuilder MDHelper(*Context); 629 630 TBAARenderScriptDistinct = 631 MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 632 TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA", 633 TBAARenderScriptDistinct); 634 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 635 TBAARenderScript); 636 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 637 TBAAAllocation, 0); 638 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 639 TBAARenderScript); 640 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 641 642 llvm::MDNode *AliasingDomain, *AliasingScope; 643 AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain"); 644 AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope"); 645 646 /* 647 * Collect and construct the arguments for the kernel(). 648 * 649 * Note that we load any loop-invariant arguments before entering the Loop. 
650 */ 651 size_t NumInputs = Function->arg_size(); 652 653 // No usrData parameter on kernels. 654 bccAssert( 655 !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 656 657 llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 658 659 // Check the return type 660 llvm::Type *OutTy = nullptr; 661 llvm::Value *OutStep = nullptr; 662 llvm::LoadInst *OutBasePtr = nullptr; 663 llvm::Value *CastedOutBasePtr = nullptr; 664 665 bool PassOutByPointer = false; 666 667 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 668 llvm::Type *OutBaseTy = Function->getReturnType(); 669 670 if (OutBaseTy->isVoidTy()) { 671 PassOutByPointer = true; 672 OutTy = ArgIter->getType(); 673 674 ArgIter++; 675 --NumInputs; 676 } else { 677 // We don't increment Args, since we are using the actual return type. 678 OutTy = OutBaseTy->getPointerTo(); 679 } 680 681 OutStep = getStepValue(&DL, OutTy, Arg_outstep); 682 OutStep->setName("outstep"); 683 OutBasePtr = Builder.CreateLoad( 684 Builder.CreateConstInBoundsGEP2_32( 685 Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 686 687 if (gEnableRsTbaa) { 688 OutBasePtr->setMetadata("tbaa", TBAAPointer); 689 } 690 691 OutBasePtr->setMetadata("alias.scope", AliasingScope); 692 693 CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 694 } 695 696 llvm::PHINode *IV; 697 createLoop(Builder, Arg_x1, Arg_x2, &IV); 698 699 llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 700 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 701 [&NumInputs]() { --NumInputs; }); 702 703 llvm::SmallVector<llvm::Type*, 8> InTypes; 704 llvm::SmallVector<llvm::Value*, 8> InSteps; 705 llvm::SmallVector<llvm::Value*, 8> InBasePtrs; 706 llvm::SmallVector<bool, 8> InIsStructPointer; 707 708 bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 709 710 if (NumInputs > 0) { 711 llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, 
"inputs_base"); 712 713 llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 714 715 for (size_t InputIndex = 0; InputIndex < NumInputs; 716 ++InputIndex, ArgIter++) { 717 718 llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, InputIndex); 719 llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 720 "instep_addr"); 721 722 llvm::Type *InType = ArgIter->getType(); 723 724 /* 725 * AArch64 calling dictate that structs of sufficient size get passed by 726 * pointer instead of passed by value. This, combined with the fact 727 * that we don't allow kernels to operate on pointer data means that if 728 * we see a kernel with a pointer parameter we know that it is struct 729 * input that has been promoted. As such we don't need to convert its 730 * type to a pointer. Later we will need to know to avoid a load, so we 731 * save this information in InIsStructPointer. 732 */ 733 if (!InType->isPointerTy()) { 734 InType = InType->getPointerTo(); 735 InIsStructPointer.push_back(false); 736 } else { 737 InIsStructPointer.push_back(true); 738 } 739 740 llvm::Value *InStep = getStepValue(&DL, InType, InStepArg); 741 742 InStep->setName("instep"); 743 744 llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, InputIndex); 745 llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr, 746 "input_base"); 747 llvm::Value *CastInBasePtr = Builder.CreatePointerCast(InBasePtr, 748 InType, "casted_in"); 749 if (gEnableRsTbaa) { 750 InBasePtr->setMetadata("tbaa", TBAAPointer); 751 } 752 753 InBasePtr->setMetadata("alias.scope", AliasingScope); 754 755 InTypes.push_back(InType); 756 InSteps.push_back(InStep); 757 InBasePtrs.push_back(CastInBasePtr); 758 } 759 } 760 761 // Populate the actual call to kernel(). 
762 llvm::SmallVector<llvm::Value*, 8> RootArgs; 763 764 // Calculate the current input and output pointers 765 // 766 // 767 // We always calculate the input/output pointers with a GEP operating on i8 768 // values combined with a multiplication and only cast at the very end to 769 // OutTy. This is to account for dynamic stepping sizes when the value 770 // isn't apparent at compile time. In the (very common) case when we know 771 // the step size at compile time, due to haveing complete type information 772 // this multiplication will optmized out and produces code equivalent to a 773 // a GEP on a pointer of the correct type. 774 775 // Output 776 777 llvm::Value *OutPtr = nullptr; 778 if (CastedOutBasePtr) { 779 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 780 781 OutPtr = Builder.CreateGEP(CastedOutBasePtr, OutOffset); 782 783 if (PassOutByPointer) { 784 RootArgs.push_back(OutPtr); 785 } 786 } 787 788 // Inputs 789 790 if (NumInputs > 0) { 791 llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1); 792 793 for (size_t Index = 0; Index < NumInputs; ++Index) { 794 llvm::Value *InPtr = Builder.CreateGEP(InBasePtrs[Index], Offset); 795 llvm::Value *Input; 796 797 if (InIsStructPointer[Index]) { 798 Input = InPtr; 799 800 } else { 801 llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 802 803 if (gEnableRsTbaa) { 804 InputLoad->setMetadata("tbaa", TBAAAllocation); 805 } 806 807 InputLoad->setMetadata("alias.scope", AliasingScope); 808 809 Input = InputLoad; 810 } 811 812 RootArgs.push_back(Input); 813 } 814 } 815 816 RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 817 818 llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 819 820 if (OutPtr && !PassOutByPointer) { 821 llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 822 if (gEnableRsTbaa) { 823 Store->setMetadata("tbaa", TBAAAllocation); 824 } 825 Store->setMetadata("alias.scope", AliasingScope); 826 } 827 828 return true; 829 } 830 831 /// @brief Checks if 
pointers to allocation internals are exposed 832 /// 833 /// This function verifies if through the parameters passed to the kernel 834 /// or through calls to the runtime library the script gains access to 835 /// pointers pointing to data within a RenderScript Allocation. 836 /// If we know we control all loads from and stores to data within 837 /// RenderScript allocations and if we know the run-time internal accesses 838 /// are all annotated with RenderScript TBAA metadata, only then we 839 /// can safely use TBAA to distinguish between generic and from-allocation 840 /// pointers. 841 bool allocPointersExposed(llvm::Module &Module) { 842 // Old style kernel function can expose pointers to elements within 843 // allocations. 844 // TODO: Extend analysis to allow simple cases of old-style kernels. 845 for (size_t i = 0; i < mExportForEachCount; ++i) { 846 const char *Name = mExportForEachNameList[i]; 847 uint32_t Signature = mExportForEachSignatureList[i]; 848 if (Module.getFunction(Name) && 849 !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 850 return true; 851 } 852 } 853 854 // Check for library functions that expose a pointer to an Allocation or 855 // that are not yet annotated with RenderScript-specific tbaa information. 856 static std::vector<std::string> Funcs; 857 858 // rsGetElementAt(...) 
859 Funcs.push_back("_Z14rsGetElementAt13rs_allocationj"); 860 Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj"); 861 Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj"); 862 // rsSetElementAt() 863 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj"); 864 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj"); 865 Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj"); 866 // rsGetElementAtYuv_uchar_Y() 867 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj"); 868 // rsGetElementAtYuv_uchar_U() 869 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj"); 870 // rsGetElementAtYuv_uchar_V() 871 Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj"); 872 873 for (std::vector<std::string>::iterator FI = Funcs.begin(), 874 FE = Funcs.end(); 875 FI != FE; ++FI) { 876 llvm::Function *Function = Module.getFunction(*FI); 877 878 if (!Function) { 879 ALOGE("Missing run-time function '%s'", FI->c_str()); 880 return true; 881 } 882 883 if (Function->getNumUses() > 0) { 884 return true; 885 } 886 } 887 888 return false; 889 } 890 891 /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 892 /// 893 /// The TBAA metadata used to annotate loads/stores from RenderScript 894 /// Allocations is generated in a separate TBAA tree with a 895 /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 896 /// all nodes in unrelated alias analysis trees. This function makes the 897 /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 898 /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 899 /// the connected trees every access to an Allocation is resolved to 900 /// must-alias if compared to a normal C/C++ access. 
901 void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 902 llvm::MDBuilder MDHelper(*Context); 903 llvm::MDNode *TBAARenderScriptDistinct = 904 MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 905 llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 906 "RenderScript TBAA", TBAARenderScriptDistinct); 907 llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 908 TBAARenderScript->replaceOperandWith(1, TBAARoot); 909 } 910 911 virtual bool runOnModule(llvm::Module &Module) { 912 bool Changed = false; 913 this->Module = &Module; 914 this->Context = &Module.getContext(); 915 916 this->buildTypes(); 917 918 bcinfo::MetadataExtractor me(&Module); 919 if (!me.extract()) { 920 ALOGE("Could not extract metadata from module!"); 921 return false; 922 } 923 mExportForEachCount = me.getExportForEachSignatureCount(); 924 mExportForEachNameList = me.getExportForEachNameList(); 925 mExportForEachSignatureList = me.getExportForEachSignatureList(); 926 927 bool AllocsExposed = allocPointersExposed(Module); 928 929 for (size_t i = 0; i < mExportForEachCount; ++i) { 930 const char *name = mExportForEachNameList[i]; 931 uint32_t signature = mExportForEachSignatureList[i]; 932 llvm::Function *kernel = Module.getFunction(name); 933 if (kernel) { 934 if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 935 Changed |= ExpandKernel(kernel, signature); 936 kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 937 } else if (kernel->getReturnType()->isVoidTy()) { 938 Changed |= ExpandFunction(kernel, signature); 939 kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 940 } else { 941 // There are some graphics root functions that are not 942 // expanded, but that will be called directly. For those 943 // functions, we can not set the linkage to internal. 
944 } 945 } 946 } 947 948 if (gEnableRsTbaa && !AllocsExposed) { 949 connectRenderScriptTBAAMetadata(Module); 950 } 951 952 return Changed; 953 } 954 955 virtual const char *getPassName() const { 956 return "ForEach-able Function Expansion"; 957 } 958 959}; // end RSForEachExpandPass 960 961} // end anonymous namespace 962 963char RSForEachExpandPass::ID = 0; 964static llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass"); 965 966namespace bcc { 967 968llvm::ModulePass * 969createRSForEachExpandPass(bool pEnableStepOpt){ 970 return new RSForEachExpandPass(pEnableStepOpt); 971} 972 973} // end namespace bcc 974