// llvm_util.h, revision 4198e27be8115585ad6b5b141383fb7dc7856c24
11e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 21e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 31e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsLicensed under the Apache License, Version 2.0 (the "License"); 41e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsyou may not use this file except in compliance with the License. 51e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsYou may obtain a copy of the License at 61e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 71e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins http://www.apache.org/licenses/LICENSE-2.0 81e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 91e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsUnless required by applicable law or agreed to in writing, software 101e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsdistributed under the License is distributed on an "AS IS" BASIS, 111e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 121e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsSee the License for the specific language governing permissions and 131e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinslimitations under the License. 
141e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins==============================================================================*/ 151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_UTIL_H_ 171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_UTIL_H_ 181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <stdint.h> 201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <string> 211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <vector> 221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2334cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/ADT/StringRef.h" 2434cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/IR/BasicBlock.h" 2534cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/IR/IRBuilder.h" 2634cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/IR/Instructions.h" 2734cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/IR/Module.h" 2834cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/IR/Value.h" 2934cbf161d7b1191ad5c1b3bc02fc52d338e8b175Jiri Simsa#include "llvm/Support/raw_ostream.h" 3002ac85399d4fb35d5055ecf426632b9446a70041A. 
Unique TensorFlower#include "tensorflow/compiler/xla/literal_util.h" 3114664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar#include "tensorflow/compiler/xla/service/hlo_instruction.h" 321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/types.h" 331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/xla_data.pb.h" 341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/core/stringpiece.h" 351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/gtl/array_slice.h" 361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/types.h" 371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace llvm { 391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsclass FastMathFlags; 401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsclass TargetOptions; 411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}; 421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace xla { 441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace llvm_ir { 451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Convert a std::string (used by LLVM's interfaces) to string. 471e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsstring AsString(const std::string& str); 481e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Convert a tensorflow::StringPiece to a llvm::StringRef. Note: both 501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// tensorflow::StringPiece and llvm::StringRef are non-owning pointers into a 511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// string in memory. 
This method is used to feed strings to LLVM 521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// & Clang APIs that expect llvm::StringRef. 531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::StringRef AsStringRef(tensorflow::StringPiece str); 541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 551e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinstemplate <typename T> 561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::ArrayRef<T> AsArrayRef(const std::vector<T>& vec) { 571e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins return llvm::ArrayRef<T>(vec.data(), vec.size()); 581e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} 591e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 601e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinstemplate <typename T> 611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::ArrayRef<T> AsArrayRef(const tensorflow::gtl::ArraySlice<T>& slice) { 621e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins return llvm::ArrayRef<T>(slice.data(), slice.size()); 631e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} 641e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 651e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Dump the given LLVM entity to a string. This works for Types and Values. 
661e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinstemplate <typename T> 671e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsstring DumpToString(const T& entity) { 681e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins std::string buffer_string; 691e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::raw_string_ostream ostream(buffer_string); 701e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins entity.print(ostream); 711e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins ostream.flush(); 721e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins return AsString(buffer_string); 731e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} 741e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 751e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Dump the given LLVM module to a string. This requires a function distinct 761e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// from DumpToString because the signatures of the print() methods for Values 771e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// and Modules are slightly different. 781e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsstring DumpModuleToString(const llvm::Module& module); 791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 809249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// Constructs a human-friendly name from the given inputs. The result is 819249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// suitable for use as an llvm::Value's name. 8214664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar// 839249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// This is equivalent to 849249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// 859249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// - changing the HloInstruction* to its name() (if we called that overload), 869249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// - joining all of the nonempty inputs by '.', and then 879249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// - removing all '%'s. 
889249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebar// 899249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebarstring IrName(string a); 909249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebarstring IrName(tensorflow::StringPiece a, tensorflow::StringPiece b); 919249768389a22b45ee6a10930adffcc10c7f93ceJustin Lebarstring IrName(const HloInstruction* a, tensorflow::StringPiece b = ""); 9214664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar 9314664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar// Removes special characters from a function name. 9414664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar// 9514664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar// Note that this can cause different inputs to map to the same output, so after 9614664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebar// sanitizing a function name, you must run it through a uniquer. 9714664766b12a205296342ed20cc17bc8fd3f07d6Justin Lebarstring SanitizeFunctionName(string function_name); 981e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 991e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Emits a call to the specified intrinsic with the given operands. Overloaded 1001e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// intrinsics (for example, "minnum") must include a type in overloaded_types 1011e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// for each overloaded type. Typically, overloaded intrinsics have only a single 1021e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// overloaded type. 
1031e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Value* EmitCallToIntrinsic( 1041e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::Intrinsic::ID intrinsic_id, 1051e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins tensorflow::gtl::ArraySlice<llvm::Value*> operands, 1061e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins tensorflow::gtl::ArraySlice<llvm::Type*> overloaded_types, 1071e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 1081e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 109646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower// Emit float max. Emit maxnum intrinsic is fast math is disabled, or 110646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower// fcmp+select otherwise 111646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlowerllvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value, 112646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower llvm::IRBuilder<>* ir_builder); 113646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower 114646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower// Emit float min. Emit minnum intrinsic is fast math is disabled, or 115646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower// fcmp+select otherwise 116646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlowerllvm::Value* EmitFloatMin(llvm::Value* lhs_value, llvm::Value* rhs_value, 117646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower llvm::IRBuilder<>* ir_builder); 118646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower 1191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Convenience methods for emitting a GEP instruction that indexes into a buffer 1201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// (1-dimensional array), equivalent to array[index]. The type is automatically 1211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// determined from the element type of the array. 
The int64 index overload 1221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// wraps the index in a i64 llvm::Value. 1231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Value* EmitBufferIndexingGEP(llvm::Value* array, llvm::Value* index, 1241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 1251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Value* EmitBufferIndexingGEP(llvm::Value* array, int64 index, 1261e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 1271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Returns the LLVM type which represents the given XLA primitive type. 1291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type, 1304198e27be8115585ad6b5b141383fb7dc7856c24A. Unique TensorFlower llvm::Module* module); 1311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Returns the LLVM type which represents the given XLA shape. For example, 1331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// if "shape" is [5 x [10 x f32]], the function returns [5 x [10 x float]]. 1344198e27be8115585ad6b5b141383fb7dc7856c24A. Unique TensorFlowerllvm::Type* ShapeToIrType(const Shape& shape, llvm::Module* module); 1351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 136ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// Returns a value that represents a pointer to a global string constant that 137ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// encodes the shape as a serialized protobuf. 138ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlowerStatusOr<llvm::Value*> EncodeSelfDescribingShapeConstant( 139ea125c27974135fbad6bcb75b720499c68d52357A. 
Unique TensorFlower const Shape& shape, int32* shape_size, llvm::IRBuilder<>* ir_builder); 140ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower 141ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// Inverses the encoding of a Shape protobuf into an LLVM global variable. 142ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// 143ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// This is intended to be called from the runtime to decode the llvm::Constants 144ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// that are created via ConvertShapeToSelfDescribingConstant and subsequently 145ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower// embedded into the program. 146ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlowerStatusOr<Shape> DecodeSelfDescribingShapeConstant(const void* shape_ptr, 147ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower int32 size_bytes); 148ea125c27974135fbad6bcb75b720499c68d52357A. Unique TensorFlower 1491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Converts a given literal to an IR Constant. Literals have known constant 1501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// values at IR emission time. 1511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Constant* ConvertLiteralToIrConstant(const Literal& literal, 1524198e27be8115585ad6b5b141383fb7dc7856c24A. Unique TensorFlower llvm::Module* module); 1531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Inserts an allocate of the requested type at the entry point of the 1551e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// function that the builder is currently building. The insert point 1561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// of the builder is set to the same place after calling this function 1571e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// as before. 
1581e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 1591e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// This can be useful to avoid e.g. executing an alloca every time 1601e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// through a loop. 1611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::AllocaInst* EmitAllocaAtFunctionEntry(llvm::Type* type, 1621e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins tensorflow::StringPiece name, 1631e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder, 1641e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins int alignment = 0); 1651e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1661e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// As EmitAllocaAtFunctionEntry, but allocates element_count entries 167646ccee7113be0695fa3b7faa23586c798212566A. Unique TensorFlower// instead of a single element. 1681e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::AllocaInst* EmitAllocaAtFunctionEntryWithCount( 1691e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::Type* type, llvm::Value* element_count, tensorflow::StringPiece name, 1701e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder, int alignment = 0); 1711e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1721b5235fd897f7ea5cffc715300f67b4dc852fa27Jonathan Hseu// Creates a basic block with the same context and function as for the 1731e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// builder. Inserts at the end of the function if insert_before is 1741e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// null. 
1751e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::BasicBlock* CreateBasicBlock(llvm::BasicBlock* insert_before, 1761e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins tensorflow::StringPiece name, 1771e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 1781e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Struct with data on a conditional branch in a diamond shape created 1801e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// via EmitIfThenElse. 1811e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsstruct LlvmIfData { 1821e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins // The block that has the conditional branch. 1831e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::BasicBlock* if_block; 1841e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1851e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins // The block that is executed if the condition is true. 1861e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::BasicBlock* true_block; 1871e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1881e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins // The block that is executed if the condition is false. 1891e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::BasicBlock* false_block; 1901e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1911e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins // The block that follows after both the true_block and the 1921e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins // false_block. 
1931e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::BasicBlock* after_block; 1941e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}; 1951e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 1961e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Inserts a diamond-shaped if-then-else construct at the current 1971e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// insertion point of the builder. This involves splitting the current 1981e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// block into two blocks, at the insertion point, and introducing a 1991e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// true-block and a false-block that connect the two split pieces. The 2001e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// true-block is executed if the condition parameter evaluates to true 2011e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// and otherwise the false-block is executed. If `emit_else` is false, 2021e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// it jumps to the after-block rather than the false-block if the 2031e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// condition is false, and the returned `false_block` is null. 2041e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 2051e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Currently the insertion point of the builder must be a well-formed 2061e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// block with a terminator. If you need to use this for a 2071e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// non-terminated block, just make the function able to do that too. 
2081e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsLlvmIfData EmitIfThenElse(llvm::Value* condition, tensorflow::StringPiece name, 2091e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder, bool emit_else = true); 2101e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2111e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Emits a compare operation between "lhs" and "rhs" with the given predicate, 2121e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// and then converts the result to i8 so that it is addressable. 2131e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Value* EmitComparison(llvm::CmpInst::Predicate predicate, 2141e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::Value* lhs, llvm::Value* rhs, 2151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 2161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Emits a call that logs the given value with the given tag as a prefix. 2181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// The provided tag and value are passed to a runtime logging call that is 2191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// embedded in this translation unit when the emitted code is executed. 2201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 2211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// This can be very useful for debugging generated programs in short order when 2221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// developing new generated routines. 2231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 2241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Precondition: value must be an int64. 
2251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Precondition: tag must be a stable pointer for the lifetime of the generated 2261e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// program (the constant pointer is burned in to the program). 2271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsvoid EmitLogging(const char* tag, llvm::Value* value, 2281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* ir_builder); 2291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Adds alignment metadata to a load instruction using the given alignment. 2311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// The alignment refers to the result of the load, not the load itself. 2321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsvoid SetAlignmentMetadataForLoad(llvm::LoadInst* load, uint64_t alignment); 2331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Adds dereferenceable metadata to a load instruction using the given 2351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// the number of dereferenceable bytes. 2361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Dereferenceable refers to the result of the load, not the load itself. 2371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsvoid SetDereferenceableMetadataForLoad(llvm::LoadInst* load, 2381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins uint64_t dereferenceable_bytes); 2391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Tells LLVM `inst >= lower && inst < upper`. Returns `inst` for convenience. 
2411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, 2421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::Instruction* inst); 2431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsvoid SetToFirstInsertPoint(llvm::BasicBlock* blk, llvm::IRBuilder<>* builder); 2451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Create a bitwise rotation of `rotand` by `rotor`. 2471e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsllvm::Value* CreateRor(llvm::Value* rotand, llvm::Value* rotor, 2481e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins llvm::IRBuilder<>* builder); 2491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Returns the number of bytes within the shape. 2511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsint64 ByteSizeOf(const Shape& shape, const llvm::DataLayout& data_layout); 2521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 253d45505fe0c7ab9a10f16682f54d0eb54c4776cd1Justin Lebar// Gets an llvm::FastMathFlags that reflects the settings in the given 2547754ec45dc38e0f9cd047948045646418caad305Justin Lebar// module config. 255abbb19bb9445ffee96ff2946083a3b5c8dadc0d0Eli Benderskyllvm::FastMathFlags GetFastMathFlags(bool fast_math_enabled); 2561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 257d45505fe0c7ab9a10f16682f54d0eb54c4776cd1Justin Lebar// Sets values in the given TargetOptions struct according to the given 258d45505fe0c7ab9a10f16682f54d0eb54c4776cd1Justin Lebar// compilation options. 259abbb19bb9445ffee96ff2946083a3b5c8dadc0d0Eli Benderskyvoid SetTargetOptions(bool fast_math_enabled, 260d45505fe0c7ab9a10f16682f54d0eb54c4776cd1Justin Lebar llvm::TargetOptions* target_options); 2611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 262efc63f6248a4a85c885e4a4facabd7242ee3a94cA. 
Unique TensorFlower// Computes a conservative union of the metadata in "a" and "b". For 263efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower// aliasing-related metadata, this means the result can be applied to 264efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower// instructions whose aliasing relationship can be described either by "a" *or* 265efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower// by "b". 266efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlowerstd::map<int, llvm::MDNode*> MergeMetadata( 267efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower llvm::LLVMContext* context, const std::map<int, llvm::MDNode*>& a, 268efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower const std::map<int, llvm::MDNode*>& b); 269efc63f6248a4a85c885e4a4facabd7242ee3a94cA. Unique TensorFlower 270b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das// Dumps out `llvm_module` to a file in the directory named `directory_name`, 271b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das// creating the directory if necessary. A sanitized version of 272b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das// `hlo_module_name` is incorporated into the file name. If `optimized` is true 273b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das// then a suffix of "-with-opt.ll" is used, else a suffix of "-no-opt.ll" is 274b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das// used. 
275b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy DasStatus DumpIRToDirectory(const string& directory_name, 276b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das const string& hlo_module_name, 277b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das const llvm::Module& llvm_module, bool optimized); 278b6d5ff49ecfb5925597c3d5dcf40dd289125e8c2Sanjoy Das 2791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} // namespace llvm_ir 2801e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} // namespace xla 2811e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 2821e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_UTIL_H_ 283