1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h" 17 18#include <unordered_set> 19 20#include "llvm/IR/MDBuilder.h" 21#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" 22#include "tensorflow/compiler/xla/service/logical_buffer.h" 23#include "tensorflow/compiler/xla/types.h" 24 25namespace xla { 26namespace llvm_ir { 27 28// Sentry allocation used to represent parameters of the entry computation in 29// alias_scope_metadata_ and noalias_metadata_. 30static const BufferAllocation* kParameterAllocation = new BufferAllocation( 31 /*index=*/-1, /*size=*/0, /*is_thread_local=*/false, /*is_reusable=*/false, 32 LogicalBuffer::Color(0)); 33 34void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo, 35 llvm_ir::IrArray* array) { 36 BufferAllocation::Slice buffer_slice; 37 if (hlo.opcode() == HloOpcode::kParameter) { 38 // Parameters may alias with each other but may not alias with our temporary 39 // buffers. 40 buffer_slice = BufferAllocation::Slice(kParameterAllocation, 0, 0); 41 } else { 42 const std::set<BufferAllocation::Slice> slices = 43 assignment_.GetAllSlices(&hlo, /*index=*/{}); 44 if (slices.empty() || slices.size() > 1) { 45 // Skip HLOs which don't have a buffer assigned or for which the 46 // buffer can't be determined statically. We cannot determine their 47 // aliasing properties in these cases. 48 return; 49 } 50 buffer_slice = *slices.begin(); 51 } 52 53 if (module_.config().debug_options().xla_llvm_enable_alias_scope_metadata()) { 54 llvm::MDNode*& alias_scope_md = alias_scope_metadata_[buffer_slice]; 55 if (alias_scope_md == nullptr) { 56 alias_scope_md = 57 GetAliasScopeMetadataForBuffer(buffer_slice, GetAliasDomain()); 58 } 59 if (alias_scope_md != nullptr) { 60 array->AddAliasScopeMetadata(alias_scope_md); 61 } 62 } 63 64 if (module_.config().debug_options().xla_llvm_enable_noalias_metadata()) { 65 llvm::MDNode*& noalias_md = noalias_metadata_[buffer_slice]; 66 if (noalias_md == nullptr) { 67 noalias_md = GetNoaliasMetadataForBuffer(buffer_slice, GetAliasDomain(), 68 assignment_, hlo); 69 } 70 if (noalias_md != nullptr) { 71 array->AddNoaliasMetadata(noalias_md); 72 } 73 } 74 75 if (module_.config() 76 .debug_options() 77 .xla_llvm_enable_invariant_load_metadata()) { 78 // Parameters of the entry computation are never stored to, loading from a 79 // parameter pointer should always return the same result within a loop. 80 if (hlo.opcode() == HloOpcode::kParameter) { 81 const std::vector<HloInstruction*>& parameter_instructions = 82 module_.entry_computation()->parameter_instructions(); 83 if (std::find(parameter_instructions.begin(), 84 parameter_instructions.end(), 85 &hlo) != parameter_instructions.end()) { 86 array->MarkInvariantOverWholeProgram(context_); 87 } 88 } 89 } 90} 91 92llvm::MDNode* AliasAnalysis::GetAliasDomain() { 93 llvm::MDBuilder metadata_builder(*context_); 94 if (alias_domain_ == nullptr) { 95 // We use createAliasScopeDomain rather than createAnonymousAliasScopeDomain 96 // so that when functions get inlined, we continue using the one domain, 97 // rather than duplicating it (and thus having two AA domains in one 98 // function). 99 // 100 // A side-effect of this is that if you ever compile two HLO modules in the 101 // same LLVM module, they'll have the same alias scope domain. This isn't a 102 // problem because the two HLO modules will never interact with one another. 103 alias_domain_ = 104 metadata_builder.createAliasScopeDomain("XLA global AA domain"); 105 } 106 return alias_domain_; 107} 108 109llvm::MDNode* AliasAnalysis::GetAliasScopeMetadataForBuffer( 110 const BufferAllocation::Slice& buffer_slice, llvm::MDNode* domain) { 111 // While we could synthesize an alias.scope, doing so is not more profitable 112 // than LLVM's default behavior. 113 if (buffer_slice.allocation() == kParameterAllocation) { 114 return nullptr; 115 } 116 117 llvm::MDBuilder metadata_builder(domain->getContext()); 118 llvm::MDNode* scope = metadata_builder.createAliasScope( 119 AsStringRef("buffer: " + buffer_slice.ToString()), domain); 120 llvm::MDNode* scope_list = llvm::MDNode::get(domain->getContext(), scope); 121 return scope_list; 122} 123 124llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer( 125 const BufferAllocation::Slice& buffer_slice, llvm::MDNode* domain, 126 const BufferAssignment& assignment, const HloInstruction& hlo) { 127 // We want to construct a list of buffers which: 128 // 129 // 1. Do not alias the given buffer. 130 // 2. Will plausibly be used in the vicinity of the given buffer. 131 // 132 // Making the noalias set overly large will result in either a massive 133 // slowdown in LLVM or LLVM will just ignore the noalias set. 134 // 135 // A plausible list of instructions are: 136 // 1. Users of the given hlo. 137 // 2. Operands of users of the given hlo. 138 // 3. Operands of the given hlo. 139 // 140 // This set can be increased as we need. For now only consider top-level 141 // buffers (index = {}) not buffers nested within the instruction's 142 // operands/output which are not typically touched. 143 std::vector<const LogicalBuffer*> worklist; 144 auto add_buffers_to_worklist = 145 [&worklist, &assignment](const HloInstruction* instruction) { 146 for (const LogicalBuffer* buffer : 147 assignment.GetSourceBuffers(instruction, /*index=*/{})) { 148 worklist.push_back(buffer); 149 } 150 }; 151 152 for (HloInstruction* user : hlo.users()) { 153 add_buffers_to_worklist(user); 154 for (HloInstruction* operand : user->operands()) { 155 add_buffers_to_worklist(operand); 156 } 157 } 158 159 add_buffers_to_worklist(&hlo); 160 for (HloInstruction* operand : hlo.operands()) { 161 add_buffers_to_worklist(operand); 162 } 163 164 tensorflow::gtl::FlatSet<BufferAllocation::Slice, 165 BufferAllocation::Slice::Hasher> 166 buffers; 167 for (const LogicalBuffer* buffer : worklist) { 168 // Skip buffers which cannot be added to the noalias set. 169 if (!assignment.HasAllocation(*buffer) || 170 buffer->instruction()->opcode() == HloOpcode::kParameter) { 171 continue; 172 } 173 const BufferAllocation::Slice noalias_slice = 174 assignment.GetAssignedAllocation(*buffer).GetSlice(*buffer); 175 // Our buffer must not overlap with the noalias slice. 176 if (!buffer_slice.OverlapsWith(noalias_slice)) { 177 buffers.insert(noalias_slice); 178 // Some instructions have too many operands, causing the noalias set to be 179 // too large. To reduce compilation time (b/31901575), truncate noalias 180 // sets to at most 500 elements. 181 // 182 // Future work: improvements to LLVM's scoped AA that avoid creating a 183 // MDNode set for every alias query can help to reduce the compilation 184 // time as well. 185 constexpr int kMaxNoAliasSetSize = 500; 186 if (buffers.size() >= kMaxNoAliasSetSize) { 187 break; 188 } 189 } 190 } 191 192 // Don't bother constructing a noalias metadata node if it would be empty. 193 if (buffers.empty()) { 194 return nullptr; 195 } 196 197 llvm::MDBuilder metadata_builder(domain->getContext()); 198 std::vector<llvm::Metadata*> scopes; 199 for (const BufferAllocation::Slice noalias_slice : buffers) { 200 llvm::MDNode* scope = metadata_builder.createAliasScope( 201 AsStringRef("buffer: " + noalias_slice.ToString()), domain); 202 scopes.push_back(scope); 203 } 204 llvm::MDNode* noalias_list = 205 llvm::MDNode::get(domain->getContext(), AsArrayRef(scopes)); 206 return noalias_list; 207} 208 209} // namespace llvm_ir 210} // namespace xla 211