128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License"); 428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFloweryou may not use this file except in compliance with the License. 528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerYou may obtain a copy of the License at 628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower http://www.apache.org/licenses/LICENSE-2.0 828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software 1028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS, 1128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerSee the License for the specific language governing permissions and 1328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerlimitations under the License. 1428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower==============================================================================*/ 1528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 1628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#ifdef TENSORFLOW_USE_GDR 1728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 1828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/contrib/gdr/gdr_memory_manager.h" 1928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 2028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <atomic> 2128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <cerrno> 2228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <fstream> 2328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <list> 2428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <map> 2528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <set> 2628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 2728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <fcntl.h> 2828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <rdma/rdma_cma.h> 2928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <rdma/rdma_verbs.h> 3028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include <sys/epoll.h> 3128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 3228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/contrib/gdr/gdr.pb.h" 3328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/common_runtime/bfc_allocator.h" 3428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/common_runtime/device.h" 3528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/common_runtime/dma_helper.h" 3628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 3728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/common_runtime/gpu/gpu_util.h" 3828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/common_runtime/gpu/process_state.h" 3928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // GOOGLE_CUDA 4028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/framework/allocator_registry.h" 4128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/lib/core/status.h" 4228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/platform/macros.h" 4328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#include "tensorflow/core/platform/mutex.h" 4428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 4528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowernamespace tensorflow { 4628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 4728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowernamespace { 4828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 4928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerbool IsGDRAvailable() { 5028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if defined(__APPLE__) 5128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return false; 5228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#elif defined(PLATFORM_WINDOWS) 5328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return false; 5428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#else 5528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::ifstream ifs("/proc/modules"); 5628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower string line; 5728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower while (std::getline(ifs, line)) { 5828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto sep = line.find(' '); 5928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower CHECK_NE(sep, std::string::npos); 6028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (line.substr(0, sep) == "nv_peer_mem") { 6128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return true; 6228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 6328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 6428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return false; 6528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif 6628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 6728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 6828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerint TryToReadNumaNode(ibv_device* device) { 6928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if defined(__APPLE__) 7028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0"; 7128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return 0; 7228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#elif defined(PLATFORM_WINDOWS) 7328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Windows support for NUMA is not currently implemented. Return node 0. 7428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return 0; 7528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#else 7628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower VLOG(2) << "Trying to read NUMA node for device: " << device->name; 7728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower static const int kUnknownNumaNode = -1; 7828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 7928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto filename = string(device->ibdev_path) + "/device/numa_node"; 8028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 8128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::ifstream ifs(filename.c_str()); 8228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower string content; 8328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower CHECK(std::getline(ifs, content)); 8428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 8528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int32 value; 8628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (strings::safe_strto32(content, &value)) { 8728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (value < 0) { 8828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "Successful NUMA node read from SysFS had negative value (" 894463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower << value 904463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower << "), but there must be at least one NUMA node" 914463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower ", so returning NUMA node zero"; 9228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return 0; 9328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 9428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "NUMA node for device: " << device->name << " is " << value; 9528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return value; 9628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 9728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return kUnknownNumaNode; 9828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif 9928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 10028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 10128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid EndpointDeleter(rdma_cm_id* id) { 10228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (id) { 10328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_destroy_ep(id); 10428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 10528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 10628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 10728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid MRDeleter(ibv_mr* mr) { 10828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (mr) { 10928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_dereg_mr(mr); 11028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 11128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 11228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 11328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerusing RdmaEndpointPtr = std::unique_ptr<rdma_cm_id, decltype(&EndpointDeleter)>; 11428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 11528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerusing MemoryRegionPtr = std::unique_ptr<ibv_mr, decltype(&MRDeleter)>; 11628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 11728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerclass GdrMemoryManager : public RemoteMemoryManager { 11828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower public: 11928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower GdrMemoryManager(const string& host, const string& port); 12028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 12128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower virtual ~GdrMemoryManager(); 12228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 12328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower virtual Status Init() override; 12428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 12528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower virtual void Run() override; 12628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 12728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower virtual void Stop() override; 12828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 129e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai virtual void TransportOptionsFromTensor( 13028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ::google::protobuf::Any* mutable_transport_options, const Tensor& tensor, 131e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Device* device, DeviceContext* device_context, bool on_host, 132e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai StatusCallback done) override; 13328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 134e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai virtual void TensorFromTransportOptions( 13528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Tensor* tensor, const ::google::protobuf::Any& transport_options, 136e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Device* device, DeviceContext* device_context, bool on_host, 137e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai StatusCallback done) override; 13828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 13928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower protected: 14028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Status CreateEndpoint(const string& host, const string& port, 14128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RdmaEndpointPtr& endpoint); 14228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 14328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower static bool Comparator(const void* ptr, const MemoryRegionPtr& other) { 14428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return ptr < reinterpret_cast<char*>(other->addr) + other->length; 14528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 14628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 14728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_mr* FindMemoryRegion(void* addr, size_t length); 14828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 14928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void InsertMemoryRegion(void* addr, size_t length); 15028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 15128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void EvictMemoryRegion(void* addr, size_t length); 15228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 15328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower private: 15428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const string host_; 15528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const string port_; 15628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RdmaEndpointPtr listening_; 15728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::atomic<bool> stopped_; 15828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int epfd_; 15928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 16028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Server side endpoints 16128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Accessed sequentially in Run() so not protected by lock 16228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::list<RdmaEndpointPtr> server_clients_; 16328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 16428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower using TensorKey = uint32_t; 16528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::atomic<TensorKey> next_key_; 16628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 16728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Server side on-the-fly tensor buffers 16828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex server_mu_; 16928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::map<TensorKey, const TensorBuffer*> tensor_buffers_ 17028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower GUARDED_BY(server_mu_); 17128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 17228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Client side endpoints 17328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex client_mu_; 17428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::map<std::pair<string, string>, RdmaEndpointPtr> clients_ 17528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower GUARDED_BY(cient_mu_); 17628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 17728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Managed memory regions 17828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex alloc_mu_; 17928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::vector<MemoryRegionPtr> mrs_ GUARDED_BY(alloc_mu_); 18028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 18128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager); 18228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower}; 18328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 18428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower// TODO(byronyi): remove this class duplicated from the one in 18528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower// common/runtime/gpu/pool_allocator.h when it is available in common_runtime 18628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerclass BasicCPUAllocator : public SubAllocator { 18728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower public: 18828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ~BasicCPUAllocator() override {} 18928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 19028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void* Alloc(size_t alignment, size_t num_bytes) override { 19128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return port::AlignedMalloc(num_bytes, alignment); 19228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 19328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void Free(void* ptr, size_t) override { port::AlignedFree(ptr); } 19428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower}; 19528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 19628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower// TODO(byronyi): remove this class and its registration when the default 19728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower// cpu_allocator() returns visitable allocator 19828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerclass BFCRdmaAllocator : public BFCAllocator { 19928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower public: 20028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower BFCRdmaAllocator() 20128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower : BFCAllocator(new BasicCPUAllocator(), 1LL << 36, true, "cpu_rdma_bfc") { 20228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 20328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower}; 20428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 20528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerREGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocator); 20628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 20728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerGdrMemoryManager::GdrMemoryManager(const string& host, const string& port) 20828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower : host_(host), 20928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower port_(port), 21028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower listening_(nullptr, EndpointDeleter), 21128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower stopped_(true), 21228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower next_key_(0) {} 21328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 21428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerGdrMemoryManager::~GdrMemoryManager() { close(epfd_); } 21528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 21628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerStatus GdrMemoryManager::Init() { 21728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower epfd_ = epoll_create1(0); 21828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (epfd_ == -1) { 21928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", "epoll_create"); 22028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 22128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 22228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_addrinfo* addrinfo; 22328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_addrinfo hints = {}; 22428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower hints.ai_port_space = RDMA_PS_TCP; 22528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower hints.ai_flags = RAI_PASSIVE; 22628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_getaddrinfo(const_cast<char*>(host_.c_str()), 22728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const_cast<char*>(port_.c_str()), &hints, &addrinfo)) { 22828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", "cannot resolve rdma://", 22928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower host_, ":", port_); 23028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 23128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 23228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_qp_init_attr init_attr = {}; 23328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.qp_type = IBV_QPT_RC; 23428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_recv_wr = 32; 23528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_send_wr = 1; 23628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_recv_sge = 1; 23728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_send_sge = 1; 23828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 23928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Create listening endpoint 24028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_cm_id* id; 24128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_create_ep(&id, addrinfo, nullptr, &init_attr)) { 24228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", "cannot bind to rdma://", 24328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower host_, ":", port_); 24428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 24528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower listening_.reset(id); 24628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_freeaddrinfo(addrinfo); 24728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 24828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Listen without backlog 24928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_listen(listening_.get(), 0)) { 25028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", 25128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "cannot listen on rdma://", host_, ":", port_); 25228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 25328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "RDMA server is listening on " << host_ << ":" << port_; 25428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 25528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (listening_->verbs == nullptr) { 25628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unimplemented( 25728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "Unsupported address ", host_, ":", port_, 25828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower " as it does not bind to a particular RDMA device"); 25928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 26028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 26128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int flags = fcntl(listening_->channel->fd, F_GETFL, 0); 26228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (fcntl(listening_->channel->fd, F_SETFL, flags | O_NONBLOCK)) { 26328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", 26428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "cannot set server to non-blocking mode"); 26528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 26628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 26728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower epoll_event event = {}; 26828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower event.events = EPOLLIN | EPOLLPRI; 26928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower event.data.ptr = listening_.get(); 27028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (epoll_ctl(epfd_, EPOLL_CTL_ADD, listening_->channel->fd, &event)) { 27128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", 27228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "cannot add server to epoll"); 27328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 27428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 27528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Allocator* allocators[] = { 27628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 27728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ProcessState::singleton()->GetCUDAHostAllocator(0), 27828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ProcessState::singleton()->GetCPUAllocator(0), 27928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // GOOGLE_CUDA 28028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower cpu_allocator(), 28128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower }; 28228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 28328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower using namespace std::placeholders; 28428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower VisitableAllocator::Visitor alloc_visitor = 28528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); 28628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower VisitableAllocator::Visitor free_visitor = 28728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2); 28828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 28928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::set<Allocator*> instrumented_; 29028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 29128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Host memory allocators 29228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower for (Allocator* allocator : allocators) { 29328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator); 2944463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower CHECK(visitable_allocator) 2954463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower << "is not visitable for instrumentation" << allocator->Name(); 29628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Make sure we don't instrument the same allocator twice 29728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (instrumented_.find(allocator) == std::end(instrumented_)) { 29828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower visitable_allocator->AddAllocVisitor(alloc_visitor); 29928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower visitable_allocator->AddFreeVisitor(free_visitor); 30028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower instrumented_.insert(allocator); 30128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); 30228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 30328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 30428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 30528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 30628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower VisitableAllocator::Visitor cuda_alloc_visitor = 30728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); 30828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (IsGDRAvailable()) { 30928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Note we don't free allocated GPU memory so there is no free visitor 31028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1; 31128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor); 31228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; 31328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 31428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // GOOGLE_CUDA 31528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 31628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return Status::OK(); 31728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 31828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 31928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid GdrMemoryManager::Run() { 32028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower stopped_ = false; 32128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower while (!stopped_) { 32228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower epoll_event events[32]; 32328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int ret = epoll_wait(epfd_, events, 32, 1); 32428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ret == -1) { 32528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << "epoll_wait: " << strerror(errno); 32628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return; 32728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 32828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower for (int i = 0; i < ret; i++) { 32928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_cm_id* id = static_cast<rdma_cm_id*>(events[i].data.ptr); 33028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (id == listening_.get()) { 33128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Accept incoming connections 33228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (!rdma_get_request(listening_.get(), &id)) { 33328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (!rdma_accept(id, nullptr)) { 33428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "Accepted new RDMA connection"; 33528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ibv_req_notify_cq(id->recv_cq, 0)) { 33628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << strerror(errno) << ": ibv_req_notify_cq failed"; 33728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower EndpointDeleter(id); 33828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 33928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 34028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower for (int i = 0; i < 32; i++) { 34128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_post_recvv(id, nullptr, nullptr, 0)) { 34228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << strerror(errno) << ": rdma_post_recvv failed"; 34328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower EndpointDeleter(id); 34428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 34528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 34628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 34728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int flags = fcntl(id->recv_cq_channel->fd, F_GETFL, 0); 34828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (fcntl(id->recv_cq_channel->fd, F_SETFL, flags | O_NONBLOCK)) { 34928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << strerror(errno) 35028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower << ": cannot set server_client to non-blocking mode"; 35128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower EndpointDeleter(id); 35228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 35328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 35428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower epoll_event event = {}; 35528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower event.events = EPOLLIN | EPOLLPRI; 35628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower event.data.ptr = id; 35728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (epoll_ctl(epfd_, EPOLL_CTL_ADD, id->recv_cq_channel->fd, 35828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower &event)) { 35928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << strerror(errno) 36028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower << ": cannot add server client to epoll"; 36128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower EndpointDeleter(id); 36228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 36328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 36428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower server_clients_.push_back({id, EndpointDeleter}); 36528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 36628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 36728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 36828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower // Polling work completions 36928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_cq* cq; 37028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void* context; 37128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (!ibv_get_cq_event(id->recv_cq_channel, &cq, &context)) { 37228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_ack_cq_events(id->recv_cq, 1); 37328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ibv_req_notify_cq(id->recv_cq, 0)) { 37428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << strerror(errno) << ": ibv_req_notify_cq failed"; 37528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 37628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 37728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_wc wc[32]; 37828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower int ret = ibv_poll_cq(id->recv_cq, 32, wc); 37928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ret < 0) { 38028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << "ibv_poll_cq failed"; 38128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 38228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 38328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower for (int i = 0; i < ret; i++) { 38428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (wc[i].opcode != IBV_WC_RECV_RDMA_WITH_IMM) { 38528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << "Received unknown operation " << wc[i].opcode; 38628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 38728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (wc[i].status != 0) { 38828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << ibv_wc_status_str(wc[i].status); 38928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 39028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower TensorKey tensor_key = ntohl(wc[i].imm_data); 39128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower { 39228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(server_mu_); 39328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto iter = tensor_buffers_.find(tensor_key); 39428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (iter == std::end(tensor_buffers_)) { 39528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << "Cannot find tensor buffer for tensor key " 39628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower << tensor_key; 39728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 39828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const TensorBuffer* buffer = iter->second; 39928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower buffer->Unref(); 40028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower tensor_buffers_.erase(iter); 40128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 40228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 40328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_post_recvv(id, nullptr, nullptr, 0)) { 40428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower perror("rdma_post_recvv"); 40528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(ERROR) << "rdma_post_recvv failed"; 40628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower continue; 40728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 40828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 40928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 41028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 41128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 41228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 41328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 41428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 41528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid GdrMemoryManager::Stop() { stopped_ = true; } 41628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 417e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Caivoid GdrMemoryManager::TransportOptionsFromTensor( 41828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ::google::protobuf::Any* mutable_transport_options, const Tensor& tensor, 419e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Device* device, DeviceContext* device_context, bool on_host, 420e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai StatusCallback done) { 42128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto buffer = DMAHelper::buffer(&tensor); 42228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void* addr = buffer->data(); 42328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower size_t length = buffer->size(); 42428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (length == 0) { 425e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable("Cannot register tensor buffer of size 0")); 426e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 42728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 42828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 42928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_mr* mr = FindMemoryRegion(addr, length); 43028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 43128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 432e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (!on_host) { 43328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); 434e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape()); 435e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai GPUUtil::CopyGPUTensorToCPU( 436e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai device, device_context, &tensor, host_copy, 437e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai [done, host_copy, mutable_transport_options, this](const Status& s) { 438e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (!s.ok()) { 439e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(s); 440e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai delete host_copy; 441e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 442e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 443e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai auto buffer = DMAHelper::buffer(host_copy); 444e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai void* addr = buffer->data(); 445e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai size_t length = buffer->size(); 446e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai ibv_mr* mr = FindMemoryRegion(addr, length); 447e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 448e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (mr == nullptr) { 449e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable("Cannot find pinned memory region")); 450e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai delete host_copy; 451e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 452e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 453e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 454e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai buffer->Ref(); 455e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai TensorKey tensor_key = next_key_++; 456e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai { 457e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai mutex_lock l(server_mu_); 458e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai tensor_buffers_.insert(std::make_pair(tensor_key, buffer)); 459e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 460e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 461e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai uint64_t checksum = 0; 462e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (VLOG_IS_ON(2)) { 463e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai checksum = GPUUtil::Checksum(*host_copy); 464e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 465e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 466e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai RemoteMemoryRegion remote_mr; 467e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_host(host_); 468e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_port(port_); 469e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_addr(reinterpret_cast<uint64_t>(addr)); 470e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_rkey(mr->rkey); 471e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_tensor_key(tensor_key); 472e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai remote_mr.set_checksum(checksum); 473e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai mutable_transport_options->PackFrom(remote_mr); 474e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 475e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(Status::OK()); 476e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai delete host_copy; 477e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai }); 478e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 47928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 48028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif 48128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 48228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (mr == nullptr) { 483e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable("Cannot find pinned memory region")); 484e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 48528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 48628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 48728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower buffer->Ref(); 48828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower TensorKey tensor_key = next_key_++; 48928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower { 49028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(server_mu_); 49128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower tensor_buffers_.insert(std::make_pair(tensor_key, buffer)); 49228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 49328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 49428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower uint64_t checksum = 0; 49528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (VLOG_IS_ON(2)) { 49628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#ifdef GOOGLE_CUDA 497e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (!on_host) { 498e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai checksum = GPUUtil::Checksum(device, device_context, tensor); 49928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 50028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower checksum = GPUUtil::Checksum(tensor); 50128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 50228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif 50328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 50428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 50528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RemoteMemoryRegion remote_mr; 50628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_host(host_); 50728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_port(port_); 50828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_addr(reinterpret_cast<uint64_t>(addr)); 50928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_rkey(mr->rkey); 51028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_tensor_key(tensor_key); 51128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.set_checksum(checksum); 51228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutable_transport_options->PackFrom(remote_mr); 51328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 514e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(Status::OK()); 51528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 51628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 517e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Caivoid GdrMemoryManager::TensorFromTransportOptions( 51828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Tensor* tensor, const ::google::protobuf::Any& transport_options, 519e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Device* device, DeviceContext* device_context, bool on_host, 520e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai StatusCallback done) { 52128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RemoteMemoryRegion remote_mr; 52228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (!transport_options.UnpackTo(&remote_mr)) { 523e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::NotFound("No RDMA transport options found")); 524e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 52528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 52628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 52728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto buffer = DMAHelper::buffer(tensor); 52828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower void* addr = buffer->data(); 52928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower size_t length = buffer->size(); 53028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_mr* mr = FindMemoryRegion(addr, length); 53128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 53228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Tensor host_copy; 53328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 534e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (mr == nullptr && !on_host) { 53528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); 53628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); 53728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower buffer = DMAHelper::buffer(&host_copy); 53828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower addr = buffer->data(); 53928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower length = buffer->size(); 54028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mr = FindMemoryRegion(addr, length); 54128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 54228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // GOOGLE_CUDA 54328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 54428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (mr == nullptr) { 545e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable("Cannot find pinned memory region")); 546e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 54728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 54828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 54928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower decltype(clients_)::iterator iter; 55028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower bool success; 55128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower { 55228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(client_mu_); 55328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::tie(iter, success) = clients_.insert( 55428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower std::make_pair(std::make_pair(remote_mr.host(), remote_mr.port()), 55528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RdmaEndpointPtr(nullptr, EndpointDeleter))); 55628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (success || iter->second.get() == nullptr) { 557e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Status s = 558e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai CreateEndpoint(remote_mr.host(), remote_mr.port(), iter->second); 559e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (!s.ok()) { 560e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(s); 561e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 562e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 56328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 56428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 56528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_cm_id* id = iter->second.get(); 56628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 56728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower uint64_t start = Env::Default()->NowMicros(); 56828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 56928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_post_read(id, nullptr, buffer->data(), buffer->size(), mr, 0, 57028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower remote_mr.addr(), remote_mr.rkey())) { 571e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable(strerror(errno), ": ", "rdma_post_read failed")); 572e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 57328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 57428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 57528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_send_wr wr = {}; 57628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; 57728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower wr.imm_data = htonl(remote_mr.tensor_key()); 578e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai wr.send_flags = IBV_SEND_SIGNALED; 57928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_send_wr* bad_wr; 58028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ibv_post_send(id->qp, &wr, &bad_wr)) { 581e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable(strerror(errno), ": ", "ibv_post_send failed")); 582e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 58328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 58428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 58528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_wc wc = {}; 586e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai int ret; 587e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai while ((ret = ibv_poll_cq(id->send_cq, 1, &wc)) == 0) 588e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai ; 58928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (ret < 0 || wc.status) { 590e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(errors::Unavailable(ibv_wc_status_str(wc.status))); 591e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 59228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 59328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 59428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#if GOOGLE_CUDA 59528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (host_copy.NumElements() > 0) { 596e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai uint64_t checksum = 0; 597e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (VLOG_IS_ON(2)) { 598e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai checksum = GPUUtil::Checksum(host_copy); 599e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai CHECK(checksum == remote_mr.checksum()) 600e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai << "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum(); 60128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 602e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai Tensor* ref = new Tensor; 603e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai std::swap(host_copy, *ref); 604e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai GPUUtil::CopyCPUTensorToGPU( 605e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai ref, device_context, device, tensor, 606e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai [ref, done, buffer, remote_mr, start](const Status& s) { 607e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai if (!s.ok()) { 608e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(s); 609e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai delete ref; 610e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 611e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai } 612e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai uint64_t end = Env::Default()->NowMicros(); 613e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai 614e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey() 615e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai << " of size " << buffer->size() << " with tensor key " 616e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai << remote_mr.tensor_key() << " took " << (end - start) 617e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai << " micros"; 618e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(Status::OK()); 619e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai delete ref; 620e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai }); 621e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai return; 62228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 62328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // GOOGLE_CUDA 62428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 62528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower uint64_t end = Env::Default()->NowMicros(); 62628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 62728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey() 62828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower << " of size " << buffer->size() << " with tensor key " 62928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower << remote_mr.tensor_key() << " took " << (end - start) << " micros"; 63028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 63128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower uint64_t checksum = 0; 63228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (VLOG_IS_ON(2)) { 63328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#ifdef GOOGLE_CUDA 63428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (device->tensorflow_gpu_device_info() && (!on_host)) { 635e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai checksum = GPUUtil::Checksum(device, device_context, *tensor); 63628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 63728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower checksum = GPUUtil::Checksum(*tensor); 63828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 6394463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower CHECK(checksum == remote_mr.checksum()) 6404463d105a8a4a83642b9709ba79310e8f4ddf577A. Unique TensorFlower << "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum(); 64128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif 64228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 643e2e3a943c0a28b7656325acb3fcd035743d55ea0Shanqing Cai done(Status::OK()); 64428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 64528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 64628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerStatus GdrMemoryManager::CreateEndpoint(const string& host, const string& port, 64728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower RdmaEndpointPtr& endpoint) { 64828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_addrinfo* addrinfo; 64928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_addrinfo hints = {}; 65028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower hints.ai_port_space = RDMA_PS_TCP; 65128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_getaddrinfo(const_cast<char*>(host.c_str()), 65228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const_cast<char*>(port.c_str()), &hints, &addrinfo)) { 65328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::InvalidArgument( 65428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower strerror(errno), ": ", "cannot connect to rdma://", host, ":", port); 65528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 65628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 65728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_qp_init_attr init_attr = {}; 65828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.qp_type = IBV_QPT_RC; 65928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_recv_wr = 1; 66028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_send_wr = 32; 66128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_recv_sge = 1; 66228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower init_attr.cap.max_send_sge = 1; 66328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 66428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_cm_id* id; 66528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_create_ep(&id, addrinfo, nullptr, &init_attr)) { 66628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_freeaddrinfo(addrinfo); 66728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", 66828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "cannot create endpoint to rdma://", host, ":", 66928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower port); 67028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 67128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_freeaddrinfo(addrinfo); 67228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 67328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (rdma_connect(id, nullptr)) { 67428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower rdma_destroy_ep(id); 67528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return errors::Unavailable(strerror(errno), ": ", 67628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower "cannot connect to rdma://", host, ":", port); 67728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 67828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 67928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(INFO) << "RDMA endpoint connected to rdma://" << host << ":" << port; 68028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower endpoint = RdmaEndpointPtr(id, EndpointDeleter); 68128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return Status::OK(); 68228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 68328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 68428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFloweribv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) { 68528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (length == 0) return nullptr; 68628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(alloc_mu_); 68728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); 68828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (iter == std::end(mrs_) || iter->get()->addr > addr) { 68928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return nullptr; 69028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 69128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return iter->get(); 69228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 69328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 69428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 69528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { 69628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (length == 0) return; 69728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length); 69828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (mr != nullptr) { 69928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(alloc_mu_); 70028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); 70128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mrs_.insert(iter, {mr, &MRDeleter}); 70228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 70328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(WARNING) << "Cannot register memory region"; 70428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 70528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 70628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 70728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowervoid GdrMemoryManager::EvictMemoryRegion(void* addr, size_t length) { 70828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (length == 0) return; 70928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mutex_lock l(alloc_mu_); 71028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); 71128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower if (iter != std::end(mrs_) && iter->get()->addr == addr) { 71228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower mrs_.erase(iter); 71328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } else { 71428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower LOG(WARNING) << "Failed to de-register memory region"; 71528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower } 71628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 71728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 71828ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} // namespace 71928ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 72028ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlowerRemoteMemoryManager* CreateRemoteMemoryManager(const string& host, 72128ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower const string& port) { 72228ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower return new GdrMemoryManager(host, port); 72328ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} 72428ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 72528ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower} // namespace tensorflow 72628ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower 72728ce1d163eeffe618a6972c5245be0e660d94e85A. Unique TensorFlower#endif // TENSORFLOW_USE_GDR 728