/external/tensorflow/tensorflow/core/distributed_runtime/ |
H A D | partial_run_mgr_test.cc | 27 int step_id = 1; local 29 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); 36 int step_id = 1; local 38 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); 41 partial_run_mgr.FindOrCreate(step_id, &found_cancellation_manager); 48 int step_id = 1; local 50 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); 62 int step_id = 1; local 64 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); 68 step_id, [ 109 int step_id = 1; local 125 int step_id = 1; local [all...] |
H A D | rendezvous_mgr_interface.h | 47 // until the tensor is received. Each global unique "step_id" 69 // "step_id". The caller takes ownership of one reference on the 74 virtual RemoteRendezvous* Find(int64 step_id) = 0; 76 // Finds the local rendezvous instance for the "step_id". Runs 80 virtual void RecvLocalAsync(int64 step_id, 85 virtual Status RecvLocal(int64 step_id, const Rendezvous::ParsedKey& parsed, 88 // Removes rendezvous for "step_id". 92 virtual void Cleanup(int64 step_id) = 0;
|
H A D | partial_run_mgr.cc | 29 bool PartialRunMgr::FindOrCreate(int step_id, argument 32 auto it = step_id_to_partial_run_.find(step_id); 41 step_id_to_partial_run_[step_id] = std::move(partial_run); 45 void PartialRunMgr::ExecutorDone(int step_id, const Status& executor_status) { argument 50 auto run_it = step_id_to_partial_run_.find(step_id); 68 step_id_to_partial_run_.erase(step_id); 72 void PartialRunMgr::PartialRunDone(int step_id, StatusCallback done, argument 77 auto run_it = step_id_to_partial_run_.find(step_id); 93 step_id_to_partial_run_.erase(step_id);
|
H A D | partial_run_mgr.h | 49 // Find or create the CancellationManager associated with step_id. 53 bool FindOrCreate(int step_id, CancellationManager** cancellation_manager); 58 void ExecutorDone(int step_id, const Status& executor_status); 66 void PartialRunDone(int step_id, StatusCallback done, const Status& status);
|
H A D | worker_cache_logger.cc | 59 bool WorkerCacheLogger::RetrieveLogs(int64 step_id, StepStats* ss) { argument 61 LogMap::iterator iter = log_map_.find(step_id); 71 void WorkerCacheLogger::Save(const string& device, int64 step_id, argument 74 StepLog* sl = &log_map_[step_id]; 85 void WorkerCacheLogger::RecordRecvTensor(int64 step_id, int64 start_usecs, argument 91 RecordDataTransfer(step_id, start_usecs, end_usecs, tensor_name, src_device, 95 void WorkerCacheLogger::RecordDataTransfer(int64 step_id, int64 start_usecs, argument 128 Save(dst_device, step_id, ns);
|
H A D | worker_cache_logger.h | 48 bool RetrieveLogs(int64 step_id, StepStats* ss); 59 void RecordRecvTensor(int64 step_id, int64 start_usecs, int64 end_usecs, 65 void RecordDataTransfer(int64 step_id, int64 start_usecs, int64 end_usecs, 84 void Save(const string& device, int64 step_id, NodeExecStats* ns);
|
H A D | worker.cc | 81 void Worker::AbortStep(int64 step_id) { argument 82 Rendezvous* rendez = env_->rendezvous_mgr->Find(step_id); 83 SchedNonBlockingClosureAfter(1000000, [rendez, step_id]() { 87 rendez->StartAbort(errors::Aborted("Step ", step_id)); 136 const int64 step_id = request->step_id(); local 137 TRACEPRINTF("RunGraph: %lld", step_id); 156 opts->SetCancelCallback([this, cm, step_id]() { 158 AbortStep(step_id); 176 request->graph_handle(), step_id, sessio 209 const int64 step_id = request->step_id(); local 287 const int64 step_id = request->step_id(); local [all...] |
H A D | graph_mgr.h | 53 // caller generated global unique id "step_id". Multiple executions 55 // "step_id" used are different. 88 void ExecuteAsync(const string& handle, const int64 step_id, 95 Status SendInputs(const int64 step_id, const NamedTensors& in); 96 Status RecvOutputs(const int64 step_id, NamedTensors* out); 97 void RecvOutputsAsync(const int64 step_id, NamedTensors* out, 121 // TODO(zhifengc): Dup-detection. Ensure step_id only run once. 162 void StartParallelExecutors(const string& handle, int64 step_id, Item* item,
|
H A D | base_rendezvous_mgr.h | 44 // until the tensor is received. Each global unique "step_id" 67 // "step_id". The caller takes ownership of one reference on the 72 RemoteRendezvous* Find(int64 step_id) override; 74 // Finds the local rendezvous instance for the "step_id". Runs 78 void RecvLocalAsync(int64 step_id, const Rendezvous::ParsedKey& parsed, 82 Status RecvLocal(int64 step_id, const Rendezvous::ParsedKey& parsed, 85 // Removes rendezvous for "step_id". 89 void Cleanup(int64 step_id) override; 95 virtual BaseRemoteRendezvous* Create(int64 step_id, 99 // Maps step_id t [all...] |
H A D | base_rendezvous_mgr.cc | 53 RemoteRendezvous* BaseRendezvousMgr::Find(int64 step_id) { argument 54 return FindOrCreate(step_id); 57 BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) { argument 59 auto iter = table_.find(step_id); 61 auto rr = Create(step_id, worker_env_); 62 iter = table_.insert({step_id, rr}).first; 68 void BaseRendezvousMgr::RecvLocalAsync(int64 step_id, argument 71 auto rendez = FindOrCreate(step_id); 85 Status BaseRendezvousMgr::RecvLocal(int64 step_id, argument 90 RecvLocalAsync(step_id, parse 104 Cleanup(int64 step_id) argument 133 BaseRemoteRendezvous(const WorkerEnv* env, int64 step_id) argument [all...] |
H A D | worker_cache_wrapper.h | 82 virtual bool RetrieveLogs(int64 step_id, StepStats* ss) { argument 83 return wrapped_->RetrieveLogs(step_id, ss);
|
H A D | worker_cache.h | 81 virtual bool RetrieveLogs(int64 step_id, StepStats* ss) { return false; } argument
|
/external/tensorflow/tensorflow/contrib/verbs/ |
H A D | verbs_util.cc | 28 string VerbsUtil::AppendStepidToKey(const string& key, int64 step_id) { argument 29 return strings::StrCat(key, ";", step_id); 34 int64& step_id) { 36 // a key (with step_id) has exact 6 parts if split by ";" 42 // part 6: step_id 44 CHECK(parts.size() == 6) << "Key with step_id must have 6 parts"; 45 strings::safe_strto64(parts[5], &step_id); 46 parts.pop_back(); // remove step_id 33 GetKeyAndStepId(const string& key_with_step_id, string& key, int64& step_id) argument
|
H A D | verbs_util.h | 27 static string AppendStepidToKey(const string& key, int64 step_id); 29 int64& step_id);
|
H A D | rdma_rendezvous_mgr.h | 30 // until the tensor is received. Each global unique "step_id" 52 BaseRemoteRendezvous* Create(int64 step_id,
|
H A D | rdma_rendezvous_mgr.cc | 32 RdmaRemoteRendezvous(const WorkerEnv* env, int64 step_id, RdmaMgr* rdma_mgr) argument 33 : BaseRemoteRendezvous(env, step_id), rdma_mgr_(rdma_mgr) {} 85 BaseRemoteRendezvous* RdmaRendezvousMgr::Create(int64 step_id, argument 87 return new RdmaRemoteRendezvous(worker_env, step_id, rdma_mgr_);
|
/external/tensorflow/tensorflow/core/framework/ |
H A D | log_memory.h | 62 static void RecordStep(int64 step_id, const string& handle); 66 // OpKernelContext the step_id indicates which step is executing, 67 // otherwise step_id is one of the SpecialStepIds defined in 70 static void RecordTensorAllocation(const string& kernel_name, int64 step_id, 83 static void RecordTensorOutput(const string& kernel_name, int64 step_id, 91 // operation such as memcpy. The step_id if >=0 indicates which step 92 // is executing, otherwise step_id is one of the SpecialStepIds 95 static void RecordRawAllocation(const string& operation, int64 step_id, 104 static void RecordRawDeallocation(const string& operation, int64 step_id,
|
H A D | log_memory.cc | 41 void LogMemory::RecordStep(const int64 step_id, const string& handle) { argument 43 step.set_step_id(step_id); 49 const int64 step_id, 52 allocation.set_step_id(step_id); 67 const int64 step_id, const int index, 70 output.set_step_id(step_id); 78 const int64 step_id, size_t num_bytes, 81 allocation.set_step_id(step_id); 91 const int64 step_id, void* ptr, 94 deallocation.set_step_id(step_id); 48 RecordTensorAllocation(const string& kernel_name, const int64 step_id, const Tensor& tensor) argument 66 RecordTensorOutput(const string& kernel_name, const int64 step_id, const int index, const Tensor& tensor) argument 77 RecordRawAllocation(const string& operation, const int64 step_id, size_t num_bytes, void* ptr, Allocator* allocator) argument 90 RecordRawDeallocation(const string& operation, const int64 step_id, void* ptr, Allocator* allocator, bool deferred) argument [all...] |
/external/tensorflow/tensorflow/contrib/gdr/ |
H A D | gdr_rendezvous_mgr.h | 32 BaseRemoteRendezvous* Create(int64 step_id, const WorkerEnv* worker_env);
|
H A D | gdr_worker.cc | 59 const int64 step_id = request->step_id(); local 61 TRACEPRINTF("RecvTensor: %lld %s", step_id, key.c_str()); 77 opts->SetCancelCallback([this, step_id]() { AbortStep(step_id); }); 80 step_id, parsed,
|
/external/tensorflow/tensorflow/core/distributed_runtime/rpc/ |
H A D | rpc_rendezvous_mgr.h | 29 // until the tensor is received. Each global unique "step_id" 50 BaseRemoteRendezvous* Create(int64 step_id, const WorkerEnv* worker_env);
|
H A D | rpc_rendezvous_mgr_test.cc | 85 const int64 step_id = 123; local 90 RemoteRendezvous* rendez = rmgr_.Find(step_id); 99 TF_ASSERT_OK(rmgr_.RecvLocal(step_id, key, &val, &val_dead)); 102 rmgr_.Cleanup(step_id); 110 const int64 step_id = 123; local 111 RemoteRendezvous* rendez = rmgr_.Find(step_id); 124 const int64 step_id = 321; local 125 RemoteRendezvous* rendez = rmgr_.Find(step_id); 127 SchedClosure([this, step_id]() { 129 rmgr_.Cleanup(step_id); 144 const int64 step_id = 123; local 170 const int64 step_id = 123; local [all...] |
/external/tensorflow/tensorflow/contrib/mpi/ |
H A D | mpi_rendezvous_mgr.h | 72 void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id, argument 75 mRes_.set_step_id(step_id); 106 void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id) { argument 107 req_.set_step_id(step_id); 118 MPIRemoteRendezvous(const WorkerEnv* env, int64 step_id, const MPIUtils* util, argument 120 : BaseRemoteRendezvous(env, step_id), 148 void QueueRequest(std::string key, int64 step_id, argument 153 const std::string key_id = strings::StrCat(key, "_", step_id); 158 BaseRemoteRendezvous* Create(int64 step_id, 194 void GetRecvCall(const int64 step_id, cons argument 206 RemoveRecvCall(const int64 step_id, const std::string& key) argument [all...] |
H A D | mpi_rendezvous_mgr.cc | 57 BaseRemoteRendezvous* MPIRendezvousMgr::Create(int64 step_id, argument 59 return new MPIRemoteRendezvous(worker_env, step_id, mpiutils_, this); 155 const int64 step_id = request.step_id(); local 199 [this, parsed, step_id, send_cb]( 205 << " step: " << step_id 211 << " @ step: " << step_id << std::endl; 214 mpi_send_call->Init(parsed, step_id, is_dead); 271 worker_env_2->compute_pool->Schedule([this, step_id, parsed, done_cb]() { 272 this->RecvLocalAsync(step_id, parse 291 const int64 step_id = mRes.step_id(); local [all...] |
/external/tensorflow/tensorflow/core/protobuf/ |
H A D | worker.proto | 209 // The master generates a global unique `step_id` to distinguish 211 // (e.g., send/recv ops) with each other using `step_id` to 213 int64 step_id = 2; 276 int64 step_id = 1; 291 // REQUIRED: This must eventually correspond to the `step_id` passed 293 int64 step_id = 1; 366 int64 step_id = 1;
|