1122cdce33e3e0a01a7f82645617317530aa571fbA. Unique TensorFlower/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 29c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur 39c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath KudlurLicensed under the Apache License, Version 2.0 (the "License"); 49c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudluryou may not use this file except in compliance with the License. 59c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath KudlurYou may obtain a copy of the License at 69c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur 79c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur http://www.apache.org/licenses/LICENSE-2.0 89c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur 99c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath KudlurUnless required by applicable law or agreed to in writing, software 109c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlurdistributed under the License is distributed on an "AS IS" BASIS, 119c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath KudlurWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 129c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath KudlurSee the License for the specific language governing permissions and 139c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlurlimitations under the License. 149c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur==============================================================================*/ 159c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur 16f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Exposes the family of BLAS routines as pre-canned high performance calls for 17f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// use in conjunction with the StreamExecutor abstraction. 
18f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// 19f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Note that this interface is optionally supported by platforms; see 20f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// StreamExecutor::SupportsBlas() for details. 21f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// 22f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// This abstraction makes it simple to entrain BLAS operations on GPU data into 23f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// a Stream -- users typically will not use this API directly, but will use the 24f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Stream builder methods to entrain these operations "under the hood". For 25f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// example: 26f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// 27f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024); 28f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024); 29f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// // ... populate x and y ... 30f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Stream stream{stream_exec}; 31f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// stream 32f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// .Init() 33553e8f14c8c025a8c09e0a6cb824c786bc258f56A. Unique TensorFlower// .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1); 34553e8f14c8c025a8c09e0a6cb824c786bc258f56A. 
//   SE_CHECK_OK(stream.BlockHostUntilDone());
//
// By using stream operations in this manner the user can easily intermix custom
// kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
// routines.

#ifndef TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
#define TENSORFLOW_STREAM_EXECUTOR_BLAS_H_

#include <complex>
#include <limits>
#include "tensorflow/stream_executor/platform/port.h"

#include "tensorflow/stream_executor/lib/array_slice.h"

namespace Eigen {
struct half;
}  // namespace Eigen

namespace perftools {
namespace gputools {

class Stream;
Unique TensorFlowerclass ScratchAllocator; 57f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 58f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurtemplate <typename ElemT> 59f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass DeviceMemory; 60f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace blas { 62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 63f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether the input matrix will be transposed or 64f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// transposed+conjugated before any BLAS operations. 65f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Transpose { kNoTranspose, kTranspose, kConjugateTranspose }; 66f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for t. 68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring TransposeString(Transpose t); 69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 70f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether the upper or lower triangular part of a 71f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// symmetric/Hermitian matrix is used. 72f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class UpperLower { kUpper, kLower }; 73f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for ul. 75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring UpperLowerString(UpperLower ul); 76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 77f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether a matrix is unit triangular. 
78f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Diagonal { kUnit, kNonUnit }; 79f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for d. 81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring DiagonalString(Diagonal d); 82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 83f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether a Hermitian matrix appears on the left or right in 84f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// operation. 85f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Side { kLeft, kRight }; 86f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for s. 88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring SideString(Side s); 89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 9001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Type with which intermediate computations of a blas routine are performed. 9101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// 9201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Some blas calls can perform computations with a type that's different than 9301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// the type of their inputs/outputs. This lets you e.g. multiply two matricies 9401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// of int8s using float32s to store the matmul's intermediate values. 9501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarenum class ComputationType { 9601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF16, // 16-bit floating-point 9701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF32, // 32-bit floating-point 9801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF64, // 64-bit floating-point 99a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower kI32, // 32-bit integer 10001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kComplexF32, // Complex number comprised of two f32s. 101a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower kComplexF64, // Complex number comprised of two f64s. 10201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar}; 10301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 10401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Converts a ComputationType to a string. 10501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarstring ComputationTypeString(ComputationType ty); 10601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 10701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Opaque identifier for an "algorithm" used by a blas routine. This functions 10801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// as a hint to the blas library. 10901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebartypedef int64 AlgorithmType; 1103e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultAlgorithm = -1; 1113e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultBlasGemm = -2; 1123e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultBlasGemv = -3; 1133e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kNoAlgorithm = -4; 11401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 115a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// blas uses -1 to represent the default algorithm. This happens to match up 116a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// with the CUBLAS_GEMM_DFALT constant, so cuda_blas.cc is using static_cast 117a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to convert from AlgorithmType to cublasGemmAlgo_t, and uses a static_assert 118a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to ensure that this assumption does not break. 
119a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// If another blas implementation uses a different value for the default 120a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// algorithm, then it needs to convert kDefaultGemmAlgo to that value 121a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// (e.g. via a function called ToWhateverGemmAlgo). 122a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlowerconstexpr AlgorithmType kDefaultGemmAlgo = -1; 123a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower 12401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Describes the result of a performance experiment, usually timing the speed of 12501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// a particular AlgorithmType. 12601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// 12701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// If the call we were benchmarking failed (a common occurrence; not all 12801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// algorithms are valid for all calls), is_valid() will be false. 
12901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarclass ProfileResult { 13001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar public: 13101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool is_valid() const { return is_valid_; } 13201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_is_valid(bool val) { is_valid_ = val; } 13301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm() const { return algorithm_; } 13401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_algorithm(AlgorithmType val) { algorithm_ = val; } 13501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float elapsed_time_in_ms() const { return elapsed_time_in_ms_; } 13601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; } 13701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 13801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar private: 13901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool is_valid_ = false; 1403e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang AlgorithmType algorithm_ = kDefaultAlgorithm; 14101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float elapsed_time_in_ms_ = std::numeric_limits<float>::max(); 14201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar}; 14301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 1443e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangclass AlgorithmConfig { 1453e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang public: 1463e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang AlgorithmConfig() : algorithm_(kDefaultAlgorithm) {} 1473e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang explicit AlgorithmConfig(AlgorithmType algorithm) : algorithm_(algorithm) {} 1483e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang AlgorithmType algorithm() const { return algorithm_; } 1493e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang void set_algorithm(AlgorithmType val) { algorithm_ = val; } 
1503e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool operator==(const AlgorithmConfig &other) const { 1513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang return this->algorithm_ == other.algorithm_; 1523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang } 1533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool operator!=(const AlgorithmConfig &other) const { 1543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang return !(*this == other); 1553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang } 1563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang string ToString() const; 1573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang 1583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang private: 1593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang AlgorithmType algorithm_; 1603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang}; 1613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang 162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BLAS support interface -- this can be derived from a GPU executor when the 163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// underlying platform has an BLAS library implementation available. See 164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// StreamExecutor::AsBlas(). 165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// 166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Thread-hostile: CUDA associates a CUDA-context with a particular thread in 167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// the system. 
Any operation that a user attempts to perform by enqueueing BLAS 168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// operations on a thread not-associated with the CUDA-context has unknown 169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// behavior at the current time; see b/13176597 170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass BlasSupport { 171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur public: 172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual ~BlasSupport() {} 173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the sum of magnitudes of the vector elements. 175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // result <- |Re x(1)| + |Im x(1)| + |Re x(2)| + |Im x(2)|+ ... + |Re x(n)| 176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // + |Im x(n)|. 177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Note that Im x(i) = 0 for real types float/double. 
178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS y <- ax+y operation. 
192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha, 193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha, 196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, 199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, 203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Copies vector to another vector: y <- x. 
208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- x . y. 
222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDot(Stream *stream, uint64 elem_count, 223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDot(Stream *stream, uint64 elem_count, 227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- conj(x) . y for complex types. 
232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotc(Stream *stream, uint64 elem_count, 233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) = 0; 236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotc(Stream *stream, uint64 elem_count, 237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) = 0; 240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- x . y for complex types. Note that 242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x is unconjugated in this routine. 
243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotu(Stream *stream, uint64 elem_count, 244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) = 0; 247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotu(Stream *stream, uint64 elem_count, 248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) = 0; 251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the Euclidean norm of a vector: result <- ||x||. 
253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // See the following link for more information of Euclidean norm: 254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // http://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm 255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs rotation of points in the plane: 269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x(i) = c*x(i) + s*y(i) 270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y(i) = c*y(i) - s*x(i). 
271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy, float c, 274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float s) = 0; 275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy, double c, 278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) = 0; 279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, 281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy, 282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float c, float s) = 0; 283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, 285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy, 286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double c, double s) = 0; 287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the parameters for a Givens rotation. 
289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given the Cartesian coordinates (a, b) of a point, these routines return 290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // the parameters c, s, r, and z associated with the Givens rotation. The 291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // parameters c and s define a unitary matrix such that: 292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | c s |.| a | = | r | 294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | -s c | | b | | 0 | 295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // The parameter z is defined such that if |a| > |b|, z is s; otherwise if 297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c is not 0 z is 1/c; otherwise z is 1. 298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a, 299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, DeviceMemory<float> *c, 300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *s) = 0; 301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a, 302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, DeviceMemory<double> *c, 303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *s) = 0; 304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a, 305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, 306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, 307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *s) = 0; 
308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a, 309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, 310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, 311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *s) = 0; 312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs modified Givens rotation of points in the plane. 314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given two vectors x and y, each vector element of these vectors is replaced 315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // as follows: 316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | x(i) | = H | x(i) | 318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | y(i) | | y(i) | 319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // for i=1 to n, where H is a modified Givens transformation matrix whose 321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // values are stored in the param[1] through param[4] array. 322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // For more information please Google this routine. 
323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotm(Stream *stream, uint64 elem_count, 324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy, 326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> ¶m) = 0; 327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotm(Stream *stream, uint64 elem_count, 328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy, 330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> ¶m) = 0; 331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the parameters for a modified Givens rotation. 333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given Cartesian coordinates (x1, y1) of an input vector, these routines 334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // compute the components of a modified Givens transformation matrix H that 335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // zeros the y-component of the resulting vector: 336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | x1 | = H | x1 * sqrt(d1) | 338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | 0 | | y1 * sqrt(d1) | 339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // For more information please Google this routine. 
341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1, 342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *d2, DeviceMemory<float> *x1, 343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y1, 344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *param) = 0; 345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1, 346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *d2, DeviceMemory<double> *x1, 347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y1, 348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *param) = 0; 349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the product of a vector by a scalar: x <- a*x. 
351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, 352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, 354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, 356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, 358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, 360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, 363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Swaps a vector with another vector. 
367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, 375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, 378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Finds the index of the element with maximum absolute value. 
381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, 392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<int> *result) = 0; 393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Finds the index of the element with minimum absolute value. 
395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, 406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<int> *result) = 0; 407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a general band matrix: 409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a' * x + beta * y, 413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * conj(a') * x + beta * y, 415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 
416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an m-by-n general band matrix, with kl 417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // sub-diagonals and ku super-diagonals; x is a vector with 418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n(trans==kNoTranspose)/m(otherwise) elements; 419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements. 420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, float alpha, 422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, double alpha, 427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, 432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, 439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a general matrix. 
446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a' * x + beta * y, 450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * conj(a') * x + beta * y, 452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an m-by-n general matrix; x is a vector 454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // with n(trans==kNoTranspose)/m(otherwise) elements; 455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements. 456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, 458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<float> &x, int incx, 459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *y, int incy) = 0; 460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double alpha, const DeviceMemory<double> &a, 462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<double> &x, int incx, 463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *y, int incy) = 0; 464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 
465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, 466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, 472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 4773e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemvWithProfiling( 4783e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, float alpha, 4793e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x, 4803e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incx, float beta, DeviceMemory<float> *y, int incy, 4813e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 4823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemvWithProfiling( 4833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 
m, uint64 n, double alpha, 4843e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x, 4853e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incx, double beta, DeviceMemory<double> *y, int incy, 4863e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 4873e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemvWithProfiling( 4883e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, 4893e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> alpha, const DeviceMemory<std::complex<float>> &a, 4903e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<std::complex<float>> &x, int incx, 4913e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> beta, DeviceMemory<std::complex<float>> *y, int incy, 4923e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 4933e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemvWithProfiling( 4943e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, 4953e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> alpha, const DeviceMemory<std::complex<double>> &a, 4963e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<std::complex<double>> &x, int incx, 4973e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> beta, DeviceMemory<std::complex<double>> *y, 4983e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incy, ProfileResult *output_profile_result) = 0; 4993e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang 500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a general matrix. 
501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + a, 503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha, 507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha, 511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update (conjugated) of a general matrix. 516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(y') + a, 518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 
521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, 522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, 527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update (unconjugated) of a general matrix. 533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + a, 535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 
538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, 539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, 544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian band matrix. 550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian band matrix, with k 554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // super-diagonals; x and y are n-element vectors. 
555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<float> alpha, 557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<double> alpha, 563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian matrix. 569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian matrix; x and y are 573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vectors. 
574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, 575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, 581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a Hermitian matrix. 588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + a, 590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian 592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, 594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, 598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a Hermitian matrix. 603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + conj(alpha) * y * conj(x') + a, 605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian 607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, 609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, 614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian packed matrix. 620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian matrix, supplied in 624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // packed form; x and y are n-element vectors. 
625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a Hermitian packed matrix. 639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + a, 641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian 643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, 645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) = 0; 648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, 649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) = 0; 652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a Hermitian packed matrix. 654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + conj(alpha) * y * conj(x') + a, 656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian 658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) = 0; 664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) = 0; 669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a symmetric band matrix. 671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric band matrix, with k 675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // super-diagonals; x and y are n-element vectors. 
676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, float alpha, const DeviceMemory<float> &a, 678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<float> &x, int incx, 679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *y, int incy) = 0; 680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, double alpha, const DeviceMemory<double> &a, 682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<double> &x, int incx, 683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *y, int incy) = 0; 684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a symmetric packed matrix. 686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric matrix, supplied in 690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // packed form; x and y are n-element vectors. 
691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &ap, 693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &ap, 697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a symmetric packed matrix. 701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + a, 703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric 705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, 707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) = 0; 709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, 710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) = 0; 712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a symmetric packed matrix. 714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + alpha * y * x' + a, 716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric 718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) = 0; 723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) = 0; 727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product for a symmetric matrix. 729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric matrix; x and y are 733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vectors. 
734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, 735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, 739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a symmetric matrix. 744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + a, 746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric 748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, 750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, 753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of symmetric matrix. 757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + alpha * y * x' + a, 759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric 761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, 763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, 767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular band matrix. 772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular band matrix, 780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // with k+1 diagonals; x is a n-element vector. 
781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, 784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, 788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, 792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, 793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, 797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, 798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur 800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // band matrix as below: 802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular band matrix, with k+1 diagonals. 811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, 814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, 818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 
820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, 822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, 823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, 827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, 828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular packed matrix. 831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular matrix, 839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // supplied in packed form; x is a n-element vector. 
840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, 843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, 847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur // packed matrix as below: 859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular matrix, supplied in packed form. 868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, 871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, 875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 
879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular matrix. 886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular matrix; x is a 894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vector. 
895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 903f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 904f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 905f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 906f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 907f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 908f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 909f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 910f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 911f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 912f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 
913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix as below: 914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular matrix. 923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 938f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product with general matrices: 941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * op(a) * op(b) + beta * c, 943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(X) is one of op(X) = X, or op(X) = X', or op(X) = conj(X'); alpha and 945f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // beta are scalars; a, b, and c are matrices; op(a) is an m-by-k matrix; 946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(b) is a k-by-n matrix; c is an m-by-n matrix. 947523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // 948523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // Note: The half interface uses float precision internally; the version 949523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // that uses half precision internally is not yet supported. There is no 950523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // batched version of the half-precision interface. 951523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 952523055469c8a61425e3b8f104be67787c2933ccbA. 
Unique TensorFlower blas::Transpose transb, uint64 m, uint64 n, uint64 k, 95301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float alpha, const DeviceMemory<Eigen::half> &a, 95401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int lda, const DeviceMemory<Eigen::half> &b, int ldb, 95501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float beta, DeviceMemory<Eigen::half> *c, 95601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc) = 0; 957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 962f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int 
lda, 971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 975f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 976f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 977f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 978f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 979f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 980f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 981f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 9823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemmWithProfiling( 9833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 9843e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 n, uint64 k, float alpha, const DeviceMemory<Eigen::half> &a, 9853e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<Eigen::half> &b, int ldb, float beta, 9863e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang DeviceMemory<Eigen::half> *c, int ldc, 9873e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 9883e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemmWithProfiling( 9893e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, 
blas::Transpose transb, uint64 m, 9903e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda, 9913e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, 9923e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int ldc, ProfileResult *output_profile_result) = 0; 9933e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemmWithProfiling( 9943e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 9953e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda, 9963e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<double> &b, int ldb, double beta, 9973e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang DeviceMemory<double> *c, int ldc, 9983e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 9993e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemmWithProfiling( 10003e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 10013e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 n, uint64 k, std::complex<float> alpha, 10023e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<float>> &a, int lda, 10033e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<float>> &b, int ldb, 10043e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, 10053e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 10063e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang virtual bool DoBlasGemmWithProfiling( 
10073e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 10083e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 n, uint64 k, std::complex<double> alpha, 10093e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<double>> &a, int lda, 10103e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<double>> &b, int ldb, 10113e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc, 10123e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang ProfileResult *output_profile_result) = 0; 10133e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang 10143e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang // Gets a list of supported algorithms for DoBlasGemmWithAlgorithm. 101501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool GetBlasGemmAlgorithms( 101601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::vector<AlgorithmType> *out_algorithms) = 0; 101701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 101801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Like DoBlasGemm, but accepts an algorithm and a compute type. 101901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 102001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // The compute type lets you say (e.g.) that the inputs and outputs are 102101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Eigen::halfs, but you want the internal computations to be done with 102201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // float32 precision. 102301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 102401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Note the subtle difference in the version that accepts Eigen::half -- 102501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // alpha and beta have type const Eigen::half&, not float. 
102601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 102701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // If output_profile_result is not null, a failure here does not put the 102801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // stream in a failure state. Instead, success/failure is indicated by 102901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // output_profile_result->is_valid(). This lets you use this function for 103001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // choosing the best algorithm among many (some of which may fail) without 103101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // creating a new Stream for each attempt. 103201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 103301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1034a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, int lda, 1035a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower const DeviceMemory<int8> &b, int ldb, int beta, DeviceMemory<int32> *c, 1036a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower int ldc, ComputationType computation_type, AlgorithmType algorithm, 1037a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower ProfileResult *output_profile_result) = 0; 1038a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower virtual bool DoBlasGemmWithAlgorithm( 1039a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 104001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, const Eigen::half &alpha, 104101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &a, int lda, 104201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta, 104301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<Eigen::half> *c, int ldc, ComputationType computation_type, 104401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; 104501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 104601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 104701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda, 104801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, 104901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc, ComputationType computation_type, AlgorithmType algorithm, 105001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 105101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 105201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 105301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda, 105401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<double> &b, int ldb, double beta, 105501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<double> *c, int ldc, ComputationType computation_type, 
105601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; 105701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 105801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 105901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, std::complex<float> alpha, 106001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &a, int lda, 106101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &b, int ldb, 106201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, 106301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ComputationType computation_type, AlgorithmType algorithm, 106401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 106501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 106601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 106701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, std::complex<double> alpha, 106801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &a, int lda, 106901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &b, int ldb, 107001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc, 107101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ComputationType computation_type, AlgorithmType algorithm, 107201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 107301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 
1074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a batch of matrix-matrix product with general matrices. 1075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // This is a batched version of DoBlasGemm. 1076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // The batched GEMM computes matrix product for each input/output in a, b, 1077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // and c, which contain batch_count DeviceMemory objects. 1078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, float alpha, 1081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &a, int lda, 1082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta, 1083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &c, int ldc, 108405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, double alpha, 1088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &a, int lda, 1089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, 1090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &c, int ldc, 109105ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, std::complex<float> alpha, 1095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, 1096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, 1097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, 109905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, std::complex<double> alpha, 1103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, int lda, 1104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, int ldb, 1105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, int ldc, 110705ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is Hermitian: 1110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b + beta * c, 1112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b * a + beta * c, 1114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1115f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is a Hermitian matrix; b and c are m-by-n 1116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrices. 
1117f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemm(Stream *stream, blas::Side side, 1118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemm(Stream *stream, blas::Side side, 1125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1130f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1131f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a Hermitian rank-k update. 
1133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * conj(a') + beta * c, 1135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * conj(a') * a + beta * c, 1137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is an n-by-n Hermitian matrix; a is an n-by-k 1139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix in the first case and a k-by-n matrix in the second case. 1140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, 1141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 1143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, 1145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, 1147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 1149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, 1151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a Hermitian 
rank-2k update. 1154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * conj(b') + conj(alpha) * b * conj(a') + beta * c, 1156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * conj(b') * a + conj(alpha) * conj(a') * b + beta * c, 1158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is an n-by-n Hermitian matrix; a and b are 1160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-by-k matrices in the first case and k-by-n matrices in the second case. 1161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo, 1162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, 1167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo, 1169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, 
int ldb, 1173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, 1174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is symmetric. 1177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b + beta * c, 1179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b * a + beta * c, 1181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is a symmetric matrix; b and c are m-by-n 1183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrices. 1184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 1188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 1189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 
1193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 1194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a symmetric rank-k update. 
1210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * a' + beta * c, 1212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a' * a + beta * c, 1214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is an n-by-n symmetric matrix; a is an n-by-k 1216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix in the first case and a k-by-n matrix in the second case. 1217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *c, int ldc) = 0; 1221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *c, int ldc) = 0; 1225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
1229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a symmetric rank-2k update. 1239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b' + alpha * b * a' + beta * c, 1241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b' * a + alpha * a' * b + beta * c, 1243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is an n-by-n symmetric matrix; a and b are 1245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-by-k matrices in the first case and k-by-n matrices in the second case. 
1246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 1250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 1251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 1255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 1256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 
1264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is triangular. 1272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b <- alpha * op(a) * b, 1274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b <- alpha * b * op(a) 1276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; b is an m-by-n matrix; a is a unit, or non-unit, upper 1278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', or 1279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(a) = conj(a'). 
1280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, float alpha, 1283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 1284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, int ldb) = 0; 1285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, double alpha, 1288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 1289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, int ldb) = 0; 1290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, 1293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) = 0; 1296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Diagonal diag, uint64 m, uint64 n, 1299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) = 0; 1302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a triangular matrix equation. 1304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(a) * x = alpha * b, 1306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x * op(a) = alpha * b 1308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and b are m-by-n matrices; a is a unit, or non-unit, 1310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // upper or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', 1311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or op(a) = conj(a'). 
1312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, float alpha, 1315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 1316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, int ldb) = 0; 1317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, double alpha, 1320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 1321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, int ldb) = 0; 1322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, 1325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) = 0; 1328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Diagonal diag, uint64 m, uint64 n, 1331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) = 0; 1334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur protected: 1336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlasSupport() {} 1337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur private: 1339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur SE_DISALLOW_COPY_AND_ASSIGN(BlasSupport); 1340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}; 1341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Macro used to quickly declare overrides for abstract virtuals in the 1343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BlasSupport base class. 134405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower#define TENSORFLOW_STREAM_EXECUTOR_GPU_BLAS_SUPPORT_OVERRIDES \ 1345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha, \ 1358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha, \ 1361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur DeviceMemory<double> *y, int incy) override; \ 1363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, \ 1364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, \ 1368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasCopy(Stream *stream, uint64 elem_count, \ 1381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDot(Stream *stream, uint64 elem_count, \ 1384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDot(Stream *stream, uint64 elem_count, \ 1388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotc(Stream *stream, uint64 elem_count, \ 1392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) override; \ 1395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotc(Stream *stream, uint64 elem_count, \ 1396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
DeviceMemory<std::complex<double>> *result) override; \ 1399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotu(Stream *stream, uint64 elem_count, \ 1400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) override; \ 1403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotu(Stream *stream, uint64 elem_count, \ 1404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) override; \ 1407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy, float c, float s) \ 1421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy, double c, \ 1424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) override; \ 1425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, \ 1426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, \ 1427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy, float c, \ 1428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float s) override; \ 1429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, \ 1430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, \ 1431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy, double c, \ 1432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) override; \ 1433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a, \ 1434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, 
DeviceMemory<float> *c, \ 1435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *s) override; \ 1436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a, \ 1437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, DeviceMemory<double> *c, \ 1438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *s) override; \ 1439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a, \ 1440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, \ 1441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, \ 1442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *s) override; \ 1443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a, \ 1444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, \ 1445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, \ 1446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *s) override; \ 1447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy, \ 1449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> ¶m) override; \ 1450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy, \ 1452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> ¶m) 
override; \ 1453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1, \ 1454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *d2, DeviceMemory<float> *x1, \ 1455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y1, DeviceMemory<float> *param) \ 1456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1, \ 1458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *d2, DeviceMemory<double> *x1, \ 1459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y1, \ 1460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *param) override; \ 1461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, \ 1462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, \ 1464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, \ 1466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, \ 1468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, \ 1470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
std::complex<float> alpha, \ 1471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, \ 1473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy) override; \ 1477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy) override; \ 1479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, \ 1480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, \ 1481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, \ 1483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, \ 1484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 
1488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasIamin(Stream *stream, uint64 elem_count, \ 1507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, float alpha, \ 1511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, double alpha, \ 1516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, std::complex<float> alpha, \ 1521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, std::complex<double> alpha, \ 1527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const 
DeviceMemory<std::complex<float>> &a, int lda, \ 1542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 15513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemvWithProfiling( \ 15523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, float alpha, \ 15533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x, \ 15543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incx, float beta, DeviceMemory<float> *y, int incy, \ 15553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 15563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemvWithProfiling( \ 15573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, double alpha, \ 15583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x, \ 
15593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incx, double beta, DeviceMemory<double> *y, int incy, \ 15603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 15613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemvWithProfiling( \ 15623e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 15633e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> alpha, const DeviceMemory<std::complex<float>> &a, \ 15643e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<std::complex<float>> &x, int incx, \ 15653e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> beta, DeviceMemory<std::complex<float>> *y, \ 15663e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incy, blas::ProfileResult *output_profile_result) override; \ 15673e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemvWithProfiling( \ 15683e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 15693e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> alpha, const DeviceMemory<std::complex<double>> &a, \ 15703e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<std::complex<double>> &x, int incx, \ 15713e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> beta, DeviceMemory<std::complex<double>> *y, \ 15723e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int incy, blas::ProfileResult *output_profile_result) override; \ 1573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha, \ 1574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int 
incy, \ 1576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha, \ 1578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 1581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, \ 1582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, \ 1587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, \ 1592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 
1594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, \ 1597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> 
beta, \ 1612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur double alpha, const DeviceMemory<std::complex<double>> &x, \ 1630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<std::complex<double>> *a, int lda) \ 1631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
DeviceMemory<std::complex<float>> *y, int incy) override; \ 1648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, \ 1651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) override; \ 1657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<std::complex<double>> &x, \ 1659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<std::complex<double>> *ap) override; \ 1660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) 
override; \ 1665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) override; \ 1670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &ap, \ 1680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 
1682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &ap, \ 1684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) override; \ 1689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) override; \ 1692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, \ 1694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) override; \ 1696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
DeviceMemory<double> *ap) override; \ 1700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 1714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, \ 1716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int 
incy, \ 1717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 1722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, \ 1725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, \ 1729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, \ 1733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, int incx) \ 1734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
override; \ 1735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, \ 1738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, int incx) \ 1739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, \ 1743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, \ 1747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, \ 1751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, int incx) \ 1752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 
1753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, \ 1756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, int incx) \ 1757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, \ 1761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, DeviceMemory<double> *x, \ 1765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 
1771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, \ 1773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, \ 1777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, DeviceMemory<double> *x, \ 1781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const 
DeviceMemory<std::complex<double>> &ap, \ 1789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 
1806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1824523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower float alpha, const DeviceMemory<Eigen::half> &a, int lda, \ 1825523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower const DeviceMemory<Eigen::half> &b, int ldb, float beta, \ 1826523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower DeviceMemory<Eigen::half> *c, int ldc) override; \ 1827523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1828523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 1831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 1832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 1836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 1837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const 
DeviceMemory<std::complex<float>> &a, int lda, \ 1841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 18513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemmWithProfiling( \ 18523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 18533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 m, uint64 n, uint64 k, float alpha, \ 18543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<Eigen::half> &a, int lda, \ 18553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<Eigen::half> &b, int ldb, float beta, \ 18563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang DeviceMemory<Eigen::half> *c, int ldc, \ 18573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 18583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemmWithProfiling( \ 
18593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 18603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \ 18613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int lda, const DeviceMemory<float> &b, int ldb, float beta, \ 18623e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang DeviceMemory<float> *c, int ldc, \ 18633e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 18643e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemmWithProfiling( \ 18653e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 18663e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 m, uint64 n, uint64 k, double alpha, \ 18673e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &b, \ 18683e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int ldb, double beta, DeviceMemory<double> *c, int ldc, \ 18693e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 18703e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemmWithProfiling( \ 18713e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 18723e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 m, uint64 n, uint64 k, std::complex<float> alpha, \ 18733e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<float>> &a, int lda, \ 18743e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<float>> &b, int ldb, \ 18753e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \ 
18763e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang blas::ProfileResult *output_profile_result) override; \ 18773e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang bool DoBlasGemmWithProfiling( \ 18783e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 18793e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang uint64 m, uint64 n, uint64 k, std::complex<double> alpha, \ 18803e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<double>> &a, int lda, \ 18813e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang const DeviceMemory<std::complex<double>> &b, int ldb, \ 18823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang std::complex<double> beta, DeviceMemory<std::complex<double>> *c, \ 18833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang int ldc, blas::ProfileResult *output_profile_result) override; \ 188401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool GetBlasGemmAlgorithms(std::vector<blas::AlgorithmType> *out_algorithms) \ 188501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar override; \ 188601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 188701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1888a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower uint64 m, uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, \ 1889a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower int lda, const DeviceMemory<int8> &b, int ldb, int beta, \ 1890a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower DeviceMemory<int> *c, int ldc, blas::ComputationType computation_type, \ 1891a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower blas::AlgorithmType algorithm, \ 1892a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower blas::ProfileResult *output_profile_result) override; \ 1893a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower bool DoBlasGemmWithAlgorithm( \ 1894a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 189501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, const Eigen::half &alpha, \ 189601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &a, int lda, \ 189701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta, \ 189801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<Eigen::half> *c, int ldc, \ 189901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 190001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 190101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 190201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 190301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \ 190401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int lda, const DeviceMemory<float> &b, int ldb, float beta, \ 190501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<float> *c, int ldc, blas::ComputationType computation_type, \ 190601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::AlgorithmType algorithm, \ 190701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 190801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 190901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose 
transb, \ 191001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, double alpha, \ 191101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &b, \ 191201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldb, double beta, DeviceMemory<double> *c, int ldc, \ 191301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 191401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 191501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 191601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 191701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, std::complex<float> alpha, \ 191801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &a, int lda, \ 191901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &b, int ldb, \ 192001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \ 192101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 192201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 192301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 192401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 192501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, std::complex<double> alpha, \ 192601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &a, int lda, \ 
192701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &b, int ldb, \ 192801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<double> beta, DeviceMemory<std::complex<double>> *c, \ 192901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc, blas::ComputationType computation_type, \ 193001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::AlgorithmType algorithm, \ 193101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 1932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, float alpha, \ 1935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &a, int lda, \ 1936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta, \ 1937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &c, int ldc, \ 193805ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, double alpha, \ 1942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &a, int lda, \ 1943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, \ 1944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &c, int ldc, \ 194505ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1947f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1948f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, std::complex<float> alpha, \ 1949f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, \ 1950f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, \ 1951f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1952f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, \ 195305ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1954f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1955f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1956f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, std::complex<double> alpha, \ 1957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, \ 1958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, \ 1959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, \ 1960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldb, std::complex<double> beta, \ 1961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, \ 196205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \ 1963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<float> alpha, \ 1965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<double> alpha, \ 1971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1975f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, \ 1976f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 1977f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1978f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, int ldc) \ 1979f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
override; \ 1980f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, \ 1981f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 1982f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1983f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, int ldc) \ 1984f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1985f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2k( \ 1986f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n, \ 1987f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<float> alpha, \ 1988f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1989f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, float beta, \ 1990f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1991f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2k( \ 1992f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n, \ 1993f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<double> alpha, \ 1994f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1995f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, double beta, \ 1996f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1997f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream 
*stream, blas::Side side, blas::UpperLower uplo, \ 1998f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, float alpha, \ 1999f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 2000f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 2001f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 2002f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2003f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, double alpha, \ 2004f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 2005f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 2006f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 2007f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2008f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<float> alpha, \ 2009f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 2010f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 2011f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 2012f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 2013f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2014f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<double> alpha, \ 2015f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 2016f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 2017f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 2018f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 2019f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 2020f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 2021f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, float beta, \ 2022f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 2023f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 2024f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 2025f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, double beta, \ 2026f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 2027f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 2028f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 2029f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 2030f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 2031f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 2032f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 2033f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 2034f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 2035f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 2036f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 2037f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 2038f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 2039f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 2040f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 2041f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 2042f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 2043f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 2044f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 2045f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 2046f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 2047f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 2048f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 2049f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 2050f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 2051f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 
2052f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 2053f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 2054f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 2055f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 2056f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 2057f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 2058f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 2059f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 2060f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 2061f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 2062f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 2063f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2064f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2065f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, \ 2066f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<float> *b, int ldb) override; \ 2067f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2068f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2069f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double 
alpha, const DeviceMemory<double> &a, \ 2070f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<double> *b, int ldb) override; \ 2071f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2072f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2073f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, \ 2074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 2075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) override; \ 2076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, \ 2079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 2080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) override; \ 2081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, \ 2084f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<float> *b, int ldb) override; \ 2085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double alpha, const DeviceMemory<double> &a, \ 2088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<double> *b, int ldb) override; \ 2089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2091f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, \ 2092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 2093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) override; \ 2094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 2095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 2096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, \ 2097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 2098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) override; 2099f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 2100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace blas 2101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace gputools 2102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace perftools 2103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 2104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#endif // TENSORFLOW_STREAM_EXECUTOR_BLAS_H_ 2105