blas.h revision a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cd
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Exposes the family of BLAS routines as pre-canned high performance calls for
// use in conjunction with the StreamExecutor abstraction.
//
// Note that this interface is optionally supported by platforms; see
// StreamExecutor::SupportsBlas() for details.
//
// This abstraction makes it simple to entrain BLAS operations on GPU data into
// a Stream -- users typically will not use this API directly, but will use the
// Stream builder methods to entrain these operations "under the hood". For
// example:
//
//  DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024);
//  DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024);
//  // ... populate x and y ...
//  Stream stream{stream_exec};
//  stream
//    .Init()
//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1)
//    .BlockHostUntilDone();
//
// By using stream operations in this manner the user can easily intermix custom
// kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
// routines.

#ifndef TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
#define TENSORFLOW_STREAM_EXECUTOR_BLAS_H_

#include <complex>
#include <limits>

#include "tensorflow/stream_executor/lib/array_slice.h"
#include "tensorflow/stream_executor/platform/port.h"

namespace Eigen {
struct half;
Unique TensorFlower} // namespace Eigen 52f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 53f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace perftools { 54f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace gputools { 55f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 56f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass Stream; 5705ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlowerclass ScratchAllocator; 58f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 59f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurtemplate <typename ElemT> 60f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass DeviceMemory; 61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace blas { 63f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 64f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether the input matrix will be transposed or 65f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// transposed+conjugated before any BLAS operations. 66f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Transpose { kNoTranspose, kTranspose, kConjugateTranspose }; 67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for t. 69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring TransposeString(Transpose t); 70f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 71f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether the upper or lower triangular part of a 72f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// symmetric/Hermitian matrix is used. 73f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class UpperLower { kUpper, kLower }; 74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for ul. 
76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring UpperLowerString(UpperLower ul); 77f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 78f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether a matrix is unit triangular. 79f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Diagonal { kUnit, kNonUnit }; 80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for d. 82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring DiagonalString(Diagonal d); 83f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 84f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Specifies whether a Hermitian matrix appears on the left or right in 85f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// operation. 86f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurenum class Side { kLeft, kRight }; 87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for s. 89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring SideString(Side s); 90f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 9101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Type with which intermediate computations of a blas routine are performed. 9201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// 9301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Some blas calls can perform computations with a type that's different than 9401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// the type of their inputs/outputs. This lets you e.g. multiply two matricies 9501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// of int8s using float32s to store the matmul's intermediate values. 
9601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarenum class ComputationType { 9701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF16, // 16-bit floating-point 9801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF32, // 32-bit floating-point 9901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kF64, // 64-bit floating-point 100a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower kI32, // 32-bit integer 10101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar kComplexF32, // Complex number comprised of two f32s. 102a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower kComplexF64, // Complex number comprised of two f64s. 10301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar}; 10401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 10501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Converts a ComputationType to a string. 10601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarstring ComputationTypeString(ComputationType ty); 10701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 10801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Opaque identifier for an "algorithm" used by a blas routine. This functions 10901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// as a hint to the blas library. 11001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebartypedef int64 AlgorithmType; 11101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 112a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// blas uses -1 to represent the default algorithm. This happens to match up 113a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// with the CUBLAS_GEMM_DFALT constant, so cuda_blas.cc is using static_cast 114a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to convert from AlgorithmType to cublasGemmAlgo_t, and uses a static_assert 115a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to ensure that this assumption does not break. 116a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower// If another blas implementation uses a different value for the default 117a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// algorithm, then it needs to convert kDefaultGemmAlgo to that value 118a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// (e.g. via a function called ToWhateverGemmAlgo). 119a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlowerconstexpr AlgorithmType kDefaultGemmAlgo = -1; 120a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower 12101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Describes the result of a performance experiment, usually timing the speed of 12201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// a particular AlgorithmType. 12301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// 12401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// If the call we were benchmarking failed (a common occurrence; not all 12501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// algorithms are valid for all calls), is_valid() will be false. 
12601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarclass ProfileResult { 12701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar public: 12801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool is_valid() const { return is_valid_; } 12901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_is_valid(bool val) { is_valid_ = val; } 13001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm() const { return algorithm_; } 13101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_algorithm(AlgorithmType val) { algorithm_ = val; } 13201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float elapsed_time_in_ms() const { return elapsed_time_in_ms_; } 13301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; } 13401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 13501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar private: 13601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool is_valid_ = false; 13701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm_ = 0; 13801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float elapsed_time_in_ms_ = std::numeric_limits<float>::max(); 13901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar}; 14001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BLAS support interface -- this can be derived from a GPU executor when the 142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// underlying platform has an BLAS library implementation available. See 143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// StreamExecutor::AsBlas(). 144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// 145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Thread-hostile: CUDA associates a CUDA-context with a particular thread in 146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// the system. 
Any operation that a user attempts to perform by enqueueing BLAS 147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// operations on a thread not-associated with the CUDA-context has unknown 148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// behavior at the current time; see b/13176597 149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass BlasSupport { 150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur public: 151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual ~BlasSupport() {} 152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the sum of magnitudes of the vector elements. 154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // result <- |Re x(1)| + |Im x(1)| + |Re x(2)| + |Im x(2)|+ ... + |Re x(n)| 155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // + |Im x(n)|. 156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Note that Im x(i) = 0 for real types float/double. 
157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAsum(Stream *stream, uint64 elem_count, 167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS y <- ax+y operation. 
171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha, 172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha, 175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, 178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, 182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Copies vector to another vector: y <- x. 
187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasCopy(Stream *stream, uint64 elem_count, 197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- x . y. 
201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDot(Stream *stream, uint64 elem_count, 202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDot(Stream *stream, uint64 elem_count, 206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- conj(x) . y for complex types. 
211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotc(Stream *stream, uint64 elem_count, 212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) = 0; 215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotc(Stream *stream, uint64 elem_count, 216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) = 0; 219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a BLAS dot product result <- x . y for complex types. Note that 221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x is unconjugated in this routine. 
222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotu(Stream *stream, uint64 elem_count, 223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) = 0; 226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasDotu(Stream *stream, uint64 elem_count, 227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) = 0; 230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the Euclidean norm of a vector: result <- ||x||. 
232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // See the following link for more information of Euclidean norm: 233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // http://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm 234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) = 0; 243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count, 244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) = 0; 246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs rotation of points in the plane: 248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x(i) = c*x(i) + s*y(i) 249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y(i) = c*y(i) - s*x(i). 
250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy, float c, 253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float s) = 0; 254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy, double c, 257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) = 0; 258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, 260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy, 261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float c, float s) = 0; 262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRot(Stream *stream, uint64 elem_count, 263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, 264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy, 265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double c, double s) = 0; 266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the parameters for a Givens rotation. 
268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given the Cartesian coordinates (a, b) of a point, these routines return 269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // the parameters c, s, r, and z associated with the Givens rotation. The 270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // parameters c and s define a unitary matrix such that: 271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | c s |.| a | = | r | 273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | -s c | | b | | 0 | 274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // The parameter z is defined such that if |a| > |b|, z is s; otherwise if 276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c is not 0 z is 1/c; otherwise z is 1. 277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a, 278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, DeviceMemory<float> *c, 279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *s) = 0; 280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a, 281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, DeviceMemory<double> *c, 282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *s) = 0; 283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a, 284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, 285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, 286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *s) = 0; 
287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a, 288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, 289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, 290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *s) = 0; 291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs modified Givens rotation of points in the plane. 293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given two vectors x and y, each vector element of these vectors is replaced 294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // as follows: 295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | x(i) | = H | x(i) | 297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | y(i) | | y(i) | 298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // for i=1 to n, where H is a modified Givens transformation matrix whose 300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // values are stored in the param[1] through param[4] array. 301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // For more information please Google this routine. 
302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotm(Stream *stream, uint64 elem_count, 303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy, 305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> ¶m) = 0; 306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotm(Stream *stream, uint64 elem_count, 307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy, 309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> ¶m) = 0; 310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the parameters for a modified Givens rotation. 312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Given Cartesian coordinates (x1, y1) of an input vector, these routines 313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // compute the components of a modified Givens transformation matrix H that 314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // zeros the y-component of the resulting vector: 315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | x1 | = H | x1 * sqrt(d1) | 317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // | 0 | | y1 * sqrt(d1) | 318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // For more information please Google this routine. 
320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1, 321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *d2, DeviceMemory<float> *x1, 322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y1, 323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *param) = 0; 324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1, 325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *d2, DeviceMemory<double> *x1, 326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y1, 327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *param) = 0; 328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes the product of a vector by a scalar: x <- a*x. 
330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, 331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, 333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, 335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, 337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, 339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasScal(Stream *stream, uint64 elem_count, 342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Swaps a vector with another vector. 
346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx, 348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx, 351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, 354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSwap(Stream *stream, uint64 elem_count, 356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, 357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Finds the index of the element with maximum absolute value. 
360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamax(Stream *stream, uint64 elem_count, 370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, 371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<int> *result) = 0; 372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Finds the index of the element with minimum absolute value. 
374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) = 0; 383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasIamin(Stream *stream, uint64 elem_count, 384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, 385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<int> *result) = 0; 386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a general band matrix: 388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a' * x + beta * y, 392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * conj(a') * x + beta * y, 394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 
395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an m-by-n general band matrix, with kl 396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // sub-diagonals and ku super-diagonals; x is a vector with 397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n(trans==kNoTranspose)/m(otherwise) elements; 398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements. 399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, float alpha, 401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, double alpha, 406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, 411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, 417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 kl, uint64 ku, 418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a general matrix. 
425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a' * x + beta * y, 429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * conj(a') * x + beta * y, 431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an m-by-n general matrix; x is a vector 433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // with n(trans==kNoTranspose)/m(otherwise) elements; 434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements. 435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, 437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<float> &x, int incx, 438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *y, int incy) = 0; 439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double alpha, const DeviceMemory<double> &a, 441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<double> &x, int incx, 442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *y, int incy) = 0; 443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 
444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, 445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, 450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, 451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a general matrix. 457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + a, 459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 
462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha, 463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, 464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha, 467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, 468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update (conjugated) of a general matrix. 472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(y') + a, 474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 
477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, 478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, 483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update (unconjugated) of a general matrix. 489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + a, 491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is 493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // an m-by-n general matrix. 
494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, 495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, 500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian band matrix. 506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian band matrix, with k 510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // super-diagonals; x and y are n-element vectors. 
511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<float> alpha, 513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<double> alpha, 519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian matrix. 525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian matrix; x and y are 529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vectors. 
530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, 531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, 537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a Hermitian matrix. 544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + a, 546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian 548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, 550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, 554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a Hermitian matrix. 559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a, 561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian 563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, 565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) = 0; 569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, 570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) = 0; 574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a Hermitian packed matrix. 576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n Hermitian matrix, supplied in 580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // packed form; x and y are n-element vectors. 
581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) = 0; 587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) = 0; 593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a Hermitian packed matrix. 595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(x') + a, 597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian 599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, 601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) = 0; 604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, 605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) = 0; 608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a Hermitian packed matrix. 610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a, 612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian 614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, 618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, 619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) = 0; 620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, 623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, 624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) = 0; 625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a symmetric band matrix. 627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric band matrix, with k 631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // super-diagonals; x and y are n-element vectors. 
632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, float alpha, const DeviceMemory<float> &a, 634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<float> &x, int incx, 635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *y, int incy) = 0; 636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, 637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, double alpha, const DeviceMemory<double> &a, 638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, const DeviceMemory<double> &x, int incx, 639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *y, int incy) = 0; 640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a symmetric packed matrix. 642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric matrix, supplied in 646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // packed form; x and y are n-element vectors. 
647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &ap, 649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, 652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &ap, 653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a symmetric packed matrix. 657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + a, 659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric 661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, 663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) = 0; 665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, 666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) = 0; 668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a symmetric packed matrix. 670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + alpha * y * x' + a, 672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric 674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix, supplied in packed form. 
675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) = 0; 679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, 680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) = 0; 683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product for a symmetric matrix. 685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // y <- alpha * a * x + beta * y, 687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is an n-by-n symmetric matrix; x and y are 689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vectors. 
690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, 691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, 693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) = 0; 694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, 695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, 697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) = 0; 698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-1 update of a symmetric matrix. 700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * x' + a, 702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric 704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, 706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, 709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a rank-2 update of a symmetric matrix. 713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a <- alpha * x * y' + alpha * y * x' + a, 715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric 717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix. 
718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, 719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, 720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, 721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) = 0; 722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, 723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, 724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, 725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) = 0; 726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular band matrix. 728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular band matrix, 736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // with k+1 diagonals; x is an n-element vector. 
737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, 740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, 744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, 748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, 749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, 751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, 753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, 754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur 756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // band matrix as below: 758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular band matrix, with k+1 diagonals. 767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, 770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, 774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 
776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, 778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, 779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, 781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, 783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, 784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular packed matrix. 787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular matrix, 795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // supplied in packed form; x is an n-element vector. 
796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, 799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, 803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, 809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur // packed matrix as below: 815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular matrix, supplied in packed form. 824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, 827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) = 0; 828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, 831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, 
835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, 837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, 839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-vector product using a triangular matrix. 842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a * x, 844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- a' * x, 846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x <- conj(a') * x, 848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a is an n-by-n unit, or non-unit, upper or lower triangular matrix; x is an 850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-element vector. 
851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, 864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a system of linear equations whose coefficients are in a triangular 
869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix as below: 870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a * x = b, 872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // a' * x = b, 874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // conj(a') * x = b, 876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or 878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // lower triangular matrix. 879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) = 0; 883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) = 0; 887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) = 0; 891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, 892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, 893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) = 0; 895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product with general matrices: 897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * op(a) * op(b) + beta * c, 899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(X) is one of op(X) = X, or op(X) = X', or op(X) = conj(X'); alpha and 901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // beta are scalars; a, b, and c are matrices; op(a) is an m-by-k matrix; 902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(b) is a k-by-n matrix; c is an m-by-n matrix. 903523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // 904523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // Note: The half interface uses float precision internally; the version 905523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // that uses half precision internally is not yet supported. There is no 906523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower // batched version of the half-precision interface. 907523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 908523055469c8a61425e3b8f104be67787c2933ccbA. 
Unique TensorFlower blas::Transpose transb, uint64 m, uint64 n, uint64 k, 90901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float alpha, const DeviceMemory<Eigen::half> &a, 91001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int lda, const DeviceMemory<Eigen::half> &b, int ldb, 91101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar float beta, DeviceMemory<Eigen::half> *c, 91201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc) = 0; 913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int 
lda, 927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa, 931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, 932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 93801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Gets a list of supported algorithms for DoBlasGemmWithAlgorithm. Note that 93901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // any or all of these algorithms may still fail when actually used. 94001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool GetBlasGemmAlgorithms( 94101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::vector<AlgorithmType> *out_algorithms) = 0; 94201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 94301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Like DoBlasGemm, but accepts an algorithm and a compute type. 94401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 94501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // The compute type lets you say (e.g.) 
that the inputs and outputs are 94601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Eigen::halfs, but you want the internal computations to be done with 94701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // float32 precision. 94801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 94901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // Note the subtle difference in the version that accepts Eigen::half -- 95001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // alpha and beta have type const Eigen::half&, not float. 95101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // 95201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // If output_profile_result is not null, a failure here does not put the 95301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // stream in a failure state. Instead, success/failure is indicated by 95401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // output_profile_result->is_valid(). This lets you use this function for 95501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // choosing the best algorithm among many (some of which may fail) without 95601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar // creating a new Stream for each attempt. 95701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 95801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 959a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, int lda, 960a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower const DeviceMemory<int8> &b, int ldb, int beta, DeviceMemory<int32> *c, 961a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower int ldc, ComputationType computation_type, AlgorithmType algorithm, 962a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower ProfileResult *output_profile_result) = 0; 963a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower virtual bool DoBlasGemmWithAlgorithm( 964a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 96501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, const Eigen::half &alpha, 96601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &a, int lda, 96701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta, 96801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<Eigen::half> *c, int ldc, ComputationType computation_type, 96901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; 97001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 97101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 97201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda, 97301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c, 97401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc, ComputationType computation_type, AlgorithmType algorithm, 97501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 97601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 97701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 97801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda, 97901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin 
Lebar const DeviceMemory<double> &b, int ldb, double beta, 98001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<double> *c, int ldc, ComputationType computation_type, 98101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; 98201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 98301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 98401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, std::complex<float> alpha, 98501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &a, int lda, 98601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &b, int ldb, 98701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, 98801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ComputationType computation_type, AlgorithmType algorithm, 98901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 99001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar virtual bool DoBlasGemmWithAlgorithm( 99101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 99201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 n, uint64 k, std::complex<double> alpha, 99301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &a, int lda, 99401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &b, int ldb, 99501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc, 99601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ComputationType computation_type, AlgorithmType algorithm, 
99701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar ProfileResult *output_profile_result) = 0; 99801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar 999f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a batch of matrix-matrix products with general matrices. 1000f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // This is a batched version of DoBlasGemm. 1001f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // The batched GEMM computes a matrix product for each input/output in a, b, 1002f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // and c, which contain batch_count DeviceMemory objects. 1003f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1004f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1005f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, float alpha, 1006f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &a, int lda, 1007f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta, 1008f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &c, int ldc, 100905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1010f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1011f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1012f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, double alpha, 1013f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &a, int lda, 1014f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, 1015f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &c, int ldc, 101605ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1017f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1018f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1019f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, std::complex<float> alpha, 1020f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, 1021f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, 1022f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1023f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, 102405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1025f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasGemmBatched( 1026f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, 1027f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, uint64 k, std::complex<double> alpha, 1028f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, int lda, 1029f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, int ldb, 1030f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1031f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, int ldc, 103205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) = 0; 1033f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1034f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is Hermitian: 1035f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1036f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b + beta * c, 1037f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1038f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b * a + beta * c, 1039f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1040f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is a Hermitian matrix; b and c are m-by-n 1041f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrices. 
1042f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemm(Stream *stream, blas::Side side, 1043f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1044f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1045f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1046f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1047f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1048f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1049f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHemm(Stream *stream, blas::Side side, 1050f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1051f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1052f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1053f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1054f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1055f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1056f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1057f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a Hermitian rank-k update. 
1058f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1059f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * conj(a') + beta * c, 1060f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1061f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * conj(a') * a + beta * c, 1062f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1063f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is a n-by-n Hermitian matrix; a is an n-by-k 1064f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix in the first case and a k-by-n matrix in the second case. 1065f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, 1066f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1067f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, 1068f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1069f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, 1070f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1071f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, 1072f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1073f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, 1074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, 1076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a Hermitian 
rank-2k update. 1079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * conj(b') + conj(alpha) * b * conj(a') + beta * c, 1081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * conj(b') * a + conj(alpha) * conj(a') * b + beta * c, 1083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1084f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is a n-by-n Hermitian matrix; a and b are 1085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-by-k matrices in the first case and k-by-n matrices in the second case. 1086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo, 1087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1091f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, 1092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo, 1094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, 
int ldb, 1098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, 1099f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldc) = 0; 1100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is symmetric. 1102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b + beta * c, 1104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b * a + beta * c, 1106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1107f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; a is a symmetric matrix; b and c are m-by-n 1108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrices. 1109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 1113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 1114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1115f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1117f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 
1118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 1119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSymm(Stream *stream, blas::Side side, 1127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, uint64 m, uint64 n, 1128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1130f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1131f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a symmetric rank-k update. 
1135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * a' + beta * c, 1137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a' * a + beta * c, 1139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is a n-by-n symmetric matrix; a is an n-by-k 1141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // matrix in the first case and a k-by-n matrix in the second case. 1142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<float> *c, int ldc) = 0; 1146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<double> *c, int ldc) = 0; 1150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 
1154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, 1157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Performs a symmetric rank-2k update. 1164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * a * b' + alpha * b * a' + beta * c, 1166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // c <- alpha * b' * a + alpha * a' * b + beta * c, 1168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha and beta are scalars; c is a n-by-n symmetric matrix; a and b are 1170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // n-by-k matrices in the first case and k-by-n matrices in the second case. 
1171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, 1174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, 1175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) = 0; 1176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, 1179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, 1180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) = 0; 1181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 1182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, 1186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, 1187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) = 0; 1188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, 
1189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, 1190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, 1193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, 1194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) = 0; 1195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Computes a matrix-matrix product where one input matrix is triangular. 1197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b <- alpha * op(a) * b, 1199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // b <- alpha * b * op(a) 1201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; b is an m-by-n matrix; a is a unit, or non-unit, upper 1203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', or 1204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(a) = conj(a'). 
1205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, float alpha, 1208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 1209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, int ldb) = 0; 1210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, double alpha, 1213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 1214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, int ldb) = 0; 1215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, 1218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) = 0; 1221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrmm(Stream *stream, blas::Side side, 1222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Diagonal diag, uint64 m, uint64 n, 1224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) = 0; 1227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Solves a triangular matrix equation. 1229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // op(a) * x = alpha * b, 1231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or 1232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // x * op(a) = alpha * b 1233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // 1234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // alpha is a scalar; x and b are m-by-n matrices; a is a unit, or non-unit, 1235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // upper or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', 1236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // or op(a) = conj(a'). 
1237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, float alpha, 1240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, 1241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, int ldb) = 0; 1242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, double alpha, 1245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, 1246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, int ldb) = 0; 1247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Diagonal diag, uint64 m, uint64 n, 1250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, 1251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, 1252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) = 0; 1253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur virtual bool DoBlasTrsm(Stream *stream, blas::Side side, 1254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::UpperLower uplo, blas::Transpose transa, 1255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Diagonal diag, uint64 m, uint64 n, 1256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, 1257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, 1258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) = 0; 1259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur protected: 1261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlasSupport() {} 1262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur private: 1264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur SE_DISALLOW_COPY_AND_ASSIGN(BlasSupport); 1265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}; 1266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Macro used to quickly declare overrides for abstract virtuals in the 1268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BlasSupport base class. 126905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower#define TENSORFLOW_STREAM_EXECUTOR_GPU_BLAS_SUPPORT_OVERRIDES \ 1270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAsum(Stream *stream, uint64 elem_count, \ 1280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha, \ 1283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha, \ 1286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur DeviceMemory<double> *y, int incy) override; \ 1288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, \ 1289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasAxpy(Stream *stream, uint64 elem_count, \ 1293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasCopy(Stream *stream, uint64 elem_count, \ 1303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasCopy(Stream *stream, uint64 elem_count, \ 1306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDot(Stream *stream, uint64 elem_count, \ 1309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDot(Stream *stream, uint64 elem_count, \ 1313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotc(Stream *stream, uint64 elem_count, \ 1317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) override; \ 1320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotc(Stream *stream, uint64 elem_count, \ 1321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
DeviceMemory<std::complex<double>> *result) override; \ 1324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotu(Stream *stream, uint64 elem_count, \ 1325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *result) override; \ 1328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasDotu(Stream *stream, uint64 elem_count, \ 1329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *result) override; \ 1332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *result) override; \ 1341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur bool DoBlasNrm2(Stream *stream, uint64 elem_count, \ 1342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *result) override; \ 1344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy, float c, float s) \ 1346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy, double c, \ 1349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) override; \ 1350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, \ 1351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, \ 1352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy, float c, \ 1353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float s) override; \ 1354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRot(Stream *stream, uint64 elem_count, \ 1355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, \ 1356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy, double c, \ 1357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double s) override; \ 1358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a, \ 1359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *b, 
DeviceMemory<float> *c, \ 1360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *s) override; \ 1361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a, \ 1362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *b, DeviceMemory<double> *c, \ 1363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *s) override; \ 1364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a, \ 1365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, \ 1366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, \ 1367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *s) override; \ 1368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a, \ 1369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, \ 1370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, \ 1371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *s) override; \ 1372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy, \ 1374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &param) override; \ 1375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy, \ 1377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &param) 
override; \ 1378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1, \ 1379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *d2, DeviceMemory<float> *x1, \ 1380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y1, DeviceMemory<float> *param) \ 1381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1, \ 1383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *d2, DeviceMemory<double> *x1, \ 1384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y1, \ 1385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *param) override; \ 1386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, \ 1387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, \ 1389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, \ 1391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, \ 1393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, \ 1395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
std::complex<float> alpha, \ 1396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasScal(Stream *stream, uint64 elem_count, \ 1398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<float> *x, \ 1401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<float> *y, int incy) override; \ 1402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<double> *x, \ 1403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<double> *y, int incy) override; \ 1404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, \ 1405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx, \ 1406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSwap(Stream *stream, uint64 elem_count, \ 1408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx, \ 1409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 
1413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamax(Stream *stream, uint64 elem_count, \ 1420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasIamin(Stream *stream, uint64 elem_count, \ 1429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasIamin(Stream *stream, uint64 elem_count, \ 1432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<int> *result) override; \ 1434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, float alpha, \ 1436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, double alpha, \ 1441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, std::complex<float> alpha, \ 1446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 kl, uint64 ku, std::complex<double> alpha, \ 1452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const 
DeviceMemory<std::complex<float>> &a, int lda, \ 1467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n, \ 1471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha, \ 1477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha, \ 1481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, \ 1482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 1484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGerc(Stream 
*stream, uint64 m, uint64 n, \ 1485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, \ 1490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, \ 1495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, \ 1500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const 
DeviceMemory<std::complex<double>> &y, int incy, \ 1503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 
1520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<std::complex<double>> &x, \ 1533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<std::complex<double>> *a, int lda) \ 1534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> 
&x, int incx, \ 1538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *a, int lda) override; \ 1540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *a, int lda) override; \ 1545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *y, int incy) override; \ 1551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, \ 1554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 
1556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *y, int incy) override; \ 1557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) override; \ 1560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<std::complex<double>> &x, \ 1562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx, DeviceMemory<std::complex<double>> *ap) override; \ 1563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &x, int incx, \ 1566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &y, int incy, \ 1567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *ap) override; \ 1568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &x, int incx, \ 1571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &y, int incy, \ 1572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *ap) override; \ 
1573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k, \ 1578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &ap, \ 1583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &ap, \ 1587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 
1590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) override; \ 1592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) override; \ 1595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, \ 1597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *ap) override; \ 1599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *ap) override; \ 1603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, float beta, \ 1606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *y, int incy) override; \ 1607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymv(Stream *stream, blas::UpperLower 
uplo, uint64 n, \ 1608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &x, int incx, double beta, \ 1610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *y, int incy) override; \ 1611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \ 1612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &x, int incx, \ 1613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 1617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &x, int incx, \ 1619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &y, int incy, \ 1620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *a, int lda) override; \ 1621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n, \ 1622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &x, int incx, \ 1623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &y, int incy, \ 1624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *a, int lda) override; \ 
1625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, \ 1628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, \ 1632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, \ 1636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, int incx) \ 1637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, \ 1639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, \ 1641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, int incx) \ 1642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 
1643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<float> &a, int lda, \ 1646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<double> &a, int lda, \ 1650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<float>> &a, \ 1654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<float>> *x, int incx) \ 1655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, \ 1657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, const DeviceMemory<std::complex<double>> &a, \ 1659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<std::complex<double>> *x, int incx) \ 1660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 
1661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, \ 1664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, DeviceMemory<double> *x, \ 1668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, \ 1674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, \ 1676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, 
blas::Diagonal diag, uint64 n, \ 1679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &ap, DeviceMemory<float> *x, \ 1680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &ap, DeviceMemory<double> *x, \ 1684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int incx) override; \ 1685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &ap, \ 1688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, \ 1690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &ap, \ 1692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
DeviceMemory<float> *x, int incx) override; \ 1697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, \ 1706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *x, int incx) override; \ 1713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 
1714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *x, int incx) override; \ 1717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *x, int incx) override; \ 1721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, \ 1722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, blas::Diagonal diag, uint64 n, \ 1723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *x, int incx) override; \ 1725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1727523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower float alpha, const DeviceMemory<Eigen::half> &a, int lda, \ 1728523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower const DeviceMemory<Eigen::half> &b, int ldb, float beta, \ 1729523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower DeviceMemory<Eigen::half> *c, int ldc) override; \ 1730523055469c8a61425e3b8f104be67787c2933ccbA. 
Unique TensorFlower bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1731523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float alpha, const DeviceMemory<float> &a, int lda, \ 1733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 1734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 1735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double alpha, const DeviceMemory<double> &a, int lda, \ 1738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 1739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 1740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 1741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemm(Stream *stream, blas::Transpose transa, \ 
1748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transb, uint64 m, uint64 n, uint64 k, \ 1749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 175401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool GetBlasGemmAlgorithms(std::vector<blas::AlgorithmType> *out_algorithms) \ 175501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar override; \ 175601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 175701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1758a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower uint64 m, uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, \ 1759a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower int lda, const DeviceMemory<int8> &b, int ldb, int beta, \ 1760a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower DeviceMemory<int> *c, int ldc, blas::ComputationType computation_type, \ 1761a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower blas::AlgorithmType algorithm, \ 1762a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower blas::ProfileResult *output_profile_result) override; \ 1763a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower bool DoBlasGemmWithAlgorithm( \ 1764a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. 
Unique TensorFlower Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 176501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, const Eigen::half &alpha, \ 176601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &a, int lda, \ 176701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta, \ 176801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<Eigen::half> *c, int ldc, \ 176901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 177001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 177101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 177201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 177301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \ 177401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int lda, const DeviceMemory<float> &b, int ldb, float beta, \ 177501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar DeviceMemory<float> *c, int ldc, blas::ComputationType computation_type, \ 177601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::AlgorithmType algorithm, \ 177701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 177801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 177901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 178001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, double alpha, \ 178101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<double> &a, int lda, const 
DeviceMemory<double> &b, \ 178201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldb, double beta, DeviceMemory<double> *c, int ldc, \ 178301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 178401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 178501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 178601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 178701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, std::complex<float> alpha, \ 178801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &a, int lda, \ 178901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<float>> &b, int ldb, \ 179001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \ 179101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ 179201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 179301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar bool DoBlasGemmWithAlgorithm( \ 179401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 179501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar uint64 m, uint64 n, uint64 k, std::complex<double> alpha, \ 179601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &a, int lda, \ 179701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar const DeviceMemory<std::complex<double>> &b, int ldb, \ 179801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar std::complex<double> beta, DeviceMemory<std::complex<double>> *c, \ 
179901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar int ldc, blas::ComputationType computation_type, \ 180001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::AlgorithmType algorithm, \ 180101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar blas::ProfileResult *output_profile_result) override; \ 1802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, float alpha, \ 1805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &a, int lda, \ 1806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta, \ 1807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<float> *> &c, int ldc, \ 180805ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, double alpha, \ 1812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &a, int lda, \ 1813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, \ 1814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<double> *> &c, int ldc, \ 181505ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, std::complex<float> alpha, \ 1819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, \ 1820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, \ 1821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, \ 182305ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower int batch_count, ScratchAllocator *scratch_allocator) override; \ 1824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasGemmBatched( \ 1825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::Transpose transa, blas::Transpose transb, \ 1826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, uint64 k, std::complex<double> alpha, \ 1827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, \ 1828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, \ 1829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, \ 1830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int ldb, std::complex<double> beta, \ 1831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, \ 183205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. 
Unique TensorFlower int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \ 1833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<float> alpha, \ 1835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<double> alpha, \ 1841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, \ 1846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 1847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur float beta, DeviceMemory<std::complex<float>> *c, int ldc) \ 1849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
override; \ 1850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, \ 1851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 1852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur double beta, DeviceMemory<std::complex<double>> *c, int ldc) \ 1854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur override; \ 1855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2k( \ 1856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n, \ 1857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<float> alpha, \ 1858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, float beta, \ 1860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasHer2k( \ 1862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n, \ 1863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 k, std::complex<double> alpha, \ 1864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, double beta, \ 1866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream 
*stream, blas::Side side, blas::UpperLower uplo, \ 1868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, float alpha, \ 1869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 1871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 1872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, double alpha, \ 1874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 1876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 1877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<float> alpha, \ 1879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 m, uint64 n, std::complex<double> alpha, \ 1885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath 
Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 1890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 1891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, float beta, \ 1892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 1893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 1894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 1895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, double beta, \ 1896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 1897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 1898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 1899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 1900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1903f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool 
DoBlasSyrk(Stream *stream, blas::UpperLower uplo, \ 1904f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 1905f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1906f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1907f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1908f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1909f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 1910f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, float alpha, \ 1911f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &a, int lda, \ 1912f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<float> &b, int ldb, float beta, \ 1913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<float> *c, int ldc) override; \ 1914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 1915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, double alpha, \ 1916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &a, int lda, \ 1917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<double> &b, int ldb, double beta, \ 1918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<double> *c, int ldc) override; \ 1919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 1920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 1921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> alpha, \ 
1922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &b, int ldb, \ 1924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<float> beta, \ 1925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *c, int ldc) override; \ 1926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo, \ 1927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose trans, uint64 n, uint64 k, \ 1928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> alpha, \ 1929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &b, int ldb, \ 1931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur std::complex<double> beta, \ 1932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *c, int ldc) override; \ 1933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, \ 1936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<float> *b, int ldb) override; \ 1937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1938f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double 
alpha, const DeviceMemory<double> &a, \ 1940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<double> *b, int ldb) override; \ 1941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, \ 1944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1945f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) override; \ 1946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1947f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1948f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, \ 1949f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1950f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) override; \ 1951f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1952f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1953f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, float alpha, const DeviceMemory<float> &a, \ 1954f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<float> *b, int ldb) override; \ 1955f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1956f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 
blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, double alpha, const DeviceMemory<double> &a, \ 1958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur int lda, DeviceMemory<double> *b, int ldb) override; \ 1959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<float> alpha, \ 1962f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<float>> &a, int lda, \ 1963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<float>> *b, int ldb) override; \ 1964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo, \ 1965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur blas::Transpose transa, blas::Diagonal diag, uint64 m, \ 1966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 n, std::complex<double> alpha, \ 1967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur const DeviceMemory<std::complex<double>> &a, int lda, \ 1968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur DeviceMemory<std::complex<double>> *b, int ldb) override; 1969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace blas 1971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace gputools 1972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace perftools 1973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 1974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#endif // TENSORFLOW_STREAM_EXECUTOR_BLAS_H_ 1975